1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 // localematcher.cpp
5 // created: 2019may08 Markus W. Scherer
6
7 #include <optional>
8
9 #include "unicode/utypes.h"
10 #include "unicode/localebuilder.h"
11 #include "unicode/localematcher.h"
12 #include "unicode/locid.h"
13 #include "unicode/stringpiece.h"
14 #include "unicode/uloc.h"
15 #include "unicode/uobject.h"
16 #include "cstring.h"
17 #include "localeprioritylist.h"
18 #include "loclikelysubtags.h"
19 #include "locdistance.h"
20 #include "lsr.h"
21 #include "uassert.h"
22 #include "uhash.h"
23 #include "ustr_imp.h"
24 #include "uvector.h"
25
26 #define UND_LSR LSR("und", "", "", LSR::EXPLICIT_LSR)
27
/**
 * Tells the LocaleMatcher how long the desired-locale objects handed to it stay valid.
 *
 * @draft ICU 65
 */
enum ULocMatchLifetime {
    /**
     * The locale objects are short-lived.
     * Whenever the matcher needs a locale beyond a single function call, it copies it.
     *
     * @draft ICU 65
     */
    ULOCMATCH_TEMPORARY_LOCALES,
    /**
     * The locale objects remain stored for at least as long as the matcher is in use.
     * Whenever the matcher needs a locale beyond a single function call,
     * it keeps just a pointer to it and skips the copy.
     *
     * @draft ICU 65
     */
    ULOCMATCH_STORED_LOCALES  // TODO: permanent? cached? clone?
};
#ifndef U_IN_DOXYGEN
typedef enum ULocMatchLifetime ULocMatchLifetime;
#endif
53
54 U_NAMESPACE_BEGIN
55
Result(LocaleMatcher::Result && src)56 LocaleMatcher::Result::Result(LocaleMatcher::Result &&src) noexcept :
57 desiredLocale(src.desiredLocale),
58 supportedLocale(src.supportedLocale),
59 desiredIndex(src.desiredIndex),
60 supportedIndex(src.supportedIndex),
61 desiredIsOwned(src.desiredIsOwned) {
62 if (desiredIsOwned) {
63 src.desiredLocale = nullptr;
64 src.desiredIndex = -1;
65 src.desiredIsOwned = false;
66 }
67 }
68
~Result()69 LocaleMatcher::Result::~Result() {
70 if (desiredIsOwned) {
71 delete desiredLocale;
72 }
73 }
74
operator =(LocaleMatcher::Result && src)75 LocaleMatcher::Result &LocaleMatcher::Result::operator=(LocaleMatcher::Result &&src) noexcept {
76 this->~Result();
77
78 desiredLocale = src.desiredLocale;
79 supportedLocale = src.supportedLocale;
80 desiredIndex = src.desiredIndex;
81 supportedIndex = src.supportedIndex;
82 desiredIsOwned = src.desiredIsOwned;
83
84 if (desiredIsOwned) {
85 src.desiredLocale = nullptr;
86 src.desiredIndex = -1;
87 src.desiredIsOwned = false;
88 }
89 return *this;
90 }
91
makeResolvedLocale(UErrorCode & errorCode) const92 Locale LocaleMatcher::Result::makeResolvedLocale(UErrorCode &errorCode) const {
93 if (U_FAILURE(errorCode) || supportedLocale == nullptr) {
94 return Locale::getRoot();
95 }
96 const Locale *bestDesired = getDesiredLocale();
97 if (bestDesired == nullptr || *supportedLocale == *bestDesired) {
98 return *supportedLocale;
99 }
100 LocaleBuilder b;
101 b.setLocale(*supportedLocale);
102
103 // Copy the region from bestDesired, if there is one.
104 const char *region = bestDesired->getCountry();
105 if (*region != 0) {
106 b.setRegion(region);
107 }
108
109 // Copy the variants from bestDesired, if there are any.
110 // Note that this will override any supportedLocale variants.
111 // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
112 const char *variants = bestDesired->getVariant();
113 if (*variants != 0) {
114 b.setVariant(variants);
115 }
116
117 // Copy the extensions from bestDesired, if there are any.
118 // C++ note: The following note, copied from Java, may not be true,
119 // as long as C++ copies by legacy ICU keyword, not by extension singleton.
120 // Note that this will override any supportedLocale extensions.
121 // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
122 // (replacing calendar).
123 b.copyExtensionsFrom(*bestDesired, errorCode);
124 return b.build(errorCode);
125 }
126
Builder(LocaleMatcher::Builder && src)127 LocaleMatcher::Builder::Builder(LocaleMatcher::Builder &&src) noexcept :
128 errorCode_(src.errorCode_),
129 supportedLocales_(src.supportedLocales_),
130 thresholdDistance_(src.thresholdDistance_),
131 demotion_(src.demotion_),
132 defaultLocale_(src.defaultLocale_),
133 withDefault_(src.withDefault_),
134 favor_(src.favor_),
135 direction_(src.direction_) {
136 src.supportedLocales_ = nullptr;
137 src.defaultLocale_ = nullptr;
138 }
139
~Builder()140 LocaleMatcher::Builder::~Builder() {
141 delete supportedLocales_;
142 delete defaultLocale_;
143 delete maxDistanceDesired_;
144 delete maxDistanceSupported_;
145 }
146
operator =(LocaleMatcher::Builder && src)147 LocaleMatcher::Builder &LocaleMatcher::Builder::operator=(LocaleMatcher::Builder &&src) noexcept {
148 this->~Builder();
149
150 errorCode_ = src.errorCode_;
151 supportedLocales_ = src.supportedLocales_;
152 thresholdDistance_ = src.thresholdDistance_;
153 demotion_ = src.demotion_;
154 defaultLocale_ = src.defaultLocale_;
155 withDefault_ = src.withDefault_,
156 favor_ = src.favor_;
157 direction_ = src.direction_;
158
159 src.supportedLocales_ = nullptr;
160 src.defaultLocale_ = nullptr;
161 return *this;
162 }
163
clearSupportedLocales()164 void LocaleMatcher::Builder::clearSupportedLocales() {
165 if (supportedLocales_ != nullptr) {
166 supportedLocales_->removeAllElements();
167 }
168 }
169
ensureSupportedLocaleVector()170 bool LocaleMatcher::Builder::ensureSupportedLocaleVector() {
171 if (U_FAILURE(errorCode_)) { return false; }
172 if (supportedLocales_ != nullptr) { return true; }
173 LocalPointer<UVector> lpSupportedLocales(new UVector(uprv_deleteUObject, nullptr, errorCode_), errorCode_);
174 if (U_FAILURE(errorCode_)) { return false; }
175 supportedLocales_ = lpSupportedLocales.orphan();
176 return true;
177 }
178
setSupportedLocalesFromListString(StringPiece locales)179 LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListString(
180 StringPiece locales) {
181 LocalePriorityList list(locales, errorCode_);
182 if (U_FAILURE(errorCode_)) { return *this; }
183 clearSupportedLocales();
184 if (!ensureSupportedLocaleVector()) { return *this; }
185 int32_t length = list.getLengthIncludingRemoved();
186 for (int32_t i = 0; i < length; ++i) {
187 Locale *locale = list.orphanLocaleAt(i);
188 if (locale == nullptr) { continue; }
189 supportedLocales_->adoptElement(locale, errorCode_);
190 if (U_FAILURE(errorCode_)) {
191 break;
192 }
193 }
194 return *this;
195 }
196
setSupportedLocales(Locale::Iterator & locales)197 LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) {
198 if (ensureSupportedLocaleVector()) {
199 clearSupportedLocales();
200 while (locales.hasNext() && U_SUCCESS(errorCode_)) {
201 const Locale &locale = locales.next();
202 LocalPointer<Locale> clone (locale.clone(), errorCode_);
203 supportedLocales_->adoptElement(clone.orphan(), errorCode_);
204 }
205 }
206 return *this;
207 }
208
addSupportedLocale(const Locale & locale)209 LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) {
210 if (ensureSupportedLocaleVector()) {
211 LocalPointer<Locale> clone(locale.clone(), errorCode_);
212 supportedLocales_->adoptElement(clone.orphan(), errorCode_);
213 }
214 return *this;
215 }
216
setNoDefaultLocale()217 LocaleMatcher::Builder &LocaleMatcher::Builder::setNoDefaultLocale() {
218 if (U_FAILURE(errorCode_)) { return *this; }
219 delete defaultLocale_;
220 defaultLocale_ = nullptr;
221 withDefault_ = false;
222 return *this;
223 }
224
setDefaultLocale(const Locale * defaultLocale)225 LocaleMatcher::Builder &LocaleMatcher::Builder::setDefaultLocale(const Locale *defaultLocale) {
226 if (U_FAILURE(errorCode_)) { return *this; }
227 Locale *clone = nullptr;
228 if (defaultLocale != nullptr) {
229 clone = defaultLocale->clone();
230 if (clone == nullptr) {
231 errorCode_ = U_MEMORY_ALLOCATION_ERROR;
232 return *this;
233 }
234 }
235 delete defaultLocale_;
236 defaultLocale_ = clone;
237 withDefault_ = true;
238 return *this;
239 }
240
setFavorSubtag(ULocMatchFavorSubtag subtag)241 LocaleMatcher::Builder &LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag subtag) {
242 if (U_FAILURE(errorCode_)) { return *this; }
243 favor_ = subtag;
244 return *this;
245 }
246
setDemotionPerDesiredLocale(ULocMatchDemotion demotion)247 LocaleMatcher::Builder &LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion demotion) {
248 if (U_FAILURE(errorCode_)) { return *this; }
249 demotion_ = demotion;
250 return *this;
251 }
252
setMaxDistance(const Locale & desired,const Locale & supported)253 LocaleMatcher::Builder &LocaleMatcher::Builder::setMaxDistance(const Locale &desired,
254 const Locale &supported) {
255 if (U_FAILURE(errorCode_)) { return *this; }
256 Locale *desiredClone = desired.clone();
257 Locale *supportedClone = supported.clone();
258 if (desiredClone == nullptr || supportedClone == nullptr) {
259 delete desiredClone; // in case only one could not be allocated
260 delete supportedClone;
261 errorCode_ = U_MEMORY_ALLOCATION_ERROR;
262 return *this;
263 }
264 delete maxDistanceDesired_;
265 delete maxDistanceSupported_;
266 maxDistanceDesired_ = desiredClone;
267 maxDistanceSupported_ = supportedClone;
268 return *this;
269 }
270
271 #if 0
272 /**
273 * <i>Internal only!</i>
274 *
275 * @param thresholdDistance the thresholdDistance to set, with -1 = default
276 * @return this Builder object
277 * @internal
278 * @deprecated This API is ICU internal only.
279 */
280 @Deprecated
281 LocaleMatcher::Builder &LocaleMatcher::Builder::internalSetThresholdDistance(int32_t thresholdDistance) {
282 if (U_FAILURE(errorCode_)) { return *this; }
283 if (thresholdDistance > 100) {
284 thresholdDistance = 100;
285 }
286 thresholdDistance_ = thresholdDistance;
287 return *this;
288 }
289 #endif
290
copyErrorTo(UErrorCode & outErrorCode) const291 UBool LocaleMatcher::Builder::copyErrorTo(UErrorCode &outErrorCode) const {
292 if (U_FAILURE(outErrorCode)) { return true; }
293 if (U_SUCCESS(errorCode_)) { return false; }
294 outErrorCode = errorCode_;
295 return true;
296 }
297
build(UErrorCode & errorCode) const298 LocaleMatcher LocaleMatcher::Builder::build(UErrorCode &errorCode) const {
299 if (U_SUCCESS(errorCode) && U_FAILURE(errorCode_)) {
300 errorCode = errorCode_;
301 }
302 return LocaleMatcher(*this, errorCode);
303 }
304
305 namespace {
306
getMaximalLsrOrUnd(const LikelySubtags & likelySubtags,const Locale & locale,UErrorCode & errorCode)307 LSR getMaximalLsrOrUnd(const LikelySubtags &likelySubtags, const Locale &locale,
308 UErrorCode &errorCode) {
309 if (U_FAILURE(errorCode) || locale.isBogus() || *locale.getName() == 0 /* "und" */) {
310 return UND_LSR;
311 } else {
312 return likelySubtags.makeMaximizedLsrFrom(locale, false, errorCode);
313 }
314 }
315
hashLSR(const UHashTok token)316 int32_t hashLSR(const UHashTok token) {
317 const LSR *lsr = static_cast<const LSR *>(token.pointer);
318 return lsr->hashCode;
319 }
320
compareLSRs(const UHashTok t1,const UHashTok t2)321 UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
322 const LSR *lsr1 = static_cast<const LSR *>(t1.pointer);
323 const LSR *lsr2 = static_cast<const LSR *>(t2.pointer);
324 return *lsr1 == *lsr2;
325 }
326
327 } // namespace
328
putIfAbsent(const LSR & lsr,int32_t i,int32_t suppLength,UErrorCode & errorCode)329 int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength,
330 UErrorCode &errorCode) {
331 if (U_FAILURE(errorCode)) { return suppLength; }
332 if (!uhash_containsKey(supportedLsrToIndex, &lsr)) {
333 uhash_putiAllowZero(supportedLsrToIndex, const_cast<LSR *>(&lsr), i, &errorCode);
334 if (U_SUCCESS(errorCode)) {
335 supportedLSRs[suppLength] = &lsr;
336 supportedIndexes[suppLength++] = i;
337 }
338 }
339 return suppLength;
340 }
341
LocaleMatcher(const Builder & builder,UErrorCode & errorCode)342 LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
343 likelySubtags(*LikelySubtags::getSingleton(errorCode)),
344 localeDistance(*LocaleDistance::getSingleton(errorCode)),
345 thresholdDistance(builder.thresholdDistance_),
346 demotionPerDesiredLocale(0),
347 favorSubtag(builder.favor_),
348 direction(builder.direction_),
349 supportedLocales(nullptr), lsrs(nullptr), supportedLocalesLength(0),
350 supportedLsrToIndex(nullptr),
351 supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0),
352 ownedDefaultLocale(nullptr), defaultLocale(nullptr) {
353 if (U_FAILURE(errorCode)) { return; }
354 const Locale *def = builder.defaultLocale_;
355 LSR builderDefaultLSR;
356 const LSR *defLSR = nullptr;
357 if (def != nullptr) {
358 ownedDefaultLocale = def->clone();
359 if (ownedDefaultLocale == nullptr) {
360 errorCode = U_MEMORY_ALLOCATION_ERROR;
361 return;
362 }
363 def = ownedDefaultLocale;
364 builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode);
365 if (U_FAILURE(errorCode)) { return; }
366 defLSR = &builderDefaultLSR;
367 }
368 supportedLocalesLength = builder.supportedLocales_ != nullptr ?
369 builder.supportedLocales_->size() : 0;
370 if (supportedLocalesLength > 0) {
371 // Store the supported locales in input order,
372 // so that when different types are used (e.g., language tag strings)
373 // we can return those by parallel index.
374 supportedLocales = static_cast<const Locale **>(
375 uprv_malloc(supportedLocalesLength * sizeof(const Locale *)));
376 // Supported LRSs in input order.
377 // In C++, we store these permanently to simplify ownership management
378 // in the hash tables. Duplicate LSRs (if any) are unused overhead.
379 lsrs = new LSR[supportedLocalesLength];
380 if (supportedLocales == nullptr || lsrs == nullptr) {
381 errorCode = U_MEMORY_ALLOCATION_ERROR;
382 return;
383 }
384 // If the constructor fails partway, we need null pointers for destructibility.
385 uprv_memset(supportedLocales, 0, supportedLocalesLength * sizeof(const Locale *));
386 for (int32_t i = 0; i < supportedLocalesLength; ++i) {
387 const Locale &locale = *static_cast<Locale *>(builder.supportedLocales_->elementAt(i));
388 supportedLocales[i] = locale.clone();
389 if (supportedLocales[i] == nullptr) {
390 errorCode = U_MEMORY_ALLOCATION_ERROR;
391 return;
392 }
393 const Locale &supportedLocale = *supportedLocales[i];
394 LSR &lsr = lsrs[i] = getMaximalLsrOrUnd(likelySubtags, supportedLocale, errorCode);
395 lsr.setHashCode();
396 if (U_FAILURE(errorCode)) { return; }
397 }
398
399 // We need an unordered map from LSR to first supported locale with that LSR,
400 // and an ordered list of (LSR, supported index) for
401 // the supported locales in the following order:
402 // 1. Default locale, if it is supported.
403 // 2. Priority locales (aka "paradigm locales") in builder order.
404 // 3. Remaining locales in builder order.
405 supportedLsrToIndex = uhash_openSize(hashLSR, compareLSRs, uhash_compareLong,
406 supportedLocalesLength, &errorCode);
407 if (U_FAILURE(errorCode)) { return; }
408 supportedLSRs = static_cast<const LSR **>(
409 uprv_malloc(supportedLocalesLength * sizeof(const LSR *)));
410 supportedIndexes = static_cast<int32_t *>(
411 uprv_malloc(supportedLocalesLength * sizeof(int32_t)));
412 if (supportedLSRs == nullptr || supportedIndexes == nullptr) {
413 errorCode = U_MEMORY_ALLOCATION_ERROR;
414 return;
415 }
416 int32_t suppLength = 0;
417 // Determine insertion order.
418 // Add locales immediately that are equivalent to the default.
419 MaybeStackArray<int8_t, 100> order(supportedLocalesLength, errorCode);
420 if (U_FAILURE(errorCode)) { return; }
421 int32_t numParadigms = 0;
422 for (int32_t i = 0; i < supportedLocalesLength; ++i) {
423 const Locale &locale = *supportedLocales[i];
424 const LSR &lsr = lsrs[i];
425 if (defLSR == nullptr && builder.withDefault_) {
426 // Implicit default locale = first supported locale, if not turned off.
427 U_ASSERT(i == 0);
428 def = &locale;
429 defLSR = &lsr;
430 order[i] = 1;
431 suppLength = putIfAbsent(lsr, 0, suppLength, errorCode);
432 } else if (defLSR != nullptr && lsr.isEquivalentTo(*defLSR)) {
433 order[i] = 1;
434 suppLength = putIfAbsent(lsr, i, suppLength, errorCode);
435 } else if (localeDistance.isParadigmLSR(lsr)) {
436 order[i] = 2;
437 ++numParadigms;
438 } else {
439 order[i] = 3;
440 }
441 if (U_FAILURE(errorCode)) { return; }
442 }
443 // Add supported paradigm locales.
444 int32_t paradigmLimit = suppLength + numParadigms;
445 for (int32_t i = 0; i < supportedLocalesLength && suppLength < paradigmLimit; ++i) {
446 if (order[i] == 2) {
447 suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode);
448 }
449 }
450 // Add remaining supported locales.
451 for (int32_t i = 0; i < supportedLocalesLength; ++i) {
452 if (order[i] == 3) {
453 suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode);
454 }
455 }
456 supportedLSRsLength = suppLength;
457 // If supportedLSRsLength < supportedLocalesLength then
458 // we waste as many array slots as there are duplicate supported LSRs,
459 // but the amount of wasted space is small as long as there are few duplicates.
460 }
461
462 defaultLocale = def;
463
464 if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) {
465 demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale();
466 }
467
468 if (thresholdDistance >= 0) {
469 // already copied
470 } else if (builder.maxDistanceDesired_ != nullptr) {
471 LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceSupported_, errorCode);
472 const LSR *pSuppLSR = &suppLSR;
473 int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
474 getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceDesired_, errorCode),
475 &pSuppLSR, 1,
476 LocaleDistance::shiftDistance(100), favorSubtag, direction);
477 if (U_SUCCESS(errorCode)) {
478 // +1 for an exclusive threshold from an inclusive max.
479 thresholdDistance = LocaleDistance::getDistanceFloor(indexAndDistance) + 1;
480 } else {
481 thresholdDistance = 0;
482 }
483 } else {
484 thresholdDistance = localeDistance.getDefaultScriptDistance();
485 }
486 }
487
LocaleMatcher(LocaleMatcher && src)488 LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) noexcept :
489 likelySubtags(src.likelySubtags),
490 localeDistance(src.localeDistance),
491 thresholdDistance(src.thresholdDistance),
492 demotionPerDesiredLocale(src.demotionPerDesiredLocale),
493 favorSubtag(src.favorSubtag),
494 direction(src.direction),
495 supportedLocales(src.supportedLocales), lsrs(src.lsrs),
496 supportedLocalesLength(src.supportedLocalesLength),
497 supportedLsrToIndex(src.supportedLsrToIndex),
498 supportedLSRs(src.supportedLSRs),
499 supportedIndexes(src.supportedIndexes),
500 supportedLSRsLength(src.supportedLSRsLength),
501 ownedDefaultLocale(src.ownedDefaultLocale), defaultLocale(src.defaultLocale) {
502 src.supportedLocales = nullptr;
503 src.lsrs = nullptr;
504 src.supportedLocalesLength = 0;
505 src.supportedLsrToIndex = nullptr;
506 src.supportedLSRs = nullptr;
507 src.supportedIndexes = nullptr;
508 src.supportedLSRsLength = 0;
509 src.ownedDefaultLocale = nullptr;
510 src.defaultLocale = nullptr;
511 }
512
~LocaleMatcher()513 LocaleMatcher::~LocaleMatcher() {
514 for (int32_t i = 0; i < supportedLocalesLength; ++i) {
515 delete supportedLocales[i];
516 }
517 uprv_free(supportedLocales);
518 delete[] lsrs;
519 uhash_close(supportedLsrToIndex);
520 uprv_free(supportedLSRs);
521 uprv_free(supportedIndexes);
522 delete ownedDefaultLocale;
523 }
524
operator =(LocaleMatcher && src)525 LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) noexcept {
526 this->~LocaleMatcher();
527
528 thresholdDistance = src.thresholdDistance;
529 demotionPerDesiredLocale = src.demotionPerDesiredLocale;
530 favorSubtag = src.favorSubtag;
531 direction = src.direction;
532 supportedLocales = src.supportedLocales;
533 lsrs = src.lsrs;
534 supportedLocalesLength = src.supportedLocalesLength;
535 supportedLsrToIndex = src.supportedLsrToIndex;
536 supportedLSRs = src.supportedLSRs;
537 supportedIndexes = src.supportedIndexes;
538 supportedLSRsLength = src.supportedLSRsLength;
539 ownedDefaultLocale = src.ownedDefaultLocale;
540 defaultLocale = src.defaultLocale;
541
542 src.supportedLocales = nullptr;
543 src.lsrs = nullptr;
544 src.supportedLocalesLength = 0;
545 src.supportedLsrToIndex = nullptr;
546 src.supportedLSRs = nullptr;
547 src.supportedIndexes = nullptr;
548 src.supportedLSRsLength = 0;
549 src.ownedDefaultLocale = nullptr;
550 src.defaultLocale = nullptr;
551 return *this;
552 }
553
554 class LocaleLsrIterator {
555 public:
LocaleLsrIterator(const LikelySubtags & likelySubtags,Locale::Iterator & locales,ULocMatchLifetime lifetime)556 LocaleLsrIterator(const LikelySubtags &likelySubtags, Locale::Iterator &locales,
557 ULocMatchLifetime lifetime) :
558 likelySubtags(likelySubtags), locales(locales), lifetime(lifetime) {}
559
~LocaleLsrIterator()560 ~LocaleLsrIterator() {
561 if (lifetime == ULOCMATCH_TEMPORARY_LOCALES) {
562 delete remembered;
563 }
564 }
565
hasNext() const566 bool hasNext() const {
567 return locales.hasNext();
568 }
569
next(UErrorCode & errorCode)570 LSR next(UErrorCode &errorCode) {
571 current = &locales.next();
572 return getMaximalLsrOrUnd(likelySubtags, *current, errorCode);
573 }
574
rememberCurrent(int32_t desiredIndex,UErrorCode & errorCode)575 void rememberCurrent(int32_t desiredIndex, UErrorCode &errorCode) {
576 if (U_FAILURE(errorCode)) { return; }
577 bestDesiredIndex = desiredIndex;
578 if (lifetime == ULOCMATCH_STORED_LOCALES) {
579 remembered = current;
580 } else {
581 // ULOCMATCH_TEMPORARY_LOCALES
582 delete remembered;
583 remembered = new Locale(*current);
584 if (remembered == nullptr) {
585 errorCode = U_MEMORY_ALLOCATION_ERROR;
586 }
587 }
588 }
589
orphanRemembered()590 const Locale *orphanRemembered() {
591 const Locale *rem = remembered;
592 remembered = nullptr;
593 return rem;
594 }
595
getBestDesiredIndex() const596 int32_t getBestDesiredIndex() const {
597 return bestDesiredIndex;
598 }
599
600 private:
601 const LikelySubtags &likelySubtags;
602 Locale::Iterator &locales;
603 ULocMatchLifetime lifetime;
604 const Locale *current = nullptr, *remembered = nullptr;
605 int32_t bestDesiredIndex = -1;
606 };
607
/**
 * Returns the supported locale that best matches the desired locale.
 *
 * @param desiredLocale the locale to match
 * @param errorCode     ICU error code; nullptr is returned if it indicates a failure on entry
 * @return the best supported locale, or the default locale if there is no good match
 *         or the matching itself failed
 */
const Locale *LocaleMatcher::getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) { return nullptr; }
    const std::optional<int32_t> bestIndex = getBestSuppIndex(
        getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
        nullptr, errorCode);
    if (U_FAILURE(errorCode) || !bestIndex.has_value()) {
        return defaultLocale;
    }
    return supportedLocales[*bestIndex];
}
616
getBestMatch(Locale::Iterator & desiredLocales,UErrorCode & errorCode) const617 const Locale *LocaleMatcher::getBestMatch(Locale::Iterator &desiredLocales,
618 UErrorCode &errorCode) const {
619 if (U_FAILURE(errorCode)) { return nullptr; }
620 if (!desiredLocales.hasNext()) {
621 return defaultLocale;
622 }
623 LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
624 std::optional<int32_t> suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
625 return U_SUCCESS(errorCode) && suppIndex.has_value() ? supportedLocales[*suppIndex]
626 : defaultLocale;
627 }
628
getBestMatchForListString(StringPiece desiredLocaleList,UErrorCode & errorCode) const629 const Locale *LocaleMatcher::getBestMatchForListString(
630 StringPiece desiredLocaleList, UErrorCode &errorCode) const {
631 if (U_FAILURE(errorCode)) { return nullptr; }
632 LocalePriorityList list(desiredLocaleList, errorCode);
633 LocalePriorityList::Iterator iter = list.iterator();
634 return getBestMatch(iter, errorCode);
635 }
636
getBestMatchResult(const Locale & desiredLocale,UErrorCode & errorCode) const637 LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
638 const Locale &desiredLocale, UErrorCode &errorCode) const {
639 if (U_FAILURE(errorCode)) {
640 return Result(nullptr, defaultLocale, -1, -1, false);
641 }
642 std::optional<int32_t> suppIndex = getBestSuppIndex(
643 getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
644 nullptr, errorCode);
645 if (U_FAILURE(errorCode) || !suppIndex.has_value()) {
646 return Result(nullptr, defaultLocale, -1, -1, false);
647 } else {
648 return Result(&desiredLocale, supportedLocales[*suppIndex], 0, *suppIndex, false);
649 }
650 }
651
getBestMatchResult(Locale::Iterator & desiredLocales,UErrorCode & errorCode) const652 LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
653 Locale::Iterator &desiredLocales, UErrorCode &errorCode) const {
654 if (U_FAILURE(errorCode) || !desiredLocales.hasNext()) {
655 return Result(nullptr, defaultLocale, -1, -1, false);
656 }
657 LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
658 std::optional<int32_t> suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
659 if (U_FAILURE(errorCode) || !suppIndex.has_value()) {
660 return Result(nullptr, defaultLocale, -1, -1, false);
661 } else {
662 return Result(lsrIter.orphanRemembered(), supportedLocales[*suppIndex],
663 lsrIter.getBestDesiredIndex(), *suppIndex, true);
664 }
665 }
666
getBestSuppIndex(LSR desiredLSR,LocaleLsrIterator * remainingIter,UErrorCode & errorCode) const667 std::optional<int32_t> LocaleMatcher::getBestSuppIndex(LSR desiredLSR,
668 LocaleLsrIterator *remainingIter,
669 UErrorCode &errorCode) const {
670 if (U_FAILURE(errorCode)) { return std::nullopt; }
671 int32_t desiredIndex = 0;
672 int32_t bestSupportedLsrIndex = -1;
673 for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) {
674 // Quick check for exact maximized LSR.
675 if (supportedLsrToIndex != nullptr) {
676 desiredLSR.setHashCode();
677 UBool found = false;
678 int32_t suppIndex = uhash_getiAndFound(supportedLsrToIndex, &desiredLSR, &found);
679 if (found) {
680 if (remainingIter != nullptr) {
681 remainingIter->rememberCurrent(desiredIndex, errorCode);
682 }
683 return suppIndex;
684 }
685 }
686 int32_t bestIndexAndDistance = localeDistance.getBestIndexAndDistance(
687 desiredLSR, supportedLSRs, supportedLSRsLength,
688 bestShiftedDistance, favorSubtag, direction);
689 if (bestIndexAndDistance >= 0) {
690 bestShiftedDistance = LocaleDistance::getShiftedDistance(bestIndexAndDistance);
691 if (remainingIter != nullptr) {
692 remainingIter->rememberCurrent(desiredIndex, errorCode);
693 if (U_FAILURE(errorCode)) { return std::nullopt; }
694 }
695 bestSupportedLsrIndex = LocaleDistance::getIndex(bestIndexAndDistance);
696 }
697 if ((bestShiftedDistance -= LocaleDistance::shiftDistance(demotionPerDesiredLocale)) <= 0) {
698 break;
699 }
700 if (remainingIter == nullptr || !remainingIter->hasNext()) {
701 break;
702 }
703 desiredLSR = remainingIter->next(errorCode);
704 if (U_FAILURE(errorCode)) { return std::nullopt; }
705 ++desiredIndex;
706 }
707 if (bestSupportedLsrIndex < 0) {
708 // no good match
709 return std::nullopt;
710 }
711 return supportedIndexes[bestSupportedLsrIndex];
712 }
713
isMatch(const Locale & desired,const Locale & supported,UErrorCode & errorCode) const714 UBool LocaleMatcher::isMatch(const Locale &desired, const Locale &supported,
715 UErrorCode &errorCode) const {
716 if (U_FAILURE(errorCode)) { return false; }
717 LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
718 if (U_FAILURE(errorCode)) { return false; }
719 const LSR *pSuppLSR = &suppLSR;
720 int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
721 getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
722 &pSuppLSR, 1,
723 LocaleDistance::shiftDistance(thresholdDistance), favorSubtag, direction);
724 return indexAndDistance >= 0;
725 }
726
internalMatch(const Locale & desired,const Locale & supported,UErrorCode & errorCode) const727 double LocaleMatcher::internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const {
728 if (U_FAILURE(errorCode)) { return 0.; }
729 // Returns the inverse of the distance: That is, 1-distance(desired, supported).
730 LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
731 if (U_FAILURE(errorCode)) { return 0.; }
732 const LSR *pSuppLSR = &suppLSR;
733 int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
734 getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
735 &pSuppLSR, 1,
736 LocaleDistance::shiftDistance(thresholdDistance), favorSubtag, direction);
737 double distance = LocaleDistance::getDistanceDouble(indexAndDistance);
738 return (100.0 - distance) / 100.0;
739 }
740
741 U_NAMESPACE_END
742
743 // uloc_acceptLanguage() --------------------------------------------------- ***
744
745 U_NAMESPACE_USE
746
747 namespace {
748
749 class LocaleFromTag {
750 public:
LocaleFromTag()751 LocaleFromTag() : locale(Locale::getRoot()) {}
operator ()(const char * tag)752 const Locale &operator()(const char *tag) { return locale = Locale(tag); }
753
754 private:
755 // Store the locale in the converter, rather than return a reference to a temporary,
756 // or a value which could go out of scope with the caller's reference to it.
757 Locale locale;
758 };
759
acceptLanguage(UEnumeration & supportedLocales,Locale::Iterator & desiredLocales,char * dest,int32_t capacity,UAcceptResult * acceptResult,UErrorCode & errorCode)760 int32_t acceptLanguage(UEnumeration &supportedLocales, Locale::Iterator &desiredLocales,
761 char *dest, int32_t capacity, UAcceptResult *acceptResult,
762 UErrorCode &errorCode) {
763 if (U_FAILURE(errorCode)) { return 0; }
764 LocaleMatcher::Builder builder;
765 const char *locString;
766 while ((locString = uenum_next(&supportedLocales, nullptr, &errorCode)) != nullptr) {
767 Locale loc(locString);
768 if (loc.isBogus()) {
769 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
770 return 0;
771 }
772 builder.addSupportedLocale(loc);
773 }
774 LocaleMatcher matcher = builder.build(errorCode);
775 LocaleMatcher::Result result = matcher.getBestMatchResult(desiredLocales, errorCode);
776 if (U_FAILURE(errorCode)) { return 0; }
777 if (result.getDesiredIndex() >= 0) {
778 if (acceptResult != nullptr) {
779 *acceptResult = *result.getDesiredLocale() == *result.getSupportedLocale() ?
780 ULOC_ACCEPT_VALID : ULOC_ACCEPT_FALLBACK;
781 }
782 const char *bestStr = result.getSupportedLocale()->getName();
783 int32_t bestLength = (int32_t)uprv_strlen(bestStr);
784 if (bestLength <= capacity) {
785 uprv_memcpy(dest, bestStr, bestLength);
786 }
787 return u_terminateChars(dest, capacity, bestLength, &errorCode);
788 } else {
789 if (acceptResult != nullptr) {
790 *acceptResult = ULOC_ACCEPT_FAILED;
791 }
792 return u_terminateChars(dest, capacity, 0, &errorCode);
793 }
794 }
795
796 } // namespace
797
798 U_CAPI int32_t U_EXPORT2
uloc_acceptLanguage(char * result,int32_t resultAvailable,UAcceptResult * outResult,const char ** acceptList,int32_t acceptListCount,UEnumeration * availableLocales,UErrorCode * status)799 uloc_acceptLanguage(char *result, int32_t resultAvailable,
800 UAcceptResult *outResult,
801 const char **acceptList, int32_t acceptListCount,
802 UEnumeration *availableLocales,
803 UErrorCode *status) {
804 if (U_FAILURE(*status)) { return 0; }
805 if ((result == nullptr ? resultAvailable != 0 : resultAvailable < 0) ||
806 (acceptList == nullptr ? acceptListCount != 0 : acceptListCount < 0) ||
807 availableLocales == nullptr) {
808 *status = U_ILLEGAL_ARGUMENT_ERROR;
809 return 0;
810 }
811 LocaleFromTag converter;
812 Locale::ConvertingIterator<const char **, LocaleFromTag> desiredLocales(
813 acceptList, acceptList + acceptListCount, converter);
814 return acceptLanguage(*availableLocales, desiredLocales,
815 result, resultAvailable, outResult, *status);
816 }
817
818 U_CAPI int32_t U_EXPORT2
uloc_acceptLanguageFromHTTP(char * result,int32_t resultAvailable,UAcceptResult * outResult,const char * httpAcceptLanguage,UEnumeration * availableLocales,UErrorCode * status)819 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable,
820 UAcceptResult *outResult,
821 const char *httpAcceptLanguage,
822 UEnumeration *availableLocales,
823 UErrorCode *status) {
824 if (U_FAILURE(*status)) { return 0; }
825 if ((result == nullptr ? resultAvailable != 0 : resultAvailable < 0) ||
826 httpAcceptLanguage == nullptr || availableLocales == nullptr) {
827 *status = U_ILLEGAL_ARGUMENT_ERROR;
828 return 0;
829 }
830 LocalePriorityList list(httpAcceptLanguage, *status);
831 LocalePriorityList::Iterator desiredLocales = list.iterator();
832 return acceptLanguage(*availableLocales, desiredLocales,
833 result, resultAvailable, outResult, *status);
834 }
835