1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 // localematcher.cpp
5 // created: 2019may08 Markus W. Scherer
6
7 #include "unicode/utypes.h"
8 #include "unicode/localebuilder.h"
9 #include "unicode/localematcher.h"
10 #include "unicode/locid.h"
11 #include "unicode/stringpiece.h"
12 #include "unicode/uloc.h"
13 #include "unicode/uobject.h"
14 #include "cstring.h"
15 #include "localeprioritylist.h"
16 #include "loclikelysubtags.h"
17 #include "locdistance.h"
18 #include "lsr.h"
19 #include "uassert.h"
20 #include "uhash.h"
21 #include "ustr_imp.h"
22 #include "uvector.h"
23
// Expression that constructs the fallback LSR for the undetermined language tag "und":
// "und" followed by two empty strings and the LSR::EXPLICIT_LSR flag.
// getMaximalLsrOrUnd() returns it for bogus or empty locales and after a failure.
#define UND_LSR LSR("und", "", "", LSR::EXPLICIT_LSR)
25
/**
 * Tells the LocaleMatcher how long the desired-locale objects handed to it stay valid.
 *
 * @draft ICU 65
 */
enum ULocMatchLifetime {
    /**
     * The desired-locale objects are temporary.
     * A locale that is needed beyond a single function call is copied by the matcher.
     *
     * @draft ICU 65
     */
    ULOCMATCH_TEMPORARY_LOCALES = 0,
    /**
     * The desired-locale objects live at least as long as the matcher is in use.
     * For a locale that is needed beyond a single function call,
     * the matcher stores just a pointer and does not make a copy.
     *
     * @draft ICU 65
     */
    ULOCMATCH_STORED_LOCALES = 1  // TODO: permanent? cached? clone?
};
#ifndef U_IN_DOXYGEN
typedef enum ULocMatchLifetime ULocMatchLifetime;
#endif
51
52 U_NAMESPACE_BEGIN
53
Result(LocaleMatcher::Result && src)54 LocaleMatcher::Result::Result(LocaleMatcher::Result &&src) noexcept :
55 desiredLocale(src.desiredLocale),
56 supportedLocale(src.supportedLocale),
57 desiredIndex(src.desiredIndex),
58 supportedIndex(src.supportedIndex),
59 desiredIsOwned(src.desiredIsOwned) {
60 if (desiredIsOwned) {
61 src.desiredLocale = nullptr;
62 src.desiredIndex = -1;
63 src.desiredIsOwned = false;
64 }
65 }
66
~Result()67 LocaleMatcher::Result::~Result() {
68 if (desiredIsOwned) {
69 delete desiredLocale;
70 }
71 }
72
operator =(LocaleMatcher::Result && src)73 LocaleMatcher::Result &LocaleMatcher::Result::operator=(LocaleMatcher::Result &&src) noexcept {
74 this->~Result();
75
76 desiredLocale = src.desiredLocale;
77 supportedLocale = src.supportedLocale;
78 desiredIndex = src.desiredIndex;
79 supportedIndex = src.supportedIndex;
80 desiredIsOwned = src.desiredIsOwned;
81
82 if (desiredIsOwned) {
83 src.desiredLocale = nullptr;
84 src.desiredIndex = -1;
85 src.desiredIsOwned = false;
86 }
87 return *this;
88 }
89
makeResolvedLocale(UErrorCode & errorCode) const90 Locale LocaleMatcher::Result::makeResolvedLocale(UErrorCode &errorCode) const {
91 if (U_FAILURE(errorCode) || supportedLocale == nullptr) {
92 return Locale::getRoot();
93 }
94 const Locale *bestDesired = getDesiredLocale();
95 if (bestDesired == nullptr || *supportedLocale == *bestDesired) {
96 return *supportedLocale;
97 }
98 LocaleBuilder b;
99 b.setLocale(*supportedLocale);
100
101 // Copy the region from bestDesired, if there is one.
102 const char *region = bestDesired->getCountry();
103 if (*region != 0) {
104 b.setRegion(region);
105 }
106
107 // Copy the variants from bestDesired, if there are any.
108 // Note that this will override any supportedLocale variants.
109 // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
110 const char *variants = bestDesired->getVariant();
111 if (*variants != 0) {
112 b.setVariant(variants);
113 }
114
115 // Copy the extensions from bestDesired, if there are any.
116 // C++ note: The following note, copied from Java, may not be true,
117 // as long as C++ copies by legacy ICU keyword, not by extension singleton.
118 // Note that this will override any supportedLocale extensions.
119 // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
120 // (replacing calendar).
121 b.copyExtensionsFrom(*bestDesired, errorCode);
122 return b.build(errorCode);
123 }
124
Builder(LocaleMatcher::Builder && src)125 LocaleMatcher::Builder::Builder(LocaleMatcher::Builder &&src) noexcept :
126 errorCode_(src.errorCode_),
127 supportedLocales_(src.supportedLocales_),
128 thresholdDistance_(src.thresholdDistance_),
129 demotion_(src.demotion_),
130 defaultLocale_(src.defaultLocale_),
131 withDefault_(src.withDefault_),
132 favor_(src.favor_),
133 direction_(src.direction_) {
134 src.supportedLocales_ = nullptr;
135 src.defaultLocale_ = nullptr;
136 }
137
~Builder()138 LocaleMatcher::Builder::~Builder() {
139 delete supportedLocales_;
140 delete defaultLocale_;
141 delete maxDistanceDesired_;
142 delete maxDistanceSupported_;
143 }
144
operator =(LocaleMatcher::Builder && src)145 LocaleMatcher::Builder &LocaleMatcher::Builder::operator=(LocaleMatcher::Builder &&src) noexcept {
146 this->~Builder();
147
148 errorCode_ = src.errorCode_;
149 supportedLocales_ = src.supportedLocales_;
150 thresholdDistance_ = src.thresholdDistance_;
151 demotion_ = src.demotion_;
152 defaultLocale_ = src.defaultLocale_;
153 withDefault_ = src.withDefault_,
154 favor_ = src.favor_;
155 direction_ = src.direction_;
156
157 src.supportedLocales_ = nullptr;
158 src.defaultLocale_ = nullptr;
159 return *this;
160 }
161
clearSupportedLocales()162 void LocaleMatcher::Builder::clearSupportedLocales() {
163 if (supportedLocales_ != nullptr) {
164 supportedLocales_->removeAllElements();
165 }
166 }
167
ensureSupportedLocaleVector()168 bool LocaleMatcher::Builder::ensureSupportedLocaleVector() {
169 if (U_FAILURE(errorCode_)) { return false; }
170 if (supportedLocales_ != nullptr) { return true; }
171 LocalPointer<UVector> lpSupportedLocales(new UVector(uprv_deleteUObject, nullptr, errorCode_), errorCode_);
172 if (U_FAILURE(errorCode_)) { return false; }
173 supportedLocales_ = lpSupportedLocales.orphan();
174 return true;
175 }
176
setSupportedLocalesFromListString(StringPiece locales)177 LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListString(
178 StringPiece locales) {
179 LocalePriorityList list(locales, errorCode_);
180 if (U_FAILURE(errorCode_)) { return *this; }
181 clearSupportedLocales();
182 if (!ensureSupportedLocaleVector()) { return *this; }
183 int32_t length = list.getLengthIncludingRemoved();
184 for (int32_t i = 0; i < length; ++i) {
185 Locale *locale = list.orphanLocaleAt(i);
186 if (locale == nullptr) { continue; }
187 supportedLocales_->adoptElement(locale, errorCode_);
188 if (U_FAILURE(errorCode_)) {
189 break;
190 }
191 }
192 return *this;
193 }
194
setSupportedLocales(Locale::Iterator & locales)195 LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) {
196 if (ensureSupportedLocaleVector()) {
197 clearSupportedLocales();
198 while (locales.hasNext() && U_SUCCESS(errorCode_)) {
199 const Locale &locale = locales.next();
200 LocalPointer<Locale> clone (locale.clone(), errorCode_);
201 supportedLocales_->adoptElement(clone.orphan(), errorCode_);
202 }
203 }
204 return *this;
205 }
206
addSupportedLocale(const Locale & locale)207 LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) {
208 if (ensureSupportedLocaleVector()) {
209 LocalPointer<Locale> clone(locale.clone(), errorCode_);
210 supportedLocales_->adoptElement(clone.orphan(), errorCode_);
211 }
212 return *this;
213 }
214
setNoDefaultLocale()215 LocaleMatcher::Builder &LocaleMatcher::Builder::setNoDefaultLocale() {
216 if (U_FAILURE(errorCode_)) { return *this; }
217 delete defaultLocale_;
218 defaultLocale_ = nullptr;
219 withDefault_ = false;
220 return *this;
221 }
222
setDefaultLocale(const Locale * defaultLocale)223 LocaleMatcher::Builder &LocaleMatcher::Builder::setDefaultLocale(const Locale *defaultLocale) {
224 if (U_FAILURE(errorCode_)) { return *this; }
225 Locale *clone = nullptr;
226 if (defaultLocale != nullptr) {
227 clone = defaultLocale->clone();
228 if (clone == nullptr) {
229 errorCode_ = U_MEMORY_ALLOCATION_ERROR;
230 return *this;
231 }
232 }
233 delete defaultLocale_;
234 defaultLocale_ = clone;
235 withDefault_ = true;
236 return *this;
237 }
238
setFavorSubtag(ULocMatchFavorSubtag subtag)239 LocaleMatcher::Builder &LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag subtag) {
240 if (U_FAILURE(errorCode_)) { return *this; }
241 favor_ = subtag;
242 return *this;
243 }
244
setDemotionPerDesiredLocale(ULocMatchDemotion demotion)245 LocaleMatcher::Builder &LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion demotion) {
246 if (U_FAILURE(errorCode_)) { return *this; }
247 demotion_ = demotion;
248 return *this;
249 }
250
setMaxDistance(const Locale & desired,const Locale & supported)251 LocaleMatcher::Builder &LocaleMatcher::Builder::setMaxDistance(const Locale &desired,
252 const Locale &supported) {
253 if (U_FAILURE(errorCode_)) { return *this; }
254 Locale *desiredClone = desired.clone();
255 Locale *supportedClone = supported.clone();
256 if (desiredClone == nullptr || supportedClone == nullptr) {
257 delete desiredClone; // in case only one could not be allocated
258 delete supportedClone;
259 errorCode_ = U_MEMORY_ALLOCATION_ERROR;
260 return *this;
261 }
262 delete maxDistanceDesired_;
263 delete maxDistanceSupported_;
264 maxDistanceDesired_ = desiredClone;
265 maxDistanceSupported_ = supportedClone;
266 return *this;
267 }
268
#if 0
/**
 * <i>Internal only!</i>
 * Currently compiled out.
 *
 * Stores the threshold distance, capped at 100.
 *
 * @param thresholdDistance the thresholdDistance to set, with -1 = default
 * @return this Builder object
 * @internal
 * @deprecated This API is ICU internal only.
 */
// The Java-style "@Deprecated" annotation that used to precede this definition
// is not valid C++ and would break the build if this block were enabled.
LocaleMatcher::Builder &LocaleMatcher::Builder::internalSetThresholdDistance(int32_t thresholdDistance) {
    if (U_FAILURE(errorCode_)) { return *this; }
    if (thresholdDistance > 100) {
        thresholdDistance = 100;
    }
    thresholdDistance_ = thresholdDistance;
    return *this;
}
#endif
288
copyErrorTo(UErrorCode & outErrorCode) const289 UBool LocaleMatcher::Builder::copyErrorTo(UErrorCode &outErrorCode) const {
290 if (U_FAILURE(outErrorCode)) { return true; }
291 if (U_SUCCESS(errorCode_)) { return false; }
292 outErrorCode = errorCode_;
293 return true;
294 }
295
build(UErrorCode & errorCode) const296 LocaleMatcher LocaleMatcher::Builder::build(UErrorCode &errorCode) const {
297 if (U_SUCCESS(errorCode) && U_FAILURE(errorCode_)) {
298 errorCode = errorCode_;
299 }
300 return LocaleMatcher(*this, errorCode);
301 }
302
303 namespace {
304
getMaximalLsrOrUnd(const XLikelySubtags & likelySubtags,const Locale & locale,UErrorCode & errorCode)305 LSR getMaximalLsrOrUnd(const XLikelySubtags &likelySubtags, const Locale &locale,
306 UErrorCode &errorCode) {
307 if (U_FAILURE(errorCode) || locale.isBogus() || *locale.getName() == 0 /* "und" */) {
308 return UND_LSR;
309 } else {
310 return likelySubtags.makeMaximizedLsrFrom(locale, errorCode);
311 }
312 }
313
hashLSR(const UHashTok token)314 int32_t hashLSR(const UHashTok token) {
315 const LSR *lsr = static_cast<const LSR *>(token.pointer);
316 return lsr->hashCode;
317 }
318
compareLSRs(const UHashTok t1,const UHashTok t2)319 UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
320 const LSR *lsr1 = static_cast<const LSR *>(t1.pointer);
321 const LSR *lsr2 = static_cast<const LSR *>(t2.pointer);
322 return *lsr1 == *lsr2;
323 }
324
325 } // namespace
326
putIfAbsent(const LSR & lsr,int32_t i,int32_t suppLength,UErrorCode & errorCode)327 int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength,
328 UErrorCode &errorCode) {
329 if (U_FAILURE(errorCode)) { return suppLength; }
330 if (!uhash_containsKey(supportedLsrToIndex, &lsr)) {
331 uhash_putiAllowZero(supportedLsrToIndex, const_cast<LSR *>(&lsr), i, &errorCode);
332 if (U_SUCCESS(errorCode)) {
333 supportedLSRs[suppLength] = &lsr;
334 supportedIndexes[suppLength++] = i;
335 }
336 }
337 return suppLength;
338 }
339
LocaleMatcher(const Builder & builder,UErrorCode & errorCode)340 LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
341 likelySubtags(*XLikelySubtags::getSingleton(errorCode)),
342 localeDistance(*LocaleDistance::getSingleton(errorCode)),
343 thresholdDistance(builder.thresholdDistance_),
344 demotionPerDesiredLocale(0),
345 favorSubtag(builder.favor_),
346 direction(builder.direction_),
347 supportedLocales(nullptr), lsrs(nullptr), supportedLocalesLength(0),
348 supportedLsrToIndex(nullptr),
349 supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0),
350 ownedDefaultLocale(nullptr), defaultLocale(nullptr) {
351 if (U_FAILURE(errorCode)) { return; }
352 const Locale *def = builder.defaultLocale_;
353 LSR builderDefaultLSR;
354 const LSR *defLSR = nullptr;
355 if (def != nullptr) {
356 ownedDefaultLocale = def->clone();
357 if (ownedDefaultLocale == nullptr) {
358 errorCode = U_MEMORY_ALLOCATION_ERROR;
359 return;
360 }
361 def = ownedDefaultLocale;
362 builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode);
363 if (U_FAILURE(errorCode)) { return; }
364 defLSR = &builderDefaultLSR;
365 }
366 supportedLocalesLength = builder.supportedLocales_ != nullptr ?
367 builder.supportedLocales_->size() : 0;
368 if (supportedLocalesLength > 0) {
369 // Store the supported locales in input order,
370 // so that when different types are used (e.g., language tag strings)
371 // we can return those by parallel index.
372 supportedLocales = static_cast<const Locale **>(
373 uprv_malloc(supportedLocalesLength * sizeof(const Locale *)));
374 // Supported LRSs in input order.
375 // In C++, we store these permanently to simplify ownership management
376 // in the hash tables. Duplicate LSRs (if any) are unused overhead.
377 lsrs = new LSR[supportedLocalesLength];
378 if (supportedLocales == nullptr || lsrs == nullptr) {
379 errorCode = U_MEMORY_ALLOCATION_ERROR;
380 return;
381 }
382 // If the constructor fails partway, we need null pointers for destructibility.
383 uprv_memset(supportedLocales, 0, supportedLocalesLength * sizeof(const Locale *));
384 for (int32_t i = 0; i < supportedLocalesLength; ++i) {
385 const Locale &locale = *static_cast<Locale *>(builder.supportedLocales_->elementAt(i));
386 supportedLocales[i] = locale.clone();
387 if (supportedLocales[i] == nullptr) {
388 errorCode = U_MEMORY_ALLOCATION_ERROR;
389 return;
390 }
391 const Locale &supportedLocale = *supportedLocales[i];
392 LSR &lsr = lsrs[i] = getMaximalLsrOrUnd(likelySubtags, supportedLocale, errorCode);
393 lsr.setHashCode();
394 if (U_FAILURE(errorCode)) { return; }
395 }
396
397 // We need an unordered map from LSR to first supported locale with that LSR,
398 // and an ordered list of (LSR, supported index) for
399 // the supported locales in the following order:
400 // 1. Default locale, if it is supported.
401 // 2. Priority locales (aka "paradigm locales") in builder order.
402 // 3. Remaining locales in builder order.
403 supportedLsrToIndex = uhash_openSize(hashLSR, compareLSRs, uhash_compareLong,
404 supportedLocalesLength, &errorCode);
405 if (U_FAILURE(errorCode)) { return; }
406 supportedLSRs = static_cast<const LSR **>(
407 uprv_malloc(supportedLocalesLength * sizeof(const LSR *)));
408 supportedIndexes = static_cast<int32_t *>(
409 uprv_malloc(supportedLocalesLength * sizeof(int32_t)));
410 if (supportedLSRs == nullptr || supportedIndexes == nullptr) {
411 errorCode = U_MEMORY_ALLOCATION_ERROR;
412 return;
413 }
414 int32_t suppLength = 0;
415 // Determine insertion order.
416 // Add locales immediately that are equivalent to the default.
417 MaybeStackArray<int8_t, 100> order(supportedLocalesLength, errorCode);
418 if (U_FAILURE(errorCode)) { return; }
419 int32_t numParadigms = 0;
420 for (int32_t i = 0; i < supportedLocalesLength; ++i) {
421 const Locale &locale = *supportedLocales[i];
422 const LSR &lsr = lsrs[i];
423 if (defLSR == nullptr && builder.withDefault_) {
424 // Implicit default locale = first supported locale, if not turned off.
425 U_ASSERT(i == 0);
426 def = &locale;
427 defLSR = &lsr;
428 order[i] = 1;
429 suppLength = putIfAbsent(lsr, 0, suppLength, errorCode);
430 } else if (defLSR != nullptr && lsr.isEquivalentTo(*defLSR)) {
431 order[i] = 1;
432 suppLength = putIfAbsent(lsr, i, suppLength, errorCode);
433 } else if (localeDistance.isParadigmLSR(lsr)) {
434 order[i] = 2;
435 ++numParadigms;
436 } else {
437 order[i] = 3;
438 }
439 if (U_FAILURE(errorCode)) { return; }
440 }
441 // Add supported paradigm locales.
442 int32_t paradigmLimit = suppLength + numParadigms;
443 for (int32_t i = 0; i < supportedLocalesLength && suppLength < paradigmLimit; ++i) {
444 if (order[i] == 2) {
445 suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode);
446 }
447 }
448 // Add remaining supported locales.
449 for (int32_t i = 0; i < supportedLocalesLength; ++i) {
450 if (order[i] == 3) {
451 suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode);
452 }
453 }
454 supportedLSRsLength = suppLength;
455 // If supportedLSRsLength < supportedLocalesLength then
456 // we waste as many array slots as there are duplicate supported LSRs,
457 // but the amount of wasted space is small as long as there are few duplicates.
458 }
459
460 defaultLocale = def;
461
462 if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) {
463 demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale();
464 }
465
466 if (thresholdDistance >= 0) {
467 // already copied
468 } else if (builder.maxDistanceDesired_ != nullptr) {
469 LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceSupported_, errorCode);
470 const LSR *pSuppLSR = &suppLSR;
471 int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
472 getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceDesired_, errorCode),
473 &pSuppLSR, 1,
474 LocaleDistance::shiftDistance(100), favorSubtag, direction);
475 if (U_SUCCESS(errorCode)) {
476 // +1 for an exclusive threshold from an inclusive max.
477 thresholdDistance = LocaleDistance::getDistanceFloor(indexAndDistance) + 1;
478 } else {
479 thresholdDistance = 0;
480 }
481 } else {
482 thresholdDistance = localeDistance.getDefaultScriptDistance();
483 }
484 }
485
LocaleMatcher(LocaleMatcher && src)486 LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) noexcept :
487 likelySubtags(src.likelySubtags),
488 localeDistance(src.localeDistance),
489 thresholdDistance(src.thresholdDistance),
490 demotionPerDesiredLocale(src.demotionPerDesiredLocale),
491 favorSubtag(src.favorSubtag),
492 direction(src.direction),
493 supportedLocales(src.supportedLocales), lsrs(src.lsrs),
494 supportedLocalesLength(src.supportedLocalesLength),
495 supportedLsrToIndex(src.supportedLsrToIndex),
496 supportedLSRs(src.supportedLSRs),
497 supportedIndexes(src.supportedIndexes),
498 supportedLSRsLength(src.supportedLSRsLength),
499 ownedDefaultLocale(src.ownedDefaultLocale), defaultLocale(src.defaultLocale) {
500 src.supportedLocales = nullptr;
501 src.lsrs = nullptr;
502 src.supportedLocalesLength = 0;
503 src.supportedLsrToIndex = nullptr;
504 src.supportedLSRs = nullptr;
505 src.supportedIndexes = nullptr;
506 src.supportedLSRsLength = 0;
507 src.ownedDefaultLocale = nullptr;
508 src.defaultLocale = nullptr;
509 }
510
~LocaleMatcher()511 LocaleMatcher::~LocaleMatcher() {
512 for (int32_t i = 0; i < supportedLocalesLength; ++i) {
513 delete supportedLocales[i];
514 }
515 uprv_free(supportedLocales);
516 delete[] lsrs;
517 uhash_close(supportedLsrToIndex);
518 uprv_free(supportedLSRs);
519 uprv_free(supportedIndexes);
520 delete ownedDefaultLocale;
521 }
522
operator =(LocaleMatcher && src)523 LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) noexcept {
524 this->~LocaleMatcher();
525
526 thresholdDistance = src.thresholdDistance;
527 demotionPerDesiredLocale = src.demotionPerDesiredLocale;
528 favorSubtag = src.favorSubtag;
529 direction = src.direction;
530 supportedLocales = src.supportedLocales;
531 lsrs = src.lsrs;
532 supportedLocalesLength = src.supportedLocalesLength;
533 supportedLsrToIndex = src.supportedLsrToIndex;
534 supportedLSRs = src.supportedLSRs;
535 supportedIndexes = src.supportedIndexes;
536 supportedLSRsLength = src.supportedLSRsLength;
537 ownedDefaultLocale = src.ownedDefaultLocale;
538 defaultLocale = src.defaultLocale;
539
540 src.supportedLocales = nullptr;
541 src.lsrs = nullptr;
542 src.supportedLocalesLength = 0;
543 src.supportedLsrToIndex = nullptr;
544 src.supportedLSRs = nullptr;
545 src.supportedIndexes = nullptr;
546 src.supportedLSRsLength = 0;
547 src.ownedDefaultLocale = nullptr;
548 src.defaultLocale = nullptr;
549 return *this;
550 }
551
552 class LocaleLsrIterator {
553 public:
LocaleLsrIterator(const XLikelySubtags & likelySubtags,Locale::Iterator & locales,ULocMatchLifetime lifetime)554 LocaleLsrIterator(const XLikelySubtags &likelySubtags, Locale::Iterator &locales,
555 ULocMatchLifetime lifetime) :
556 likelySubtags(likelySubtags), locales(locales), lifetime(lifetime) {}
557
~LocaleLsrIterator()558 ~LocaleLsrIterator() {
559 if (lifetime == ULOCMATCH_TEMPORARY_LOCALES) {
560 delete remembered;
561 }
562 }
563
hasNext() const564 bool hasNext() const {
565 return locales.hasNext();
566 }
567
next(UErrorCode & errorCode)568 LSR next(UErrorCode &errorCode) {
569 current = &locales.next();
570 return getMaximalLsrOrUnd(likelySubtags, *current, errorCode);
571 }
572
rememberCurrent(int32_t desiredIndex,UErrorCode & errorCode)573 void rememberCurrent(int32_t desiredIndex, UErrorCode &errorCode) {
574 if (U_FAILURE(errorCode)) { return; }
575 bestDesiredIndex = desiredIndex;
576 if (lifetime == ULOCMATCH_STORED_LOCALES) {
577 remembered = current;
578 } else {
579 // ULOCMATCH_TEMPORARY_LOCALES
580 delete remembered;
581 remembered = new Locale(*current);
582 if (remembered == nullptr) {
583 errorCode = U_MEMORY_ALLOCATION_ERROR;
584 }
585 }
586 }
587
orphanRemembered()588 const Locale *orphanRemembered() {
589 const Locale *rem = remembered;
590 remembered = nullptr;
591 return rem;
592 }
593
getBestDesiredIndex() const594 int32_t getBestDesiredIndex() const {
595 return bestDesiredIndex;
596 }
597
598 private:
599 const XLikelySubtags &likelySubtags;
600 Locale::Iterator &locales;
601 ULocMatchLifetime lifetime;
602 const Locale *current = nullptr, *remembered = nullptr;
603 int32_t bestDesiredIndex = -1;
604 };
605
/**
 * Returns the supported locale that best matches the desired locale.
 *
 * @param desiredLocale the desired locale
 * @param errorCode in/out ICU error code
 * @return nullptr if errorCode failed on entry; the best supported locale if one matched;
 *         otherwise the default locale (which may be nullptr)
 */
const Locale *LocaleMatcher::getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) { return nullptr; }
    const int32_t bestIndex = getBestSuppIndex(
        getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
        nullptr, errorCode);
    if (U_SUCCESS(errorCode) && bestIndex >= 0) {
        return supportedLocales[bestIndex];
    }
    return defaultLocale;
}
613
getBestMatch(Locale::Iterator & desiredLocales,UErrorCode & errorCode) const614 const Locale *LocaleMatcher::getBestMatch(Locale::Iterator &desiredLocales,
615 UErrorCode &errorCode) const {
616 if (U_FAILURE(errorCode)) { return nullptr; }
617 if (!desiredLocales.hasNext()) {
618 return defaultLocale;
619 }
620 LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
621 int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
622 return U_SUCCESS(errorCode) && suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
623 }
624
getBestMatchForListString(StringPiece desiredLocaleList,UErrorCode & errorCode) const625 const Locale *LocaleMatcher::getBestMatchForListString(
626 StringPiece desiredLocaleList, UErrorCode &errorCode) const {
627 LocalePriorityList list(desiredLocaleList, errorCode);
628 LocalePriorityList::Iterator iter = list.iterator();
629 return getBestMatch(iter, errorCode);
630 }
631
getBestMatchResult(const Locale & desiredLocale,UErrorCode & errorCode) const632 LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
633 const Locale &desiredLocale, UErrorCode &errorCode) const {
634 if (U_FAILURE(errorCode)) {
635 return Result(nullptr, defaultLocale, -1, -1, false);
636 }
637 int32_t suppIndex = getBestSuppIndex(
638 getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
639 nullptr, errorCode);
640 if (U_FAILURE(errorCode) || suppIndex < 0) {
641 return Result(nullptr, defaultLocale, -1, -1, false);
642 } else {
643 return Result(&desiredLocale, supportedLocales[suppIndex], 0, suppIndex, false);
644 }
645 }
646
getBestMatchResult(Locale::Iterator & desiredLocales,UErrorCode & errorCode) const647 LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
648 Locale::Iterator &desiredLocales, UErrorCode &errorCode) const {
649 if (U_FAILURE(errorCode) || !desiredLocales.hasNext()) {
650 return Result(nullptr, defaultLocale, -1, -1, false);
651 }
652 LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
653 int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
654 if (U_FAILURE(errorCode) || suppIndex < 0) {
655 return Result(nullptr, defaultLocale, -1, -1, false);
656 } else {
657 return Result(lsrIter.orphanRemembered(), supportedLocales[suppIndex],
658 lsrIter.getBestDesiredIndex(), suppIndex, true);
659 }
660 }
661
getBestSuppIndex(LSR desiredLSR,LocaleLsrIterator * remainingIter,UErrorCode & errorCode) const662 int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter,
663 UErrorCode &errorCode) const {
664 if (U_FAILURE(errorCode)) { return -1; }
665 int32_t desiredIndex = 0;
666 int32_t bestSupportedLsrIndex = -1;
667 for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) {
668 // Quick check for exact maximized LSR.
669 if (supportedLsrToIndex != nullptr) {
670 desiredLSR.setHashCode();
671 UBool found = false;
672 int32_t suppIndex = uhash_getiAndFound(supportedLsrToIndex, &desiredLSR, &found);
673 if (found) {
674 if (remainingIter != nullptr) {
675 remainingIter->rememberCurrent(desiredIndex, errorCode);
676 }
677 return suppIndex;
678 }
679 }
680 int32_t bestIndexAndDistance = localeDistance.getBestIndexAndDistance(
681 desiredLSR, supportedLSRs, supportedLSRsLength,
682 bestShiftedDistance, favorSubtag, direction);
683 if (bestIndexAndDistance >= 0) {
684 bestShiftedDistance = LocaleDistance::getShiftedDistance(bestIndexAndDistance);
685 if (remainingIter != nullptr) {
686 remainingIter->rememberCurrent(desiredIndex, errorCode);
687 if (U_FAILURE(errorCode)) { return -1; }
688 }
689 bestSupportedLsrIndex = LocaleDistance::getIndex(bestIndexAndDistance);
690 }
691 if ((bestShiftedDistance -= LocaleDistance::shiftDistance(demotionPerDesiredLocale)) <= 0) {
692 break;
693 }
694 if (remainingIter == nullptr || !remainingIter->hasNext()) {
695 break;
696 }
697 desiredLSR = remainingIter->next(errorCode);
698 if (U_FAILURE(errorCode)) { return -1; }
699 ++desiredIndex;
700 }
701 if (bestSupportedLsrIndex < 0) {
702 // no good match
703 return -1;
704 }
705 return supportedIndexes[bestSupportedLsrIndex];
706 }
707
isMatch(const Locale & desired,const Locale & supported,UErrorCode & errorCode) const708 UBool LocaleMatcher::isMatch(const Locale &desired, const Locale &supported,
709 UErrorCode &errorCode) const {
710 LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
711 if (U_FAILURE(errorCode)) { return 0; }
712 const LSR *pSuppLSR = &suppLSR;
713 int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
714 getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
715 &pSuppLSR, 1,
716 LocaleDistance::shiftDistance(thresholdDistance), favorSubtag, direction);
717 return indexAndDistance >= 0;
718 }
719
internalMatch(const Locale & desired,const Locale & supported,UErrorCode & errorCode) const720 double LocaleMatcher::internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const {
721 // Returns the inverse of the distance: That is, 1-distance(desired, supported).
722 LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
723 if (U_FAILURE(errorCode)) { return 0; }
724 const LSR *pSuppLSR = &suppLSR;
725 int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
726 getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
727 &pSuppLSR, 1,
728 LocaleDistance::shiftDistance(thresholdDistance), favorSubtag, direction);
729 double distance = LocaleDistance::getDistanceDouble(indexAndDistance);
730 return (100.0 - distance) / 100.0;
731 }
732
733 U_NAMESPACE_END
734
735 // uloc_acceptLanguage() --------------------------------------------------- ***
736
737 U_NAMESPACE_USE
738
739 namespace {
740
741 class LocaleFromTag {
742 public:
LocaleFromTag()743 LocaleFromTag() : locale(Locale::getRoot()) {}
operator ()(const char * tag)744 const Locale &operator()(const char *tag) { return locale = Locale(tag); }
745
746 private:
747 // Store the locale in the converter, rather than return a reference to a temporary,
748 // or a value which could go out of scope with the caller's reference to it.
749 Locale locale;
750 };
751
acceptLanguage(UEnumeration & supportedLocales,Locale::Iterator & desiredLocales,char * dest,int32_t capacity,UAcceptResult * acceptResult,UErrorCode & errorCode)752 int32_t acceptLanguage(UEnumeration &supportedLocales, Locale::Iterator &desiredLocales,
753 char *dest, int32_t capacity, UAcceptResult *acceptResult,
754 UErrorCode &errorCode) {
755 if (U_FAILURE(errorCode)) { return 0; }
756 LocaleMatcher::Builder builder;
757 const char *locString;
758 while ((locString = uenum_next(&supportedLocales, nullptr, &errorCode)) != nullptr) {
759 Locale loc(locString);
760 if (loc.isBogus()) {
761 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
762 return 0;
763 }
764 builder.addSupportedLocale(loc);
765 }
766 LocaleMatcher matcher = builder.build(errorCode);
767 LocaleMatcher::Result result = matcher.getBestMatchResult(desiredLocales, errorCode);
768 if (U_FAILURE(errorCode)) { return 0; }
769 if (result.getDesiredIndex() >= 0) {
770 if (acceptResult != nullptr) {
771 *acceptResult = *result.getDesiredLocale() == *result.getSupportedLocale() ?
772 ULOC_ACCEPT_VALID : ULOC_ACCEPT_FALLBACK;
773 }
774 const char *bestStr = result.getSupportedLocale()->getName();
775 int32_t bestLength = (int32_t)uprv_strlen(bestStr);
776 if (bestLength <= capacity) {
777 uprv_memcpy(dest, bestStr, bestLength);
778 }
779 return u_terminateChars(dest, capacity, bestLength, &errorCode);
780 } else {
781 if (acceptResult != nullptr) {
782 *acceptResult = ULOC_ACCEPT_FAILED;
783 }
784 return u_terminateChars(dest, capacity, 0, &errorCode);
785 }
786 }
787
788 } // namespace
789
790 U_CAPI int32_t U_EXPORT2
uloc_acceptLanguage(char * result,int32_t resultAvailable,UAcceptResult * outResult,const char ** acceptList,int32_t acceptListCount,UEnumeration * availableLocales,UErrorCode * status)791 uloc_acceptLanguage(char *result, int32_t resultAvailable,
792 UAcceptResult *outResult,
793 const char **acceptList, int32_t acceptListCount,
794 UEnumeration *availableLocales,
795 UErrorCode *status) {
796 if (U_FAILURE(*status)) { return 0; }
797 if ((result == nullptr ? resultAvailable != 0 : resultAvailable < 0) ||
798 (acceptList == nullptr ? acceptListCount != 0 : acceptListCount < 0) ||
799 availableLocales == nullptr) {
800 *status = U_ILLEGAL_ARGUMENT_ERROR;
801 return 0;
802 }
803 LocaleFromTag converter;
804 Locale::ConvertingIterator<const char **, LocaleFromTag> desiredLocales(
805 acceptList, acceptList + acceptListCount, converter);
806 return acceptLanguage(*availableLocales, desiredLocales,
807 result, resultAvailable, outResult, *status);
808 }
809
810 U_CAPI int32_t U_EXPORT2
uloc_acceptLanguageFromHTTP(char * result,int32_t resultAvailable,UAcceptResult * outResult,const char * httpAcceptLanguage,UEnumeration * availableLocales,UErrorCode * status)811 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable,
812 UAcceptResult *outResult,
813 const char *httpAcceptLanguage,
814 UEnumeration *availableLocales,
815 UErrorCode *status) {
816 if (U_FAILURE(*status)) { return 0; }
817 if ((result == nullptr ? resultAvailable != 0 : resultAvailable < 0) ||
818 httpAcceptLanguage == nullptr || availableLocales == nullptr) {
819 *status = U_ILLEGAL_ARGUMENT_ERROR;
820 return 0;
821 }
822 LocalePriorityList list(httpAcceptLanguage, *status);
823 LocalePriorityList::Iterator desiredLocales = list.iterator();
824 return acceptLanguage(*availableLocales, desiredLocales,
825 result, resultAvailable, outResult, *status);
826 }
827