1 // © 2019 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 4 // localematcher.h 5 // created: 2019may08 Markus W. Scherer 6 7 #ifndef __LOCALEMATCHER_H__ 8 #define __LOCALEMATCHER_H__ 9 10 #include "unicode/utypes.h" 11 12 #if U_SHOW_CPLUSPLUS_API 13 14 #include "unicode/locid.h" 15 #include "unicode/stringpiece.h" 16 #include "unicode/uobject.h" 17 18 /** 19 * \file 20 * \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales. 21 */ 22 23 #ifndef U_HIDE_DRAFT_API 24 25 /** 26 * Builder option for whether the language subtag or the script subtag is most important. 27 * 28 * @see Builder#setFavorSubtag(FavorSubtag) 29 * @draft ICU 65 30 */ 31 enum ULocMatchFavorSubtag { 32 /** 33 * Language differences are most important, then script differences, then region differences. 34 * (This is the default behavior.) 35 * 36 * @draft ICU 65 37 */ 38 ULOCMATCH_FAVOR_LANGUAGE, 39 /** 40 * Makes script differences matter relatively more than language differences. 41 * 42 * @draft ICU 65 43 */ 44 ULOCMATCH_FAVOR_SCRIPT 45 }; 46 #ifndef U_IN_DOXYGEN 47 typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag; 48 #endif 49 50 /** 51 * Builder option for whether all desired locales are treated equally or 52 * earlier ones are preferred. 53 * 54 * @see Builder#setDemotionPerDesiredLocale(Demotion) 55 * @draft ICU 65 56 */ 57 enum ULocMatchDemotion { 58 /** 59 * All desired locales are treated equally. 60 * 61 * @draft ICU 65 62 */ 63 ULOCMATCH_DEMOTION_NONE, 64 /** 65 * Earlier desired locales are preferred. 66 * 67 * <p>From each desired locale to the next, 68 * the distance to any supported locale is increased by an additional amount 69 * which is at least as large as most region mismatches. 70 * A later desired locale has to have a better match with some supported locale 71 * due to more than merely having the same region subtag. 72 * 73 * <p>For example: <code>Supported={en, sv} desired=[en-GB, sv]</code> 74 * yields <code>Result(en-GB, en)</code> because 75 * with the demotion of sv its perfect match is no better than 76 * the region distance between the earlier desired locale en-GB and en=en-US. 77 * 78 * <p>Notes: 79 * <ul> 80 * <li>In some cases, language and/or script differences can be as small as 81 * the typical region difference. (Example: sr-Latn vs. sr-Cyrl) 82 * <li>It is possible for certain region differences to be larger than usual, 83 * and larger than the demotion. 84 * (As of CLDR 35 there is no such case, but 85 * this is possible in future versions of the data.) 86 * </ul> 87 * 88 * @draft ICU 65 89 */ 90 ULOCMATCH_DEMOTION_REGION 91 }; 92 #ifndef U_IN_DOXYGEN 93 typedef enum ULocMatchDemotion ULocMatchDemotion; 94 #endif 95 96 struct UHashtable; 97 98 U_NAMESPACE_BEGIN 99 100 struct LSR; 101 102 class LocaleDistance; 103 class LocaleLsrIterator; 104 class UVector; 105 class XLikelySubtags; 106 107 /** 108 * Immutable class that picks the best match between a user's desired locales and 109 * an application's supported locales. 110 * Movable but not copyable. 111 * 112 * <p>Example: 113 * <pre> 114 * UErrorCode errorCode = U_ZERO_ERROR; 115 * LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocales("fr, en-GB, en").build(errorCode); 116 * Locale *bestSupported = matcher.getBestLocale(Locale.US, errorCode); // "en" 117 * </pre> 118 * 119 * <p>A matcher takes into account when languages are close to one another, 120 * such as Danish and Norwegian, 121 * and when regional variants are close, like en-GB and en-AU as opposed to en-US. 122 * 123 * <p>If there are multiple supported locales with the same (language, script, region) 124 * likely subtags, then the current implementation returns the first of those locales. 125 * It ignores variant subtags (except for pseudolocale variants) and extensions. 126 * This may change in future versions. 127 * 128 * <p>For example, the current implementation does not distinguish between 129 * de, de-DE, de-Latn, de-1901, de-u-co-phonebk. 130 * 131 * <p>If you prefer one equivalent locale over another, then provide only the preferred one, 132 * or place it earlier in the list of supported locales. 133 * 134 * <p>Otherwise, the order of supported locales may have no effect on the best-match results. 135 * The current implementation compares each desired locale with supported locales 136 * in the following order: 137 * 1. Default locale, if supported; 138 * 2. CLDR "paradigm locales" like en-GB and es-419; 139 * 3. other supported locales. 140 * This may change in future versions. 141 * 142 * <p>Often a product will just need one matcher instance, built with the languages 143 * that it supports. However, it may want multiple instances with different 144 * default languages based on additional information, such as the domain. 145 * 146 * <p>This class is not intended for public subclassing. 147 * 148 * @draft ICU 65 149 */ 150 class U_COMMON_API LocaleMatcher : public UMemory { 151 public: 152 /** 153 * Data for the best-matching pair of a desired and a supported locale. 154 * Movable but not copyable. 155 * 156 * @draft ICU 65 157 */ 158 class U_COMMON_API Result : public UMemory { 159 public: 160 /** 161 * Move constructor; might modify the source. 162 * This object will have the same contents that the source object had. 163 * 164 * @param src Result to move contents from. 165 * @draft ICU 65 166 */ 167 Result(Result &&src) U_NOEXCEPT; 168 169 /** 170 * Destructor. 171 * 172 * @draft ICU 65 173 */ 174 ~Result(); 175 176 /** 177 * Move assignment; might modify the source. 178 * This object will have the same contents that the source object had. 179 * 180 * @param src Result to move contents from. 181 * @draft ICU 65 182 */ 183 Result &operator=(Result &&src) U_NOEXCEPT; 184 185 /** 186 * Returns the best-matching desired locale. 187 * nullptr if the list of desired locales is empty or if none matched well enough. 188 * 189 * @return the best-matching desired locale, or nullptr. 190 * @draft ICU 65 191 */ getDesiredLocale()192 inline const Locale *getDesiredLocale() const { return desiredLocale; } 193 194 /** 195 * Returns the best-matching supported locale. 196 * If none matched well enough, this is the default locale. 197 * The default locale is nullptr if the list of supported locales is empty and 198 * no explicit default locale is set. 199 * 200 * @return the best-matching supported locale, or nullptr. 201 * @draft ICU 65 202 */ getSupportedLocale()203 inline const Locale *getSupportedLocale() const { return supportedLocale; } 204 205 /** 206 * Returns the index of the best-matching desired locale in the input Iterable order. 207 * -1 if the list of desired locales is empty or if none matched well enough. 208 * 209 * @return the index of the best-matching desired locale, or -1. 210 * @draft ICU 65 211 */ getDesiredIndex()212 inline int32_t getDesiredIndex() const { return desiredIndex; } 213 214 /** 215 * Returns the index of the best-matching supported locale in the 216 * constructor’s or builder’s input order (“set” Collection plus “added” locales). 217 * If the matcher was built from a locale list string, then the iteration order is that 218 * of a LocalePriorityList built from the same string. 219 * -1 if the list of supported locales is empty or if none matched well enough. 220 * 221 * @return the index of the best-matching supported locale, or -1. 222 * @draft ICU 65 223 */ getSupportedIndex()224 inline int32_t getSupportedIndex() const { return supportedIndex; } 225 226 /** 227 * Takes the best-matching supported locale and adds relevant fields of the 228 * best-matching desired locale, such as the -t- and -u- extensions. 229 * May replace some fields of the supported locale. 230 * The result is the locale that should be used for date and number formatting, collation, etc. 231 * Returns the root locale if getSupportedLocale() returns nullptr. 232 * 233 * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn 234 * 235 * @return a locale combining the best-matching desired and supported locales. 236 * @draft ICU 65 237 */ 238 Locale makeResolvedLocale(UErrorCode &errorCode) const; 239 240 private: Result(const Locale * desired,const Locale * supported,int32_t desIndex,int32_t suppIndex,UBool owned)241 Result(const Locale *desired, const Locale *supported, 242 int32_t desIndex, int32_t suppIndex, UBool owned) : 243 desiredLocale(desired), supportedLocale(supported), 244 desiredIndex(desIndex), supportedIndex(suppIndex), 245 desiredIsOwned(owned) {} 246 247 Result(const Result &other) = delete; 248 Result &operator=(const Result &other) = delete; 249 250 const Locale *desiredLocale; 251 const Locale *supportedLocale; 252 int32_t desiredIndex; 253 int32_t supportedIndex; 254 UBool desiredIsOwned; 255 256 friend class LocaleMatcher; 257 }; 258 259 /** 260 * LocaleMatcher builder. 261 * Movable but not copyable. 262 * 263 * @see LocaleMatcher#builder() 264 * @draft ICU 65 265 */ 266 class U_COMMON_API Builder : public UMemory { 267 public: 268 /** 269 * Constructs a builder used in chaining parameters for building a LocaleMatcher. 270 * 271 * @return a new Builder object 272 * @draft ICU 65 273 */ Builder()274 Builder() {} 275 276 /** 277 * Move constructor; might modify the source. 278 * This builder will have the same contents that the source builder had. 279 * 280 * @param src Builder to move contents from. 281 * @draft ICU 65 282 */ 283 Builder(Builder &&src) U_NOEXCEPT; 284 285 /** 286 * Destructor. 287 * 288 * @draft ICU 65 289 */ 290 ~Builder(); 291 292 /** 293 * Move assignment; might modify the source. 294 * This builder will have the same contents that the source builder had. 295 * 296 * @param src Builder to move contents from. 297 * @draft ICU 65 298 */ 299 Builder &operator=(Builder &&src) U_NOEXCEPT; 300 301 /** 302 * Parses an Accept-Language string 303 * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>), 304 * such as "af, en, fr;q=0.9", and sets the supported locales accordingly. 305 * Allows whitespace in more places but does not allow "*". 306 * Clears any previously set/added supported locales first. 307 * 308 * @param locales the Accept-Language string of locales to set 309 * @return this Builder object 310 * @draft ICU 65 311 */ 312 Builder &setSupportedLocalesFromListString(StringPiece locales); 313 314 /** 315 * Copies the supported locales, preserving iteration order. 316 * Clears any previously set/added supported locales first. 317 * Duplicates are allowed, and are not removed. 318 * 319 * @param locales the list of locale 320 * @return this Builder object 321 * @draft ICU 65 322 */ 323 Builder &setSupportedLocales(Locale::Iterator &locales); 324 325 /** 326 * Copies the supported locales from the begin/end range, preserving iteration order. 327 * Clears any previously set/added supported locales first. 328 * Duplicates are allowed, and are not removed. 329 * 330 * Each of the iterator parameter values must be an 331 * input iterator whose value is convertible to const Locale &. 332 * 333 * @param begin Start of range. 334 * @param end Exclusive end of range. 335 * @return this Builder object 336 * @draft ICU 65 337 */ 338 template<typename Iter> setSupportedLocales(Iter begin,Iter end)339 Builder &setSupportedLocales(Iter begin, Iter end) { 340 if (U_FAILURE(errorCode_)) { return *this; } 341 clearSupportedLocales(); 342 while (begin != end) { 343 addSupportedLocale(*begin++); 344 } 345 return *this; 346 } 347 348 /** 349 * Copies the supported locales from the begin/end range, preserving iteration order. 350 * Calls the converter to convert each *begin to a Locale or const Locale &. 351 * Clears any previously set/added supported locales first. 352 * Duplicates are allowed, and are not removed. 353 * 354 * Each of the iterator parameter values must be an 355 * input iterator whose value is convertible to const Locale &. 356 * 357 * @param begin Start of range. 358 * @param end Exclusive end of range. 359 * @param converter Converter from *begin to const Locale & or compatible. 360 * @return this Builder object 361 * @draft ICU 65 362 */ 363 template<typename Iter, typename Conv> setSupportedLocalesViaConverter(Iter begin,Iter end,Conv converter)364 Builder &setSupportedLocalesViaConverter(Iter begin, Iter end, Conv converter) { 365 if (U_FAILURE(errorCode_)) { return *this; } 366 clearSupportedLocales(); 367 while (begin != end) { 368 addSupportedLocale(converter(*begin++)); 369 } 370 return *this; 371 } 372 373 /** 374 * Adds another supported locale. 375 * Duplicates are allowed, and are not removed. 376 * 377 * @param locale another locale 378 * @return this Builder object 379 * @draft ICU 65 380 */ 381 Builder &addSupportedLocale(const Locale &locale); 382 383 /** 384 * Sets the default locale; if nullptr, or if it is not set explicitly, 385 * then the first supported locale is used as the default locale. 386 * 387 * @param defaultLocale the default locale (will be copied) 388 * @return this Builder object 389 * @draft ICU 65 390 */ 391 Builder &setDefaultLocale(const Locale *defaultLocale); 392 393 /** 394 * If ULOCMATCH_FAVOR_SCRIPT, then the language differences are smaller than script 395 * differences. 396 * This is used in situations (such as maps) where 397 * it is better to fall back to the same script than a similar language. 398 * 399 * @param subtag the subtag to favor 400 * @return this Builder object 401 * @draft ICU 65 402 */ 403 Builder &setFavorSubtag(ULocMatchFavorSubtag subtag); 404 405 /** 406 * Option for whether all desired locales are treated equally or 407 * earlier ones are preferred (this is the default). 408 * 409 * @param demotion the demotion per desired locale to set. 410 * @return this Builder object 411 * @draft ICU 65 412 */ 413 Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion); 414 415 /** 416 * Sets the UErrorCode if an error occurred while setting parameters. 417 * Preserves older error codes in the outErrorCode. 418 * 419 * @param outErrorCode Set to an error code if it does not contain one already 420 * and an error occurred while setting parameters. 421 * Otherwise unchanged. 422 * @return TRUE if U_FAILURE(outErrorCode) 423 * @draft ICU 65 424 */ 425 UBool copyErrorTo(UErrorCode &outErrorCode) const; 426 427 /** 428 * Builds and returns a new locale matcher. 429 * This builder can continue to be used. 430 * 431 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, 432 * or else the function returns immediately. Check for U_FAILURE() 433 * on output or use with function chaining. (See User Guide for details.) 434 * @return new LocaleMatcher. 435 * @draft ICU 65 436 */ 437 LocaleMatcher build(UErrorCode &errorCode) const; 438 439 private: 440 friend class LocaleMatcher; 441 442 Builder(const Builder &other) = delete; 443 Builder &operator=(const Builder &other) = delete; 444 445 void clearSupportedLocales(); 446 bool ensureSupportedLocaleVector(); 447 448 UErrorCode errorCode_ = U_ZERO_ERROR; 449 UVector *supportedLocales_ = nullptr; 450 int32_t thresholdDistance_ = -1; 451 ULocMatchDemotion demotion_ = ULOCMATCH_DEMOTION_REGION; 452 Locale *defaultLocale_ = nullptr; 453 ULocMatchFavorSubtag favor_ = ULOCMATCH_FAVOR_LANGUAGE; 454 }; 455 456 // FYI No public LocaleMatcher constructors in C++; use the Builder. 457 458 /** 459 * Move copy constructor; might modify the source. 460 * This matcher will have the same settings that the source matcher had. 461 * @param src source matcher 462 * @draft ICU 65 463 */ 464 LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT; 465 466 /** 467 * Destructor. 468 * @draft ICU 65 469 */ 470 ~LocaleMatcher(); 471 472 /** 473 * Move assignment operator; might modify the source. 474 * This matcher will have the same settings that the source matcher had. 475 * The behavior is undefined if *this and src are the same object. 476 * @param src source matcher 477 * @return *this 478 * @draft ICU 65 479 */ 480 LocaleMatcher &operator=(LocaleMatcher &&src) U_NOEXCEPT; 481 482 /** 483 * Returns the supported locale which best matches the desired locale. 484 * 485 * @param desiredLocale Typically a user's language. 486 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, 487 * or else the function returns immediately. Check for U_FAILURE() 488 * on output or use with function chaining. (See User Guide for details.) 489 * @return the best-matching supported locale. 490 * @draft ICU 65 491 */ 492 const Locale *getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const; 493 494 /** 495 * Returns the supported locale which best matches one of the desired locales. 496 * 497 * @param desiredLocales Typically a user's languages, in order of preference (descending). 498 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, 499 * or else the function returns immediately. Check for U_FAILURE() 500 * on output or use with function chaining. (See User Guide for details.) 501 * @return the best-matching supported locale. 502 * @draft ICU 65 503 */ 504 const Locale *getBestMatch(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const; 505 506 /** 507 * Parses an Accept-Language string 508 * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>), 509 * such as "af, en, fr;q=0.9", 510 * and returns the supported locale which best matches one of the desired locales. 511 * Allows whitespace in more places but does not allow "*". 512 * 513 * @param desiredLocaleList Typically a user's languages, as an Accept-Language string. 514 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, 515 * or else the function returns immediately. Check for U_FAILURE() 516 * on output or use with function chaining. (See User Guide for details.) 517 * @return the best-matching supported locale. 518 * @draft ICU 65 519 */ 520 const Locale *getBestMatchForListString(StringPiece desiredLocaleList, UErrorCode &errorCode) const; 521 522 /** 523 * Returns the best match between the desired locale and the supported locales. 524 * If the result's desired locale is not nullptr, then it is the address of the input locale. 525 * It has not been cloned. 526 * 527 * @param desiredLocale Typically a user's language. 528 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, 529 * or else the function returns immediately. Check for U_FAILURE() 530 * on output or use with function chaining. (See User Guide for details.) 531 * @return the best-matching pair of the desired and a supported locale. 532 * @draft ICU 65 533 */ 534 Result getBestMatchResult(const Locale &desiredLocale, UErrorCode &errorCode) const; 535 536 /** 537 * Returns the best match between the desired and supported locales. 538 * If the result's desired locale is not nullptr, then it is a clone of 539 * the best-matching desired locale. The Result object owns the clone. 540 * 541 * @param desiredLocales Typically a user's languages, in order of preference (descending). 542 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, 543 * or else the function returns immediately. Check for U_FAILURE() 544 * on output or use with function chaining. (See User Guide for details.) 545 * @return the best-matching pair of a desired and a supported locale. 546 * @draft ICU 65 547 */ 548 Result getBestMatchResult(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const; 549 550 #ifndef U_HIDE_INTERNAL_API 551 /** 552 * Returns a fraction between 0 and 1, where 1 means that the languages are a 553 * perfect match, and 0 means that they are completely different. 554 * 555 * <p>This is mostly an implementation detail, and the precise values may change over time. 556 * The implementation may use either the maximized forms or the others ones, or both. 557 * The implementation may or may not rely on the forms to be consistent with each other. 558 * 559 * <p>Callers should construct and use a matcher rather than match pairs of locales directly. 560 * 561 * @param desired Desired locale. 562 * @param supported Supported locale. 563 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, 564 * or else the function returns immediately. Check for U_FAILURE() 565 * on output or use with function chaining. (See User Guide for details.) 566 * @return value between 0 and 1, inclusive. 567 * @internal (has a known user) 568 */ 569 double internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const; 570 #endif // U_HIDE_INTERNAL_API 571 572 private: 573 LocaleMatcher(const Builder &builder, UErrorCode &errorCode); 574 LocaleMatcher(const LocaleMatcher &other) = delete; 575 LocaleMatcher &operator=(const LocaleMatcher &other) = delete; 576 577 int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const; 578 579 const XLikelySubtags &likelySubtags; 580 const LocaleDistance &localeDistance; 581 int32_t thresholdDistance; 582 int32_t demotionPerDesiredLocale; 583 ULocMatchFavorSubtag favorSubtag; 584 585 // These are in input order. 586 const Locale ** supportedLocales; 587 LSR *lsrs; 588 int32_t supportedLocalesLength; 589 // These are in preference order: 1. Default locale 2. paradigm locales 3. others. 590 UHashtable *supportedLsrToIndex; // Map<LSR, Integer> stores index+1 because 0 is "not found" 591 // Array versions of the supportedLsrToIndex keys and values. 592 // The distance lookup loops over the supportedLSRs and returns the index of the best match. 593 const LSR **supportedLSRs; 594 int32_t *supportedIndexes; 595 int32_t supportedLSRsLength; 596 Locale *ownedDefaultLocale; 597 const Locale *defaultLocale; 598 int32_t defaultLocaleIndex; 599 }; 600 601 U_NAMESPACE_END 602 603 #endif // U_HIDE_DRAFT_API 604 #endif // U_SHOW_CPLUSPLUS_API 605 #endif // __LOCALEMATCHER_H__ 606