1 // © 2019 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 4 // localematcher.h 5 // created: 2019may08 Markus W. Scherer 6 7 #ifndef __LOCALEMATCHER_H__ 8 #define __LOCALEMATCHER_H__ 9 10 #include "unicode/utypes.h" 11 12 #if U_SHOW_CPLUSPLUS_API 13 14 #include "unicode/locid.h" 15 #include "unicode/stringpiece.h" 16 #include "unicode/uobject.h" 17 18 /** 19 * \file 20 * \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales. 21 */ 22 23 #ifndef U_FORCE_HIDE_DRAFT_API 24 25 /** 26 * Builder option for whether the language subtag or the script subtag is most important. 27 * 28 * @see Builder#setFavorSubtag(ULocMatchFavorSubtag) 29 * @draft ICU 65 30 */ 31 enum ULocMatchFavorSubtag { 32 /** 33 * Language differences are most important, then script differences, then region differences. 34 * (This is the default behavior.) 35 * 36 * @draft ICU 65 37 */ 38 ULOCMATCH_FAVOR_LANGUAGE, 39 /** 40 * Makes script differences matter relatively more than language differences. 41 * 42 * @draft ICU 65 43 */ 44 ULOCMATCH_FAVOR_SCRIPT 45 }; 46 #ifndef U_IN_DOXYGEN 47 typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag; 48 #endif 49 50 /** 51 * Builder option for whether all desired locales are treated equally or 52 * earlier ones are preferred. 53 * 54 * @see Builder#setDemotionPerDesiredLocale(ULocMatchDemotion) 55 * @draft ICU 65 56 */ 57 enum ULocMatchDemotion { 58 /** 59 * All desired locales are treated equally. 60 * 61 * @draft ICU 65 62 */ 63 ULOCMATCH_DEMOTION_NONE, 64 /** 65 * Earlier desired locales are preferred. 66 * 67 * <p>From each desired locale to the next, 68 * the distance to any supported locale is increased by an additional amount 69 * which is at least as large as most region mismatches. 70 * A later desired locale has to have a better match with some supported locale 71 * due to more than merely having the same region subtag. 72 * 73 * <p>For example: <code>Supported={en, sv} desired=[en-GB, sv]</code> 74 * yields <code>Result(en-GB, en)</code> because 75 * with the demotion of sv its perfect match is no better than 76 * the region distance between the earlier desired locale en-GB and en=en-US. 77 * 78 * <p>Notes: 79 * <ul> 80 * <li>In some cases, language and/or script differences can be as small as 81 * the typical region difference. (Example: sr-Latn vs. sr-Cyrl) 82 * <li>It is possible for certain region differences to be larger than usual, 83 * and larger than the demotion. 84 * (As of CLDR 35 there is no such case, but 85 * this is possible in future versions of the data.) 86 * </ul> 87 * 88 * @draft ICU 65 89 */ 90 ULOCMATCH_DEMOTION_REGION 91 }; 92 #ifndef U_IN_DOXYGEN 93 typedef enum ULocMatchDemotion ULocMatchDemotion; 94 #endif 95 96 /** 97 * Builder option for whether to include or ignore one-way (fallback) match data. 98 * The LocaleMatcher uses CLDR languageMatch data which includes fallback (oneway=true) entries. 99 * Sometimes it is desirable to ignore those. 100 * 101 * <p>For example, consider a web application with the UI in a given language, 102 * with a link to another, related web app. 103 * The link should include the UI language, and the target server may also use 104 * the client’s Accept-Language header data. 105 * The target server has its own list of supported languages. 106 * One may want to favor UI language consistency, that is, 107 * if there is a decent match for the original UI language, we want to use it, 108 * but not if it is merely a fallback. 109 * 110 * @see Builder#setDirection(ULocMatchDirection) 111 * @draft ICU 67 112 */ 113 enum ULocMatchDirection { 114 /** 115 * Locale matching includes one-way matches such as Breton→French. (default) 116 * 117 * @draft ICU 67 118 */ 119 ULOCMATCH_DIRECTION_WITH_ONE_WAY, 120 /** 121 * Locale matching limited to two-way matches including e.g. Danish↔Norwegian 122 * but ignoring one-way matches. 123 * 124 * @draft ICU 67 125 */ 126 ULOCMATCH_DIRECTION_ONLY_TWO_WAY 127 }; 128 #ifndef U_IN_DOXYGEN 129 typedef enum ULocMatchDirection ULocMatchDirection; 130 #endif 131 132 struct UHashtable; 133 134 U_NAMESPACE_BEGIN 135 136 struct LSR; 137 138 class LocaleDistance; 139 class LocaleLsrIterator; 140 class UVector; 141 class XLikelySubtags; 142 143 /** 144 * Immutable class that picks the best match between a user's desired locales and 145 * an application's supported locales. 146 * Movable but not copyable. 147 * 148 * <p>Example: 149 * <pre> 150 * UErrorCode errorCode = U_ZERO_ERROR; 151 * LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocales("fr, en-GB, en").build(errorCode); 152 * Locale *bestSupported = matcher.getBestLocale(Locale.US, errorCode); // "en" 153 * </pre> 154 * 155 * <p>A matcher takes into account when languages are close to one another, 156 * such as Danish and Norwegian, 157 * and when regional variants are close, like en-GB and en-AU as opposed to en-US. 158 * 159 * <p>If there are multiple supported locales with the same (language, script, region) 160 * likely subtags, then the current implementation returns the first of those locales. 161 * It ignores variant subtags (except for pseudolocale variants) and extensions. 162 * This may change in future versions. 163 * 164 * <p>For example, the current implementation does not distinguish between 165 * de, de-DE, de-Latn, de-1901, de-u-co-phonebk. 166 * 167 * <p>If you prefer one equivalent locale over another, then provide only the preferred one, 168 * or place it earlier in the list of supported locales. 169 * 170 * <p>Otherwise, the order of supported locales may have no effect on the best-match results. 171 * The current implementation compares each desired locale with supported locales 172 * in the following order: 173 * 1. Default locale, if supported; 174 * 2. CLDR "paradigm locales" like en-GB and es-419; 175 * 3. other supported locales. 176 * This may change in future versions. 177 * 178 * <p>Often a product will just need one matcher instance, built with the languages 179 * that it supports. However, it may want multiple instances with different 180 * default languages based on additional information, such as the domain. 181 * 182 * <p>This class is not intended for public subclassing. 183 * 184 * @draft ICU 65 185 */ 186 class U_COMMON_API LocaleMatcher : public UMemory { 187 public: 188 /** 189 * Data for the best-matching pair of a desired and a supported locale. 190 * Movable but not copyable. 191 * 192 * @draft ICU 65 193 */ 194 class U_COMMON_API Result : public UMemory { 195 public: 196 /** 197 * Move constructor; might modify the source. 198 * This object will have the same contents that the source object had. 199 * 200 * @param src Result to move contents from. 201 * @draft ICU 65 202 */ 203 Result(Result &&src) U_NOEXCEPT; 204 205 /** 206 * Destructor. 207 * 208 * @draft ICU 65 209 */ 210 ~Result(); 211 212 /** 213 * Move assignment; might modify the source. 214 * This object will have the same contents that the source object had. 215 * 216 * @param src Result to move contents from. 217 * @draft ICU 65 218 */ 219 Result &operator=(Result &&src) U_NOEXCEPT; 220 221 #ifndef U_HIDE_DRAFT_API 222 /** 223 * Returns the best-matching desired locale. 224 * nullptr if the list of desired locales is empty or if none matched well enough. 225 * 226 * @return the best-matching desired locale, or nullptr. 227 * @draft ICU 65 228 */ getDesiredLocale()229 inline const Locale *getDesiredLocale() const { return desiredLocale; } 230 231 /** 232 * Returns the best-matching supported locale. 233 * If none matched well enough, this is the default locale. 234 * The default locale is nullptr if the list of supported locales is empty and 235 * no explicit default locale is set. 236 * 237 * @return the best-matching supported locale, or nullptr. 238 * @draft ICU 65 239 */ getSupportedLocale()240 inline const Locale *getSupportedLocale() const { return supportedLocale; } 241 242 /** 243 * Returns the index of the best-matching desired locale in the input Iterable order. 244 * -1 if the list of desired locales is empty or if none matched well enough. 245 * 246 * @return the index of the best-matching desired locale, or -1. 247 * @draft ICU 65 248 */ getDesiredIndex()249 inline int32_t getDesiredIndex() const { return desiredIndex; } 250 251 /** 252 * Returns the index of the best-matching supported locale in the 253 * constructor’s or builder’s input order (“set” Collection plus “added” locales). 254 * If the matcher was built from a locale list string, then the iteration order is that 255 * of a LocalePriorityList built from the same string. 256 * -1 if the list of supported locales is empty or if none matched well enough. 257 * 258 * @return the index of the best-matching supported locale, or -1. 259 * @draft ICU 65 260 */ getSupportedIndex()261 inline int32_t getSupportedIndex() const { return supportedIndex; } 262 263 /** 264 * Takes the best-matching supported locale and adds relevant fields of the 265 * best-matching desired locale, such as the -t- and -u- extensions. 266 * May replace some fields of the supported locale. 267 * The result is the locale that should be used for date and number formatting, collation, etc. 268 * Returns the root locale if getSupportedLocale() returns nullptr. 269 * 270 * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn 271 * 272 * @return a locale combining the best-matching desired and supported locales. 273 * @draft ICU 65 274 */ 275 Locale makeResolvedLocale(UErrorCode &errorCode) const; 276 #endif // U_HIDE_DRAFT_API 277 278 private: Result(const Locale * desired,const Locale * supported,int32_t desIndex,int32_t suppIndex,UBool owned)279 Result(const Locale *desired, const Locale *supported, 280 int32_t desIndex, int32_t suppIndex, UBool owned) : 281 desiredLocale(desired), supportedLocale(supported), 282 desiredIndex(desIndex), supportedIndex(suppIndex), 283 desiredIsOwned(owned) {} 284 285 Result(const Result &other) = delete; 286 Result &operator=(const Result &other) = delete; 287 288 const Locale *desiredLocale; 289 const Locale *supportedLocale; 290 int32_t desiredIndex; 291 int32_t supportedIndex; 292 UBool desiredIsOwned; 293 294 friend class LocaleMatcher; 295 }; 296 297 /** 298 * LocaleMatcher builder. 299 * Movable but not copyable. 300 * 301 * @see LocaleMatcher#builder() 302 * @draft ICU 65 303 */ 304 class U_COMMON_API Builder : public UMemory { 305 public: 306 /** 307 * Constructs a builder used in chaining parameters for building a LocaleMatcher. 308 * 309 * @return a new Builder object 310 * @draft ICU 65 311 */ Builder()312 Builder() {} 313 314 /** 315 * Move constructor; might modify the source. 316 * This builder will have the same contents that the source builder had. 317 * 318 * @param src Builder to move contents from. 319 * @draft ICU 65 320 */ 321 Builder(Builder &&src) U_NOEXCEPT; 322 323 /** 324 * Destructor. 325 * 326 * @draft ICU 65 327 */ 328 ~Builder(); 329 330 /** 331 * Move assignment; might modify the source. 332 * This builder will have the same contents that the source builder had. 333 * 334 * @param src Builder to move contents from. 335 * @draft ICU 65 336 */ 337 Builder &operator=(Builder &&src) U_NOEXCEPT; 338 339 #ifndef U_HIDE_DRAFT_API 340 /** 341 * Parses an Accept-Language string 342 * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>), 343 * such as "af, en, fr;q=0.9", and sets the supported locales accordingly. 344 * Allows whitespace in more places but does not allow "*". 345 * Clears any previously set/added supported locales first. 346 * 347 * @param locales the Accept-Language string of locales to set 348 * @return this Builder object 349 * @draft ICU 65 350 */ 351 Builder &setSupportedLocalesFromListString(StringPiece locales); 352 353 /** 354 * Copies the supported locales, preserving iteration order. 355 * Clears any previously set/added supported locales first. 356 * Duplicates are allowed, and are not removed. 357 * 358 * @param locales the list of locale 359 * @return this Builder object 360 * @draft ICU 65 361 */ 362 Builder &setSupportedLocales(Locale::Iterator &locales); 363 364 /** 365 * Copies the supported locales from the begin/end range, preserving iteration order. 366 * Clears any previously set/added supported locales first. 367 * Duplicates are allowed, and are not removed. 368 * 369 * Each of the iterator parameter values must be an 370 * input iterator whose value is convertible to const Locale &. 371 * 372 * @param begin Start of range. 373 * @param end Exclusive end of range. 374 * @return this Builder object 375 * @draft ICU 65 376 */ 377 template<typename Iter> setSupportedLocales(Iter begin,Iter end)378 Builder &setSupportedLocales(Iter begin, Iter end) { 379 if (U_FAILURE(errorCode_)) { return *this; } 380 clearSupportedLocales(); 381 while (begin != end) { 382 addSupportedLocale(*begin++); 383 } 384 return *this; 385 } 386 387 /** 388 * Copies the supported locales from the begin/end range, preserving iteration order. 389 * Calls the converter to convert each *begin to a Locale or const Locale &. 390 * Clears any previously set/added supported locales first. 391 * Duplicates are allowed, and are not removed. 392 * 393 * Each of the iterator parameter values must be an 394 * input iterator whose value is convertible to const Locale &. 395 * 396 * @param begin Start of range. 397 * @param end Exclusive end of range. 398 * @param converter Converter from *begin to const Locale & or compatible. 399 * @return this Builder object 400 * @draft ICU 65 401 */ 402 template<typename Iter, typename Conv> setSupportedLocalesViaConverter(Iter begin,Iter end,Conv converter)403 Builder &setSupportedLocalesViaConverter(Iter begin, Iter end, Conv converter) { 404 if (U_FAILURE(errorCode_)) { return *this; } 405 clearSupportedLocales(); 406 while (begin != end) { 407 addSupportedLocale(converter(*begin++)); 408 } 409 return *this; 410 } 411 412 /** 413 * Adds another supported locale. 414 * Duplicates are allowed, and are not removed. 415 * 416 * @param locale another locale 417 * @return this Builder object 418 * @draft ICU 65 419 */ 420 Builder &addSupportedLocale(const Locale &locale); 421 422 /** 423 * Sets the default locale; if nullptr, or if it is not set explicitly, 424 * then the first supported locale is used as the default locale. 425 * 426 * @param defaultLocale the default locale (will be copied) 427 * @return this Builder object 428 * @draft ICU 65 429 */ 430 Builder &setDefaultLocale(const Locale *defaultLocale); 431 432 /** 433 * If ULOCMATCH_FAVOR_SCRIPT, then the language differences are smaller than script 434 * differences. 435 * This is used in situations (such as maps) where 436 * it is better to fall back to the same script than a similar language. 437 * 438 * @param subtag the subtag to favor 439 * @return this Builder object 440 * @draft ICU 65 441 */ 442 Builder &setFavorSubtag(ULocMatchFavorSubtag subtag); 443 444 /** 445 * Option for whether all desired locales are treated equally or 446 * earlier ones are preferred (this is the default). 447 * 448 * @param demotion the demotion per desired locale to set. 449 * @return this Builder object 450 * @draft ICU 65 451 */ 452 Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion); 453 454 /** 455 * Option for whether to include or ignore one-way (fallback) match data. 456 * By default, they are included. 457 * 458 * @param direction the match direction to set. 459 * @return this Builder object 460 * @draft ICU 67 461 */ setDirection(ULocMatchDirection direction)462 Builder &setDirection(ULocMatchDirection direction) { 463 if (U_SUCCESS(errorCode_)) { 464 direction_ = direction; 465 } 466 return *this; 467 } 468 469 /** 470 * Sets the UErrorCode if an error occurred while setting parameters. 471 * Preserves older error codes in the outErrorCode. 472 * 473 * @param outErrorCode Set to an error code if it does not contain one already 474 * and an error occurred while setting parameters. 475 * Otherwise unchanged. 476 * @return TRUE if U_FAILURE(outErrorCode) 477 * @draft ICU 65 478 */ 479 UBool copyErrorTo(UErrorCode &outErrorCode) const; 480 481 /** 482 * Builds and returns a new locale matcher. 483 * This builder can continue to be used. 484 * 485 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, 486 * or else the function returns immediately. Check for U_FAILURE() 487 * on output or use with function chaining. (See User Guide for details.) 488 * @return new LocaleMatcher. 489 * @draft ICU 65 490 */ 491 LocaleMatcher build(UErrorCode &errorCode) const; 492 #endif // U_HIDE_DRAFT_API 493 494 private: 495 friend class LocaleMatcher; 496 497 Builder(const Builder &other) = delete; 498 Builder &operator=(const Builder &other) = delete; 499 500 void clearSupportedLocales(); 501 bool ensureSupportedLocaleVector(); 502 503 UErrorCode errorCode_ = U_ZERO_ERROR; 504 UVector *supportedLocales_ = nullptr; 505 int32_t thresholdDistance_ = -1; 506 ULocMatchDemotion demotion_ = ULOCMATCH_DEMOTION_REGION; 507 Locale *defaultLocale_ = nullptr; 508 ULocMatchFavorSubtag favor_ = ULOCMATCH_FAVOR_LANGUAGE; 509 ULocMatchDirection direction_ = ULOCMATCH_DIRECTION_WITH_ONE_WAY; 510 }; 511 512 // FYI No public LocaleMatcher constructors in C++; use the Builder. 513 514 /** 515 * Move copy constructor; might modify the source. 516 * This matcher will have the same settings that the source matcher had. 517 * @param src source matcher 518 * @draft ICU 65 519 */ 520 LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT; 521 522 /** 523 * Destructor. 524 * @draft ICU 65 525 */ 526 ~LocaleMatcher(); 527 528 /** 529 * Move assignment operator; might modify the source. 530 * This matcher will have the same settings that the source matcher had. 531 * The behavior is undefined if *this and src are the same object. 532 * @param src source matcher 533 * @return *this 534 * @draft ICU 65 535 */ 536 LocaleMatcher &operator=(LocaleMatcher &&src) U_NOEXCEPT; 537 538 #ifndef U_HIDE_DRAFT_API 539 /** 540 * Returns the supported locale which best matches the desired locale. 541 * 542 * @param desiredLocale Typically a user's language. 543 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, 544 * or else the function returns immediately. Check for U_FAILURE() 545 * on output or use with function chaining. (See User Guide for details.) 546 * @return the best-matching supported locale. 547 * @draft ICU 65 548 */ 549 const Locale *getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const; 550 551 /** 552 * Returns the supported locale which best matches one of the desired locales. 553 * 554 * @param desiredLocales Typically a user's languages, in order of preference (descending). 555 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, 556 * or else the function returns immediately. Check for U_FAILURE() 557 * on output or use with function chaining. (See User Guide for details.) 558 * @return the best-matching supported locale. 559 * @draft ICU 65 560 */ 561 const Locale *getBestMatch(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const; 562 563 /** 564 * Parses an Accept-Language string 565 * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>), 566 * such as "af, en, fr;q=0.9", 567 * and returns the supported locale which best matches one of the desired locales. 568 * Allows whitespace in more places but does not allow "*". 569 * 570 * @param desiredLocaleList Typically a user's languages, as an Accept-Language string. 571 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, 572 * or else the function returns immediately. Check for U_FAILURE() 573 * on output or use with function chaining. (See User Guide for details.) 574 * @return the best-matching supported locale. 575 * @draft ICU 65 576 */ 577 const Locale *getBestMatchForListString(StringPiece desiredLocaleList, UErrorCode &errorCode) const; 578 579 /** 580 * Returns the best match between the desired locale and the supported locales. 581 * If the result's desired locale is not nullptr, then it is the address of the input locale. 582 * It has not been cloned. 583 * 584 * @param desiredLocale Typically a user's language. 585 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, 586 * or else the function returns immediately. Check for U_FAILURE() 587 * on output or use with function chaining. (See User Guide for details.) 588 * @return the best-matching pair of the desired and a supported locale. 589 * @draft ICU 65 590 */ 591 Result getBestMatchResult(const Locale &desiredLocale, UErrorCode &errorCode) const; 592 593 /** 594 * Returns the best match between the desired and supported locales. 595 * If the result's desired locale is not nullptr, then it is a clone of 596 * the best-matching desired locale. The Result object owns the clone. 597 * 598 * @param desiredLocales Typically a user's languages, in order of preference (descending). 599 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, 600 * or else the function returns immediately. Check for U_FAILURE() 601 * on output or use with function chaining. (See User Guide for details.) 602 * @return the best-matching pair of a desired and a supported locale. 603 * @draft ICU 65 604 */ 605 Result getBestMatchResult(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const; 606 #endif // U_HIDE_DRAFT_API 607 608 #ifndef U_HIDE_INTERNAL_API 609 /** 610 * Returns a fraction between 0 and 1, where 1 means that the languages are a 611 * perfect match, and 0 means that they are completely different. 612 * 613 * <p>This is mostly an implementation detail, and the precise values may change over time. 614 * The implementation may use either the maximized forms or the others ones, or both. 615 * The implementation may or may not rely on the forms to be consistent with each other. 616 * 617 * <p>Callers should construct and use a matcher rather than match pairs of locales directly. 618 * 619 * @param desired Desired locale. 620 * @param supported Supported locale. 621 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, 622 * or else the function returns immediately. Check for U_FAILURE() 623 * on output or use with function chaining. (See User Guide for details.) 624 * @return value between 0 and 1, inclusive. 625 * @internal (has a known user) 626 */ 627 double internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const; 628 #endif // U_HIDE_INTERNAL_API 629 630 private: 631 LocaleMatcher(const Builder &builder, UErrorCode &errorCode); 632 LocaleMatcher(const LocaleMatcher &other) = delete; 633 LocaleMatcher &operator=(const LocaleMatcher &other) = delete; 634 635 int32_t putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength, UErrorCode &errorCode); 636 637 int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const; 638 639 const XLikelySubtags &likelySubtags; 640 const LocaleDistance &localeDistance; 641 int32_t thresholdDistance; 642 int32_t demotionPerDesiredLocale; 643 ULocMatchFavorSubtag favorSubtag; 644 ULocMatchDirection direction; 645 646 // These are in input order. 647 const Locale ** supportedLocales; 648 LSR *lsrs; 649 int32_t supportedLocalesLength; 650 // These are in preference order: 1. Default locale 2. paradigm locales 3. others. 651 UHashtable *supportedLsrToIndex; // Map<LSR, Integer> stores index+1 because 0 is "not found" 652 // Array versions of the supportedLsrToIndex keys and values. 653 // The distance lookup loops over the supportedLSRs and returns the index of the best match. 654 const LSR **supportedLSRs; 655 int32_t *supportedIndexes; 656 int32_t supportedLSRsLength; 657 Locale *ownedDefaultLocale; 658 const Locale *defaultLocale; 659 }; 660 661 U_NAMESPACE_END 662 663 #endif // U_FORCE_HIDE_DRAFT_API 664 #endif // U_SHOW_CPLUSPLUS_API 665 #endif // __LOCALEMATCHER_H__ 666