• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 
4 // localematcher.h
5 // created: 2019may08 Markus W. Scherer
6 
7 #ifndef __LOCALEMATCHER_H__
8 #define __LOCALEMATCHER_H__
9 
10 #include "unicode/utypes.h"
11 
12 #if U_SHOW_CPLUSPLUS_API
13 
14 #include "unicode/locid.h"
15 #include "unicode/stringpiece.h"
16 #include "unicode/uobject.h"
17 
18 /**
19  * \file
20  * \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales.
21  */
22 
23 #ifndef U_HIDE_DRAFT_API
24 
/**
 * Builder option for whether the language subtag or the script subtag is most important.
 *
 * @see LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag)
 * @draft ICU 65
 */
enum ULocMatchFavorSubtag {
    /**
     * Language differences are most important, then script differences, then region differences.
     * (This is the default behavior.)
     *
     * @draft ICU 65
     */
    ULOCMATCH_FAVOR_LANGUAGE,
    /**
     * Makes script differences matter relatively more than language differences.
     *
     * @draft ICU 65
     */
    ULOCMATCH_FAVOR_SCRIPT
};
#ifndef U_IN_DOXYGEN
// Lets the enum be named without the "enum" keyword; hidden from Doxygen.
typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag;
#endif
49 
/**
 * Builder option for whether all desired locales are treated equally or
 * earlier ones are preferred.
 *
 * @see LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion)
 * @draft ICU 65
 */
enum ULocMatchDemotion {
    /**
     * All desired locales are treated equally.
     *
     * @draft ICU 65
     */
    ULOCMATCH_DEMOTION_NONE,
    /**
     * Earlier desired locales are preferred.
     *
     * <p>From each desired locale to the next,
     * the distance to any supported locale is increased by an additional amount
     * which is at least as large as most region mismatches.
     * A later desired locale has to have a better match with some supported locale
     * due to more than merely having the same region subtag.
     *
     * <p>For example: <code>Supported={en, sv}  desired=[en-GB, sv]</code>
     * yields <code>Result(en-GB, en)</code> because
     * with the demotion of sv its perfect match is no better than
     * the region distance between the earlier desired locale en-GB and en=en-US.
     *
     * <p>Notes:
     * <ul>
     *   <li>In some cases, language and/or script differences can be as small as
     *       the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
     *   <li>It is possible for certain region differences to be larger than usual,
     *       and larger than the demotion.
     *       (As of CLDR 35 there is no such case, but
     *        this is possible in future versions of the data.)
     * </ul>
     *
     * @draft ICU 65
     */
    ULOCMATCH_DEMOTION_REGION
};
#ifndef U_IN_DOXYGEN
// Lets the enum be named without the "enum" keyword; hidden from Doxygen.
typedef enum ULocMatchDemotion ULocMatchDemotion;
#endif
95 
96 struct UHashtable;
97 
98 U_NAMESPACE_BEGIN
99 
100 struct LSR;
101 
102 class LocaleDistance;
103 class LocaleLsrIterator;
104 class UVector;
105 class XLikelySubtags;
106 
107 /**
108  * Immutable class that picks the best match between a user's desired locales and
109  * an application's supported locales.
110  * Movable but not copyable.
111  *
112  * <p>Example:
113  * <pre>
114  * UErrorCode errorCode = U_ZERO_ERROR;
115  * LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocalesFromListString("fr, en-GB, en").build(errorCode);
116  * const Locale *bestSupported = matcher.getBestMatch(Locale::getUS(), errorCode);  // "en"
117  * </pre>
118  *
119  * <p>A matcher takes into account when languages are close to one another,
120  * such as Danish and Norwegian,
121  * and when regional variants are close, like en-GB and en-AU as opposed to en-US.
122  *
123  * <p>If there are multiple supported locales with the same (language, script, region)
124  * likely subtags, then the current implementation returns the first of those locales.
125  * It ignores variant subtags (except for pseudolocale variants) and extensions.
126  * This may change in future versions.
127  *
128  * <p>For example, the current implementation does not distinguish between
129  * de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
130  *
131  * <p>If you prefer one equivalent locale over another, then provide only the preferred one,
132  * or place it earlier in the list of supported locales.
133  *
134  * <p>Otherwise, the order of supported locales may have no effect on the best-match results.
135  * The current implementation compares each desired locale with supported locales
136  * in the following order:
137  * 1. Default locale, if supported;
138  * 2. CLDR "paradigm locales" like en-GB and es-419;
139  * 3. other supported locales.
140  * This may change in future versions.
141  *
142  * <p>Often a product will just need one matcher instance, built with the languages
143  * that it supports. However, it may want multiple instances with different
144  * default languages based on additional information, such as the domain.
145  *
146  * <p>This class is not intended for public subclassing.
147  *
148  * @draft ICU 65
149  */
150 class U_COMMON_API LocaleMatcher : public UMemory {
151 public:
152     /**
153      * Data for the best-matching pair of a desired and a supported locale.
154      * Movable but not copyable.
155      *
156      * @draft ICU 65
157      */
158     class U_COMMON_API Result : public UMemory {
159     public:
160         /**
161          * Move constructor; might modify the source.
162          * This object will have the same contents that the source object had.
163          *
164          * @param src Result to move contents from.
165          * @draft ICU 65
166          */
167         Result(Result &&src) U_NOEXCEPT;
168 
169         /**
170          * Destructor.
171          *
172          * @draft ICU 65
173          */
174         ~Result();
175 
176         /**
177          * Move assignment; might modify the source.
178          * This object will have the same contents that the source object had.
179          *
180          * @param src Result to move contents from.
181          * @draft ICU 65
182          */
183         Result &operator=(Result &&src) U_NOEXCEPT;
184 
185         /**
186          * Returns the best-matching desired locale.
187          * nullptr if the list of desired locales is empty or if none matched well enough.
188          *
189          * @return the best-matching desired locale, or nullptr.
190          * @draft ICU 65
191          */
getDesiredLocale()192         inline const Locale *getDesiredLocale() const { return desiredLocale; }
193 
194         /**
195          * Returns the best-matching supported locale.
196          * If none matched well enough, this is the default locale.
197          * The default locale is nullptr if the list of supported locales is empty and
198          * no explicit default locale is set.
199          *
200          * @return the best-matching supported locale, or nullptr.
201          * @draft ICU 65
202          */
getSupportedLocale()203         inline const Locale *getSupportedLocale() const { return supportedLocale; }
204 
205         /**
206          * Returns the index of the best-matching desired locale in the input Iterable order.
207          * -1 if the list of desired locales is empty or if none matched well enough.
208          *
209          * @return the index of the best-matching desired locale, or -1.
210          * @draft ICU 65
211          */
getDesiredIndex()212         inline int32_t getDesiredIndex() const { return desiredIndex; }
213 
214         /**
215          * Returns the index of the best-matching supported locale in the
216          * constructor’s or builder’s input order (“set” Collection plus “added” locales).
217          * If the matcher was built from a locale list string, then the iteration order is that
218          * of a LocalePriorityList built from the same string.
219          * -1 if the list of supported locales is empty or if none matched well enough.
220          *
221          * @return the index of the best-matching supported locale, or -1.
222          * @draft ICU 65
223          */
getSupportedIndex()224         inline int32_t getSupportedIndex() const { return supportedIndex; }
225 
226         /**
227          * Takes the best-matching supported locale and adds relevant fields of the
228          * best-matching desired locale, such as the -t- and -u- extensions.
229          * May replace some fields of the supported locale.
230          * The result is the locale that should be used for date and number formatting, collation, etc.
231          * Returns the root locale if getSupportedLocale() returns nullptr.
232          *
233          * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
234          *
235          * @return a locale combining the best-matching desired and supported locales.
236          * @draft ICU 65
237          */
238         Locale makeResolvedLocale(UErrorCode &errorCode) const;
239 
240     private:
Result(const Locale * desired,const Locale * supported,int32_t desIndex,int32_t suppIndex,UBool owned)241         Result(const Locale *desired, const Locale *supported,
242                int32_t desIndex, int32_t suppIndex, UBool owned) :
243                 desiredLocale(desired), supportedLocale(supported),
244                 desiredIndex(desIndex), supportedIndex(suppIndex),
245                 desiredIsOwned(owned) {}
246 
247         Result(const Result &other) = delete;
248         Result &operator=(const Result &other) = delete;
249 
250         const Locale *desiredLocale;
251         const Locale *supportedLocale;
252         int32_t desiredIndex;
253         int32_t supportedIndex;
254         UBool desiredIsOwned;
255 
256         friend class LocaleMatcher;
257     };
258 
259     /**
260      * LocaleMatcher builder.
261      * Movable but not copyable.
262      *
263      * @see LocaleMatcher#builder()
264      * @draft ICU 65
265      */
266     class U_COMMON_API Builder : public UMemory {
267     public:
268         /**
269          * Constructs a builder used in chaining parameters for building a LocaleMatcher.
270          *
271          * @return a new Builder object
272          * @draft ICU 65
273          */
Builder()274         Builder() {}
275 
276         /**
277          * Move constructor; might modify the source.
278          * This builder will have the same contents that the source builder had.
279          *
280          * @param src Builder to move contents from.
281          * @draft ICU 65
282          */
283         Builder(Builder &&src) U_NOEXCEPT;
284 
285         /**
286          * Destructor.
287          *
288          * @draft ICU 65
289          */
290         ~Builder();
291 
292         /**
293          * Move assignment; might modify the source.
294          * This builder will have the same contents that the source builder had.
295          *
296          * @param src Builder to move contents from.
297          * @draft ICU 65
298          */
299         Builder &operator=(Builder &&src) U_NOEXCEPT;
300 
301         /**
302          * Parses an Accept-Language string
303          * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
304          * such as "af, en, fr;q=0.9", and sets the supported locales accordingly.
305          * Allows whitespace in more places but does not allow "*".
306          * Clears any previously set/added supported locales first.
307          *
308          * @param locales the Accept-Language string of locales to set
309          * @return this Builder object
310          * @draft ICU 65
311          */
312         Builder &setSupportedLocalesFromListString(StringPiece locales);
313 
314         /**
315          * Copies the supported locales, preserving iteration order.
316          * Clears any previously set/added supported locales first.
317          * Duplicates are allowed, and are not removed.
318          *
319          * @param locales the list of locale
320          * @return this Builder object
321          * @draft ICU 65
322          */
323         Builder &setSupportedLocales(Locale::Iterator &locales);
324 
325         /**
326          * Copies the supported locales from the begin/end range, preserving iteration order.
327          * Clears any previously set/added supported locales first.
328          * Duplicates are allowed, and are not removed.
329          *
330          * Each of the iterator parameter values must be an
331          * input iterator whose value is convertible to const Locale &.
332          *
333          * @param begin Start of range.
334          * @param end Exclusive end of range.
335          * @return this Builder object
336          * @draft ICU 65
337          */
338         template<typename Iter>
setSupportedLocales(Iter begin,Iter end)339         Builder &setSupportedLocales(Iter begin, Iter end) {
340             if (U_FAILURE(errorCode_)) { return *this; }
341             clearSupportedLocales();
342             while (begin != end) {
343                 addSupportedLocale(*begin++);
344             }
345             return *this;
346         }
347 
348         /**
349          * Copies the supported locales from the begin/end range, preserving iteration order.
350          * Calls the converter to convert each *begin to a Locale or const Locale &.
351          * Clears any previously set/added supported locales first.
352          * Duplicates are allowed, and are not removed.
353          *
354          * Each of the iterator parameter values must be an
355          * input iterator whose value is convertible to const Locale &.
356          *
357          * @param begin Start of range.
358          * @param end Exclusive end of range.
359          * @param converter Converter from *begin to const Locale & or compatible.
360          * @return this Builder object
361          * @draft ICU 65
362          */
363         template<typename Iter, typename Conv>
setSupportedLocalesViaConverter(Iter begin,Iter end,Conv converter)364         Builder &setSupportedLocalesViaConverter(Iter begin, Iter end, Conv converter) {
365             if (U_FAILURE(errorCode_)) { return *this; }
366             clearSupportedLocales();
367             while (begin != end) {
368                 addSupportedLocale(converter(*begin++));
369             }
370             return *this;
371         }
372 
373         /**
374          * Adds another supported locale.
375          * Duplicates are allowed, and are not removed.
376          *
377          * @param locale another locale
378          * @return this Builder object
379          * @draft ICU 65
380          */
381         Builder &addSupportedLocale(const Locale &locale);
382 
383         /**
384          * Sets the default locale; if nullptr, or if it is not set explicitly,
385          * then the first supported locale is used as the default locale.
386          *
387          * @param defaultLocale the default locale (will be copied)
388          * @return this Builder object
389          * @draft ICU 65
390          */
391         Builder &setDefaultLocale(const Locale *defaultLocale);
392 
393         /**
394          * If ULOCMATCH_FAVOR_SCRIPT, then the language differences are smaller than script
395          * differences.
396          * This is used in situations (such as maps) where
397          * it is better to fall back to the same script than a similar language.
398          *
399          * @param subtag the subtag to favor
400          * @return this Builder object
401          * @draft ICU 65
402          */
403         Builder &setFavorSubtag(ULocMatchFavorSubtag subtag);
404 
405         /**
406          * Option for whether all desired locales are treated equally or
407          * earlier ones are preferred (this is the default).
408          *
409          * @param demotion the demotion per desired locale to set.
410          * @return this Builder object
411          * @draft ICU 65
412          */
413         Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion);
414 
415         /**
416          * Sets the UErrorCode if an error occurred while setting parameters.
417          * Preserves older error codes in the outErrorCode.
418          *
419          * @param outErrorCode Set to an error code if it does not contain one already
420          *                  and an error occurred while setting parameters.
421          *                  Otherwise unchanged.
422          * @return TRUE if U_FAILURE(outErrorCode)
423          * @draft ICU 65
424          */
425         UBool copyErrorTo(UErrorCode &outErrorCode) const;
426 
427         /**
428          * Builds and returns a new locale matcher.
429          * This builder can continue to be used.
430          *
431          * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
432          *                  or else the function returns immediately. Check for U_FAILURE()
433          *                  on output or use with function chaining. (See User Guide for details.)
434          * @return new LocaleMatcher.
435          * @draft ICU 65
436          */
437         LocaleMatcher build(UErrorCode &errorCode) const;
438 
439     private:
440         friend class LocaleMatcher;
441 
442         Builder(const Builder &other) = delete;
443         Builder &operator=(const Builder &other) = delete;
444 
445         void clearSupportedLocales();
446         bool ensureSupportedLocaleVector();
447 
448         UErrorCode errorCode_ = U_ZERO_ERROR;
449         UVector *supportedLocales_ = nullptr;
450         int32_t thresholdDistance_ = -1;
451         ULocMatchDemotion demotion_ = ULOCMATCH_DEMOTION_REGION;
452         Locale *defaultLocale_ = nullptr;
453         ULocMatchFavorSubtag favor_ = ULOCMATCH_FAVOR_LANGUAGE;
454     };
455 
456     // FYI No public LocaleMatcher constructors in C++; use the Builder.
457 
458     /**
459      * Move constructor; might modify the source.
460      * This matcher will have the same settings that the source matcher had.
461      * @param src source matcher
462      * @draft ICU 65
463      */
464     LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT;
465 
466     /**
467      * Destructor.
468      * @draft ICU 65
469      */
470     ~LocaleMatcher();
471 
472     /**
473      * Move assignment operator; might modify the source.
474      * This matcher will have the same settings that the source matcher had.
475      * The behavior is undefined if *this and src are the same object.
476      * @param src source matcher
477      * @return *this
478      * @draft ICU 65
479      */
480     LocaleMatcher &operator=(LocaleMatcher &&src) U_NOEXCEPT;
481 
482     /**
483      * Returns the supported locale which best matches the desired locale.
484      *
485      * @param desiredLocale Typically a user's language.
486      * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
487      *                  or else the function returns immediately. Check for U_FAILURE()
488      *                  on output or use with function chaining. (See User Guide for details.)
489      * @return the best-matching supported locale.
490      * @draft ICU 65
491      */
492     const Locale *getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const;
493 
494     /**
495      * Returns the supported locale which best matches one of the desired locales.
496      *
497      * @param desiredLocales Typically a user's languages, in order of preference (descending).
498      * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
499      *                  or else the function returns immediately. Check for U_FAILURE()
500      *                  on output or use with function chaining. (See User Guide for details.)
501      * @return the best-matching supported locale.
502      * @draft ICU 65
503      */
504     const Locale *getBestMatch(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
505 
506     /**
507      * Parses an Accept-Language string
508      * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
509      * such as "af, en, fr;q=0.9",
510      * and returns the supported locale which best matches one of the desired locales.
511      * Allows whitespace in more places but does not allow "*".
512      *
513      * @param desiredLocaleList Typically a user's languages, as an Accept-Language string.
514      * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
515      *                  or else the function returns immediately. Check for U_FAILURE()
516      *                  on output or use with function chaining. (See User Guide for details.)
517      * @return the best-matching supported locale.
518      * @draft ICU 65
519      */
520     const Locale *getBestMatchForListString(StringPiece desiredLocaleList, UErrorCode &errorCode) const;
521 
522     /**
523      * Returns the best match between the desired locale and the supported locales.
524      * If the result's desired locale is not nullptr, then it is the address of the input locale.
525      * It has not been cloned.
526      *
527      * @param desiredLocale Typically a user's language.
528      * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
529      *                  or else the function returns immediately. Check for U_FAILURE()
530      *                  on output or use with function chaining. (See User Guide for details.)
531      * @return the best-matching pair of the desired and a supported locale.
532      * @draft ICU 65
533      */
534     Result getBestMatchResult(const Locale &desiredLocale, UErrorCode &errorCode) const;
535 
536     /**
537      * Returns the best match between the desired and supported locales.
538      * If the result's desired locale is not nullptr, then it is a clone of
539      * the best-matching desired locale. The Result object owns the clone.
540      *
541      * @param desiredLocales Typically a user's languages, in order of preference (descending).
542      * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
543      *                  or else the function returns immediately. Check for U_FAILURE()
544      *                  on output or use with function chaining. (See User Guide for details.)
545      * @return the best-matching pair of a desired and a supported locale.
546      * @draft ICU 65
547      */
548     Result getBestMatchResult(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
549 
550 #ifndef U_HIDE_INTERNAL_API
551     /**
552      * Returns a fraction between 0 and 1, where 1 means that the languages are a
553      * perfect match, and 0 means that they are completely different.
554      *
555      * <p>This is mostly an implementation detail, and the precise values may change over time.
556      * The implementation may use either the maximized forms or the other ones, or both.
557      * The implementation may or may not rely on the forms to be consistent with each other.
558      *
559      * <p>Callers should construct and use a matcher rather than match pairs of locales directly.
560      *
561      * @param desired Desired locale.
562      * @param supported Supported locale.
563      * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
564      *                  or else the function returns immediately. Check for U_FAILURE()
565      *                  on output or use with function chaining. (See User Guide for details.)
566      * @return value between 0 and 1, inclusive.
567      * @internal (has a known user)
568      */
569     double internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;
570 #endif  // U_HIDE_INTERNAL_API
571 
572 private:
573     LocaleMatcher(const Builder &builder, UErrorCode &errorCode);
574     LocaleMatcher(const LocaleMatcher &other) = delete;
575     LocaleMatcher &operator=(const LocaleMatcher &other) = delete;
576 
577     int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const;
578 
579     const XLikelySubtags &likelySubtags;
580     const LocaleDistance &localeDistance;
581     int32_t thresholdDistance;
582     int32_t demotionPerDesiredLocale;
583     ULocMatchFavorSubtag favorSubtag;
584 
585     // These are in input order.
586     const Locale ** supportedLocales;
587     LSR *lsrs;
588     int32_t supportedLocalesLength;
589     // These are in preference order: 1. Default locale 2. paradigm locales 3. others.
590     UHashtable *supportedLsrToIndex;  // Map<LSR, Integer> stores index+1 because 0 is "not found"
591     // Array versions of the supportedLsrToIndex keys and values.
592     // The distance lookup loops over the supportedLSRs and returns the index of the best match.
593     const LSR **supportedLSRs;
594     int32_t *supportedIndexes;
595     int32_t supportedLSRsLength;
596     Locale *ownedDefaultLocale;
597     const Locale *defaultLocale;
598     int32_t defaultLocaleIndex;
599 };
600 
601 U_NAMESPACE_END
602 
603 #endif  // U_HIDE_DRAFT_API
604 #endif  // U_SHOW_CPLUSPLUS_API
605 #endif  // __LOCALEMATCHER_H__
606