• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 
4 // locdistance.h
5 // created: 2019may08 Markus W. Scherer
6 
7 #ifndef __LOCDISTANCE_H__
8 #define __LOCDISTANCE_H__
9 
10 #include "unicode/utypes.h"
11 #include "unicode/bytestrie.h"
12 #include "unicode/localematcher.h"
13 #include "unicode/locid.h"
14 #include "unicode/uobject.h"
15 #include "lsr.h"
16 
17 U_NAMESPACE_BEGIN
18 
19 struct LocaleDistanceData;
20 
21 /**
22  * Offline-built data for LocaleMatcher.
23  * Mostly but not only the data for mapping locales to their maximized forms.
24  */
25 class LocaleDistance final : public UMemory {
26 public:
27     static const LocaleDistance *getSingleton(UErrorCode &errorCode);
28 
29     /**
30      * Finds the supported LSR with the smallest distance from the desired one.
31      * Equivalent LSR subtags must be normalized into a canonical form.
32      *
33      * <p>Returns the index of the lowest-distance supported LSR in bits 31..8
34      * (negative if none has a distance below the threshold),
35      * and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
36      */
37     int32_t getBestIndexAndDistance(const LSR &desired,
38                                     const LSR **supportedLSRs, int32_t supportedLSRsLength,
39                                     int32_t threshold, ULocMatchFavorSubtag favorSubtag) const;
40 
getParadigmLSRsLength()41     int32_t getParadigmLSRsLength() const { return paradigmLSRsLength; }
42 
43     UBool isParadigmLSR(const LSR &lsr) const;
44 
getDefaultScriptDistance()45     int32_t getDefaultScriptDistance() const {
46         return defaultScriptDistance;
47     }
48 
getDefaultDemotionPerDesiredLocale()49     int32_t getDefaultDemotionPerDesiredLocale() const {
50         return defaultDemotionPerDesiredLocale;
51     }
52 
53 private:
54     LocaleDistance(const LocaleDistanceData &data);
55     LocaleDistance(const LocaleDistance &other) = delete;
56     LocaleDistance &operator=(const LocaleDistance &other) = delete;
57 
58     static void initLocaleDistance(UErrorCode &errorCode);
59 
60     static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState,
61                                             const char *desired, const char *supported);
62 
63     static int32_t getRegionPartitionsDistance(
64         BytesTrie &iter, uint64_t startState,
65         const char *desiredPartitions, const char *supportedPartitions,
66         int32_t threshold);
67 
68     static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState);
69 
70     static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue);
71 
partitionsForRegion(const LSR & lsr)72     const char *partitionsForRegion(const LSR &lsr) const {
73         // ill-formed region -> one non-matching string
74         int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex];
75         return partitionArrays[pIndex];
76     }
77 
getDefaultRegionDistance()78     int32_t getDefaultRegionDistance() const {
79         return defaultRegionDistance;
80     }
81 
82     // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
83     // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
84     // There is also a trie value for each subsequence of whole subtags.
85     // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
86     BytesTrie trie;
87 
88     /**
89      * Maps each region to zero or more single-character partitions.
90      */
91     const uint8_t *regionToPartitionsIndex;
92     const char **partitionArrays;
93 
94     /**
95      * Used to get the paradigm region for a cluster, if there is one.
96      */
97     const LSR *paradigmLSRs;
98     int32_t paradigmLSRsLength;
99 
100     int32_t defaultLanguageDistance;
101     int32_t defaultScriptDistance;
102     int32_t defaultRegionDistance;
103     int32_t minRegionDistance;
104     int32_t defaultDemotionPerDesiredLocale;
105 };
106 
107 U_NAMESPACE_END
108 
109 #endif  // __LOCDISTANCE_H__
110