1 // © 2019 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 4 // locdistance.h 5 // created: 2019may08 Markus W. Scherer 6 7 #ifndef __LOCDISTANCE_H__ 8 #define __LOCDISTANCE_H__ 9 10 #include "unicode/utypes.h" 11 #include "unicode/bytestrie.h" 12 #include "unicode/localematcher.h" 13 #include "unicode/locid.h" 14 #include "unicode/uobject.h" 15 #include "lsr.h" 16 17 U_NAMESPACE_BEGIN 18 19 struct LocaleDistanceData; 20 21 /** 22 * Offline-built data for LocaleMatcher. 23 * Mostly but not only the data for mapping locales to their maximized forms. 24 */ 25 class LocaleDistance final : public UMemory { 26 public: 27 static const LocaleDistance *getSingleton(UErrorCode &errorCode); 28 29 /** 30 * Finds the supported LSR with the smallest distance from the desired one. 31 * Equivalent LSR subtags must be normalized into a canonical form. 32 * 33 * <p>Returns the index of the lowest-distance supported LSR in bits 31..8 34 * (negative if none has a distance below the threshold), 35 * and its distance (0..ABOVE_THRESHOLD) in bits 7..0. 36 */ 37 int32_t getBestIndexAndDistance(const LSR &desired, 38 const LSR **supportedLSRs, int32_t supportedLSRsLength, 39 int32_t threshold, ULocMatchFavorSubtag favorSubtag) const; 40 getParadigmLSRsLength()41 int32_t getParadigmLSRsLength() const { return paradigmLSRsLength; } 42 43 UBool isParadigmLSR(const LSR &lsr) const; 44 getDefaultScriptDistance()45 int32_t getDefaultScriptDistance() const { 46 return defaultScriptDistance; 47 } 48 getDefaultDemotionPerDesiredLocale()49 int32_t getDefaultDemotionPerDesiredLocale() const { 50 return defaultDemotionPerDesiredLocale; 51 } 52 53 private: 54 LocaleDistance(const LocaleDistanceData &data); 55 LocaleDistance(const LocaleDistance &other) = delete; 56 LocaleDistance &operator=(const LocaleDistance &other) = delete; 57 58 static void initLocaleDistance(UErrorCode &errorCode); 59 60 static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState, 61 const char *desired, const char *supported); 62 63 static int32_t getRegionPartitionsDistance( 64 BytesTrie &iter, uint64_t startState, 65 const char *desiredPartitions, const char *supportedPartitions, 66 int32_t threshold); 67 68 static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState); 69 70 static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue); 71 partitionsForRegion(const LSR & lsr)72 const char *partitionsForRegion(const LSR &lsr) const { 73 // ill-formed region -> one non-matching string 74 int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex]; 75 return partitionArrays[pIndex]; 76 } 77 getDefaultRegionDistance()78 int32_t getDefaultRegionDistance() const { 79 return defaultRegionDistance; 80 } 81 82 // The trie maps each dlang+slang+dscript+sscript+dregion+sregion 83 // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance. 84 // There is also a trie value for each subsequence of whole subtags. 85 // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"". 86 BytesTrie trie; 87 88 /** 89 * Maps each region to zero or more single-character partitions. 90 */ 91 const uint8_t *regionToPartitionsIndex; 92 const char **partitionArrays; 93 94 /** 95 * Used to get the paradigm region for a cluster, if there is one. 96 */ 97 const LSR *paradigmLSRs; 98 int32_t paradigmLSRsLength; 99 100 int32_t defaultLanguageDistance; 101 int32_t defaultScriptDistance; 102 int32_t defaultRegionDistance; 103 int32_t minRegionDistance; 104 int32_t defaultDemotionPerDesiredLocale; 105 }; 106 107 U_NAMESPACE_END 108 109 #endif // __LOCDISTANCE_H__ 110