• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ****************************************************************************************
3  * Copyright (C) 2009-2015, Google, Inc.; International Business Machines Corporation   *
4  * and others. All Rights Reserved.                                                     *
5  ****************************************************************************************
6  */
7 package com.ibm.icu.util;
8 
9 import java.util.HashMap;
10 import java.util.HashSet;
11 import java.util.Iterator;
12 import java.util.LinkedHashMap;
13 import java.util.LinkedHashSet;
14 import java.util.Map;
15 import java.util.Map.Entry;
16 import java.util.Objects;
17 import java.util.Set;
18 import java.util.regex.Matcher;
19 import java.util.regex.Pattern;
20 
21 import com.ibm.icu.impl.ICUData;
22 import com.ibm.icu.impl.ICUResourceBundle;
23 import com.ibm.icu.impl.Relation;
24 import com.ibm.icu.impl.Row;
25 import com.ibm.icu.impl.Row.R3;
26 
27 /**
28  * Provides a way to match the languages (locales) supported by a product to the
29  * languages (locales) acceptable to a user, and get the best match. For
30  * example:
31  *
32  * <pre>
33  * LocaleMatcher matcher = new LocaleMatcher("fr, en-GB, en");
34  *
35  * // afterwards:
36  * matcher.getBestMatch("en-US").toLanguageTag() => "en"
37  * </pre>
38  *
39  * It takes into account when languages are close to one another, such as fil
40  * and tl, and when language regional variants are close, like en-GB and en-AU.
41  * It also handles scripts, like zh-Hant vs zh-TW. For examples, see the test
42  * file.
43  * <p>All classes implementing this interface should be immutable. Often a
44  * product will just need one static instance, built with the languages
45  * that it supports. However, it may want multiple instances with different
46  * default languages based on additional information, such as the domain.
47  *
48  * @author markdavis@google.com
49  * @stable ICU 4.4
50  */
51 public class LocaleMatcher {
52 
53     public static final boolean DEBUG = false;
54 
55     private static final ULocale UNKNOWN_LOCALE = new ULocale("und");
56 
57     /**
58      * Threshold for falling back to the default (first) language. May make this
59      * a parameter in the future.
60      */
61     private static final double DEFAULT_THRESHOLD = 0.5;
62 
63     /**
64      * The default language, in case the threshold is not met.
65      */
66     private final ULocale defaultLanguage;
67 
68     /**
69      * Minimum match weight; a best weight below it makes the matcher return the default language.
70      */
71     private final double threshold;
72 
73     /**
74      * Create a new language matcher. The highest-weighted language is the
75      * default. That means that if no other language matches more closely than a
76      * given threshold, that default language is chosen. Typically the default is
77      * English, but it could be different based on additional information, such as
78      * the domain of the page.
79      *
80      * @param languagePriorityList weighted list
81      * @stable ICU 4.4
82      */
LocaleMatcher(LocalePriorityList languagePriorityList)83     public LocaleMatcher(LocalePriorityList languagePriorityList) {
        // defaultWritten is the same matcher data the full constructor substitutes for a null argument.
84         this(languagePriorityList, defaultWritten);
85     }
86 
87     /**
88      * Create a new language matcher from a String form. The highest-weighted
89      * language is the default.
90      *
91      * @param languagePriorityListString String form of a LocalePriorityList
92      * @stable ICU 4.4
93      */
LocaleMatcher(String languagePriorityListString)94     public LocaleMatcher(String languagePriorityListString) {
        // Parse the string into a LocalePriorityList, then delegate to the list constructor.
95         this(LocalePriorityList.add(languagePriorityListString).build());
96     }
97 
98     /**
99      * Internal testing function; may expose API later. Uses DEFAULT_THRESHOLD as the fallback threshold.
100      * @param languagePriorityList LocalePriorityList to match
101      * @param matcherData Internal matching data
102      * @internal
103      * @deprecated This API is ICU internal only.
104      */
105     @Deprecated
LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData)106     public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData) {
107         this(languagePriorityList, matcherData, DEFAULT_THRESHOLD);
108     }
109 
110     /**
111      * Internal testing function; may expose API later.
112      * @param languagePriorityList LocalePriorityList to match
113      * @param matcherData Internal matching data
114      * @internal
115      * @deprecated This API is ICU internal only.
116      */
117     @Deprecated
LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold)118     public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold) {
119         this.matcherData = matcherData == null ? defaultWritten : matcherData.freeze();
120         for (final ULocale language : languagePriorityList) {
121             add(language, languagePriorityList.getWeight(language));
122         }
123         processMapping();
124         Iterator<ULocale> it = languagePriorityList.iterator();
125         defaultLanguage = it.hasNext() ? it.next() : null;
126         this.threshold = threshold;
127     }
128 
129     /**
130      * Returns a fraction between 0 and 1, where 1 means that the languages are a
131      * perfect match, and 0 means that they are completely different. Note that
132      * the precise values may change over time; no code should be made dependent
133      * on the values remaining constant.
134      * @param desired Desired locale
135      * @param desiredMax Maximized locale (using likely subtags)
136      * @param supported Supported locale
137      * @param supportedMax Maximized locale (using likely subtags)
138      * @return value between 0 and 1, inclusive.
139      * @stable ICU 4.4
140      */
match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax)141     public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
142         return matcherData.match(desired, desiredMax, supported, supportedMax);
143     }
144 
145     /**
146      * Canonicalize a locale (language). Note that for now, it is canonicalizing
147      * according to CLDR conventions (he vs iw, etc), since that is what is needed
148      * for likelySubtags.
149      * @param ulocale language/locale code
150      * @return ULocale with remapped subtags.
151      * @stable ICU 4.4
152      */
canonicalize(ULocale ulocale)153     public ULocale canonicalize(ULocale ulocale) {
154         // TODO Get the data from CLDR, use Java conventions.
155         String lang = ulocale.getLanguage();
156         String lang2 = canonicalMap.get(lang);
157         String script = ulocale.getScript();
158         String script2 = canonicalMap.get(script);
159         String region = ulocale.getCountry();
160         String region2 = canonicalMap.get(region);
161         if (lang2 != null || script2 != null || region2 != null) {
162             return new ULocale(
163                 lang2 == null ? lang : lang2,
164                     script2 == null ? script : script2,
165                         region2 == null ? region : region2);
166         }
167         return ulocale;
168     }
169 
170     /**
171      * Get the best match for a LanguagePriorityList
172      *
173      * @param languageList list to match
174      * @return best matching language code
175      * @stable ICU 4.4
176      */
getBestMatch(LocalePriorityList languageList)177     public ULocale getBestMatch(LocalePriorityList languageList) {
178         double bestWeight = 0;
179         ULocale bestTableMatch = null;
180         double penalty = 0;
181         OutputDouble matchWeight = new OutputDouble();
182         for (final ULocale language : languageList) {
183             final ULocale matchLocale = getBestMatchInternal(language, matchWeight);
184             final double weight = matchWeight.value * languageList.getWeight(language) - penalty;
185             if (weight > bestWeight) {
186                 bestWeight = weight;
187                 bestTableMatch = matchLocale;
188             }
189             penalty += 0.07000001;
190         }
191         if (bestWeight < threshold) {
192             bestTableMatch = defaultLanguage;
193         }
194         return bestTableMatch;
195     }
196 
197     /**
198      * Convenience method: Get the best match for a LanguagePriorityList
199      *
200      * @param languageList String form of language priority list
201      * @return best matching language code
202      * @stable ICU 4.4
203      */
getBestMatch(String languageList)204     public ULocale getBestMatch(String languageList) {
205         return getBestMatch(LocalePriorityList.add(languageList).build());
206     }
207 
208     /**
209      * Get the best match for an individual language code.
210      *
211      * @param ulocale locale/language code to match
212      * @return best matching language code
213      * @stable ICU 4.4
214      */
getBestMatch(ULocale ulocale)215     public ULocale getBestMatch(ULocale ulocale) {
216         return getBestMatchInternal(ulocale, null);
217     }
218 
219     /**
220      * @internal
221      * @deprecated This API is ICU internal only.
222      */
223     @Deprecated
getBestMatch(ULocale... ulocales)224     public ULocale getBestMatch(ULocale... ulocales) {
225         return getBestMatch(LocalePriorityList.add(ulocales).build());
226     }
227 
228     /**
229      * {@inheritDoc}
230      * @stable ICU 4.4
231      */
232     @Override
toString()233     public String toString() {
234         return "{" + defaultLanguage + ", "
235             + localeToMaxLocaleAndWeight + "}";
236     }
237 
238     // ================= Privates =====================
239 
240     /**
241      * Get the best match for an individual language code.
242      *
243      * @param languageCode
244      * @return best matching language code and weight (as per
245      *         {@link #match(ULocale, ULocale)})
246      */
getBestMatchInternal(ULocale languageCode, OutputDouble outputWeight)247     private ULocale getBestMatchInternal(ULocale languageCode, OutputDouble outputWeight) {
248         languageCode = canonicalize(languageCode);
249         final ULocale maximized = addLikelySubtags(languageCode);
250         if (DEBUG) {
251             System.out.println("\ngetBestMatchInternal: " + languageCode + ";\t" + maximized);
252         }
253         double bestWeight = 0;
254         ULocale bestTableMatch = null;
255         String baseLanguage = maximized.getLanguage();
256         Set<R3<ULocale, ULocale, Double>> searchTable = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(baseLanguage);
257         if (searchTable != null) { // we preprocessed the table so as to filter by lanugage
258             if (DEBUG) System.out.println("\tSearching: " + searchTable);
259             for (final R3<ULocale, ULocale, Double> tableKeyValue : searchTable) {
260                 ULocale tableKey = tableKeyValue.get0();
261                 ULocale maxLocale = tableKeyValue.get1();
262                 Double matchedWeight = tableKeyValue.get2();
263                 final double match = match(languageCode, maximized, tableKey, maxLocale);
264                 if (DEBUG) {
265                     System.out.println("\t" + tableKeyValue + ";\t" + match + "\n");
266                 }
267                 final double weight = match * matchedWeight;
268                 if (weight > bestWeight) {
269                     bestWeight = weight;
270                     bestTableMatch = tableKey;
271                     if (weight > 0.999d) { // bail on good enough match.
272                         break;
273                     }
274                 }
275             }
276         }
277         if (bestWeight < threshold) {
278             bestTableMatch = defaultLanguage;
279         }
280         if (outputWeight != null) {
281             outputWeight.value = bestWeight; // only return the weight when needed
282         }
283         return bestTableMatch;
284     }
285 
286     public static class OutputDouble { // TODO, move to where OutputInt is
        // Mutable holder used as an out-parameter: getBestMatchInternal stores the best weight here.
287         double value;
288     }
289 
add(ULocale language, Double weight)290     private void add(ULocale language, Double weight) {
291         language = canonicalize(language);
292         R3<ULocale, ULocale, Double> row = Row.of(language, addLikelySubtags(language), weight);
293         row.freeze();
294         localeToMaxLocaleAndWeight.add(row);
295     }
296 
297     /**
298      * We preprocess the data to get just the possible matches for each desired base language.
299      */
processMapping()300     private void processMapping() {
301         for (Entry<String, Set<String>> desiredToMatchingLanguages : matcherData.matchingLanguages().keyValuesSet()) {
302             String desired = desiredToMatchingLanguages.getKey();
303             Set<String> supported = desiredToMatchingLanguages.getValue();
304             for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) {
305                 final ULocale key = localeToMaxAndWeight.get0();
306                 String lang = key.getLanguage();
307                 if (supported.contains(lang)) {
308                     addFiltered(desired, localeToMaxAndWeight);
309                 }
310             }
311         }
312         // now put in the values directly, since languages always map to themselves
313         for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) {
314             final ULocale key = localeToMaxAndWeight.get0();
315             String lang = key.getLanguage();
316             addFiltered(lang, localeToMaxAndWeight);
317         }
318     }
319 
addFiltered(String desired, R3<ULocale, ULocale, Double> localeToMaxAndWeight)320     private void addFiltered(String desired, R3<ULocale, ULocale, Double> localeToMaxAndWeight) {
321         Set<R3<ULocale, ULocale, Double>> map = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(desired);
322         if (map == null) {
323             desiredLanguageToPossibleLocalesToMaxLocaleToData.put(desired, map = new LinkedHashSet<>());
324         }
325         map.add(localeToMaxAndWeight);
326         if (DEBUG) {
327             System.out.println(desired + ", " + localeToMaxAndWeight);
328         }
329     }
330 
// Supported locales in insertion order, each row being (canonical locale, maximized locale, weight); filled by add().
331     Set<Row.R3<ULocale, ULocale, Double>> localeToMaxLocaleAndWeight = new LinkedHashSet<>();
// Desired base language -> rows that can match it; filled by addFiltered() and read by getBestMatchInternal().
332     Map<String, Set<Row.R3<ULocale, ULocale, Double>>> desiredLanguageToPossibleLocalesToMaxLocaleToData = new LinkedHashMap<>();
333 
334     // =============== Special Mapping Information ==============
335 
336     /**
337      * We need to add another method to addLikelySubtags that doesn't return
338      * null, but instead substitutes Zzzz and ZZ if unknown. There are also
339      * a few cases where addLikelySubtags needs to have expanded data, to handle
340      * all deprecated codes.
341      * @param languageCode
342      * @return "fixed" addLikelySubtags
343      */
addLikelySubtags(ULocale languageCode)344     private ULocale addLikelySubtags(ULocale languageCode) {
345         // max("und") = "en_Latn_US", and since matching is based on maximized tags, the undefined
346         // language would normally match English.  But that would produce the counterintuitive results
347         // that getBestMatch("und", LocaleMatcher("it,en")) would be "en", and
348         // getBestMatch("en", LocaleMatcher("it,und")) would be "und".
349         //
350         // To avoid that, we change the matcher's definitions of max (AddLikelySubtagsWithDefaults)
351         // so that max("und")="und". That produces the following, more desirable results:
352         if (languageCode.equals(UNKNOWN_LOCALE)) {
353             return UNKNOWN_LOCALE;
354         }
355         final ULocale result = ULocale.addLikelySubtags(languageCode);
356         // should have method on getLikelySubtags for this
357         if (result == null || result.equals(languageCode)) {
358             final String language = languageCode.getLanguage();
359             final String script = languageCode.getScript();
360             final String region = languageCode.getCountry();
361             return new ULocale((language.length() == 0 ? "und"
362                 : language)
363                 + "_"
364                 + (script.length() == 0 ? "Zzzz" : script)
365                 + "_"
366                 + (region.length() == 0 ? "ZZ" : region));
367         }
368         return result;
369     }
370 
371     private static class LocalePatternMatcher {
372         // a value of null means a wildcard; matches any.
373         private String lang;
374         private String script;
375         private String region;
376         private Level level;
377         static Pattern pattern = Pattern.compile(
378             "([a-z]{1,8}|\\*)"
379                 + "(?:[_-]([A-Z][a-z]{3}|\\*))?"
380                 + "(?:[_-]([$]!?[a-zA-Z]+|[A-Z]{2}|[0-9]{3}|\\*))?");
381 
LocalePatternMatcher(String toMatch)382         public LocalePatternMatcher(String toMatch) {
383             Matcher matcher = pattern.matcher(toMatch);
384             if (!matcher.matches()) {
385                 throw new IllegalArgumentException("Bad pattern: " + toMatch);
386             }
387             lang = matcher.group(1);
388             script = matcher.group(2);
389             region = matcher.group(3);
390             level = region != null ? Level.region : script != null ? Level.script : Level.language;
391 
392             if (lang.equals("*")) {
393                 lang = null;
394             }
395             if (script != null && script.equals("*")) {
396                 script = null;
397             }
398             if (region != null && region.equals("*")) {
399                 region = null;
400             }
401         }
402 
matches(ULocale ulocale)403         boolean matches(ULocale ulocale) {
404             if (lang != null && !lang.equals(ulocale.getLanguage())) {
405                 return false;
406             }
407             if (script != null && !script.equals(ulocale.getScript())) {
408                 return false;
409             }
410             if (region != null && !region.equals(ulocale.getCountry())) {
411                 return false;
412             }
413             return true;
414         }
415 
getLevel()416         public Level getLevel() {
417             return level;
418         }
419 
getLanguage()420         public String getLanguage() {
421             return (lang == null ? "*" : lang);
422         }
423 
getScript()424         public String getScript() {
425             return (script == null ? "*" : script);
426         }
427 
getRegion()428         public String getRegion() {
429             return (region == null ? "*" : region);
430         }
431 
432         @Override
toString()433         public String toString() {
434             String result = getLanguage();
435             if (level != Level.language) {
436                 result += "-" + getScript();
437                 if (level != Level.script) {
438                     result += "-" + getRegion();
439                 }
440             }
441             return result;
442         }
443 
444         /* (non-Javadoc)
445          * @see java.lang.Object#equals(java.lang.Object)
446          */
447         @Override
equals(Object obj)448         public boolean equals(Object obj) {
449             LocalePatternMatcher other = (LocalePatternMatcher) obj;
450             return Objects.equals(level, other.level)
451                 && Objects.equals(lang, other.lang)
452                 && Objects.equals(script, other.script)
453                 && Objects.equals(region, other.region);
454         }
455 
456         /* (non-Javadoc)
457          * @see java.lang.Object#hashCode()
458          */
459         @Override
hashCode()460         public int hashCode() {
461             return level.ordinal()
462                 ^ (lang == null ? 0 : lang.hashCode())
463                 ^ (script == null ? 0 : script.hashCode())
464                 ^ (region == null ? 0 : region.hashCode());
465         }
466     }
467 
/**
 * Matching levels, each carrying the distance to assume when no explicit
 * entry exists for a pair of locales at that level.
 */
enum Level {
    language(0.99),
    script(0.2),
    region(0.04);

    /** Fallback distance for this level. */
    final double worst;

    Level(double worst) {
        this.worst = worst;
    }
}
479 
480     private static class ScoreData implements Freezable<ScoreData> {
481         @SuppressWarnings("unused")
482         private static final double maxUnequal_changeD_sameS = 0.5;
483 
484         @SuppressWarnings("unused")
485         private static final double maxUnequal_changeEqual = 0.75;
486 
487         LinkedHashSet<Row.R3<LocalePatternMatcher, LocalePatternMatcher, Double>> scores = new LinkedHashSet<>();
488         final Level level;
489 
ScoreData(Level level)490         public ScoreData(Level level) {
491             this.level = level;
492         }
493 
addDataToScores(String desired, String supported, R3<LocalePatternMatcher, LocalePatternMatcher, Double> data)494         void addDataToScores(String desired, String supported, R3<LocalePatternMatcher, LocalePatternMatcher, Double> data) {
495             //            Map<String, Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>>> lang_result = scores.get(desired);
496             //            if (lang_result == null) {
497             //                scores.put(desired, lang_result = new HashMap());
498             //            }
499             //            Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>> result = lang_result.get(supported);
500             //            if (result == null) {
501             //                lang_result.put(supported, result = new LinkedHashSet());
502             //            }
503             //            result.add(data);
504             boolean added = scores.add(data);
505             if (!added) {
506                 throw new ICUException("trying to add duplicate data: " + data);
507             }
508         }
509 
getScore(ULocale dMax, String desiredRaw, String desiredMax, ULocale sMax, String supportedRaw, String supportedMax)510         double getScore(ULocale dMax, String desiredRaw, String desiredMax,
511             ULocale sMax, String supportedRaw, String supportedMax) {
512             double distance = 0;
513             if (!desiredMax.equals(supportedMax)) {
514                 distance = getRawScore(dMax, sMax);
515             } else if (!desiredRaw.equals(supportedRaw)) { // maxes are equal, changes are equal
516                 distance += 0.001;
517             }
518             return distance;
519         }
520 
getRawScore(ULocale desiredLocale, ULocale supportedLocale)521         private double getRawScore(ULocale desiredLocale, ULocale supportedLocale) {
522             if (DEBUG) {
523                 System.out.println("\t\t\t" + level + " Raw Score:\t" + desiredLocale + ";\t" + supportedLocale);
524             }
525             for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> datum : scores) { // : result
526                 if (datum.get0().matches(desiredLocale)
527                     && datum.get1().matches(supportedLocale)) {
528                     if (DEBUG) {
529                         System.out.println("\t\t\t\tFOUND\t" + datum);
530                     }
531                     return datum.get2();
532                 }
533             }
534             if (DEBUG) {
535                 System.out.println("\t\t\t\tNOTFOUND\t" + level.worst);
536             }
537             return level.worst;
538         }
539 
540         @Override
toString()541         public String toString() {
542             StringBuilder result = new StringBuilder().append(level);
543             for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> score : scores) {
544                 result.append("\n\t\t").append(score);
545             }
546             return result.toString();
547         }
548 
549         @Override
550         @SuppressWarnings("unchecked")
cloneAsThawed()551         public ScoreData cloneAsThawed() {
552             try {
553                 ScoreData result = (ScoreData) clone();
554                 result.scores = (LinkedHashSet<R3<LocalePatternMatcher, LocalePatternMatcher, Double>>) result.scores.clone();
555                 result.frozen = false;
556                 return result;
557             } catch (CloneNotSupportedException e) {
558                 throw new ICUCloneNotSupportedException(e); // will never happen
559             }
560 
561         }
562 
563         private volatile boolean frozen = false;
564 
565         @Override
freeze()566         public ScoreData freeze() {
567             return this;
568         }
569 
570         @Override
isFrozen()571         public boolean isFrozen() {
572             return frozen;
573         }
574 
getMatchingLanguages()575         public Relation<String, String> getMatchingLanguages() {
576             Relation<String, String> desiredToSupported = Relation.of(new LinkedHashMap<String, Set<String>>(), HashSet.class);
577             for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> item : scores) {
578                 LocalePatternMatcher desired = item.get0();
579                 LocalePatternMatcher supported = item.get1();
580                 if (desired.lang != null && supported.lang != null) { // explicitly mentioned languages must have reasonable distance
581                     desiredToSupported.put(desired.lang, supported.lang);
582                 }
583             }
584             desiredToSupported.freeze();
585             return desiredToSupported;
586         }
587     }
588 
589     /**
590      * Only for testing and use by tools. Interface may change!!
591      * @internal
592      * @deprecated This API is ICU internal only.
593      */
594     @Deprecated
595     public static class LanguageMatcherData implements Freezable<LanguageMatcherData> {
// Distance tables consulted by match(), one per level: language, script, region.
596         private ScoreData languageScores = new ScoreData(Level.language);
597         private ScoreData scriptScores = new ScoreData(Level.script);
598         private ScoreData regionScores = new ScoreData(Level.region);
// Relation handed out by matchingLanguages(); where it is assigned is not visible in this part of the file.
599         private Relation<String, String> matchingLanguages;
// NOTE(review): presumably the Freezable state flag; the code that sets it is outside this view. Confirm.
600         private volatile boolean frozen = false;
601 
602         /**
         * Creates matcher data whose three score tables start out empty.
603          * @internal
604          * @deprecated This API is ICU internal only.
605          */
606         @Deprecated
LanguageMatcherData()607         public LanguageMatcherData() {
608         }
609 
610         /**
         * Returns the stored relation of matching languages. LocaleMatcher.processMapping()
         * reads each entry as a desired language mapped to a set of supported languages.
611          * @internal
612          * @deprecated This API is ICU internal only.
613          */
614         @Deprecated
matchingLanguages()615         public Relation<String, String> matchingLanguages() {
616             return matchingLanguages;
617         }
618 
619         /**
620          * @internal
621          * @deprecated This API is ICU internal only.
622          */
623         @Override
624         @Deprecated
toString()625         public String toString() {
626             return languageScores + "\n\t" + scriptScores + "\n\t" + regionScores;
627         }
628 
629         /**
630          * @internal
631          * @deprecated This API is ICU internal only.
632          */
633         @Deprecated
match(ULocale a, ULocale aMax, ULocale b, ULocale bMax)634         public double match(ULocale a, ULocale aMax, ULocale b, ULocale bMax) {
635             double diff = 0;
636             diff += languageScores.getScore(aMax, a.getLanguage(), aMax.getLanguage(), bMax, b.getLanguage(), bMax.getLanguage());
637             if (diff > 0.999d) { // with no language match, we bail
638                 return 0.0d;
639             }
640             diff += scriptScores.getScore(aMax, a.getScript(), aMax.getScript(), bMax, b.getScript(), bMax.getScript());
641             diff += regionScores.getScore(aMax, a.getCountry(), aMax.getCountry(), bMax, b.getCountry(), bMax.getCountry());
642 
643             if (!a.getVariant().equals(b.getVariant())) {
644                 diff += 0.01;
645             }
646             if (diff < 0.0d) {
647                 diff = 0.0d;
648             } else if (diff > 1.0d) {
649                 diff = 1.0d;
650             }
651             if (DEBUG) {
652                 System.out.println("\t\t\tTotal Distance\t" + diff);
653             }
654             return 1.0 - diff;
655         }
656 
657         /**
658          * Add an exceptional distance between languages, typically because regional
659          * dialects were given their own language codes. At this point the code is
660          * symmetric. We don't bother producing an equivalence class because there are
661          * so few cases; this function depends on the other permutations being
662          * added specifically. Delegates with oneway=false and no comment.
663          * @internal
664          * @deprecated This API is ICU internal only.
665          */
666         @SuppressWarnings("unused")
667         @Deprecated
addDistance(String desired, String supported, int percent)668         private LanguageMatcherData addDistance(String desired, String supported, int percent) {
669             return addDistance(desired, supported, percent, false, null);
670         }
671 
672         /**
         * Adds a two-way distance entry; the comment is only used in DEBUG output.
673          * @internal
674          * @deprecated This API is ICU internal only.
675          */
676         @Deprecated
addDistance(String desired, String supported, int percent, String comment)677         public LanguageMatcherData addDistance(String desired, String supported, int percent, String comment) {
678             return addDistance(desired, supported, percent, false, comment);
679         }
680 
681         /**
         * Adds a distance entry without a comment; when oneway is true no reverse entry is added.
682          * @internal
683          * @deprecated This API is ICU internal only.
684          */
685         @Deprecated
addDistance(String desired, String supported, int percent, boolean oneway)686         public LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway) {
687             return addDistance(desired, supported, percent, oneway, null);
688         }
689 
addDistance(String desired, String supported, int percent, boolean oneway, String comment)690         private LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway, String comment) {
691             if (DEBUG) {
692                 System.out.println("\t<languageMatch desired=\"" + desired + "\"" +
693                     " supported=\"" + supported + "\"" +
694                     " percent=\"" + percent + "\""
695                     + (oneway ? " oneway=\"true\"" : "")
696                     + "/>"
697                     + (comment == null ? "" : "\t<!-- " + comment + " -->"));
698                 //                    //     .addDistance("nn", "nb", 4, true)
699                 //                        System.out.println(".addDistance(\"" + desired + "\"" +
700                 //                                ", \"" + supported + "\"" +
701                 //                                ", " + percent + ""
702                 //                                + (oneway ? "" : ", true")
703                 //                                + (comment == null ? "" : ", \"" + comment + "\"")
704                 //                                + ")"
705                 //                        );
706 
707             }
708             double score = 1 - percent / 100.0; // convert from percentage
709             LocalePatternMatcher desiredMatcher = new LocalePatternMatcher(desired);
710             Level desiredLen = desiredMatcher.getLevel();
711             LocalePatternMatcher supportedMatcher = new LocalePatternMatcher(supported);
712             Level supportedLen = supportedMatcher.getLevel();
713             if (desiredLen != supportedLen) {
714                 throw new IllegalArgumentException("Lengths unequal: " + desired + ", " + supported);
715             }
716             R3<LocalePatternMatcher, LocalePatternMatcher, Double> data = Row.of(desiredMatcher, supportedMatcher, score);
717             R3<LocalePatternMatcher, LocalePatternMatcher, Double> data2 = oneway ? null : Row.of(supportedMatcher, desiredMatcher, score);
718             boolean desiredEqualsSupported = desiredMatcher.equals(supportedMatcher);
719             switch (desiredLen) {
720             case language:
721                 String dlanguage = desiredMatcher.getLanguage();
722                 String slanguage = supportedMatcher.getLanguage();
723                 languageScores.addDataToScores(dlanguage, slanguage, data);
724                 if (!oneway && !desiredEqualsSupported) {
725                     languageScores.addDataToScores(slanguage, dlanguage, data2);
726                 }
727                 break;
728             case script:
729                 String dscript = desiredMatcher.getScript();
730                 String sscript = supportedMatcher.getScript();
731                 scriptScores.addDataToScores(dscript, sscript, data);
732                 if (!oneway && !desiredEqualsSupported) {
733                     scriptScores.addDataToScores(sscript, dscript, data2);
734                 }
735                 break;
736             case region:
737                 String dregion = desiredMatcher.getRegion();
738                 String sregion = supportedMatcher.getRegion();
739                 regionScores.addDataToScores(dregion, sregion, data);
740                 if (!oneway && !desiredEqualsSupported) {
741                     regionScores.addDataToScores(sregion, dregion, data2);
742                 }
743                 break;
744             }
745             return this;
746         }
747 
748         /**
749          * {@inheritDoc}
750          * @internal
751          * @deprecated This API is ICU internal only.
752          */
753         @Override
754         @Deprecated
cloneAsThawed()755         public LanguageMatcherData cloneAsThawed() {
756             LanguageMatcherData result;
757             try {
758                 result = (LanguageMatcherData) clone();
759                 result.languageScores = languageScores.cloneAsThawed();
760                 result.scriptScores = scriptScores.cloneAsThawed();
761                 result.regionScores = regionScores.cloneAsThawed();
762                 result.frozen = false;
763                 return result;
764             } catch (CloneNotSupportedException e) {
765                 throw new ICUCloneNotSupportedException(e); // will never happen
766             }
767         }
768 
769         /**
770          * {@inheritDoc}
771          * @internal
772          * @deprecated This API is ICU internal only.
773          */
774         @Override
775         @Deprecated
freeze()776         public LanguageMatcherData freeze() {
777             languageScores.freeze();
778             regionScores.freeze();
779             scriptScores.freeze();
780             matchingLanguages = languageScores.getMatchingLanguages();
781             frozen = true;
782             return this;
783         }
784 
785         /**
786          * {@inheritDoc}
787          * @internal
788          * @deprecated This API is ICU internal only.
789          */
790         @Override
791         @Deprecated
isFrozen()792         public boolean isFrozen() {
793             return frozen;
794         }
795     }
796 
797     LanguageMatcherData matcherData;
798 
799     private static final LanguageMatcherData defaultWritten;
800 
801     private static HashMap<String, String> canonicalMap = new HashMap<>();
802 
803     static {
804         canonicalMap.put("iw", "he");
805         canonicalMap.put("mo", "ro");
806         canonicalMap.put("tl", "fil");
807 
808         ICUResourceBundle suppData = getICUSupplementalData();
809         ICUResourceBundle languageMatching = suppData.findTopLevel("languageMatching");
810         ICUResourceBundle written = (ICUResourceBundle) languageMatching.get("written");
811         defaultWritten = new LanguageMatcherData();
812 
813         for (UResourceBundleIterator iter = written.getIterator(); iter.hasNext();) {
814             ICUResourceBundle item = (ICUResourceBundle) iter.next();
815             /*
816             "*_*_*",
817             "*_*_*",
818             "96",
819              */
820             // <languageMatch desired="gsw" supported="de" percent="96" oneway="true" />
821             boolean oneway = item.getSize() > 3 && "1".equals(item.getString(3));
822             defaultWritten.addDistance(item.getString(0), item.getString(1), Integer.parseInt(item.getString(2)), oneway);
823         }
defaultWritten.freeze()824         defaultWritten.freeze();
825     }
826 
827     /**
828      * @internal
829      * @deprecated This API is ICU internal only.
830      */
831     @Deprecated
getICUSupplementalData()832     public static ICUResourceBundle getICUSupplementalData() {
833         ICUResourceBundle suppData = (ICUResourceBundle) UResourceBundle.getBundleInstance(
834             ICUData.ICU_BASE_NAME,
835             "supplementalData",
836             ICUResourceBundle.ICU_DATA_CLASS_LOADER);
837         return suppData;
838     }
839 
840     /**
841      * @internal
842      * @deprecated This API is ICU internal only.
843      */
844     @Deprecated
match(ULocale a, ULocale b)845     public static double match(ULocale a, ULocale b) {
846         final LocaleMatcher matcher = new LocaleMatcher("");
847         return matcher.match(a, matcher.addLikelySubtags(a), b, matcher.addLikelySubtags(b));
848     }
849 }
850