// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 ****************************************************************************************
 * Copyright (C) 2009-2016, Google, Inc.; International Business Machines Corporation
 * and others. All Rights Reserved.
 ****************************************************************************************
 */
9 package com.ibm.icu.util;
10 
11 import java.util.HashMap;
12 import java.util.HashSet;
13 import java.util.Iterator;
14 import java.util.LinkedHashMap;
15 import java.util.LinkedHashSet;
16 import java.util.Map;
17 import java.util.Map.Entry;
18 import java.util.Set;
19 import java.util.regex.Matcher;
20 import java.util.regex.Pattern;
21 
22 import com.ibm.icu.impl.ICUData;
23 import com.ibm.icu.impl.ICUResourceBundle;
24 import com.ibm.icu.impl.Relation;
25 import com.ibm.icu.impl.Row;
26 import com.ibm.icu.impl.Row.R3;
27 import com.ibm.icu.impl.Utility;
28 
29 /**
30  * Provides a way to match the languages (locales) supported by a product to the
31  * languages (locales) acceptable to a user, and get the best match. For
32  * example:
33  *
34  * <pre>
35  * LocaleMatcher matcher = new LocaleMatcher("fr, en-GB, en");
36  *
37  * // afterwards:
38  * matcher.getBestMatch("en-US").toLanguageTag() =&gt; "en"
39  * </pre>
40  *
41  * It takes into account when languages are close to one another, such as fil
42  * and tl, and when language regional variants are close, like en-GB and en-AU.
43  * It also handles scripts, like zh-Hant vs zh-TW. For examples, see the test
44  * file.
45  * <p>All classes implementing this interface should be immutable. Often a
46  * product will just need one static instance, built with the languages
47  * that it supports. However, it may want multiple instances with different
48  * default languages based on additional information, such as the domain.
49  *
50  * @author markdavis@google.com
51  * @stable ICU 4.4
52  */
53 public class LocaleMatcher {
54 
55     /**
56      * @internal
57      * @deprecated This API is ICU internal only.
58      */
59     @Deprecated
60     public static final boolean DEBUG = false;
61 
62     private static final ULocale UNKNOWN_LOCALE = new ULocale("und");
63 
64     /**
65      * Threshold for falling back to the default (first) language. May make this
66      * a parameter in the future.
67      */
68     private static final double DEFAULT_THRESHOLD = 0.5;
69 
70     /**
71      * The default language, in case the threshold is not met.
72      */
73     private final ULocale defaultLanguage;
74 
    /**
     * The minimum match weight. When the best weight found is below this value,
     * the default language is returned instead of the best match.
     */
78     private final double threshold;
79 
80     /**
81      * Create a new language matcher. The highest-weighted language is the
82      * default. That means that if no other language is matches closer than a given
83      * threshold, that default language is chosen. Typically the default is English,
84      * but it could be different based on additional information, such as the domain
85      * of the page.
86      *
87      * @param languagePriorityList weighted list
88      * @stable ICU 4.4
89      */
LocaleMatcher(LocalePriorityList languagePriorityList)90     public LocaleMatcher(LocalePriorityList languagePriorityList) {
91         this(languagePriorityList, defaultWritten);
92     }
93 
94     /**
95      * Create a new language matcher from a String form. The highest-weighted
96      * language is the default.
97      *
98      * @param languagePriorityListString String form of LanguagePriorityList
99      * @stable ICU 4.4
100      */
LocaleMatcher(String languagePriorityListString)101     public LocaleMatcher(String languagePriorityListString) {
102         this(LocalePriorityList.add(languagePriorityListString).build());
103     }
104 
105     /**
106      * Internal testing function; may expose API later.
107      * @param languagePriorityList LocalePriorityList to match
108      * @param matcherData Internal matching data
109      * @internal
110      * @deprecated This API is ICU internal only.
111      */
112     @Deprecated
LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData)113     public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData) {
114         this(languagePriorityList, matcherData, DEFAULT_THRESHOLD);
115     }
116 
117     /**
118      * Internal testing function; may expose API later.
119      * @param languagePriorityList LocalePriorityList to match
120      * @param matcherData Internal matching data
121      * @internal
122      * @deprecated This API is ICU internal only.
123      */
124     @Deprecated
LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold)125     public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold) {
126         this.matcherData = matcherData == null ? defaultWritten : matcherData.freeze();
127         for (final ULocale language : languagePriorityList) {
128             add(language, languagePriorityList.getWeight(language));
129         }
130         processMapping();
131         Iterator<ULocale> it = languagePriorityList.iterator();
132         defaultLanguage = it.hasNext() ? it.next() : null;
133         this.threshold = threshold;
134     }
135 
136 
137     /**
138      * Returns a fraction between 0 and 1, where 1 means that the languages are a
139      * perfect match, and 0 means that they are completely different. Note that
140      * the precise values may change over time; no code should be made dependent
141      * on the values remaining constant.
142      * @param desired Desired locale
143      * @param desiredMax Maximized locale (using likely subtags)
144      * @param supported Supported locale
145      * @param supportedMax Maximized locale (using likely subtags)
146      * @return value between 0 and 1, inclusive.
147      * @stable ICU 4.4
148      */
match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax)149     public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
150         return matcherData.match(desired, desiredMax, supported, supportedMax);
151     }
152 
153 
154     /**
155      * Canonicalize a locale (language). Note that for now, it is canonicalizing
156      * according to CLDR conventions (he vs iw, etc), since that is what is needed
157      * for likelySubtags.
158      * @param ulocale language/locale code
159      * @return ULocale with remapped subtags.
160      * @stable ICU 4.4
161      */
canonicalize(ULocale ulocale)162     public ULocale canonicalize(ULocale ulocale) {
163         // TODO Get the data from CLDR, use Java conventions.
164         String lang = ulocale.getLanguage();
165         String lang2 = canonicalMap.get(lang);
166         String script = ulocale.getScript();
167         String script2 = canonicalMap.get(script);
168         String region = ulocale.getCountry();
169         String region2 = canonicalMap.get(region);
170         if (lang2 != null || script2 != null || region2 != null) {
171             return new ULocale(
172                 lang2 == null ? lang : lang2,
173                     script2 == null ? script : script2,
174                         region2 == null ? region : region2
175                 );
176         }
177         return ulocale;
178     }
179 
180     /**
181      * Get the best match for a LanguagePriorityList
182      *
183      * @param languageList list to match
184      * @return best matching language code
185      * @stable ICU 4.4
186      */
getBestMatch(LocalePriorityList languageList)187     public ULocale getBestMatch(LocalePriorityList languageList) {
188         double bestWeight = 0;
189         ULocale bestTableMatch = null;
190         double penalty = 0;
191         OutputDouble matchWeight = new OutputDouble();
192         for (final ULocale language : languageList) {
193             final ULocale matchLocale = getBestMatchInternal(language, matchWeight);
194             final double weight = matchWeight.value * languageList.getWeight(language) - penalty;
195             if (weight > bestWeight) {
196                 bestWeight = weight;
197                 bestTableMatch = matchLocale;
198             }
199             penalty += 0.07000001;
200         }
201         if (bestWeight < threshold) {
202             bestTableMatch = defaultLanguage;
203         }
204         return bestTableMatch;
205     }
206 
207     /**
208      * Convenience method: Get the best match for a LanguagePriorityList
209      *
210      * @param languageList String form of language priority list
211      * @return best matching language code
212      * @stable ICU 4.4
213      */
getBestMatch(String languageList)214     public ULocale getBestMatch(String languageList) {
215         return getBestMatch(LocalePriorityList.add(languageList).build());
216     }
217 
218     /**
219      * Get the best match for an individual language code.
220      *
221      * @param ulocale locale/language code to match
222      * @return best matching language code
223      * @stable ICU 4.4
224      */
getBestMatch(ULocale ulocale)225     public ULocale getBestMatch(ULocale ulocale) {
226         return getBestMatchInternal(ulocale, null);
227     }
228 
229     /**
230      * @internal
231      * @deprecated This API is ICU internal only.
232      */
233     @Deprecated
getBestMatch(ULocale... ulocales)234     public ULocale getBestMatch(ULocale... ulocales) {
235         return getBestMatch(LocalePriorityList.add(ulocales).build());
236     }
237 
238     /**
239      * {@inheritDoc}
240      * @stable ICU 4.4
241      */
242     @Override
toString()243     public String toString() {
244         return "{" + defaultLanguage + ", "
245             + localeToMaxLocaleAndWeight + "}";
246     }
247     // ================= Privates =====================
248 
249     /**
250      * Get the best match for an individual language code.
251      *
252      * @param languageCode
253      * @return best matching language code and weight (as per
254      *         {@link #match(ULocale, ULocale)})
255      */
getBestMatchInternal(ULocale languageCode, OutputDouble outputWeight)256     private ULocale getBestMatchInternal(ULocale languageCode, OutputDouble outputWeight) {
257         languageCode = canonicalize(languageCode);
258         final ULocale maximized = addLikelySubtags(languageCode);
259         if (DEBUG) {
260             System.out.println("\ngetBestMatchInternal: " + languageCode + ";\t" + maximized);
261         }
262         double bestWeight = 0;
263         ULocale bestTableMatch = null;
264         String baseLanguage = maximized.getLanguage();
265         Set<R3<ULocale, ULocale, Double>> searchTable = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(baseLanguage);
266         if (searchTable != null) { // we preprocessed the table so as to filter by lanugage
267             if (DEBUG) System.out.println("\tSearching: " + searchTable);
268             for (final R3<ULocale, ULocale, Double> tableKeyValue : searchTable) {
269                 ULocale tableKey = tableKeyValue.get0();
270                 ULocale maxLocale = tableKeyValue.get1();
271                 Double matchedWeight = tableKeyValue.get2();
272                 final double match = match(languageCode, maximized, tableKey, maxLocale);
273                 if (DEBUG) {
274                     System.out.println("\t" + tableKeyValue + ";\t" + match + "\n");
275                 }
276                 final double weight = match * matchedWeight;
277                 if (weight > bestWeight) {
278                     bestWeight = weight;
279                     bestTableMatch = tableKey;
280                     if (weight > 0.999d) { // bail on good enough match.
281                         break;
282                     }
283                 }
284             }
285         }
286         if (bestWeight < threshold) {
287             bestTableMatch = defaultLanguage;
288         }
289         if (outputWeight != null) {
290             outputWeight.value = bestWeight; // only return the weight when needed
291         }
292         return bestTableMatch;
293     }
294 
295     /**
296      * @internal
297      * @deprecated This API is ICU internal only.
298      */
299     @Deprecated
300     private static class OutputDouble { // TODO, move to where OutputInt is
301         double value;
302     }
303 
add(ULocale language, Double weight)304     private void add(ULocale language, Double weight) {
305         language = canonicalize(language);
306         R3<ULocale, ULocale, Double> row = Row.of(language, addLikelySubtags(language), weight);
307         row.freeze();
308         localeToMaxLocaleAndWeight.add(row);
309     }
310 
311     /**
312      * We preprocess the data to get just the possible matches for each desired base language.
313      */
processMapping()314     private void processMapping() {
315         for (Entry<String, Set<String>> desiredToMatchingLanguages : matcherData.matchingLanguages().keyValuesSet()) {
316             String desired = desiredToMatchingLanguages.getKey();
317             Set<String> supported = desiredToMatchingLanguages.getValue();
318             for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) {
319                 final ULocale key = localeToMaxAndWeight.get0();
320                 String lang = key.getLanguage();
321                 if (supported.contains(lang)) {
322                     addFiltered(desired, localeToMaxAndWeight);
323                 }
324             }
325         }
326         // now put in the values directly, since languages always map to themselves
327         for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) {
328             final ULocale key = localeToMaxAndWeight.get0();
329             String lang = key.getLanguage();
330             addFiltered(lang, localeToMaxAndWeight);
331         }
332     }
333 
addFiltered(String desired, R3<ULocale, ULocale, Double> localeToMaxAndWeight)334     private void addFiltered(String desired, R3<ULocale, ULocale, Double> localeToMaxAndWeight) {
335         Set<R3<ULocale, ULocale, Double>> map = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(desired);
336         if (map == null) {
337             desiredLanguageToPossibleLocalesToMaxLocaleToData.put(desired, map = new LinkedHashSet<R3<ULocale, ULocale, Double>>());
338         }
339         map.add(localeToMaxAndWeight);
340         if (DEBUG) {
341             System.out.println(desired + ", " + localeToMaxAndWeight);
342         }
343     }
344 
345     Set<Row.R3<ULocale, ULocale, Double>> localeToMaxLocaleAndWeight = new LinkedHashSet<Row.R3<ULocale, ULocale, Double>>();
346     Map<String,Set<Row.R3<ULocale, ULocale, Double>>> desiredLanguageToPossibleLocalesToMaxLocaleToData
347     = new LinkedHashMap<String,Set<Row.R3<ULocale, ULocale, Double>>>();
348 
349     // =============== Special Mapping Information ==============
350 
351     /**
352      * We need to add another method to addLikelySubtags that doesn't return
353      * null, but instead substitutes Zzzz and ZZ if unknown. There are also
354      * a few cases where addLikelySubtags needs to have expanded data, to handle
355      * all deprecated codes.
356      * @param languageCode
357      * @return "fixed" addLikelySubtags
358      */
addLikelySubtags(ULocale languageCode)359     private ULocale addLikelySubtags(ULocale languageCode) {
360         // max("und") = "en_Latn_US", and since matching is based on maximized tags, the undefined
361         // language would normally match English.  But that would produce the counterintuitive results
362         // that getBestMatch("und", LocaleMatcher("it,en")) would be "en", and
363         // getBestMatch("en", LocaleMatcher("it,und")) would be "und".
364         //
365         // To avoid that, we change the matcher's definitions of max (AddLikelySubtagsWithDefaults)
366         // so that max("und")="und". That produces the following, more desirable results:
367         if (languageCode.equals(UNKNOWN_LOCALE)) {
368             return UNKNOWN_LOCALE;
369         }
370         final ULocale result = ULocale.addLikelySubtags(languageCode);
371         // should have method on getLikelySubtags for this
372         if (result == null || result.equals(languageCode)) {
373             final String language = languageCode.getLanguage();
374             final String script = languageCode.getScript();
375             final String region = languageCode.getCountry();
376             return new ULocale((language.length()==0 ? "und"
377                 : language)
378                 + "_"
379                 + (script.length()==0 ? "Zzzz" : script)
380                 + "_"
381                 + (region.length()==0 ? "ZZ" : region));
382         }
383         return result;
384     }
385 
386     private static class LocalePatternMatcher {
387         // a value of null means a wildcard; matches any.
388         private String lang;
389         private String script;
390         private String region;
391         private Level level;
392         static Pattern pattern = Pattern.compile(
393             "([a-z]{1,8}|\\*)"
394                 + "(?:[_-]([A-Z][a-z]{3}|\\*))?"
395                 + "(?:[_-]([A-Z]{2}|[0-9]{3}|\\*))?");
396 
LocalePatternMatcher(String toMatch)397         public LocalePatternMatcher(String toMatch) {
398             Matcher matcher = pattern.matcher(toMatch);
399             if (!matcher.matches()) {
400                 throw new IllegalArgumentException("Bad pattern: " + toMatch);
401             }
402             lang = matcher.group(1);
403             script = matcher.group(2);
404             region = matcher.group(3);
405             level = region != null ? Level.region : script != null ? Level.script : Level.language;
406 
407             if (lang.equals("*")) {
408                 lang = null;
409             }
410             if (script != null && script.equals("*")) {
411                 script = null;
412             }
413             if (region != null && region.equals("*")) {
414                 region = null;
415             }
416         }
417 
matches(ULocale ulocale)418         boolean matches(ULocale ulocale) {
419             if (lang != null && !lang.equals(ulocale.getLanguage())) {
420                 return false;
421             }
422             if (script != null && !script.equals(ulocale.getScript())) {
423                 return false;
424             }
425             if (region != null && !region.equals(ulocale.getCountry())) {
426                 return false;
427             }
428             return true;
429         }
430 
getLevel()431         public Level getLevel() {
432             return level;
433         }
434 
getLanguage()435         public String getLanguage() {
436             return (lang == null ? "*" : lang);
437         }
438 
getScript()439         public String getScript() {
440             return (script == null ? "*" : script);
441         }
442 
getRegion()443         public String getRegion() {
444             return (region == null ? "*" : region);
445         }
446 
toString()447         public String toString() {
448             String result = getLanguage();
449             if (level != Level.language) {
450                 result += "-" + getScript();
451                 if (level != Level.script) {
452                     result += "-" + getRegion();
453                 }
454             }
455             return result;
456         }
457 
458         /* (non-Javadoc)
459          * @see java.lang.Object#equals(java.lang.Object)
460          */
461         @Override
equals(Object obj)462         public boolean equals(Object obj) {
463             if (obj == this) {
464                 return true;
465             }
466             if (obj == null || !(obj instanceof LocalePatternMatcher)) {
467                 return false;
468             }
469             LocalePatternMatcher other = (LocalePatternMatcher) obj;
470             return Utility.objectEquals(level, other.level)
471                 && Utility.objectEquals(lang, other.lang)
472                 && Utility.objectEquals(script, other.script)
473                 && Utility.objectEquals(region, other.region);
474         }
475 
476         /* (non-Javadoc)
477          * @see java.lang.Object#hashCode()
478          */
479         @Override
hashCode()480         public int hashCode() {
481             return level.ordinal()
482                 ^ (lang == null ? 0 : lang.hashCode())
483                 ^ (script == null ? 0 : script.hashCode())
484                 ^ (region == null ? 0 : region.hashCode());
485         }
486     }
487 
488     enum Level {
489         language(0.99),
490         script(0.2),
491         region(0.04);
492 
493         final double worst;
494 
Level(double d)495         Level(double d) {
496             worst = d;
497         }
498     }
499 
500     private static class ScoreData implements Freezable<ScoreData> {
501         @SuppressWarnings("unused")
502         private static final double maxUnequal_changeD_sameS = 0.5;
503 
504         @SuppressWarnings("unused")
505         private static final double maxUnequal_changeEqual = 0.75;
506 
507         LinkedHashSet<Row.R3<LocalePatternMatcher,LocalePatternMatcher,Double>> scores = new LinkedHashSet<R3<LocalePatternMatcher, LocalePatternMatcher, Double>>();
508         final Level level;
509 
ScoreData(Level level)510         public ScoreData(Level level) {
511             this.level = level;
512         }
513 
addDataToScores(String desired, String supported, R3<LocalePatternMatcher,LocalePatternMatcher,Double> data)514         void addDataToScores(String desired, String supported, R3<LocalePatternMatcher,LocalePatternMatcher,Double> data) {
515             //            Map<String, Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>>> lang_result = scores.get(desired);
516             //            if (lang_result == null) {
517             //                scores.put(desired, lang_result = new HashMap());
518             //            }
519             //            Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>> result = lang_result.get(supported);
520             //            if (result == null) {
521             //                lang_result.put(supported, result = new LinkedHashSet());
522             //            }
523             //            result.add(data);
524             boolean added = scores.add(data);
525             if (!added) {
526                 throw new ICUException("trying to add duplicate data: " +  data);
527             }
528         }
529 
getScore(ULocale dMax, String desiredRaw, String desiredMax, ULocale sMax, String supportedRaw, String supportedMax)530         double getScore(ULocale dMax, String desiredRaw, String desiredMax,
531             ULocale sMax, String supportedRaw, String supportedMax) {
532             double distance = 0;
533             if (!desiredMax.equals(supportedMax)) {
534                 distance = getRawScore(dMax, sMax);
535             } else if (!desiredRaw.equals(supportedRaw)) { // maxes are equal, changes are equal
536                 distance += 0.001;
537             }
538             return distance;
539         }
540 
getRawScore(ULocale desiredLocale, ULocale supportedLocale)541         private double getRawScore(ULocale desiredLocale, ULocale supportedLocale) {
542             if (DEBUG) {
543                 System.out.println("\t\t\t" + level + " Raw Score:\t" + desiredLocale + ";\t" + supportedLocale);
544             }
545             for (R3<LocalePatternMatcher,LocalePatternMatcher,Double> datum : scores) { // : result
546                 if (datum.get0().matches(desiredLocale)
547                     && datum.get1().matches(supportedLocale)) {
548                     if (DEBUG) {
549                         System.out.println("\t\t\t\tFOUND\t" + datum);
550                     }
551                     return datum.get2();
552                 }
553             }
554             if (DEBUG) {
555                 System.out.println("\t\t\t\tNOTFOUND\t" + level.worst);
556             }
557             return level.worst;
558         }
559 
toString()560         public String toString() {
561             StringBuilder result = new StringBuilder().append(level);
562             for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> score : scores) {
563                 result.append("\n\t\t").append(score);
564             }
565             return result.toString();
566         }
567 
568 
569         @SuppressWarnings("unchecked")
cloneAsThawed()570         public ScoreData cloneAsThawed() {
571             try {
572                 ScoreData result = (ScoreData) clone();
573                 result.scores = (LinkedHashSet<R3<LocalePatternMatcher, LocalePatternMatcher, Double>>) result.scores.clone();
574                 result.frozen = false;
575                 return result;
576             } catch (CloneNotSupportedException e) {
577                 throw new ICUCloneNotSupportedException(e); // will never happen
578             }
579 
580         }
581 
582         private volatile boolean frozen = false;
583 
freeze()584         public ScoreData freeze() {
585             return this;
586         }
587 
isFrozen()588         public boolean isFrozen() {
589             return frozen;
590         }
591 
getMatchingLanguages()592         public Relation<String,String> getMatchingLanguages() {
593             Relation<String,String> desiredToSupported = Relation.of(new LinkedHashMap<String,Set<String>>(), HashSet.class);
594             for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> item : scores) {
595                 LocalePatternMatcher desired = item.get0();
596                 LocalePatternMatcher supported = item.get1();
597                 if (desired.lang != null && supported.lang != null) { // explicitly mentioned languages must have reasonable distance
598                     desiredToSupported.put(desired.lang, supported.lang);
599                 }
600             }
601             desiredToSupported.freeze();
602             return desiredToSupported;
603         }
604     }
605 
606     /**
607      * Only for testing and use by tools. Interface may change!!
608      * @internal
609      * @deprecated This API is ICU internal only.
610      */
611     @Deprecated
612     public static class LanguageMatcherData implements Freezable<LanguageMatcherData> {
613         private ScoreData languageScores = new ScoreData(Level.language);
614         private ScoreData scriptScores = new ScoreData(Level.script);
615         private ScoreData regionScores = new ScoreData(Level.region);
616         private Relation<String, String> matchingLanguages;
617         private volatile boolean frozen = false;
618 
619 
620         /**
621          * @internal
622          * @deprecated This API is ICU internal only.
623          */
624         @Deprecated
LanguageMatcherData()625         public LanguageMatcherData() {
626         }
627 
628         /**
629          * @internal
630          * @deprecated This API is ICU internal only.
631          */
632         @Deprecated
matchingLanguages()633         public Relation<String, String> matchingLanguages() {
634             return matchingLanguages;
635         }
636 
637         /**
638          * @internal
639          * @deprecated This API is ICU internal only.
640          */
641         @Deprecated
toString()642         public String toString() {
643             return languageScores + "\n\t" + scriptScores + "\n\t" + regionScores;
644         }
645 
646         /**
647          * @internal
648          * @deprecated This API is ICU internal only.
649          */
650         @Deprecated
match(ULocale a, ULocale aMax, ULocale b, ULocale bMax)651         public double match(ULocale a, ULocale aMax, ULocale b, ULocale bMax) {
652             double diff = 0;
653             diff += languageScores.getScore(aMax, a.getLanguage(), aMax.getLanguage(), bMax, b.getLanguage(), bMax.getLanguage());
654             if (diff > 0.999d) { // with no language match, we bail
655                 return 0.0d;
656             }
657             diff += scriptScores.getScore(aMax, a.getScript(), aMax.getScript(), bMax, b.getScript(), bMax.getScript());
658             diff += regionScores.getScore(aMax, a.getCountry(), aMax.getCountry(), bMax, b.getCountry(), bMax.getCountry());
659 
660             if (!a.getVariant().equals(b.getVariant())) {
661                 diff += 0.01;
662             }
663             if (diff < 0.0d) {
664                 diff = 0.0d;
665             } else if (diff > 1.0d) {
666                 diff = 1.0d;
667             }
668             if (DEBUG) {
669                 System.out.println("\t\t\tTotal Distance\t" + diff);
670             }
671             return 1.0 - diff;
672         }
673 
674         /**
675          * @internal
676          * @deprecated This API is ICU internal only.
677          */
678         @Deprecated
addDistance(String desired, String supported, int percent, String comment)679         public LanguageMatcherData addDistance(String desired, String supported, int percent, String comment) {
680             return addDistance(desired, supported, percent, false, comment);
681         }
682         /**
683          * @internal
684          * @deprecated This API is ICU internal only.
685          */
686         @Deprecated
addDistance(String desired, String supported, int percent, boolean oneway)687         public LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway) {
688             return addDistance(desired, supported, percent, oneway, null);
689         }
690 
addDistance(String desired, String supported, int percent, boolean oneway, String comment)691         private LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway, String comment) {
692             if (DEBUG) {
693                 System.out.println("\t<languageMatch desired=\"" + desired + "\"" +
694                     " supported=\"" + supported + "\"" +
695                     " percent=\"" + percent + "\""
696                     + (oneway ? " oneway=\"true\"" : "")
697                     + "/>"
698                     + (comment == null ? "" : "\t<!-- " + comment + " -->"));
699                 //                    //     .addDistance("nn", "nb", 4, true)
700                 //                        System.out.println(".addDistance(\"" + desired + "\"" +
701                 //                                ", \"" + supported + "\"" +
702                 //                                ", " + percent + ""
703                 //                                + (oneway ? "" : ", true")
704                 //                                + (comment == null ? "" : ", \"" + comment + "\"")
705                 //                                + ")"
706                 //                        );
707 
708             }
709             double score = 1-percent/100.0; // convert from percentage
710             LocalePatternMatcher desiredMatcher = new LocalePatternMatcher(desired);
711             Level desiredLen = desiredMatcher.getLevel();
712             LocalePatternMatcher supportedMatcher = new LocalePatternMatcher(supported);
713             Level supportedLen = supportedMatcher.getLevel();
714             if (desiredLen != supportedLen) {
715                 throw new IllegalArgumentException("Lengths unequal: " + desired + ", " + supported);
716             }
717             R3<LocalePatternMatcher,LocalePatternMatcher,Double> data = Row.of(desiredMatcher, supportedMatcher, score);
718             R3<LocalePatternMatcher,LocalePatternMatcher,Double> data2 = oneway ? null : Row.of(supportedMatcher, desiredMatcher, score);
719             boolean desiredEqualsSupported = desiredMatcher.equals(supportedMatcher);
720             switch (desiredLen) {
721             case language:
722                 String dlanguage = desiredMatcher.getLanguage();
723                 String slanguage = supportedMatcher.getLanguage();
724                 languageScores.addDataToScores(dlanguage, slanguage, data);
725                 if (!oneway && !desiredEqualsSupported) {
726                     languageScores.addDataToScores(slanguage, dlanguage, data2);
727                 }
728                 break;
729             case script:
730                 String dscript = desiredMatcher.getScript();
731                 String sscript = supportedMatcher.getScript();
732                 scriptScores.addDataToScores(dscript, sscript, data);
733                 if (!oneway && !desiredEqualsSupported) {
734                     scriptScores.addDataToScores(sscript, dscript, data2);
735                 }
736                 break;
737             case region:
738                 String dregion = desiredMatcher.getRegion();
739                 String sregion = supportedMatcher.getRegion();
740                 regionScores.addDataToScores(dregion, sregion, data);
741                 if (!oneway && !desiredEqualsSupported) {
742                     regionScores.addDataToScores(sregion, dregion, data2);
743                 }
744                 break;
745             }
746             return this;
747         }
748 
749         /**
750          * {@inheritDoc}
751          * @internal
752          * @deprecated This API is ICU internal only.
753          */
754         @Deprecated
cloneAsThawed()755         public LanguageMatcherData cloneAsThawed() {
756             LanguageMatcherData result;
757             try {
758                 result = (LanguageMatcherData) clone();
759                 result.languageScores = languageScores.cloneAsThawed();
760                 result.scriptScores = scriptScores.cloneAsThawed();
761                 result.regionScores = regionScores.cloneAsThawed();
762                 result.frozen = false;
763                 return result;
764             } catch (CloneNotSupportedException e) {
765                 throw new ICUCloneNotSupportedException(e); // will never happen
766             }
767         }
768 
769         /**
770          * {@inheritDoc}
771          * @internal
772          * @deprecated This API is ICU internal only.
773          */
774         @Deprecated
freeze()775         public LanguageMatcherData freeze() {
776             languageScores.freeze();
777             regionScores.freeze();
778             scriptScores.freeze();
779             matchingLanguages = languageScores.getMatchingLanguages();
780             frozen = true;
781             return this;
782         }
783 
784         /**
785          * {@inheritDoc}
786          * @internal
787          * @deprecated This API is ICU internal only.
788          */
789         @Deprecated
isFrozen()790         public boolean isFrozen() {
791             return frozen;
792         }
793     }
794 
// Matcher data held by this instance (presumably the distance rules it
// matches with -- confirm against the constructors).
LanguageMatcherData matcherData;

// Data set built from the "written" table of the supplemental
// "languageMatching" resource; assigned and frozen in the static initializer.
private static final LanguageMatcherData defaultWritten;

// Language code replacements: iw -> he, mo -> ro, tl -> fil.
private static HashMap<String,String> canonicalMap = new HashMap<String, String>();


static {
    canonicalMap.put("iw", "he");
    canonicalMap.put("mo", "ro");
    canonicalMap.put("tl", "fil");

    final ICUResourceBundle supplemental = getICUSupplementalData();
    final ICUResourceBundle matchingTable = supplemental.findTopLevel("languageMatching");
    final ICUResourceBundle writtenRules = (ICUResourceBundle) matchingTable.get("written");
    defaultWritten = new LanguageMatcherData();

    // Each rule is an array such as { "*_*_*", "*_*_*", "96" }, optionally
    // followed by a fourth element that is "1" for a one-way rule. Example:
    // <languageMatch desired="gsw" supported="de" percent="96" oneway="true" />
    final UResourceBundleIterator rules = writtenRules.getIterator();
    while (rules.hasNext()) {
        final ICUResourceBundle rule = (ICUResourceBundle) rules.next();
        final boolean oneway = rule.getSize() > 3 && "1".equals(rule.getString(3));
        final String desired = rule.getString(0);
        final String supported = rule.getString(1);
        final int percent = Integer.parseInt(rule.getString(2));
        defaultWritten.addDistance(desired, supported, percent, oneway);
    }
    defaultWritten.freeze();
}
825 
826     /**
827      * @internal
828      * @deprecated This API is ICU internal only.
829      */
830     @Deprecated
getICUSupplementalData()831     public static ICUResourceBundle getICUSupplementalData() {
832         ICUResourceBundle suppData = (ICUResourceBundle) UResourceBundle.getBundleInstance(
833             ICUData.ICU_BASE_NAME,
834             "supplementalData",
835             ICUResourceBundle.ICU_DATA_CLASS_LOADER);
836         return suppData;
837     }
838 
839     /**
840      * @internal
841      * @deprecated This API is ICU internal only.
842      */
843     @Deprecated
match(ULocale a, ULocale b)844     public static double match(ULocale a, ULocale b) {
845         final LocaleMatcher matcher = new LocaleMatcher("");
846         return matcher.match(a, matcher.addLikelySubtags(a), b, matcher.addLikelySubtags(b));
847     }
848 }
849