1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 **************************************************************************************** 5 * Copyright (C) 2009-2016, Google, Inc.; International Business Machines Corporation 6 * and others. All Rights Reserved. 7 **************************************************************************************** 8 */ 9 package com.ibm.icu.util; 10 11 import java.util.HashMap; 12 import java.util.HashSet; 13 import java.util.Iterator; 14 import java.util.LinkedHashMap; 15 import java.util.LinkedHashSet; 16 import java.util.Map; 17 import java.util.Map.Entry; 18 import java.util.Set; 19 import java.util.regex.Matcher; 20 import java.util.regex.Pattern; 21 22 import com.ibm.icu.impl.ICUData; 23 import com.ibm.icu.impl.ICUResourceBundle; 24 import com.ibm.icu.impl.Relation; 25 import com.ibm.icu.impl.Row; 26 import com.ibm.icu.impl.Row.R3; 27 import com.ibm.icu.impl.Utility; 28 29 /** 30 * Provides a way to match the languages (locales) supported by a product to the 31 * languages (locales) acceptable to a user, and get the best match. For 32 * example: 33 * 34 * <pre> 35 * LocaleMatcher matcher = new LocaleMatcher("fr, en-GB, en"); 36 * 37 * // afterwards: 38 * matcher.getBestMatch("en-US").toLanguageTag() => "en" 39 * </pre> 40 * 41 * It takes into account when languages are close to one another, such as fil 42 * and tl, and when language regional variants are close, like en-GB and en-AU. 43 * It also handles scripts, like zh-Hant vs zh-TW. For examples, see the test 44 * file. 45 * <p>All classes implementing this interface should be immutable. Often a 46 * product will just need one static instance, built with the languages 47 * that it supports. However, it may want multiple instances with different 48 * default languages based on additional information, such as the domain. 49 * 50 * @author markdavis@google.com 51 * @stable ICU 4.4 52 */ 53 public class LocaleMatcher { 54 55 /** 56 * @internal 57 * @deprecated This API is ICU internal only. 58 */ 59 @Deprecated 60 public static final boolean DEBUG = false; 61 62 private static final ULocale UNKNOWN_LOCALE = new ULocale("und"); 63 64 /** 65 * Threshold for falling back to the default (first) language. May make this 66 * a parameter in the future. 67 */ 68 private static final double DEFAULT_THRESHOLD = 0.5; 69 70 /** 71 * The default language, in case the threshold is not met. 72 */ 73 private final ULocale defaultLanguage; 74 75 /** 76 * The default language, in case the threshold is not met. 77 */ 78 private final double threshold; 79 80 /** 81 * Create a new language matcher. The highest-weighted language is the 82 * default. That means that if no other language is matches closer than a given 83 * threshold, that default language is chosen. Typically the default is English, 84 * but it could be different based on additional information, such as the domain 85 * of the page. 86 * 87 * @param languagePriorityList weighted list 88 * @stable ICU 4.4 89 */ LocaleMatcher(LocalePriorityList languagePriorityList)90 public LocaleMatcher(LocalePriorityList languagePriorityList) { 91 this(languagePriorityList, defaultWritten); 92 } 93 94 /** 95 * Create a new language matcher from a String form. The highest-weighted 96 * language is the default. 97 * 98 * @param languagePriorityListString String form of LanguagePriorityList 99 * @stable ICU 4.4 100 */ LocaleMatcher(String languagePriorityListString)101 public LocaleMatcher(String languagePriorityListString) { 102 this(LocalePriorityList.add(languagePriorityListString).build()); 103 } 104 105 /** 106 * Internal testing function; may expose API later. 107 * @param languagePriorityList LocalePriorityList to match 108 * @param matcherData Internal matching data 109 * @internal 110 * @deprecated This API is ICU internal only. 111 */ 112 @Deprecated LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData)113 public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData) { 114 this(languagePriorityList, matcherData, DEFAULT_THRESHOLD); 115 } 116 117 /** 118 * Internal testing function; may expose API later. 119 * @param languagePriorityList LocalePriorityList to match 120 * @param matcherData Internal matching data 121 * @internal 122 * @deprecated This API is ICU internal only. 123 */ 124 @Deprecated LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold)125 public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold) { 126 this.matcherData = matcherData == null ? defaultWritten : matcherData.freeze(); 127 for (final ULocale language : languagePriorityList) { 128 add(language, languagePriorityList.getWeight(language)); 129 } 130 processMapping(); 131 Iterator<ULocale> it = languagePriorityList.iterator(); 132 defaultLanguage = it.hasNext() ? it.next() : null; 133 this.threshold = threshold; 134 } 135 136 137 /** 138 * Returns a fraction between 0 and 1, where 1 means that the languages are a 139 * perfect match, and 0 means that they are completely different. Note that 140 * the precise values may change over time; no code should be made dependent 141 * on the values remaining constant. 142 * @param desired Desired locale 143 * @param desiredMax Maximized locale (using likely subtags) 144 * @param supported Supported locale 145 * @param supportedMax Maximized locale (using likely subtags) 146 * @return value between 0 and 1, inclusive. 147 * @stable ICU 4.4 148 */ match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax)149 public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) { 150 return matcherData.match(desired, desiredMax, supported, supportedMax); 151 } 152 153 154 /** 155 * Canonicalize a locale (language). Note that for now, it is canonicalizing 156 * according to CLDR conventions (he vs iw, etc), since that is what is needed 157 * for likelySubtags. 158 * @param ulocale language/locale code 159 * @return ULocale with remapped subtags. 160 * @stable ICU 4.4 161 */ canonicalize(ULocale ulocale)162 public ULocale canonicalize(ULocale ulocale) { 163 // TODO Get the data from CLDR, use Java conventions. 164 String lang = ulocale.getLanguage(); 165 String lang2 = canonicalMap.get(lang); 166 String script = ulocale.getScript(); 167 String script2 = canonicalMap.get(script); 168 String region = ulocale.getCountry(); 169 String region2 = canonicalMap.get(region); 170 if (lang2 != null || script2 != null || region2 != null) { 171 return new ULocale( 172 lang2 == null ? lang : lang2, 173 script2 == null ? script : script2, 174 region2 == null ? region : region2 175 ); 176 } 177 return ulocale; 178 } 179 180 /** 181 * Get the best match for a LanguagePriorityList 182 * 183 * @param languageList list to match 184 * @return best matching language code 185 * @stable ICU 4.4 186 */ getBestMatch(LocalePriorityList languageList)187 public ULocale getBestMatch(LocalePriorityList languageList) { 188 double bestWeight = 0; 189 ULocale bestTableMatch = null; 190 double penalty = 0; 191 OutputDouble matchWeight = new OutputDouble(); 192 for (final ULocale language : languageList) { 193 final ULocale matchLocale = getBestMatchInternal(language, matchWeight); 194 final double weight = matchWeight.value * languageList.getWeight(language) - penalty; 195 if (weight > bestWeight) { 196 bestWeight = weight; 197 bestTableMatch = matchLocale; 198 } 199 penalty += 0.07000001; 200 } 201 if (bestWeight < threshold) { 202 bestTableMatch = defaultLanguage; 203 } 204 return bestTableMatch; 205 } 206 207 /** 208 * Convenience method: Get the best match for a LanguagePriorityList 209 * 210 * @param languageList String form of language priority list 211 * @return best matching language code 212 * @stable ICU 4.4 213 */ getBestMatch(String languageList)214 public ULocale getBestMatch(String languageList) { 215 return getBestMatch(LocalePriorityList.add(languageList).build()); 216 } 217 218 /** 219 * Get the best match for an individual language code. 220 * 221 * @param ulocale locale/language code to match 222 * @return best matching language code 223 * @stable ICU 4.4 224 */ getBestMatch(ULocale ulocale)225 public ULocale getBestMatch(ULocale ulocale) { 226 return getBestMatchInternal(ulocale, null); 227 } 228 229 /** 230 * @internal 231 * @deprecated This API is ICU internal only. 232 */ 233 @Deprecated getBestMatch(ULocale... ulocales)234 public ULocale getBestMatch(ULocale... ulocales) { 235 return getBestMatch(LocalePriorityList.add(ulocales).build()); 236 } 237 238 /** 239 * {@inheritDoc} 240 * @stable ICU 4.4 241 */ 242 @Override toString()243 public String toString() { 244 return "{" + defaultLanguage + ", " 245 + localeToMaxLocaleAndWeight + "}"; 246 } 247 // ================= Privates ===================== 248 249 /** 250 * Get the best match for an individual language code. 251 * 252 * @param languageCode 253 * @return best matching language code and weight (as per 254 * {@link #match(ULocale, ULocale)}) 255 */ getBestMatchInternal(ULocale languageCode, OutputDouble outputWeight)256 private ULocale getBestMatchInternal(ULocale languageCode, OutputDouble outputWeight) { 257 languageCode = canonicalize(languageCode); 258 final ULocale maximized = addLikelySubtags(languageCode); 259 if (DEBUG) { 260 System.out.println("\ngetBestMatchInternal: " + languageCode + ";\t" + maximized); 261 } 262 double bestWeight = 0; 263 ULocale bestTableMatch = null; 264 String baseLanguage = maximized.getLanguage(); 265 Set<R3<ULocale, ULocale, Double>> searchTable = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(baseLanguage); 266 if (searchTable != null) { // we preprocessed the table so as to filter by lanugage 267 if (DEBUG) System.out.println("\tSearching: " + searchTable); 268 for (final R3<ULocale, ULocale, Double> tableKeyValue : searchTable) { 269 ULocale tableKey = tableKeyValue.get0(); 270 ULocale maxLocale = tableKeyValue.get1(); 271 Double matchedWeight = tableKeyValue.get2(); 272 final double match = match(languageCode, maximized, tableKey, maxLocale); 273 if (DEBUG) { 274 System.out.println("\t" + tableKeyValue + ";\t" + match + "\n"); 275 } 276 final double weight = match * matchedWeight; 277 if (weight > bestWeight) { 278 bestWeight = weight; 279 bestTableMatch = tableKey; 280 if (weight > 0.999d) { // bail on good enough match. 281 break; 282 } 283 } 284 } 285 } 286 if (bestWeight < threshold) { 287 bestTableMatch = defaultLanguage; 288 } 289 if (outputWeight != null) { 290 outputWeight.value = bestWeight; // only return the weight when needed 291 } 292 return bestTableMatch; 293 } 294 295 /** 296 * @internal 297 * @deprecated This API is ICU internal only. 298 */ 299 @Deprecated 300 private static class OutputDouble { // TODO, move to where OutputInt is 301 double value; 302 } 303 add(ULocale language, Double weight)304 private void add(ULocale language, Double weight) { 305 language = canonicalize(language); 306 R3<ULocale, ULocale, Double> row = Row.of(language, addLikelySubtags(language), weight); 307 row.freeze(); 308 localeToMaxLocaleAndWeight.add(row); 309 } 310 311 /** 312 * We preprocess the data to get just the possible matches for each desired base language. 313 */ processMapping()314 private void processMapping() { 315 for (Entry<String, Set<String>> desiredToMatchingLanguages : matcherData.matchingLanguages().keyValuesSet()) { 316 String desired = desiredToMatchingLanguages.getKey(); 317 Set<String> supported = desiredToMatchingLanguages.getValue(); 318 for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) { 319 final ULocale key = localeToMaxAndWeight.get0(); 320 String lang = key.getLanguage(); 321 if (supported.contains(lang)) { 322 addFiltered(desired, localeToMaxAndWeight); 323 } 324 } 325 } 326 // now put in the values directly, since languages always map to themselves 327 for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) { 328 final ULocale key = localeToMaxAndWeight.get0(); 329 String lang = key.getLanguage(); 330 addFiltered(lang, localeToMaxAndWeight); 331 } 332 } 333 addFiltered(String desired, R3<ULocale, ULocale, Double> localeToMaxAndWeight)334 private void addFiltered(String desired, R3<ULocale, ULocale, Double> localeToMaxAndWeight) { 335 Set<R3<ULocale, ULocale, Double>> map = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(desired); 336 if (map == null) { 337 desiredLanguageToPossibleLocalesToMaxLocaleToData.put(desired, map = new LinkedHashSet<R3<ULocale, ULocale, Double>>()); 338 } 339 map.add(localeToMaxAndWeight); 340 if (DEBUG) { 341 System.out.println(desired + ", " + localeToMaxAndWeight); 342 } 343 } 344 345 Set<Row.R3<ULocale, ULocale, Double>> localeToMaxLocaleAndWeight = new LinkedHashSet<Row.R3<ULocale, ULocale, Double>>(); 346 Map<String,Set<Row.R3<ULocale, ULocale, Double>>> desiredLanguageToPossibleLocalesToMaxLocaleToData 347 = new LinkedHashMap<String,Set<Row.R3<ULocale, ULocale, Double>>>(); 348 349 // =============== Special Mapping Information ============== 350 351 /** 352 * We need to add another method to addLikelySubtags that doesn't return 353 * null, but instead substitutes Zzzz and ZZ if unknown. There are also 354 * a few cases where addLikelySubtags needs to have expanded data, to handle 355 * all deprecated codes. 356 * @param languageCode 357 * @return "fixed" addLikelySubtags 358 */ addLikelySubtags(ULocale languageCode)359 private ULocale addLikelySubtags(ULocale languageCode) { 360 // max("und") = "en_Latn_US", and since matching is based on maximized tags, the undefined 361 // language would normally match English. But that would produce the counterintuitive results 362 // that getBestMatch("und", LocaleMatcher("it,en")) would be "en", and 363 // getBestMatch("en", LocaleMatcher("it,und")) would be "und". 364 // 365 // To avoid that, we change the matcher's definitions of max (AddLikelySubtagsWithDefaults) 366 // so that max("und")="und". That produces the following, more desirable results: 367 if (languageCode.equals(UNKNOWN_LOCALE)) { 368 return UNKNOWN_LOCALE; 369 } 370 final ULocale result = ULocale.addLikelySubtags(languageCode); 371 // should have method on getLikelySubtags for this 372 if (result == null || result.equals(languageCode)) { 373 final String language = languageCode.getLanguage(); 374 final String script = languageCode.getScript(); 375 final String region = languageCode.getCountry(); 376 return new ULocale((language.length()==0 ? "und" 377 : language) 378 + "_" 379 + (script.length()==0 ? "Zzzz" : script) 380 + "_" 381 + (region.length()==0 ? "ZZ" : region)); 382 } 383 return result; 384 } 385 386 private static class LocalePatternMatcher { 387 // a value of null means a wildcard; matches any. 388 private String lang; 389 private String script; 390 private String region; 391 private Level level; 392 static Pattern pattern = Pattern.compile( 393 "([a-z]{1,8}|\\*)" 394 + "(?:[_-]([A-Z][a-z]{3}|\\*))?" 395 + "(?:[_-]([A-Z]{2}|[0-9]{3}|\\*))?"); 396 LocalePatternMatcher(String toMatch)397 public LocalePatternMatcher(String toMatch) { 398 Matcher matcher = pattern.matcher(toMatch); 399 if (!matcher.matches()) { 400 throw new IllegalArgumentException("Bad pattern: " + toMatch); 401 } 402 lang = matcher.group(1); 403 script = matcher.group(2); 404 region = matcher.group(3); 405 level = region != null ? Level.region : script != null ? Level.script : Level.language; 406 407 if (lang.equals("*")) { 408 lang = null; 409 } 410 if (script != null && script.equals("*")) { 411 script = null; 412 } 413 if (region != null && region.equals("*")) { 414 region = null; 415 } 416 } 417 matches(ULocale ulocale)418 boolean matches(ULocale ulocale) { 419 if (lang != null && !lang.equals(ulocale.getLanguage())) { 420 return false; 421 } 422 if (script != null && !script.equals(ulocale.getScript())) { 423 return false; 424 } 425 if (region != null && !region.equals(ulocale.getCountry())) { 426 return false; 427 } 428 return true; 429 } 430 getLevel()431 public Level getLevel() { 432 return level; 433 } 434 getLanguage()435 public String getLanguage() { 436 return (lang == null ? "*" : lang); 437 } 438 getScript()439 public String getScript() { 440 return (script == null ? "*" : script); 441 } 442 getRegion()443 public String getRegion() { 444 return (region == null ? "*" : region); 445 } 446 toString()447 public String toString() { 448 String result = getLanguage(); 449 if (level != Level.language) { 450 result += "-" + getScript(); 451 if (level != Level.script) { 452 result += "-" + getRegion(); 453 } 454 } 455 return result; 456 } 457 458 /* (non-Javadoc) 459 * @see java.lang.Object#equals(java.lang.Object) 460 */ 461 @Override equals(Object obj)462 public boolean equals(Object obj) { 463 if (obj == this) { 464 return true; 465 } 466 if (obj == null || !(obj instanceof LocalePatternMatcher)) { 467 return false; 468 } 469 LocalePatternMatcher other = (LocalePatternMatcher) obj; 470 return Utility.objectEquals(level, other.level) 471 && Utility.objectEquals(lang, other.lang) 472 && Utility.objectEquals(script, other.script) 473 && Utility.objectEquals(region, other.region); 474 } 475 476 /* (non-Javadoc) 477 * @see java.lang.Object#hashCode() 478 */ 479 @Override hashCode()480 public int hashCode() { 481 return level.ordinal() 482 ^ (lang == null ? 0 : lang.hashCode()) 483 ^ (script == null ? 0 : script.hashCode()) 484 ^ (region == null ? 0 : region.hashCode()); 485 } 486 } 487 488 enum Level { 489 language(0.99), 490 script(0.2), 491 region(0.04); 492 493 final double worst; 494 Level(double d)495 Level(double d) { 496 worst = d; 497 } 498 } 499 500 private static class ScoreData implements Freezable<ScoreData> { 501 @SuppressWarnings("unused") 502 private static final double maxUnequal_changeD_sameS = 0.5; 503 504 @SuppressWarnings("unused") 505 private static final double maxUnequal_changeEqual = 0.75; 506 507 LinkedHashSet<Row.R3<LocalePatternMatcher,LocalePatternMatcher,Double>> scores = new LinkedHashSet<R3<LocalePatternMatcher, LocalePatternMatcher, Double>>(); 508 final Level level; 509 ScoreData(Level level)510 public ScoreData(Level level) { 511 this.level = level; 512 } 513 addDataToScores(String desired, String supported, R3<LocalePatternMatcher,LocalePatternMatcher,Double> data)514 void addDataToScores(String desired, String supported, R3<LocalePatternMatcher,LocalePatternMatcher,Double> data) { 515 // Map<String, Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>>> lang_result = scores.get(desired); 516 // if (lang_result == null) { 517 // scores.put(desired, lang_result = new HashMap()); 518 // } 519 // Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>> result = lang_result.get(supported); 520 // if (result == null) { 521 // lang_result.put(supported, result = new LinkedHashSet()); 522 // } 523 // result.add(data); 524 boolean added = scores.add(data); 525 if (!added) { 526 throw new ICUException("trying to add duplicate data: " + data); 527 } 528 } 529 getScore(ULocale dMax, String desiredRaw, String desiredMax, ULocale sMax, String supportedRaw, String supportedMax)530 double getScore(ULocale dMax, String desiredRaw, String desiredMax, 531 ULocale sMax, String supportedRaw, String supportedMax) { 532 double distance = 0; 533 if (!desiredMax.equals(supportedMax)) { 534 distance = getRawScore(dMax, sMax); 535 } else if (!desiredRaw.equals(supportedRaw)) { // maxes are equal, changes are equal 536 distance += 0.001; 537 } 538 return distance; 539 } 540 getRawScore(ULocale desiredLocale, ULocale supportedLocale)541 private double getRawScore(ULocale desiredLocale, ULocale supportedLocale) { 542 if (DEBUG) { 543 System.out.println("\t\t\t" + level + " Raw Score:\t" + desiredLocale + ";\t" + supportedLocale); 544 } 545 for (R3<LocalePatternMatcher,LocalePatternMatcher,Double> datum : scores) { // : result 546 if (datum.get0().matches(desiredLocale) 547 && datum.get1().matches(supportedLocale)) { 548 if (DEBUG) { 549 System.out.println("\t\t\t\tFOUND\t" + datum); 550 } 551 return datum.get2(); 552 } 553 } 554 if (DEBUG) { 555 System.out.println("\t\t\t\tNOTFOUND\t" + level.worst); 556 } 557 return level.worst; 558 } 559 toString()560 public String toString() { 561 StringBuilder result = new StringBuilder().append(level); 562 for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> score : scores) { 563 result.append("\n\t\t").append(score); 564 } 565 return result.toString(); 566 } 567 568 569 @SuppressWarnings("unchecked") cloneAsThawed()570 public ScoreData cloneAsThawed() { 571 try { 572 ScoreData result = (ScoreData) clone(); 573 result.scores = (LinkedHashSet<R3<LocalePatternMatcher, LocalePatternMatcher, Double>>) result.scores.clone(); 574 result.frozen = false; 575 return result; 576 } catch (CloneNotSupportedException e) { 577 throw new ICUCloneNotSupportedException(e); // will never happen 578 } 579 580 } 581 582 private volatile boolean frozen = false; 583 freeze()584 public ScoreData freeze() { 585 return this; 586 } 587 isFrozen()588 public boolean isFrozen() { 589 return frozen; 590 } 591 getMatchingLanguages()592 public Relation<String,String> getMatchingLanguages() { 593 Relation<String,String> desiredToSupported = Relation.of(new LinkedHashMap<String,Set<String>>(), HashSet.class); 594 for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> item : scores) { 595 LocalePatternMatcher desired = item.get0(); 596 LocalePatternMatcher supported = item.get1(); 597 if (desired.lang != null && supported.lang != null) { // explicitly mentioned languages must have reasonable distance 598 desiredToSupported.put(desired.lang, supported.lang); 599 } 600 } 601 desiredToSupported.freeze(); 602 return desiredToSupported; 603 } 604 } 605 606 /** 607 * Only for testing and use by tools. Interface may change!! 608 * @internal 609 * @deprecated This API is ICU internal only. 610 */ 611 @Deprecated 612 public static class LanguageMatcherData implements Freezable<LanguageMatcherData> { 613 private ScoreData languageScores = new ScoreData(Level.language); 614 private ScoreData scriptScores = new ScoreData(Level.script); 615 private ScoreData regionScores = new ScoreData(Level.region); 616 private Relation<String, String> matchingLanguages; 617 private volatile boolean frozen = false; 618 619 620 /** 621 * @internal 622 * @deprecated This API is ICU internal only. 623 */ 624 @Deprecated LanguageMatcherData()625 public LanguageMatcherData() { 626 } 627 628 /** 629 * @internal 630 * @deprecated This API is ICU internal only. 631 */ 632 @Deprecated matchingLanguages()633 public Relation<String, String> matchingLanguages() { 634 return matchingLanguages; 635 } 636 637 /** 638 * @internal 639 * @deprecated This API is ICU internal only. 640 */ 641 @Deprecated toString()642 public String toString() { 643 return languageScores + "\n\t" + scriptScores + "\n\t" + regionScores; 644 } 645 646 /** 647 * @internal 648 * @deprecated This API is ICU internal only. 649 */ 650 @Deprecated match(ULocale a, ULocale aMax, ULocale b, ULocale bMax)651 public double match(ULocale a, ULocale aMax, ULocale b, ULocale bMax) { 652 double diff = 0; 653 diff += languageScores.getScore(aMax, a.getLanguage(), aMax.getLanguage(), bMax, b.getLanguage(), bMax.getLanguage()); 654 if (diff > 0.999d) { // with no language match, we bail 655 return 0.0d; 656 } 657 diff += scriptScores.getScore(aMax, a.getScript(), aMax.getScript(), bMax, b.getScript(), bMax.getScript()); 658 diff += regionScores.getScore(aMax, a.getCountry(), aMax.getCountry(), bMax, b.getCountry(), bMax.getCountry()); 659 660 if (!a.getVariant().equals(b.getVariant())) { 661 diff += 0.01; 662 } 663 if (diff < 0.0d) { 664 diff = 0.0d; 665 } else if (diff > 1.0d) { 666 diff = 1.0d; 667 } 668 if (DEBUG) { 669 System.out.println("\t\t\tTotal Distance\t" + diff); 670 } 671 return 1.0 - diff; 672 } 673 674 /** 675 * @internal 676 * @deprecated This API is ICU internal only. 677 */ 678 @Deprecated addDistance(String desired, String supported, int percent, String comment)679 public LanguageMatcherData addDistance(String desired, String supported, int percent, String comment) { 680 return addDistance(desired, supported, percent, false, comment); 681 } 682 /** 683 * @internal 684 * @deprecated This API is ICU internal only. 685 */ 686 @Deprecated addDistance(String desired, String supported, int percent, boolean oneway)687 public LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway) { 688 return addDistance(desired, supported, percent, oneway, null); 689 } 690 addDistance(String desired, String supported, int percent, boolean oneway, String comment)691 private LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway, String comment) { 692 if (DEBUG) { 693 System.out.println("\t<languageMatch desired=\"" + desired + "\"" + 694 " supported=\"" + supported + "\"" + 695 " percent=\"" + percent + "\"" 696 + (oneway ? " oneway=\"true\"" : "") 697 + "/>" 698 + (comment == null ? "" : "\t<!-- " + comment + " -->")); 699 // // .addDistance("nn", "nb", 4, true) 700 // System.out.println(".addDistance(\"" + desired + "\"" + 701 // ", \"" + supported + "\"" + 702 // ", " + percent + "" 703 // + (oneway ? "" : ", true") 704 // + (comment == null ? "" : ", \"" + comment + "\"") 705 // + ")" 706 // ); 707 708 } 709 double score = 1-percent/100.0; // convert from percentage 710 LocalePatternMatcher desiredMatcher = new LocalePatternMatcher(desired); 711 Level desiredLen = desiredMatcher.getLevel(); 712 LocalePatternMatcher supportedMatcher = new LocalePatternMatcher(supported); 713 Level supportedLen = supportedMatcher.getLevel(); 714 if (desiredLen != supportedLen) { 715 throw new IllegalArgumentException("Lengths unequal: " + desired + ", " + supported); 716 } 717 R3<LocalePatternMatcher,LocalePatternMatcher,Double> data = Row.of(desiredMatcher, supportedMatcher, score); 718 R3<LocalePatternMatcher,LocalePatternMatcher,Double> data2 = oneway ? null : Row.of(supportedMatcher, desiredMatcher, score); 719 boolean desiredEqualsSupported = desiredMatcher.equals(supportedMatcher); 720 switch (desiredLen) { 721 case language: 722 String dlanguage = desiredMatcher.getLanguage(); 723 String slanguage = supportedMatcher.getLanguage(); 724 languageScores.addDataToScores(dlanguage, slanguage, data); 725 if (!oneway && !desiredEqualsSupported) { 726 languageScores.addDataToScores(slanguage, dlanguage, data2); 727 } 728 break; 729 case script: 730 String dscript = desiredMatcher.getScript(); 731 String sscript = supportedMatcher.getScript(); 732 scriptScores.addDataToScores(dscript, sscript, data); 733 if (!oneway && !desiredEqualsSupported) { 734 scriptScores.addDataToScores(sscript, dscript, data2); 735 } 736 break; 737 case region: 738 String dregion = desiredMatcher.getRegion(); 739 String sregion = supportedMatcher.getRegion(); 740 regionScores.addDataToScores(dregion, sregion, data); 741 if (!oneway && !desiredEqualsSupported) { 742 regionScores.addDataToScores(sregion, dregion, data2); 743 } 744 break; 745 } 746 return this; 747 } 748 749 /** 750 * {@inheritDoc} 751 * @internal 752 * @deprecated This API is ICU internal only. 753 */ 754 @Deprecated cloneAsThawed()755 public LanguageMatcherData cloneAsThawed() { 756 LanguageMatcherData result; 757 try { 758 result = (LanguageMatcherData) clone(); 759 result.languageScores = languageScores.cloneAsThawed(); 760 result.scriptScores = scriptScores.cloneAsThawed(); 761 result.regionScores = regionScores.cloneAsThawed(); 762 result.frozen = false; 763 return result; 764 } catch (CloneNotSupportedException e) { 765 throw new ICUCloneNotSupportedException(e); // will never happen 766 } 767 } 768 769 /** 770 * {@inheritDoc} 771 * @internal 772 * @deprecated This API is ICU internal only. 773 */ 774 @Deprecated freeze()775 public LanguageMatcherData freeze() { 776 languageScores.freeze(); 777 regionScores.freeze(); 778 scriptScores.freeze(); 779 matchingLanguages = languageScores.getMatchingLanguages(); 780 frozen = true; 781 return this; 782 } 783 784 /** 785 * {@inheritDoc} 786 * @internal 787 * @deprecated This API is ICU internal only. 788 */ 789 @Deprecated isFrozen()790 public boolean isFrozen() { 791 return frozen; 792 } 793 } 794 795 LanguageMatcherData matcherData; 796 797 private static final LanguageMatcherData defaultWritten; 798 799 private static HashMap<String,String> canonicalMap = new HashMap<String, String>(); 800 801 802 static { 803 canonicalMap.put("iw", "he"); 804 canonicalMap.put("mo", "ro"); 805 canonicalMap.put("tl", "fil"); 806 807 ICUResourceBundle suppData = getICUSupplementalData(); 808 ICUResourceBundle languageMatching = suppData.findTopLevel("languageMatching"); 809 ICUResourceBundle written = (ICUResourceBundle) languageMatching.get("written"); 810 defaultWritten = new LanguageMatcherData(); 811 812 for(UResourceBundleIterator iter = written.getIterator(); iter.hasNext();) { 813 ICUResourceBundle item = (ICUResourceBundle) iter.next(); 814 /* 815 "*_*_*", 816 "*_*_*", 817 "96", 818 */ 819 // <languageMatch desired="gsw" supported="de" percent="96" oneway="true" /> 820 boolean oneway = item.getSize() > 3 && "1".equals(item.getString(3)); 821 defaultWritten.addDistance(item.getString(0), item.getString(1), Integer.parseInt(item.getString(2)), oneway); 822 } defaultWritten.freeze()823 defaultWritten.freeze(); 824 } 825 826 /** 827 * @internal 828 * @deprecated This API is ICU internal only. 829 */ 830 @Deprecated getICUSupplementalData()831 public static ICUResourceBundle getICUSupplementalData() { 832 ICUResourceBundle suppData = (ICUResourceBundle) UResourceBundle.getBundleInstance( 833 ICUData.ICU_BASE_NAME, 834 "supplementalData", 835 ICUResourceBundle.ICU_DATA_CLASS_LOADER); 836 return suppData; 837 } 838 839 /** 840 * @internal 841 * @deprecated This API is ICU internal only. 842 */ 843 @Deprecated match(ULocale a, ULocale b)844 public static double match(ULocale a, ULocale b) { 845 final LocaleMatcher matcher = new LocaleMatcher(""); 846 return matcher.match(a, matcher.addLikelySubtags(a), b, matcher.addLikelySubtags(b)); 847 } 848 } 849