1 /* 2 **************************************************************************************** 3 * Copyright (C) 2009-2015, Google, Inc.; International Business Machines Corporation * 4 * and others. All Rights Reserved. * 5 **************************************************************************************** 6 */ 7 package com.ibm.icu.util; 8 9 import java.util.HashMap; 10 import java.util.HashSet; 11 import java.util.Iterator; 12 import java.util.LinkedHashMap; 13 import java.util.LinkedHashSet; 14 import java.util.Map; 15 import java.util.Map.Entry; 16 import java.util.Objects; 17 import java.util.Set; 18 import java.util.regex.Matcher; 19 import java.util.regex.Pattern; 20 21 import com.ibm.icu.impl.ICUData; 22 import com.ibm.icu.impl.ICUResourceBundle; 23 import com.ibm.icu.impl.Relation; 24 import com.ibm.icu.impl.Row; 25 import com.ibm.icu.impl.Row.R3; 26 27 /** 28 * Provides a way to match the languages (locales) supported by a product to the 29 * languages (locales) acceptable to a user, and get the best match. For 30 * example: 31 * 32 * <pre> 33 * LocaleMatcher matcher = new LocaleMatcher("fr, en-GB, en"); 34 * 35 * // afterwards: 36 * matcher.getBestMatch("en-US").toLanguageTag() => "en" 37 * </pre> 38 * 39 * It takes into account when languages are close to one another, such as fil 40 * and tl, and when language regional variants are close, like en-GB and en-AU. 41 * It also handles scripts, like zh-Hant vs zh-TW. For examples, see the test 42 * file. 43 * <p>All classes implementing this interface should be immutable. Often a 44 * product will just need one static instance, built with the languages 45 * that it supports. However, it may want multiple instances with different 46 * default languages based on additional information, such as the domain. 47 * 48 * @author markdavis@google.com 49 * @stable ICU 4.4 50 */ 51 public class LocaleMatcher { 52 53 public static final boolean DEBUG = false; 54 55 private static final ULocale UNKNOWN_LOCALE = new ULocale("und"); 56 57 /** 58 * Threshold for falling back to the default (first) language. May make this 59 * a parameter in the future. 60 */ 61 private static final double DEFAULT_THRESHOLD = 0.5; 62 63 /** 64 * The default language, in case the threshold is not met. 65 */ 66 private final ULocale defaultLanguage; 67 68 /** 69 * The default language, in case the threshold is not met. 70 */ 71 private final double threshold; 72 73 /** 74 * Create a new language matcher. The highest-weighted language is the 75 * default. That means that if no other language is matches closer than a given 76 * threshold, that default language is chosen. Typically the default is English, 77 * but it could be different based on additional information, such as the domain 78 * of the page. 79 * 80 * @param languagePriorityList weighted list 81 * @stable ICU 4.4 82 */ LocaleMatcher(LocalePriorityList languagePriorityList)83 public LocaleMatcher(LocalePriorityList languagePriorityList) { 84 this(languagePriorityList, defaultWritten); 85 } 86 87 /** 88 * Create a new language matcher from a String form. The highest-weighted 89 * language is the default. 90 * 91 * @param languagePriorityListString String form of LanguagePriorityList 92 * @stable ICU 4.4 93 */ LocaleMatcher(String languagePriorityListString)94 public LocaleMatcher(String languagePriorityListString) { 95 this(LocalePriorityList.add(languagePriorityListString).build()); 96 } 97 98 /** 99 * Internal testing function; may expose API later. 100 * @param languagePriorityList LocalePriorityList to match 101 * @param matcherData Internal matching data 102 * @internal 103 * @deprecated This API is ICU internal only. 104 */ 105 @Deprecated LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData)106 public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData) { 107 this(languagePriorityList, matcherData, DEFAULT_THRESHOLD); 108 } 109 110 /** 111 * Internal testing function; may expose API later. 112 * @param languagePriorityList LocalePriorityList to match 113 * @param matcherData Internal matching data 114 * @internal 115 * @deprecated This API is ICU internal only. 116 */ 117 @Deprecated LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold)118 public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold) { 119 this.matcherData = matcherData == null ? defaultWritten : matcherData.freeze(); 120 for (final ULocale language : languagePriorityList) { 121 add(language, languagePriorityList.getWeight(language)); 122 } 123 processMapping(); 124 Iterator<ULocale> it = languagePriorityList.iterator(); 125 defaultLanguage = it.hasNext() ? it.next() : null; 126 this.threshold = threshold; 127 } 128 129 /** 130 * Returns a fraction between 0 and 1, where 1 means that the languages are a 131 * perfect match, and 0 means that they are completely different. Note that 132 * the precise values may change over time; no code should be made dependent 133 * on the values remaining constant. 134 * @param desired Desired locale 135 * @param desiredMax Maximized locale (using likely subtags) 136 * @param supported Supported locale 137 * @param supportedMax Maximized locale (using likely subtags) 138 * @return value between 0 and 1, inclusive. 139 * @stable ICU 4.4 140 */ match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax)141 public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) { 142 return matcherData.match(desired, desiredMax, supported, supportedMax); 143 } 144 145 /** 146 * Canonicalize a locale (language). Note that for now, it is canonicalizing 147 * according to CLDR conventions (he vs iw, etc), since that is what is needed 148 * for likelySubtags. 149 * @param ulocale language/locale code 150 * @return ULocale with remapped subtags. 151 * @stable ICU 4.4 152 */ canonicalize(ULocale ulocale)153 public ULocale canonicalize(ULocale ulocale) { 154 // TODO Get the data from CLDR, use Java conventions. 155 String lang = ulocale.getLanguage(); 156 String lang2 = canonicalMap.get(lang); 157 String script = ulocale.getScript(); 158 String script2 = canonicalMap.get(script); 159 String region = ulocale.getCountry(); 160 String region2 = canonicalMap.get(region); 161 if (lang2 != null || script2 != null || region2 != null) { 162 return new ULocale( 163 lang2 == null ? lang : lang2, 164 script2 == null ? script : script2, 165 region2 == null ? region : region2); 166 } 167 return ulocale; 168 } 169 170 /** 171 * Get the best match for a LanguagePriorityList 172 * 173 * @param languageList list to match 174 * @return best matching language code 175 * @stable ICU 4.4 176 */ getBestMatch(LocalePriorityList languageList)177 public ULocale getBestMatch(LocalePriorityList languageList) { 178 double bestWeight = 0; 179 ULocale bestTableMatch = null; 180 double penalty = 0; 181 OutputDouble matchWeight = new OutputDouble(); 182 for (final ULocale language : languageList) { 183 final ULocale matchLocale = getBestMatchInternal(language, matchWeight); 184 final double weight = matchWeight.value * languageList.getWeight(language) - penalty; 185 if (weight > bestWeight) { 186 bestWeight = weight; 187 bestTableMatch = matchLocale; 188 } 189 penalty += 0.07000001; 190 } 191 if (bestWeight < threshold) { 192 bestTableMatch = defaultLanguage; 193 } 194 return bestTableMatch; 195 } 196 197 /** 198 * Convenience method: Get the best match for a LanguagePriorityList 199 * 200 * @param languageList String form of language priority list 201 * @return best matching language code 202 * @stable ICU 4.4 203 */ getBestMatch(String languageList)204 public ULocale getBestMatch(String languageList) { 205 return getBestMatch(LocalePriorityList.add(languageList).build()); 206 } 207 208 /** 209 * Get the best match for an individual language code. 210 * 211 * @param ulocale locale/language code to match 212 * @return best matching language code 213 * @stable ICU 4.4 214 */ getBestMatch(ULocale ulocale)215 public ULocale getBestMatch(ULocale ulocale) { 216 return getBestMatchInternal(ulocale, null); 217 } 218 219 /** 220 * @internal 221 * @deprecated This API is ICU internal only. 222 */ 223 @Deprecated getBestMatch(ULocale... ulocales)224 public ULocale getBestMatch(ULocale... ulocales) { 225 return getBestMatch(LocalePriorityList.add(ulocales).build()); 226 } 227 228 /** 229 * {@inheritDoc} 230 * @stable ICU 4.4 231 */ 232 @Override toString()233 public String toString() { 234 return "{" + defaultLanguage + ", " 235 + localeToMaxLocaleAndWeight + "}"; 236 } 237 238 // ================= Privates ===================== 239 240 /** 241 * Get the best match for an individual language code. 242 * 243 * @param languageCode 244 * @return best matching language code and weight (as per 245 * {@link #match(ULocale, ULocale)}) 246 */ getBestMatchInternal(ULocale languageCode, OutputDouble outputWeight)247 private ULocale getBestMatchInternal(ULocale languageCode, OutputDouble outputWeight) { 248 languageCode = canonicalize(languageCode); 249 final ULocale maximized = addLikelySubtags(languageCode); 250 if (DEBUG) { 251 System.out.println("\ngetBestMatchInternal: " + languageCode + ";\t" + maximized); 252 } 253 double bestWeight = 0; 254 ULocale bestTableMatch = null; 255 String baseLanguage = maximized.getLanguage(); 256 Set<R3<ULocale, ULocale, Double>> searchTable = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(baseLanguage); 257 if (searchTable != null) { // we preprocessed the table so as to filter by lanugage 258 if (DEBUG) System.out.println("\tSearching: " + searchTable); 259 for (final R3<ULocale, ULocale, Double> tableKeyValue : searchTable) { 260 ULocale tableKey = tableKeyValue.get0(); 261 ULocale maxLocale = tableKeyValue.get1(); 262 Double matchedWeight = tableKeyValue.get2(); 263 final double match = match(languageCode, maximized, tableKey, maxLocale); 264 if (DEBUG) { 265 System.out.println("\t" + tableKeyValue + ";\t" + match + "\n"); 266 } 267 final double weight = match * matchedWeight; 268 if (weight > bestWeight) { 269 bestWeight = weight; 270 bestTableMatch = tableKey; 271 if (weight > 0.999d) { // bail on good enough match. 272 break; 273 } 274 } 275 } 276 } 277 if (bestWeight < threshold) { 278 bestTableMatch = defaultLanguage; 279 } 280 if (outputWeight != null) { 281 outputWeight.value = bestWeight; // only return the weight when needed 282 } 283 return bestTableMatch; 284 } 285 286 public static class OutputDouble { // TODO, move to where OutputInt is 287 double value; 288 } 289 add(ULocale language, Double weight)290 private void add(ULocale language, Double weight) { 291 language = canonicalize(language); 292 R3<ULocale, ULocale, Double> row = Row.of(language, addLikelySubtags(language), weight); 293 row.freeze(); 294 localeToMaxLocaleAndWeight.add(row); 295 } 296 297 /** 298 * We preprocess the data to get just the possible matches for each desired base language. 299 */ processMapping()300 private void processMapping() { 301 for (Entry<String, Set<String>> desiredToMatchingLanguages : matcherData.matchingLanguages().keyValuesSet()) { 302 String desired = desiredToMatchingLanguages.getKey(); 303 Set<String> supported = desiredToMatchingLanguages.getValue(); 304 for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) { 305 final ULocale key = localeToMaxAndWeight.get0(); 306 String lang = key.getLanguage(); 307 if (supported.contains(lang)) { 308 addFiltered(desired, localeToMaxAndWeight); 309 } 310 } 311 } 312 // now put in the values directly, since languages always map to themselves 313 for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) { 314 final ULocale key = localeToMaxAndWeight.get0(); 315 String lang = key.getLanguage(); 316 addFiltered(lang, localeToMaxAndWeight); 317 } 318 } 319 addFiltered(String desired, R3<ULocale, ULocale, Double> localeToMaxAndWeight)320 private void addFiltered(String desired, R3<ULocale, ULocale, Double> localeToMaxAndWeight) { 321 Set<R3<ULocale, ULocale, Double>> map = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(desired); 322 if (map == null) { 323 desiredLanguageToPossibleLocalesToMaxLocaleToData.put(desired, map = new LinkedHashSet<>()); 324 } 325 map.add(localeToMaxAndWeight); 326 if (DEBUG) { 327 System.out.println(desired + ", " + localeToMaxAndWeight); 328 } 329 } 330 331 Set<Row.R3<ULocale, ULocale, Double>> localeToMaxLocaleAndWeight = new LinkedHashSet<>(); 332 Map<String, Set<Row.R3<ULocale, ULocale, Double>>> desiredLanguageToPossibleLocalesToMaxLocaleToData = new LinkedHashMap<>(); 333 334 // =============== Special Mapping Information ============== 335 336 /** 337 * We need to add another method to addLikelySubtags that doesn't return 338 * null, but instead substitutes Zzzz and ZZ if unknown. There are also 339 * a few cases where addLikelySubtags needs to have expanded data, to handle 340 * all deprecated codes. 341 * @param languageCode 342 * @return "fixed" addLikelySubtags 343 */ addLikelySubtags(ULocale languageCode)344 private ULocale addLikelySubtags(ULocale languageCode) { 345 // max("und") = "en_Latn_US", and since matching is based on maximized tags, the undefined 346 // language would normally match English. But that would produce the counterintuitive results 347 // that getBestMatch("und", LocaleMatcher("it,en")) would be "en", and 348 // getBestMatch("en", LocaleMatcher("it,und")) would be "und". 349 // 350 // To avoid that, we change the matcher's definitions of max (AddLikelySubtagsWithDefaults) 351 // so that max("und")="und". That produces the following, more desirable results: 352 if (languageCode.equals(UNKNOWN_LOCALE)) { 353 return UNKNOWN_LOCALE; 354 } 355 final ULocale result = ULocale.addLikelySubtags(languageCode); 356 // should have method on getLikelySubtags for this 357 if (result == null || result.equals(languageCode)) { 358 final String language = languageCode.getLanguage(); 359 final String script = languageCode.getScript(); 360 final String region = languageCode.getCountry(); 361 return new ULocale((language.length() == 0 ? "und" 362 : language) 363 + "_" 364 + (script.length() == 0 ? "Zzzz" : script) 365 + "_" 366 + (region.length() == 0 ? "ZZ" : region)); 367 } 368 return result; 369 } 370 371 private static class LocalePatternMatcher { 372 // a value of null means a wildcard; matches any. 373 private String lang; 374 private String script; 375 private String region; 376 private Level level; 377 static Pattern pattern = Pattern.compile( 378 "([a-z]{1,8}|\\*)" 379 + "(?:[_-]([A-Z][a-z]{3}|\\*))?" 380 + "(?:[_-]([$]!?[a-zA-Z]+|[A-Z]{2}|[0-9]{3}|\\*))?"); 381 LocalePatternMatcher(String toMatch)382 public LocalePatternMatcher(String toMatch) { 383 Matcher matcher = pattern.matcher(toMatch); 384 if (!matcher.matches()) { 385 throw new IllegalArgumentException("Bad pattern: " + toMatch); 386 } 387 lang = matcher.group(1); 388 script = matcher.group(2); 389 region = matcher.group(3); 390 level = region != null ? Level.region : script != null ? Level.script : Level.language; 391 392 if (lang.equals("*")) { 393 lang = null; 394 } 395 if (script != null && script.equals("*")) { 396 script = null; 397 } 398 if (region != null && region.equals("*")) { 399 region = null; 400 } 401 } 402 matches(ULocale ulocale)403 boolean matches(ULocale ulocale) { 404 if (lang != null && !lang.equals(ulocale.getLanguage())) { 405 return false; 406 } 407 if (script != null && !script.equals(ulocale.getScript())) { 408 return false; 409 } 410 if (region != null && !region.equals(ulocale.getCountry())) { 411 return false; 412 } 413 return true; 414 } 415 getLevel()416 public Level getLevel() { 417 return level; 418 } 419 getLanguage()420 public String getLanguage() { 421 return (lang == null ? "*" : lang); 422 } 423 getScript()424 public String getScript() { 425 return (script == null ? "*" : script); 426 } 427 getRegion()428 public String getRegion() { 429 return (region == null ? "*" : region); 430 } 431 432 @Override toString()433 public String toString() { 434 String result = getLanguage(); 435 if (level != Level.language) { 436 result += "-" + getScript(); 437 if (level != Level.script) { 438 result += "-" + getRegion(); 439 } 440 } 441 return result; 442 } 443 444 /* (non-Javadoc) 445 * @see java.lang.Object#equals(java.lang.Object) 446 */ 447 @Override equals(Object obj)448 public boolean equals(Object obj) { 449 LocalePatternMatcher other = (LocalePatternMatcher) obj; 450 return Objects.equals(level, other.level) 451 && Objects.equals(lang, other.lang) 452 && Objects.equals(script, other.script) 453 && Objects.equals(region, other.region); 454 } 455 456 /* (non-Javadoc) 457 * @see java.lang.Object#hashCode() 458 */ 459 @Override hashCode()460 public int hashCode() { 461 return level.ordinal() 462 ^ (lang == null ? 0 : lang.hashCode()) 463 ^ (script == null ? 0 : script.hashCode()) 464 ^ (region == null ? 0 : region.hashCode()); 465 } 466 } 467 468 enum Level { 469 language(0.99), 470 script(0.2), 471 region(0.04); 472 473 final double worst; 474 Level(double d)475 Level(double d) { 476 worst = d; 477 } 478 } 479 480 private static class ScoreData implements Freezable<ScoreData> { 481 @SuppressWarnings("unused") 482 private static final double maxUnequal_changeD_sameS = 0.5; 483 484 @SuppressWarnings("unused") 485 private static final double maxUnequal_changeEqual = 0.75; 486 487 LinkedHashSet<Row.R3<LocalePatternMatcher, LocalePatternMatcher, Double>> scores = new LinkedHashSet<>(); 488 final Level level; 489 ScoreData(Level level)490 public ScoreData(Level level) { 491 this.level = level; 492 } 493 addDataToScores(String desired, String supported, R3<LocalePatternMatcher, LocalePatternMatcher, Double> data)494 void addDataToScores(String desired, String supported, R3<LocalePatternMatcher, LocalePatternMatcher, Double> data) { 495 // Map<String, Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>>> lang_result = scores.get(desired); 496 // if (lang_result == null) { 497 // scores.put(desired, lang_result = new HashMap()); 498 // } 499 // Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>> result = lang_result.get(supported); 500 // if (result == null) { 501 // lang_result.put(supported, result = new LinkedHashSet()); 502 // } 503 // result.add(data); 504 boolean added = scores.add(data); 505 if (!added) { 506 throw new ICUException("trying to add duplicate data: " + data); 507 } 508 } 509 getScore(ULocale dMax, String desiredRaw, String desiredMax, ULocale sMax, String supportedRaw, String supportedMax)510 double getScore(ULocale dMax, String desiredRaw, String desiredMax, 511 ULocale sMax, String supportedRaw, String supportedMax) { 512 double distance = 0; 513 if (!desiredMax.equals(supportedMax)) { 514 distance = getRawScore(dMax, sMax); 515 } else if (!desiredRaw.equals(supportedRaw)) { // maxes are equal, changes are equal 516 distance += 0.001; 517 } 518 return distance; 519 } 520 getRawScore(ULocale desiredLocale, ULocale supportedLocale)521 private double getRawScore(ULocale desiredLocale, ULocale supportedLocale) { 522 if (DEBUG) { 523 System.out.println("\t\t\t" + level + " Raw Score:\t" + desiredLocale + ";\t" + supportedLocale); 524 } 525 for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> datum : scores) { // : result 526 if (datum.get0().matches(desiredLocale) 527 && datum.get1().matches(supportedLocale)) { 528 if (DEBUG) { 529 System.out.println("\t\t\t\tFOUND\t" + datum); 530 } 531 return datum.get2(); 532 } 533 } 534 if (DEBUG) { 535 System.out.println("\t\t\t\tNOTFOUND\t" + level.worst); 536 } 537 return level.worst; 538 } 539 540 @Override toString()541 public String toString() { 542 StringBuilder result = new StringBuilder().append(level); 543 for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> score : scores) { 544 result.append("\n\t\t").append(score); 545 } 546 return result.toString(); 547 } 548 549 @Override 550 @SuppressWarnings("unchecked") cloneAsThawed()551 public ScoreData cloneAsThawed() { 552 try { 553 ScoreData result = (ScoreData) clone(); 554 result.scores = (LinkedHashSet<R3<LocalePatternMatcher, LocalePatternMatcher, Double>>) result.scores.clone(); 555 result.frozen = false; 556 return result; 557 } catch (CloneNotSupportedException e) { 558 throw new ICUCloneNotSupportedException(e); // will never happen 559 } 560 561 } 562 563 private volatile boolean frozen = false; 564 565 @Override freeze()566 public ScoreData freeze() { 567 return this; 568 } 569 570 @Override isFrozen()571 public boolean isFrozen() { 572 return frozen; 573 } 574 getMatchingLanguages()575 public Relation<String, String> getMatchingLanguages() { 576 Relation<String, String> desiredToSupported = Relation.of(new LinkedHashMap<String, Set<String>>(), HashSet.class); 577 for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> item : scores) { 578 LocalePatternMatcher desired = item.get0(); 579 LocalePatternMatcher supported = item.get1(); 580 if (desired.lang != null && supported.lang != null) { // explicitly mentioned languages must have reasonable distance 581 desiredToSupported.put(desired.lang, supported.lang); 582 } 583 } 584 desiredToSupported.freeze(); 585 return desiredToSupported; 586 } 587 } 588 589 /** 590 * Only for testing and use by tools. Interface may change!! 591 * @internal 592 * @deprecated This API is ICU internal only. 593 */ 594 @Deprecated 595 public static class LanguageMatcherData implements Freezable<LanguageMatcherData> { 596 private ScoreData languageScores = new ScoreData(Level.language); 597 private ScoreData scriptScores = new ScoreData(Level.script); 598 private ScoreData regionScores = new ScoreData(Level.region); 599 private Relation<String, String> matchingLanguages; 600 private volatile boolean frozen = false; 601 602 /** 603 * @internal 604 * @deprecated This API is ICU internal only. 605 */ 606 @Deprecated LanguageMatcherData()607 public LanguageMatcherData() { 608 } 609 610 /** 611 * @internal 612 * @deprecated This API is ICU internal only. 613 */ 614 @Deprecated matchingLanguages()615 public Relation<String, String> matchingLanguages() { 616 return matchingLanguages; 617 } 618 619 /** 620 * @internal 621 * @deprecated This API is ICU internal only. 622 */ 623 @Override 624 @Deprecated toString()625 public String toString() { 626 return languageScores + "\n\t" + scriptScores + "\n\t" + regionScores; 627 } 628 629 /** 630 * @internal 631 * @deprecated This API is ICU internal only. 632 */ 633 @Deprecated match(ULocale a, ULocale aMax, ULocale b, ULocale bMax)634 public double match(ULocale a, ULocale aMax, ULocale b, ULocale bMax) { 635 double diff = 0; 636 diff += languageScores.getScore(aMax, a.getLanguage(), aMax.getLanguage(), bMax, b.getLanguage(), bMax.getLanguage()); 637 if (diff > 0.999d) { // with no language match, we bail 638 return 0.0d; 639 } 640 diff += scriptScores.getScore(aMax, a.getScript(), aMax.getScript(), bMax, b.getScript(), bMax.getScript()); 641 diff += regionScores.getScore(aMax, a.getCountry(), aMax.getCountry(), bMax, b.getCountry(), bMax.getCountry()); 642 643 if (!a.getVariant().equals(b.getVariant())) { 644 diff += 0.01; 645 } 646 if (diff < 0.0d) { 647 diff = 0.0d; 648 } else if (diff > 1.0d) { 649 diff = 1.0d; 650 } 651 if (DEBUG) { 652 System.out.println("\t\t\tTotal Distance\t" + diff); 653 } 654 return 1.0 - diff; 655 } 656 657 /** 658 * Add an exceptional distance between languages, typically because regional 659 * dialects were given their own language codes. At this point the code is 660 * symmetric. We don't bother producing an equivalence class because there are 661 * so few cases; this function depends on the other permutations being 662 * added specifically. 663 * @internal 664 * @deprecated This API is ICU internal only. 665 */ 666 @SuppressWarnings("unused") 667 @Deprecated addDistance(String desired, String supported, int percent)668 private LanguageMatcherData addDistance(String desired, String supported, int percent) { 669 return addDistance(desired, supported, percent, false, null); 670 } 671 672 /** 673 * @internal 674 * @deprecated This API is ICU internal only. 675 */ 676 @Deprecated addDistance(String desired, String supported, int percent, String comment)677 public LanguageMatcherData addDistance(String desired, String supported, int percent, String comment) { 678 return addDistance(desired, supported, percent, false, comment); 679 } 680 681 /** 682 * @internal 683 * @deprecated This API is ICU internal only. 684 */ 685 @Deprecated addDistance(String desired, String supported, int percent, boolean oneway)686 public LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway) { 687 return addDistance(desired, supported, percent, oneway, null); 688 } 689 addDistance(String desired, String supported, int percent, boolean oneway, String comment)690 private LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway, String comment) { 691 if (DEBUG) { 692 System.out.println("\t<languageMatch desired=\"" + desired + "\"" + 693 " supported=\"" + supported + "\"" + 694 " percent=\"" + percent + "\"" 695 + (oneway ? " oneway=\"true\"" : "") 696 + "/>" 697 + (comment == null ? "" : "\t<!-- " + comment + " -->")); 698 // // .addDistance("nn", "nb", 4, true) 699 // System.out.println(".addDistance(\"" + desired + "\"" + 700 // ", \"" + supported + "\"" + 701 // ", " + percent + "" 702 // + (oneway ? "" : ", true") 703 // + (comment == null ? "" : ", \"" + comment + "\"") 704 // + ")" 705 // ); 706 707 } 708 double score = 1 - percent / 100.0; // convert from percentage 709 LocalePatternMatcher desiredMatcher = new LocalePatternMatcher(desired); 710 Level desiredLen = desiredMatcher.getLevel(); 711 LocalePatternMatcher supportedMatcher = new LocalePatternMatcher(supported); 712 Level supportedLen = supportedMatcher.getLevel(); 713 if (desiredLen != supportedLen) { 714 throw new IllegalArgumentException("Lengths unequal: " + desired + ", " + supported); 715 } 716 R3<LocalePatternMatcher, LocalePatternMatcher, Double> data = Row.of(desiredMatcher, supportedMatcher, score); 717 R3<LocalePatternMatcher, LocalePatternMatcher, Double> data2 = oneway ? null : Row.of(supportedMatcher, desiredMatcher, score); 718 boolean desiredEqualsSupported = desiredMatcher.equals(supportedMatcher); 719 switch (desiredLen) { 720 case language: 721 String dlanguage = desiredMatcher.getLanguage(); 722 String slanguage = supportedMatcher.getLanguage(); 723 languageScores.addDataToScores(dlanguage, slanguage, data); 724 if (!oneway && !desiredEqualsSupported) { 725 languageScores.addDataToScores(slanguage, dlanguage, data2); 726 } 727 break; 728 case script: 729 String dscript = desiredMatcher.getScript(); 730 String sscript = supportedMatcher.getScript(); 731 scriptScores.addDataToScores(dscript, sscript, data); 732 if (!oneway && !desiredEqualsSupported) { 733 scriptScores.addDataToScores(sscript, dscript, data2); 734 } 735 break; 736 case region: 737 String dregion = desiredMatcher.getRegion(); 738 String sregion = supportedMatcher.getRegion(); 739 regionScores.addDataToScores(dregion, sregion, data); 740 if (!oneway && !desiredEqualsSupported) { 741 regionScores.addDataToScores(sregion, dregion, data2); 742 } 743 break; 744 } 745 return this; 746 } 747 748 /** 749 * {@inheritDoc} 750 * @internal 751 * @deprecated This API is ICU internal only. 752 */ 753 @Override 754 @Deprecated cloneAsThawed()755 public LanguageMatcherData cloneAsThawed() { 756 LanguageMatcherData result; 757 try { 758 result = (LanguageMatcherData) clone(); 759 result.languageScores = languageScores.cloneAsThawed(); 760 result.scriptScores = scriptScores.cloneAsThawed(); 761 result.regionScores = regionScores.cloneAsThawed(); 762 result.frozen = false; 763 return result; 764 } catch (CloneNotSupportedException e) { 765 throw new ICUCloneNotSupportedException(e); // will never happen 766 } 767 } 768 769 /** 770 * {@inheritDoc} 771 * @internal 772 * @deprecated This API is ICU internal only. 773 */ 774 @Override 775 @Deprecated freeze()776 public LanguageMatcherData freeze() { 777 languageScores.freeze(); 778 regionScores.freeze(); 779 scriptScores.freeze(); 780 matchingLanguages = languageScores.getMatchingLanguages(); 781 frozen = true; 782 return this; 783 } 784 785 /** 786 * {@inheritDoc} 787 * @internal 788 * @deprecated This API is ICU internal only. 789 */ 790 @Override 791 @Deprecated isFrozen()792 public boolean isFrozen() { 793 return frozen; 794 } 795 } 796 797 LanguageMatcherData matcherData; 798 799 private static final LanguageMatcherData defaultWritten; 800 801 private static HashMap<String, String> canonicalMap = new HashMap<>(); 802 803 static { 804 canonicalMap.put("iw", "he"); 805 canonicalMap.put("mo", "ro"); 806 canonicalMap.put("tl", "fil"); 807 808 ICUResourceBundle suppData = getICUSupplementalData(); 809 ICUResourceBundle languageMatching = suppData.findTopLevel("languageMatching"); 810 ICUResourceBundle written = (ICUResourceBundle) languageMatching.get("written"); 811 defaultWritten = new LanguageMatcherData(); 812 813 for (UResourceBundleIterator iter = written.getIterator(); iter.hasNext();) { 814 ICUResourceBundle item = (ICUResourceBundle) iter.next(); 815 /* 816 "*_*_*", 817 "*_*_*", 818 "96", 819 */ 820 // <languageMatch desired="gsw" supported="de" percent="96" oneway="true" /> 821 boolean oneway = item.getSize() > 3 && "1".equals(item.getString(3)); 822 defaultWritten.addDistance(item.getString(0), item.getString(1), Integer.parseInt(item.getString(2)), oneway); 823 } defaultWritten.freeze()824 defaultWritten.freeze(); 825 } 826 827 /** 828 * @internal 829 * @deprecated This API is ICU internal only. 830 */ 831 @Deprecated getICUSupplementalData()832 public static ICUResourceBundle getICUSupplementalData() { 833 ICUResourceBundle suppData = (ICUResourceBundle) UResourceBundle.getBundleInstance( 834 ICUData.ICU_BASE_NAME, 835 "supplementalData", 836 ICUResourceBundle.ICU_DATA_CLASS_LOADER); 837 return suppData; 838 } 839 840 /** 841 * @internal 842 * @deprecated This API is ICU internal only. 843 */ 844 @Deprecated match(ULocale a, ULocale b)845 public static double match(ULocale a, ULocale b) { 846 final LocaleMatcher matcher = new LocaleMatcher(""); 847 return matcher.match(a, matcher.addLikelySubtags(a), b, matcher.addLikelySubtags(b)); 848 } 849 } 850