package org.unicode.cldr.unittest;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Collection;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import javax.xml.xpath.XPathException;

import org.unicode.cldr.test.ExampleGenerator;
import org.unicode.cldr.test.ExampleGenerator.ExampleType;
import org.unicode.cldr.util.AttributeValueValidity;
import org.unicode.cldr.util.AttributeValueValidity.MatcherPattern;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.Iso639Data;
import org.unicode.cldr.util.Iso639Data.Scope;
import org.unicode.cldr.util.Iso639Data.Type;
import org.unicode.cldr.util.LanguageTagCanonicalizer;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.LanguageTagParser.Format;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.SimpleXMLSource;
import org.unicode.cldr.util.StandardCodes.CodeType;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.TransliteratorUtilities;
import org.unicode.cldr.util.XPathExpressionParser;
import org.w3c.dom.Element;
import org.w3c.dom.Node;

import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableSet;
import com.google.common.io.Files;
import com.ibm.icu.dev.util.CollectionUtilities;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ICUUncheckedIOException;

/**
 * Tests of locale identifiers: validity of the language/script/region parts
 * used in file names and English display names, consistency between
 * default-content locales, parent locales and likely subtags, canonicalization,
 * and locale display-name formatting.
 */
public class TestLocale extends TestFmwkPlus {
    private static final Charset UTF_8 = Charset.forName("UTF-8");
    static CLDRConfig testInfo = CLDRConfig.getInstance();
    private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = testInfo.getSupplementalDataInfo();
    public static Splitter AT_AND_SEMI = Splitter.on(CharMatcher.anyOf(";@"));

    public static void main(String[] args) {
        new TestLocale().run(args);
    }

    static Set<Type> ALLOWED_LANGUAGE_TYPES = EnumSet.of(Type.Ancient,
        Type.Living, Type.Constructed, Type.Historical, Type.Extinct, Type.Special);
    static Set<Scope> ALLOWED_LANGUAGE_SCOPES = EnumSet.of(Scope.Individual,
        Scope.Macrolanguage, Scope.Special); // , Special, Collection, PrivateUse, Unknown
    static Set<String> ALLOWED_SCRIPTS = testInfo.getStandardCodes()
        .getGoodAvailableCodes(CodeType.script);
    static Set<String> ALLOWED_REGIONS = testInfo.getStandardCodes()
        .getGoodAvailableCodes(CodeType.territory);

    /**
     * XPath expression that will find all alias tags
     */
    static String XPATH_ALIAS_STRING = "//alias";

    /**
     * For every available locale that carries a script subtag, checks that
     * (a) language_script is not a default-content locale and (b) the
     * corresponding language_region locale is also available. Languages listed
     * in {@code knownMultiScriptLanguages} are exempt from both checks.
     */
    public void TestLanguageRegions() {
        // Remembers regions already reported so each missing locale is flagged once.
        Set<String> missingLanguageRegion = new LinkedHashSet<String>();
        // TODO This should be derived from metadata: https://unicode.org/cldr/trac/ticket/11224
        Set<String> knownMultiScriptLanguages = new HashSet<String>(
            Arrays.asList("az", "ff", "bs", "pa", "shi", "sr", "vai", "uz", "yue", "zh"));
        Set<String> available = testInfo.getCldrFactory().getAvailable();
        LanguageTagParser ltp = new LanguageTagParser();
        Set<String> defaultContents = SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales();
        for (String locale : available) {
            String base = ltp.set(locale).getLanguage();
            String script = ltp.getScript();
            String region = ltp.getRegion();
            if (script.isEmpty()) {
                continue;
            }
            ltp.setRegion("");
            String baseScript = ltp.toString();
            if (!knownMultiScriptLanguages.contains(base)) {
                assertFalse("Should NOT have " + locale,
                    defaultContents.contains(baseScript));
            }
            if (region.isEmpty()) {
                continue;
            }
            ltp.setScript("");
            ltp.setRegion(region);
            String baseRegion = ltp.toString();
            if (knownMultiScriptLanguages.contains(base)) {
                continue;
            }
            if (!missingLanguageRegion.contains(baseRegion)
                && !assertTrue("Should have " + baseRegion,
                    available.contains(baseRegion))) {
                missingLanguageRegion.add(baseRegion);
            }
        }
    }

    /**
     * Determine whether the file should be checked for aliases; this is
     * currently not done for Keyboard definitions or DTD's
     *
     * @param f
     *            the file to check
     * @return true if the file is readable, its absolute path ends with "xml",
     *         and the path contains none of "dtd", "keyboard" or "Keyboard"
     */
    protected boolean shouldCheckForAliases(File f) {
        if (!f.canRead()) {
            return false;
        }
        String absPath = f.getAbsolutePath();
        return absPath.endsWith("xml") && !absPath.contains("dtd")
            && !absPath.contains("keyboard")
            && !absPath.contains("Keyboard");
    }

    /**
     * Check a single file for aliases. The only precondition checked on the
     * file itself is that it is readable. A locale is recorded when the file
     * contains an {@code alias} element with a non-empty {@code source}
     * attribute.
     *
     * @param localeName
     *            - the localename
     * @param file
     *            - the file to check
     * @param localesWithAliases
     *            - a set of locale strings the files of which contain aliases;
     *            {@code localeName} is added to it when an alias is found
     */
    private void checkForAliases(final String localeName, File file,
        final Set<String> localesWithAliases) {
        if (!file.canRead()) {
            return;
        }
        try {
            XPathExpressionParser parser = new XPathExpressionParser(file);
            parser.iterateThroughNodeSet(XPATH_ALIAS_STRING,
                new XPathExpressionParser.NodeHandlingInterface() {

                    // Handle gets called for every node of the node set
                    @Override
                    public void handle(Node result) {
                        if (!(result instanceof Element)) {
                            return;
                        }
                        Element el = (Element) result;
                        // this node likely has an attribute source
                        if (el.hasAttributes()) {
                            String sourceAttr = el.getAttribute("source");
                            if (sourceAttr != null && !sourceAttr.isEmpty()) {
                                localesWithAliases.add(localeName);
                            }
                        }
                    }
                });
        } catch (IOException | XPathException e) {
            // Report instead of silently printing a stack trace: a file that
            // cannot be parsed means the alias check did not actually run.
            errln("Could not check " + file + " for aliases: " + e);
        }
    }

    /**
     * Tests the validity of the file names and of the English localeDisplayName
     * types. Also tests for aliases outside root
     */
    public void TestLocalePartsValidity() {
        LanguageTagParser ltp = new LanguageTagParser();
        final Set<String> localesWithAliases = new HashSet<>();
        for (File file : CLDRConfig.getInstance().getAllCLDRFilesEndingWith(".xml")) {
            String parent = file.getParent();
            if (parent.contains("transform")
                || parent.contains("bcp47")
                || parent.contains("supplemental")
                || parent.contains("validity")) {
                continue;
            }
            String localeName = file.getName();
            // remove .xml
            localeName = localeName.substring(0, localeName.length() - 4);
            if (localeName.equals("root") || localeName.equals("_platform")) {
                continue;
            }
            checkLocale(file.toString(), localeName, ltp);
            // check for aliases
            if (shouldCheckForAliases(file)) {
                checkForAliases(localeName, file, localesWithAliases);
            }
        }
        // we ran through all of them
        if (!localesWithAliases.isEmpty()) {
            StringBuilder sb = new StringBuilder("\r\n");
            sb.append("The following locales have aliases, but must not: ");
            for (String localeWithAlias : localesWithAliases) {
                sb.append(localeWithAlias).append(" ");
            }
            System.out.println(sb.toString());
        }
        // now check English-resolved
        CLDRFile english = testInfo.getEnglish();
        for (String xpath : english) {
            if (!xpath.startsWith("//ldml/localeDisplayNames/")) {
                continue;
            }
            switch (CLDRFile.getNameType(xpath)) {
            case 0:
                checkLocale("English xpath", CLDRFile.getCode(xpath), ltp);
                break;
            case 1:
                checkScript("English xpath", CLDRFile.getCode(xpath));
                break;
            case 2:
                checkRegion("English xpath", CLDRFile.getCode(xpath));
                break;
            default:
                // other name types are not locale parts; nothing to check
                break;
            }
        }
    }

    /**
     * Parses {@code localeName} and checks its language, script and region.
     *
     * @param fileString
     *            description of where the locale came from, used in messages
     * @param localeName
     *            the locale identifier to parse
     * @param ltp
     *            parser to reuse; its state is overwritten
     */
    public void checkLocale(String fileString, String localeName,
        LanguageTagParser ltp) {
        ltp.set(localeName);
        checkLanguage(fileString, ltp.getLanguage());
        checkScript(fileString, ltp.getScript());
        checkRegion(fileString, ltp.getRegion());
    }

    /**
     * Asserts that a non-empty region is in {@link #ALLOWED_REGIONS}; the
     * codes AN, XA and XB are skipped.
     */
    public void checkRegion(String file, String region) {
        if (!region.isEmpty() && !region.equals("AN")
            && !region.equals("XA") && !region.equals("XB")) {
            assertRelation("Region ok? " + region + " in " + file, true,
                ALLOWED_REGIONS, TestFmwkPlus.CONTAINS, region);
        }
    }

    final MatcherPattern SCRIPT_NON_UNICODE = AttributeValueValidity.getMatcherPattern("$scriptNonUnicode");

    /**
     * Asserts that a non-empty script is in {@link #ALLOWED_SCRIPTS}, unless
     * it is absent from that set but matched by {@code $scriptNonUnicode}.
     */
    public void checkScript(String file, String script) {
        if (script.isEmpty()) {
            return;
        }
        if (!ALLOWED_SCRIPTS.contains(script) && SCRIPT_NON_UNICODE.matches(script, null)) {
            return;
        }
        assertRelation("Script ok? " + script + " in " + file, true,
            ALLOWED_SCRIPTS, TestFmwkPlus.CONTAINS, script);
    }

    /**
     * Asserts that the language (other than "root") has an allowed ISO 639
     * scope and, only if the scope check passes, an allowed ISO 639 type.
     */
    public void checkLanguage(String file, String language) {
        if (language.equals("root")) {
            return;
        }
        Scope scope = Iso639Data.getScope(language);
        if (assertRelation("Language ok? " + language + " in " + file,
            true, ALLOWED_LANGUAGE_SCOPES, TestFmwkPlus.CONTAINS, scope)) {
            Type type = Iso639Data.getType(language);
            assertRelation("Language ok? " + language + " in " + file,
                true, ALLOWED_LANGUAGE_TYPES, TestFmwkPlus.CONTAINS, type);
        }
    }

    /**
     * Checks that parentLocales, defaultContent and likelySubtags agree with
     * each other for every available locale.
     */
    public void TestConsistency() {
        LanguageTagParser ltp = new LanguageTagParser();
        SupplementalDataInfo supplementalDataInfo = SUPPLEMENTAL_DATA_INFO;
        Set<String> defaultContentLocales = supplementalDataInfo.getDefaultContentLocales();
        Map<String, String> likelySubtags = supplementalDataInfo.getLikelySubtags();

        for (String locale : testInfo.getCldrFactory().getAvailable()) {
            if (locale.equals("root")) {
                continue;
            }
            ltp.set(locale);
            boolean isDefaultContent = defaultContentLocales.contains(locale);
            boolean hasScript = !ltp.getScript().isEmpty();
            boolean hasRegion = !ltp.getRegion().isEmpty();
            String language = ltp.getLanguage();
            String maximized = likelySubtags.get(language);
            boolean hasLikelySubtag = maximized != null;

            // verify that the parent locales are consistent with the default
            // locales, for scripts
            // that is, if zh-Hant has a parent of root, then it is not the
            // default content locale, and vice versa
            if (hasScript && !hasRegion) {
                boolean parentIsRoot = "root".equals(
                    supplementalDataInfo.getExplicitParentLocale(locale));
                if (parentIsRoot == isDefaultContent) {
                    errln("Inconsistency between parentLocales and defaultContents: "
                        + locale
                        + (parentIsRoot ? " +" : " -")
                        + "parentIsRoot"
                        + (isDefaultContent ? " +" : " -")
                        + "isDefaultContent");
                }

                // we'd better have a separate likelySubtag
                if (parentIsRoot && !hasLikelySubtag) {
                    errln("Missing likely subtags for: " + locale + " "
                        + TestInheritance.suggestLikelySubtagFor(locale));
                }
            }

            // verify that likelySubtags has all the languages
            if (!hasScript && !hasRegion && !hasLikelySubtag) {
                errln("Missing likely subtags for: " + locale + " "
                    + TestInheritance.suggestLikelySubtagFor(locale));
            }
        }
    }

    /**
     * Checks {@link LanguageTagCanonicalizer} against a table of
     * {input, expected} pairs.
     */
    public void TestCanonicalizer() {
        LanguageTagCanonicalizer canonicalizer = new LanguageTagCanonicalizer();
        String[][] tests = {
            { "iw", "he" },
            { "no-YU", "nb_RS" },
            { "no", "nb" },
            { "eng-833", "en_IM" },
            { "mo", "ro" },
            { "mo_Cyrl", "ro_Cyrl" },
            { "mo_US", "ro_US" },
            { "mo_Cyrl_US", "ro_Cyrl_US" },
            { "sh", "sr_Latn" },
            { "sh_US", "sr_Latn_US" },
            { "sh_Cyrl", "sr" },
            { "sh_Cyrl_US", "sr_US" },
            { "hy_SU", "hy" },
            { "hy_AM", "hy" },
            { "en_SU", "en_RU" },
            { "rO-cYrl-aQ", "ro_Cyrl_AQ" },
        };
        for (String[] pair : tests) {
            String actual = canonicalizer.transform(pair[0]);
            assertEquals("Canonical", pair[1], actual);
        }
    }

    /**
     * Checks display names whose parts contain parentheses, using a dummy
     * locale "xx". Each row is {type, code, stand-alone name, locale ID,
     * expected locale display name, expected simplified example or null};
     * a null type means the row only tests the locale display name.
     */
    public void TestBrackets() {
        String[][] tests = {
            {
                "language",
                "en",
                "Anglish (abc)",
                "en",
                "Anglish [abc]",
                "〖?Anglish [abc]?❬ (U.S. [ghi])❭〗〖?Anglish [abc]?❬ (Latine [def])❭〗〖?Anglish [abc]?❬ (Latine [def], U.S. [ghi])❭〗〖❬Langue: ❭?Anglish (abc)?〗" },
            {
                "script",
                "Latn",
                "Latine (def)",
                "en_Latn",
                "Anglish [abc] (Latine [def])",
                "〖❬Anglish [abc] (❭?Latine [def]?❬)❭〗〖❬Anglish [abc] (❭?Latine [def]?❬, U.S. [ghi])❭〗〖❬Scripte: ❭?Latine (def)?〗" },
            {
                "territory",
                "US",
                "U.S. (ghi)",
                "en_Latn_US",
                "Anglish [abc] (Latine [def], U.S. [ghi])",
                "〖❬Anglish [abc] (❭?U.S. [ghi]?❬)❭〗〖❬Anglish [abc] (Latine [def], ❭?U.S. [ghi]?❬)❭〗〖❬Territorie: ❭?U.S. (ghi)?〗" },
            { null, null, null, "en_US", "Anglish [abc] (U.S. [ghi])", null },
            { "variant", "FOOBAR", "foo (jkl)", "en_foobar", "Anglish [abc] (foo [jkl])", null },
            { "key", "co", "sort (mno)", "en_foobar@co=FOO", "Anglish [abc] (foo [jkl], sort [mno]=foo)", null },
            { "key|type", "co|fii", "sortfii (mno)", "en_foobar@co=FII", "Anglish [abc] (foo [jkl], sortfii [mno])", null },
        };
        // load up a dummy source
        SimpleXMLSource dxs = new SimpleXMLSource("xx");
        for (String[] row : tests) {
            if (row[0] == null) {
                continue;
            }
            int typeCode = CLDRFile.typeNameToCode(row[0]);
            String path = CLDRFile.getKey(typeCode, row[1]);
            dxs.putValueAtDPath(path, row[2]);
        }
        // create a cldrfile from it and test
        SimpleXMLSource root = new SimpleXMLSource("root");
        root.putValueAtDPath(
            "//ldml/localeDisplayNames/localeDisplayPattern/localePattern",
            "{0} ({1})");
        root.putValueAtDPath(
            "//ldml/localeDisplayNames/localeDisplayPattern/localeSeparator",
            "{0}, {1}");
        root.putValueAtDPath(
            "//ldml/localeDisplayNames/codePatterns/codePattern[@type=\"language\"]",
            "Langue: {0}");
        root.putValueAtDPath(
            "//ldml/localeDisplayNames/codePatterns/codePattern[@type=\"script\"]",
            "Scripte: {0}");
        root.putValueAtDPath(
            "//ldml/localeDisplayNames/codePatterns/codePattern[@type=\"territory\"]",
            "Territorie: {0}");
        CLDRFile f = new CLDRFile(dxs, root);
        ExampleGenerator eg = new ExampleGenerator(f, testInfo.getEnglish(),
            CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY);
        for (String[] row : tests) {
            if (row[0] != null) {
                int typeCode = CLDRFile.typeNameToCode(row[0]);
                String standAlone = f.getName(typeCode, row[1]);
                assertEquals("stand-alone " + row[3], row[2], standAlone);
                if (row[5] != null) {
                    String path = CLDRFile.getKey(typeCode, row[1]);
                    String example = eg.getExampleHtml(path, "?" + row[2] + "?", ExampleType.NATIVE);
                    assertEquals("example " + row[3], row[5], ExampleGenerator.simplify(example));
                }
            }
            String displayName = f.getName(row[3], true, "{0}={1}",
                "{0} ({1})", "{0}, {1}");
            assertEquals("locale " + row[3], row[4], displayName);
        }
    }

    /**
     * Checks locale display names for "zh", "zh-US" and "zh-Arab-US" in
     * English and Japanese.
     */
    public void TestLocaleNamePattern() {
        CLDRFile english = testInfo.getEnglish();
        assertEquals("Locale name", "Chinese", english.getName("zh"));
        assertEquals("Locale name", "Chinese (United States)", english.getName("zh-US"));
        assertEquals("Locale name", "Chinese (Arabic, United States)", english.getName("zh-Arab-US"));
        CLDRFile japanese = testInfo.getCLDRFile("ja", true);
        assertEquals("Locale name", "中国語", japanese.getName("zh"));
        assertEquals("Locale name", "中国語 (アメリカ合衆国)", japanese.getName("zh-US"));
        assertEquals("Locale name", "中国語 (アラビア文字\u3001アメリカ合衆国)",
            japanese.getName("zh-Arab-US"));
    }

    /**
     * Data-driven test of locale display names, read from
     * {@code localeIdentifiers/localeDisplayName.txt} under the test data
     * directory. Lines starting with '#' are comments; "@locale=xx" selects the
     * locale whose names are tested; other lines are "localeId ; expected".
     * Afterwards, names are generated for samples of every non-deprecated
     * BCP 47 key/value that the data file did not already cover.
     */
    public void TestLocaleDisplay() {
        System.out.println("\nUse -v to get samples for tests");
        String fileName = CLDRPaths.TEST_DATA + "localeIdentifiers/localeDisplayName.txt";
        LanguageTagCanonicalizer canonicalizer = new LanguageTagCanonicalizer(LstrType.redundant);

        CLDRFile cldrFile = null;
        StringBuilder formattedExamplesForSpec = new StringBuilder("\nformattedExamplesForSpec\n");
        File[] paths = {
            new File(CLDRPaths.MAIN_DIRECTORY),
            new File(CLDRPaths.SUBDIVISIONS_DIRECTORY),
        };
        Factory factory = SimpleFactory.make(paths, ".*");
        Set<String> seen = new HashSet<>();

        try {
            for (String line : Files.readLines(new File(fileName), UTF_8)) {
                line = line.trim();
                if (line.startsWith("#") || line.isEmpty()) {
                    continue;
                }
                if (line.startsWith("@")) {
                    String[] parts = line.split("=");
                    if (parts.length < 2) {
                        // directive without a value, e.g. a bare "@locale"
                        throw new IllegalArgumentException("Bad line: " + line);
                    }
                    switch (parts[0]) {
                    case "@locale":
                        cldrFile = factory.make(parts[1], true);
                        break;
                    case "@compound":
                        // Accepted for compatibility with the data file; the
                        // value currently has no effect on this test.
                        break;
                    default:
                        throw new IllegalArgumentException("Bad line: " + line);
                    }
                    continue;
                }
                if (cldrFile == null) {
                    throw new IllegalArgumentException("No @locale line before: " + line);
                }
                int semi = line.indexOf(';');
                String localeId = line;
                String expected = "";
                if (semi >= 0) {
                    localeId = line.substring(0, semi).trim();
                    expected = line.substring(semi + 1).trim();
                }
                LanguageTagParser ltp = new LanguageTagParser().set(localeId);
                seen.add(localeId);

                // NOTE: checks that the ID round-trips through ICU's ULocale and
                // through LanguageTagParser's BCP 47 / ICU output formats were
                // sketched here but are currently disabled.

                canonicalizer.transform(ltp);
                String name = cldrFile.getName(ltp, true, null);
                if (assertEquals(cldrFile.getLocaleID() + "; " + localeId, expected, name)) {
                    formattedExamplesForSpec.append("<tr><td>")
                        .append(TransliteratorUtilities.toHTML.transform(localeId))
                        .append("</td><td>")
                        .append(TransliteratorUtilities.toHTML.transform(expected))
                        .append("</td></tr>\n");
                }
            }
            if (isVerbose()) {
                System.out.println(formattedExamplesForSpec.toString());
            }
        } catch (IOException e) {
            throw new ICUUncheckedIOException(e);
        }
        if (cldrFile == null) {
            errln("No @locale line found in " + fileName);
            return;
        }
        // generate forms
        Map<R2<String, String>, String> deprecatedMap = SUPPLEMENTAL_DATA_INFO.getBcp47Deprecated();
        keyLoop: for (Entry<String, Set<String>> keyValues : SUPPLEMENTAL_DATA_INFO.getBcp47Keys().keyValuesSet()) {
            String key = keyValues.getKey();
            if ("true".equals(deprecatedMap.get(Row.of(key, "")))) {
                continue;
            }
            String localeBase = "en-" + (LanguageTagParser.isTKey(key) ? "t-" : "u-") + key + "-";
            // abbreviate some values
            switch (key) {
            case "cu":
                showName(cldrFile, seen, localeBase, "eur", "jpy", "usd", "chf");
                continue keyLoop;
            case "tz":
                showName(cldrFile, seen, localeBase, "uslax", "gblon", "chzrh");
                continue keyLoop;
            }
            for (String value : keyValues.getValue()) {
                if ("true".equals(deprecatedMap.get(Row.of(key, value)))) {
                    continue;
                }
                if (isSpecialBcp47Value(value)) {
                    showName(cldrFile, seen, localeBase, getSpecialBcp47ValueSamples(value));
                } else {
                    showName(cldrFile, seen, localeBase, value);
                }
            }
        }
    }

    /** Calls the single-value {@code showName} for each sample in the collection. */
    private void showName(CLDRFile cldrFile, Set<String> skipLocales, String localeBase, Collection<String> samples) {
        for (String sample : samples) {
            showName(cldrFile, skipLocales, localeBase, sample);
        }
    }

    /** Calls the single-value {@code showName} for each sample in the array. */
    private void showName(CLDRFile cldrFile, Set<String> skipLocales, String localeBase, String... samples) {
        for (String sample : samples) {
            showName(cldrFile, skipLocales, localeBase, sample);
        }
    }

    static final UnicodeSet LOCALIZED = new UnicodeSet("[A-Z€$¥${foobar2}]");

    /**
     * Gets the display name of {@code localeBase + value} (unless that locale
     * is in {@code skipLocales}), prints it in verbose mode, and for English
     * reports an error for any "key: value" part inside the parentheses whose
     * value contains nothing from {@link #LOCALIZED}.
     */
    private void showName(CLDRFile cldrFile, Set<String> skipLocales, String localeBase, String value) {
        String locale = localeBase + value;
        if (skipLocales.contains(locale)) {
            return;
        }
        String name = cldrFile.getName(locale, true, null);
        if (isVerbose()) {
            System.out.println(locale + "; " + name);
        }
        // rough check of the name: only looks at the text from the first '(' on
        int parenPos = name.indexOf('(');
        if (parenPos > 0 && cldrFile.getLocaleID().equals("en")) {
            for (String part1 : name.substring(parenPos).split(",")) {
                String[] part2s = part1.split(":");
                if (part2s.length > 1 && !LOCALIZED.containsSome(part2s[1])) {
                    errln(locale + "; " + name);
                }
            }
        }
    }

    /**
     * Checks English names for locales where the language+script or
     * language+region combination has its own name.
     */
    public void TestExtendedLanguage() {
        CLDRFile english = testInfo.getEnglish();
        assertEquals("Extended language translation", "Simplified Chinese",
            english.getName("zh_Hans"));
        assertEquals("Extended language translation",
            "Simplified Chinese (Singapore)", english.getName("zh_Hans_SG"));
        assertEquals("Extended language translation", "American English",
            english.getName("en-US"));
        assertEquals("Extended language translation",
            "American English (Arabic)", english.getName("en-Arab-US"));
    }

    /**
     * For every non-deprecated BCP 47 extension key, asserts that English has
     * a key name and (except for keys "cu"/"tz" and special values) a
     * key-value name, then builds and logs sample locale IDs for up to four
     * values per key.
     */
    public void testAllVariants() {
        Relation<String, String> extensionToKeys = SUPPLEMENTAL_DATA_INFO.getBcp47Extension2Keys();
        Relation<String, String> keyToValues = SUPPLEMENTAL_DATA_INFO.getBcp47Keys();
        Map<R2<String, String>, String> extKeyToDeprecated = SUPPLEMENTAL_DATA_INFO.getBcp47Deprecated();
        Map<String, String> keyToValueType = SUPPLEMENTAL_DATA_INFO.getBcp47ValueType();
        LanguageTagParser ltp = new LanguageTagParser();
        String lastKey = "";
        CLDRFile english = testInfo.getEnglish();

        String extName = english.getKeyName("t"); // special case where we need name
        assertNotNull("Name of extension: " + "t", extName);

        Set<String> allowedNoKeyValueNameSet = ImmutableSet.of("cu", "tz");

        for (Entry<String, String> entry : extensionToKeys.entrySet()) {
            String extension = entry.getKey();
            String key = entry.getValue();

            String dep = extKeyToDeprecated.get(Row.of(key, ""));
            if ("true".equals(dep)) {
                logln("# Deprecated: " + Row.of(extension, key));
                continue;
            }

            boolean allowedNoKeyValueName = allowedNoKeyValueNameSet.contains(key);

            String kname = english.getKeyName(key);
            assertNotNull("Name of key: " + key, kname);

            // single | multiple | incremental | any
            boolean isMultiple = "multiple".equals(keyToValueType.get(key));

            Set<String> values = keyToValues.get(key);
            String lastValue = null;
            int count = 0;
            for (String value : values) {
                dep = extKeyToDeprecated.get(Row.of(key, value));
                if ("true".equals(dep)) {
                    // include the value so the log identifies what was skipped
                    logln("# Deprecated: " + Row.of(extension, key, value));
                    continue;
                }

                boolean specialValue = isSpecialBcp47Value(value);

                String kvname = english.getKeyValueName(key, value);
                if (!allowedNoKeyValueName && !specialValue) {
                    assertNotNull("Name of <" + key + "," + value + ">", kvname);
                }

                // Extra extensions are appended only for the first value of each key.
                String gorp = key.equals(lastKey) ? ""
                    : (key.equals("t") ? "-u-ca-persian" : "-t-hi")
                        + "-a-AA-v-VV-y-YY-x-foobar";

                lastKey = key;
                // only show items for the first four non-deprecated values
                if (++count > 4) {
                    continue;
                }

                if (specialValue) {
                    Set<String> valuesSet = getSpecialBcp47ValueSamples(value);
                    showItem(ltp, extension, key, gorp, valuesSet.toArray(new String[valuesSet.size()]));
                    continue;
                }
                showItem(ltp, extension, key, gorp, value);
                if (isMultiple) {
                    if (lastValue != null) {
                        showItem(ltp, extension, key, gorp, value, lastValue);
                    } else {
                        lastValue = value;
                    }
                }
            }
        }
    }

    /**
     * Returns sample concrete values for a special (placeholder) BCP 47 value.
     *
     * @param value
     *            one of PRIVATE_USE, REORDER_CODE, RG_KEY_VALUE, SUBDIVISION_CODE
     * @return an immutable set of sample values
     * @throws IllegalArgumentException
     *             if {@code value} is not one of the supported placeholders
     */
    public static Set<String> getSpecialBcp47ValueSamples(String value) {
        switch (value) {
        case "PRIVATE_USE": // [t, x0, PRIVATE_USE]
            return ImmutableSet.of("foobar2");
        case "REORDER_CODE": // [u, kr, REORDER_CODE]
            return ImmutableSet.of("arab", "digit-deva-latn");
        case "RG_KEY_VALUE": // [u, rg, RG_KEY_VALUE]
            return ImmutableSet.of("ustx", "gbeng");
        case "SUBDIVISION_CODE": // [u, sd, SUBDIVISION_CODE]
            return ImmutableSet.of("usca", "gbsct", "frnor");
        default:
            throw new IllegalArgumentException("No samples for special BCP 47 value: " + value);
        }
    }

    /**
     * Returns true if the value is unchanged by upper-casing (in the root
     * locale), which is how placeholder values such as PRIVATE_USE are
     * recognized here.
     */
    public static boolean isSpecialBcp47Value(String value) {
        return value.equals(value.toUpperCase(Locale.ROOT));
    }

    /**
     * Builds a sample locale ID "en-GB-&lt;extension&gt;[-hi]-&lt;key&gt;-&lt;values&gt;&lt;gorp&gt;",
     * logs its BCP 47, ICU and structure forms, and logs its English name;
     * any exception while getting the name is reported as an error.
     */
    private void showItem(LanguageTagParser ltp, String extension, String key, String gorp, String... values) {
        String locale = "en-GB-" + extension + (extension.equals("t") ? "-hi" : "")
            + "-" + key + "-" + CollectionUtilities.join(values, "-") + gorp;
        ltp.set(locale);

        logln(ltp.toString(Format.bcp47)
            + " == " + ltp.toString(Format.icu)
            + "\n\t\tstructure:\t" + ltp.toString(Format.structure));
        try {
            String name = testInfo.getEnglish().getName(locale);
            logln("\tname:\t" + name);
        } catch (Exception e) {
            errln("Name for " + locale + "; " + e.getMessage());
            e.printStackTrace();
        }
    }
}