1 package org.unicode.cldr.tool; 2 3 import com.google.common.collect.ImmutableMultimap; 4 import com.google.common.collect.ImmutableSortedSet; 5 import com.google.common.collect.Multimap; 6 import com.google.common.collect.TreeMultimap; 7 import com.ibm.icu.impl.Relation; 8 import com.ibm.icu.impl.Row; 9 import com.ibm.icu.impl.Row.R2; 10 import com.ibm.icu.text.UnicodeSet; 11 import java.io.IOException; 12 import java.util.Arrays; 13 import java.util.Collection; 14 import java.util.Map; 15 import java.util.Map.Entry; 16 import org.unicode.cldr.util.CLDRConfig; 17 import org.unicode.cldr.util.CLDRFile; 18 import org.unicode.cldr.util.CLDRPaths; 19 import org.unicode.cldr.util.CldrUtility; 20 import org.unicode.cldr.util.LsrvCanonicalizer; 21 import org.unicode.cldr.util.LsrvCanonicalizer.TestDataTypes; 22 import org.unicode.cldr.util.StandardCodes.LstrType; 23 import org.unicode.cldr.util.SupplementalDataInfo; 24 import org.unicode.cldr.util.TempPrintWriter; 25 26 public class GenerateLocaleIDTestData { 27 private static final LsrvCanonicalizer rrs = LsrvCanonicalizer.getInstance(); 28 private static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance(); 29 private static final CLDRFile ENGLISH = CLDR_CONFIG.getEnglish(); 30 main(String[] args)31 public static void main(String[] args) throws IOException { 32 try (TempPrintWriter pw = 33 TempPrintWriter.openUTF8Writer( 34 CLDRPaths.TEST_DATA + "localeIdentifiers", "localeCanonicalization.txt")) { 35 pw.println("# Test data for locale identifier canonicalization"); 36 pw.println(CldrUtility.getCopyrightString("# ")); 37 pw.println( 38 "#\n" 39 + "# Format:\n" 40 + "# <source locale identifier>\t;\t<expected canonicalized locale identifier>\n" 41 + "#\n" 42 + "# The data lines are divided into 4 sets:\n" 43 + "# " 44 + LsrvCanonicalizer.TestDataTypes.explicit 45 + ": a short list of explicit test cases.\n" 46 + "# " 47 + LsrvCanonicalizer.TestDataTypes.fromAliases 48 + ": test cases generated from the alias data.\n" 49 + "# " 50 + LsrvCanonicalizer.TestDataTypes.decanonicalized 51 + ": test cases generated by reversing the normalization process.\n" 52 + "# " 53 + LsrvCanonicalizer.TestDataTypes.withIrrelevants 54 + ": test cases generated from the others by adding irrelevant fields where possible,\n" 55 + "# to ensure that the canonicalization implementation is not sensitive to irrelevant fields. These include:\n" 56 + "# Language: " 57 + rrs.getIrrelevantField(LstrType.language) 58 + "\n" 59 + "# Script: " 60 + rrs.getIrrelevantField(LstrType.script) 61 + "\n" 62 + "# Region: " 63 + rrs.getIrrelevantField(LstrType.region) 64 + "\n" 65 + "# Variant: " 66 + rrs.getIrrelevantField(LstrType.variant) 67 + "\n" 68 + "######\n\n"); 69 for (Entry<TestDataTypes, Map<String, String>> mainEntry : 70 rrs.getTestData(null).entrySet()) { 71 TestDataTypes type = mainEntry.getKey(); 72 pw.println("\n# " + type + "\n"); 73 for (Entry<String, String> entry : mainEntry.getValue().entrySet()) { 74 String toTest = entry.getKey(); 75 String expected = entry.getValue(); 76 pw.println(toTest + "\t;\t" + expected); 77 } 78 } 79 } 80 81 try (TempPrintWriter pw = 82 TempPrintWriter.openUTF8Writer( 83 CLDRPaths.TEST_DATA + "localeIdentifiers", "localeDisplayName.txt")) { 84 pw.println( 85 "# Test data for locale display name generation\n" 86 + CldrUtility.getCopyrightString("# ") 87 + "\n# Format:\n" 88 + "# @locale=<locale to display in>\n" 89 + "# @compound=<whether to form compounds like \"Flemish\" for nl_BE>\n" 90 + "# <locale to display> ; <expected display name>\n" 91 + "\n" 92 + "@locale=en\n" 93 + "@compound=false\n"); 94 pw.println("\n# Simple cases: Language, script, region, variants\n"); 95 showDisplayNames(pw, "es", "es-419", "es-Cyrl-MX", "hi-Latn"); 96 pw.println( 97 "\n#Note that the order of the variants is alphabetized before generating names\n"); 98 showDisplayNames(pw, "en-Latn-GB-scouse-fonipa"); 99 pw.println("\n# Add extensions, and verify their order\n"); 100 showDisplayNames( 101 pw, 102 "en-u-nu-thai-ca-islamic-civil", 103 "hi-u-nu-latn-t-en-h0-hybrid", 104 "en-u-nu-deva-t-de"); 105 pw.println("\n# Test ordering of extensions (include well-formed but invalid cases)\n"); 106 showDisplayNames(pw, "fr-z-zz-zzz-v-vv-vvv-u-uu-uuu-t-ru-Cyrl-s-ss-sss-a-aa-aaa-x-u-x"); 107 108 pw.println( 109 "\n# Comprehensive list (mostly comprehensive: currencies, subdivisions, timezones have abbreviated lists)\n"); 110 SupplementalDataInfo SDI = CLDR_CONFIG.getSupplementalDataInfo(); 111 Relation<String, String> extensionToKeys = SDI.getBcp47Extension2Keys(); 112 Multimap<String, String> keyToExtensions = TreeMultimap.create(); 113 for (Entry<String, String> entry : extensionToKeys.entrySet()) { 114 keyToExtensions.put(entry.getValue(), entry.getKey()); 115 } 116 final Relation<String, String> keyToValues = SDI.getBcp47Keys(); 117 Map<R2<String, String>, String> deprecated = SDI.getBcp47Deprecated(); 118 119 ImmutableMultimap<String, String> overrides = 120 ImmutableMultimap.<String, String>builder() 121 .putAll("cu", "eur", "jpy", "usd", "chf") 122 .putAll("rg", "gbsct", "gbeng") 123 .putAll("sd", "gbsct", "gbwls") 124 .putAll("tz", "uslax", "gblon", "chzrh") 125 .putAll("dx", "thai") 126 .putAll("vt", "abcd") 127 .putAll("x0", "foobar2") 128 .putAll( 129 "kr", 130 "arab", 131 "digit-deva-latn", 132 "currency", 133 "digit", 134 "punct", 135 "space", 136 "symbol") 137 .build(); 138 139 final UnicodeSet upper = new UnicodeSet("[A-Z]").freeze(); 140 141 for (String key : keyToValues.keySet()) { 142 if ("true".equals(deprecated.get(Row.of(key, "")))) { 143 continue; 144 } 145 for (String extension : keyToExtensions.get(key)) { 146 Collection<String> values = 147 overrides.containsKey(key) 148 ? overrides.get(key) 149 : ImmutableSortedSet.copyOf(keyToValues.get(key)); 150 for (String value : values) { 151 if ("true".equals(deprecated.get(Row.of(key, value)))) { 152 continue; 153 } 154 final String sampleLocale = "en-" + extension + "-" + key + "-" + value; 155 if (upper.containsSome(value)) { 156 System.err.println("** FIX NAME: " + sampleLocale); 157 } else { 158 showDisplayNames(pw, sampleLocale); 159 } 160 } 161 } 162 } 163 } 164 } 165 showDisplayNames(TempPrintWriter pw, String... locales)166 private static void showDisplayNames(TempPrintWriter pw, String... locales) { 167 showDisplayNames(pw, Arrays.asList(locales)); 168 } 169 showDisplayNames(TempPrintWriter pw, Collection<String> locales)170 private static void showDisplayNames(TempPrintWriter pw, Collection<String> locales) { 171 for (String locale : locales) { 172 String name = ENGLISH.getName(locale, true); 173 if (name.contains("null")) { 174 System.err.println("** REPLACE: " + locale + "; " + name); 175 } else { 176 pw.println(locale + "; " + name); 177 } 178 } 179 } 180 } 181