• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.collect.ImmutableMultimap;
4 import com.google.common.collect.ImmutableSortedSet;
5 import com.google.common.collect.Multimap;
6 import com.google.common.collect.TreeMultimap;
7 import com.ibm.icu.impl.Relation;
8 import com.ibm.icu.impl.Row;
9 import com.ibm.icu.impl.Row.R2;
10 import com.ibm.icu.text.UnicodeSet;
11 import java.io.IOException;
12 import java.util.Arrays;
13 import java.util.Collection;
14 import java.util.Map;
15 import java.util.Map.Entry;
16 import org.unicode.cldr.util.CLDRConfig;
17 import org.unicode.cldr.util.CLDRFile;
18 import org.unicode.cldr.util.CLDRPaths;
19 import org.unicode.cldr.util.CldrUtility;
20 import org.unicode.cldr.util.LsrvCanonicalizer;
21 import org.unicode.cldr.util.LsrvCanonicalizer.TestDataTypes;
22 import org.unicode.cldr.util.StandardCodes.LstrType;
23 import org.unicode.cldr.util.SupplementalDataInfo;
24 import org.unicode.cldr.util.TempPrintWriter;
25 
26 public class GenerateLocaleIDTestData {
27     private static final LsrvCanonicalizer rrs = LsrvCanonicalizer.getInstance();
28     private static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance();
29     private static final CLDRFile ENGLISH = CLDR_CONFIG.getEnglish();
30 
main(String[] args)31     public static void main(String[] args) throws IOException {
32         try (TempPrintWriter pw =
33                 TempPrintWriter.openUTF8Writer(
34                         CLDRPaths.TEST_DATA + "localeIdentifiers", "localeCanonicalization.txt")) {
35             pw.println("# Test data for locale identifier canonicalization");
36             pw.println(CldrUtility.getCopyrightString("#  "));
37             pw.println(
38                     "#\n"
39                             + "# Format:\n"
40                             + "# <source locale identifier>\t;\t<expected canonicalized locale identifier>\n"
41                             + "#\n"
42                             + "# The data lines are divided into 4 sets:\n"
43                             + "#   "
44                             + LsrvCanonicalizer.TestDataTypes.explicit
45                             + ":    a short list of explicit test cases.\n"
46                             + "#   "
47                             + LsrvCanonicalizer.TestDataTypes.fromAliases
48                             + ": test cases generated from the alias data.\n"
49                             + "#   "
50                             + LsrvCanonicalizer.TestDataTypes.decanonicalized
51                             + ": test cases generated by reversing the normalization process.\n"
52                             + "#   "
53                             + LsrvCanonicalizer.TestDataTypes.withIrrelevants
54                             + ": test cases generated from the others by adding irrelevant fields where possible,\n"
55                             + "#                           to ensure that the canonicalization implementation is not sensitive to irrelevant fields. These include:\n"
56                             + "#     Language: "
57                             + rrs.getIrrelevantField(LstrType.language)
58                             + "\n"
59                             + "#     Script:   "
60                             + rrs.getIrrelevantField(LstrType.script)
61                             + "\n"
62                             + "#     Region:   "
63                             + rrs.getIrrelevantField(LstrType.region)
64                             + "\n"
65                             + "#     Variant:  "
66                             + rrs.getIrrelevantField(LstrType.variant)
67                             + "\n"
68                             + "######\n\n");
69             for (Entry<TestDataTypes, Map<String, String>> mainEntry :
70                     rrs.getTestData(null).entrySet()) {
71                 TestDataTypes type = mainEntry.getKey();
72                 pw.println("\n# " + type + "\n");
73                 for (Entry<String, String> entry : mainEntry.getValue().entrySet()) {
74                     String toTest = entry.getKey();
75                     String expected = entry.getValue();
76                     pw.println(toTest + "\t;\t" + expected);
77                 }
78             }
79         }
80 
81         try (TempPrintWriter pw =
82                 TempPrintWriter.openUTF8Writer(
83                         CLDRPaths.TEST_DATA + "localeIdentifiers", "localeDisplayName.txt")) {
84             pw.println(
85                     "# Test data for locale display name generation\n"
86                             + CldrUtility.getCopyrightString("#  ")
87                             + "\n# Format:\n"
88                             + "# @locale=<locale to display in>\n"
89                             + "# @compound=<whether to form compounds like \"Flemish\" for nl_BE>\n"
90                             + "# <locale to display> ; <expected display name>\n"
91                             + "\n"
92                             + "@locale=en\n"
93                             + "@compound=false\n");
94             pw.println("\n# Simple cases: Language, script, region, variants\n");
95             showDisplayNames(pw, "es", "es-419", "es-Cyrl-MX", "hi-Latn");
96             pw.println(
97                     "\n#Note that the order of the variants is alphabetized before generating names\n");
98             showDisplayNames(pw, "en-Latn-GB-scouse-fonipa");
99             pw.println("\n# Add extensions, and verify their order\n");
100             showDisplayNames(
101                     pw,
102                     "en-u-nu-thai-ca-islamic-civil",
103                     "hi-u-nu-latn-t-en-h0-hybrid",
104                     "en-u-nu-deva-t-de");
105             pw.println("\n# Test ordering of extensions (include well-formed but invalid cases)\n");
106             showDisplayNames(pw, "fr-z-zz-zzz-v-vv-vvv-u-uu-uuu-t-ru-Cyrl-s-ss-sss-a-aa-aaa-x-u-x");
107 
108             pw.println(
109                     "\n# Comprehensive list (mostly comprehensive: currencies, subdivisions, timezones have abbreviated lists)\n");
110             SupplementalDataInfo SDI = CLDR_CONFIG.getSupplementalDataInfo();
111             Relation<String, String> extensionToKeys = SDI.getBcp47Extension2Keys();
112             Multimap<String, String> keyToExtensions = TreeMultimap.create();
113             for (Entry<String, String> entry : extensionToKeys.entrySet()) {
114                 keyToExtensions.put(entry.getValue(), entry.getKey());
115             }
116             final Relation<String, String> keyToValues = SDI.getBcp47Keys();
117             Map<R2<String, String>, String> deprecated = SDI.getBcp47Deprecated();
118 
119             ImmutableMultimap<String, String> overrides =
120                     ImmutableMultimap.<String, String>builder()
121                             .putAll("cu", "eur", "jpy", "usd", "chf")
122                             .putAll("rg", "gbsct", "gbeng")
123                             .putAll("sd", "gbsct", "gbwls")
124                             .putAll("tz", "uslax", "gblon", "chzrh")
125                             .putAll("dx", "thai")
126                             .putAll("vt", "abcd")
127                             .putAll("x0", "foobar2")
128                             .putAll(
129                                     "kr",
130                                     "arab",
131                                     "digit-deva-latn",
132                                     "currency",
133                                     "digit",
134                                     "punct",
135                                     "space",
136                                     "symbol")
137                             .build();
138 
139             final UnicodeSet upper = new UnicodeSet("[A-Z]").freeze();
140 
141             for (String key : keyToValues.keySet()) {
142                 if ("true".equals(deprecated.get(Row.of(key, "")))) {
143                     continue;
144                 }
145                 for (String extension : keyToExtensions.get(key)) {
146                     Collection<String> values =
147                             overrides.containsKey(key)
148                                     ? overrides.get(key)
149                                     : ImmutableSortedSet.copyOf(keyToValues.get(key));
150                     for (String value : values) {
151                         if ("true".equals(deprecated.get(Row.of(key, value)))) {
152                             continue;
153                         }
154                         final String sampleLocale = "en-" + extension + "-" + key + "-" + value;
155                         if (upper.containsSome(value)) {
156                             System.err.println("** FIX NAME: " + sampleLocale);
157                         } else {
158                             showDisplayNames(pw, sampleLocale);
159                         }
160                     }
161                 }
162             }
163         }
164     }
165 
showDisplayNames(TempPrintWriter pw, String... locales)166     private static void showDisplayNames(TempPrintWriter pw, String... locales) {
167         showDisplayNames(pw, Arrays.asList(locales));
168     }
169 
showDisplayNames(TempPrintWriter pw, Collection<String> locales)170     private static void showDisplayNames(TempPrintWriter pw, Collection<String> locales) {
171         for (String locale : locales) {
172             String name = ENGLISH.getName(locale, true);
173             if (name.contains("null")) {
174                 System.err.println("** REPLACE: " + locale + "; " + name);
175             } else {
176                 pw.println(locale + "; " + name);
177             }
178         }
179     }
180 }
181