• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.collect.Multimap;
4 import com.google.common.collect.TreeMultimap;
5 import java.util.Collection;
6 import java.util.LinkedHashSet;
7 import java.util.Map;
8 import java.util.Map.Entry;
9 import java.util.Set;
10 import java.util.TreeSet;
11 import org.unicode.cldr.util.CLDRConfig;
12 import org.unicode.cldr.util.CLDRLocale;
13 import org.unicode.cldr.util.CLDRPaths;
14 import org.unicode.cldr.util.CalculatedCoverageLevels;
15 import org.unicode.cldr.util.CldrUtility;
16 import org.unicode.cldr.util.LanguageTagParser;
17 import org.unicode.cldr.util.Level;
18 import org.unicode.cldr.util.StandardCodes.LstrType;
19 import org.unicode.cldr.util.TempPrintWriter;
20 import org.unicode.cldr.util.Validity;
21 import org.unicode.cldr.util.Validity.Status;
22 
23 public class GenerateLikelyTestData {
24     private static final String DUMMY_SCRIPT = "Egyp";
25     private static final String DUMMY_REGION = "AQ";
26     static CLDRConfig config = CLDRConfig.getInstance();
27     static Map<String, String> data = config.getSupplementalDataInfo().getLikelySubtags();
28     static LikelySubtags likely = new LikelySubtags();
29     private static final Validity VALIDITY = Validity.getInstance();
30     static Set<String> okRegions = VALIDITY.getStatusToCodes(LstrType.region).get(Status.regular);
31 
main(String[] args)32     public static void main(String[] args) {
33         try (TempPrintWriter pw =
34                 TempPrintWriter.openUTF8Writer(
35                         CLDRPaths.TEST_DATA + "localeIdentifiers", "likelySubtags.txt")) {
36 
37             pw.println(
38                     "# Test data for Likely Subtags\n"
39                             + CldrUtility.getCopyrightString("#  ")
40                             + "\n"
41                             + "#\n"
42                             + "# Test data for https://www.unicode.org/reports/tr35/tr35.html#Likely_Subtags\n"
43                             + "#\n"
44                             + "# Format:\n");
45             showLine(pw, "# Source", "AddLikely", "RemoveFavorScript", "RemoveFavorRegion");
46             pw.println(
47                     "#   Source: a locale to which the following operations are applied.\n"
48                             + "#   AddLikely: the result of the Add Likely Subtags.\n"
49                             + "#                      If Add Likely Subtags fails, then “FAIL”.\n"
50                             + "#   RemoveFavorScript: Remove Likely Subtags, when the script is favored.\n"
51                             + "#                      Only included when different than AddLikely.\n"
52                             + "#   RemoveFavorRegion: Remove Likely Subtags, when the region is favored.\n"
53                             + "#                      Only included when different than RemoveFavorScript.\n"
54                             + "#\n"
55                             + "# Generation: GenerateLikelyTestData.java\n");
56 
57             // generate alternates
58             // for now, simple case
59             Set<String> testCases = getTestCases(data);
60 
61             for (String testRaw : testCases) {
62                 if (testRaw.startsWith("qaa")) {
63                     int debug = 0;
64                 }
65                 final CLDRLocale source = CLDRLocale.getInstance(testRaw);
66                 final String test = source.toLanguageTag();
67 
68                 // if the maxLang is empty, we have no data for the language
69                 String lang = source.getLanguage();
70                 String maxLang = likely.maximize(lang);
71                 if (maxLang == null || maxLang.isEmpty()) {
72                     showLine(pw, test, "FAIL", "FAIL", "FAIL");
73                     continue;
74                 }
75 
76                 final String maximize = likely.maximize(test);
77                 final String max = CLDRLocale.getInstance(maximize).toLanguageTag();
78                 final CLDRLocale minFavorScriptLocale =
79                         CLDRLocale.getInstance(likely.setFavorRegion(false).minimize(test));
80                 final String favorScript = minFavorScriptLocale.toLanguageTag();
81                 final CLDRLocale minFavorRegionLocale =
82                         CLDRLocale.getInstance(likely.setFavorRegion(true).minimize(test));
83                 final String minFavorRegion = minFavorRegionLocale.toLanguageTag();
84                 showLine(pw, test, max, favorScript, minFavorRegion);
85             }
86         }
87     }
88 
check(String test0)89     public static void check(String test0) {
90         String check = likely.maximize(test0);
91         System.out.println(test0 + " → " + check);
92     }
93 
94     // test data
95 
96     private static Set<String> ALLOWED_WITH_MACROREGION =
97             Set.of("ar_001", "en_001", "en_150", "es_419"); // only intentional CLDR locales
98 
getTestCases(Map<String, String> data)99     public static Set<String> getTestCases(Map<String, String> data) {
100         CalculatedCoverageLevels coverage = CalculatedCoverageLevels.getInstance();
101         Set<String> skipping = new TreeSet<>();
102         TreeSet<String> testCases = new TreeSet<>();
103         // for CLDR locales, add combinations
104         // collect together the scripts&regions for each language. Will filter later
105         Multimap<String, String> combinations = TreeMultimap.create();
106         for (String localeString : config.getCldrFactory().getAvailable()) {
107             Level effective = coverage.getEffectiveCoverageLevel(localeString);
108             if (effective == null || effective.compareTo(Level.BASIC) < 0) {
109                 continue;
110             }
111             if (localeString.equals("root")) {
112                 continue;
113             }
114             CLDRLocale locale = CLDRLocale.getInstance(localeString);
115             String lang = locale.getLanguage();
116             CLDRLocale max = CLDRLocale.getInstance(likely.maximize(localeString));
117             if (!okRegions.contains(max.getCountry())
118                     && !ALLOWED_WITH_MACROREGION.contains(localeString)) {
119                 skipping.add(localeString);
120                 continue;
121             }
122             combinations.put(lang, max.getScript());
123             combinations.put(lang, max.getCountry());
124             combinations.put(lang, DUMMY_REGION); // check odd conditions
125             combinations.put(lang, DUMMY_SCRIPT); // check odd conditions
126             combinations.put(lang, ""); // check odd conditions
127         }
128         Set<String> undCombinations = new TreeSet<>();
129         for (Entry<String, Collection<String>> entry : combinations.asMap().entrySet()) {
130             undCombinations.addAll(entry.getValue());
131         }
132         combinations.putAll("und", undCombinations);
133 
134         LanguageTagParser ltp = new LanguageTagParser();
135         for (Entry<String, Collection<String>> entry : combinations.asMap().entrySet()) {
136             final String lang = entry.getKey();
137             Set<String> items = new TreeSet<>(entry.getValue());
138             Set<String> scripts = new LinkedHashSet<>();
139             Set<String> regions = new LinkedHashSet<>();
140             for (String scriptOrRegion : items) {
141                 ltp.set(lang); // clears script, region
142                 if (scriptOrRegion.length() == 4) {
143                     ltp.setScript(scriptOrRegion);
144                     scripts.add(scriptOrRegion);
145                 } else {
146                     ltp.setRegion(scriptOrRegion);
147                     if (!scriptOrRegion.isBlank()) {
148                         regions.add(scriptOrRegion);
149                     }
150                 }
151                 testCases.add(CLDRLocale.getInstance(ltp.toString()).toLanguageTag());
152             }
153             scripts.remove(DUMMY_REGION);
154             scripts.remove(DUMMY_SCRIPT);
155 
156             if (!lang.equals("und")) { // record script/region combinations
157                 ltp.set("und");
158                 for (String script : scripts) {
159                     ltp.setScript(script);
160                     for (String region : regions) {
161                         ltp.setRegion(region);
162                         testCases.add(CLDRLocale.getInstance(ltp.toString()).toLanguageTag());
163                     }
164                 }
165             }
166         }
167         testCases.add("qaa");
168         testCases.add("qaa_Cyrl");
169         testCases.add("qaa_CH");
170         testCases.add("qaa_Cyrl_CH");
171 
172         System.out.println("Skipping " + skipping);
173         return testCases;
174     }
175 
showLine( TempPrintWriter tempWriter, String test, final String max, final String minScript, final String minRegion)176     public static void showLine(
177             TempPrintWriter tempWriter,
178             String test,
179             final String max,
180             final String minScript,
181             final String minRegion) {
182         tempWriter.println(
183                 test //
184                         + " ;\t"
185                         + (max.equals(test) ? "" : max) //
186                         + " ;\t"
187                         + (minScript.equals(max) ? "" : minScript) // script favored
188                         + " ;\t"
189                         + (minRegion.equals(minScript) ? "" : minRegion) // region favored
190                 );
191     }
192 }
193