• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.collect.Multimap;
4 import com.google.common.collect.TreeMultimap;
5 import java.util.Collection;
6 import java.util.LinkedHashSet;
7 import java.util.Map;
8 import java.util.Map.Entry;
9 import java.util.Set;
10 import java.util.TreeSet;
11 import org.unicode.cldr.util.CLDRConfig;
12 import org.unicode.cldr.util.CLDRLocale;
13 import org.unicode.cldr.util.CLDRPaths;
14 import org.unicode.cldr.util.CalculatedCoverageLevels;
15 import org.unicode.cldr.util.CldrUtility;
16 import org.unicode.cldr.util.LanguageTagParser;
17 import org.unicode.cldr.util.Level;
18 import org.unicode.cldr.util.StandardCodes.LstrType;
19 import org.unicode.cldr.util.TempPrintWriter;
20 import org.unicode.cldr.util.Validity;
21 import org.unicode.cldr.util.Validity.Status;
22 
23 public class GenerateLikelyTestData {
24     private static final String DUMMY_SCRIPT = "Egyp";
25     private static final String DUMMY_REGION = "AQ";
26     static CLDRConfig config = CLDRConfig.getInstance();
27     static Map<String, String> data = config.getSupplementalDataInfo().getLikelySubtags();
28     static LikelySubtags likely = new LikelySubtags();
29     private static final Validity VALIDITY = Validity.getInstance();
30     static Set<String> okRegions = VALIDITY.getStatusToCodes(LstrType.region).get(Status.regular);
31 
main(String[] args)32     public static void main(String[] args) {
33         try (TempPrintWriter pw =
34                 TempPrintWriter.openUTF8Writer(
35                         CLDRPaths.TEST_DATA + "localeIdentifiers", "likelySubtags.txt")) {
36 
37             pw.println(
38                     "# Test data for Likely Subtags\n"
39                             + CldrUtility.getCopyrightString("#  ")
40                             + "\n"
41                             + "#\n"
42                             + "# Test data for https://www.unicode.org/reports/tr35/tr35.html#Likely_Subtags\n"
43                             + "#\n"
44                             + "# Format:\n");
45             showLine(pw, "# Source", "AddLikely", "RemoveFavorScript", "RemoveFavorRegion");
46             pw.println(
47                     "#   Source: a locale to which the following operations are applied.\n"
48                             + "#   AddLikely: the result of the Add Likely Subtags.\n"
49                             + "#                      If Add Likely Subtags fails, then “FAIL”.\n"
50                             + "#   RemoveFavorScript: Remove Likely Subtags, when the script is favored.\n"
51                             + "#                      Only included when different than AddLikely.\n"
52                             + "#   RemoveFavorRegion: Remove Likely Subtags, when the region is favored.\n"
53                             + "#                      Only included when different than RemoveFavorScript.\n"
54                             + "#\n"
55                             + "# Generation: GenerateLikelyTestData.java\n");
56 
57             // generate alternates
58             // for now, simple case
59             Set<String> testCases = getTestCases(data);
60 
61             for (String testRaw : testCases) {
62                 final CLDRLocale source = CLDRLocale.getInstance(testRaw);
63                 final String test = source.toLanguageTag();
64 
65                 // if the maxLang is empty, we have no data for the language
66                 String lang = source.getLanguage();
67                 String maxLang = likely.maximize(lang);
68                 if (maxLang == null || maxLang.isEmpty()) {
69                     showLine(pw, test, "FAIL", "FAIL", "FAIL");
70                     continue;
71                 }
72 
73                 final String maximize = likely.maximize(test);
74                 final String max = CLDRLocale.getInstance(maximize).toLanguageTag();
75                 final CLDRLocale minFavorScriptLocale =
76                         CLDRLocale.getInstance(likely.setFavorRegion(false).minimize(test));
77                 final String favorScript = minFavorScriptLocale.toLanguageTag();
78                 final CLDRLocale minFavorRegionLocale =
79                         CLDRLocale.getInstance(likely.setFavorRegion(true).minimize(test));
80                 final String minFavorRegion = minFavorRegionLocale.toLanguageTag();
81                 showLine(pw, test, max, favorScript, minFavorRegion);
82             }
83         }
84     }
85 
check(String test0)86     public static void check(String test0) {
87         String check = likely.maximize(test0);
88         System.out.println(test0 + " → " + check);
89     }
90 
91     // test data
92 
93     private static Set<String> ALLOWED_WITH_MACROREGION =
94             Set.of("ar_001", "en_001", "en_150", "es_419"); // only intentional CLDR locales
95 
getTestCases(Map<String, String> data)96     public static Set<String> getTestCases(Map<String, String> data) {
97         CalculatedCoverageLevels coverage = CalculatedCoverageLevels.getInstance();
98         Set<String> skipping = new TreeSet<>();
99         TreeSet<String> testCases = new TreeSet<>();
100         // for CLDR locales, add combinations
101         // collect together the scripts&regions for each language. Will filter later
102         Multimap<String, String> combinations = TreeMultimap.create();
103         for (String localeString : config.getCldrFactory().getAvailable()) {
104             Level effective = coverage.getEffectiveCoverageLevel(localeString);
105             if (effective == null || effective.compareTo(Level.BASIC) < 0) {
106                 continue;
107             }
108             if (localeString.equals("root")) {
109                 continue;
110             }
111             CLDRLocale locale = CLDRLocale.getInstance(localeString);
112             String lang = locale.getLanguage();
113             CLDRLocale max = CLDRLocale.getInstance(likely.maximize(localeString));
114             if (!okRegions.contains(max.getCountry())
115                     && !ALLOWED_WITH_MACROREGION.contains(localeString)) {
116                 skipping.add(localeString);
117                 continue;
118             }
119             combinations.put(lang, max.getScript());
120             combinations.put(lang, max.getCountry());
121             combinations.put(lang, DUMMY_REGION); // check odd conditions
122             combinations.put(lang, DUMMY_SCRIPT); // check odd conditions
123             combinations.put(lang, ""); // check odd conditions
124         }
125         Set<String> undCombinations = new TreeSet<>();
126         for (Entry<String, Collection<String>> entry : combinations.asMap().entrySet()) {
127             undCombinations.addAll(entry.getValue());
128         }
129         combinations.putAll("und", undCombinations);
130 
131         LanguageTagParser ltp = new LanguageTagParser();
132         for (Entry<String, Collection<String>> entry : combinations.asMap().entrySet()) {
133             final String lang = entry.getKey();
134             Set<String> items = new TreeSet<>(entry.getValue());
135             Set<String> scripts = new LinkedHashSet<>();
136             Set<String> regions = new LinkedHashSet<>();
137             for (String scriptOrRegion : items) {
138                 ltp.set(lang); // clears script, region
139                 if (scriptOrRegion.length() == 4) {
140                     ltp.setScript(scriptOrRegion);
141                     scripts.add(scriptOrRegion);
142                 } else {
143                     ltp.setRegion(scriptOrRegion);
144                     if (!scriptOrRegion.isBlank()) {
145                         regions.add(scriptOrRegion);
146                     }
147                 }
148                 testCases.add(CLDRLocale.getInstance(ltp.toString()).toLanguageTag());
149             }
150             scripts.remove(DUMMY_REGION);
151             scripts.remove(DUMMY_SCRIPT);
152 
153             if (!lang.equals("und")) { // record script/region combinations
154                 ltp.set("und");
155                 for (String script : scripts) {
156                     ltp.setScript(script);
157                     for (String region : regions) {
158                         ltp.setRegion(region);
159                         testCases.add(CLDRLocale.getInstance(ltp.toString()).toLanguageTag());
160                     }
161                 }
162             }
163         }
164         testCases.add("qaa");
165         testCases.add("qaa_Cyrl");
166         testCases.add("qaa_CH");
167         testCases.add("qaa_Cyrl_CH");
168 
169         System.out.println("Skipping " + skipping);
170         return testCases;
171     }
172 
showLine( TempPrintWriter tempWriter, String test, final String max, final String minScript, final String minRegion)173     public static void showLine(
174             TempPrintWriter tempWriter,
175             String test,
176             final String max,
177             final String minScript,
178             final String minRegion) {
179         tempWriter.println(
180                 test //
181                         + " ;\t"
182                         + (max.equals(test) ? "" : max) //
183                         + " ;\t"
184                         + (minScript.equals(max) ? "" : minScript) // script favored
185                         + " ;\t"
186                         + (minRegion.equals(minScript) ? "" : minRegion) // region favored
187                 );
188     }
189 }
190