package org.unicode.cldr.tool;

import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRLocale;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CalculatedCoverageLevels;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.Level;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.TempPrintWriter;
import org.unicode.cldr.util.Validity;
import org.unicode.cldr.util.Validity.Status;

/**
 * Writes {@code likelySubtags.txt} into the {@code localeIdentifiers} test-data directory.
 *
 * <p>Each data line holds a source locale followed by the result of maximizing it and of
 * minimizing it with {@link LikelySubtags} configured first with {@code setFavorRegion(false)}
 * and then with {@code setFavorRegion(true)}. A column is left empty when it would repeat the
 * value it is compared against (see {@link #showLine}).
 */
public class GenerateLikelyTestData {
    /** Script code added to every language as an "odd condition" test case. */
    private static final String DUMMY_SCRIPT = "Egyp";

    /** Region code added to every language as an "odd condition" test case. */
    private static final String DUMMY_REGION = "AQ";

    static CLDRConfig config = CLDRConfig.getInstance();
    static Map<String, String> data = config.getSupplementalDataInfo().getLikelySubtags();
    static LikelySubtags likely = new LikelySubtags();
    private static final Validity VALIDITY = Validity.getInstance();
    static Set<String> okRegions = VALIDITY.getStatusToCodes(LstrType.region).get(Status.regular);

    /**
     * Generates the test-data file.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try (TempPrintWriter pw =
                TempPrintWriter.openUTF8Writer(
                        CLDRPaths.TEST_DATA + "localeIdentifiers", "likelySubtags.txt")) {

            pw.println(
                    "# Test data for Likely Subtags\n"
                            + CldrUtility.getCopyrightString("# ")
                            + "\n"
                            + "#\n"
                            + "# Test data for https://www.unicode.org/reports/tr35/tr35.html#Likely_Subtags\n"
                            + "#\n"
                            + "# Format:\n");
            showLine(pw, "# Source", "AddLikely", "RemoveFavorScript", "RemoveFavorRegion");
            pw.println(
                    "# Source: a locale to which the following operations are applied.\n"
                            + "# AddLikely: the result of the Add Likely Subtags.\n"
                            + "# If Add Likely Subtags fails, then “FAIL”.\n"
                            + "# RemoveFavorScript: Remove Likely Subtags, when the script is favored.\n"
                            + "# Only included when different than AddLikely.\n"
                            + "# RemoveFavorRegion: Remove Likely Subtags, when the region is favored.\n"
                            + "# Only included when different than RemoveFavorScript.\n"
                            + "#\n"
                            + "# Generation: GenerateLikelyTestData.java\n");

            // generate alternates
            // for now, simple case
            for (String testRaw : getTestCases(data)) {
                final CLDRLocale source = CLDRLocale.getInstance(testRaw);
                final String test = source.toLanguageTag();

                // if the maxLang is empty, we have no data for the language
                final String maxLang = likely.maximize(source.getLanguage());
                if (maxLang == null || maxLang.isEmpty()) {
                    showLine(pw, test, "FAIL", "FAIL", "FAIL");
                    continue;
                }

                final String max = CLDRLocale.getInstance(likely.maximize(test)).toLanguageTag();
                // setFavorRegion changes the shared LikelySubtags instance, so the two
                // minimize calls must stay in this order with the flag set before each.
                final String minFavorScript =
                        CLDRLocale.getInstance(likely.setFavorRegion(false).minimize(test))
                                .toLanguageTag();
                final String minFavorRegion =
                        CLDRLocale.getInstance(likely.setFavorRegion(true).minimize(test))
                                .toLanguageTag();
                showLine(pw, test, max, minFavorScript, minFavorRegion);
            }
        }
    }

    /**
     * Prints a locale and its maximized form to standard output; a manual debugging aid.
     *
     * @param test0 the locale identifier to maximize
     */
    public static void check(String test0) {
        String check = likely.maximize(test0);
        System.out.println(test0 + " → " + check);
    }

    // test data

    private static final Set<String> ALLOWED_WITH_MACROREGION =
            Set.of("ar_001", "en_001", "en_150", "es_419"); // only intentional CLDR locales

    /**
     * Builds the sorted set of source locales (as language tags) to put in the test file.
     *
     * <p>For every available CLDR locale at {@link Level#BASIC} effective coverage or above
     * (except {@code root}), the language is paired with the script and region of its maximized
     * form, with the dummy script and region, and with nothing. The union of all those subtags
     * is also applied to {@code und}, together with every real script/region pairing seen for a
     * single language. Four fixed {@code qaa} cases are appended. Locales skipped because their
     * maximized region is not a regular region are printed to standard output.
     *
     * @param data not read by this method; kept so existing callers continue to compile
     * @return the test-case language tags in natural (sorted) order
     */
    public static Set<String> getTestCases(Map<String, String> data) {
        CalculatedCoverageLevels coverage = CalculatedCoverageLevels.getInstance();
        Set<String> skipping = new TreeSet<>();
        Set<String> testCases = new TreeSet<>();

        // for CLDR locales, add combinations
        // collect together the scripts & regions for each language. Will filter later
        Multimap<String, String> combinations = TreeMultimap.create();
        for (String localeString : config.getCldrFactory().getAvailable()) {
            Level effective = coverage.getEffectiveCoverageLevel(localeString);
            if (effective == null || effective.compareTo(Level.BASIC) < 0) {
                continue;
            }
            if (localeString.equals("root")) {
                continue;
            }
            String lang = CLDRLocale.getInstance(localeString).getLanguage();
            CLDRLocale max = CLDRLocale.getInstance(likely.maximize(localeString));
            if (!okRegions.contains(max.getCountry())
                    && !ALLOWED_WITH_MACROREGION.contains(localeString)) {
                skipping.add(localeString);
                continue;
            }
            combinations.put(lang, max.getScript());
            combinations.put(lang, max.getCountry());
            combinations.put(lang, DUMMY_REGION); // check odd conditions
            combinations.put(lang, DUMMY_SCRIPT); // check odd conditions
            combinations.put(lang, ""); // check odd conditions
        }

        // "und" gets every script/region seen for any language. Snapshot the values first so
        // the multimap is not modified while one of its own views is being read.
        Set<String> undCombinations = new TreeSet<>(combinations.values());
        combinations.putAll("und", undCombinations);

        LanguageTagParser ltp = new LanguageTagParser();
        for (Entry<String, Collection<String>> entry : combinations.asMap().entrySet()) {
            final String lang = entry.getKey();
            Set<String> scripts = new LinkedHashSet<>();
            Set<String> regions = new LinkedHashSet<>();
            for (String scriptOrRegion : entry.getValue()) {
                ltp.set(lang); // clears script, region
                if (scriptOrRegion.length() == 4) { // four letters: a script code
                    ltp.setScript(scriptOrRegion);
                    scripts.add(scriptOrRegion);
                } else { // a region code, or the empty "language only" case
                    ltp.setRegion(scriptOrRegion);
                    if (!scriptOrRegion.isBlank()) {
                        regions.add(scriptOrRegion);
                    }
                }
                testCases.add(toLanguageTag(ltp));
            }

            // Keep the dummy subtags out of the und_<script>_<region> cross product below.
            // The original code called scripts.remove(DUMMY_REGION), which could never match
            // because the dummy region is only ever stored in 'regions'.
            regions.remove(DUMMY_REGION);
            scripts.remove(DUMMY_SCRIPT);

            if (!lang.equals("und")) { // record script/region combinations
                ltp.set("und");
                for (String script : scripts) {
                    ltp.setScript(script);
                    for (String region : regions) {
                        ltp.setRegion(region);
                        testCases.add(toLanguageTag(ltp));
                    }
                }
            }
        }
        testCases.add("qaa");
        testCases.add("qaa_Cyrl");
        testCases.add("qaa_CH");
        testCases.add("qaa_Cyrl_CH");

        System.out.println("Skipping " + skipping);
        return testCases;
    }

    /** Converts the parser's current contents to a language tag by way of {@link CLDRLocale}. */
    private static String toLanguageTag(LanguageTagParser ltp) {
        return CLDRLocale.getInstance(ltp.toString()).toLanguageTag();
    }

    /**
     * Writes one line of four fields separated by {@code " ;\t"}.
     *
     * <p>Each field after the first is written as an empty string when it equals the field it
     * is compared with: {@code max} against {@code test}, {@code minScript} against {@code max},
     * and {@code minRegion} against {@code minScript}.
     *
     * @param tempWriter destination writer
     * @param test the source locale, always written
     * @param max the maximized locale
     * @param minScript the minimized locale when the script is favored
     * @param minRegion the minimized locale when the region is favored
     */
    public static void showLine(
            TempPrintWriter tempWriter,
            String test,
            final String max,
            final String minScript,
            final String minRegion) {
        final String maxField = max.equals(test) ? "" : max;
        final String scriptField = minScript.equals(max) ? "" : minScript; // script favored
        final String regionField = minRegion.equals(minScript) ? "" : minRegion; // region favored
        tempWriter.println(test + " ;\t" + maxField + " ;\t" + scriptField + " ;\t" + regionField);
    }
}