1 package org.unicode.cldr.tool; 2 3 import com.google.common.collect.Multimap; 4 import com.google.common.collect.TreeMultimap; 5 import java.util.Collection; 6 import java.util.LinkedHashSet; 7 import java.util.Map; 8 import java.util.Map.Entry; 9 import java.util.Set; 10 import java.util.TreeSet; 11 import org.unicode.cldr.util.CLDRConfig; 12 import org.unicode.cldr.util.CLDRLocale; 13 import org.unicode.cldr.util.CLDRPaths; 14 import org.unicode.cldr.util.CalculatedCoverageLevels; 15 import org.unicode.cldr.util.CldrUtility; 16 import org.unicode.cldr.util.LanguageTagParser; 17 import org.unicode.cldr.util.Level; 18 import org.unicode.cldr.util.StandardCodes.LstrType; 19 import org.unicode.cldr.util.TempPrintWriter; 20 import org.unicode.cldr.util.Validity; 21 import org.unicode.cldr.util.Validity.Status; 22 23 public class GenerateLikelyTestData { 24 private static final String DUMMY_SCRIPT = "Egyp"; 25 private static final String DUMMY_REGION = "AQ"; 26 static CLDRConfig config = CLDRConfig.getInstance(); 27 static Map<String, String> data = config.getSupplementalDataInfo().getLikelySubtags(); 28 static LikelySubtags likely = new LikelySubtags(); 29 private static final Validity VALIDITY = Validity.getInstance(); 30 static Set<String> okRegions = VALIDITY.getStatusToCodes(LstrType.region).get(Status.regular); 31 main(String[] args)32 public static void main(String[] args) { 33 try (TempPrintWriter pw = 34 TempPrintWriter.openUTF8Writer( 35 CLDRPaths.TEST_DATA + "localeIdentifiers", "likelySubtags.txt")) { 36 37 pw.println( 38 "# Test data for Likely Subtags\n" 39 + CldrUtility.getCopyrightString("# ") 40 + "\n" 41 + "#\n" 42 + "# Test data for https://www.unicode.org/reports/tr35/tr35.html#Likely_Subtags\n" 43 + "#\n" 44 + "# Format:\n"); 45 showLine(pw, "# Source", "AddLikely", "RemoveFavorScript", "RemoveFavorRegion"); 46 pw.println( 47 "# Source: a locale to which the following operations are applied.\n" 48 + "# AddLikely: the result of the Add Likely Subtags.\n" 49 + "# If Add Likely Subtags fails, then “FAIL”.\n" 50 + "# RemoveFavorScript: Remove Likely Subtags, when the script is favored.\n" 51 + "# Only included when different than AddLikely.\n" 52 + "# RemoveFavorRegion: Remove Likely Subtags, when the region is favored.\n" 53 + "# Only included when different than RemoveFavorScript.\n" 54 + "#\n" 55 + "# Generation: GenerateLikelyTestData.java\n"); 56 57 // generate alternates 58 // for now, simple case 59 Set<String> testCases = getTestCases(data); 60 61 for (String testRaw : testCases) { 62 final CLDRLocale source = CLDRLocale.getInstance(testRaw); 63 final String test = source.toLanguageTag(); 64 65 // if the maxLang is empty, we have no data for the language 66 String lang = source.getLanguage(); 67 String maxLang = likely.maximize(lang); 68 if (maxLang == null || maxLang.isEmpty()) { 69 showLine(pw, test, "FAIL", "FAIL", "FAIL"); 70 continue; 71 } 72 73 final String maximize = likely.maximize(test); 74 final String max = CLDRLocale.getInstance(maximize).toLanguageTag(); 75 final CLDRLocale minFavorScriptLocale = 76 CLDRLocale.getInstance(likely.setFavorRegion(false).minimize(test)); 77 final String favorScript = minFavorScriptLocale.toLanguageTag(); 78 final CLDRLocale minFavorRegionLocale = 79 CLDRLocale.getInstance(likely.setFavorRegion(true).minimize(test)); 80 final String minFavorRegion = minFavorRegionLocale.toLanguageTag(); 81 showLine(pw, test, max, favorScript, minFavorRegion); 82 } 83 } 84 } 85 check(String test0)86 public static void check(String test0) { 87 String check = likely.maximize(test0); 88 System.out.println(test0 + " → " + check); 89 } 90 91 // test data 92 93 private static Set<String> ALLOWED_WITH_MACROREGION = 94 Set.of("ar_001", "en_001", "en_150", "es_419"); // only intentional CLDR locales 95 getTestCases(Map<String, String> data)96 public static Set<String> getTestCases(Map<String, String> data) { 97 CalculatedCoverageLevels coverage = CalculatedCoverageLevels.getInstance(); 98 Set<String> skipping = new TreeSet<>(); 99 TreeSet<String> testCases = new TreeSet<>(); 100 // for CLDR locales, add combinations 101 // collect together the scripts®ions for each language. Will filter later 102 Multimap<String, String> combinations = TreeMultimap.create(); 103 for (String localeString : config.getCldrFactory().getAvailable()) { 104 Level effective = coverage.getEffectiveCoverageLevel(localeString); 105 if (effective == null || effective.compareTo(Level.BASIC) < 0) { 106 continue; 107 } 108 if (localeString.equals("root")) { 109 continue; 110 } 111 CLDRLocale locale = CLDRLocale.getInstance(localeString); 112 String lang = locale.getLanguage(); 113 CLDRLocale max = CLDRLocale.getInstance(likely.maximize(localeString)); 114 if (!okRegions.contains(max.getCountry()) 115 && !ALLOWED_WITH_MACROREGION.contains(localeString)) { 116 skipping.add(localeString); 117 continue; 118 } 119 combinations.put(lang, max.getScript()); 120 combinations.put(lang, max.getCountry()); 121 combinations.put(lang, DUMMY_REGION); // check odd conditions 122 combinations.put(lang, DUMMY_SCRIPT); // check odd conditions 123 combinations.put(lang, ""); // check odd conditions 124 } 125 Set<String> undCombinations = new TreeSet<>(); 126 for (Entry<String, Collection<String>> entry : combinations.asMap().entrySet()) { 127 undCombinations.addAll(entry.getValue()); 128 } 129 combinations.putAll("und", undCombinations); 130 131 LanguageTagParser ltp = new LanguageTagParser(); 132 for (Entry<String, Collection<String>> entry : combinations.asMap().entrySet()) { 133 final String lang = entry.getKey(); 134 Set<String> items = new TreeSet<>(entry.getValue()); 135 Set<String> scripts = new LinkedHashSet<>(); 136 Set<String> regions = new LinkedHashSet<>(); 137 for (String scriptOrRegion : items) { 138 ltp.set(lang); // clears script, region 139 if (scriptOrRegion.length() == 4) { 140 ltp.setScript(scriptOrRegion); 141 scripts.add(scriptOrRegion); 142 } else { 143 ltp.setRegion(scriptOrRegion); 144 if (!scriptOrRegion.isBlank()) { 145 regions.add(scriptOrRegion); 146 } 147 } 148 testCases.add(CLDRLocale.getInstance(ltp.toString()).toLanguageTag()); 149 } 150 scripts.remove(DUMMY_REGION); 151 scripts.remove(DUMMY_SCRIPT); 152 153 if (!lang.equals("und")) { // record script/region combinations 154 ltp.set("und"); 155 for (String script : scripts) { 156 ltp.setScript(script); 157 for (String region : regions) { 158 ltp.setRegion(region); 159 testCases.add(CLDRLocale.getInstance(ltp.toString()).toLanguageTag()); 160 } 161 } 162 } 163 } 164 testCases.add("qaa"); 165 testCases.add("qaa_Cyrl"); 166 testCases.add("qaa_CH"); 167 testCases.add("qaa_Cyrl_CH"); 168 169 System.out.println("Skipping " + skipping); 170 return testCases; 171 } 172 showLine( TempPrintWriter tempWriter, String test, final String max, final String minScript, final String minRegion)173 public static void showLine( 174 TempPrintWriter tempWriter, 175 String test, 176 final String max, 177 final String minScript, 178 final String minRegion) { 179 tempWriter.println( 180 test // 181 + " ;\t" 182 + (max.equals(test) ? "" : max) // 183 + " ;\t" 184 + (minScript.equals(max) ? "" : minScript) // script favored 185 + " ;\t" 186 + (minRegion.equals(minScript) ? "" : minRegion) // region favored 187 ); 188 } 189 } 190