1 package org.unicode.cldr.test; 2 3 import java.io.BufferedReader; 4 import java.io.IOException; 5 import java.util.Arrays; 6 import java.util.Collections; 7 import java.util.HashMap; 8 import java.util.Iterator; 9 import java.util.List; 10 import java.util.Map; 11 import java.util.Objects; 12 import java.util.Set; 13 import java.util.TreeMap; 14 import java.util.TreeSet; 15 import java.util.regex.Matcher; 16 17 import org.unicode.cldr.util.CLDRFile; 18 import org.unicode.cldr.util.CLDRPaths; 19 import org.unicode.cldr.util.CldrUtility; 20 import org.unicode.cldr.util.Factory; 21 import org.unicode.cldr.util.Pair; 22 import org.unicode.cldr.util.PatternCache; 23 import org.unicode.cldr.util.StandardCodes; 24 import org.unicode.cldr.util.SupplementalDataInfo; 25 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData; 26 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 27 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 28 import org.unicode.cldr.util.XPathParts; 29 30 import com.ibm.icu.impl.Relation; 31 32 public class TestSupplementalData { 33 static CLDRFile english; 34 private static SupplementalDataInfo supplementalData; 35 private static StandardCodes sc; 36 main(String[] args)37 public static void main(String[] args) throws IOException { 38 // genData(); 39 // if (true) return; 40 Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); 41 english = cldrFactory.make("en", true); 42 root = cldrFactory.make("root", true); 43 supplementalData = SupplementalDataInfo.getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY); 44 sc = StandardCodes.make(); 45 46 showMultiZones(); 47 checkPlurals(); 48 49 System.out.println("Skipped Elements: " + supplementalData.getSkippedElements()); 50 checkAgainstLanguageScript(); 51 checkTerritoryMapping(); 52 53 checkTelephoneCodeData(); 54 } 55 showMultiZones()56 private static void showMultiZones() { 57 // reverse the list 58 Relation<String, String> territoryToZones = Relation.<String, String> of(new TreeMap<String, Set<String>>(), TreeSet.class); 59 for (String zone : supplementalData.getCanonicalZones()) { 60 territoryToZones.put(supplementalData.getZone_territory(zone), zone); 61 } 62 // gather the data 63 // this could be slightly simpler using supplementalData.get 64 Set<String> singulars = new TreeSet<>(); 65 for (String region : territoryToZones.keySet()) { 66 final Set<String> zones = territoryToZones.getAll(region); 67 if (zones.size() == 1 || region.equals("001")) { 68 singulars.addAll(zones); 69 continue; 70 } 71 System.out.println(region + "\t" + english.getName("territory", region)); 72 System.out.println("\t" + zones); 73 } 74 XPathParts xpp = XPathParts.getFrozenInstance(root.getFullXPath("//ldml/dates/timeZoneNames/singleCountries")); 75 List<String> singleCountries = Arrays.asList(xpp.getAttributeValue(-1, "list").split("\\s+")); 76 singulars.addAll(singleCountries); 77 singulars.remove("Etc/Unknown"); // remove special case 78 System.out.println("Excluded Zones (not necessary in Survey tool): " + singulars); 79 Set<String> otherExclusions = root.getExcludedZones(); 80 if (!otherExclusions.equals(singulars)) { 81 throw new IllegalArgumentException("problem with excluded zones"); 82 } 83 for (Iterator<String> it = english.iterator("//ldml/dates/timeZoneNames/zone"); it.hasNext();) { 84 String distinguishedPath = it.next(); 85 if (root.isPathExcludedForSurvey(distinguishedPath)) { 86 System.out.println("EX\t" + distinguishedPath); 87 } else { 88 System.out.println("\t" + distinguishedPath); 89 } 90 } 91 } 92 checkPlurals()93 private static void checkPlurals() { 94 Relation<PluralInfo, String> pluralsToLocale = Relation.<PluralInfo, String> of(new HashMap<PluralInfo, Set<String>>(), TreeSet.class); 95 for (String locale : new TreeSet<>(supplementalData.getPluralLocales())) { 96 PluralInfo pluralInfo = supplementalData.getPlurals(locale); 97 System.out.println(locale + ":\t" + pluralInfo); 98 pluralsToLocale.put(pluralInfo, locale); 99 } 100 String locale = "en_US"; 101 PluralInfo pluralInfo = supplementalData.getPlurals(locale); 102 System.out.println(locale + ":\t" + pluralInfo); 103 104 for (PluralInfo pluralInfo2 : pluralsToLocale.keySet()) { 105 System.out.println("Locales: \t" + pluralsToLocale.getAll(pluralInfo2)); 106 final Map<Count, String> typeToExamples = pluralInfo2.getCountToStringExamplesMap(); 107 for (Count type : typeToExamples.keySet()) { 108 System.out.println("\tPlural Code: \t" + type + " \t=>\t" + typeToExamples.get(type)); 109 } 110 System.out.println(); 111 } 112 113 } 114 checkTelephoneCodeData()115 private static void checkTelephoneCodeData() { 116 System.out.println("==== territories for telephoneCodeData ===="); 117 System.out.println(supplementalData.getTerritoriesForTelephoneCodeInfo()); 118 System.out.println("==== telephone code data for 001 ===="); 119 System.out.println(supplementalData.getTelephoneCodeInfoForTerritory("001")); 120 System.out.println("==== telephone code data for US ===="); 121 System.out.println(supplementalData.getTelephoneCodeInfoForTerritory("US")); 122 System.out.println("==== all telephoneCodeData ===="); 123 System.out.println(supplementalData.getTerritoryToTelephoneCodeInfo()); 124 } 125 126 static Matcher numericTerritory = PatternCache.get("[0-9]{3}").matcher(""); 127 private static CLDRFile root; 128 checkTerritoryMapping()129 private static void checkTerritoryMapping() { 130 Relation<String, String> alpha3 = supplementalData.getAlpha3TerritoryMapping(); 131 Set<String> temp = new TreeSet<>(sc.getAvailableCodes("territory")); 132 for (Iterator<String> it = temp.iterator(); it.hasNext();) { 133 String code = it.next(); 134 if (numericTerritory.reset(code).matches()) { 135 it.remove(); 136 continue; 137 } 138 // if (sc.getFullData("territory", code).get(0).equals("PRIVATE USE")) { 139 // it.remove(); 140 // continue; 141 // } 142 } 143 showAnyDifferences("alpha3", alpha3.keySet(), "sc", temp); 144 } 145 showAnyDifferences(String title, Set<String> set, String title2, Set<String> set2)146 private static void showAnyDifferences(String title, Set<String> set, String title2, Set<String> set2) { 147 if (!set.equals(set2)) { 148 showFirstMinusSecond("Failure " + title + "-" + title2 + ": ", set, set2); 149 showFirstMinusSecond("Failure " + title2 + "-" + title + ": ", set2, set); 150 } 151 } 152 showFirstMinusSecond(String title, Set<String> name, Set<String> availableCodes)153 private static void showFirstMinusSecond(String title, Set<String> name, Set<String> availableCodes) { 154 Set<String> temp = getFirstMinusSecond(name, availableCodes); 155 if (!temp.isEmpty()) { 156 System.out.println(title + getFirstMinusSecond(name, availableCodes)); 157 } 158 } 159 getFirstMinusSecond(Set<String> name, Set<String> availableCodes)160 private static Set<String> getFirstMinusSecond(Set<String> name, Set<String> availableCodes) { 161 Set<String> temp = new TreeSet<>(name); 162 temp.removeAll(availableCodes); 163 return temp; 164 } 165 checkAgainstLanguageScript()166 static void checkAgainstLanguageScript() { 167 Relation<String, String> otherTerritoryToLanguages = Relation.<String, String> of(new TreeMap<String, Set<String>>(), TreeSet.class, null); 168 // get other language data 169 for (String language : sc.getGoodAvailableCodes("language")) { 170 Set<BasicLanguageData> newLanguageData = supplementalData.getBasicLanguageData(language); 171 if (newLanguageData != null) { 172 for (BasicLanguageData languageData : newLanguageData) { 173 Set<String> territories = new TreeSet<>(languageData.getTerritories()); 174 territories.addAll(languageData.getTerritories()); 175 if (territories != null) { 176 Set<String> scripts = new TreeSet<>(languageData.getScripts()); 177 scripts.addAll(languageData.getScripts()); 178 if (scripts == null || scripts.size() < 2) { 179 otherTerritoryToLanguages.putAll(territories, language); 180 } else { 181 for (String script : scripts) { 182 otherTerritoryToLanguages.putAll(territories, language + "_" + script); 183 } 184 } 185 } 186 } 187 } 188 } 189 // compare them, listing differences 190 for (String territory : sc.getGoodAvailableCodes("territory")) { 191 Set<String> languages = supplementalData.getTerritoryToLanguages(territory); 192 Set<String> otherLanguages = otherTerritoryToLanguages.getAll(territory); 193 if (otherLanguages == null) otherLanguages = Collections.emptySet(); 194 if (!Objects.equals(languages, otherLanguages)) { 195 Set<String> languagesLeftover = new TreeSet<>(languages); 196 languagesLeftover.removeAll(otherLanguages); 197 Set<String> otherLanguagesLeftover = new TreeSet<>(otherLanguages); 198 otherLanguagesLeftover.removeAll(languages); 199 String territoryString = english.getName(CLDRFile.TERRITORY_NAME, territory); 200 if (otherLanguagesLeftover.size() != 0) { 201 for (String other : otherLanguagesLeftover) { 202 String name = english.getName(other); 203 System.out.println(territoryString + "\t" + territory + "\t" + name + "\t" + other); 204 } 205 } 206 } 207 } 208 } 209 210 /** 211 * Temporary function to transform data 212 * 213 * @throws IOException 214 */ genData()215 public static void genData() throws IOException { 216 BufferedReader codes = CldrUtility.getUTF8Data("territory_codes.txt"); 217 Set<Pair> sorted = new TreeSet<>(); 218 while (true) { 219 String line = codes.readLine(); 220 if (line == null) 221 break; 222 line = line.split("#")[0].trim(); 223 if (line.length() == 0) 224 continue; 225 String[] sourceValues = line.split("\\s+"); 226 String[] values = new String[5]; 227 for (int i = 0; i < values.length; ++i) { 228 if (i >= sourceValues.length || sourceValues[i].equals("-")) 229 values[i] = null; 230 else 231 values[i] = sourceValues[i]; 232 } 233 String alpha2 = values[0]; 234 String numeric = values[1]; 235 String alpha3 = values[2]; 236 String internet = values[3]; 237 if (internet != null) { 238 internet = internet.replace("/", " "); 239 } 240 if (internet != null) 241 internet = internet.toUpperCase(); 242 String fips10 = values[4]; 243 Pair item = new Pair(alpha2, new Pair(numeric, new Pair(alpha3, new Pair(fips10, internet)))); 244 sorted.add(item); 245 } 246 for (Pair item : sorted) { 247 // <territoryCodes type="CM" numeric="120" alpha3="CMR"/> 248 System.out.print("<territoryCodes"); 249 Comparable first = item.getFirst(); 250 showNonNull("type", first, null); 251 item = (Pair) item.getSecond(); 252 showNonNull("numeric", item.getFirst(), null); 253 item = (Pair) item.getSecond(); 254 showNonNull("alpha3", item.getFirst(), null); 255 item = (Pair) item.getSecond(); 256 showNonNull("fips10", item.getFirst(), first); 257 showNonNull("internet", item.getSecond(), first); 258 System.out.println("/>"); 259 } 260 codes.close(); 261 } 262 showNonNull(String title, Object first, Object noDup)263 private static void showNonNull(String title, Object first, Object noDup) { 264 if (first != null && !first.equals(noDup)) { 265 System.out.print(" " + title + "=\"" + first + "\""); 266 } 267 } 268 }