• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.test;
2 
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.util.Collections;
6 import java.util.HashMap;
7 import java.util.Iterator;
8 import java.util.Map;
9 import java.util.Objects;
10 import java.util.Set;
11 import java.util.TreeMap;
12 import java.util.TreeSet;
13 import java.util.regex.Matcher;
14 
15 import org.unicode.cldr.util.CLDRFile;
16 import org.unicode.cldr.util.CLDRPaths;
17 import org.unicode.cldr.util.CldrUtility;
18 import org.unicode.cldr.util.Factory;
19 import org.unicode.cldr.util.Pair;
20 import org.unicode.cldr.util.PatternCache;
21 import org.unicode.cldr.util.StandardCodes;
22 import org.unicode.cldr.util.SupplementalDataInfo;
23 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
24 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
25 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
26 
27 import com.ibm.icu.impl.Relation;
28 
29 public class TestSupplementalData {
30     static CLDRFile english;
31     private static SupplementalDataInfo supplementalData;
32     private static StandardCodes sc;
33 
main(String[] args)34     public static void main(String[] args) throws IOException {
35         // genData();
36         // if (true) return;
37         Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
38         english = cldrFactory.make("en", true);
39         root = cldrFactory.make("root", true);
40         supplementalData = SupplementalDataInfo.getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);
41         sc = StandardCodes.make();
42 
43         checkPlurals();
44 
45         System.out.println("Skipped Elements: " + supplementalData.getSkippedElements());
46         checkAgainstLanguageScript();
47         checkTerritoryMapping();
48 
49         checkTelephoneCodeData();
50     }
51 
checkPlurals()52     private static void checkPlurals() {
53         Relation<PluralInfo, String> pluralsToLocale = Relation.<PluralInfo, String> of(new HashMap<PluralInfo, Set<String>>(), TreeSet.class);
54         for (String locale : new TreeSet<>(supplementalData.getPluralLocales())) {
55             PluralInfo pluralInfo = supplementalData.getPlurals(locale);
56             System.out.println(locale + ":\t" + pluralInfo);
57             pluralsToLocale.put(pluralInfo, locale);
58         }
59         String locale = "en_US";
60         PluralInfo pluralInfo = supplementalData.getPlurals(locale);
61         System.out.println(locale + ":\t" + pluralInfo);
62 
63         for (PluralInfo pluralInfo2 : pluralsToLocale.keySet()) {
64             System.out.println("Locales: \t" + pluralsToLocale.getAll(pluralInfo2));
65             final Map<Count, String> typeToExamples = pluralInfo2.getCountToStringExamplesMap();
66             for (Count type : typeToExamples.keySet()) {
67                 System.out.println("\tPlural Code: \t" + type + " \t=>\t" + typeToExamples.get(type));
68             }
69             System.out.println();
70         }
71 
72     }
73 
checkTelephoneCodeData()74     private static void checkTelephoneCodeData() {
75         System.out.println("==== territories for telephoneCodeData ====");
76         System.out.println(supplementalData.getTerritoriesForTelephoneCodeInfo());
77         System.out.println("==== telephone code data for 001 ====");
78         System.out.println(supplementalData.getTelephoneCodeInfoForTerritory("001"));
79         System.out.println("==== telephone code data for US ====");
80         System.out.println(supplementalData.getTelephoneCodeInfoForTerritory("US"));
81         System.out.println("==== all telephoneCodeData ====");
82         System.out.println(supplementalData.getTerritoryToTelephoneCodeInfo());
83     }
84 
85     static Matcher numericTerritory = PatternCache.get("[0-9]{3}").matcher("");
86     private static CLDRFile root;
87 
checkTerritoryMapping()88     private static void checkTerritoryMapping() {
89         Relation<String, String> alpha3 = supplementalData.getAlpha3TerritoryMapping();
90         Set<String> temp = new TreeSet<>(sc.getAvailableCodes("territory"));
91         for (Iterator<String> it = temp.iterator(); it.hasNext();) {
92             String code = it.next();
93             if (numericTerritory.reset(code).matches()) {
94                 it.remove();
95                 continue;
96             }
97             // if (sc.getFullData("territory", code).get(0).equals("PRIVATE USE")) {
98             // it.remove();
99             // continue;
100             // }
101         }
102         showAnyDifferences("alpha3", alpha3.keySet(), "sc", temp);
103     }
104 
showAnyDifferences(String title, Set<String> set, String title2, Set<String> set2)105     private static void showAnyDifferences(String title, Set<String> set, String title2, Set<String> set2) {
106         if (!set.equals(set2)) {
107             showFirstMinusSecond("Failure " + title + "-" + title2 + ": ", set, set2);
108             showFirstMinusSecond("Failure " + title2 + "-" + title + ": ", set2, set);
109         }
110     }
111 
showFirstMinusSecond(String title, Set<String> name, Set<String> availableCodes)112     private static void showFirstMinusSecond(String title, Set<String> name, Set<String> availableCodes) {
113         Set<String> temp = getFirstMinusSecond(name, availableCodes);
114         if (!temp.isEmpty()) {
115             System.out.println(title + getFirstMinusSecond(name, availableCodes));
116         }
117     }
118 
getFirstMinusSecond(Set<String> name, Set<String> availableCodes)119     private static Set<String> getFirstMinusSecond(Set<String> name, Set<String> availableCodes) {
120         Set<String> temp = new TreeSet<>(name);
121         temp.removeAll(availableCodes);
122         return temp;
123     }
124 
checkAgainstLanguageScript()125     static void checkAgainstLanguageScript() {
126         Relation<String, String> otherTerritoryToLanguages = Relation.<String, String> of(new TreeMap<String, Set<String>>(), TreeSet.class, null);
127         // get other language data
128         for (String language : sc.getGoodAvailableCodes("language")) {
129             Set<BasicLanguageData> newLanguageData = supplementalData.getBasicLanguageData(language);
130             if (newLanguageData != null) {
131                 for (BasicLanguageData languageData : newLanguageData) {
132                     Set<String> territories = new TreeSet<>(languageData.getTerritories());
133                     territories.addAll(languageData.getTerritories());
134                     if (territories != null) {
135                         Set<String> scripts = new TreeSet<>(languageData.getScripts());
136                         scripts.addAll(languageData.getScripts());
137                         if (scripts == null || scripts.size() < 2) {
138                             otherTerritoryToLanguages.putAll(territories, language);
139                         } else {
140                             for (String script : scripts) {
141                                 otherTerritoryToLanguages.putAll(territories, language + "_" + script);
142                             }
143                         }
144                     }
145                 }
146             }
147         }
148         // compare them, listing differences
149         for (String territory : sc.getGoodAvailableCodes("territory")) {
150             Set<String> languages = supplementalData.getTerritoryToLanguages(territory);
151             Set<String> otherLanguages = otherTerritoryToLanguages.getAll(territory);
152             if (otherLanguages == null) otherLanguages = Collections.emptySet();
153             if (!Objects.equals(languages, otherLanguages)) {
154                 Set<String> languagesLeftover = new TreeSet<>(languages);
155                 languagesLeftover.removeAll(otherLanguages);
156                 Set<String> otherLanguagesLeftover = new TreeSet<>(otherLanguages);
157                 otherLanguagesLeftover.removeAll(languages);
158                 String territoryString = english.getName(CLDRFile.TERRITORY_NAME, territory);
159                 if (otherLanguagesLeftover.size() != 0) {
160                     for (String other : otherLanguagesLeftover) {
161                         String name = english.getName(other);
162                         System.out.println(territoryString + "\t" + territory + "\t" + name + "\t" + other);
163                     }
164                 }
165             }
166         }
167     }
168 
169     /**
170      * Temporary function to transform data
171      *
172      * @throws IOException
173      */
genData()174     public static void genData() throws IOException {
175         BufferedReader codes = CldrUtility.getUTF8Data("territory_codes.txt");
176         Set<Pair> sorted = new TreeSet<>();
177         while (true) {
178             String line = codes.readLine();
179             if (line == null)
180                 break;
181             line = line.split("#")[0].trim();
182             if (line.length() == 0)
183                 continue;
184             String[] sourceValues = line.split("\\s+");
185             String[] values = new String[5];
186             for (int i = 0; i < values.length; ++i) {
187                 if (i >= sourceValues.length || sourceValues[i].equals("-"))
188                     values[i] = null;
189                 else
190                     values[i] = sourceValues[i];
191             }
192             String alpha2 = values[0];
193             String numeric = values[1];
194             String alpha3 = values[2];
195             String internet = values[3];
196             if (internet != null) {
197                 internet = internet.replace("/", " ");
198             }
199             if (internet != null)
200                 internet = internet.toUpperCase();
201             String fips10 = values[4];
202             Pair item = new Pair(alpha2, new Pair(numeric, new Pair(alpha3, new Pair(fips10, internet))));
203             sorted.add(item);
204         }
205         for (Pair item : sorted) {
206             // <territoryCodes type="CM" numeric="120" alpha3="CMR"/>
207             System.out.print("<territoryCodes");
208             Comparable first = item.getFirst();
209             showNonNull("type", first, null);
210             item = (Pair) item.getSecond();
211             showNonNull("numeric", item.getFirst(), null);
212             item = (Pair) item.getSecond();
213             showNonNull("alpha3", item.getFirst(), null);
214             item = (Pair) item.getSecond();
215             showNonNull("fips10", item.getFirst(), first);
216             showNonNull("internet", item.getSecond(), first);
217             System.out.println("/>");
218         }
219         codes.close();
220     }
221 
showNonNull(String title, Object first, Object noDup)222     private static void showNonNull(String title, Object first, Object noDup) {
223         if (first != null && !first.equals(noDup)) {
224             System.out.print(" " + title + "=\"" + first + "\"");
225         }
226     }
227 }