• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import java.io.File;
4 import java.util.Arrays;
5 import java.util.EnumMap;
6 import java.util.EnumSet;
7 import java.util.HashMap;
8 import java.util.HashSet;
9 import java.util.LinkedHashSet;
10 import java.util.Map;
11 import java.util.Set;
12 
13 import org.unicode.cldr.draft.ScriptMetadata;
14 import org.unicode.cldr.draft.ScriptMetadata.Info;
15 import org.unicode.cldr.draft.ScriptMetadata.Trinary;
16 import org.unicode.cldr.tool.LikelySubtags;
17 import org.unicode.cldr.util.CLDRFile.ExemplarType;
18 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
19 
20 import com.google.common.collect.ImmutableSet;
21 import com.google.common.collect.Multimap;
22 import com.ibm.icu.impl.Relation;
23 import com.ibm.icu.lang.UScript;
24 import com.ibm.icu.text.UnicodeSet;
25 
26 public class CoreCoverageInfo {
27 
28     private static final CLDRConfig config = CLDRConfig.getInstance();
29     private static final String CLDR_BASE_DIRECTORY = config.getCldrBaseDirectory().toString();
30     private static final SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
31     private static final LikelySubtags ls = new LikelySubtags();
32 
33     public enum CoreItems {
34         default_content(Level.CORE),
35         likely_subtags(Level.CORE),
36         country_data(Level.CORE),
37         orientation(Level.CORE),
38         time_cycle(Level.CORE),
39 
40         // time cycle
41 
42         casing(Level.MODERATE),
43         plurals(Level.MODERATE),
44         ordinals(Level.MODERATE),
45         collation(Level.MODERATE),
46 
47         grammar(Level.MODERN),
48         romanization(Level.MODERN),
49         ;
50 
51         public static Set<CoreItems> ONLY_RECOMMENDED = ImmutableSet.copyOf(
52             EnumSet.of(romanization, ordinals));
53 
54         public static final int COUNT = CoreItems.values().length;
55         public final Level desiredLevel;
56 
CoreItems(Level desiredLevel)57         CoreItems(Level desiredLevel) {
58             this.desiredLevel = desiredLevel;
59         }
CoreItems()60         CoreItems() {
61             this(Level.CORE);
62         }
63         @Override
toString()64         public String toString() {
65             return desiredLevel.getAbbreviation() + " " + name();
66         }
67     }
68     static UnicodeSet RTL = new UnicodeSet("[[:bc=R:][:bc=AL:]]").freeze();
69 
getCoreCoverageInfo(CLDRFile file, Multimap<CoreItems,String> detailedErrors)70     public static Set<CoreItems> getCoreCoverageInfo(CLDRFile file, Multimap<CoreItems,String> detailedErrors) {
71         detailedErrors.clear();
72         if (file.isResolved()) {
73             file = file.getUnresolved();
74         }
75         String locale = file.getLocaleID();
76         LanguageTagParser ltp = new LanguageTagParser();
77         locale = ltp.set(locale).getLanguageScript();
78         String baseLanguage = ltp.getLanguage();
79         String script = ltp.getScript();
80         String region = ltp.getRegion();
81 
82         Set<CoreItems> result = EnumSet.noneOf(CoreItems.class);
83 
84         //      (02) Orientation (bidi writing systems only) [main/xxx.xml]
85         UnicodeSet main = file.getExemplarSet(ExemplarType.main, null);
86         boolean isRtl = main.containsSome(RTL);
87 
88         String path = "//ldml/layout/orientation/characterOrder";
89         String value = file.getStringValue(path);
90         if ("right-to-left".equals(value) == isRtl) {
91             result.add(CoreItems.orientation);
92         } else {
93             detailedErrors.put(CoreItems.orientation, path);
94         }
95 
96         //      (01) Plural rules [supplemental/plurals.xml and ordinals.xml]
97         //      For more information, see cldr-spec/plural-rules.
98         if (sdi.getPluralLocales(PluralType.cardinal).contains(baseLanguage)) {
99             result.add(CoreItems.plurals);
100         } else {
101             detailedErrors.put(CoreItems.plurals, "//supplementalData/plurals[@type=\"cardinal\"]/pluralRules[@locales=\"" + locale
102                 + "\"]/pluralRule[@count=\"other\"]");
103         }
104         if (sdi.getPluralLocales(PluralType.ordinal).contains(baseLanguage)) {
105             result.add(CoreItems.ordinals);
106         } else {
107             detailedErrors.put(CoreItems.ordinals, "//supplementalData/plurals[@type=\"ordinal\"]/pluralRules[@locales=\"" + locale
108                 + "\"]/pluralRule[@count=\"other\"]");
109         }
110 
111         //      (01) Default content script and region (normally: normally country with largest population using that language, and normal script for that).  [supplemental/supplementalMetadata.xml]
112 
113         String defaultContent = sdi.getDefaultContentLocale(locale);
114         if (defaultContent != null || locale.equals("no")) {
115             result.add(CoreItems.default_content);
116         } else {
117             detailedErrors.put(CoreItems.default_content, "//supplementalData/supplementalMetadata/defaultContent");
118         }
119         // likely subtags
120         String max = ls.maximize(locale);
121         String maxLangScript = null;
122         if (max != null) {
123             ltp.set(max);
124             maxLangScript = ltp.getLanguageScript();
125             script = ltp.getScript();
126             region = ltp.getRegion();
127             if (!script.isEmpty() && !region.isEmpty()) {
128                 result.add(CoreItems.likely_subtags);
129             }
130         }
131         if (!result.contains(CoreItems.likely_subtags)) {
132             detailedErrors.put(CoreItems.likely_subtags, "//supplementalData/likelySubtags");
133         }
134         // (N) Verify the country data ( i.e. which territories in which the language is spoken enough to create a locale ) [supplemental/supplementalData.xml]
135         // we verify that there is at least one region
136         // we try 3 cases: language, locale, maxLangScript
137         Set<String> territories = sdi.getTerritoriesForPopulationData(locale);
138         if (territories == null) {
139             territories = sdi.getTerritoriesForPopulationData(baseLanguage);
140         }
141         if (territories == null && maxLangScript != null) {
142             territories = sdi.getTerritoriesForPopulationData(maxLangScript);
143         }
144         if (territories != null && territories.size() != 0) {
145             result.add(CoreItems.country_data);
146         } else {
147             detailedErrors.put(CoreItems.country_data, "//supplementalData/territoryInfo");
148             sdi.getTerritoriesForPopulationData(locale); // for debugging
149         }
150         //      *(N) Romanization table (non-Latin writing systems only) [spreadsheet, we'll translate into transforms/xxx-en.xml]
151         //      If a spreadsheet, for each letter (or sequence) in the exemplars, what is the corresponding Latin letter (or sequence).
152         //      More sophisticated users can do a better job, supplying a file of rules like transforms/Arabic-Latin-BGN.xml.
153 
154         if (script.equals("Latn")) {
155             result.add(CoreItems.romanization);
156         } else {
157             boolean found = false;
158             Set<String> scriptNames = getScriptNames(script);
159             Set<String> tempErrors = new LinkedHashSet<>();
160             for (String scriptName : scriptNames) {
161                 for (String[] pair : ROMANIZATION_PATHS) {
162                     String filename = pair[0] + scriptName + pair[1];
163                     if (hasFile(SpecialDir.transforms, filename)) {
164                         result.add(CoreItems.romanization);
165                         found = true;
166                         break;
167                     } else {
168                         tempErrors.add(script); // debugging
169                     }
170                 }
171             }
172             if (!found) {
173                 detailedErrors.put(CoreItems.romanization, "//supplementalData/transforms/transform"
174                     + "[@source=\"und-" + script + "\"]"
175                     + "[@target=\"und-Latn\"]"
176                     //+ "[@direction=\"forward\"]"
177                     );
178             }
179         }
180 
181         //      (N) Casing information (cased scripts only, according to ScriptMetadata.txt)
182         //      This will be in common/casing
183         Info scriptData = ScriptMetadata.getInfo(script);
184         if (scriptData.hasCase == Trinary.YES) {
185             if (hasFile(SpecialDir.casing, baseLanguage)) {
186                 result.add(CoreItems.casing);
187             } else {
188                 detailedErrors.put(CoreItems.casing, "//ldml/metadata/casingData/casingItem[@type=\"*\"]");
189             }
190         } else {
191             result.add(CoreItems.casing);
192         }
193         //      (N) Collation rules [non-Survey Tool]
194         //      For details, see cldr-spec/collation-guidelines.
195         //      The result will be a file like: common/collation/ar.xml or common/collation/da.xml.
196         //      Note that the "search" collators (which tend to be large) are not needed initially.
197 
198         // check for file cldr/collation/<language>.xml
199         if (hasFile(SpecialDir.collation, baseLanguage)) {
200             result.add(CoreItems.collation);
201         } else {
202             detailedErrors.put(CoreItems.collation, "//ldml/collations/collation[@type=\"standard\"]");
203         }
204 
205         Map<String, PreferredAndAllowedHour> timeData = sdi.getTimeData();
206         if (timeData.get(region) != null) {
207             result.add(CoreItems.time_cycle);
208         } else {
209             detailedErrors.put(CoreItems.time_cycle, "//supplementalData/timeData/hours");
210         }
211 
212         GrammarInfo grammarInfo = sdi.getGrammarInfo(locale);
213         if (grammarInfo != null) {
214             result.add(CoreItems.grammar);
215         } else {
216             detailedErrors.put(CoreItems.grammar, "//supplementalData/grammaticalData/grammaticalFeatures");
217         }
218 
219         // finalize
220         return ImmutableSet.copyOf(result);
221     }
222 
223     private static final String[][] ROMANIZATION_PATHS = {
224         { "", "-Latin" },
225         { "", "-Latin-BGN" },
226         { "Latin-", "" },
227     };
228 
229     private static final Relation SCRIPT_NAMES = Relation.of(new HashMap(), HashSet.class);
230     static {
231         SCRIPT_NAMES.putAll("Arab", Arrays.asList("Arabic", "Arab"));
232         SCRIPT_NAMES.putAll("Jpan", Arrays.asList("Jpan", "Han"));
233         SCRIPT_NAMES.putAll("Hant", Arrays.asList("Hant", "Han"));
234         SCRIPT_NAMES.putAll("Hans", Arrays.asList("Hans", "Han"));
235         SCRIPT_NAMES.putAll("Kore", Arrays.asList("Hang", "Hangul"));
SCRIPT_NAMES.freeze()236         SCRIPT_NAMES.freeze();
237     }
238 
getScriptNames(String script)239     private static Set<String> getScriptNames(String script) {
240         Set<String> result = SCRIPT_NAMES.get(script);
241         if (result != null) {
242             return result;
243         }
244         result = new HashSet();
245         String name = UScript.getName(UScript.getCodeFromName(script));
246         result.add(name);
247         result.add(script);
248         return result;
249     }
250 
251     private enum SpecialDir {
252         transforms, collation, casing
253     }
254 
255     private static final Relation<SpecialDir, String> SPECIAL_FILES = Relation.of(new EnumMap(SpecialDir.class), HashSet.class);
256     static {
257         for (SpecialDir dir : SpecialDir.values()) {
258             File realDir = new File(CLDR_BASE_DIRECTORY + "/common/" + dir);
259             for (String s : realDir.list()) {
260                 if (s.endsWith(".xml")) {
261                     s = s.substring(0, s.length() - 4);
262                 }
SPECIAL_FILES.put(dir, s)263                 SPECIAL_FILES.put(dir, s);
264             }
265         }
266     }
267 
hasFile(SpecialDir type, String filename)268     private static boolean hasFile(SpecialDir type, String filename) {
269         return SPECIAL_FILES.get(type).contains(filename);
270     }
271 }
272