• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.test;
2 
3 import static java.util.Collections.disjoint;
4 
5 import java.util.ArrayList;
6 import java.util.Collections;
7 import java.util.LinkedList;
8 import java.util.List;
9 import java.util.Map;
10 import java.util.Map.Entry;
11 import java.util.Set;
12 import java.util.SortedSet;
13 import java.util.TreeMap;
14 import java.util.TreeSet;
15 import java.util.regex.Matcher;
16 
17 import org.unicode.cldr.tool.ToolConfig;
18 import org.unicode.cldr.util.Builder;
19 import org.unicode.cldr.util.CLDRConfig;
20 import org.unicode.cldr.util.CLDRFile;
21 import org.unicode.cldr.util.CLDRLocale;
22 import org.unicode.cldr.util.CLDRPaths;
23 import org.unicode.cldr.util.CldrUtility.VariableReplacer;
24 import org.unicode.cldr.util.LanguageTagParser;
25 import org.unicode.cldr.util.Level;
26 import org.unicode.cldr.util.PathHeader;
27 import org.unicode.cldr.util.PatternCache;
28 import org.unicode.cldr.util.RegexLookup;
29 import org.unicode.cldr.util.RegexLookup.Finder;
30 import org.unicode.cldr.util.RegexLookup.RegexFinder;
31 import org.unicode.cldr.util.SupplementalDataInfo;
32 import org.unicode.cldr.util.SupplementalDataInfo.ApprovalRequirementMatcher;
33 import org.unicode.cldr.util.SupplementalDataInfo.CoverageLevelInfo;
34 import org.unicode.cldr.util.SupplementalDataInfo.CoverageVariableInfo;
35 import org.unicode.cldr.util.XMLFileReader;
36 import org.unicode.cldr.util.XPathParts;
37 
38 import com.ibm.icu.util.Output;
39 import com.ibm.icu.util.VersionInfo;
40 
41 public class CoverageLevel2 {
42 
43     // To modify the results, see /cldr/common/supplemental/coverageLevels.xml
44 
45     /**
46      * Enable to get more verbose output when debugging
47      */
48     private static final boolean DEBUG_LOOKUP = false;
49 
50     private RegexLookup<Level> lookup = null;
51 
/**
 * Kinds of locale-specific sets that a captured group can be checked against. The constant names
 * are the coverage variable names with '-' replaced by '_' (see the MyRegexFinder constructor).
 */
enum SetMatchType {
    Target_Language,
    Target_Scripts,
    Target_Territories,
    Target_TimeZones,
    Target_Currencies,
    Target_Plurals,
    Calendar_List
}
55 
56     private static class LocaleSpecificInfo {
57         CoverageVariableInfo cvi;
58         String targetLanguage;
59     }
60 
61     final LocaleSpecificInfo myInfo = new LocaleSpecificInfo();
62 
63     /**
64      * We define a regex finder for use in the lookup. It has extra tests based on the ci value and the cvi value,
65      * duplicating
66      * what was in SupplementalDataInfo. It uses the sets instead of converting to regex strings.
67      *
68      * @author markdavis
69      *
70      */
71     public static class MyRegexFinder extends RegexFinder {
72         final private SetMatchType additionalMatch;
73         final private CoverageLevelInfo ci;
74 
MyRegexFinder(String pattern, String additionalMatch, CoverageLevelInfo ci)75         public MyRegexFinder(String pattern, String additionalMatch, CoverageLevelInfo ci) {
76             super(pattern);
77             // remove the ${ and the }, and change - to _.
78             this.additionalMatch = additionalMatch == null
79                 ? null
80                     : SetMatchType.valueOf(
81                         additionalMatch.substring(2, additionalMatch.length() - 1).replace('-', '_'));
82             this.ci = ci;
83         }
84 
85         @Override
find(String item, Object context, Info info)86         public boolean find(String item, Object context, Info info) {
87             LocaleSpecificInfo localeSpecificInfo = (LocaleSpecificInfo) context;
88             // Modified the logic to handle the case where we want specific languages and specific territories.
89             // Any match in language script or territory will succeed when multiple items are present.
90             boolean lstOK = false;
91             if (ci.inLanguage == null && ci.inScriptSet == null && ci.inTerritorySet == null) {
92                 lstOK = true;
93             } else if (ci.inLanguage != null
94                 && ci.inLanguage.matcher(localeSpecificInfo.targetLanguage).matches()) {
95                 lstOK = true;
96             } else if (ci.inScriptSet != null
97                 && !disjoint(ci.inScriptSet, localeSpecificInfo.cvi.targetScripts)) {
98                 lstOK = true;
99             } else if (ci.inTerritorySet != null
100                 && !disjoint(ci.inTerritorySet, localeSpecificInfo.cvi.targetTerritories)) {
101                 lstOK = true;
102             }
103 
104             if (!lstOK) {
105                 return false;
106             }
107             boolean result = super.find(item, context, info); // also sets matcher in RegexFinder
108             if (!result) {
109                 return false;
110             }
111             if (additionalMatch != null) {
112                 String groupMatch = info.value[1];
113 //                    String groupMatch = matcher.group(1);
114                 // we match on a group, so get the right one
115                 switch (additionalMatch) {
116                 case Target_Language:
117                     return localeSpecificInfo.targetLanguage.equals(groupMatch);
118                 case Target_Scripts:
119                     return localeSpecificInfo.cvi.targetScripts.contains(groupMatch);
120                 case Target_Territories:
121                     return localeSpecificInfo.cvi.targetTerritories.contains(groupMatch);
122                 case Target_TimeZones:
123                     return localeSpecificInfo.cvi.targetTimeZones.contains(groupMatch);
124                 case Target_Currencies:
125                     return localeSpecificInfo.cvi.targetCurrencies.contains(groupMatch);
126                     // For Target_Plurals, we have to account for the fact that the @count= part might not be in the
127                     // xpath, so we shouldn't reject the match because of that. ( i.e. The regex is usually
128                     // ([@count='${Target-Plurals}'])?
129                 case Target_Plurals:
130                     return (groupMatch == null ||
131                     groupMatch.length() == 0 || localeSpecificInfo.cvi.targetPlurals.contains(groupMatch));
132                 case Calendar_List:
133                     return localeSpecificInfo.cvi.calendars.contains(groupMatch);
134                 }
135             }
136 
137             return true;
138         }
139 
140         @Override
equals(Object obj)141         public boolean equals(Object obj) {
142             return false;
143         }
144     }
145 
CoverageLevel2(SupplementalDataInfo sdi, String locale)146     private CoverageLevel2(SupplementalDataInfo sdi, String locale) {
147         myInfo.targetLanguage = new LanguageTagParser().set(locale).getLanguage();
148         myInfo.cvi = sdi.getCoverageVariableInfo(myInfo.targetLanguage);
149         lookup = sdi.getCoverageLookup();
150     }
151 
CoverageLevel2(SupplementalDataInfo sdi, String locale, String ruleFile)152     private CoverageLevel2(SupplementalDataInfo sdi, String locale, String ruleFile) {
153         myInfo.targetLanguage = new LanguageTagParser().set(locale).getLanguage();
154         myInfo.cvi = sdi.getCoverageVariableInfo(myInfo.targetLanguage);
155         RawCoverageFile rcf = new RawCoverageFile();
156         lookup = rcf.load(ruleFile);
157     }
158 
159     /**
160      * get an instance, using CldrUtility.SUPPLEMENTAL_DIRECTORY
161      *
162      * @param locale
163      * @return
164      * @deprecated Don't use this. call the version which takes a SupplementalDataInfo as an argument.
165      * @see #getInstance(SupplementalDataInfo, String)
166      * @see CLDRPaths#SUPPLEMENTAL_DIRECTORY
167      */
168     @Deprecated
getInstance(String locale)169     public static CoverageLevel2 getInstance(String locale) {
170         return new CoverageLevel2(SupplementalDataInfo.getInstance(), locale);
171     }
172 
getInstance(SupplementalDataInfo sdi, String locale)173     public static CoverageLevel2 getInstance(SupplementalDataInfo sdi, String locale) {
174         return new CoverageLevel2(sdi, locale);
175     }
176 
getInstance(SupplementalDataInfo sdi, String locale, String ruleFile)177     public static CoverageLevel2 getInstance(SupplementalDataInfo sdi, String locale, String ruleFile) {
178         return new CoverageLevel2(sdi, locale, ruleFile);
179     }
180 
getLevel(String path)181     public Level getLevel(String path) {
182         if (path == null) {
183             return Level.UNDETERMINED;
184         }
185         synchronized (lookup) { // synchronize on the class, since the Matchers are changed during the matching process
186             Level result;
187             if (DEBUG_LOOKUP) { // for testing
188                 Output<String[]> checkItems = new Output<>();
189                 Output<Finder> matcherFound = new Output<>();
190                 List<String> failures = new ArrayList<>();
191                 result = lookup.get(path, myInfo, checkItems, matcherFound, failures);
192                 for (String s : failures) {
193                     System.out.println(s);
194                 }
195             } else {
196                 result = lookup.get(path, myInfo, null);
197             }
198             return result == null ? Level.COMPREHENSIVE : result;
199         }
200     }
201 
getIntLevel(String path)202     public int getIntLevel(String path) {
203         return getLevel(path).getLevel();
204     }
205 
206     // Moved code in from SupplementalInfo
207     //
208     // TODO:
209     // 1. drop the corresponding code in SupplementalInfo.
210     // 2. change SupplementalInfo to skip reading coverageLevels.xml
211     // 3. change the default creation of CoverageLevels2 to instead use this code with that file.
212     // Later
213     // 4. Generalize the RawCoverageFile code, and use with other supplemental files.
214     //    That way supplemental files can be read as needed instead of all at once.
215 
216     final private List<String> approvalRequirements = new LinkedList<>(); // xpath array
217     private VariableReplacer coverageVariables = new VariableReplacer();
218     private SortedSet<CoverageLevelInfo> coverageLevels = new TreeSet<>();
219 
220     public class RawCoverageFile {
221 
222         private VersionInfo cldrVersion;
223 
224         class MyHandler extends XMLFileReader.SimpleHandler {
225             @Override
handlePathValue(String path, String pathValue)226             public void handlePathValue(String path, String pathValue) {
227                 XPathParts parts = XPathParts.getFrozenInstance(path);
228                 String level1 = parts.size() < 2 ? null : parts.getElement(1);
229                 if (level1.equals("version")) {
230                     if (cldrVersion == null) {
231                         String version = parts.getAttributeValue(1, "cldrVersion");
232                         if (version == null) {
233                             version = parts.getAttributeValue(0, "version");
234                         }
235                         cldrVersion = VersionInfo.getInstance(version);
236                     }
237                 } else if (parts.containsElement("approvalRequirement")) {
238                     approvalRequirements.add(parts.toString());
239                 } else if (parts.containsElement("coverageLevel")) {
240                     String match = parts.containsAttribute("match") ? coverageVariables.replace(parts.getAttributeValue(-1,
241                         "match")) : null;
242                     String valueStr = parts.getAttributeValue(-1, "value");
243                     // Ticket 7125: map the number to English. So switch from English to number for construction
244                     valueStr = Integer.toString(Level.get(valueStr).getLevel());
245 
246                     String inLanguage = parts.containsAttribute("inLanguage") ? coverageVariables.replace(parts
247                         .getAttributeValue(-1, "inLanguage")) : null;
248                     String inScript = parts.containsAttribute("inScript") ? coverageVariables.replace(parts
249                         .getAttributeValue(-1, "inScript")) : null;
250                     String inTerritory = parts.containsAttribute("inTerritory") ? coverageVariables.replace(parts
251                         .getAttributeValue(-1, "inTerritory")) : null;
252                     Integer value = (valueStr != null) ? Integer.valueOf(valueStr) : Integer.valueOf("101");
253                     if (cldrVersion.getMajor() < 2) {
254                         value = 40;
255                     }
256                     CoverageLevelInfo ci = new CoverageLevelInfo(match, value, inLanguage, inScript, inTerritory);
257                     coverageLevels.add(ci);
258                 } else if (parts.containsElement("coverageVariable")) {
259                     String key = parts.getAttributeValue(-1, "key");
260                     String value = parts.getAttributeValue(-1, "value");
261                     coverageVariables.add(key, value);
262                 }
263             }
264             public void cleanup() {
265                 CLDRConfig testInfo = ToolConfig.getToolInstance();
266                 SupplementalDataInfo supplementalDataInfo2 = testInfo.getSupplementalDataInfo();
267                 CoverageLevelInfo.fixEU(coverageLevels, supplementalDataInfo2);
268                 coverageLevels = Collections.unmodifiableSortedSet(coverageLevels);
269             }
270         }
271 
272         public RegexLookup<Level> makeCoverageLookup() {
273             RegexLookup<Level> lookup = new RegexLookup<>(RegexLookup.LookupType.STAR_PATTERN_LOOKUP);
274 
275             Matcher variable = PatternCache.get("\\$\\{[A-Za-z][\\-A-Za-z]*\\}").matcher("");
276 
277             for (CoverageLevelInfo ci : coverageLevels) {
278                 String pattern = ci.match.replace('\'', '"')
279                     .replace("[@", "\\[@") // make sure that attributes are quoted
280                     .replace("(", "(?:") // make sure that there are no capturing groups (beyond what we generate
281                     .replace("(?:?!", "(?!"); // Allow negative lookahead
282                 pattern = "^//ldml/" + pattern + "$"; // for now, force a complete match
283                 String variableType = null;
284                 variable.reset(pattern);
285                 if (variable.find()) {
286                     pattern = pattern.substring(0, variable.start()) + "([^\"]*)" + pattern.substring(variable.end());
287                     variableType = variable.group();
288                     if (variable.find()) {
289                         throw new IllegalArgumentException("We can only handle a single variable on a line");
290                     }
291                 }
292 
293                 // .replaceAll("\\]","\\\\]");
294                 lookup.add(new CoverageLevel2.MyRegexFinder(pattern, variableType, ci), ci.value);
295             }
296             return lookup;
297         }
298 
299         public RegexLookup<Level> load (String file) {
300             MyHandler myHandler = new MyHandler();
301             XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
302             xfr.read(file, -1, true);
303             myHandler.cleanup();
304             return makeCoverageLookup();
305         }
306     }
307 
308     // run these from first to last to get the approval info.
309     volatile List<ApprovalRequirementMatcher> approvalMatchers = null;
310 
311     /**
312      * Get the preliminary number of required votes based on the given locale and PathHeader
313      *
314      * Important: this number may not agree with VoteResolver.getRequiredVotes
315      * since VoteResolver also takes the baseline status into account.
316      *
317      * Called by VoteResolver, ShowStarredCoverage, TestCoverage, and TestCoverageLevel.
318      *
319      * @param loc the CLDRLocale
320      * @param ph the PathHeader - which path this is applied to, or null if unknown.
321      * @return a number such as 4 or 8
322      */
323     public int getRequiredVotes(CLDRLocale loc, PathHeader ph) {
324         if (approvalMatchers == null) {
325             approvalMatchers = ApprovalRequirementMatcher.buildAll(approvalRequirements);
326         }
327 
328         for (ApprovalRequirementMatcher m : approvalMatchers) {
329             if (m.matches(loc, ph)) {
330                 return m.getRequiredVotes();
331             }
332         }
333         throw new RuntimeException("Error: " + loc + " " + ph + " ran off the end of the approvalMatchers.");
334     }
335 
336     // TODO: move to separate tool
337 
338     public static void main(String[] args) {
339         // Quick test during development to compare old to new coverageLevels
340 
341         checkCoverage("root");
342         checkCoverage("de");
343     }
344 
345     private static void checkCoverage(String locale) {
346         final CLDRConfig testInfo = ToolConfig.getToolInstance();
347         final SupplementalDataInfo supplementalDataInfo2 = testInfo.getSupplementalDataInfo();
348 
349         CoverageLevel2 cvOld = CoverageLevel2.getInstance(supplementalDataInfo2, locale);
350 
351         CoverageLevel2 cvNew = CoverageLevel2.getInstance(supplementalDataInfo2, locale, CLDRPaths.COMMON_DIRECTORY + "supplemental-temp/coverageLevels2.xml");
352 
353         CLDRFile cldrFile = testInfo.getCldrFactory().make(locale, true);
354         Set<String> paths = Builder.with(new TreeSet<String>()).addAll(cldrFile).get();
355         PathHeader.Factory phf = PathHeader.getFactory();
356         Map<PathHeader, String> diff = new TreeMap<>();
357         Map<PathHeader, String> same = new TreeMap<>();
358         for (String path : paths) {
359             Level levelOld = cvOld.getLevel(path);
360             Level levelNew = cvNew.getLevel(path);
361             if (levelOld != levelNew) {
362                 diff.put(phf.fromPath(path), locale + "\t" + levelOld + "\t" + levelNew + "\t" + path);
363             } else if (levelOld.compareTo(Level.MODERATE) < 0){
364                 same.put(phf.fromPath(path), locale + "\t" + path);
365             }
366         }
367         System.out.println("\nLocale\tPath\tPathHeader");
368         for (Entry<PathHeader, String> line : same.entrySet()) {
369             System.out.println(line.getValue() + "\t" + line.getKey());
370         }
371         System.out.println("\nLocale\tOld\tNew\tPath\tPathHeader");
372         for (Entry<PathHeader, String> line : diff.entrySet()) {
373             System.out.println(line.getValue() + "\t" + line.getKey());
374         }
375     }
376 
377 }
378