package org.unicode.cldr.test;

import static java.util.Collections.disjoint;

import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;

import org.unicode.cldr.tool.ToolConfig;
import org.unicode.cldr.util.Builder;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRLocale;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility.VariableReplacer;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.Level;
import org.unicode.cldr.util.PathHeader;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.RegexLookup;
import org.unicode.cldr.util.RegexLookup.Finder;
import org.unicode.cldr.util.RegexLookup.RegexFinder;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.ApprovalRequirementMatcher;
import org.unicode.cldr.util.SupplementalDataInfo.CoverageLevelInfo;
import org.unicode.cldr.util.SupplementalDataInfo.CoverageVariableInfo;
import org.unicode.cldr.util.XMLFileReader;
import org.unicode.cldr.util.XPathParts;

import com.ibm.icu.util.Output;
import com.ibm.icu.util.VersionInfo;

/**
 * Looks up the coverage {@link Level} of xpaths for a single locale. The matching rules come
 * either from a {@link SupplementalDataInfo} or from a rule file read by {@link RawCoverageFile}.
 */
public class CoverageLevel2 {

    // To modify the results, see /cldr/common/supplemental/coverageLevels.xml

    /**
     * Enable to get more verbose output when debugging
     */
    private static final boolean DEBUG_LOOKUP = false;

    /**
     * The rule table consulted by {@link #getLevel(String)}. It is final because it also serves
     * as the monitor for that method; each constructor assigns it exactly once.
     */
    private final RegexLookup<Level> lookup;

    /**
     * The kinds of coverage variable a rule pattern can refer to. The constant names are the
     * variable names from the rule with {@code -} replaced by {@code _}
     * (see {@link MyRegexFinder}).
     */
    enum SetMatchType {
        Target_Language, Target_Scripts, Target_Territories, Target_TimeZones, Target_Currencies, Target_Plurals, Calendar_List
    }

    /** Per-locale data handed to the finders as the lookup context. */
    private static class LocaleSpecificInfo {
        CoverageVariableInfo cvi;
        String targetLanguage;
    }

    final LocaleSpecificInfo myInfo = new LocaleSpecificInfo();

    /**
     * We define a regex finder for use in the lookup. It has extra tests based on the ci value and the cvi value,
     * duplicating what was in SupplementalDataInfo. It uses the sets instead of converting to regex strings.
     *
     * @author markdavis
     */
    public static class MyRegexFinder extends RegexFinder {
        private final SetMatchType additionalMatch;
        private final CoverageLevelInfo ci;

        /**
         * @param pattern the regex handed to the superclass
         * @param additionalMatch a variable reference of the form <code>${Some-Name}</code>, or null
         *            when the rule has no variable
         * @param ci the coverage rule whose language/script/territory restrictions are tested in
         *            {@link #find}
         * @throws IllegalArgumentException (from {@code Enum.valueOf}) if the variable name does
         *             not correspond to a {@link SetMatchType} constant
         */
        public MyRegexFinder(String pattern, String additionalMatch, CoverageLevelInfo ci) {
            super(pattern);
            // remove the ${ and the }, and change - to _.
            this.additionalMatch = additionalMatch == null
                ? null
                : SetMatchType.valueOf(
                    additionalMatch.substring(2, additionalMatch.length() - 1).replace('-', '_'));
            this.ci = ci;
        }

        /**
         * Tests the rule's locale restrictions first, then the regex, then (if the rule has a
         * variable) whether the text captured in group 1 belongs to the locale's set for that
         * variable.
         *
         * @param item the path being tested
         * @param context must be the {@code LocaleSpecificInfo} of the calling CoverageLevel2
         * @param info receives the match groups from the superclass; {@code info.value[1]} is read
         *            when the rule has a variable
         * @return true only if every applicable test succeeds
         */
        @Override
        public boolean find(String item, Object context, Info info) {
            LocaleSpecificInfo localeSpecificInfo = (LocaleSpecificInfo) context;
            // Modified the logic to handle the case where we want specific languages and specific territories.
            // Any match in language script or territory will succeed when multiple items are present.
            boolean lstOK = false;
            if (ci.inLanguage == null && ci.inScriptSet == null && ci.inTerritorySet == null) {
                lstOK = true;
            } else if (ci.inLanguage != null
                && ci.inLanguage.matcher(localeSpecificInfo.targetLanguage).matches()) {
                lstOK = true;
            } else if (ci.inScriptSet != null
                && !disjoint(ci.inScriptSet, localeSpecificInfo.cvi.targetScripts)) {
                lstOK = true;
            } else if (ci.inTerritorySet != null
                && !disjoint(ci.inTerritorySet, localeSpecificInfo.cvi.targetTerritories)) {
                lstOK = true;
            }

            if (!lstOK) {
                return false;
            }
            boolean result = super.find(item, context, info); // also sets matcher in RegexFinder
            if (!result) {
                return false;
            }
            if (additionalMatch != null) {
                // we match on a group, so get the right one
                String groupMatch = info.value[1];
                switch (additionalMatch) {
                case Target_Language:
                    return localeSpecificInfo.targetLanguage.equals(groupMatch);
                case Target_Scripts:
                    return localeSpecificInfo.cvi.targetScripts.contains(groupMatch);
                case Target_Territories:
                    return localeSpecificInfo.cvi.targetTerritories.contains(groupMatch);
                case Target_TimeZones:
                    return localeSpecificInfo.cvi.targetTimeZones.contains(groupMatch);
                case Target_Currencies:
                    return localeSpecificInfo.cvi.targetCurrencies.contains(groupMatch);
                // For Target_Plurals, we have to account for the fact that the @count= part might not be in the
                // xpath, so we shouldn't reject the match because of that. ( i.e. The regex is usually
                // ([@count='${Target-Plurals}'])?
                case Target_Plurals:
                    return (groupMatch == null ||
                        groupMatch.length() == 0 || localeSpecificInfo.cvi.targetPlurals.contains(groupMatch));
                case Calendar_List:
                    return localeSpecificInfo.cvi.calendars.contains(groupMatch);
                }
            }

            return true;
        }

        /**
         * Always reports inequality, even when compared with itself.
         * NOTE(review): presumably deliberate so that no two finders are ever treated as
         * duplicates by the lookup; confirm before changing. hashCode is not overridden.
         */
        @Override
        public boolean equals(Object obj) {
            return false;
        }
    }

    /** Builds an instance whose rules come from {@code sdi.getCoverageLookup()}. */
    private CoverageLevel2(SupplementalDataInfo sdi, String locale) {
        myInfo.targetLanguage = new LanguageTagParser().set(locale).getLanguage();
        myInfo.cvi = sdi.getCoverageVariableInfo(myInfo.targetLanguage);
        lookup = sdi.getCoverageLookup();
    }

    /** Builds an instance whose rules are read from {@code ruleFile} by a {@link RawCoverageFile}. */
    private CoverageLevel2(SupplementalDataInfo sdi, String locale, String ruleFile) {
        myInfo.targetLanguage = new LanguageTagParser().set(locale).getLanguage();
        myInfo.cvi = sdi.getCoverageVariableInfo(myInfo.targetLanguage);
        RawCoverageFile rcf = new RawCoverageFile();
        lookup = rcf.load(ruleFile);
    }

    /**
     * get an instance, using CldrUtility.SUPPLEMENTAL_DIRECTORY
     *
     * @param locale the locale ID; only its language subtag is used to select coverage variables
     * @return a new instance backed by {@code SupplementalDataInfo.getInstance()}
     * @deprecated Don't use this. call the version which takes a SupplementalDataInfo as an argument.
     * @see #getInstance(SupplementalDataInfo, String)
     * @see CLDRPaths#SUPPLEMENTAL_DIRECTORY
     */
    @Deprecated
    public static CoverageLevel2 getInstance(String locale) {
        return new CoverageLevel2(SupplementalDataInfo.getInstance(), locale);
    }

    /**
     * Get an instance that uses the coverage rules held by the given SupplementalDataInfo.
     *
     * @param sdi source of the coverage variables and the coverage lookup
     * @param locale the locale ID; only its language subtag is used to select coverage variables
     * @return a new instance
     */
    public static CoverageLevel2 getInstance(SupplementalDataInfo sdi, String locale) {
        return new CoverageLevel2(sdi, locale);
    }

    /**
     * Get an instance that reads its coverage rules from a file instead of taking the lookup
     * from the SupplementalDataInfo.
     *
     * @param sdi source of the coverage variables
     * @param locale the locale ID; only its language subtag is used to select coverage variables
     * @param ruleFile path of the XML rule file to read
     * @return a new instance
     */
    public static CoverageLevel2 getInstance(SupplementalDataInfo sdi, String locale, String ruleFile) {
        return new CoverageLevel2(sdi, locale, ruleFile);
    }

    /**
     * Get the coverage level of a path for this instance's locale.
     *
     * @param path the xpath to look up; may be null
     * @return {@link Level#UNDETERMINED} for a null path, {@link Level#COMPREHENSIVE} when no rule
     *         matches, otherwise the level of the matching rule
     */
    public Level getLevel(String path) {
        if (path == null) {
            return Level.UNDETERMINED;
        }
        // Synchronize on the lookup object itself (not on this instance or the class),
        // since the Matchers are changed during the matching process.
        synchronized (lookup) {
            Level result;
            if (DEBUG_LOOKUP) { // for testing
                Output<String[]> checkItems = new Output<>();
                Output<Finder> matcherFound = new Output<>();
                List<String> failures = new ArrayList<>();
                result = lookup.get(path, myInfo, checkItems, matcherFound, failures);
                for (String s : failures) {
                    System.out.println(s);
                }
            } else {
                result = lookup.get(path, myInfo, null);
            }
            return result == null ? Level.COMPREHENSIVE : result;
        }
    }

    /**
     * Same as {@link #getLevel(String)}, but returns the numeric value of the level.
     *
     * @param path the xpath to look up; may be null
     * @return {@code getLevel(path).getLevel()}
     */
    public int getIntLevel(String path) {
        return getLevel(path).getLevel();
    }

    // Moved code in from SupplementalInfo
    //
    // TODO:
    // 1. drop the corresponding code in SupplementalInfo.
    // 2. change SupplementalInfo to skip reading coverageLevels.xml
    // 3. change the default creation of CoverageLevels2 to instead use this code with that file.
    // Later
    // 4. Generalize the RawCoverageFile code, and use with other supplemental files.
    // That way supplemental files can be read as needed instead of all at once.

    private final List<String> approvalRequirements = new LinkedList<>(); // xpath array
    private VariableReplacer coverageVariables = new VariableReplacer();
    private SortedSet<CoverageLevelInfo> coverageLevels = new TreeSet<>();

    /**
     * Reads a coverage rule file into the enclosing instance's {@code approvalRequirements},
     * {@code coverageVariables} and {@code coverageLevels}, and turns the collected rules into
     * a {@link RegexLookup}.
     */
    public class RawCoverageFile {

        private VersionInfo cldrVersion;

        /** Receives each path of the rule file from the {@link XMLFileReader}. */
        class MyHandler extends XMLFileReader.SimpleHandler {
            @Override
            public void handlePathValue(String path, String pathValue) {
                XPathParts parts = XPathParts.getFrozenInstance(path);
                String level1 = parts.size() < 2 ? null : parts.getElement(1);
                // Constant-first comparison: level1 is null when the path has fewer than two
                // elements, and such a path must fall through to the other checks, not throw.
                if ("version".equals(level1)) {
                    if (cldrVersion == null) {
                        String version = parts.getAttributeValue(1, "cldrVersion");
                        if (version == null) {
                            version = parts.getAttributeValue(0, "version");
                        }
                        cldrVersion = VersionInfo.getInstance(version);
                    }
                } else if (parts.containsElement("approvalRequirement")) {
                    approvalRequirements.add(parts.toString());
                } else if (parts.containsElement("coverageLevel")) {
                    String match = parts.containsAttribute("match")
                        ? coverageVariables.replace(parts.getAttributeValue(-1, "match"))
                        : null;
                    String valueStr = parts.getAttributeValue(-1, "value");
                    // Ticket 7125: map the number to English. So switch from English to number for construction
                    Integer value = Integer.valueOf(Level.get(valueStr).getLevel());

                    String inLanguage = parts.containsAttribute("inLanguage")
                        ? coverageVariables.replace(parts.getAttributeValue(-1, "inLanguage"))
                        : null;
                    String inScript = parts.containsAttribute("inScript")
                        ? coverageVariables.replace(parts.getAttributeValue(-1, "inScript"))
                        : null;
                    String inTerritory = parts.containsAttribute("inTerritory")
                        ? coverageVariables.replace(parts.getAttributeValue(-1, "inTerritory"))
                        : null;
                    // Files with a major version below 2 get a fixed value. If no version element
                    // has been read yet, treat the file as current instead of dereferencing null.
                    if (cldrVersion != null && cldrVersion.getMajor() < 2) {
                        value = 40;
                    }
                    CoverageLevelInfo ci = new CoverageLevelInfo(match, value, inLanguage, inScript, inTerritory);
                    coverageLevels.add(ci);
                } else if (parts.containsElement("coverageVariable")) {
                    String key = parts.getAttributeValue(-1, "key");
                    String value = parts.getAttributeValue(-1, "value");
                    coverageVariables.add(key, value);
                }
            }

            /**
             * Post-processes the collected rules with {@code CoverageLevelInfo.fixEU} and then
             * replaces the enclosing instance's rule set with an unmodifiable view of it.
             */
            public void cleanup() {
                CLDRConfig testInfo = ToolConfig.getToolInstance();
                SupplementalDataInfo supplementalDataInfo2 = testInfo.getSupplementalDataInfo();
                CoverageLevelInfo.fixEU(coverageLevels, supplementalDataInfo2);
                coverageLevels = Collections.unmodifiableSortedSet(coverageLevels);
            }
        }

        /**
         * Converts every collected coverage rule into a {@link MyRegexFinder} and registers it,
         * with the rule's value, in a new lookup.
         *
         * @return the populated lookup
         * @throws IllegalArgumentException if a rule contains more than one <code>${...}</code> variable
         */
        public RegexLookup<Level> makeCoverageLookup() {
            RegexLookup<Level> coverageLookup = new RegexLookup<>(RegexLookup.LookupType.STAR_PATTERN_LOOKUP);

            Matcher variable = PatternCache.get("\\$\\{[A-Za-z][\\-A-Za-z]*\\}").matcher("");

            for (CoverageLevelInfo ci : coverageLevels) {
                String pattern = ci.match.replace('\'', '"')
                    .replace("[@", "\\[@") // make sure that attributes are quoted
                    .replace("(", "(?:") // make sure that there are no capturing groups (beyond what we generate
                    .replace("(?:?!", "(?!"); // Allow negative lookahead
                pattern = "^//ldml/" + pattern + "$"; // for now, force a complete match
                String variableType = null;
                variable.reset(pattern);
                if (variable.find()) {
                    // Swap the variable reference for the single capturing group that
                    // MyRegexFinder.find reads back as group 1.
                    pattern = pattern.substring(0, variable.start()) + "([^\"]*)" + pattern.substring(variable.end());
                    variableType = variable.group();
                    if (variable.find()) {
                        throw new IllegalArgumentException("We can only handle a single variable on a line");
                    }
                }

                coverageLookup.add(new CoverageLevel2.MyRegexFinder(pattern, variableType, ci), ci.value);
            }
            return coverageLookup;
        }

        /**
         * Reads the given rule file and builds the lookup from its contents.
         *
         * @param file path of the XML rule file
         * @return the lookup built by {@link #makeCoverageLookup()}
         */
        public RegexLookup<Level> load(String file) {
            MyHandler myHandler = new MyHandler();
            XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
            xfr.read(file, -1, true);
            myHandler.cleanup();
            return makeCoverageLookup();
        }
    }

    // run these from first to last to get the approval info.
    volatile List<ApprovalRequirementMatcher> approvalMatchers = null;

    /**
     * Get the preliminary number of required votes based on the given locale and PathHeader
     *
     * Important: this number may not agree with VoteResolver.getRequiredVotes
     * since VoteResolver also takes the baseline status into account.
     *
     * Called by VoteResolver, ShowStarredCoverage, TestCoverage, and TestCoverageLevel.
     *
     * @param loc the CLDRLocale
     * @param ph the PathHeader - which path this is applied to, or null if unknown.
     * @return a number such as 4 or 8
     * @throws RuntimeException if no approval matcher matches the locale and path
     */
    public int getRequiredVotes(CLDRLocale loc, PathHeader ph) {
        // Read the volatile field once so the null check and the iteration see the same list.
        List<ApprovalRequirementMatcher> matchers = approvalMatchers;
        if (matchers == null) {
            matchers = ApprovalRequirementMatcher.buildAll(approvalRequirements);
            approvalMatchers = matchers;
        }

        for (ApprovalRequirementMatcher m : matchers) {
            if (m.matches(loc, ph)) {
                return m.getRequiredVotes();
            }
        }
        throw new RuntimeException("Error: " + loc + " " + ph + " ran off the end of the approvalMatchers.");
    }

    // TODO: move to separate tool

    public static void main(String[] args) {
        // Quick test during development to compare old to new coverageLevels

        checkCoverage("root");
        checkCoverage("de");
    }

    /**
     * Prints, for every path of the resolved file of {@code locale}, either the paths whose
     * level is the same under both rule sources and below {@link Level#MODERATE}, or the paths
     * whose levels differ between the two sources.
     */
    private static void checkCoverage(String locale) {
        final CLDRConfig testInfo = ToolConfig.getToolInstance();
        final SupplementalDataInfo supplementalDataInfo2 = testInfo.getSupplementalDataInfo();

        CoverageLevel2 cvOld = CoverageLevel2.getInstance(supplementalDataInfo2, locale);

        CoverageLevel2 cvNew = CoverageLevel2.getInstance(supplementalDataInfo2, locale,
            CLDRPaths.COMMON_DIRECTORY + "supplemental-temp/coverageLevels2.xml");

        CLDRFile cldrFile = testInfo.getCldrFactory().make(locale, true);
        Set<String> paths = Builder.with(new TreeSet<String>()).addAll(cldrFile).get();
        PathHeader.Factory phf = PathHeader.getFactory();
        Map<PathHeader, String> diff = new TreeMap<>();
        Map<PathHeader, String> same = new TreeMap<>();
        for (String path : paths) {
            Level levelOld = cvOld.getLevel(path);
            Level levelNew = cvNew.getLevel(path);
            if (levelOld != levelNew) {
                diff.put(phf.fromPath(path), locale + "\t" + levelOld + "\t" + levelNew + "\t" + path);
            } else if (levelOld.compareTo(Level.MODERATE) < 0) {
                same.put(phf.fromPath(path), locale + "\t" + path);
            }
        }
        System.out.println("\nLocale\tPath\tPathHeader");
        for (Entry<PathHeader, String> line : same.entrySet()) {
            System.out.println(line.getValue() + "\t" + line.getKey());
        }
        System.out.println("\nLocale\tOld\tNew\tPath\tPathHeader");
        for (Entry<PathHeader, String> line : diff.entrySet()) {
            System.out.println(line.getValue() + "\t" + line.getKey());
        }
    }

}