package org.unicode.cldr.tool;

import java.io.File;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.tool.Option.Options;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.DraftStatus;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CoverageInfo;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.Organization;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.RegexFileParser;
import org.unicode.cldr.util.RegexFileParser.RegexLineParser;
import org.unicode.cldr.util.RegexLookup;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.XMLSource;
import org.unicode.cldr.util.XPathParts;

import com.ibm.icu.util.Output;

/**
 * Factory for filtering CLDRFiles by organization and replacing certain values.
 * Organization coverage data is in org/unicode/cldr/util/data/Locales.txt.
 *
 * <p>Unresolved files produced by this factory go through three passes, in order:
 * value/xpath modification (driven by a modifier file), coverage filtering for the
 * organization, and removal of redundant paths.
 *
 * @author jchye
 */
public class FilterFactory extends Factory {
    /**
     * Types of data modification supported. The constant names are matched
     * against the first field of each line of the modifier file, so they must
     * keep their exact (lower-case) spelling.
     */
    private enum ModificationType {
        xpath, value;
    }

    /** Matches a literal (non-regex) XPath; used to tell plain paths from path regexes. */
    private static final Pattern XPATH_PATTERN = PatternCache.get("/(/\\w++(\\[@\\w++=\"[^\"()%\\\\]+\"])*)++");

    private static final Options options = new Options(
        "Filters CLDR XML files according to organizational coverage levels and an " +
            "input file of replacement values/xpaths.")
        .add("org", 'o', ".*", Organization.cldr.name(), "The organization that the filtering is for. If set, also removes duplicate paths.")
        .add("locales", 'l', ".*", ".*", "A regular expression indicating the locales to be filtered");

    private final Factory rawFactory;
    private final String organization;
    private final boolean modifyValues;

    /** Modifiers applied by {@link #filterAltValues}, in the order they were loaded. */
    private final List<Modifier> modifiers = new ArrayList<>();

    /**
     * Creates a new Factory for filtering CLDRFiles.
     *
     * @param rawFactory
     *            the factory to be filtered
     * @param organization
     *            the organization that the filtering is catered towards; when
     *            null, coverage filtering and redundant-path removal are skipped
     * @param modifyValues
     *            true if certain values in the data should be modified or replaced
     */
    private FilterFactory(Factory rawFactory, String organization, boolean modifyValues) {
        this.rawFactory = rawFactory;
        this.organization = organization;
        setSupplementalDirectory(rawFactory.getSupplementalDirectory());
        this.modifyValues = modifyValues;
    }

    /**
     * Creates a FilterFactory and loads its modifiers from "dataModifiers.txt".
     *
     * @param rawFactory
     *            the factory to be filtered
     * @param organization
     *            the organization that the filtering is catered towards
     * @param usesAltValue
     *            true if values/xpaths should be modified according to the modifier file
     * @return the configured factory
     */
    public static FilterFactory load(Factory rawFactory, String organization, boolean usesAltValue) {
        FilterFactory filterFactory = new FilterFactory(rawFactory, organization, usesAltValue);
        filterFactory.loadModifiers("dataModifiers.txt");
        return filterFactory;
    }

    @Override
    public File[] getSourceDirectories() {
        return rawFactory.getSourceDirectories();
    }

    @Override
    public List<File> getSourceDirectoriesForLocale(String localeID) {
        return rawFactory.getSourceDirectoriesForLocale(localeID);
    }

    /**
     * Builds the file for a locale: resolved files are built from a resolving
     * source, unresolved files are taken from the raw factory and filtered.
     */
    @Override
    protected CLDRFile handleMake(String localeID, boolean resolved, DraftStatus minimalDraftStatus) {
        if (resolved) {
            return new CLDRFile(makeResolvingSource(localeID, minimalDraftStatus));
        }
        return filterCldrFile(localeID, minimalDraftStatus);
    }

    /**
     * @return a filtered CLDRFile: a thawed clone of the raw factory's unresolved
     *         file with value modification, coverage filtering and redundant-path
     *         removal applied in that order.
     */
    private CLDRFile filterCldrFile(String localeID, DraftStatus minimalDraftStatus) {
        CLDRFile rawFile = rawFactory.make(localeID, false, minimalDraftStatus).cloneAsThawed();

        filterAltValues(rawFile);
        filterCoverage(rawFile);
        removeRedundantPaths(rawFile);
        return rawFile;
    }

    /**
     * Replaces the value for certain XPaths with their alternate value, by
     * running every loaded modifier that has entries for the file's locale.
     * Does nothing unless value modification was requested.
     *
     * @param rawFile
     *            the file to modify in place
     */
    private void filterAltValues(CLDRFile rawFile) {
        if (!modifyValues) {
            return;
        }

        for (Modifier modifier : modifiers) {
            Modifier localeModifier = modifier.filterLocale(rawFile.getLocaleID());
            if (!localeModifier.isEmpty()) {
                localeModifier.modifyFile(rawFile);
            }
        }
    }

    /**
     * Filters a CLDRFile according to the specified organization's coverage level:
     * every path whose coverage value is above the organization's level for the
     * locale is removed. Does nothing when no organization is set.
     *
     * @param rawFile
     *            the file to filter in place
     */
    private void filterCoverage(CLDRFile rawFile) {
        if (organization == null) {
            return;
        }

        String localeID = rawFile.getLocaleID();
        int minLevel = StandardCodes.make()
            .getLocaleCoverageLevel(organization, localeID)
            .getLevel();
        CoverageInfo covInfo = CLDRConfig.getInstance().getCoverageInfo();

        // Collect first and remove afterwards (as removeRedundantPaths does) so the
        // file is never mutated while it is being iterated.
        // NOTE(review): an earlier comment here said locale metadata shouldn't be
        // stripped, but no path is exempted from the coverage check -- confirm
        // whether //ldml/identity needs an explicit guard.
        List<String> pathsToRemove = new ArrayList<>();
        for (String xpath : rawFile) {
            int level = covInfo.getCoverageValue(xpath, localeID);
            if (level > minLevel) {
                pathsToRemove.add(xpath);
            }
        }
        for (String xpath : pathsToRemove) {
            rawFile.remove(xpath);
        }
    }

    /**
     * Removes paths with duplicate values that can be found elsewhere in the file
     * or in the resolved parent. Skipped when no organization is set and for root.
     * Paths under //ldml/identity are never removed.
     *
     * @param rawFile
     *            the file to prune in place
     */
    private void removeRedundantPaths(CLDRFile rawFile) {
        if (organization == null || rawFile.getLocaleID().equals("root")) {
            return;
        }

        String parent = LocaleIDParser.getParent(rawFile.getLocaleID());
        CLDRFile resolvedParent = rawFactory.make(parent, true);
        List<String> duplicatePaths = new ArrayList<>();
        for (String xpath : rawFile) {
            if (xpath.startsWith("//ldml/identity")) {
                continue;
            }
            String value = rawFile.getStringValue(xpath);
            // Remove count="x" if the value is equivalent to count="other".
            if (xpath.contains("[@count=")) {
                XPathParts parts = XPathParts.getInstance(xpath); // not frozen, for setAttribute
                String count = parts.getAttributeValue(-1, "count");
                // count is null when the attribute is not on the last element; such
                // paths are left to the parent comparison below.
                if (count != null && !count.equals("other")) {
                    parts.setAttribute(-1, "count", "other");
                    String otherPath = parts.toString();
                    if (value.equals(rawFile.getStringValue(otherPath))) {
                        duplicatePaths.add(xpath);
                        continue;
                    }
                }
            }
            // Remove xpaths with values also found in the parent.
            String sourceLocale = resolvedParent.getSourceLocaleID(xpath, null);
            if (!sourceLocale.equals(XMLSource.CODE_FALLBACK_ID)) {
                String parentValue = resolvedParent.getStringValue(xpath);
                if (value.equals(parentValue)) {
                    duplicatePaths.add(xpath);
                }
            }
        }
        // Removal is deferred so the file is not mutated while being iterated.
        for (String xpath : duplicatePaths) {
            rawFile.remove(xpath);
        }
    }

    @Override
    public DraftStatus getMinimalDraftStatus() {
        return rawFactory.getMinimalDraftStatus();
    }

    @Override
    protected Set<String> handleGetAvailable() {
        return rawFactory.getAvailable();
    }

    /**
     * Wrapper class for holding information about a value modification entry.
     * Depending on the modifier, {@code oldValue}/{@code newValue} hold xpaths,
     * xpath regexes, or a value regex and its replacement.
     */
    private static final class ModifierEntry {
        final String oldValue;
        final String newValue;
        final Map<String, String> options;

        ModifierEntry(String oldValue, String newValue, Map<String, String> options) {
            this.oldValue = oldValue;
            this.newValue = newValue;
            this.options = options;
        }

        /**
         * @param locale
         *            the locale to be matched
         * @return true if the locale matches the "locale" regex option of this
         *         entry, or if the entry has no such option.
         */
        boolean localeMatches(String locale) {
            String pattern = options.get("locale");
            return pattern == null || locale.matches(pattern);
        }
    }

    /**
     * Class for performing a specific type of data modification on a CLDRFile.
     */
    private abstract static class Modifier {
        protected List<ModifierEntry> entries = new ArrayList<>();

        /** Applies every entry of this modifier to the file, in place. */
        public abstract void modifyFile(CLDRFile file);

        /** @return a new modifier of the same kind holding only the entries that match the locale. */
        public abstract Modifier filterLocale(String locale);

        /**
         * @return the list of modifiers meant for the specified locale.
         */
        protected List<ModifierEntry> getModifiersForLocale(String locale) {
            List<ModifierEntry> matching = new ArrayList<>();
            for (ModifierEntry entry : entries) {
                if (entry.localeMatches(locale)) {
                    matching.add(entry);
                }
            }
            return matching;
        }

        /**
         * Registers an entry with this modifier.
         *
         * @param entry
         *            the entry to add
         */
        public void addModifierEntry(ModifierEntry entry) {
            entries.add(entry);
        }

        public boolean isEmpty() {
            return entries.isEmpty();
        }
    }

    /**
     * Maps the value of an XPath onto another XPath.
     */
    private static class PathModifier extends Modifier {
        @Override
        public void modifyFile(CLDRFile file) {
            // For certain alternate values, use them as the main values.
            for (ModifierEntry entry : entries) {
                String oldPath = entry.oldValue;
                String value = file.getStringValue(oldPath);
                if (value != null) {
                    file.add(entry.newValue, value);
                    file.remove(oldPath);
                }
            }
        }

        @Override
        public Modifier filterLocale(String locale) {
            PathModifier newModifier = new PathModifier();
            newModifier.entries = getModifiersForLocale(locale);
            return newModifier;
        }
    }

    /**
     * Replaces certain values with other values. An entry's optional "xpath"
     * option restricts the replacement to one literal path or to the paths
     * matching a regex; without it every path in the file is considered.
     */
    private static class ValueModifier extends Modifier {
        @Override
        public void modifyFile(CLDRFile file) {
            for (ModifierEntry entry : entries) {
                String filteringPath = entry.options.get("xpath");
                if (filteringPath != null && isValidXPath(filteringPath)) {
                    // For non-regex XPaths, look them up directly.
                    String value = file.getStringValue(filteringPath);
                    if (value != null) {
                        file.add(filteringPath, value.replaceAll(entry.oldValue, entry.newValue));
                    }
                    continue;
                }

                Iterator<String> iterator;
                if (filteringPath == null) {
                    iterator = file.iterator();
                } else {
                    Matcher matcher = PatternCache.get(filteringPath).matcher("");
                    iterator = file.iterator(matcher);
                }
                // Compile the value regex once per entry instead of once per path;
                // String.replaceAll would recompile it on every call.
                Pattern oldValuePattern = Pattern.compile(entry.oldValue);
                while (iterator.hasNext()) {
                    String xpath = iterator.next();
                    String originalValue = file.getStringValue(xpath);
                    String value = oldValuePattern.matcher(originalValue).replaceAll(entry.newValue);
                    if (!value.equals(originalValue)) {
                        file.add(xpath, value);
                    }
                }
            }
        }

        @Override
        public Modifier filterLocale(String locale) {
            ValueModifier newModifier = new ValueModifier();
            newModifier.entries = getModifiersForLocale(locale);
            return newModifier;
        }
    }

    /**
     * Maps the value of XPaths onto other XPaths using regexes.
     */
    private static class PathRegexModifier extends Modifier {
        private final RegexLookup<String> xpathLookup = new RegexLookup<>();

        @Override
        public void addModifierEntry(ModifierEntry entry) {
            super.addModifierEntry(entry);
            xpathLookup.add(entry.oldValue, entry.newValue);
        }

        @Override
        public void modifyFile(CLDRFile file) {
            if (xpathLookup.size() == 0) {
                return;
            }
            // Snapshot the paths first: the loop below adds and removes paths, and
            // the file must not be mutated while it is being iterated. This also
            // means paths created by the remapping are not themselves remapped.
            List<String> originalPaths = new ArrayList<>();
            for (String xpath : file) {
                originalPaths.add(xpath);
            }

            Output<String[]> arguments = new Output<>();
            for (String xpath : originalPaths) {
                String newValue = xpathLookup.get(xpath, null, arguments, null, null);
                if (newValue != null) {
                    String newPath = RegexLookup.replace(newValue, arguments.value);
                    String value = file.getStringValue(xpath);
                    file.add(newPath, value);
                    file.remove(xpath);
                }
            }
        }

        @Override
        public Modifier filterLocale(String locale) {
            PathRegexModifier newModifier = new PathRegexModifier();
            // addModifierEntry keeps the entry list and the regex lookup in sync.
            for (ModifierEntry entry : getModifiersForLocale(locale)) {
                newModifier.addModifierEntry(entry);
            }
            return newModifier;
        }
    }

    /**
     * Loads modifiers from a specified file. Does nothing unless value
     * modification was requested.
     *
     * <p>Each line is split on ';' into {@code type; oldValue; newValue[; key=value]...},
     * where {@code type} is a {@link ModificationType} name.
     *
     * @param filename
     *            the modifier file, handed to the parser together with
     *            {@code FilterFactory.class} (presumably resolved relative to this class)
     */
    private void loadModifiers(String filename) {
        if (!modifyValues) {
            return;
        }
        final Modifier pathModifier = new PathModifier();
        final Modifier pathRegexModifier = new PathRegexModifier();
        final Modifier valueModifier = new ValueModifier();
        RegexFileParser fileParser = new RegexFileParser();
        fileParser.setLineParser(new RegexLineParser() {
            @Override
            public void parse(String line) {
                String[] contents = line.split("\\s*+;\\s*+");
                ModificationType filterType = ModificationType.valueOf(contents[0]);
                if (contents.length < 3) {
                    throw new IllegalArgumentException(
                        "Expected 'type; oldValue; newValue' but found only "
                            + contents.length + " field(s): " + line);
                }
                String oldValue = contents[1];
                String newValue = contents[2];
                // Process remaining options.
                Map<String, String> options = new HashMap<>();
                for (int i = 3; i < contents.length; i++) {
                    String rawOption = contents[i];
                    int pos = rawOption.indexOf('=');
                    if (pos < 0) {
                        throw new IllegalArgumentException("Invalid option: " + rawOption);
                    }
                    String optionType = rawOption.substring(0, pos).trim();
                    options.put(optionType, rawOption.substring(pos + 1).trim());
                }

                switch (filterType) {
                case xpath:
                    // Literal paths are remapped directly; anything else is a path regex.
                    if (isValidXPath(oldValue)) {
                        pathModifier.addModifierEntry(new ModifierEntry(oldValue, newValue, options));
                    } else {
                        pathRegexModifier.addModifierEntry(new ModifierEntry(fixXPathRegex(oldValue),
                            newValue, options));
                    }
                    break;
                case value: {
                    String xpathOption = options.get("xpath");
                    if (xpathOption != null && !isValidXPath(xpathOption)) {
                        options.put("xpath", fixXPathRegex(xpathOption));
                    }
                    valueModifier.addModifierEntry(new ModifierEntry(oldValue, newValue, options));
                    break;
                }
                }
            }
        });
        fileParser.parse(FilterFactory.class, filename);
        modifiers.add(pathModifier);
        modifiers.add(pathRegexModifier);
        modifiers.add(valueModifier);
    }

    /**
     * @param path
     *            the string to test
     * @return true if path is a valid XPath and not a regex.
     */
    private static boolean isValidXPath(String path) {
        return XPATH_PATTERN.matcher(path).matches();
    }

    /**
     * Converts an xpath into a proper regex pattern by anchoring it at the start
     * with '^' and escaping the '[' of every "[@" attribute opener.
     *
     * @param path
     *            the xpath regex as written in the modifier file
     * @return the anchored, escaped regex
     */
    private static String fixXPathRegex(String path) {
        return '^' + path.replace("[@", "\\[@");
    }

    /**
     * Run FilterFactory for a specific organization. Writes one filtered,
     * unresolved XML file per available locale into the "filter" subdirectory
     * of {@code CLDRPaths.GEN_DIRECTORY}.
     *
     * @param args
     *            command-line arguments; see the "org" and "locales" options
     * @throws Exception
     *             if option parsing, filtering or writing fails
     */
    public static void main(String[] args) throws Exception {
        options.parse(args, true);
        Factory rawFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, options.get("locales").getValue());
        String org = options.get("org").getValue();
        FilterFactory filterFactory = FilterFactory.load(rawFactory, org, true);
        String outputDir = CLDRPaths.GEN_DIRECTORY + "/filter";
        for (String locale : rawFactory.getAvailable()) {
            try (PrintWriter out = FileUtilities.openUTF8Writer(outputDir, locale + ".xml")) {
                filterFactory.make(locale, false).write(out);
            }
        }
    }
}