package org.unicode.cldr.tool;

import com.ibm.icu.util.Output;
import java.io.File;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.tool.Option.Options;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.DraftStatus;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CoverageInfo;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.Organization;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.RegexFileParser;
import org.unicode.cldr.util.RegexFileParser.RegexLineParser;
import org.unicode.cldr.util.RegexLookup;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.XMLSource;
import org.unicode.cldr.util.XPathParts;

/**
 * Factory for filtering CLDRFiles by organization and replacing certain values. Organization
 * coverage data is in org/unicode/cldr/util/data/Locales.txt.
 *
 * <p>Unresolved files produced by this factory go through three steps, in order: value/xpath
 * modification (only when enabled), coverage filtering, and removal of redundant paths. The last
 * two steps are skipped when no organization is set.
 *
 * @author jchye
 */
public class FilterFactory extends Factory {
    /** Types of data modification supported. */
    private enum ModificationType {
        xpath,
        value;
    }

    private final Factory rawFactory;
    private final String organization;
    private final boolean modifyValues;

    /** Modifiers loaded by {@link #loadModifiers(String)}; stays empty if modification is off. */
    private final List<Modifier> modifiers = new ArrayList<>();

    /**
     * Creates a new Factory for filtering CLDRFiles.
     *
     * @param rawFactory the factory to be filtered
     * @param organization the organization that the filtering is catered towards
     * @param modifyValues true if certain values in the data should be modified or replaced
     */
    private FilterFactory(Factory rawFactory, String organization, boolean modifyValues) {
        this.rawFactory = rawFactory;
        this.organization = organization;
        setSupplementalDirectory(rawFactory.getSupplementalDirectory());
        this.modifyValues = modifyValues;
    }

    /**
     * Creates a FilterFactory and loads its modifiers from {@code dataModifiers.txt}.
     *
     * @param rawFactory the factory to be filtered
     * @param organization the organization that the filtering is catered towards; if null, no
     *     coverage filtering or redundant-path removal is performed
     * @param usesAltValue true if the modifiers file should be loaded and applied
     * @return the new factory
     */
    public static FilterFactory load(
            Factory rawFactory, String organization, boolean usesAltValue) {
        FilterFactory filterFactory = new FilterFactory(rawFactory, organization, usesAltValue);
        filterFactory.loadModifiers("dataModifiers.txt");
        return filterFactory;
    }

    @Override
    public File[] getSourceDirectories() {
        return rawFactory.getSourceDirectories();
    }

    @Override
    public List<File> getSourceDirectoriesForLocale(String localeID) {
        return rawFactory.getSourceDirectoriesForLocale(localeID);
    }

    /**
     * Builds the file for a locale: resolved requests are wrapped around the resolving source,
     * unresolved requests are filtered via {@link #filterCldrFile(String, DraftStatus)}.
     */
    @Override
    protected CLDRFile handleMake(
            String localeID, boolean resolved, DraftStatus minimalDraftStatus) {
        if (resolved) {
            return new CLDRFile(makeResolvingSource(localeID, minimalDraftStatus));
        }
        return filterCldrFile(localeID, minimalDraftStatus);
    }

    /**
     * Makes a thawed copy of the raw factory's unresolved file for the locale, applies all
     * filters to it and registers it as an XML source.
     *
     * @return a filtered CLDRFile.
     */
    private CLDRFile filterCldrFile(String localeID, DraftStatus minimalDraftStatus) {
        CLDRFile rawFile = rawFactory.make(localeID, false, minimalDraftStatus).cloneAsThawed();

        filterAltValues(rawFile);
        filterCoverage(rawFile);
        removeRedundantPaths(rawFile);
        registerXmlSource(rawFile);
        return rawFile;
    }

    /**
     * Replaces the value for certain XPaths with their alternate value. Does nothing unless value
     * modification was enabled at construction.
     *
     * @param rawFile the file to modify in place
     */
    private void filterAltValues(CLDRFile rawFile) {
        if (!modifyValues) {
            return;
        }
        String localeID = rawFile.getLocaleID();
        for (Modifier modifier : modifiers) {
            Modifier localeModifier = modifier.filterLocale(localeID);
            if (!localeModifier.isEmpty()) {
                localeModifier.modifyFile(rawFile);
            }
        }
    }

    /**
     * Filters a CLDRFile according to the specified organization's coverage level: every path
     * whose coverage value is above the organization's level for the locale is removed. Does
     * nothing if no organization is set.
     *
     * @param rawFile the file to filter in place
     */
    private void filterCoverage(CLDRFile rawFile) {
        if (organization == null) {
            return;
        }
        String localeID = rawFile.getLocaleID();
        int minLevel =
                StandardCodes.make().getLocaleCoverageLevel(organization, localeID).getLevel();
        CoverageInfo covInfo = CLDRConfig.getInstance().getCoverageInfo();

        // Collect first and remove afterwards (same approach as removeRedundantPaths) so the
        // file is never modified while it is being iterated.
        // NOTE(review): an earlier comment here said "Locale metadata shouldn't be stripped",
        // but there is no explicit //ldml/identity check; presumably the coverage data keeps
        // those paths at a low level -- confirm.
        List<String> pathsToRemove = new ArrayList<>();
        for (String xpath : rawFile) {
            if (covInfo.getCoverageValue(xpath, localeID) > minLevel) {
                pathsToRemove.add(xpath);
            }
        }
        for (String xpath : pathsToRemove) {
            rawFile.remove(xpath);
        }
    }

    /**
     * Removes paths with duplicate values that can be found elsewhere in the file or in the
     * resolved parent locale. Does nothing if no organization is set or the file is root. Paths
     * under {@code //ldml/identity} are always kept.
     *
     * @param rawFile the file to prune in place
     */
    private void removeRedundantPaths(CLDRFile rawFile) {
        if (organization == null || rawFile.getLocaleID().equals("root")) {
            return;
        }

        String parent = LocaleIDParser.getParent(rawFile.getLocaleID());
        CLDRFile resolvedParent = rawFactory.make(parent, true);
        List<String> duplicatePaths = new ArrayList<>();
        for (String xpath : rawFile) {
            if (xpath.startsWith("//ldml/identity")) {
                continue;
            }
            String value = rawFile.getStringValue(xpath);
            if (value == null) {
                // Nothing to compare; keep the path untouched.
                continue;
            }
            // Remove count="x" if the value is equivalent to count="other".
            if (xpath.contains("[@count=") && duplicatesOtherCount(rawFile, xpath, value)) {
                duplicatePaths.add(xpath);
                continue;
            }
            // Remove xpaths with values also found in the parent, unless the parent only has
            // the value via code fallback.
            String sourceLocale = resolvedParent.getSourceLocaleID(xpath, null);
            if (!XMLSource.CODE_FALLBACK_ID.equals(sourceLocale)
                    && value.equals(resolvedParent.getStringValue(xpath))) {
                duplicatePaths.add(xpath);
            }
        }
        for (String xpath : duplicatePaths) {
            rawFile.remove(xpath);
        }
    }

    /**
     * Checks whether a path with a non-"other" count attribute on its last element has the same
     * value as the corresponding count="other" path in the same file.
     *
     * @param file the file holding both paths
     * @param xpath a path containing a count attribute
     * @param value the (non-null) value of xpath in file
     * @return true if the last element has a count other than "other" and the count="other"
     *     variant of the path has an equal value
     */
    private static boolean duplicatesOtherCount(CLDRFile file, String xpath, String value) {
        XPathParts parts = XPathParts.getFrozenInstance(xpath);
        String count = parts.getAttributeValue(-1, "count");
        // count is null when the attribute is not on the last element; nothing to rewrite then.
        if (count == null || count.equals("other")) {
            return false;
        }
        parts = parts.cloneAsThawed();
        parts.setAttribute(-1, "count", "other");
        String otherPath = parts.toString();
        return value.equals(file.getStringValue(otherPath));
    }

    @Override
    public DraftStatus getMinimalDraftStatus() {
        return rawFactory.getMinimalDraftStatus();
    }

    @Override
    protected Set<String> handleGetAvailable() {
        return rawFactory.getAvailable();
    }

    /** Wrapper class for holding information about a value modification entry. */
    private static final class ModifierEntry {
        final String oldValue;
        final String newValue;
        final Map<String, String> options;

        ModifierEntry(String oldValue, String newValue, Map<String, String> options) {
            this.oldValue = oldValue;
            this.newValue = newValue;
            this.options = options;
        }

        /**
         * @param locale the locale to be matched
         * @return true if the locale matches the locale filter in this entry, or if the entry
         *     has no "locale" option.
         */
        boolean localeMatches(String locale) {
            String pattern = options.get("locale");
            // Full-string match, same semantics as String.matches, but with a cached Pattern.
            return pattern == null || PatternCache.get(pattern).matcher(locale).matches();
        }
    }

    /** Class for performing a specific type of data modification on a CLDRFile. */
    private abstract static class Modifier {
        protected final List<ModifierEntry> entries = new ArrayList<>();

        /** Applies every entry of this modifier to the file, in place. */
        public abstract void modifyFile(CLDRFile file);

        /**
         * @return a new modifier of the same kind holding only the entries that apply to the
         *     specified locale.
         */
        public abstract Modifier filterLocale(String locale);

        /**
         * @return the list of modifiers meant for the specified locale.
         */
        protected List<ModifierEntry> getModifiersForLocale(String locale) {
            List<ModifierEntry> matching = new ArrayList<>();
            for (ModifierEntry entry : entries) {
                if (entry.localeMatches(locale)) {
                    matching.add(entry);
                }
            }
            return matching;
        }

        /**
         * @param entry the entry to append to this modifier
         */
        public void addModifierEntry(ModifierEntry entry) {
            entries.add(entry);
        }

        public boolean isEmpty() {
            return entries.isEmpty();
        }
    }

    /** Maps the value of an XPath onto another XPath. */
    private static final class PathModifier extends Modifier {
        @Override
        public void modifyFile(CLDRFile file) {
            // For certain alternate values, use them as the main values.
            for (ModifierEntry entry : entries) {
                String oldPath = entry.oldValue;
                String value = file.getStringValue(oldPath);
                if (value != null) {
                    file.add(entry.newValue, value);
                    file.remove(oldPath);
                }
            }
        }

        @Override
        public Modifier filterLocale(String locale) {
            PathModifier filtered = new PathModifier();
            for (ModifierEntry entry : getModifiersForLocale(locale)) {
                filtered.addModifierEntry(entry);
            }
            return filtered;
        }
    }

    /** Replaces certain values with other values. */
    private static final class ValueModifier extends Modifier {
        @Override
        public void modifyFile(CLDRFile file) {
            for (ModifierEntry entry : entries) {
                String filteringPath = entry.options.get("xpath");
                if (filteringPath != null && isValidXPath(filteringPath)) {
                    // For non-regex XPaths, look them up directly.
                    String value = file.getStringValue(filteringPath);
                    if (value != null) {
                        file.add(
                                filteringPath, value.replaceAll(entry.oldValue, entry.newValue));
                    }
                } else {
                    replaceInMatchingPaths(file, entry, filteringPath);
                }
            }
        }

        /**
         * Applies the entry's regex replacement to every path of the file, or only to the paths
         * matching pathRegex when it is non-null. Only values that actually change are written.
         */
        private static void replaceInMatchingPaths(
                CLDRFile file, ModifierEntry entry, String pathRegex) {
            Iterator<String> iterator;
            if (pathRegex == null) {
                iterator = file.iterator();
            } else {
                Matcher matcher = PatternCache.get(pathRegex).matcher("");
                iterator = file.iterator(matcher);
            }
            while (iterator.hasNext()) {
                String xpath = iterator.next();
                String originalValue = file.getStringValue(xpath);
                if (originalValue == null) {
                    // No value to rewrite for this path.
                    continue;
                }
                String value = originalValue.replaceAll(entry.oldValue, entry.newValue);
                if (!value.equals(originalValue)) {
                    file.add(xpath, value);
                }
            }
        }

        @Override
        public Modifier filterLocale(String locale) {
            ValueModifier filtered = new ValueModifier();
            for (ModifierEntry entry : getModifiersForLocale(locale)) {
                filtered.addModifierEntry(entry);
            }
            return filtered;
        }
    }

    /** Maps the value of XPaths onto other XPaths using regexes. */
    private static final class PathRegexModifier extends Modifier {
        private final RegexLookup<String> xpathLookup = new RegexLookup<>();

        @Override
        public void addModifierEntry(ModifierEntry entry) {
            super.addModifierEntry(entry);
            xpathLookup.add(entry.oldValue, entry.newValue);
        }

        @Override
        public void modifyFile(CLDRFile file) {
            if (xpathLookup.size() == 0) {
                return;
            }
            // Snapshot the paths up front: the loop below both adds and removes paths, which
            // must not happen while the file itself is being iterated.
            List<String> paths = new ArrayList<>();
            for (String xpath : file) {
                paths.add(xpath);
            }
            Output<String[]> arguments = new Output<>();
            for (String xpath : paths) {
                String newValue = xpathLookup.get(xpath, null, arguments, null, null);
                if (newValue != null) {
                    String newPath = RegexLookup.replace(newValue, arguments.value);
                    String value = file.getStringValue(xpath);
                    file.add(newPath, value);
                    file.remove(xpath);
                }
            }
        }

        @Override
        public Modifier filterLocale(String locale) {
            PathRegexModifier filtered = new PathRegexModifier();
            // addModifierEntry also registers the entry in the new modifier's regex lookup.
            for (ModifierEntry entry : getModifiersForLocale(locale)) {
                filtered.addModifierEntry(entry);
            }
            return filtered;
        }
    }

    /**
     * Loads modifiers from a specified file. Does nothing unless value modification was enabled.
     *
     * <p>Each line is split on semicolons into {@code type ; oldValue ; newValue} followed by
     * any number of {@code name=value} options, where type is one of the {@link
     * ModificationType} names. For "xpath" lines, a literal XPath goes to the path modifier and
     * anything else is treated as a regex; for "value" lines, a non-literal "xpath" option is
     * converted to a regex.
     *
     * @param filename the file name handed to {@link RegexFileParser} together with this class
     * @throws IllegalArgumentException if a line has fewer than three fields, an unknown type,
     *     or an option without '='
     */
    private void loadModifiers(String filename) {
        if (!modifyValues) {
            return;
        }
        final Modifier pathModifier = new PathModifier();
        final Modifier pathRegexModifier = new PathRegexModifier();
        final Modifier valueModifier = new ValueModifier();
        RegexFileParser fileParser = new RegexFileParser();
        fileParser.setLineParser(
                new RegexLineParser() {
                    @Override
                    public void parse(String line) {
                        String[] contents = line.split("\\s*+;\\s*+");
                        if (contents.length < 3) {
                            throw new IllegalArgumentException("Invalid modifier line: " + line);
                        }
                        ModificationType filterType = ModificationType.valueOf(contents[0]);
                        String oldValue = contents[1];
                        String newValue = contents[2];
                        // Process remaining options.
                        Map<String, String> entryOptions = new HashMap<>();
                        for (int i = 3; i < contents.length; i++) {
                            String rawOption = contents[i];
                            int pos = rawOption.indexOf('=');
                            if (pos < 0) {
                                throw new IllegalArgumentException("Invalid option: " + rawOption);
                            }
                            String optionType = rawOption.substring(0, pos).trim();
                            entryOptions.put(optionType, rawOption.substring(pos + 1).trim());
                        }

                        switch (filterType) {
                            case xpath:
                                if (isValidXPath(oldValue)) {
                                    pathModifier.addModifierEntry(
                                            new ModifierEntry(oldValue, newValue, entryOptions));
                                } else {
                                    pathRegexModifier.addModifierEntry(
                                            new ModifierEntry(
                                                    fixXPathRegex(oldValue),
                                                    newValue,
                                                    entryOptions));
                                }
                                break;
                            case value:
                                String xpath = entryOptions.get("xpath");
                                if (xpath != null && !isValidXPath(xpath)) {
                                    entryOptions.put("xpath", fixXPathRegex(xpath));
                                }
                                valueModifier.addModifierEntry(
                                        new ModifierEntry(oldValue, newValue, entryOptions));
                                break;
                        }
                    }
                });
        fileParser.parse(FilterFactory.class, filename);
        modifiers.add(pathModifier);
        modifiers.add(pathRegexModifier);
        modifiers.add(valueModifier);
    }

    /**
     * Matches a literal XPath: "//" followed by slash-separated word elements, each with
     * optional {@code [@name="value"]} attributes whose values contain none of the characters
     * {@code " ( ) % \}.
     */
    private static final Pattern XPATH_PATTERN =
            PatternCache.get("/(/\\w++(\\[@\\w++=\"[^\"()%\\\\]+\"])*)++");

    /**
     * @param path the string to test
     * @return true if path is a valid XPath and not a regex.
     */
    private static boolean isValidXPath(String path) {
        return XPATH_PATTERN.matcher(path).matches();
    }

    /**
     * Converts an xpath into a proper regex pattern by anchoring it at the start and escaping
     * the opening bracket of every {@code [@} attribute marker.
     *
     * @param path the xpath-like regex as written in the modifiers file
     * @return the anchored, escaped regex
     */
    private static String fixXPathRegex(String path) {
        return '^' + path.replace("[@", "\\[@");
    }

    private static final Options options =
            new Options(
                            "Filters CLDR XML files according to orgnizational coverage levels and an "
                                    + "input file of replacement values/xpaths.")
                    .add(
                            "org",
                            'o',
                            ".*",
                            Organization.cldr.name(),
                            "The organization that the filtering is for. If set, also removes duplicate paths.")
                    .add(
                            "locales",
                            'l',
                            ".*",
                            ".*",
                            "A regular expression indicating the locales to be filtered");

    /**
     * Run FilterFactory for a specific organization. Writes one filtered, unresolved XML file
     * per available locale into the "filter" subdirectory of {@link CLDRPaths#GEN_DIRECTORY}.
     *
     * @param args command-line arguments; see the "org" and "locales" options
     * @throws Exception if option parsing, file creation or writing fails
     */
    public static void main(String[] args) throws Exception {
        options.parse(args, true);
        Factory rawFactory =
                Factory.make(CLDRPaths.MAIN_DIRECTORY, options.get("locales").getValue());
        String org = options.get("org").getValue();
        FilterFactory filterFactory = FilterFactory.load(rawFactory, org, true);
        String outputDir = CLDRPaths.GEN_DIRECTORY + "/filter";
        for (String locale : rawFactory.getAvailable()) {
            try (PrintWriter out = FileUtilities.openUTF8Writer(outputDir, locale + ".xml")) {
                filterFactory.make(locale, false).write(out);
            }
        }
    }
}