1 package org.unicode.cldr.tool; 2 3 import java.io.File; 4 import java.io.PrintWriter; 5 import java.util.ArrayList; 6 import java.util.HashMap; 7 import java.util.Iterator; 8 import java.util.List; 9 import java.util.Map; 10 import java.util.Set; 11 import java.util.regex.Matcher; 12 import java.util.regex.Pattern; 13 14 import org.unicode.cldr.draft.FileUtilities; 15 import org.unicode.cldr.tool.Option.Options; 16 import org.unicode.cldr.util.CLDRConfig; 17 import org.unicode.cldr.util.CLDRFile; 18 import org.unicode.cldr.util.CLDRFile.DraftStatus; 19 import org.unicode.cldr.util.CLDRPaths; 20 import org.unicode.cldr.util.CoverageInfo; 21 import org.unicode.cldr.util.Factory; 22 import org.unicode.cldr.util.LocaleIDParser; 23 import org.unicode.cldr.util.Organization; 24 import org.unicode.cldr.util.PatternCache; 25 import org.unicode.cldr.util.RegexFileParser; 26 import org.unicode.cldr.util.RegexFileParser.RegexLineParser; 27 import org.unicode.cldr.util.RegexLookup; 28 import org.unicode.cldr.util.StandardCodes; 29 import org.unicode.cldr.util.SupplementalDataInfo; 30 import org.unicode.cldr.util.XMLSource; 31 import org.unicode.cldr.util.XPathParts; 32 33 import com.ibm.icu.util.Output; 34 35 /** 36 * Factory for filtering CLDRFiles by organization and replacing certain values. 37 * Organization coverage data is in org/unicode/cldr/util/data/Locales.txt. 38 * 39 * @author jchye 40 */ 41 public class FilterFactory extends Factory { 42 /** 43 * Types of data modification supported. 44 */ 45 private enum ModificationType { 46 xpath, value; 47 } 48 49 private Factory rawFactory; 50 private String organization; 51 private SupplementalDataInfo supplementalData; 52 private boolean modifyValues; 53 54 private List<Modifier> modifiers = new ArrayList<Modifier>(); 55 56 /** 57 * Creates a new Factory for filtering CLDRFiles. 58 * 59 * @param rawFactory 60 * the factory to be filtered 61 * @param organization 62 * the organization that the filtering is catered towards 63 * @param modifyValues 64 * true if certain values in the data should be modified or replaced 65 */ FilterFactory(Factory rawFactory, String organization, boolean modifyValues)66 private FilterFactory(Factory rawFactory, String organization, boolean modifyValues) { 67 this.rawFactory = rawFactory; 68 this.organization = organization; 69 supplementalData = SupplementalDataInfo.getInstance(); 70 setSupplementalDirectory(rawFactory.getSupplementalDirectory()); 71 this.modifyValues = modifyValues; 72 } 73 load(Factory rawFactory, String organization, boolean usesAltValue)74 public static FilterFactory load(Factory rawFactory, String organization, boolean usesAltValue) { 75 FilterFactory filterFactory = new FilterFactory(rawFactory, organization, usesAltValue); 76 filterFactory.loadModifiers("dataModifiers.txt"); 77 return filterFactory; 78 } 79 80 @Override getSourceDirectories()81 public File[] getSourceDirectories() { 82 return rawFactory.getSourceDirectories(); 83 } 84 85 @Override getSourceDirectoriesForLocale(String localeID)86 public List<File> getSourceDirectoriesForLocale(String localeID) { 87 return rawFactory.getSourceDirectoriesForLocale(localeID); 88 } 89 90 @Override handleMake(String localeID, boolean resolved, DraftStatus minimalDraftStatus)91 protected CLDRFile handleMake(String localeID, boolean resolved, DraftStatus minimalDraftStatus) { 92 if (resolved) { 93 return new CLDRFile(makeResolvingSource(localeID, minimalDraftStatus)); 94 } else { 95 return filterCldrFile(localeID, minimalDraftStatus); 96 } 97 } 98 99 /** 100 * @return a filtered CLDRFile. 101 */ filterCldrFile(String localeID, DraftStatus minimalDraftStatus)102 private CLDRFile filterCldrFile(String localeID, DraftStatus minimalDraftStatus) { 103 CLDRFile rawFile = rawFactory.make(localeID, false, minimalDraftStatus).cloneAsThawed(); 104 105 filterAltValues(rawFile); 106 filterCoverage(rawFile); 107 removeRedundantPaths(rawFile); 108 return rawFile; 109 } 110 111 /** 112 * Replaces the value for certain XPaths with their alternate value. 113 * 114 * @param rawFile 115 */ filterAltValues(CLDRFile rawFile)116 private void filterAltValues(CLDRFile rawFile) { 117 if (!modifyValues) return; 118 119 for (Modifier modifier : modifiers) { 120 modifier = modifier.filterLocale(rawFile.getLocaleID()); 121 if (!modifier.isEmpty()) { 122 modifier.modifyFile(rawFile); 123 } 124 } 125 } 126 127 /** 128 * Filters a CLDRFile according to the specified organization's coverage level. 129 * 130 * @param rawFile 131 */ filterCoverage(CLDRFile rawFile)132 private void filterCoverage(CLDRFile rawFile) { 133 if (organization == null) return; 134 135 int minLevel = StandardCodes.make() 136 .getLocaleCoverageLevel(organization, rawFile.getLocaleID()) 137 .getLevel(); 138 CoverageInfo covInfo = CLDRConfig.getInstance().getCoverageInfo(); 139 for (String xpath : rawFile) { 140 // Locale metadata shouldn't be stripped. 141 int level = covInfo.getCoverageValue(xpath, rawFile.getLocaleID()); 142 if (level > minLevel) { 143 rawFile.remove(xpath); 144 } 145 } 146 } 147 148 /** 149 * Removes paths with duplicate values that can be found elsewhere in the file. 150 * @param rawFile 151 */ removeRedundantPaths(CLDRFile rawFile)152 private void removeRedundantPaths(CLDRFile rawFile) { 153 if (organization == null || rawFile.getLocaleID().equals("root")) return; 154 155 String parent = LocaleIDParser.getParent(rawFile.getLocaleID()); 156 CLDRFile resolvedParent = rawFactory.make(parent, true); 157 List<String> duplicatePaths = new ArrayList<String>(); 158 XPathParts parts = new XPathParts(); 159 for (String xpath : rawFile) { 160 if (xpath.startsWith("//ldml/identity")) continue; 161 String value = rawFile.getStringValue(xpath); 162 // Remove count="x" if the value is equivalent to count="other". 163 if (xpath.contains("[@count=")) { 164 parts.set(xpath); 165 String count = parts.getAttributeValue(-1, "count"); 166 if (!count.equals("other")) { 167 parts.setAttribute(-1, "count", "other"); 168 String otherPath = parts.toString(); 169 if (value.equals(rawFile.getStringValue(otherPath))) { 170 duplicatePaths.add(xpath); 171 continue; 172 } 173 } 174 } 175 // Remove xpaths with values also found in the parent. 176 String sourceLocale = resolvedParent.getSourceLocaleID(xpath, null); 177 if (!sourceLocale.equals(XMLSource.CODE_FALLBACK_ID)) { 178 String parentValue = resolvedParent.getStringValue(xpath); 179 if (value.equals(parentValue)) { 180 duplicatePaths.add(xpath); 181 } 182 } 183 } 184 for (String xpath : duplicatePaths) { 185 rawFile.remove(xpath); 186 } 187 } 188 189 @Override getMinimalDraftStatus()190 public DraftStatus getMinimalDraftStatus() { 191 return rawFactory.getMinimalDraftStatus(); 192 } 193 194 @Override handleGetAvailable()195 protected Set<String> handleGetAvailable() { 196 return rawFactory.getAvailable(); 197 } 198 199 /** 200 * Wrapper class for holding information about a value modification entry. 201 */ 202 private class ModifierEntry { 203 String oldValue; 204 String newValue; 205 Map<String, String> options; 206 ModifierEntry(String oldValue, String newValue, Map<String, String> options)207 public ModifierEntry(String oldValue, String newValue, Map<String, String> options) { 208 this.oldValue = oldValue; 209 this.newValue = newValue; 210 this.options = options; 211 } 212 213 /** 214 * @param locale 215 * the locale to be matched 216 * @return true if the locale matches the locale filter in this entry. 217 */ localeMatches(String locale)218 public boolean localeMatches(String locale) { 219 String pattern = options.get("locale"); 220 return pattern == null ? true : locale.matches(pattern); 221 } 222 } 223 224 /** 225 * Class for performing a specific type of data modification on a CLDRFile. 226 */ 227 private abstract class Modifier { 228 protected List<ModifierEntry> entries = new ArrayList<ModifierEntry>(); 229 modifyFile(CLDRFile file)230 public abstract void modifyFile(CLDRFile file); 231 filterLocale(String locale)232 public abstract Modifier filterLocale(String locale); 233 234 /** 235 * @return the list of modifiers meant for the specified locale. 236 */ getModifiersForLocale(String locale)237 protected List<ModifierEntry> getModifiersForLocale(String locale) { 238 List<ModifierEntry> newFilters = new ArrayList<ModifierEntry>(); 239 for (ModifierEntry filter : entries) { 240 if (filter.localeMatches(locale)) { 241 newFilters.add(filter); 242 } 243 } 244 return newFilters; 245 } 246 247 /** 248 * 249 * @param filter 250 */ addModifierEntry(ModifierEntry entry)251 public void addModifierEntry(ModifierEntry entry) { 252 entries.add(entry); 253 } 254 isEmpty()255 public boolean isEmpty() { 256 return entries.size() == 0; 257 } 258 } 259 260 /** 261 * Maps the value of an XPath onto another XPath. 262 */ 263 private class PathModifier extends Modifier { 264 @Override modifyFile(CLDRFile file)265 public void modifyFile(CLDRFile file) { 266 // For certain alternate values, use them as the main values. 267 for (ModifierEntry entry : entries) { 268 String oldPath = entry.oldValue; 269 String value = file.getStringValue(oldPath); 270 if (value != null) { 271 String newPath = entry.newValue; 272 file.add(newPath, value); 273 file.remove(oldPath); 274 } 275 } 276 } 277 278 @Override filterLocale(String locale)279 public Modifier filterLocale(String locale) { 280 PathModifier newModifier = new PathModifier(); 281 newModifier.entries = getModifiersForLocale(locale); 282 return newModifier; 283 } 284 } 285 286 /** 287 * Replaces certain values with other values. 288 */ 289 private class ValueModifier extends Modifier { 290 @Override modifyFile(CLDRFile file)291 public void modifyFile(CLDRFile file) { 292 // Replace values. 293 for (ModifierEntry entry : entries) { 294 String filteringPath = entry.options.get("xpath"); 295 if (filteringPath != null && isValidXPath(filteringPath)) { 296 // For non-regex XPaths, look them up directly. 297 String value = file.getStringValue(filteringPath); 298 if (value != null) { 299 value = value.replaceAll(entry.oldValue, entry.newValue); 300 file.add(filteringPath, value); 301 } 302 } else { 303 Iterator<String> iterator = file.iterator(); 304 if (filteringPath != null) { 305 Matcher matcher = PatternCache.get(filteringPath).matcher(""); 306 iterator = file.iterator(matcher); 307 } 308 while (iterator.hasNext()) { 309 String xpath = iterator.next(); 310 String originalValue = file.getStringValue(xpath); 311 String value = originalValue.replaceAll(entry.oldValue, entry.newValue); 312 if (!value.equals(originalValue)) { 313 file.add(xpath, value); 314 } 315 } 316 } 317 } 318 } 319 320 @Override filterLocale(String locale)321 public Modifier filterLocale(String locale) { 322 ValueModifier newModifier = new ValueModifier(); 323 newModifier.entries = getModifiersForLocale(locale); 324 return newModifier; 325 } 326 } 327 328 /** 329 * Maps the value of XPaths onto other XPaths using regexes. 330 */ 331 private class PathRegexModifier extends Modifier { 332 private RegexLookup<String> xpathLookup = new RegexLookup<String>(); 333 334 @Override addModifierEntry(ModifierEntry entry)335 public void addModifierEntry(ModifierEntry entry) { 336 super.addModifierEntry(entry); 337 xpathLookup.add(entry.oldValue, entry.newValue); 338 } 339 340 @Override modifyFile(CLDRFile file)341 public void modifyFile(CLDRFile file) { 342 if (xpathLookup.size() > 0) { 343 Output<String[]> arguments = new Output<String[]>(); 344 for (String xpath : file) { 345 String newValue = xpathLookup.get(xpath, null, arguments, null, null); 346 if (newValue != null) { 347 String newPath = RegexLookup.replace(newValue, arguments.value); 348 String value = file.getStringValue(xpath); 349 file.add(newPath, value); 350 file.remove(xpath); 351 } 352 } 353 } 354 } 355 356 @Override filterLocale(String locale)357 public Modifier filterLocale(String locale) { 358 PathRegexModifier newModifier = new PathRegexModifier(); 359 newModifier.entries = getModifiersForLocale(locale); 360 for (ModifierEntry entry : newModifier.entries) { 361 newModifier.xpathLookup.add(entry.oldValue, entry.newValue); 362 } 363 return newModifier; 364 } 365 } 366 367 /** 368 * Loads modifiers from a specified file. 369 */ loadModifiers(String filename)370 private void loadModifiers(String filename) { 371 if (!modifyValues) return; 372 final Modifier pathModifier = new PathModifier(); 373 final Modifier pathRegexModifier = new PathRegexModifier(); 374 final Modifier valueModifier = new ValueModifier(); 375 RegexFileParser fileParser = new RegexFileParser(); 376 fileParser.setLineParser(new RegexLineParser() { 377 @Override 378 public void parse(String line) { 379 String[] contents = line.split("\\s*+;\\s*+"); 380 ModificationType filterType = ModificationType.valueOf(contents[0]); 381 String oldValue = contents[1]; 382 String newValue = contents[2]; 383 // Process remaining options. 384 Map<String, String> options = new HashMap<String, String>(); 385 for (int i = 3; i < contents.length; i++) { 386 String rawLine = contents[i]; 387 int pos = rawLine.indexOf('='); 388 if (pos < 0) { 389 throw new IllegalArgumentException("Invalid option: " + rawLine); 390 } 391 String optionType = rawLine.substring(0, pos).trim(); 392 options.put(optionType, rawLine.substring(pos + 1).trim()); 393 } 394 395 switch (filterType) { 396 case xpath: 397 if (isValidXPath(oldValue)) { 398 pathModifier.addModifierEntry(new ModifierEntry(oldValue, newValue, options)); 399 } else { 400 pathRegexModifier.addModifierEntry(new ModifierEntry(fixXPathRegex(oldValue), 401 newValue, options)); 402 } 403 break; 404 case value: 405 String xpath = options.get("xpath"); 406 if (xpath != null && !isValidXPath(xpath)) { 407 options.put("xpath", fixXPathRegex(xpath)); 408 } 409 valueModifier.addModifierEntry(new ModifierEntry(oldValue, newValue, options)); 410 break; 411 } 412 } 413 }); 414 fileParser.parse(FilterFactory.class, filename); 415 modifiers.add(pathModifier); 416 modifiers.add(pathRegexModifier); 417 modifiers.add(valueModifier); 418 } 419 420 private Pattern XPATH_PATTERN = PatternCache.get("/(/\\w++(\\[@\\w++=\"[^\"()%\\\\]+\"])*)++"); 421 422 /** 423 * @param path 424 * @return true if path is a valid XPath and not a regex. 425 */ isValidXPath(String path)426 private boolean isValidXPath(String path) { 427 return XPATH_PATTERN.matcher(path).matches(); 428 } 429 430 /** 431 * Converts an xpath into a proper regex pattern. 432 * 433 * @param path 434 * @return 435 */ fixXPathRegex(String path)436 private String fixXPathRegex(String path) { 437 return '^' + path.replace("[@", "\\[@"); 438 } 439 440 private static final Options options = new Options( 441 "Filters CLDR XML files according to orgnizational coverage levels and an " + 442 "input file of replacement values/xpaths.") 443 // .add("org", 'o', ".*", "google", "The organization that the filtering is for. If set, also removes duplicate paths.") 444 .add("org", 'o', ".*", Organization.cldr.name(), "The organization that the filtering is for. If set, also removes duplicate paths.") 445 .add("locales", 'l', ".*", ".*", "A regular expression indicating the locales to be filtered"); 446 447 /** 448 * Run FilterFactory for a specific organization. 449 * 450 * @param args 451 * @throws Exception 452 */ main(String[] args)453 public static void main(String[] args) throws Exception { 454 options.parse(args, true); 455 Factory rawFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, options.get("locales").getValue()); 456 String org = options.get("org").getValue(); 457 FilterFactory filterFactory = FilterFactory.load(rawFactory, org, true); 458 String outputDir = CLDRPaths.GEN_DIRECTORY + "/filter"; 459 for (String locale : rawFactory.getAvailable()) { 460 try (PrintWriter out = FileUtilities.openUTF8Writer(outputDir, locale + ".xml");) { 461 filterFactory.make(locale, false).write(out); 462 } 463 // out.close(); 464 } 465 } 466 } 467