• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import java.io.File;
4 import java.io.PrintWriter;
5 import java.util.ArrayList;
6 import java.util.HashMap;
7 import java.util.Iterator;
8 import java.util.List;
9 import java.util.Map;
10 import java.util.Set;
11 import java.util.regex.Matcher;
12 import java.util.regex.Pattern;
13 
14 import org.unicode.cldr.draft.FileUtilities;
15 import org.unicode.cldr.tool.Option.Options;
16 import org.unicode.cldr.util.CLDRConfig;
17 import org.unicode.cldr.util.CLDRFile;
18 import org.unicode.cldr.util.CLDRFile.DraftStatus;
19 import org.unicode.cldr.util.CLDRPaths;
20 import org.unicode.cldr.util.CoverageInfo;
21 import org.unicode.cldr.util.Factory;
22 import org.unicode.cldr.util.LocaleIDParser;
23 import org.unicode.cldr.util.Organization;
24 import org.unicode.cldr.util.PatternCache;
25 import org.unicode.cldr.util.RegexFileParser;
26 import org.unicode.cldr.util.RegexFileParser.RegexLineParser;
27 import org.unicode.cldr.util.RegexLookup;
28 import org.unicode.cldr.util.StandardCodes;
29 import org.unicode.cldr.util.XMLSource;
30 import org.unicode.cldr.util.XPathParts;
31 
32 import com.ibm.icu.util.Output;
33 
34 /**
35  * Factory for filtering CLDRFiles by organization and replacing certain values.
36  * Organization coverage data is in org/unicode/cldr/util/data/Locales.txt.
37  *
38  * @author jchye
39  */
40 public class FilterFactory extends Factory {
41     /**
42      * Types of data modification supported.
43      */
44     private enum ModificationType {
45         xpath, value;
46     }
47 
48     private Factory rawFactory;
49     private String organization;
50     private boolean modifyValues;
51 
52     private List<Modifier> modifiers = new ArrayList<Modifier>();
53 
54     /**
55      * Creates a new Factory for filtering CLDRFiles.
56      *
57      * @param rawFactory
58      *            the factory to be filtered
59      * @param organization
60      *            the organization that the filtering is catered towards
61      * @param modifyValues
62      *            true if certain values in the data should be modified or replaced
63      */
FilterFactory(Factory rawFactory, String organization, boolean modifyValues)64     private FilterFactory(Factory rawFactory, String organization, boolean modifyValues) {
65         this.rawFactory = rawFactory;
66         this.organization = organization;
67         setSupplementalDirectory(rawFactory.getSupplementalDirectory());
68         this.modifyValues = modifyValues;
69     }
70 
load(Factory rawFactory, String organization, boolean usesAltValue)71     public static FilterFactory load(Factory rawFactory, String organization, boolean usesAltValue) {
72         FilterFactory filterFactory = new FilterFactory(rawFactory, organization, usesAltValue);
73         filterFactory.loadModifiers("dataModifiers.txt");
74         return filterFactory;
75     }
76 
77     @Override
getSourceDirectories()78     public File[] getSourceDirectories() {
79         return rawFactory.getSourceDirectories();
80     }
81 
82     @Override
getSourceDirectoriesForLocale(String localeID)83     public List<File> getSourceDirectoriesForLocale(String localeID) {
84         return rawFactory.getSourceDirectoriesForLocale(localeID);
85     }
86 
87     @Override
handleMake(String localeID, boolean resolved, DraftStatus minimalDraftStatus)88     protected CLDRFile handleMake(String localeID, boolean resolved, DraftStatus minimalDraftStatus) {
89         if (resolved) {
90             return new CLDRFile(makeResolvingSource(localeID, minimalDraftStatus));
91         } else {
92             return filterCldrFile(localeID, minimalDraftStatus);
93         }
94     }
95 
96     /**
97      * @return a filtered CLDRFile.
98      */
filterCldrFile(String localeID, DraftStatus minimalDraftStatus)99     private CLDRFile filterCldrFile(String localeID, DraftStatus minimalDraftStatus) {
100         CLDRFile rawFile = rawFactory.make(localeID, false, minimalDraftStatus).cloneAsThawed();
101 
102         filterAltValues(rawFile);
103         filterCoverage(rawFile);
104         removeRedundantPaths(rawFile);
105         return rawFile;
106     }
107 
108     /**
109      * Replaces the value for certain XPaths with their alternate value.
110      *
111      * @param rawFile
112      */
filterAltValues(CLDRFile rawFile)113     private void filterAltValues(CLDRFile rawFile) {
114         if (!modifyValues) return;
115 
116         for (Modifier modifier : modifiers) {
117             modifier = modifier.filterLocale(rawFile.getLocaleID());
118             if (!modifier.isEmpty()) {
119                 modifier.modifyFile(rawFile);
120             }
121         }
122     }
123 
124     /**
125      * Filters a CLDRFile according to the specified organization's coverage level.
126      *
127      * @param rawFile
128      */
filterCoverage(CLDRFile rawFile)129     private void filterCoverage(CLDRFile rawFile) {
130         if (organization == null) return;
131 
132         int minLevel = StandardCodes.make()
133             .getLocaleCoverageLevel(organization, rawFile.getLocaleID())
134             .getLevel();
135         CoverageInfo covInfo = CLDRConfig.getInstance().getCoverageInfo();
136         for (String xpath : rawFile) {
137             // Locale metadata shouldn't be stripped.
138             int level = covInfo.getCoverageValue(xpath, rawFile.getLocaleID());
139             if (level > minLevel) {
140                 rawFile.remove(xpath);
141             }
142         }
143     }
144 
145     /**
146      * Removes paths with duplicate values that can be found elsewhere in the file.
147      * @param rawFile
148      */
removeRedundantPaths(CLDRFile rawFile)149     private void removeRedundantPaths(CLDRFile rawFile) {
150         if (organization == null || rawFile.getLocaleID().equals("root")) return;
151 
152         String parent = LocaleIDParser.getParent(rawFile.getLocaleID());
153         CLDRFile resolvedParent = rawFactory.make(parent, true);
154         List<String> duplicatePaths = new ArrayList<String>();
155         for (String xpath : rawFile) {
156             if (xpath.startsWith("//ldml/identity")) {
157                 continue;
158             }
159             String value = rawFile.getStringValue(xpath);
160             // Remove count="x" if the value is equivalent to count="other".
161             if (xpath.contains("[@count=")) {
162                 XPathParts parts = XPathParts.getInstance(xpath); // not frozen, for setAttribute
163                 String count = parts.getAttributeValue(-1, "count");
164                 if (!count.equals("other")) {
165                     parts.setAttribute(-1, "count", "other");
166                     String otherPath = parts.toString();
167                     if (value.equals(rawFile.getStringValue(otherPath))) {
168                         duplicatePaths.add(xpath);
169                         continue;
170                     }
171                 }
172             }
173             // Remove xpaths with values also found in the parent.
174             String sourceLocale = resolvedParent.getSourceLocaleID(xpath, null);
175             if (!sourceLocale.equals(XMLSource.CODE_FALLBACK_ID)) {
176                 String parentValue = resolvedParent.getStringValue(xpath);
177                 if (value.equals(parentValue)) {
178                     duplicatePaths.add(xpath);
179                 }
180             }
181         }
182         for (String xpath : duplicatePaths) {
183             rawFile.remove(xpath);
184         }
185     }
186 
187     @Override
getMinimalDraftStatus()188     public DraftStatus getMinimalDraftStatus() {
189         return rawFactory.getMinimalDraftStatus();
190     }
191 
192     @Override
handleGetAvailable()193     protected Set<String> handleGetAvailable() {
194         return rawFactory.getAvailable();
195     }
196 
197     /**
198      * Wrapper class for holding information about a value modification entry.
199      */
200     private class ModifierEntry {
201         String oldValue;
202         String newValue;
203         Map<String, String> options;
204 
ModifierEntry(String oldValue, String newValue, Map<String, String> options)205         public ModifierEntry(String oldValue, String newValue, Map<String, String> options) {
206             this.oldValue = oldValue;
207             this.newValue = newValue;
208             this.options = options;
209         }
210 
211         /**
212          * @param locale
213          *            the locale to be matched
214          * @return true if the locale matches the locale filter in this entry.
215          */
localeMatches(String locale)216         public boolean localeMatches(String locale) {
217             String pattern = options.get("locale");
218             return pattern == null ? true : locale.matches(pattern);
219         }
220     }
221 
222     /**
223      * Class for performing a specific type of data modification on a CLDRFile.
224      */
225     private abstract class Modifier {
226         protected List<ModifierEntry> entries = new ArrayList<ModifierEntry>();
227 
modifyFile(CLDRFile file)228         public abstract void modifyFile(CLDRFile file);
229 
filterLocale(String locale)230         public abstract Modifier filterLocale(String locale);
231 
232         /**
233          * @return the list of modifiers meant for the specified locale.
234          */
getModifiersForLocale(String locale)235         protected List<ModifierEntry> getModifiersForLocale(String locale) {
236             List<ModifierEntry> newFilters = new ArrayList<ModifierEntry>();
237             for (ModifierEntry filter : entries) {
238                 if (filter.localeMatches(locale)) {
239                     newFilters.add(filter);
240                 }
241             }
242             return newFilters;
243         }
244 
245         /**
246          *
247          * @param filter
248          */
addModifierEntry(ModifierEntry entry)249         public void addModifierEntry(ModifierEntry entry) {
250             entries.add(entry);
251         }
252 
isEmpty()253         public boolean isEmpty() {
254             return entries.size() == 0;
255         }
256     }
257 
258     /**
259      * Maps the value of an XPath onto another XPath.
260      */
261     private class PathModifier extends Modifier {
262         @Override
modifyFile(CLDRFile file)263         public void modifyFile(CLDRFile file) {
264             // For certain alternate values, use them as the main values.
265             for (ModifierEntry entry : entries) {
266                 String oldPath = entry.oldValue;
267                 String value = file.getStringValue(oldPath);
268                 if (value != null) {
269                     String newPath = entry.newValue;
270                     file.add(newPath, value);
271                     file.remove(oldPath);
272                 }
273             }
274         }
275 
276         @Override
filterLocale(String locale)277         public Modifier filterLocale(String locale) {
278             PathModifier newModifier = new PathModifier();
279             newModifier.entries = getModifiersForLocale(locale);
280             return newModifier;
281         }
282     }
283 
284     /**
285      * Replaces certain values with other values.
286      */
287     private class ValueModifier extends Modifier {
288         @Override
modifyFile(CLDRFile file)289         public void modifyFile(CLDRFile file) {
290             // Replace values.
291             for (ModifierEntry entry : entries) {
292                 String filteringPath = entry.options.get("xpath");
293                 if (filteringPath != null && isValidXPath(filteringPath)) {
294                     // For non-regex XPaths, look them up directly.
295                     String value = file.getStringValue(filteringPath);
296                     if (value != null) {
297                         value = value.replaceAll(entry.oldValue, entry.newValue);
298                         file.add(filteringPath, value);
299                     }
300                 } else {
301                     Iterator<String> iterator = file.iterator();
302                     if (filteringPath != null) {
303                         Matcher matcher = PatternCache.get(filteringPath).matcher("");
304                         iterator = file.iterator(matcher);
305                     }
306                     while (iterator.hasNext()) {
307                         String xpath = iterator.next();
308                         String originalValue = file.getStringValue(xpath);
309                         String value = originalValue.replaceAll(entry.oldValue, entry.newValue);
310                         if (!value.equals(originalValue)) {
311                             file.add(xpath, value);
312                         }
313                     }
314                 }
315             }
316         }
317 
318         @Override
filterLocale(String locale)319         public Modifier filterLocale(String locale) {
320             ValueModifier newModifier = new ValueModifier();
321             newModifier.entries = getModifiersForLocale(locale);
322             return newModifier;
323         }
324     }
325 
326     /**
327      * Maps the value of XPaths onto other XPaths using regexes.
328      */
329     private class PathRegexModifier extends Modifier {
330         private RegexLookup<String> xpathLookup = new RegexLookup<String>();
331 
332         @Override
addModifierEntry(ModifierEntry entry)333         public void addModifierEntry(ModifierEntry entry) {
334             super.addModifierEntry(entry);
335             xpathLookup.add(entry.oldValue, entry.newValue);
336         }
337 
338         @Override
modifyFile(CLDRFile file)339         public void modifyFile(CLDRFile file) {
340             if (xpathLookup.size() > 0) {
341                 Output<String[]> arguments = new Output<String[]>();
342                 for (String xpath : file) {
343                     String newValue = xpathLookup.get(xpath, null, arguments, null, null);
344                     if (newValue != null) {
345                         String newPath = RegexLookup.replace(newValue, arguments.value);
346                         String value = file.getStringValue(xpath);
347                         file.add(newPath, value);
348                         file.remove(xpath);
349                     }
350                 }
351             }
352         }
353 
354         @Override
filterLocale(String locale)355         public Modifier filterLocale(String locale) {
356             PathRegexModifier newModifier = new PathRegexModifier();
357             newModifier.entries = getModifiersForLocale(locale);
358             for (ModifierEntry entry : newModifier.entries) {
359                 newModifier.xpathLookup.add(entry.oldValue, entry.newValue);
360             }
361             return newModifier;
362         }
363     }
364 
365     /**
366      * Loads modifiers from a specified file.
367      */
loadModifiers(String filename)368     private void loadModifiers(String filename) {
369         if (!modifyValues) return;
370         final Modifier pathModifier = new PathModifier();
371         final Modifier pathRegexModifier = new PathRegexModifier();
372         final Modifier valueModifier = new ValueModifier();
373         RegexFileParser fileParser = new RegexFileParser();
374         fileParser.setLineParser(new RegexLineParser() {
375             @Override
376             public void parse(String line) {
377                 String[] contents = line.split("\\s*+;\\s*+");
378                 ModificationType filterType = ModificationType.valueOf(contents[0]);
379                 String oldValue = contents[1];
380                 String newValue = contents[2];
381                 // Process remaining options.
382                 Map<String, String> options = new HashMap<String, String>();
383                 for (int i = 3; i < contents.length; i++) {
384                     String rawLine = contents[i];
385                     int pos = rawLine.indexOf('=');
386                     if (pos < 0) {
387                         throw new IllegalArgumentException("Invalid option: " + rawLine);
388                     }
389                     String optionType = rawLine.substring(0, pos).trim();
390                     options.put(optionType, rawLine.substring(pos + 1).trim());
391                 }
392 
393                 switch (filterType) {
394                 case xpath:
395                     if (isValidXPath(oldValue)) {
396                         pathModifier.addModifierEntry(new ModifierEntry(oldValue, newValue, options));
397                     } else {
398                         pathRegexModifier.addModifierEntry(new ModifierEntry(fixXPathRegex(oldValue),
399                             newValue, options));
400                     }
401                     break;
402                 case value:
403                     String xpath = options.get("xpath");
404                     if (xpath != null && !isValidXPath(xpath)) {
405                         options.put("xpath", fixXPathRegex(xpath));
406                     }
407                     valueModifier.addModifierEntry(new ModifierEntry(oldValue, newValue, options));
408                     break;
409                 }
410             }
411         });
412         fileParser.parse(FilterFactory.class, filename);
413         modifiers.add(pathModifier);
414         modifiers.add(pathRegexModifier);
415         modifiers.add(valueModifier);
416     }
417 
418     private Pattern XPATH_PATTERN = PatternCache.get("/(/\\w++(\\[@\\w++=\"[^\"()%\\\\]+\"])*)++");
419 
420     /**
421      * @param path
422      * @return true if path is a valid XPath and not a regex.
423      */
isValidXPath(String path)424     private boolean isValidXPath(String path) {
425         return XPATH_PATTERN.matcher(path).matches();
426     }
427 
428     /**
429      * Converts an xpath into a proper regex pattern.
430      *
431      * @param path
432      * @return
433      */
fixXPathRegex(String path)434     private String fixXPathRegex(String path) {
435         return '^' + path.replace("[@", "\\[@");
436     }
437 
438     private static final Options options = new Options(
439         "Filters CLDR XML files according to orgnizational coverage levels and an " +
440             "input file of replacement values/xpaths.")
441                 //        .add("org", 'o', ".*", "google", "The organization that the filtering is for. If set, also removes duplicate paths.")
442                 .add("org", 'o', ".*", Organization.cldr.name(), "The organization that the filtering is for. If set, also removes duplicate paths.")
443                 .add("locales", 'l', ".*", ".*", "A regular expression indicating the locales to be filtered");
444 
445     /**
446      * Run FilterFactory for a specific organization.
447      *
448      * @param args
449      * @throws Exception
450      */
main(String[] args)451     public static void main(String[] args) throws Exception {
452         options.parse(args, true);
453         Factory rawFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, options.get("locales").getValue());
454         String org = options.get("org").getValue();
455         FilterFactory filterFactory = FilterFactory.load(rawFactory, org, true);
456         String outputDir = CLDRPaths.GEN_DIRECTORY + "/filter";
457         for (String locale : rawFactory.getAvailable()) {
458             try (PrintWriter out = FileUtilities.openUTF8Writer(outputDir, locale + ".xml");) {
459                 filterFactory.make(locale, false).write(out);
460             }
461 //            out.close();
462         }
463     }
464 }
465