• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import com.ibm.icu.util.Output;
4 import java.io.File;
5 import java.io.PrintWriter;
6 import java.util.ArrayList;
7 import java.util.HashMap;
8 import java.util.Iterator;
9 import java.util.List;
10 import java.util.Map;
11 import java.util.Set;
12 import java.util.regex.Matcher;
13 import java.util.regex.Pattern;
14 import org.unicode.cldr.draft.FileUtilities;
15 import org.unicode.cldr.tool.Option.Options;
16 import org.unicode.cldr.util.CLDRConfig;
17 import org.unicode.cldr.util.CLDRFile;
18 import org.unicode.cldr.util.CLDRFile.DraftStatus;
19 import org.unicode.cldr.util.CLDRPaths;
20 import org.unicode.cldr.util.CoverageInfo;
21 import org.unicode.cldr.util.Factory;
22 import org.unicode.cldr.util.LocaleIDParser;
23 import org.unicode.cldr.util.Organization;
24 import org.unicode.cldr.util.PatternCache;
25 import org.unicode.cldr.util.RegexFileParser;
26 import org.unicode.cldr.util.RegexFileParser.RegexLineParser;
27 import org.unicode.cldr.util.RegexLookup;
28 import org.unicode.cldr.util.StandardCodes;
29 import org.unicode.cldr.util.XMLSource;
30 import org.unicode.cldr.util.XPathParts;
31 
32 /**
33  * Factory for filtering CLDRFiles by organization and replacing certain values. Organization
34  * coverage data is in org/unicode/cldr/util/data/Locales.txt.
35  *
36  * @author jchye
37  */
38 public class FilterFactory extends Factory {
39     /** Types of data modification supported. */
40     private enum ModificationType {
41         xpath,
42         value;
43     }
44 
45     private Factory rawFactory;
46     private String organization;
47     private boolean modifyValues;
48 
49     private List<Modifier> modifiers = new ArrayList<>();
50 
51     /**
52      * Creates a new Factory for filtering CLDRFiles.
53      *
54      * @param rawFactory the factory to be filtered
55      * @param organization the organization that the filtering is catered towards
56      * @param modifyValues true if certain values in the data should be modified or replaced
57      */
FilterFactory(Factory rawFactory, String organization, boolean modifyValues)58     private FilterFactory(Factory rawFactory, String organization, boolean modifyValues) {
59         this.rawFactory = rawFactory;
60         this.organization = organization;
61         setSupplementalDirectory(rawFactory.getSupplementalDirectory());
62         this.modifyValues = modifyValues;
63     }
64 
load( Factory rawFactory, String organization, boolean usesAltValue)65     public static FilterFactory load(
66             Factory rawFactory, String organization, boolean usesAltValue) {
67         FilterFactory filterFactory = new FilterFactory(rawFactory, organization, usesAltValue);
68         filterFactory.loadModifiers("dataModifiers.txt");
69         return filterFactory;
70     }
71 
72     @Override
getSourceDirectories()73     public File[] getSourceDirectories() {
74         return rawFactory.getSourceDirectories();
75     }
76 
77     @Override
getSourceDirectoriesForLocale(String localeID)78     public List<File> getSourceDirectoriesForLocale(String localeID) {
79         return rawFactory.getSourceDirectoriesForLocale(localeID);
80     }
81 
82     @Override
handleMake( String localeID, boolean resolved, DraftStatus minimalDraftStatus)83     protected CLDRFile handleMake(
84             String localeID, boolean resolved, DraftStatus minimalDraftStatus) {
85         if (resolved) {
86             return new CLDRFile(makeResolvingSource(localeID, minimalDraftStatus));
87         } else {
88             return filterCldrFile(localeID, minimalDraftStatus);
89         }
90     }
91 
92     /**
93      * @return a filtered CLDRFile.
94      */
filterCldrFile(String localeID, DraftStatus minimalDraftStatus)95     private CLDRFile filterCldrFile(String localeID, DraftStatus minimalDraftStatus) {
96         CLDRFile rawFile = rawFactory.make(localeID, false, minimalDraftStatus).cloneAsThawed();
97 
98         filterAltValues(rawFile);
99         filterCoverage(rawFile);
100         removeRedundantPaths(rawFile);
101         registerXmlSource(rawFile);
102         return rawFile;
103     }
104 
105     /**
106      * Replaces the value for certain XPaths with their alternate value.
107      *
108      * @param rawFile
109      */
filterAltValues(CLDRFile rawFile)110     private void filterAltValues(CLDRFile rawFile) {
111         if (!modifyValues) return;
112 
113         for (Modifier modifier : modifiers) {
114             modifier = modifier.filterLocale(rawFile.getLocaleID());
115             if (!modifier.isEmpty()) {
116                 modifier.modifyFile(rawFile);
117             }
118         }
119     }
120 
121     /**
122      * Filters a CLDRFile according to the specified organization's coverage level.
123      *
124      * @param rawFile
125      */
filterCoverage(CLDRFile rawFile)126     private void filterCoverage(CLDRFile rawFile) {
127         if (organization == null) return;
128 
129         int minLevel =
130                 StandardCodes.make()
131                         .getLocaleCoverageLevel(organization, rawFile.getLocaleID())
132                         .getLevel();
133         CoverageInfo covInfo = CLDRConfig.getInstance().getCoverageInfo();
134         for (String xpath : rawFile) {
135             // Locale metadata shouldn't be stripped.
136             int level = covInfo.getCoverageValue(xpath, rawFile.getLocaleID());
137             if (level > minLevel) {
138                 rawFile.remove(xpath);
139             }
140         }
141     }
142 
143     /**
144      * Removes paths with duplicate values that can be found elsewhere in the file.
145      *
146      * @param rawFile
147      */
removeRedundantPaths(CLDRFile rawFile)148     private void removeRedundantPaths(CLDRFile rawFile) {
149         if (organization == null || rawFile.getLocaleID().equals("root")) return;
150 
151         String parent = LocaleIDParser.getParent(rawFile.getLocaleID());
152         CLDRFile resolvedParent = rawFactory.make(parent, true);
153         List<String> duplicatePaths = new ArrayList<>();
154         for (String xpath : rawFile) {
155             if (xpath.startsWith("//ldml/identity")) {
156                 continue;
157             }
158             String value = rawFile.getStringValue(xpath);
159             // Remove count="x" if the value is equivalent to count="other".
160             if (xpath.contains("[@count=")) {
161                 XPathParts parts = XPathParts.getFrozenInstance(xpath);
162                 String count = parts.getAttributeValue(-1, "count");
163                 if (!count.equals("other")) {
164                     parts = parts.cloneAsThawed();
165                     parts.setAttribute(-1, "count", "other");
166                     String otherPath = parts.toString();
167                     if (value.equals(rawFile.getStringValue(otherPath))) {
168                         duplicatePaths.add(xpath);
169                         continue;
170                     }
171                 }
172             }
173             // Remove xpaths with values also found in the parent.
174             String sourceLocale = resolvedParent.getSourceLocaleID(xpath, null);
175             if (!sourceLocale.equals(XMLSource.CODE_FALLBACK_ID)) {
176                 String parentValue = resolvedParent.getStringValue(xpath);
177                 if (value.equals(parentValue)) {
178                     duplicatePaths.add(xpath);
179                 }
180             }
181         }
182         for (String xpath : duplicatePaths) {
183             rawFile.remove(xpath);
184         }
185     }
186 
187     @Override
getMinimalDraftStatus()188     public DraftStatus getMinimalDraftStatus() {
189         return rawFactory.getMinimalDraftStatus();
190     }
191 
192     @Override
handleGetAvailable()193     protected Set<String> handleGetAvailable() {
194         return rawFactory.getAvailable();
195     }
196 
197     /** Wrapper class for holding information about a value modification entry. */
198     private class ModifierEntry {
199         String oldValue;
200         String newValue;
201         Map<String, String> options;
202 
ModifierEntry(String oldValue, String newValue, Map<String, String> options)203         public ModifierEntry(String oldValue, String newValue, Map<String, String> options) {
204             this.oldValue = oldValue;
205             this.newValue = newValue;
206             this.options = options;
207         }
208 
209         /**
210          * @param locale the locale to be matched
211          * @return true if the locale matches the locale filter in this entry.
212          */
localeMatches(String locale)213         public boolean localeMatches(String locale) {
214             String pattern = options.get("locale");
215             return pattern == null ? true : locale.matches(pattern);
216         }
217     }
218 
219     /** Class for performing a specific type of data modification on a CLDRFile. */
220     private abstract class Modifier {
221         protected List<ModifierEntry> entries = new ArrayList<>();
222 
modifyFile(CLDRFile file)223         public abstract void modifyFile(CLDRFile file);
224 
filterLocale(String locale)225         public abstract Modifier filterLocale(String locale);
226 
227         /**
228          * @return the list of modifiers meant for the specified locale.
229          */
getModifiersForLocale(String locale)230         protected List<ModifierEntry> getModifiersForLocale(String locale) {
231             List<ModifierEntry> newFilters = new ArrayList<>();
232             for (ModifierEntry filter : entries) {
233                 if (filter.localeMatches(locale)) {
234                     newFilters.add(filter);
235                 }
236             }
237             return newFilters;
238         }
239 
240         /**
241          * @param filter
242          */
addModifierEntry(ModifierEntry entry)243         public void addModifierEntry(ModifierEntry entry) {
244             entries.add(entry);
245         }
246 
isEmpty()247         public boolean isEmpty() {
248             return entries.size() == 0;
249         }
250     }
251 
252     /** Maps the value of an XPath onto another XPath. */
253     private class PathModifier extends Modifier {
254         @Override
modifyFile(CLDRFile file)255         public void modifyFile(CLDRFile file) {
256             // For certain alternate values, use them as the main values.
257             for (ModifierEntry entry : entries) {
258                 String oldPath = entry.oldValue;
259                 String value = file.getStringValue(oldPath);
260                 if (value != null) {
261                     String newPath = entry.newValue;
262                     file.add(newPath, value);
263                     file.remove(oldPath);
264                 }
265             }
266         }
267 
268         @Override
filterLocale(String locale)269         public Modifier filterLocale(String locale) {
270             PathModifier newModifier = new PathModifier();
271             newModifier.entries = getModifiersForLocale(locale);
272             return newModifier;
273         }
274     }
275 
276     /** Replaces certain values with other values. */
277     private class ValueModifier extends Modifier {
278         @Override
modifyFile(CLDRFile file)279         public void modifyFile(CLDRFile file) {
280             // Replace values.
281             for (ModifierEntry entry : entries) {
282                 String filteringPath = entry.options.get("xpath");
283                 if (filteringPath != null && isValidXPath(filteringPath)) {
284                     // For non-regex XPaths, look them up directly.
285                     String value = file.getStringValue(filteringPath);
286                     if (value != null) {
287                         value = value.replaceAll(entry.oldValue, entry.newValue);
288                         file.add(filteringPath, value);
289                     }
290                 } else {
291                     Iterator<String> iterator = file.iterator();
292                     if (filteringPath != null) {
293                         Matcher matcher = PatternCache.get(filteringPath).matcher("");
294                         iterator = file.iterator(matcher);
295                     }
296                     while (iterator.hasNext()) {
297                         String xpath = iterator.next();
298                         String originalValue = file.getStringValue(xpath);
299                         String value = originalValue.replaceAll(entry.oldValue, entry.newValue);
300                         if (!value.equals(originalValue)) {
301                             file.add(xpath, value);
302                         }
303                     }
304                 }
305             }
306         }
307 
308         @Override
filterLocale(String locale)309         public Modifier filterLocale(String locale) {
310             ValueModifier newModifier = new ValueModifier();
311             newModifier.entries = getModifiersForLocale(locale);
312             return newModifier;
313         }
314     }
315 
316     /** Maps the value of XPaths onto other XPaths using regexes. */
317     private class PathRegexModifier extends Modifier {
318         private RegexLookup<String> xpathLookup = new RegexLookup<>();
319 
320         @Override
addModifierEntry(ModifierEntry entry)321         public void addModifierEntry(ModifierEntry entry) {
322             super.addModifierEntry(entry);
323             xpathLookup.add(entry.oldValue, entry.newValue);
324         }
325 
326         @Override
modifyFile(CLDRFile file)327         public void modifyFile(CLDRFile file) {
328             if (xpathLookup.size() > 0) {
329                 Output<String[]> arguments = new Output<>();
330                 for (String xpath : file) {
331                     String newValue = xpathLookup.get(xpath, null, arguments, null, null);
332                     if (newValue != null) {
333                         String newPath = RegexLookup.replace(newValue, arguments.value);
334                         String value = file.getStringValue(xpath);
335                         file.add(newPath, value);
336                         file.remove(xpath);
337                     }
338                 }
339             }
340         }
341 
342         @Override
filterLocale(String locale)343         public Modifier filterLocale(String locale) {
344             PathRegexModifier newModifier = new PathRegexModifier();
345             newModifier.entries = getModifiersForLocale(locale);
346             for (ModifierEntry entry : newModifier.entries) {
347                 newModifier.xpathLookup.add(entry.oldValue, entry.newValue);
348             }
349             return newModifier;
350         }
351     }
352 
353     /** Loads modifiers from a specified file. */
loadModifiers(String filename)354     private void loadModifiers(String filename) {
355         if (!modifyValues) return;
356         final Modifier pathModifier = new PathModifier();
357         final Modifier pathRegexModifier = new PathRegexModifier();
358         final Modifier valueModifier = new ValueModifier();
359         RegexFileParser fileParser = new RegexFileParser();
360         fileParser.setLineParser(
361                 new RegexLineParser() {
362                     @Override
363                     public void parse(String line) {
364                         String[] contents = line.split("\\s*+;\\s*+");
365                         ModificationType filterType = ModificationType.valueOf(contents[0]);
366                         String oldValue = contents[1];
367                         String newValue = contents[2];
368                         // Process remaining options.
369                         Map<String, String> options = new HashMap<>();
370                         for (int i = 3; i < contents.length; i++) {
371                             String rawLine = contents[i];
372                             int pos = rawLine.indexOf('=');
373                             if (pos < 0) {
374                                 throw new IllegalArgumentException("Invalid option: " + rawLine);
375                             }
376                             String optionType = rawLine.substring(0, pos).trim();
377                             options.put(optionType, rawLine.substring(pos + 1).trim());
378                         }
379 
380                         switch (filterType) {
381                             case xpath:
382                                 if (isValidXPath(oldValue)) {
383                                     pathModifier.addModifierEntry(
384                                             new ModifierEntry(oldValue, newValue, options));
385                                 } else {
386                                     pathRegexModifier.addModifierEntry(
387                                             new ModifierEntry(
388                                                     fixXPathRegex(oldValue), newValue, options));
389                                 }
390                                 break;
391                             case value:
392                                 String xpath = options.get("xpath");
393                                 if (xpath != null && !isValidXPath(xpath)) {
394                                     options.put("xpath", fixXPathRegex(xpath));
395                                 }
396                                 valueModifier.addModifierEntry(
397                                         new ModifierEntry(oldValue, newValue, options));
398                                 break;
399                         }
400                     }
401                 });
402         fileParser.parse(FilterFactory.class, filename);
403         modifiers.add(pathModifier);
404         modifiers.add(pathRegexModifier);
405         modifiers.add(valueModifier);
406     }
407 
408     private Pattern XPATH_PATTERN = PatternCache.get("/(/\\w++(\\[@\\w++=\"[^\"()%\\\\]+\"])*)++");
409 
410     /**
411      * @param path
412      * @return true if path is a valid XPath and not a regex.
413      */
isValidXPath(String path)414     private boolean isValidXPath(String path) {
415         return XPATH_PATTERN.matcher(path).matches();
416     }
417 
418     /**
419      * Converts an xpath into a proper regex pattern.
420      *
421      * @param path
422      * @return
423      */
fixXPathRegex(String path)424     private String fixXPathRegex(String path) {
425         return '^' + path.replace("[@", "\\[@");
426     }
427 
428     private static final Options options =
429             new Options(
430                             "Filters CLDR XML files according to orgnizational coverage levels and an "
431                                     + "input file of replacement values/xpaths.")
432                     //        .add("org", 'o', ".*", "google", "The organization that the filtering
433                     // is for. If set, also removes duplicate paths.")
434                     .add(
435                             "org",
436                             'o',
437                             ".*",
438                             Organization.cldr.name(),
439                             "The organization that the filtering is for. If set, also removes duplicate paths.")
440                     .add(
441                             "locales",
442                             'l',
443                             ".*",
444                             ".*",
445                             "A regular expression indicating the locales to be filtered");
446 
447     /**
448      * Run FilterFactory for a specific organization.
449      *
450      * @param args
451      * @throws Exception
452      */
main(String[] args)453     public static void main(String[] args) throws Exception {
454         options.parse(args, true);
455         Factory rawFactory =
456                 Factory.make(CLDRPaths.MAIN_DIRECTORY, options.get("locales").getValue());
457         String org = options.get("org").getValue();
458         FilterFactory filterFactory = FilterFactory.load(rawFactory, org, true);
459         String outputDir = CLDRPaths.GEN_DIRECTORY + "/filter";
460         for (String locale : rawFactory.getAvailable()) {
461             try (PrintWriter out = FileUtilities.openUTF8Writer(outputDir, locale + ".xml"); ) {
462                 filterFactory.make(locale, false).write(out);
463             }
464             //            out.close();
465         }
466     }
467 }
468