/*
 ******************************************************************************
 * Copyright (C) 2004-2013, International Business Machines Corporation and   *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 */
7 package org.unicode.cldr.tool;
8 
9 import java.io.File;
10 import java.io.PrintWriter;
11 import java.util.ArrayList;
12 import java.util.Arrays;
13 import java.util.EnumMap;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.Iterator;
17 import java.util.LinkedHashMap;
18 import java.util.LinkedHashSet;
19 import java.util.List;
20 import java.util.Map;
21 import java.util.Map.Entry;
22 import java.util.Set;
23 import java.util.TreeMap;
24 import java.util.TreeSet;
25 import java.util.regex.Matcher;
26 import java.util.regex.Pattern;
27 
28 import org.unicode.cldr.draft.FileUtilities;
29 import org.unicode.cldr.test.CLDRTest;
30 import org.unicode.cldr.test.DisplayAndInputProcessor;
31 import org.unicode.cldr.test.QuickCheck;
32 import org.unicode.cldr.util.Annotations;
33 import org.unicode.cldr.util.CLDRConfig;
34 import org.unicode.cldr.util.CLDRFile;
35 import org.unicode.cldr.util.CLDRFile.DraftStatus;
36 import org.unicode.cldr.util.CLDRFile.ExemplarType;
37 import org.unicode.cldr.util.CLDRFile.NumberingSystem;
38 import org.unicode.cldr.util.CLDRFile.WinningChoice;
39 import org.unicode.cldr.util.CLDRLocale;
40 import org.unicode.cldr.util.CLDRPaths;
41 import org.unicode.cldr.util.CLDRTool;
42 import org.unicode.cldr.util.CldrUtility;
43 import org.unicode.cldr.util.CldrUtility.SimpleLineComparator;
44 import org.unicode.cldr.util.DateTimeCanonicalizer;
45 import org.unicode.cldr.util.DateTimeCanonicalizer.DateTimePatternType;
46 import org.unicode.cldr.util.DtdData;
47 import org.unicode.cldr.util.DtdType;
48 import org.unicode.cldr.util.Factory;
49 import org.unicode.cldr.util.FileProcessor;
50 import org.unicode.cldr.util.LocaleIDParser;
51 import org.unicode.cldr.util.Log;
52 import org.unicode.cldr.util.PathHeader;
53 import org.unicode.cldr.util.PatternCache;
54 import org.unicode.cldr.util.RegexLookup;
55 import org.unicode.cldr.util.SimpleFactory;
56 import org.unicode.cldr.util.StandardCodes;
57 import org.unicode.cldr.util.StringId;
58 import org.unicode.cldr.util.SupplementalDataInfo;
59 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
60 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
61 import org.unicode.cldr.util.UnicodeSetPrettyPrinter;
62 import org.unicode.cldr.util.XMLSource;
63 import org.unicode.cldr.util.XPathParts;
64 import org.unicode.cldr.util.XPathParts.Comments;
65 import org.unicode.cldr.util.XPathParts.Comments.CommentType;
66 
67 import com.google.common.base.Splitter;
68 import com.ibm.icu.dev.tool.UOption;
69 import com.ibm.icu.dev.util.CollectionUtilities;
70 import com.ibm.icu.impl.Utility;
71 import com.ibm.icu.text.Collator;
72 import com.ibm.icu.text.DateTimePatternGenerator;
73 import com.ibm.icu.text.DateTimePatternGenerator.VariableField;
74 import com.ibm.icu.text.Normalizer;
75 import com.ibm.icu.text.NumberFormat;
76 import com.ibm.icu.text.UnicodeSet;
77 import com.ibm.icu.text.UnicodeSetIterator;
78 import com.ibm.icu.util.Output;
79 import com.ibm.icu.util.ULocale;
80 
81 /**
82  * Tool for applying modifications to the CLDR files. Use -h to see the options.
83  * <p>
84  * There are some environment variables that can be used with the program <br>
85  * -DSHOW_FILES=<anything> shows all create/open of files.
86  */
87 @CLDRTool(alias = "modify",
88     description = "Tool for applying modifications to the CLDR files. Use -h to see the options.")
89 public class CLDRModify {
90     private static final boolean DEBUG = false;
91     static final String DEBUG_PATHS = null; // ".*currency.*";
92     static final boolean COMMENT_REMOVALS = false; // append removals as comments
93     static final UnicodeSet whitespace = new UnicodeSet("[:whitespace:]").freeze();
94     static final UnicodeSet HEX = new UnicodeSet("[a-fA-F0-9]").freeze();
95     private static final DtdData dtdData = DtdData.getInstance(DtdType.ldml);
96 
97     // TODO make this into input option.
98 
/**
 * The kinds of field a {@link ConfigMatch} can be built for.
 */
enum ConfigKeys {
    action, locale, path, value, new_path, new_value
}
102 
/**
 * The actions that can be named in the {@code action} field (parsed with {@code ConfigAction.valueOf}).
 */
enum ConfigAction {
    /**
     * Remove a path
     */
    delete,
    /**
     * Add a path/value
     */
    add,
    /**
     * Replace a path/value. Equals 'add' but tests selected paths
     */
    replace,
    /**
     * Add a path/value. Equals 'add' but tests that path did NOT exist
     */
    addNew,
}
121 
122     static final class ConfigMatch {
123         final String exactMatch;
124         final Matcher regexMatch; // doesn't have to be thread safe
125         final ConfigAction action;
126         final boolean hexPath;
127 
ConfigMatch(ConfigKeys key, String match)128         public ConfigMatch(ConfigKeys key, String match) {
129             if (key == ConfigKeys.action) {
130                 exactMatch = null;
131                 regexMatch = null;
132                 action = ConfigAction.valueOf(match);
133                 hexPath = false;
134             } else if (match.startsWith("/") && match.endsWith("/")) {
135                 if (key != ConfigKeys.locale && key != ConfigKeys.path && key != ConfigKeys.value) {
136                     throw new IllegalArgumentException("Regex only allowed for old path/value.");
137                 }
138                 exactMatch = null;
139                 regexMatch = PatternCache.get(match.substring(1, match.length() - 1)
140                     .replace("[@", "\\[@")).matcher("");
141                 action = null;
142                 hexPath = false;
143             } else {
144                 exactMatch = match;
145                 regexMatch = null;
146                 action = null;
147                 hexPath = (key == ConfigKeys.new_path || key == ConfigKeys.path)
148                     && HEX.containsAll(match);
149             }
150 
151         }
152 
matches(String other)153         public boolean matches(String other) {
154             if (exactMatch == null) {
155                 return regexMatch.reset(other).find();
156             } else if (hexPath) {
157                 // convert path to id for comparison
158                 return exactMatch.equals(StringId.getHexId(other));
159             } else {
160                 return exactMatch.equals(other);
161             }
162         }
163 
toString()164         public String toString() {
165             return action != null ? action.toString()
166                 : exactMatch == null ? regexMatch.toString()
167                     : hexPath ? "*" + exactMatch + "*"
168                         : exactMatch;
169         }
170 
getPath(CLDRFile cldrFileToFilter)171         public String getPath(CLDRFile cldrFileToFilter) {
172             if (!hexPath) {
173                 return exactMatch;
174             }
175             // ensure that we have all the possible paths cached
176             String path = StringId.getStringFromHexId(exactMatch);
177             if (path == null) {
178                 for (String eachPath : cldrFileToFilter.fullIterable()) {
179                     StringId.getHexId(eachPath);
180                 }
181                 path = StringId.getStringFromHexId(exactMatch);
182                 if (path == null) {
183                     throw new IllegalArgumentException("No path for hex id: " + exactMatch);
184                 }
185             }
186             return path;
187         }
188 
getModified(ConfigMatch valueMatch, String value, ConfigMatch newValue)189         public static String getModified(ConfigMatch valueMatch, String value, ConfigMatch newValue) {
190             if (valueMatch == null) { // match anything
191                 if (newValue != null && newValue.exactMatch != null) {
192                     return newValue.exactMatch;
193                 }
194                 if (value != null) {
195                     return value;
196                 }
197                 throw new IllegalArgumentException("Can't have both old and new be null.");
198             } else if (valueMatch.exactMatch == null) { // regex
199                 if (newValue == null || newValue.exactMatch == null) {
200                     throw new IllegalArgumentException("Can't have regex without replacement.");
201                 }
202                 StringBuffer buffer = new StringBuffer();
203                 valueMatch.regexMatch.appendReplacement(buffer, newValue.exactMatch);
204                 return buffer.toString();
205             } else {
206                 return newValue.exactMatch != null ? newValue.exactMatch : value;
207             }
208         }
209     }
210 
211     static FixList fixList = new FixList();
212 
213     private static final int HELP1 = 0,
214         HELP2 = 1,
215         SOURCEDIR = 2,
216         DESTDIR = 3,
217         MATCH = 4,
218         JOIN = 5,
219         MINIMIZE = 6,
220         FIX = 7,
221         JOIN_ARGS = 8,
222         VET_ADD = 9,
223         RESOLVE = 10,
224         PATH = 11,
225         USER = 12,
226         ALL_DIRS = 13,
227         CHECK = 14,
228         KONFIG = 15;
229 
230     private static final UOption[] options = {
231         UOption.HELP_H(),
232         UOption.HELP_QUESTION_MARK(),
233         UOption.SOURCEDIR().setDefault(CLDRPaths.MAIN_DIRECTORY),
234         UOption.DESTDIR().setDefault(CLDRPaths.GEN_DIRECTORY + "cldrModify/"),
235         UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
236         UOption.create("join", 'j', UOption.OPTIONAL_ARG),
237         UOption.create("minimize", 'r', UOption.NO_ARG),
238         UOption.create("fix", 'f', UOption.OPTIONAL_ARG),
239         UOption.create("join-args", 'i', UOption.OPTIONAL_ARG),
240         UOption.create("vet", 'v', UOption.OPTIONAL_ARG),
241         UOption.create("resolve", 'z', UOption.OPTIONAL_ARG),
242         UOption.create("path", 'p', UOption.REQUIRES_ARG),
243         UOption.create("user", 'u', UOption.REQUIRES_ARG),
244         UOption.create("all", 'a', UOption.REQUIRES_ARG),
245         UOption.create("check", 'c', UOption.NO_ARG),
246         UOption.create("konfig", 'k', UOption.OPTIONAL_ARG).setDefault("modify_config.txt"),
247     };
248 
249     private static final UnicodeSet allMergeOptions = new UnicodeSet("[rcd]");
250 
251     static final String HELP_TEXT1 = "Use the following options"
252         + XPathParts.NEWLINE
253         + "-h or -?\t for this message"
254         + XPathParts.NEWLINE
255         + "-"
256         + options[SOURCEDIR].shortName
257         + "\t source directory. Default = -s"
258         + CldrUtility.getCanonicalName(CLDRPaths.MAIN_DIRECTORY)
259         + XPathParts.NEWLINE
260         + "\tExample:-sC:\\Unicode-CVS2\\cldr\\common\\gen\\source\\"
261         + XPathParts.NEWLINE
262         + "-"
263         + options[DESTDIR].shortName
264         + "\t destination directory. Default = -d"
265         + CldrUtility.getCanonicalName(CLDRPaths.GEN_DIRECTORY + "main/")
266         + XPathParts.NEWLINE
267         + "-m<regex>\t to restrict the locales to what matches <regex>"
268         + XPathParts.NEWLINE
269         + "-j<merge_dir>/X'\t to merge two sets of files together (from <source_dir>/X and <merge_dir>/X', "
270         + XPathParts.NEWLINE
271         + "\twhere * in X' is replaced by X)."
272         + XPathParts.NEWLINE
273         + "\tExample:-jC:\\Unicode-CVS2\\cldr\\dropbox\\to_be_merged\\missing\\missing_*"
274         + XPathParts.NEWLINE
275         + "-i\t merge arguments:"
276         + XPathParts.NEWLINE
277         + "\tr\t replace contents (otherwise new data will be draft=\"unconfirmed\")"
278         + XPathParts.NEWLINE
279         + "\tc\t ignore comments in <merge_dir> files"
280         + XPathParts.NEWLINE
281         + "-r\t to minimize the results (removing items that inherit from parent)."
282         + XPathParts.NEWLINE
283         + "-v\t incorporate vetting information, and generate diff files."
284         + XPathParts.NEWLINE
285         + "-z\t generate resolved files"
286         + XPathParts.NEWLINE
287         + "-p\t set path for -fx"
288         + XPathParts.NEWLINE
289         + "-u\t set user for -fb"
290         + XPathParts.NEWLINE
291         + "-a\t pattern: recurse over all subdirectories that match pattern"
292         + XPathParts.NEWLINE
293         + "-c\t check that resulting xml files are valid. Requires that a dtd directory be copied to the output directory, in the appropriate location."
294         + XPathParts.NEWLINE
295         + "-k\t config_file\twith -fk perform modifications according to what is in the config file. For format details, see:"
296         + XPathParts.NEWLINE
297         + "\t\thttp://cldr.unicode.org/development/cldr-big-red-switch/cldrmodify-passes/cldrmodify-config."
298         + XPathParts.NEWLINE
299         + "-f\t to perform various fixes on the files (add following arguments to specify which ones, eg -fxi)"
300         + XPathParts.NEWLINE;
301 
302     static final String HELP_TEXT2 = "Note: A set of bat files are also generated in <dest_dir>/diff. They will invoke a comparison program on the results."
303         + XPathParts.NEWLINE;
304     private static final boolean SHOW_DETAILS = false;
305     private static boolean SHOW_PROCESSING = false;
306 
307     /**
308      * Picks options and executes. Use -h to see options.
309      */
main(String[] args)310     public static void main(String[] args) throws Exception {
311         long startTime = System.currentTimeMillis();
312         UOption.parseArgs(args, options);
313         if (options[HELP1].doesOccur || options[HELP2].doesOccur) {
314             System.out.println(HELP_TEXT1 + fixList.showHelp() + HELP_TEXT2);
315             return;
316         }
317         checkSuboptions(options[FIX], fixList.getOptions());
318         checkSuboptions(options[JOIN_ARGS], allMergeOptions);
319         String recurseOnDirectories = options[ALL_DIRS].value;
320         boolean makeResolved = options[RESOLVE].doesOccur; // Utility.COMMON_DIRECTORY + "main/";
321 
322         // String sourceDir = "C:\\ICU4C\\locale\\common\\main\\";
323 
324         String sourceInput = options[SOURCEDIR].value;
325         String destInput = options[DESTDIR].value;
326         if (recurseOnDirectories != null) {
327             sourceInput = removeSuffix(sourceInput, "main/", "main");
328             destInput = removeSuffix(destInput, "main/", "main");
329         }
330         String sourceDirBase = CldrUtility.checkValidDirectory(sourceInput); // Utility.COMMON_DIRECTORY + "main/";
331         String targetDirBase = CldrUtility.checkValidDirectory(destInput); // Utility.GEN_DIRECTORY + "main/";
332         System.out.format("Source:\t%s\n", sourceDirBase);
333         System.out.format("Target:\t%s\n", targetDirBase);
334 
335         Set<String> dirSet = new TreeSet<String>();
336         if (recurseOnDirectories == null) {
337             dirSet.add("");
338         } else {
339             String[] subdirs = new File(sourceDirBase).list();
340             Matcher subdirMatch = PatternCache.get(recurseOnDirectories).matcher("");
341             for (String subdir : subdirs) {
342                 if (!subdirMatch.reset(subdir).find()) continue;
343                 dirSet.add(subdir + "/");
344             }
345         }
346         for (String dir : dirSet) {
347             String sourceDir = sourceDirBase + dir;
348             if (!new File(sourceDir).isDirectory()) continue;
349             String targetDir = targetDirBase + dir;
350             Log.setLog(targetDir + "/diff", "log.txt");
351             try { // String[] failureLines = new String[2];
352                 SimpleLineComparator lineComparer = new SimpleLineComparator(
353                     // SimpleLineComparator.SKIP_SPACES +
354                     SimpleLineComparator.TRIM +
355                         SimpleLineComparator.SKIP_EMPTY +
356                         SimpleLineComparator.SKIP_CVS_TAGS);
357 
358                 Factory cldrFactory = Factory.make(sourceDir, ".*");
359 
360                 if (options[VET_ADD].doesOccur) {
361                     VettingAdder va = new VettingAdder(options[VET_ADD].value);
362                     va.showFiles(cldrFactory, targetDir);
363                     return;
364                 }
365 
366                 Factory mergeFactory = null;
367 
368                 String join_prefix = "", join_postfix = "";
369                 if (options[JOIN].doesOccur) {
370                     String mergeDir = options[JOIN].value;
371                     File temp = new File(mergeDir);
372                     mergeDir = CldrUtility.checkValidDirectory(temp.getParent() + File.separator); // Utility.COMMON_DIRECTORY
373                     // + "main/";
374                     String filename = temp.getName();
375                     join_prefix = join_postfix = "";
376                     int pos = filename.indexOf("*");
377                     if (pos >= 0) {
378                         join_prefix = filename.substring(0, pos);
379                         join_postfix = filename.substring(pos + 1);
380                     }
381                     mergeFactory = Factory.make(mergeDir, ".*");
382                 }
383                 /*
384                  * Factory cldrFactory = Factory.make(sourceDir, ".*");
385                  * Set testSet = cldrFactory.getAvailable();
386                  * String[] quicktest = new String[] {
387                  * "de"
388                  * //"ar", "dz_BT",
389                  * // "sv", "en", "de"
390                  * };
391                  * if (quicktest.length > 0) {
392                  * testSet = new TreeSet(Arrays.asList(quicktest));
393                  * }
394                  */
395                 Set<String> locales = new TreeSet<String>(cldrFactory.getAvailable());
396                 if (mergeFactory != null) {
397                     Set<String> temp = new TreeSet<String>(mergeFactory.getAvailable());
398                     Set<String> locales3 = new TreeSet<String>();
399                     for (String locale : temp) {
400                         if (!locale.startsWith(join_prefix) || !locale.endsWith(join_postfix)) continue;
401                         locales3.add(locale.substring(join_prefix.length(), locale.length() - join_postfix.length()));
402                     }
403                     locales.retainAll(locales3);
404                     System.out.println("Merging: " + locales3);
405                 }
406                 new CldrUtility.MatcherFilter(options[MATCH].value).retainAll(locales);
407 
408                 RetainWhenMinimizing retainIfTrue = null;
409                 PathHeader.Factory pathHeaderFactory = null;
410 
411                 fixList.handleSetup();
412 
413                 long lastTime = System.currentTimeMillis();
414                 int spin = 0;
415                 System.out.format(locales.size() + " Locales:\t%s\n", locales.toString());
416                 int totalRemoved = 0;
417                 for (String test : locales) {
418                     spin++;
419                     if (SHOW_PROCESSING) {
420                         long now = System.currentTimeMillis();
421                         if (now - lastTime > 5000) {
422                             System.out.println(" .. still processing " + test + " [" + spin + "/" + locales.size()
423                                 + "]");
424                             lastTime = now;
425                         }
426                     }
427                     // testJavaSemantics();
428 
429                     // TODO parameterize the directory and filter
430                     // System.out.println("C:\\ICU4C\\locale\\common\\main\\fr.xml");
431 
432                     CLDRFile k = cldrFactory.make(test, makeResolved).cloneAsThawed();
433                     // HashSet<String> set = Builder.with(new HashSet<String>()).addAll(k).get();
434                     // System.out.format("Locale\t%s, Size\t%s\n", test, set.size());
435                     // if (k.isNonInheriting()) continue; // for now, skip supplementals
436                     if (DEBUG_PATHS != null) {
437                         System.out.println("Debug1 (" + test + "):\t" + k.toString(DEBUG_PATHS));
438                     }
439                     // System.out.println(k);
440                     // String s1 =
441                     // "//ldml/segmentations/segmentation[@type=\"LineBreak\"]/variables/variable[@_q=\"0061\"][@id=\"$CB\"] ";
442                     // String s2 =
443                     // "//ldml/segmentations/segmentation[@type=\"LineBreak\"]/variables/variable[@_q=\"003A\"][@id=\"$CB\"]";
444                     // System.out.println(k.ldmlComparator.compare(s1, s2));
445                     if (mergeFactory != null) {
446                         int mergeOption = CLDRFile.MERGE_ADD_ALTERNATE;
447                         CLDRFile toMergeIn = mergeFactory.make(join_prefix + test + join_postfix, false)
448                             .cloneAsThawed();
449                         if (toMergeIn != null) {
450                             if (options[JOIN_ARGS].doesOccur) {
451                                 if (options[JOIN_ARGS].value.indexOf("r") >= 0)
452                                     mergeOption = CLDRFile.MERGE_REPLACE_MY_DRAFT;
453                                 if (options[JOIN_ARGS].value.indexOf("d") >= 0)
454                                     mergeOption = CLDRFile.MERGE_REPLACE_MINE;
455                                 if (options[JOIN_ARGS].value.indexOf("c") >= 0) toMergeIn.clearComments();
456                                 if (options[JOIN_ARGS].value.indexOf("x") >= 0) removePosix(toMergeIn);
457                             }
458                             toMergeIn.makeDraft(DraftStatus.contributed);
459                             k.putAll(toMergeIn, mergeOption);
460                         }
461                         // special fix
462                         k.removeComment(
463                             " The following are strings that are not found in the locale (currently), but need valid translations for localizing timezones. ");
464                     }
465                     if (DEBUG_PATHS != null) {
466                         System.out.println("Debug2 (" + test + "):\t" + k.toString(DEBUG_PATHS));
467                     }
468                     if (options[FIX].doesOccur) {
469                         fix(k, options[FIX].value, options[KONFIG].value, cldrFactory);
470                     }
471                     if (DEBUG_PATHS != null) {
472                         System.out.println("Debug3 (" + test + "):\t" + k.toString(DEBUG_PATHS));
473                     }
474                     if (options[MINIMIZE].doesOccur) {
475                         if (pathHeaderFactory == null) {
476                             pathHeaderFactory = PathHeader.getFactory(cldrFactory.make("en", true));
477                         }
478                         // TODO, fix identity
479                         String parent = LocaleIDParser.getParent(test);
480                         if (parent != null) {
481                             CLDRFile toRemove = cldrFactory.make(parent, true);
482                             // remove the items that are language codes, script codes, or region codes
483                             // since they may be real translations.
484                             if (parent.equals("root")) {
485                                 if (k.getFullXPath("//ldml/alias", true) != null) {
486                                     System.out.println("Skipping completely aliased file: " + test);
487                                 } else {
488                                     // k.putRoot(toRemove);
489                                 }
490                             }
491                             if (retainIfTrue == null) {
492                                 retainIfTrue = new RetainWhenMinimizing();
493                             }
494                             retainIfTrue.setParentFile(toRemove);
495                             List<String> removed = DEBUG ? null : new ArrayList<String>();
496                             k.removeDuplicates(toRemove, COMMENT_REMOVALS, retainIfTrue, removed);
497                             if (removed != null && removed.size() != 0) {
498                                 totalRemoved += removed.size();
499                                 Set<PathHeader> sorted = new TreeSet<PathHeader>();
500                                 for (String path : removed) {
501                                     sorted.add(pathHeaderFactory.fromPath(path));
502                                 }
503                                 for (PathHeader pathHeader : sorted) {
504                                     System.out.println("\t# " + test + "\t" + pathHeader + "\t" + pathHeader.getOriginalPath());
505                                 }
506                                 System.out.println("\t# " + test + "\t# Removed:\t" + removed.size());
507                             }
508                         }
509                     }
510                     // System.out.println(CLDRFile.getAttributeOrder());
511 
512                     /*
513                      * if (false) {
514                      * Map tempComments = k.getXpath_comments();
515                      *
516                      * for (Iterator it2 = tempComments.keySet().iterator(); it2.hasNext();) {
517                      * String key = (String) it2.next();
518                      * String comment = (String) tempComments.get(key);
519                      * Log.logln("Writing extra comment: " + key);
520                      * System.out.println(key + "\t comment: " + comment);
521                      * }
522                      * }
523                      */
524 
525                     if (DEBUG_PATHS != null) {
526                         System.out.println("Debug4 (" + test + "):\t" + k.toString(DEBUG_PATHS));
527                     }
528 
529                     PrintWriter pw = FileUtilities.openUTF8Writer(targetDir, test + ".xml");
530                     String testPath = "//ldml/dates/calendars/calendar[@type=\"persian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/month[@type=\"1\"]";
531                     if (false) {
532                         System.out.println("Printing Raw File:");
533                         testPath = "//ldml/dates/calendars/calendar[@type=\"persian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/alias";
534                         System.out.println(k.getStringValue(testPath));
535                         // System.out.println(k.getFullXPath(testPath));
536                         Iterator it4 = k.iterator();
537                         Set s = CollectionUtilities.addAll(it4, new TreeSet());
538 
539                         System.out.println(k.getStringValue(testPath));
540                         // if (true) return;
541                         Set orderedSet = new TreeSet(k.getComparator());
542                         CollectionUtilities.addAll(k.iterator(), orderedSet);
543                         for (Iterator it3 = orderedSet.iterator(); it3.hasNext();) {
544                             String path = (String) it3.next();
545                             // System.out.println(path);
546                             if (path.equals(testPath)) {
547                                 System.out.println("huh?");
548                             }
549                             String value = k.getStringValue(path);
550                             String fullpath = k.getFullXPath(path);
551                             System.out.println("\t=\t" + fullpath);
552                             System.out.println("\t=\t" + value);
553                         }
554                         System.out.println("Done Printing Raw File:");
555                     }
556 
557                     k.write(pw);
558                     // pw.println();
559                     pw.close();
560                     if (options[CHECK].doesOccur) {
561                         QuickCheck.check(new File(targetDir, test + ".xml"));
562                     }
563 
564                     // JCE: I don't think anyone really uses the .bat files from CLDRModify any more, since
565                     // Eclipse provides a decent file comparison program.  You can comment this back in if
566                     // you need it, but I found that sometimes having this here clobbers the real output
567                     // file, which we definitely don't want.
568                     // ToolUtilities.generateBat(sourceDir, test + ".xml", targetDir, test + ".xml", lineComparer);
569 
570                     /*
571                      * boolean ok = Utility.areFileIdentical(sourceDir + test + ".xml",
572                      * targetDir + test + ".xml", failureLines, Utility.TRIM + Utility.SKIP_SPACES);
573                      * if (!ok) {
574                      * System.out.println("Found differences at: ");
575                      * System.out.println("\t" + failureLines[0]);
576                      * System.out.println("\t" + failureLines[1]);
577                      * }
578                      */
579                 }
580                 if (totalSkeletons.size() != 0) {
581                     System.out.println("Total Skeletons" + totalSkeletons);
582                 }
583                 if (totalRemoved > 0) {
584                     System.out.println("# Removed:\t" + totalRemoved);
585                 }
586             } finally {
587                 fixList.handleCleanup();
588                 Log.close();
589                 System.out.println("Done -- Elapsed time: " + ((System.currentTimeMillis() - startTime) / 60000.0)
590                     + " minutes");
591             }
592         }
593     }
594 
removeSuffix(String value, String... suffices)595     private static String removeSuffix(String value, String... suffices) {
596         for (String suffix : suffices) {
597             if (value.endsWith(suffix)) {
598                 return value.substring(0, value.length() - suffix.length());
599             }
600         }
601         return value;
602     }
603 
604     /*
605      * Use the coverage to determine what we should keep in the case of a locale just below root.
606      */
607 
608     static class RetainWhenMinimizing implements CLDRFile.RetentionTest {
609         private CLDRFile file;
610         private CLDRLocale c;
611         private boolean isArabicSublocale;
612         // Status status = new Status(); // no need to have, was unused
613 
setParentFile(CLDRFile file)614         public RetainWhenMinimizing setParentFile(CLDRFile file) {
615             this.file = file;
616             this.c = CLDRLocale.getInstance(file.getLocaleIDFromIdentity());
617             isArabicSublocale = "ar".equals(c.getLanguage()) && !"001".equals(c.getCountry());
618             return this;
619         }
620 
621         @Override
getRetention(String path)622         public Retention getRetention(String path) {
623             if (path.startsWith("//ldml/identity/")) {
624                 return Retention.RETAIN;
625             }
626             // special case for Arabic
627             if (isArabicSublocale && path.startsWith("//ldml/numbers/defaultNumberingSystem")) {
628                 return Retention.RETAIN;
629             }
630             String localeId = file.getSourceLocaleID(path, null);
631             if ((c.isLanguageLocale() || c.equals(CLDRLocale.getInstance("pt_PT")))
632                 && (XMLSource.ROOT_ID.equals(localeId) || XMLSource.CODE_FALLBACK_ID.equals(localeId))) {
633                 return Retention.RETAIN;
634             }
635             return Retention.RETAIN_IF_DIFFERENT;
636         }
637     };
638 
    // Splits on ',', ';' or '|' (despite the name, '|' is a separator too), trimming each piece
    // and dropping empty pieces.
    static final Splitter COMMA_SEMI = Splitter.on(Pattern.compile("[,;|]")).trimResults().omitEmptyStrings();
    // NOTE(review): no use is visible in this part of the file; presumably switches on a
    // numbering-system workaround -- confirm at the use sites.
    protected static final boolean NUMBER_SYSTEM_HACK = true;
641 
642     /**
643      *
644      */
checkSuboptions(UOption givenOptions, UnicodeSet allowedOptions)645     private static void checkSuboptions(UOption givenOptions, UnicodeSet allowedOptions) {
646         if (givenOptions.doesOccur && !allowedOptions.containsAll(givenOptions.value)) {
647             throw new IllegalArgumentException("Illegal sub-options for "
648                 + givenOptions.shortName
649                 + ": "
650                 + new UnicodeSet().addAll(givenOptions.value).removeAll(allowedOptions)
651                 + CldrUtility.LINE_SEPARATOR + "Use -? for help.");
652         }
653     }
654 
655     /**
656      *
657      */
removePosix(CLDRFile toMergeIn)658     private static void removePosix(CLDRFile toMergeIn) {
659         Set<String> toRemove = new HashSet<String>();
660         for (String xpath : toMergeIn) {
661             if (xpath.startsWith("//ldml/posix")) toRemove.add(xpath);
662         }
663         toMergeIn.removeAll(toRemove, false);
664     }
665 
666     // private static class References {
667     // static Map<String,Map<String,String>> locale_oldref_newref = new TreeMap<String,Map<String,String>>();
668     //
669     // static String[][] keys = {{"standard", "S", "[@standard=\"true\"]"}, {"references", "R", ""}};
670     // UnicodeSet digits = new UnicodeSet("[0-9]");
671     // int referenceCounter = 0;
672     // Map references_token = new TreeMap();
673     // Set tokenSet = new HashSet();
674     // String[] keys2;
675     // boolean isStandard;
676     // References(boolean standard) {
677     // isStandard = standard;
678     // keys2 = standard ? keys[0] : keys[1];
679     // }
680     // /**
681     // *
682     // */
683     // public void reset(CLDRFile k) {
684     // }
685     // /**
686     // *
687     // */
688     // // Samples:
689     // // <language type="ain" references="RP1">阿伊努文</language>
690     // // <reference type="R1" uri="http://www.info.gov.hk/info/holiday_c.htm">二零零五年公眾假期刊登憲報</reference>
691     // private int fix(Map attributes, CLDRFile replacements) {
692     // // we have to have either a references element or attributes.
693     // String references = (String) attributes.get(keys2[0]);
694     // int result = 0;
695     // if (references != null) {
696     // references = references.trim();
697     // if (references.startsWith("S") || references.startsWith("R")) {
698     // if (digits.containsAll(references.substring(1))) return 0;
699     // }
700     // String token = (String) references_token.get(references);
701     // if (token == null) {
702     // while (true) {
703     // token = keys2[1] + (++referenceCounter);
704     // if (!tokenSet.contains(token)) break;
705     // }
706     // references_token.put(references, token);
707     // System.out.println("Adding: " + token + "\t" + references);
708     // replacements.add("//ldml/references/reference[@type=\"" + token + "\"]" + keys2[2], references);
709     // result = 1;
710     // }
711     // attributes.put(keys2[0], token);
712     // }
713     // return result;
714     // }
715     // }
716 
717     abstract static class CLDRFilter {
718         protected CLDRFile cldrFileToFilter;
719         private String localeID;
720         protected Set<String> availableChildren;
721         private Set<String> toBeRemoved;
722         private CLDRFile toBeReplaced;
723         protected XPathParts parts = new XPathParts(null, null);
724         protected XPathParts fullparts = new XPathParts(null, null);
725         protected Factory factory;
726 
setFile(CLDRFile k, Factory factory, Set<String> removal, CLDRFile replacements)727         public final void setFile(CLDRFile k, Factory factory, Set<String> removal, CLDRFile replacements) {
728             this.cldrFileToFilter = k;
729             this.factory = factory;
730             localeID = k.getLocaleID();
731             this.toBeRemoved = removal;
732             this.toBeReplaced = replacements;
733             handleStart();
734         }
735 
handleStart()736         public void handleStart() {
737         }
738 
handlePath(String xpath)739         public abstract void handlePath(String xpath);
740 
handleEnd()741         public void handleEnd() {
742         }
743 
show(String reason, String detail)744         public void show(String reason, String detail) {
745             System.out.println("%" + localeID + "\t" + reason + "\tConsidering " + detail);
746         }
747 
retain(String path, String reason)748         public void retain(String path, String reason) {
749             System.out.println("%" + localeID + "\t" + reason + "\tRetaining: " + cldrFileToFilter.getStringValue(path)
750                 + "\t at: " + path);
751         }
752 
remove(String path)753         public void remove(String path) {
754             remove(path, "-");
755         }
756 
remove(String path, String reason)757         public void remove(String path, String reason) {
758             if (toBeRemoved.contains(path)) return;
759             toBeRemoved.add(path);
760 //            System.out.println("%" + localeID + "\t" + reason + "\tRemoving:\t«"
761 //                + cldrFileToFilter.getStringValue(path) + "»\t at:\t" + path);
762             String oldValueOldPath = cldrFileToFilter.getStringValue(path);
763             showAction(reason, "Removing", oldValueOldPath, null, null, path, path);
764         }
765 
replace(String oldFullPath, String newFullPath, String newValue)766         public void replace(String oldFullPath, String newFullPath, String newValue) {
767             replace(oldFullPath, newFullPath, newValue, "-");
768         }
769 
showAction(String reason, String action, String oldValueOldPath, String oldValueNewPath, String newValue, String oldFullPath, String newFullPath)770         public void showAction(String reason, String action, String oldValueOldPath, String oldValueNewPath,
771             String newValue, String oldFullPath, String newFullPath) {
772             System.out.println("%"
773                 + localeID
774                 + "\t"
775                 + action
776                 + "\t"
777                 + reason
778                 + "\t«"
779                 + oldValueOldPath
780                 + "»"
781                 + (newFullPath.equals(oldFullPath) || oldValueNewPath == null ? "" : oldValueNewPath
782                     .equals(oldValueOldPath) ? "/=" : "/«" + oldValueNewPath + "»")
783                 + "\t→\t" + (newValue == null ? "∅" : newValue.equals(oldValueOldPath) ? "=" : "«" + newValue + "»")
784                 + "\t" + oldFullPath
785                 + (newFullPath.equals(oldFullPath) ? "" : "\t→\t" + newFullPath));
786         }
787 
788         /**
789          * There are the following cases, where:
790          *
791          * <pre>
792          * pathSame,    new value null:         Removing    v       p
793          * pathSame,    new value not null:     Replacing   v   v'  p
794          * pathChanges, nothing at new path:    Moving      v       p   p'
795          * pathChanges, same value at new path: Replacing   v   v'  p   p'
796          * pathChanges, value changes:          Overriding  v   v'  p   p'
797          *
798          * <pre>
799          * @param oldFullPath
800          * @param newFullPath
801          * @param newValue
802          * @param reason
803          */
replace(String oldFullPath, String newFullPath, String newValue, String reason)804         public void replace(String oldFullPath, String newFullPath, String newValue, String reason) {
805             String oldValueOldPath = cldrFileToFilter.getStringValue(oldFullPath);
806             String temp = cldrFileToFilter.getFullXPath(oldFullPath);
807             if (temp != null) {
808                 oldFullPath = temp;
809             }
810             boolean pathSame = oldFullPath.equals(newFullPath);
811 
812             if (pathSame) {
813                 if (newValue == null) {
814                     remove(oldFullPath, reason);
815                 } else if (oldValueOldPath == null) {
816                     toBeReplaced.add(oldFullPath, newValue);
817                     showAction(reason, "Adding", oldValueOldPath, null, newValue, oldFullPath, newFullPath);
818                 } else {
819                     toBeReplaced.add(oldFullPath, newValue);
820                     showAction(reason, "Replacing", oldValueOldPath, null, newValue, oldFullPath, newFullPath);
821                 }
822                 return;
823             }
824             String oldValueNewPath = cldrFileToFilter.getStringValue(newFullPath);
825             toBeRemoved.add(oldFullPath);
826             toBeReplaced.add(newFullPath, newValue);
827 
828             if (oldValueNewPath == null) {
829                 showAction(reason, "Moving", oldValueOldPath, oldValueNewPath, newValue, oldFullPath, newFullPath);
830             } else if (oldValueNewPath.equals(newValue)) {
831                 showAction(reason, "Redundant Value", oldValueOldPath, oldValueNewPath, newValue, oldFullPath, newFullPath);
832             } else {
833                 showAction(reason, "Overriding", oldValueOldPath, oldValueNewPath, newValue, oldFullPath, newFullPath);
834             }
835         }
836 
837         /**
838          * Adds a new path-value pair to the CLDRFile.
839          * @param path the new path
840          * @param value the value
841          * @param reason Reason for adding the path and value.
842          */
add(String path, String value, String reason)843         public void add(String path, String value, String reason) {
844             String oldValueOldPath = cldrFileToFilter.getStringValue(path);
845             if (oldValueOldPath == null) {
846                 toBeRemoved.remove(path);
847                 toBeReplaced.add(path, value);
848                 showAction(reason, "Adding", oldValueOldPath, null,
849                     value, path, path);
850             } else {
851                 replace(path, path, value);
852             }
853         }
854 
getReplacementFile()855         public CLDRFile getReplacementFile() {
856             return toBeReplaced;
857         }
858 
handleCleanup()859         public void handleCleanup() {
860         }
861 
handleSetup()862         public void handleSetup() {
863         }
864 
getLocaleID()865         public String getLocaleID() {
866             return localeID;
867         }
868     }
869 
870     static class FixList {
871         // simple class, so we use quick list
872         CLDRFilter[] filters = new CLDRFilter[128]; // only ascii
873         String[] helps = new String[128]; // only ascii
874         UnicodeSet options = new UnicodeSet();
875         String inputOptions = null;
876 
add(char letter, String help)877         void add(char letter, String help) {
878             add(letter, help, null);
879         }
880 
handleSetup()881         public void handleSetup() {
882             for (int i = 0; i < filters.length; ++i) {
883                 if (filters[i] != null) {
884                     filters[i].handleSetup();
885                 }
886             }
887         }
888 
handleCleanup()889         public void handleCleanup() {
890             for (int i = 0; i < filters.length; ++i) {
891                 if (filters[i] != null) {
892                     filters[i].handleCleanup();
893                 }
894             }
895         }
896 
getOptions()897         public UnicodeSet getOptions() {
898             return options;
899         }
900 
add(char letter, String help, CLDRFilter filter)901         void add(char letter, String help, CLDRFilter filter) {
902             if (helps[letter] != null) throw new IllegalArgumentException("Duplicate letter: " + letter);
903             filters[letter] = filter;
904             helps[letter] = help;
905             options.add(letter);
906         }
907 
setFile(CLDRFile file, String inputOptions, Factory factory, Set<String> removal, CLDRFile replacements)908         void setFile(CLDRFile file, String inputOptions, Factory factory, Set<String> removal, CLDRFile replacements) {
909             this.inputOptions = inputOptions;
910             for (int i = 0; i < inputOptions.length(); ++i) {
911                 char c = inputOptions.charAt(i);
912                 if (filters[c] != null) {
913                     try {
914                         filters[c].setFile(file, factory, removal, replacements);
915                     } catch (RuntimeException e) {
916                         System.err.println("Failure in " + filters[c].localeID + "\t START");
917                         throw e;
918                     }
919                 }
920             }
921         }
922 
handleStart()923         void handleStart() {
924             for (int i = 0; i < inputOptions.length(); ++i) {
925                 char c = inputOptions.charAt(i);
926                 if (filters[c] != null) {
927                     try {
928                         filters[c].handleStart();
929                     } catch (RuntimeException e) {
930                         System.err.println("Failure in " + filters[c].localeID + "\t START");
931                         throw e;
932                     }
933                 }
934             }
935         }
936 
handlePath(String xpath)937         void handlePath(String xpath) {
938             //options = options.toLowerCase();
939             for (int i = 0; i < inputOptions.length(); ++i) {
940                 char c = inputOptions.charAt(i);
941                 if (filters[c] != null) {
942                     try {
943                         filters[c].handlePath(xpath);
944                     } catch (RuntimeException e) {
945                         System.err.println("Failure in " + filters[c].localeID + "\t " + xpath);
946                         throw e;
947                     }
948                 }
949             }
950         }
951 
handleEnd()952         void handleEnd() {
953             for (int i = 0; i < inputOptions.length(); ++i) {
954                 char c = inputOptions.charAt(i);
955                 if (filters[c] != null) {
956                     try {
957                         filters[c].handleEnd();
958                     } catch (RuntimeException e) {
959                         System.err.println("Failure in " + filters[c].localeID + "\t START");
960                         throw e;
961                     }
962                 }
963             }
964         }
965 
showHelp()966         String showHelp() {
967             String result = "";
968             for (int i = 0; i < filters.length; ++i) {
969                 if (helps[i] != null) {
970                     result += "\t" + (char) i + "\t " + helps[i] + XPathParts.NEWLINE;
971                 }
972             }
973             return result;
974         }
975     }
976 
    // Skeleton strings collected during a run; reported in the "Total Skeletons" output line.
    static Set<String> totalSkeletons = new HashSet<String>();

    // Unit name -> short symbol (e.g. "second" -> "s"); filled in by the static initializer below.
    static Map<String, String> rootUnitMap = new HashMap<String, String>();
980 
981     static {
982         rootUnitMap.put("second", "s");
983         rootUnitMap.put("minute", "min");
984         rootUnitMap.put("hour", "h");
985         rootUnitMap.put("day", "d");
986         rootUnitMap.put("week", "w");
987         rootUnitMap.put("month", "m");
988         rootUnitMap.put("year", "y");
989 
990         fixList.add('z', "Remove deprecated elements", new CLDRFilter() {
991 
992             public boolean isDeprecated(DtdType type, String element, String attribute, String value) {
993                 return DtdData.getInstance(type).isDeprecated(element, attribute, value);
994             }
995 
996             public boolean isDeprecated(DtdType type, String path) {
997 
998                 XPathParts parts = XPathParts.getInstance(path);
999                 for (int i = 0; i < parts.size(); ++i) {
1000                     String element = parts.getElement(i);
1001                     if (isDeprecated(type, element, "*", "*")) {
1002                         return true;
1003                     }
1004                     for (Entry<String, String> entry : parts.getAttributes(i).entrySet()) {
1005                         String attribute = entry.getKey();
1006                         String value = entry.getValue();
1007                         if (isDeprecated(type, element, attribute, value)) {
1008                             return true;
1009                         }
1010                     }
1011                 }
1012                 return false;
1013             }
1014 
1015             @Override
1016             public void handlePath(String xpath) {
1017                 String fullPath = cldrFileToFilter.getFullXPath(xpath);
1018                 XPathParts parts = XPathParts.getInstance(fullPath);
1019                 for (int i = 0; i < parts.size(); ++i) {
1020                     String element = parts.getElement(i);
1021                     if (dtdData.isDeprecated(element, "*", "*")) {
1022                         remove(fullPath, "Deprecated element");
1023                         return;
1024                     }
1025                     for (Entry<String, String> entry : parts.getAttributes(i).entrySet()) {
1026                         String attribute = entry.getKey();
1027                         String value = entry.getValue();
1028                         if (dtdData.isDeprecated(element, attribute, value)) {
1029                             remove(fullPath, "Element with deprecated attribute(s)");
1030                         }
1031                     }
1032                 }
1033             }
1034         });
1035 
1036         fixList.add('e', "fix Interindic", new CLDRFilter() {
1037             public void handlePath(String xpath) {
1038                 if (xpath.indexOf("=\"InterIndic\"") < 0) return;
1039                 String v = cldrFileToFilter.getStringValue(xpath);
1040                 String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1041                 fullparts.set(fullXPath);
1042                 Map<String, String> attributes = fullparts.findAttributes("transform");
1043                 String oldValue = attributes.get("direction");
1044                 if ("both".equals(oldValue)) {
1045                     attributes.put("direction", "forward");
1046                     replace(xpath, fullparts.toString(), v);
1047                 }
1048             }
1049         });
1050 
1051         fixList.add('B', "fix bogus values", new CLDRFilter() {
1052             RegexLookup<Integer> paths = RegexLookup.<Integer> of()
1053                 .setPatternTransform(RegexLookup.RegexFinderTransformPath2)
1054                 .add("//ldml/localeDisplayNames/languages/language[@type='([^']*)']", 0)
1055                 .add("//ldml/localeDisplayNames/scripts/script[@type='([^']*)']", 0)
1056                 .add("//ldml/localeDisplayNames/territories/territory[@type='([^']*)']", 0)
1057                 .add("//ldml/dates/timeZoneNames/metazone[@type='([^']*)']", 0)
1058                 .add("//ldml/dates/timeZoneNames/zone[@type='([^']*)']/exemplarCity", 0)
1059                 .add("//ldml/numbers/currencies/currency[@type='([^']*)']/displayName", 0);
1060             Output<String[]> arguments = new Output<>();
1061             CLDRFile english = CLDRConfig.getInstance().getEnglish();
1062             boolean skip;
1063 
1064             @Override
1065             public void handleStart() {
1066                 CLDRFile resolved = factory.make(cldrFileToFilter.getLocaleID(), true);
1067                 UnicodeSet exemplars = resolved.getExemplarSet(ExemplarType.main, WinningChoice.WINNING);
1068                 skip = exemplars.containsSome('a', 'z');
1069                 // TODO add simpler way to skip file entirely
1070             }
1071 
1072             public void handlePath(String xpath) {
1073                 if (skip) {
1074                     return;
1075                 }
1076                 Integer lookupValue = paths.get(xpath, null, arguments);
1077                 if (lookupValue == null) {
1078                     return;
1079                 }
1080                 String type = arguments.value[1];
1081                 String value = cldrFileToFilter.getStringValue(xpath);
1082                 if (value.equals(type)) {
1083                     remove(xpath, "Matches code");
1084                     return;
1085                 }
1086                 String evalue = english.getStringValue(xpath);
1087                 if (value.equals(evalue)) {
1088                     remove(xpath, "Matches English");
1089                     return;
1090                 }
1091             }
1092         });
1093 
1094         fixList.add('s', "fix alt accounting", new CLDRFilter() {
1095             @Override
1096             public void handlePath(String xpath) {
1097                 parts.set(xpath);
1098                 if (!parts.containsAttributeValue("alt", "accounting")) return;
1099                 String oldFullXPath = cldrFileToFilter.getFullXPath(xpath);
1100                 String value = cldrFileToFilter.getStringValue(xpath);
1101                 fullparts.set(oldFullXPath);
1102                 fullparts.removeAttribute("pattern", "alt");
1103                 fullparts.setAttribute("currencyFormat", "type", "accounting");
1104                 String newFullXPath = fullparts.toString();
1105                 replace(oldFullXPath, newFullXPath, value, "Move alt=accounting value to new path");
1106             }
1107         });
1108 
1109         fixList.add('n', "add unit displayName", new CLDRFilter() {
1110             @Override
1111             public void handlePath(String xpath) {
1112                 if (xpath.indexOf("/units/unitLength[@type=\"long\"]") < 0 || xpath.indexOf("/unitPattern[@count=\"other\"]") < 0 ||
1113                     xpath.indexOf("[@draft=\"unconfirmed\"]") >= 0) {
1114                     return;
1115                 }
1116                 String value = cldrFileToFilter.getStringValue(xpath);
1117                 String newValue = null;
1118                 if (value.startsWith("{0}")) {
1119                     newValue = value.substring(3).trim();
1120                 } else if (value.endsWith("{0}")) {
1121                     newValue = value.substring(0, value.length() - 3).trim();
1122                 } else {
1123                     System.out.println("unitPattern-other does not start or end with \"{0}\": \"" + value + "\"");
1124                     return;
1125                 }
1126 
1127                 String oldFullXPath = cldrFileToFilter.getFullXPath(xpath);
1128                 String newFullXPath = oldFullXPath.substring(0, oldFullXPath.indexOf("unitPattern")).concat("displayName[@draft=\"provisional\"]");
1129                 add(newFullXPath, newValue, "create unit displayName-long from unitPattern-long-other");
1130                 String newFullXPathShort = newFullXPath.replace("[@type=\"long\"]", "[@type=\"short\"]");
1131                 add(newFullXPathShort, newValue, "create unit displayName-short from unitPattern-long-other");
1132             }
1133         });
1134 
1135         fixList.add('x', "retain paths", new CLDRFilter() {
1136             Matcher m = null;
1137 
1138             public void handlePath(String xpath) {
1139                 if (m == null) {
1140                     m = PatternCache.get(options[PATH].value).matcher("");
1141                 }
1142                 //String v = cldrFileToFilter.getStringValue(xpath);
1143                 String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1144                 if (!m.reset(fullXPath).matches()) {
1145                     remove(xpath);
1146                 }
1147             }
1148         });
1149 
1150         fixList.add('_', "remove superfluous compound language translations", new CLDRFilter() {
1151             private CLDRFile resolved;
1152 
1153             public void handleStart() {
1154                 resolved = factory.make(cldrFileToFilter.getLocaleID(), true);
1155             }
1156 
1157             public void handlePath(String xpath) {
1158                 if (!xpath.contains("_")) return;
1159                 if (!xpath.contains("/language")) return;
1160                 String languageCode = parts.set(xpath).findAttributeValue("language", "type");
1161                 String v = resolved.getStringValue(xpath);
1162                 if (v.equals(languageCode)) {
1163                     remove(xpath, "same as language code");
1164                     return;
1165                 }
1166                 String generatedTranslation = resolved.getName(languageCode, true);
1167                 if (v.equals(generatedTranslation)) {
1168                     remove(xpath, "superfluous compound language");
1169                 }
1170                 String spacelessGeneratedTranslation = generatedTranslation.replace(" ", "");
1171                 if (v.equals(spacelessGeneratedTranslation)) {
1172                     remove(xpath, "superfluous compound language (after removing space)");
1173                 }
1174             }
1175         });
1176 
1177         fixList.add('l', "change language code", new CLDRFilter() {
1178             private CLDRFile resolved;
1179 
1180             public void handleStart() {
1181                 resolved = factory.make(cldrFileToFilter.getLocaleID(), true);
1182             }
1183 
1184             public void handlePath(String xpath) {
1185                 if (!xpath.contains("/language")) return;
1186                 String languageCode = parts.set(xpath).findAttributeValue("language", "type");
1187                 String v = resolved.getStringValue(xpath);
1188                 if (!languageCode.equals("swc")) return;
1189                 parts.setAttribute("language", "type", "sw_CD");
1190                 replace(xpath, parts.toString(), v);
1191             }
1192         });
1193 
1194         if (false) fixList.add('s', "fix stand-alone narrows", new CLDRFilter() {
1195             public void handlePath(String xpath) {
1196                 if (xpath.indexOf("[@type=\"narrow\"]") < 0) return;
1197                 parts.set(xpath);
1198                 String element = "";
1199                 if (parts.findElement("dayContext") >= 0) {
1200                     element = "dayContext";
1201                 } else if (parts.findElement("monthContext") >= 0) {
1202                     element = "monthContext";
1203                 } else
1204                     return;
1205 
1206                 // change the element type UNLESS it conflicts
1207                 parts.setAttribute(element, "type", "stand-alone");
1208                 if (cldrFileToFilter.getStringValue(parts.toString()) != null) return;
1209 
1210                 String v = cldrFileToFilter.getStringValue(xpath);
1211                 String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1212                 fullparts.set(fullXPath);
1213                 fullparts.setAttribute(element, "type", "stand-alone");
1214                 replace(xpath, fullparts.toString(), v);
1215             }
1216         });
1217 
1218         fixList.add('m', "remove multiple alt-variants", new CLDRFilter() {
1219 
1220             public void handleStart() {
1221             }
1222 
1223             public void handlePath(String xpath) {
1224                 parts.set(xpath);
1225                 if (!parts.containsAttributeValue("alt", "variant")) return;
1226                 String variantValue = cldrFileToFilter.getStringValue(xpath);
1227                 String nonVariantXpath = xpath.replaceAll("\\[\\@alt=\"variant\"\\]", "");
1228                 String nonVariantValue = cldrFileToFilter.getStringValue(nonVariantXpath);
1229                 if (variantValue.equals(nonVariantValue)) {
1230                     remove(xpath, "removing superfluous alt-variant value");
1231                 }
1232             }
1233         });
1234 
1235         fixList.add('g', "Swap alt/non-alt values for Czechia", new CLDRFilter() {
1236 
1237             public void handleStart() {
1238             }
1239 
1240             public void handlePath(String xpath) {
1241                 parts.set(xpath);
1242                 if (!parts.containsAttributeValue("alt", "variant") || !parts.containsAttributeValue("type", "CZ")) return;
1243                 String variantValue = cldrFileToFilter.getStringValue(xpath);
1244                 String nonVariantXpath = xpath.replaceAll("\\[\\@alt=\"variant\"\\]", "");
1245                 String nonVariantValue = cldrFileToFilter.getStringValue(nonVariantXpath);
1246                 replace(xpath, xpath, nonVariantValue);
1247                 replace(nonVariantXpath, nonVariantXpath, variantValue);
1248             }
1249         });
1250 
1251         fixList.add('u', "fix duration unit patterns", new CLDRFilter() {
1252 
1253             public void handlePath(String xpath) {
1254                 if (!xpath.contains("/units")) {
1255                     return;
1256                 }
1257                 if (!xpath.contains("/durationUnitPattern")) {
1258                     return;
1259                 }
1260 
1261                 String value = cldrFileToFilter.getStringValue(xpath);
1262                 String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1263 
1264                 parts.set(fullXPath);
1265 
1266                 String unittype = parts.findAttributeValue("durationUnit", "type");
1267 
1268                 String newFullXpath = "//ldml/units/durationUnit[@type=\"" + unittype + "\"]/durationUnitPattern";
1269                 replace(fullXPath, newFullXpath, value, "converting to new duration unit structure");
1270             }
1271         });
1272 
1273         fixList.add('a', "Fix 0/1", new CLDRFilter() {
1274             final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze();
1275             PluralInfo info;
1276 
1277             @Override
1278             public void handleStart() {
1279                 info = SupplementalDataInfo.getInstance().getPlurals(super.localeID);
1280             }
1281 
1282             @Override
1283             public void handlePath(String xpath) {
1284 
1285                 if (xpath.indexOf("count") < 0) {
1286                     return;
1287                 }
1288                 String fullpath = cldrFileToFilter.getFullXPath(xpath);
1289                 parts.set(fullpath);
1290                 String countValue = parts.getAttributeValue(-1, "count");
1291                 if (!DIGITS.containsAll(countValue)) {
1292                     return;
1293                 }
1294                 int intValue = Integer.parseInt(countValue);
1295                 Count count = info.getCount(intValue);
1296                 parts.setAttribute(-1, "count", count.toString());
1297                 String newPath = parts.toString();
1298                 String oldValue = cldrFileToFilter.getStringValue(newPath);
1299                 String value = cldrFileToFilter.getStringValue(xpath);
1300                 if (oldValue != null) {
1301                     String fixed = oldValue.replace("{0}", countValue);
1302                     if (value.equals(oldValue)
1303                         || value.equals(fixed)) {
1304                         remove(fullpath, "Superfluous given: "
1305                             + count + "→«" + oldValue + "»");
1306                     } else {
1307                         remove(fullpath, "Can’t replace: "
1308                             + count + "→«" + oldValue + "»");
1309                     }
1310                     return;
1311                 }
1312                 replace(fullpath, newPath, value, "Moving 0/1");
1313             }
1314         });
1315 
1316         fixList.add('b', "Prep for bulk import", new CLDRFilter() {
1317 
1318             public void handlePath(String xpath) {
1319 
1320                 if (!options[USER].doesOccur) return;
1321                 String userID = options[USER].value;
1322                 String fullpath = cldrFileToFilter.getFullXPath(xpath);
1323                 String value = cldrFileToFilter.getStringValue(xpath);
1324                 parts.set(fullpath);
1325                 parts.addAttribute("draft", "unconfirmed");
1326                 parts.addAttribute("alt", "proposed-u" + userID + "-implicit1.8");
1327                 String newPath = parts.toString();
1328                 replace(fullpath, newPath, value);
1329             }
1330         });
1331 
1332         fixList.add('c', "Fix transiton from an old currency code to a new one", new CLDRFilter() {
1333             public void handlePath(String xpath) {
1334                 String oldCurrencyCode = "VEF";
1335                 String newCurrencyCode = "VES";
1336                 int fromDate = 2008;
1337                 int toDate = 2018;
1338                 String leadingParenString = " (";
1339                 String trailingParenString = ")";
1340                 String separator = "\u2013";
1341                 String languageTag = "root";
1342 
1343                 if (xpath.indexOf("/currency[@type=\"" + oldCurrencyCode + "\"]/displayName") < 0) {
1344                     return;
1345                 }
1346                 String value = cldrFileToFilter.getStringValue(xpath);
1347                 String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1348                 String newFullXPath = fullXPath.replace(oldCurrencyCode, newCurrencyCode);
1349                 cldrFileToFilter.add(newFullXPath, value);
1350 
1351                 // Exceptions for locales that use an alternate numbering system or a different format for the dates at
1352                 // the end.
1353                 // Add additional ones as necessary
1354                 String localeID = cldrFileToFilter.getLocaleID();
1355                 if (localeID.equals("ne")) {
1356                     languageTag = "root-u-nu-deva";
1357                 } else if (localeID.equals("bn")) {
1358                     languageTag = "root-u-nu-beng";
1359                 } else if (localeID.equals("ar")) {
1360                     leadingParenString = " - ";
1361                     trailingParenString = "";
1362                 } else if (localeID.equals("fa")) {
1363                     languageTag = "root-u-nu-arabext";
1364                     separator = Utility.unescape(" \\u062A\\u0627 ");
1365                 }
1366 
1367                 NumberFormat nf = NumberFormat.getInstance(ULocale.forLanguageTag(languageTag));
1368                 nf.setGroupingUsed(false);
1369 
1370                 String tagString = leadingParenString + nf.format(fromDate) + separator + nf.format(toDate)
1371                     + trailingParenString;
1372 
1373                 replace(fullXPath, fullXPath, value + tagString);
1374 
1375             }
1376         });
1377 
1378         fixList.add('p', "input-processor", new CLDRFilter() {
1379             private DisplayAndInputProcessor inputProcessor;
1380 
1381             public void handleStart() {
1382                 inputProcessor = new DisplayAndInputProcessor(cldrFileToFilter, true);
1383             }
1384 
1385             public void handleEnd() {
1386                 inputProcessor = null; // clean up, just in case
1387             }
1388 
1389             public void handlePath(String xpath) {
1390                 String value = cldrFileToFilter.getStringValue(xpath);
1391                 if (!value.equals(value.trim())) {
1392                     value = value; // for debugging
1393                 }
1394                 String newValue = inputProcessor.processInput(xpath, value, null);
1395                 if (value.equals(newValue)) {
1396                     return;
1397                 }
1398                 String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1399                 replace(fullXPath, fullXPath, newValue);
1400             }
1401         });
1402 
1403         fixList.add('t', "Fix missing count values groups", new CLDRFilter() {
1404 
1405             public void handlePath(String xpath) {
1406                 if (xpath.indexOf("@count=\"other\"") < 0) {
1407                     return;
1408                 }
1409 
1410                 String value = cldrFileToFilter.getStringValue(xpath);
1411                 String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1412                 String[] missingCounts = { "one" };
1413                 for (String count : missingCounts) {
1414                     String newFullXPath = fullXPath.replace("other", count);
1415                     if (cldrFileToFilter.getWinningValue(newFullXPath) == null) {
1416                         add(newFullXPath, value, "Adding missing plural form");
1417                     }
1418                 }
1419 
1420             }
1421         });
1422 
1423         fixList.add('f', "NFC (all but transforms, exemplarCharacters, pc, sc, tc, qc, ic)", new CLDRFilter() {
1424             public void handlePath(String xpath) {
1425                 if (xpath.indexOf("/segmentation") >= 0
1426                     || xpath.indexOf("/transforms") >= 0
1427                     || xpath.indexOf("/exemplarCharacters") >= 0
1428                     || xpath.indexOf("/pc") >= 0
1429                     || xpath.indexOf("/sc") >= 0
1430                     || xpath.indexOf("/tc") >= 0
1431                     || xpath.indexOf("/qc") >= 0
1432                     || xpath.indexOf("/ic") >= 0) return;
1433                 String value = cldrFileToFilter.getStringValue(xpath);
1434                 String nfcValue = Normalizer.compose(value, false);
1435                 if (value.equals(nfcValue)) return;
1436                 String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1437                 replace(fullXPath, fullXPath, nfcValue);
1438             }
1439         });
1440 
1441         fixList.add('v', "remove illegal codes", new CLDRFilter() {
1442 
1443             /*
1444              * Set legalCurrencies;
1445              * }
1446              * {
1447              * StandardCodes sc = StandardCodes.make();
1448              * legalCurrencies = new TreeSet(sc.getAvailableCodes("currency"));
1449              * // first remove non-ISO
1450              * for (Iterator it = legalCurrencies.iterator(); it.hasNext();) {
1451              * String code = (String) it.next();
1452              * List data = sc.getFullData("currency", code);
1453              * if ("X".equals(data.get(3))) it.remove();
1454              * }
1455              * }
1456              */
1457             StandardCodes sc = StandardCodes.make();
1458             String[] codeTypes = { "language", "script", "territory", "currency" };
1459 
1460             public void handlePath(String xpath) {
1461                 if (xpath.indexOf("/currency") < 0
1462                     && xpath.indexOf("/timeZoneNames") < 0
1463                     && xpath.indexOf("/localeDisplayNames") < 0) return;
1464                 parts.set(xpath);
1465                 String code;
1466                 for (int i = 0; i < codeTypes.length; ++i) {
1467                     code = parts.findAttributeValue(codeTypes[i], "type");
1468                     if (code != null) {
1469                         if (!sc.getGoodAvailableCodes(codeTypes[i]).contains(code)) remove(xpath);
1470                         return;
1471                     }
1472                 }
1473                 code = parts.findAttributeValue("zone", "type");
1474                 if (code != null) {
1475                     if (code.indexOf("/GMT") >= 0) remove(xpath);
1476                 }
1477 
1478             }
1479         });
1480 
1481         if (false) fixList.add('q', "fix exemplars", new CLDRFilter() {
1482             Collator col;
1483             Collator spaceCol;
1484             UnicodeSet uppercase = new UnicodeSet("[[:Uppercase:]-[\u0130]]");
1485             UnicodeSetIterator usi = new UnicodeSetIterator();
1486 
1487             public void handleStart() {
1488                 String locale = cldrFileToFilter.getLocaleID();
1489                 col = Collator.getInstance(new ULocale(locale));
1490                 spaceCol = Collator.getInstance(new ULocale(locale));
1491                 spaceCol.setStrength(col.PRIMARY);
1492             }
1493 
1494             public void handlePath(String xpath) {
1495                 if (xpath.indexOf("/exemplarCharacters") < 0) return;
1496                 String value = cldrFileToFilter.getStringValue(xpath);
1497                 try {
1498                     String fixedValue = value.replaceAll("- ", "-"); // TODO fix hack
1499                     if (!fixedValue.equals(value)) {
1500                         System.out.println("Changing: " + value);
1501                     }
1502                     fixedValue = "[" + fixedValue + "]"; // add parens in case forgotten
1503                     UnicodeSet s1 = new UnicodeSet(fixedValue).removeAll(uppercase);
1504                     UnicodeSet s = new UnicodeSet();
1505                     for (usi.reset(s1); usi.next();) {
1506                         s.add(Normalizer.compose(usi.getString(), false));
1507                     }
1508 
1509                     String fixedExemplar1 = new UnicodeSetPrettyPrinter()
1510                         .setOrdering(col != null ? col : Collator.getInstance(ULocale.ROOT))
1511                         .setSpaceComparator(col != null ? col : Collator.getInstance(ULocale.ROOT)
1512                             .setStrength2(Collator.PRIMARY))
1513                         .setCompressRanges(true)
1514                         .format(s);
1515 
1516                     if (!value.equals(fixedExemplar1)) {
1517                         String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1518                         replace(fullXPath, fullXPath, fixedExemplar1);
1519                     }
1520                 } catch (RuntimeException e) {
1521                     System.out.println("Illegal UnicodeSet: " + cldrFileToFilter.getLocaleID() + "\t" + value);
1522                 }
1523             }
1524         });
1525 
1526         fixList.add('w', "fix alt='...proposed' when there is no alternative", new CLDRFilter() {
1527             private XPathParts parts = new XPathParts();
1528             private Set<String> newFullXPathSoFar = new HashSet<String>();
1529 
1530             public void handlePath(String xpath) {
1531                 if (xpath.indexOf("proposed") < 0) return;
1532                 String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1533                 String newFullXPath = parts.set(fullXPath).removeProposed().toString();
1534                 // now see if there is an uninherited value
1535                 String value = cldrFileToFilter.getStringValue(xpath);
1536                 String baseValue = cldrFileToFilter.getStringValue(newFullXPath);
1537                 if (baseValue != null) {
1538                     // if the value AND the fullxpath are the same as what we have, then delete
1539                     if (value.equals(baseValue)) {
1540                         String baseFullXPath = cldrFileToFilter.getFullXPath(newFullXPath);
1541                         if (baseFullXPath.equals(newFullXPath)) {
1542                             remove(xpath, "alt=base");
1543                         }
1544                     }
1545                     return; // there is, so skip
1546                 }
1547                 // there isn't, so modif if we haven't done so already
1548                 if (!newFullXPathSoFar.contains(newFullXPath)) {
1549                     replace(fullXPath, newFullXPath, value);
1550                     newFullXPathSoFar.add(newFullXPath);
1551                 }
1552             }
1553         });
1554 
1555 //        fixList.add('l', "Remove losing items", new CLDRFilter() {
1556 //            public void handlePath(String xpath) {
1557 //                String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1558 //                if (fullXPath.indexOf("proposed-x10") < 0) return;
1559 //                if (fullXPath.indexOf("unconfirmed") < 0) return;
1560 //                remove(fullXPath, "Losing item");
1561 //            }
1562 //        });
1563 
1564         if (false) fixList.add('z', "fix ZZ", new CLDRFilter() {
1565             public void handlePath(String xpath) {
1566                 if (xpath.indexOf("/exemplarCharacters") < 0) return;
1567                 String value = cldrFileToFilter.getStringValue(xpath);
1568                 if (value.indexOf("[:") < 0) return;
1569                 UnicodeSet s = new UnicodeSet(value);
1570                 s.add(0xFFFF);
1571                 s.remove(0xFFFF); // force flattening
1572                 // at this point, we only have currency formats
1573                 String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1574                 replace(fullXPath, fullXPath, s.toPattern(false));
1575             }
1576         });
1577 
1578         // fixList.add('z', "GenerateIndex", new CLDRFilter() {
1579         // @Override
1580         // public void handleStart() {
1581         // // TODO Auto-generated method stub
1582         // super.handleStart();
1583         // if (cldrFileToFilter.getExemplarSet("", WinningChoice.WINNING) != null) {
1584         // String indexPattern = GenerateIndexCharacters.getConstructedIndexSet(cldrFileToFilter.getLocaleID(),
1585         // cldrFileToFilter);
1586         // replace("//ldml/characters/exemplarCharacters[@type=\"index\"][@draft=\"unconfirmed\"]",
1587         // "//ldml/characters/exemplarCharacters[@type=\"index\"][@draft=\"unconfirmed\"]", indexPattern);
1588         // }
1589         // }
1590         // public void handlePath(String xpath) {
1591         // return;
1592         // }
1593         // });
1594 
1595         // fixList.add('k', "fix kk/KK", new CLDRFilter() {
1596         // DateTimePatternGenerator dtpg;
1597         // DateTimePatternGenerator.PatternInfo patternInfo = new DateTimePatternGenerator.PatternInfo();
1598         // DateTimePatternGenerator.FormatParser fp = new DateTimePatternGenerator.FormatParser();
1599         // Set dateFormatItems = new TreeSet();
1600         // Set standardFormats = new TreeSet();
1601         //
1602         // public void handleStart() {
1603         // dtpg = DateTimePatternGenerator.getEmptyInstance(); // should add clear()
1604         // dateFormatItems.clear();
1605         // standardFormats.clear();
1606         // }
1607         //
1608         // // <dateFormatItem id="KKmm" alt="proposed-u133-2" draft="provisional">hh:mm a</dateFormatItem>
1609         // public void handlePath(String xpath) {
1610         // if (xpath.indexOf("/dateFormatItem") >= 0) {
1611         // System.out.println(cldrFileToFilter.getStringValue(xpath) + "\t" + xpath);
1612         // dateFormatItems.add(xpath);
1613         // }
1614         // if (xpath.indexOf("gregorian") >= 0 && xpath.indexOf("pattern") >= 0) {
1615         // if (xpath.indexOf("dateFormat") >= 0 || xpath.indexOf("timeFormat") >= 0) {
1616         // standardFormats.add(xpath);
1617         // }
1618         // }
1619         // }
1620         // public void handleEnd() {
1621         // //if (dateFormatItems.size() == 0) return; // nothing to do
1622         //
1623         // // now add all the standard patterns
1624         // // algorithmically construct items from the standard formats
1625         //
1626         // Set standardSkeletons = new HashSet();
1627         // List items = new ArrayList();
1628         // for (Iterator it = standardFormats.iterator(); it.hasNext();) {
1629         // String xpath = (String) it.next();
1630         // String value = cldrFileToFilter.getStringValue(xpath);
1631         // dtpg.addPattern(value, false, patternInfo);
1632         // standardSkeletons.add(dtpg.getSkeleton(value));
1633         // if (false) { // code for adding guesses
1634         // fp.set(value);
1635         // items.clear();
1636         // fp.getAutoPatterns(value, items);
1637         // for (int i = 0; i < items.size(); ++i) {
1638         // String autoItem = (String)items.get(i);
1639         // dtpg.addPattern(autoItem, false, patternInfo);
1640         // if (patternInfo.status == patternInfo.OK) show("generate", value + " ==> " + autoItem);
1641         // }
1642         // }
1643         // retain(xpath, "-(std)");
1644         // }
1645         //
1646         // for (Iterator it = dateFormatItems.iterator(); it.hasNext();) {
1647         // String xpath = (String) it.next();
1648         // String value = cldrFileToFilter.getStringValue(xpath);
1649         // String oldValue = value;
1650         //
1651         // String skeleton = dtpg.getSkeleton(value);
1652         // // remove if single field
1653         // if (dtpg.isSingleField(skeleton)) {
1654         // remove(xpath, "Single Field");
1655         // continue;
1656         // }
1657         // // remove if date + time
1658         // fp.set(value);
1659         // // the following use fp, so make sure it is set
1660         //
1661         // if (fp.hasDateAndTimeFields()) {
1662         // remove(xpath, "Date + Time");
1663         // continue;
1664         // }
1665         //
1666         // if (containsSS()) {
1667         // remove(xpath, "SS");
1668         // continue;
1669         // }
1670         //
1671         // // see if we have a k or K & fix
1672         // value = fixKk(xpath, value);
1673         //
1674         // dtpg.addPattern(value, false, patternInfo);
1675         //
1676         // // // in case we changed value
1677         // // skeleton = dtpg.getSkeleton(value);
1678         // // String fullPath = cldrFileToFilter.getFullXPath(xpath);
1679         // // String oldFullPath = fullPath;
1680         // // parts.set(fullPath);
1681         // // Map attributes = parts.getAttributes(-1);
1682         // // String id = (String)attributes.get("id");
1683         // //
1684         // // // fix the ID
1685         // // if (!id.equals(skeleton)) {
1686         // // attributes.put("id", skeleton);
1687         // // fullPath = parts.toString();
1688         // // }
1689         // //
1690         // // // make the change
1691         // // boolean differentPath = !fullPath.equals(oldFullPath);
1692         // // if (differentPath || !value.equals(oldValue)) {
1693         // // String reason = "Fixed value";
1694         // // if (differentPath) {
1695         // // reason = "Fixed id";
1696         // // String collisionValue = cldrFileToFilter.getStringValue(fullPath);
1697         // // if (collisionValue != null) {
1698         // // if (!value.equals(collisionValue)) {
1699         // // System.out.println("Collision: not changing " + fullPath
1700         // // + " =\t " + value + ", old: " + collisionValue);
1701         // // }
1702         // // //skip if there was an old item with a different id
1703         // // remove(oldFullPath, "ID collision");
1704         // // return;
1705         // // }
1706         // // }
1707         // // replace(oldFullPath, fullPath, value, reason);
1708         // // }
1709         // }
1710         //
1711         // // make a minimal set
1712         // Map skeleton_patterns = dtpg.getSkeletons(null);
1713         //
1714         // Collection redundants = dtpg.getRedundants(null);
1715         // for (Iterator it = redundants.iterator(); it.hasNext();) {
1716         // String skeleton = dtpg.getSkeleton((String) it.next());
1717         // skeleton_patterns.remove(skeleton);
1718         // }
1719         // // remove all the standard IDs
1720         // for (Iterator it = standardSkeletons.iterator(); it.hasNext();) {
1721         // String standardSkeleton = (String) it.next();
1722         // skeleton_patterns.remove(standardSkeleton);
1723         // }
1724         // // Now add them all back in. Preserve old paths if possible
1725         // for (Iterator it = dateFormatItems.iterator(); it.hasNext();) {
1726         // String xpath = (String) it.next();
1727         // String oldValue = cldrFileToFilter.getStringValue(xpath);
1728         // String oldFullPath = cldrFileToFilter.getFullXPath(xpath);
1729         // String newFullPath = oldFullPath;
1730         // parts.set(newFullPath);
1731         // Map attributes = parts.getAttributes(-1);
1732         // String id = (String)attributes.get("id");
1733         // String newValue = (String) skeleton_patterns.get(id);
1734         // if (newValue == null) {
1735         // remove(xpath, "redundant");
1736         // continue;
1737         // }
1738         // String draft = (String)attributes.get("draft");
1739         // if (draft == null || draft.equals("approved")) {
1740         // attributes.put("draft", "provisional");
1741         // newFullPath = parts.toString();
1742         // }
1743         // if (oldValue.equals(newValue) && newFullPath.equals(oldFullPath)) {
1744         // retain(xpath, "-");
1745         // skeleton_patterns.remove(id);
1746         // continue; // skip, they are the same
1747         // }
1748         // // not redundant, but altered
1749         // replace(oldFullPath, newFullPath, newValue, "fixed");
1750         // skeleton_patterns.remove(id);
1751         // }
1752         // parts.set("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/" +
1753         // "dateFormatItem");
1754         // Map attributes = parts.getAttributes(-1);
1755         // //attributes.put("alt", "proposed-666");
1756         // attributes.put("draft", "provisional");
1757         // for (Iterator it = skeleton_patterns.keySet().iterator(); it.hasNext();) {
1758         // String skeleton = (String)it.next();
1759         // String pattern = (String)skeleton_patterns.get(skeleton);
1760         // attributes.put("id", skeleton);
1761         // String fullPath = parts.toString();
1762         // replace(fullPath, fullPath, pattern);
1763         // }
1764         // }
1765         //
1766         // private String fixKk(String xpath, String value) {
1767         // List fields = fp.getItems();
1768         // for (int i = 0; i < fields.size(); ++i) {
1769         // Object field = fields.get(i);
1770         // if (field instanceof DateTimePatternGenerator.VariableField) {
1771         // char first = field.toString().charAt(0);
1772         // String replacement = null;
1773         // if (first == 'k') replacement = "H";
1774         // else if (first == 'K') replacement = "h";
1775         // if (replacement != null) {
1776         // field = new DateTimePatternGenerator.VariableField(Utility.repeat(replacement, field.toString().length()));
1777         // fields.set(i, field);
1778         // }
1779         // }
1780         // }
1781         // String newValue = fp.toString();
1782         // if (!value.equals(newValue)) {
1783         // remove(xpath, value + " => " + newValue);
1784         // }
1785         // return newValue;
1786         // }
1787         //
1788         // private boolean containsSS() {
1789         // List fields = fp.getItems();
1790         // for (int i = 0; i < fields.size(); ++i) {
1791         // Object field = fields.get(i);
1792         // if (field instanceof DateTimePatternGenerator.VariableField) {
1793         // char first = field.toString().charAt(0);
1794         // if (first == 'S') return true;
1795         // }
1796         // }
1797         // return false;
1798         // }
1799         // });
1800         /*
1801          * Fix id to be identical to skeleton
1802          * Eliminate any single-field ids
1803          * Add "L" (stand-alone month), "?" (other stand-alones)
1804          * Remove any fields with both a date and a time
1805          * Test that datetime format is valid format (will have to fix by hand)
1806          * Map k, K to H, h
1807          *
1808          * In Survey Tool: don't show id; compute when item added or changed
1809          * test validity
1810          */
1811 
1812         fixList.add('d', "fix dates", new CLDRFilter() {
1813             DateTimePatternGenerator dateTimePatternGenerator = DateTimePatternGenerator.getEmptyInstance();
1814             DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser();
1815             Map<String, Set<String>> seenSoFar = new HashMap<String, Set<String>>();
1816 
1817             public void handleStart() {
1818                 seenSoFar.clear();
1819             }
1820 
1821             public void handlePath(String xpath) {
1822                 // timeFormatLength type="full"
1823                 if (xpath.contains("timeFormatLength") && xpath.contains("full")) {
1824                     String fullpath = cldrFileToFilter.getFullXPath(xpath);
1825                     String value = cldrFileToFilter.getStringValue(xpath);
1826                     boolean gotChange = false;
1827                     List<Object> list = formatParser.set(value).getItems();
1828                     for (int i = 0; i < list.size(); ++i) {
1829                         Object item = list.get(i);
1830                         if (item instanceof DateTimePatternGenerator.VariableField) {
1831                             String itemString = item.toString();
1832                             if (itemString.charAt(0) == 'z') {
1833                                 list.set(i, new VariableField(Utility.repeat("v", itemString.length())));
1834                                 gotChange = true;
1835                             }
1836                         }
1837                     }
1838                     if (gotChange) {
1839                         String newValue = toStringWorkaround();
1840                         if (value != newValue) {
1841                             replace(xpath, fullpath, newValue);
1842                         }
1843                     }
1844                 }
1845                 if (xpath.indexOf("/availableFormats") < 0) return;
1846                 String value = cldrFileToFilter.getStringValue(xpath);
1847                 if (value == null) return; // not in current file
1848 
1849                 String fullpath = cldrFileToFilter.getFullXPath(xpath);
1850                 fullparts.set(fullpath);
1851 
1852                 Map<String, String> attributes = fullparts.findAttributes("dateFormatItem");
1853                 String id = attributes.get("id");
1854                 String oldID = id;
1855                 try {
1856                     id = dateTimePatternGenerator.getBaseSkeleton(id);
1857                     if (id.equals(oldID)) return;
1858                     System.out.println(oldID + " => " + id);
1859                 } catch (RuntimeException e) {
1860                     id = "[error]";
1861                     return;
1862                 }
1863 
1864                 attributes.put("id", id);
1865                 totalSkeletons.add(id);
1866 
1867                 replace(xpath, fullparts.toString(), value);
1868             }
1869 
1870             private String toStringWorkaround() {
1871                 StringBuffer result = new StringBuffer();
1872                 List<Object> items = formatParser.getItems();
1873                 for (int i = 0; i < items.size(); ++i) {
1874                     Object item = items.get(i);
1875                     if (item instanceof String) {
1876                         result.append(formatParser.quoteLiteral((String) items.get(i)));
1877                     } else {
1878                         result.append(items.get(i).toString());
1879                     }
1880                 }
1881                 return result.toString();
1882             }
1883 
1884         });
1885 
1886         fixList.add('y', "fix years to be y (with exceptions)", new CLDRFilter() {
1887             DateTimeCanonicalizer dtc = new DateTimeCanonicalizer(true);
1888 
1889             DateTimePatternGenerator dateTimePatternGenerator = DateTimePatternGenerator.getEmptyInstance();
1890             DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser();
1891             Map<String, Set<String>> seenSoFar = new HashMap<String, Set<String>>();
1892 
1893             public void handleStart() {
1894                 seenSoFar.clear();
1895             }
1896 
1897             public void handlePath(String xpath) {
1898                 DateTimePatternType datetimePatternType = DateTimePatternType.fromPath(xpath);
1899 
1900                 // check to see if we need to change the value
1901 
1902                 if (!DateTimePatternType.STOCK_AVAILABLE_INTERVAL_PATTERNS.contains(datetimePatternType)) {
1903                     return;
1904                 }
1905                 String oldValue = cldrFileToFilter.getStringValue(xpath);
1906                 String value = dtc.getCanonicalDatePattern(xpath, oldValue, datetimePatternType);
1907 
1908                 String fullPath = cldrFileToFilter.getFullXPath(xpath);
1909                 // Deleted code to canonicalize id for availableFormats items (cldrbug 5760)
1910 
1911                 if (value.equals(oldValue)) {
1912                     return;
1913                 }
1914 
1915                 // made it through the gauntlet, so replace
1916 
1917                 replace(xpath, fullPath, value);
1918             }
1919         });
1920 
1921         // This should only be applied to specific locales, and the results checked manually afterward.
1922         // It will only create ranges using the same digits as in root, not script-specific digits.
1923         // Any pre-existing year ranges should use the range marker from the intervalFormats "y" item.
        // This makes several assumptions and is somewhat *FRAGILE*.
1925         fixList.add('j', "add year ranges from root to Japanese calendar eras", new CLDRFilter() {
1926             private CLDRFile rootFile;
1927 
1928             public void handleStart() {
1929                 rootFile = factory.make("root", false);
1930             }
1931 
1932             public void handlePath(String xpath) {
1933                 // Skip paths we don't care about
1934                 if (xpath.indexOf("/calendar[@type=\"japanese\"]/eras/era") < 0) return;
1935                 // Get root name for the era, check it
1936                 String rootEraValue = rootFile.getStringValue(xpath);
1937                 int rootEraIndex = rootEraValue.indexOf(" (");
1938                 if (rootEraIndex < 0) return; // this era does not have a year range in root, no need to add one in this
1939                 // locale
1940                 // Get range marker from intervalFormat range for y
1941                 String yearIntervalFormat = cldrFileToFilter
1942                     .getStringValue(
1943                         "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"y\"]/greatestDifference[@id=\"y\"]");
1944                 if (yearIntervalFormat == null) return; // oops, no intervalFormat data for y
1945                 String rangeMarker = yearIntervalFormat.replaceAll("[.y\u5E74\uB144]", ""); // *FRAGILE* strip out
1946                 // everything except the
1947                 // range-indicating part
1948                 // Get current locale name for this era, check it
1949                 String eraValue = cldrFileToFilter.getStringValue(xpath);
1950                 if (eraValue.indexOf('(') >= 0 && eraValue.indexOf(rangeMarker) >= 0) return; // this eraValue already
1951                 // has a year range that
1952                 // uses the appropriate
1953                 // rangeMarker
1954                 // Now update the root year range it with the rangeMarker for this locale, and append it to this
1955                 // locale's name
1956                 String rootYearRange = rootEraValue.substring(rootEraIndex);
1957                 String appendYearRange = rootYearRange.replaceAll("[\u002D\u2013]", rangeMarker);
1958                 String newEraValue = eraValue.concat(appendYearRange);
1959                 String fullpath = cldrFileToFilter.getFullXPath(xpath);
1960                 replace(xpath, fullpath, newEraValue);
1961                 // System.out.println("CLDRModify fj: rootEraValue: \"" + rootEraValue + "\", eraValue: \"" + eraValue +
1962                 // "\", rangeMarker: \"" + rangeMarker + "\"");
1963             }
1964         });
1965 
1966         fixList.add('r', "fix references and standards", new CLDRFilter() {
1967             int currentRef = 500;
1968             Map<String, TreeMap<String, String>> locale_oldref_newref = new TreeMap<String, TreeMap<String, String>>();
1969             TreeMap<String, String> oldref_newref;
1970 
1971             //LanguageTagParser ltp = new LanguageTagParser();
1972 
1973             // References standards = new References(true);
1974             // References references = new References(false);
1975 
1976             public void handleStart() {
1977                 String locale = cldrFileToFilter.getLocaleID();
1978                 oldref_newref = locale_oldref_newref.get(locale);
1979                 if (oldref_newref == null) {
1980                     oldref_newref = new TreeMap<String, String>();
1981                     locale_oldref_newref.put(locale, oldref_newref);
1982                 }
1983             }
1984 
1985             // // Samples:
1986             // // <language type="ain" references="RP1">阿伊努文</language>
1987             // // <reference type="R1" uri="http://www.info.gov.hk/info/holiday_c.htm">二零零五年公眾假期刊登憲報</reference>
1988             public void handlePath(String xpath) {
1989                 // must be minimised for this to work.
1990                 String fullpath = cldrFileToFilter.getFullXPath(xpath);
1991                 if (!fullpath.contains("reference")) return;
1992                 String value = cldrFileToFilter.getStringValue(xpath);
1993                 fullparts.set(fullpath);
1994                 if ("reference".equals(fullparts.getElement(-1))) {
1995                     fixType(value, "type", fullpath);
1996                 } else if (fullparts.getAttributeValue(-1, "references") != null) {
1997                     fixType(value, "references", fullpath);
1998                 } else {
1999                     System.out.println("CLDRModify: Skipping: " + xpath);
2000                 }
2001             }
2002 
2003             private void fixType(String value, String type, String oldFullPath) {
2004                 String ref = fullparts.getAttributeValue(-1, type);
2005                 if (whitespace.containsSome(ref)) throw new IllegalArgumentException("Whitespace in references");
2006                 String newRef = getNewRef(ref);
2007                 fullparts.addAttribute(type, newRef);
2008                 replace(oldFullPath, fullparts.toString(), value);
2009             }
2010 
2011             private String getNewRef(String ref) {
2012                 String newRef = oldref_newref.get(ref);
2013                 if (newRef == null) {
2014                     newRef = String.valueOf(currentRef++);
2015                     newRef = "R" + Utility.repeat("0", (3 - newRef.length())) + newRef;
2016                     oldref_newref.put(ref, newRef);
2017                 }
2018                 return newRef;
2019             }
2020         });
2021 
2022         fixList.add('q', "fix annotation punctuation", new CLDRFilter() {
2023             @Override
2024             public void handlePath(String xpath) {
2025                 if (!xpath.contains("/annotation")) {
2026                     return;
2027                 }
2028                 String fullpath = cldrFileToFilter.getFullXPath(xpath);
2029                 XPathParts parts = XPathParts.getInstance(fullpath);
2030                 String cp = parts.getAttributeValue(2, "cp");
2031                 String tts = parts.getAttributeValue(2, "tts");
2032                 String type = parts.getAttributeValue(2, "type");
2033                 if ("tts".equals(type)) {
2034                     return; // ok, skip
2035                 }
2036                 String hex = "1F600";
2037                 if (cp.startsWith("[")) {
2038                     UnicodeSet us = new UnicodeSet(cp);
2039                     if (us.size() == 1) {
2040                         cp = us.iterator().next();
2041                         hex = Utility.hex(cp);
2042                     } else {
2043                         hex = us.toString();
2044                     }
2045                     parts.putAttributeValue(2, "cp", cp);
2046                 }
2047                 parts.removeAttribute(2, "tts");
2048                 if (tts != null) {
2049                     String newTts = CldrUtility.join(COMMA_SEMI.splitToList(tts), ", ");
2050                     XPathParts parts2 = parts.cloneAsThawed();
2051                     parts2.putAttributeValue(2, "type", "tts");
2052                     add(parts2.toString(), newTts, "separate tts");
2053                 }
2054                 String value = cldrFileToFilter.getStringValue(xpath);
2055                 String newValue = CldrUtility.join(COMMA_SEMI.splitToList(value), " | ");
2056                 final String newFullPath = parts.toString();
2057                 Comments comments = cldrFileToFilter.getXpath_comments();
2058                 String comment = comments.removeComment(CommentType.PREBLOCK, xpath);
2059                 comment = hex + (comment == null ? "" : " " + comment);
2060                 comments.addComment(CommentType.PREBLOCK, newFullPath, comment);
2061                 if (!fullpath.equals(newFullPath) || !value.equals(newValue)) {
2062                     replace(fullpath, newFullPath, newValue);
2063                 }
2064             }
2065         });
2066 
2067         fixList.add('Q', "add annotation names to keywords", new CLDRFilter() {
2068             Set<String> available = Annotations.getAvailable();
2069             TreeSet<String> sorted = new TreeSet<>(Collator.getInstance(ULocale.ROOT));
2070             CLDRFile resolved;
2071 
2072             @Override
2073             public void handleStart() {
2074                 String localeID = cldrFileToFilter.getLocaleID();
2075                 if (!available.contains(localeID)) {
2076                     throw new IllegalArgumentException("no annotations available, probably wrong directory");
2077                 }
2078                 ;
2079                 resolved = factory.make(localeID, true);
2080             }
2081 
2082             @Override
2083             public void handlePath(String xpath) {
2084                 if (!xpath.contains("/annotation")) {
2085                     return;
2086                 }
2087                 //      <annotation cp="��">100 | honderd | persent | telling | vol</annotation>
2088                 //      <annotation cp="��" type="tts">honderd punte</annotation>
2089                 //      we will copy honderd punte into the list of keywords.
2090                 String fullpath = cldrFileToFilter.getFullXPath(xpath);
2091                 XPathParts parts = XPathParts.getFrozenInstance(fullpath);
2092                 String type = parts.getAttributeValue(2, "type");
2093                 if (type == null) {
2094                     return; // no TTS, so keywords, skip
2095                 }
2096                 String name = cldrFileToFilter.getStringValue(xpath);
2097                 XPathParts keywordParts = parts.cloneAsThawed().removeAttribute(2, "type");
2098                 String keywordPath = keywordParts.toString();
2099                 String keywordValue = resolved.getStringValue(keywordPath);
2100                 String sourceLocaleId = resolved.getSourceLocaleID(keywordPath, null);
2101                 sorted.clear();
2102                 sorted.add(name);
2103                 List<String> items;
2104                 if (!sourceLocaleId.equals(XMLSource.ROOT_ID) && !sourceLocaleId.equals(XMLSource.CODE_FALLBACK_ID)) {
2105                     items = Annotations.splitter.splitToList(keywordValue);
2106                     sorted.addAll(items);
2107                 } else {
2108                     int debug = 0;
2109                 }
2110                 DisplayAndInputProcessor.filterCoveredKeywords(sorted);
2111                 String newKeywordValue = CollectionUtilities.join(sorted, " | ");
2112                 if (!newKeywordValue.equals(keywordValue)) {
2113                     replace(keywordPath, keywordPath, newKeywordValue);
2114                 }
2115             }
2116         });
2117 
2118         fixList.add('N', "add number symbols to exemplars", new CLDRFilter() {
2119             CLDRFile resolved;
2120             UnicodeSet numberStuff = new UnicodeSet();
2121             Set<String> seen = new HashSet<>();
2122             Set<String> hackAllowOnly = new HashSet<>();
2123             boolean skip = false;
2124 
2125             @Override
2126             public void handleStart() {
2127                 String localeID = cldrFileToFilter.getLocaleID();
2128                 resolved = factory.make(localeID, true);
2129                 numberStuff.clear();
2130                 seen.clear();
2131                 skip = localeID.equals("root");
2132                 // TODO add return value to handleStart to skip calling handlePath
2133 
2134                 if (NUMBER_SYSTEM_HACK) {
2135                     hackAllowOnly.clear();
2136                     for (NumberingSystem system : NumberingSystem.values()) {
2137                         String numberingSystem = system.path == null ? "latn" : cldrFileToFilter.getStringValue(system.path);
2138                         if (numberingSystem != null) {
2139                             hackAllowOnly.add(numberingSystem);
2140                         }
2141                     }
2142                     int debug = 0;
2143                 }
2144             }
2145 
2146             @Override
2147             public void handlePath(String xpath) {
2148                 // the following doesn't work without NUMBER_SYSTEM_HACK, because there are spurious numbersystems in the data.
2149                 // http://unicode.org/cldr/trac/ticket/10648
2150                 // so using a hack for now in handleEnd
2151                 if (skip || !xpath.startsWith("//ldml/numbers/symbols")) {
2152                     return;
2153                 }
2154 
2155                 // //ldml/numbers/symbols[@numberSystem="latn"]/exponential
2156                 parts = XPathParts.getFrozenInstance(xpath);
2157                 String system = parts.getAttributeValue(2, "numberSystem");
2158                 if (system == null) {
2159                     System.err.println("Bogus numberSystem:\t" + cldrFileToFilter.getLocaleID() + " \t" + xpath);
2160                     return;
2161                 } else if (seen.contains(system) || !hackAllowOnly.contains(system)) {
2162                     return;
2163                 }
2164                 seen.add(system);
2165                 UnicodeSet exemplars = resolved.getExemplarsNumeric(system);
2166                 System.out.println("# " + system + " ==> " + exemplars.toPattern(false));
2167                 for (String s : exemplars) {
2168                     numberStuff.addAll(s); // add individual characters
2169                 }
2170             }
2171 
2172             @Override
2173             public void handleEnd() {
2174                 if (!numberStuff.isEmpty()) {
2175                     UnicodeSet current = cldrFileToFilter.getExemplarSet(ExemplarType.numbers, WinningChoice.WINNING);
2176                     if (!numberStuff.equals(current)) {
2177                         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(cldrFileToFilter);
2178                         if (current != null && !current.isEmpty()) {
2179                             numberStuff.addAll(current);
2180                         }
2181                         String path = CLDRFile.getExemplarPath(ExemplarType.numbers);
2182                         String value = daip.getPrettyPrinter().format(numberStuff);
2183                         replace(path, path, value);
2184                     }
2185                 }
2186             }
2187         });
2188 
2189         fixList.add('k',
2190             "fix according to -k config file. Details on http://cldr.unicode.org/development/cldr-big-red-switch/cldrmodify-passes/cldrmodify-config",
2191             new CLDRFilter() {
2192                 private Map<ConfigMatch, LinkedHashSet<Map<ConfigKeys, ConfigMatch>>> locale2keyValues;
2193                 private LinkedHashSet<Map<ConfigKeys, ConfigMatch>> keyValues = new LinkedHashSet<Map<ConfigKeys, ConfigMatch>>();
2194                 private Matcher draftMatcher = Pattern.compile("\\[@draft=\"[^\"]+\"]").matcher("");
2195 
2196                 @Override
2197                 public void handleStart() {
2198                     super.handleStart();
2199                     if (!options[FIX].doesOccur || !options[FIX].value.equals("k")) {
2200                         return;
2201                     }
2202                     if (locale2keyValues == null) {
2203                         fillCache();
2204                     }
2205                     // set up for the specific locale we are dealing with.
2206                     // a small optimization
2207                     String localeId = getLocaleID();
2208                     keyValues.clear();
2209                     for (Entry<ConfigMatch, LinkedHashSet<Map<ConfigKeys, ConfigMatch>>> localeMatcher : locale2keyValues
2210                         .entrySet()) {
2211                         if (localeMatcher.getKey().matches(localeId)) {
2212                             keyValues.addAll(localeMatcher.getValue());
2213                         }
2214                     }
2215                     System.out.println("# Checking entries & adding:\t" + keyValues.size());
2216                     for (Map<ConfigKeys, ConfigMatch> entry : keyValues) {
2217                         ConfigMatch action = entry.get(ConfigKeys.action);
2218                         //ConfigMatch locale = entry.get(ConfigKeys.locale);
2219                         ConfigMatch pathMatch = entry.get(ConfigKeys.path);
2220                         ConfigMatch valueMatch = entry.get(ConfigKeys.value);
2221                         ConfigMatch newPath = entry.get(ConfigKeys.new_path);
2222                         ConfigMatch newValue = entry.get(ConfigKeys.new_value);
2223                         switch (action.action) {
2224                         // we add all the values up front
2225                         case addNew:
2226                         case add:
2227                             if (pathMatch != null || valueMatch != null || newPath == null || newValue == null) {
2228                                 throw new IllegalArgumentException(
2229                                     "Bad arguments, must have " +
2230                                         "path==null, value=null, new_path!=null, new_value!=null:\n\t"
2231                                         + entry);
2232                             }
2233                             String newPathString = newPath.getPath(cldrFileToFilter);
2234                             if (action.action == ConfigAction.add
2235                                 || cldrFileToFilter.getStringValue(newPathString) == null) {
2236                                 replace(newPathString, newPathString, newValue.exactMatch, "config");
2237                             }
2238                             break;
2239                         // we just check
2240                         case replace:
2241                             if ((pathMatch == null && valueMatch == null) || (newPath == null && newValue == null)) {
2242                                 throw new IllegalArgumentException(
2243                                     "Bad arguments, must have " +
2244                                         "(path!=null OR value=null) AND (new_path!=null OR new_value!=null):\n\t"
2245                                         + entry);
2246                             }
2247                             break;
2248                         // For delete, we just check; we'll remove later
2249                         case delete:
2250                             if (newPath != null || newValue != null) {
2251                                 throw new IllegalArgumentException("Bad arguments, must have " +
2252                                     "newPath=null, newValue=null"
2253                                     + entry);
2254                             }
2255                             break;
2256                         default: // fall through
2257                             throw new IllegalArgumentException("Internal Error");
2258                         }
2259                     }
2260                 }
2261 
2262                 private void fillCache() {
2263                     locale2keyValues = new LinkedHashMap<ConfigMatch, LinkedHashSet<Map<ConfigKeys, ConfigMatch>>>();
2264                     String configFileName = options[KONFIG].value;
2265                     FileProcessor myReader = new FileProcessor() {
2266                         {
2267                             doHash = false;
2268                         }
2269 
2270                         @Override
2271                         protected boolean handleLine(int lineCount, String line) {
2272                             line = line.trim();
2273 //                            if (line.isEmpty()) {
2274 //                                return true;
2275 //                            }
2276                             String[] lineParts = line.split("\\s*;\\s*");
2277                             Map<ConfigKeys, ConfigMatch> keyValue = new EnumMap<ConfigKeys, ConfigMatch>(
2278                                 ConfigKeys.class);
2279                             for (String linePart : lineParts) {
2280                                 int pos = linePart.indexOf('=');
2281                                 if (pos < 0) {
2282                                     throw new IllegalArgumentException(lineCount + ":\t No = in command: «" + linePart + "» in " + line);
2283                                 }
2284                                 ConfigKeys key = ConfigKeys.valueOf(linePart.substring(0, pos).trim());
2285                                 if (keyValue.containsKey(key)) {
2286                                     throw new IllegalArgumentException("Must not have multiple keys: " + key);
2287                                 }
2288                                 String match = linePart.substring(pos + 1).trim();
2289                                 keyValue.put(key, new ConfigMatch(key, match));
2290                             }
2291                             final ConfigMatch locale = keyValue.get(ConfigKeys.locale);
2292                             if (locale == null || keyValue.get(ConfigKeys.action) == null) {
2293                                 throw new IllegalArgumentException();
2294                             }
2295 
2296                             LinkedHashSet<Map<ConfigKeys, ConfigMatch>> keyValues = locale2keyValues
2297                                 .get(locale);
2298                             if (keyValues == null) {
2299                                 locale2keyValues.put(locale,
2300                                     keyValues = new LinkedHashSet<Map<ConfigKeys, ConfigMatch>>());
2301                             }
2302                             keyValues.add(keyValue);
2303                             return true;
2304                         }
2305                     };
2306                     myReader.process(CLDRModify.class, configFileName);
2307                 }
2308 
2309                 @Override
2310                 public void handlePath(String xpath) {
2311                     // slow method; could optimize
2312                     for (Map<ConfigKeys, ConfigMatch> entry : keyValues) {
2313                         ConfigMatch pathMatch = entry.get(ConfigKeys.path);
2314                         if (pathMatch != null && !pathMatch.matches(xpath)) {
2315                             continue;
2316                         }
2317                         ConfigMatch valueMatch = entry.get(ConfigKeys.value);
2318                         String value = cldrFileToFilter.getStringValue(xpath);
2319                         if (valueMatch != null && !valueMatch.matches(value)) {
2320                             continue;
2321                         }
2322                         ConfigMatch action = entry.get(ConfigKeys.action);
2323                         switch (action.action) {
2324                         case delete:
2325                             remove(xpath, "config");
2326                             break;
2327                         case replace:
2328                             ConfigMatch newPath = entry.get(ConfigKeys.new_path);
2329                             ConfigMatch newValue = entry.get(ConfigKeys.new_value);
2330 
2331                             String fullpath = cldrFileToFilter.getFullXPath(xpath);
2332                             String draft = "";
2333                             int loc = fullpath.indexOf("[@draft=");
2334                             if (loc >= 0) {
2335                                 int loc2 = fullpath.indexOf(']', loc + 7);
2336                                 draft = fullpath.substring(loc, loc2 + 1);
2337                             }
2338 
2339                             String modPath = ConfigMatch.getModified(pathMatch, xpath, newPath) + draft;
2340                             String modValue = ConfigMatch.getModified(valueMatch, value, newValue);
2341                             replace(xpath, modPath, modValue, "config");
2342                         }
2343                     }
2344                 }
2345             });
2346 
2347         // fixList.add('q', "fix numbering system", new CLDRFilter() {
2348         // private final UnicodeSet dotEquivalents =(UnicodeSet) new UnicodeSet("[..․﹒ 。。︒۔٬]").freeze();
2349         // private final UnicodeSet commaEquivalents = (UnicodeSet) new UnicodeSet("[,,﹐ ، ٫ 、﹑、،]").freeze();
2350         // private final UnicodeSet apostropheEquivalent = (UnicodeSet) new UnicodeSet("[︐︑ '' ‘ ’ ]").freeze();
2351         // private final UnicodeSet spaces = (UnicodeSet) new UnicodeSet("[:whitespace:]").freeze();
2352         //
2353         // private final UnicodeSet ALLOWED_IN_NUMBER_SYMBOLS = (UnicodeSet) new
2354         // UnicodeSet("[\\u0000-\\u00FF ’ ‰ ∞ −]").freeze();
2355         //
2356         // private final UnicodeMap map = new UnicodeMap();
2357         // {
2358         // map.putAll(dotEquivalents, ".");
2359         // map.putAll(commaEquivalents, ",");
2360         // map.putAll(apostropheEquivalent, "’");
2361         // map.putAll(spaces, "\u00a0");
2362         // map.put('٪', "%");
2363         // map.put('؛', ";");
2364         // map.put('؉', "‰");
2365         // map.putAll(new UnicodeSet("\\p{dash}"), "-");
2366         // }
2367         //
2368         // private String system;
2369         // private CLDRFile resolved;
2370         //
2371         // /*
2372         // <decimal>.</decimal>
2373         // <group>,</group>
2374         // <list>;</list>
2375         // <percentSign>%</percentSign>
2376         // <nativeZeroDigit>0</nativeZeroDigit>
2377         // <patternDigit>#</patternDigit>
2378         // <plusSign>+</plusSign>
2379         // <minusSign>-</minusSign>
2380         // <exponential>E</exponential>
2381         // <perMille>‰</perMille>
2382         // <infinity>∞</infinity>
2383         // <nan>NaN</nan>
2384         // */
2385         // public void handleStart() {
2386         // resolved = cldrFileToFilter.make(cldrFileToFilter.getLocaleID(), true);
2387         // system = "????";
2388         // String zero = resolved.getStringValue("//ldml/numbers/symbols/nativeZeroDigit");
2389         // int firstChar = zero.codePointAt(0);
2390         // switch(firstChar) {
2391         // case '0': system = "????"; break;
2392         // case '٠': system = "arab"; break;
2393         // case '۰': system = "arabext"; break;
2394         // default:
2395         // int script = UScript.getScript(zero.codePointAt(0));
2396         // if (script != UScript.UNKNOWN) {
2397         // system = UScript.getShortName(script).toLowerCase(Locale.ENGLISH);
2398         // }
2399         // break;
2400         // }
2401         // }
2402         // public void handlePath(String xpath) {
2403         // String fullpath = cldrFileToFilter.getFullXPath(xpath);
2404         // if (!fullpath.startsWith("//ldml/numbers/symbols/")) return;
2405         // String value = cldrFileToFilter.getStringValue(xpath);
2406         // if (ALLOWED_IN_NUMBER_SYMBOLS.contains(value)) return;
2407         // parts.set(xpath);
2408         // String alt = parts.getAttributeValue(-1, "alt");
2409         // if (alt != null) {
2410         // show("*** Non-empty alt on " + xpath + "\t\t" + value,"???");
2411         // return;
2412         // }
2413         // String last = parts.getElement(-1);
2414         // String newValue = getLatinSeparator(value, last);
2415         // if (newValue == null) {
2416         // throw new IllegalArgumentException("Can't handle " + xpath + "\t\t" + value);
2417         // }
2418         // if (newValue.equals(value)) {
2419         // return;
2420         // }
2421         // replace(fullpath, fullpath, newValue);
2422         // parts.set(fullpath);
2423         // parts.addAttribute("alt", system);
2424         // String newPath = parts.toString();
2425         // replace(newPath, newPath, value);
2426         // }
2427         //
2428         // String getLatinSeparator(String value, String last) {
2429         // String newValue = map.transform(value);
2430         // if (ALLOWED_IN_NUMBER_SYMBOLS.containsAll(newValue)) {
2431         // return newValue;
2432         // }
2433         // if (last.equals("nativeZeroDigit")) {
2434         // return "0";
2435         // }
2436         // if (last.equals("exponential")) {
2437         // return "E";
2438         // }
2439         // if (last.equals("nan")) {
2440         // return "NaN";
2441         // }
2442         // if (last.equals("infinity")) {
2443         // return "∞";
2444         // }
2445         // if (last.equals("list")) {
2446         // return ";";
2447         // }
2448         // if (last.equals("percentSign")) {
2449         // return "%";
2450         // }
2451         // if (last.equals("group")) {
2452         // return "’";
2453         // }
2454         // return null;
2455         // }
2456         // });
2457 
2458         fixList.add('i', "fix Identical Children");
2459         fixList.add('o', "check attribute validity");
2460     }
2461 
2462     // references="http://www.stat.fi/tk/tt/luokitukset/lk/kieli_02.html"
2463 
2464     private static class ValuePair {
2465         String value;
2466         String fullxpath;
2467     }
2468 
2469     /**
2470      * Find the set of xpaths that
2471      * (a) have all the same values (if present) in the children
2472      * (b) are absent in the parent,
2473      * (c) are different than what is in the fully resolved parent
2474      * and add them.
2475      */
fixIdenticalChildren(Factory cldrFactory, CLDRFile k, CLDRFile replacements)2476     static void fixIdenticalChildren(Factory cldrFactory, CLDRFile k, CLDRFile replacements) {
2477         String key = k.getLocaleID();
2478         if (key.equals("root")) return;
2479         Set<String> availableChildren = cldrFactory.getAvailableWithParent(key, true);
2480         if (availableChildren.size() == 0) return;
2481         Set<String> skipPaths = new HashSet<String>();
2482         Map<String, ValuePair> haveSameValues = new TreeMap<String, ValuePair>();
2483         CLDRFile resolvedFile = cldrFactory.make(key, true);
2484         // get only those paths that are not in "root"
2485         CollectionUtilities.addAll(resolvedFile.iterator(), skipPaths);
2486 
2487         // first, collect all the paths
2488         for (String locale : availableChildren) {
2489             if (locale.indexOf("POSIX") >= 0) continue;
2490             CLDRFile item = cldrFactory.make(locale, false);
2491             for (String xpath : item) {
2492                 if (skipPaths.contains(xpath)) continue;
2493                 // skip certain elements
2494                 if (xpath.indexOf("/identity") >= 0) continue;
2495                 if (xpath.startsWith("//ldml/numbers/currencies/currency")) continue;
2496                 if (xpath.startsWith("//ldml/dates/timeZoneNames/metazone[")) continue;
2497                 if (xpath.indexOf("[@alt") >= 0) continue;
2498                 if (xpath.indexOf("/alias") >= 0) continue;
2499 
2500                 // must be string vale
2501                 ValuePair v1 = new ValuePair();
2502                 v1.value = item.getStringValue(xpath);
2503                 v1.fullxpath = item.getFullXPath(xpath);
2504 
2505                 ValuePair vAlready = haveSameValues.get(xpath);
2506                 if (vAlready == null) {
2507                     haveSameValues.put(xpath, v1);
2508                 } else if (!v1.value.equals(vAlready.value) || !v1.fullxpath.equals(vAlready.fullxpath)) {
2509                     skipPaths.add(xpath);
2510                     haveSameValues.remove(xpath);
2511                 }
2512             }
2513         }
2514         // at this point, haveSameValues is all kosher, so add items
2515         for (String xpath : haveSameValues.keySet()) {
2516             ValuePair v = haveSameValues.get(xpath);
2517             // if (v.value.equals(resolvedFile.getStringValue(xpath))
2518             // && v.fullxpath.equals(resolvedFile.getFullXPath(xpath))) continue;
2519             replacements.add(v.fullxpath, v.value);
2520         }
2521     }
2522 
fixAltProposed()2523     static void fixAltProposed() {
2524         throw new IllegalArgumentException();
2525         // throw out any alt=proposed values that are the same as the main
2526         // HashSet toRemove = new HashSet();
2527         // for (Iterator it = dataSource.iterator(); it.hasNext();) {
2528         // String cpath = (String) it.next();
2529         // if (cpath.indexOf("[@alt=") < 0) continue;
2530         // String cpath2 = getNondraftNonaltXPath(cpath);
2531         // String value = getStringValue(cpath);
2532         // String value2 = getStringValue(cpath2);
2533         // if (!value.equals(value2)) continue;
2534         // // have to worry about cases where the info is not in the value!!
2535         // //fix this; values are the same!!
2536         // String fullpath = getNondraftNonaltXPath(getFullXPath(cpath));
2537         // String fullpath2 = getNondraftNonaltXPath(getFullXPath(cpath2));
2538         // if (!fullpath.equals(fullpath2)) continue;
2539         // Log.logln(getLocaleID() + "\tRemoving redundant alternate: " + getFullXPath(cpath) + " ;\t" + value);
2540         // Log.logln("\t\tBecause of: " + getFullXPath(cpath2) + " ;\t" + value2);
2541         // if (getFullXPath(cpath2).indexOf("[@references=") >= 0) {
2542         // System.out.println("Warning: removing references: " + getFullXPath(cpath2));
2543         // }
2544         // toRemove.add(cpath);
2545         // }
2546         // dataSource.removeAll(toRemove);
2547 
2548     }
2549 
2550     /**
2551      * Perform various fixes
2552      * TODO add options to pick which one.
2553      *
2554      * @param options
2555      * @param config
2556      * @param cldrFactory
2557      */
fix(CLDRFile k, String inputOptions, String config, Factory cldrFactory)2558     private static void fix(CLDRFile k, String inputOptions, String config, Factory cldrFactory) {
2559 
2560         // TODO before modifying, make sure that it is fully resolved.
2561         // then minimize against the NEW parents
2562 
2563         Set<String> removal = new TreeSet<String>(k.getComparator());
2564         CLDRFile replacements = SimpleFactory.makeFile("temp");
2565         fixList.setFile(k, inputOptions, cldrFactory, removal, replacements);
2566 
2567         for (String xpath : k) {
2568             fixList.handlePath(xpath);
2569         }
2570         fixList.handleEnd();
2571 
2572         // remove bad attributes
2573 
2574         if (inputOptions.indexOf('v') >= 0) {
2575             CLDRTest.checkAttributeValidity(k, null, removal);
2576         }
2577 
2578         // raise identical elements
2579 
2580         if (inputOptions.indexOf('i') >= 0) {
2581             fixIdenticalChildren(cldrFactory, k, replacements);
2582         }
2583 
2584         // now do the actions we collected
2585 
2586         if (SHOW_DETAILS) {
2587             if (removal.size() != 0 || !replacements.isEmpty()) {
2588                 if (!removal.isEmpty()) {
2589                     System.out.println("Removals:");
2590                     for (String path : removal) {
2591                         System.out.println(path + " =\t " + k.getStringValue(path));
2592                     }
2593                 }
2594                 if (!replacements.isEmpty()) {
2595                     System.out.println("Additions/Replacements:");
2596                     System.out.println(replacements.toString().replaceAll("\u00A0", "<NBSP>"));
2597                 }
2598             }
2599         }
2600         if (removal.size() != 0) {
2601             k.removeAll(removal, COMMENT_REMOVALS);
2602         }
2603         k.putAll(replacements, CLDRFile.MERGE_REPLACE_MINE);
2604     }
2605 
2606     /**
2607      * Internal
2608      */
testJavaSemantics()2609     public static void testJavaSemantics() {
2610         Collator caseInsensitive = Collator.getInstance(ULocale.ROOT);
2611         caseInsensitive.setStrength(Collator.SECONDARY);
2612         Set<String> setWithCaseInsensitive = new TreeSet<String>(caseInsensitive);
2613         setWithCaseInsensitive.addAll(Arrays.asList(new String[] { "a", "b", "c" }));
2614         Set<String> plainSet = new TreeSet<String>();
2615         plainSet.addAll(Arrays.asList(new String[] { "a", "b", "B" }));
2616         System.out.println("S1 equals S2?\t" + setWithCaseInsensitive.equals(plainSet));
2617         System.out.println("S2 equals S1?\t" + plainSet.equals(setWithCaseInsensitive));
2618         setWithCaseInsensitive.removeAll(plainSet);
2619         System.out.println("S1 removeAll S2 is empty?\t" + setWithCaseInsensitive.isEmpty());
2620     }
2621 
2622     // <localizedPatternChars>GyMdkHmsSEDFwWahKzYeugAZ</localizedPatternChars>
2623     /*
2624      * <localizedPattern>
2625      * <map type="era">G</map>
2626      * <map type="year">y</map>
2627      * <map type="year_iso">Y</map>
2628      * <map type="year_uniform">u</map>
2629      * <map type="month">M</map>
2630      * <map type="week_in_year">w</map>
2631      * <map type="week_in_month">W</map>
2632      * <map type="day">d</map>
2633      * <map type="day_of_year">D</map>
2634      * <map type="day_of_week_in_month">F</map>
2635      * <map type="day_julian">g</map>
2636      * <map type="day_of_week">E</map>
2637      * <map type="day_of_week_local">e</map>
2638      * <map type="period_in_day">a</map>
2639      * <map type="hour_1_12">h</map>
2640      * <map type="hour_0_23">H</map>
2641      * <map type="hour_0_11">K</map>
2642      * <map type="hour_1_24">k</map>
2643      * <map type="minute">m</map>
2644      * <map type="second">s</map>
2645      * <map type="fractions_of_second">S</map>
2646      * <map type="milliseconds_in_day">A</map>
2647      * <map type="timezone">z</map>
2648      * <map type="timezone_gmt">Z</map>
2649      * </localizedPattern>
2650      */
2651 
2652 }
2653