• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  * Copyright (C) 2004-2013, International Business Machines Corporation and   *
4  * others. All Rights Reserved.                                               *
5  ******************************************************************************
6  */
7 package org.unicode.cldr.tool;
8 
9 import java.io.File;
10 import java.io.PrintWriter;
11 import java.util.ArrayList;
12 import java.util.Arrays;
13 import java.util.EnumMap;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.Iterator;
17 import java.util.LinkedHashMap;
18 import java.util.LinkedHashSet;
19 import java.util.List;
20 import java.util.Map;
21 import java.util.Map.Entry;
22 import java.util.Set;
23 import java.util.TreeMap;
24 import java.util.TreeSet;
25 import java.util.regex.Matcher;
26 import java.util.regex.Pattern;
27 
28 import org.unicode.cldr.draft.FileUtilities;
29 import org.unicode.cldr.test.CLDRTest;
30 import org.unicode.cldr.test.CoverageLevel2;
31 import org.unicode.cldr.test.DisplayAndInputProcessor;
32 import org.unicode.cldr.test.QuickCheck;
33 import org.unicode.cldr.util.Annotations;
34 import org.unicode.cldr.util.CLDRConfig;
35 import org.unicode.cldr.util.CLDRFile;
36 import org.unicode.cldr.util.CLDRFile.DraftStatus;
37 import org.unicode.cldr.util.CLDRFile.ExemplarType;
38 import org.unicode.cldr.util.CLDRFile.NumberingSystem;
39 import org.unicode.cldr.util.CLDRFile.WinningChoice;
40 import org.unicode.cldr.util.CLDRLocale;
41 import org.unicode.cldr.util.CLDRPaths;
42 import org.unicode.cldr.util.CLDRTool;
43 import org.unicode.cldr.util.CldrUtility;
44 import org.unicode.cldr.util.DateTimeCanonicalizer;
45 import org.unicode.cldr.util.DateTimeCanonicalizer.DateTimePatternType;
46 import org.unicode.cldr.util.DtdData;
47 import org.unicode.cldr.util.DtdType;
48 import org.unicode.cldr.util.Factory;
49 import org.unicode.cldr.util.FileProcessor;
50 import org.unicode.cldr.util.LanguageTagParser;
51 import org.unicode.cldr.util.Level;
52 import org.unicode.cldr.util.Log;
53 import org.unicode.cldr.util.LogicalGrouping;
54 import org.unicode.cldr.util.PathHeader;
55 import org.unicode.cldr.util.PatternCache;
56 import org.unicode.cldr.util.RegexLookup;
57 import org.unicode.cldr.util.SimpleFactory;
58 import org.unicode.cldr.util.StandardCodes;
59 import org.unicode.cldr.util.StringId;
60 import org.unicode.cldr.util.SupplementalDataInfo;
61 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
62 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
63 import org.unicode.cldr.util.XMLSource;
64 import org.unicode.cldr.util.XPathParts;
65 import org.unicode.cldr.util.XPathParts.Comments;
66 import org.unicode.cldr.util.XPathParts.Comments.CommentType;
67 
68 import com.google.common.base.Splitter;
69 import com.ibm.icu.dev.tool.UOption;
70 import com.ibm.icu.dev.util.CollectionUtilities;
71 import com.ibm.icu.impl.Utility;
72 import com.ibm.icu.text.Collator;
73 import com.ibm.icu.text.DateTimePatternGenerator;
74 import com.ibm.icu.text.DateTimePatternGenerator.VariableField;
75 import com.ibm.icu.text.Normalizer;
76 import com.ibm.icu.text.NumberFormat;
77 import com.ibm.icu.text.UnicodeSet;
78 import com.ibm.icu.util.ICUException;
79 import com.ibm.icu.util.Output;
80 import com.ibm.icu.util.ULocale;
81 
82 /**
83  * Tool for applying modifications to the CLDR files. Use -h to see the options.
84  * <p>
 * There are some system properties (set with -D on the java command line) that can be used with the program: <br>
 * {@code -DSHOW_FILES=<anything>} shows all create/open of files.
87  */
88 @CLDRTool(alias = "modify",
89 description = "Tool for applying modifications to the CLDR files. Use -h to see the options.")
90 public class CLDRModify {
// General debugging switch for this tool.
private static final boolean DEBUG = false;
// When non-null, main() prints k.toString(DEBUG_PATHS) for each locale at several stages (Debug1..Debug4).
static final String DEBUG_PATHS = null; // ".*currency.*";
static final boolean COMMENT_REMOVALS = false; // append removals as comments
// Frozen set built from the [:whitespace:] property.
static final UnicodeSet whitespace = new UnicodeSet("[:whitespace:]").freeze();
// Frozen set of hexadecimal digits; ConfigMatch uses it to recognize paths given as hex ids.
static final UnicodeSet HEX = new UnicodeSet("[a-fA-F0-9]").freeze();
// DTD data for the ldml DTD type.
private static final DtdData dtdData = DtdData.getInstance(DtdType.ldml);
97 
98     // TODO make this into input option.
99 
/**
 * Keys understood by {@link ConfigMatch}. The {@code action} key carries a
 * {@link ConfigAction} name; {@code locale}, {@code path} and {@code value}
 * may be given as a regex; {@code path} and {@code new_path} may be given as
 * a hex id.
 */
enum ConfigKeys {
    action,
    locale,
    path,
    value,
    new_path,
    new_value
}
103 
/**
 * The kinds of modification that can be named by the {@code action} key.
 */
enum ConfigAction {
    /** Remove a path. */
    delete,

    /** Add a path/value. */
    add,

    /** Replace a path/value. Equals 'add' but tests selected paths. */
    replace,

    /** Add a path/value. Equals 'add' but tests that the path did NOT exist. */
    addNew
}
122 
123     static final class ConfigMatch {
124         final String exactMatch;
125         final Matcher regexMatch; // doesn't have to be thread safe
126         final ConfigAction action;
127         final boolean hexPath;
128 
ConfigMatch(ConfigKeys key, String match)129         public ConfigMatch(ConfigKeys key, String match) {
130             if (key == ConfigKeys.action) {
131                 exactMatch = null;
132                 regexMatch = null;
133                 action = ConfigAction.valueOf(match);
134                 hexPath = false;
135             } else if (match.startsWith("/") && match.endsWith("/")) {
136                 if (key != ConfigKeys.locale && key != ConfigKeys.path && key != ConfigKeys.value) {
137                     throw new IllegalArgumentException("Regex only allowed for old path/value.");
138                 }
139                 exactMatch = null;
140                 regexMatch = PatternCache.get(match.substring(1, match.length() - 1)
141                     .replace("[@", "\\[@")).matcher("");
142                 action = null;
143                 hexPath = false;
144             } else {
145                 exactMatch = match;
146                 regexMatch = null;
147                 action = null;
148                 hexPath = (key == ConfigKeys.new_path || key == ConfigKeys.path)
149                     && HEX.containsAll(match);
150             }
151 
152         }
153 
matches(String other)154         public boolean matches(String other) {
155             if (exactMatch == null) {
156                 return regexMatch.reset(other).find();
157             } else if (hexPath) {
158                 // convert path to id for comparison
159                 return exactMatch.equals(StringId.getHexId(other));
160             } else {
161                 return exactMatch.equals(other);
162             }
163         }
164 
toString()165         public String toString() {
166             return action != null ? action.toString()
167                 : exactMatch == null ? regexMatch.toString()
168                     : hexPath ? "*" + exactMatch + "*"
169                         : exactMatch;
170         }
171 
getPath(CLDRFile cldrFileToFilter)172         public String getPath(CLDRFile cldrFileToFilter) {
173             if (!hexPath) {
174                 return exactMatch;
175             }
176             // ensure that we have all the possible paths cached
177             String path = StringId.getStringFromHexId(exactMatch);
178             if (path == null) {
179                 for (String eachPath : cldrFileToFilter.fullIterable()) {
180                     StringId.getHexId(eachPath);
181                 }
182                 path = StringId.getStringFromHexId(exactMatch);
183                 if (path == null) {
184                     throw new IllegalArgumentException("No path for hex id: " + exactMatch);
185                 }
186             }
187             return path;
188         }
189 
getModified(ConfigMatch valueMatch, String value, ConfigMatch newValue)190         public static String getModified(ConfigMatch valueMatch, String value, ConfigMatch newValue) {
191             if (valueMatch == null) { // match anything
192                 if (newValue != null && newValue.exactMatch != null) {
193                     return newValue.exactMatch;
194                 }
195                 if (value != null) {
196                     return value;
197                 }
198                 throw new IllegalArgumentException("Can't have both old and new be null.");
199             } else if (valueMatch.exactMatch == null) { // regex
200                 if (newValue == null || newValue.exactMatch == null) {
201                     throw new IllegalArgumentException("Can't have regex without replacement.");
202                 }
203                 StringBuffer buffer = new StringBuffer();
204                 valueMatch.regexMatch.appendReplacement(buffer, newValue.exactMatch);
205                 return buffer.toString();
206             } else {
207                 return newValue.exactMatch != null ? newValue.exactMatch : value;
208             }
209         }
210     }
211 
// The set of available fixes; main() uses it for help text, sub-option validation, setup and cleanup.
static FixList fixList = new FixList();

// Indices into the options array below; the order here must match the order of that array.
private static final int HELP1 = 0,
    HELP2 = 1,
    SOURCEDIR = 2,
    DESTDIR = 3,
    MATCH = 4,
    JOIN = 5,
    MINIMIZE = 6,
    FIX = 7,
    JOIN_ARGS = 8,
    VET_ADD = 9,
    RESOLVE = 10,
    PATH = 11,
    USER = 12,
    ALL_DIRS = 13,
    CHECK = 14,
    KONFIG = 15;

// Command-line options, addressed through the index constants above.
private static final UOption[] options = {
    UOption.HELP_H(),
    UOption.HELP_QUESTION_MARK(),
    UOption.SOURCEDIR().setDefault(CLDRPaths.MAIN_DIRECTORY),
    UOption.DESTDIR().setDefault(CLDRPaths.GEN_DIRECTORY + "cldrModify/"),
    UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
    UOption.create("join", 'j', UOption.OPTIONAL_ARG),
    // NOTE(review): the handling of this option in main() is currently commented out.
    UOption.create("minimize", 'r', UOption.NO_ARG),
    UOption.create("fix", 'f', UOption.OPTIONAL_ARG),
    UOption.create("join-args", 'i', UOption.OPTIONAL_ARG),
    UOption.create("vet", 'v', UOption.OPTIONAL_ARG),
    UOption.create("resolve", 'z', UOption.OPTIONAL_ARG),
    UOption.create("path", 'p', UOption.REQUIRES_ARG),
    UOption.create("user", 'u', UOption.REQUIRES_ARG),
    UOption.create("all", 'a', UOption.REQUIRES_ARG),
    UOption.create("check", 'c', UOption.NO_ARG),
    UOption.create("konfig", 'k', UOption.OPTIONAL_ARG).setDefault("modify_config.txt"),
};

// Sub-options accepted for -i (join-args); checked by checkSuboptions() in main().
// NOTE(review): main() also tests for an 'x' sub-option (removePosix), but 'x' is not in this
// set, so passing it is rejected before that code can run -- confirm which is intended.
private static final UnicodeSet allMergeOptions = new UnicodeSet("[rcd]");
251 
252     static final String HELP_TEXT1 = "Use the following options"
253         + XPathParts.NEWLINE
254         + "-h or -?\t for this message"
255         + XPathParts.NEWLINE
256         + "-"
257         + options[SOURCEDIR].shortName
258         + "\t source directory. Default = -s"
259         + CldrUtility.getCanonicalName(CLDRPaths.MAIN_DIRECTORY)
260         + XPathParts.NEWLINE
261         + "\tExample:-sC:\\Unicode-CVS2\\cldr\\common\\gen\\source\\"
262         + XPathParts.NEWLINE
263         + "-"
264         + options[DESTDIR].shortName
265         + "\t destination directory. Default = -d"
266         + CldrUtility.getCanonicalName(CLDRPaths.GEN_DIRECTORY + "main/")
267         + XPathParts.NEWLINE
268         + "-m<regex>\t to restrict the locales to what matches <regex>"
269         + XPathParts.NEWLINE
270         + "-j<merge_dir>/X'\t to merge two sets of files together (from <source_dir>/X and <merge_dir>/X', "
271         + XPathParts.NEWLINE
272         + "\twhere * in X' is replaced by X)."
273         + XPathParts.NEWLINE
274         + "\tExample:-jC:\\Unicode-CVS2\\cldr\\dropbox\\to_be_merged\\missing\\missing_*"
275         + XPathParts.NEWLINE
276         + "-i\t merge arguments:"
277         + XPathParts.NEWLINE
278         + "\tr\t replace contents (otherwise new data will be draft=\"unconfirmed\")"
279         + XPathParts.NEWLINE
280         + "\tc\t ignore comments in <merge_dir> files"
281 //        + XPathParts.NEWLINE
282 //        + "-r\t to minimize the results (removing items that inherit from parent)."
283 + XPathParts.NEWLINE
284 + "-v\t incorporate vetting information, and generate diff files."
285 + XPathParts.NEWLINE
286 + "-z\t generate resolved files"
287 + XPathParts.NEWLINE
288 + "-p\t set path for -fx"
289 + XPathParts.NEWLINE
290 + "-u\t set user for -fb"
291 + XPathParts.NEWLINE
292 + "-a\t pattern: recurse over all subdirectories that match pattern"
293 + XPathParts.NEWLINE
294 + "-c\t check that resulting xml files are valid. Requires that a dtd directory be copied to the output directory, in the appropriate location."
295 + XPathParts.NEWLINE
296 + "-k\t config_file\twith -fk perform modifications according to what is in the config file. For format details, see:"
297 + XPathParts.NEWLINE
298 + "\t\thttp://cldr.unicode.org/development/cldr-big-red-switch/cldrmodify-passes/cldrmodify-config."
299 + XPathParts.NEWLINE
300 + "-f\t to perform various fixes on the files (add following arguments to specify which ones, eg -fxi)"
301 + XPathParts.NEWLINE;
302 
// Trailing part of the help message; main() prints it after the per-fix help.
static final String HELP_TEXT2 = "Note: A set of bat files are also generated in <dest_dir>/diff. They will invoke a comparison program on the results."
    + XPathParts.NEWLINE;
// Switch for extra detail output.
private static final boolean SHOW_DETAILS = false;
// When true, main() prints a " .. still processing" line if more than 5 seconds passed since the last one.
private static boolean SHOW_PROCESSING = false;

// The source directory as given on the command line (after main() strips a trailing "main/" when -a is used).
static String sourceInput;
309 
310     /**
311      * Picks options and executes. Use -h to see options.
312      */
main(String[] args)313     public static void main(String[] args) throws Exception {
314         long startTime = System.currentTimeMillis();
315         UOption.parseArgs(args, options);
316         if (options[HELP1].doesOccur || options[HELP2].doesOccur) {
317             System.out.println(HELP_TEXT1 + fixList.showHelp() + HELP_TEXT2);
318             return;
319         }
320         checkSuboptions(options[FIX], fixList.getOptions());
321         checkSuboptions(options[JOIN_ARGS], allMergeOptions);
322         String recurseOnDirectories = options[ALL_DIRS].value;
323         boolean makeResolved = options[RESOLVE].doesOccur; // Utility.COMMON_DIRECTORY + "main/";
324 
325         // String sourceDir = "C:\\ICU4C\\locale\\common\\main\\";
326 
327         sourceInput = options[SOURCEDIR].value;
328         String destInput = options[DESTDIR].value;
329         if (recurseOnDirectories != null) {
330             sourceInput = removeSuffix(sourceInput, "main/", "main");
331             destInput = removeSuffix(destInput, "main/", "main");
332         }
333         String sourceDirBase = CldrUtility.checkValidDirectory(sourceInput); // Utility.COMMON_DIRECTORY + "main/";
334         String targetDirBase = CldrUtility.checkValidDirectory(destInput); // Utility.GEN_DIRECTORY + "main/";
335         System.out.format("Source:\t%s\n", sourceDirBase);
336         System.out.format("Target:\t%s\n", targetDirBase);
337 
338         Set<String> dirSet = new TreeSet<String>();
339         if (recurseOnDirectories == null) {
340             dirSet.add("");
341         } else {
342             String[] subdirs = new File(sourceDirBase).list();
343             Matcher subdirMatch = PatternCache.get(recurseOnDirectories).matcher("");
344             for (String subdir : subdirs) {
345                 if (!subdirMatch.reset(subdir).find()) continue;
346                 dirSet.add(subdir + "/");
347             }
348         }
349         for (String dir : dirSet) {
350             String sourceDir = sourceDirBase + dir;
351             if (!new File(sourceDir).isDirectory()) continue;
352             String targetDir = targetDirBase + dir;
353             Log.setLog(targetDir + "/diff", "log.txt");
354             try {
355                 Factory cldrFactory = Factory.make(sourceDir, ".*");
356 
357                 if (options[VET_ADD].doesOccur) {
358                     VettingAdder va = new VettingAdder(options[VET_ADD].value);
359                     va.showFiles(cldrFactory, targetDir);
360                     return;
361                 }
362 
363                 Factory mergeFactory = null;
364 
365                 String join_prefix = "", join_postfix = "";
366                 if (options[JOIN].doesOccur) {
367                     String mergeDir = options[JOIN].value;
368                     File temp = new File(mergeDir);
369                     mergeDir = CldrUtility.checkValidDirectory(temp.getParent() + File.separator); // Utility.COMMON_DIRECTORY
370                     // + "main/";
371                     String filename = temp.getName();
372                     join_prefix = join_postfix = "";
373                     int pos = filename.indexOf("*");
374                     if (pos >= 0) {
375                         join_prefix = filename.substring(0, pos);
376                         join_postfix = filename.substring(pos + 1);
377                     }
378                     mergeFactory = Factory.make(mergeDir, ".*");
379                 }
380                 /*
381                  * Factory cldrFactory = Factory.make(sourceDir, ".*");
382                  * Set testSet = cldrFactory.getAvailable();
383                  * String[] quicktest = new String[] {
384                  * "de"
385                  * //"ar", "dz_BT",
386                  * // "sv", "en", "de"
387                  * };
388                  * if (quicktest.length > 0) {
389                  * testSet = new TreeSet(Arrays.asList(quicktest));
390                  * }
391                  */
392                 Set<String> locales = new TreeSet<String>(cldrFactory.getAvailable());
393                 if (mergeFactory != null) {
394                     Set<String> temp = new TreeSet<String>(mergeFactory.getAvailable());
395                     Set<String> locales3 = new TreeSet<String>();
396                     for (String locale : temp) {
397                         if (!locale.startsWith(join_prefix) || !locale.endsWith(join_postfix)) continue;
398                         locales3.add(locale.substring(join_prefix.length(), locale.length() - join_postfix.length()));
399                     }
400                     locales.retainAll(locales3);
401                     System.out.println("Merging: " + locales3);
402                 }
403                 new CldrUtility.MatcherFilter(options[MATCH].value).retainAll(locales);
404 
405                 RetainWhenMinimizing retainIfTrue = null;
406                 PathHeader.Factory pathHeaderFactory = null;
407 
408                 fixList.handleSetup();
409 
410                 long lastTime = System.currentTimeMillis();
411                 int spin = 0;
412                 System.out.format(locales.size() + " Locales:\t%s\n", locales.toString());
413                 int totalRemoved = 0;
414                 for (String test : locales) {
415                     spin++;
416                     if (SHOW_PROCESSING) {
417                         long now = System.currentTimeMillis();
418                         if (now - lastTime > 5000) {
419                             System.out.println(" .. still processing " + test + " [" + spin + "/" + locales.size()
420                             + "]");
421                             lastTime = now;
422                         }
423                     }
424                     // testJavaSemantics();
425 
426                     // TODO parameterize the directory and filter
427                     // System.out.println("C:\\ICU4C\\locale\\common\\main\\fr.xml");
428 
429                     CLDRFile k = cldrFactory.make(test, makeResolved).cloneAsThawed();
430                     // HashSet<String> set = Builder.with(new HashSet<String>()).addAll(k).get();
431                     // System.out.format("Locale\t%s, Size\t%s\n", test, set.size());
432                     // if (k.isNonInheriting()) continue; // for now, skip supplementals
433                     if (DEBUG_PATHS != null) {
434                         System.out.println("Debug1 (" + test + "):\t" + k.toString(DEBUG_PATHS));
435                     }
436                     // System.out.println(k);
437                     // String s1 =
438                     // "//ldml/segmentations/segmentation[@type=\"LineBreak\"]/variables/variable[@_q=\"0061\"][@id=\"$CB\"] ";
439                     // String s2 =
440                     // "//ldml/segmentations/segmentation[@type=\"LineBreak\"]/variables/variable[@_q=\"003A\"][@id=\"$CB\"]";
441                     // System.out.println(k.ldmlComparator.compare(s1, s2));
442                     if (mergeFactory != null) {
443                         int mergeOption = CLDRFile.MERGE_ADD_ALTERNATE;
444                         CLDRFile toMergeIn = mergeFactory.make(join_prefix + test + join_postfix, false)
445                             .cloneAsThawed();
446                         if (toMergeIn != null) {
447                             if (options[JOIN_ARGS].doesOccur) {
448                                 if (options[JOIN_ARGS].value.indexOf("r") >= 0)
449                                     mergeOption = CLDRFile.MERGE_REPLACE_MY_DRAFT;
450                                 if (options[JOIN_ARGS].value.indexOf("d") >= 0)
451                                     mergeOption = CLDRFile.MERGE_REPLACE_MINE;
452                                 if (options[JOIN_ARGS].value.indexOf("c") >= 0) toMergeIn.clearComments();
453                                 if (options[JOIN_ARGS].value.indexOf("x") >= 0) removePosix(toMergeIn);
454                             }
455                             toMergeIn.makeDraft(DraftStatus.contributed);
456                             k.putAll(toMergeIn, mergeOption);
457                         }
458                         // special fix
459                         k.removeComment(
460                             " The following are strings that are not found in the locale (currently), but need valid translations for localizing timezones. ");
461                     }
462                     if (DEBUG_PATHS != null) {
463                         System.out.println("Debug2 (" + test + "):\t" + k.toString(DEBUG_PATHS));
464                     }
465                     if (options[FIX].doesOccur) {
466                         fix(k, options[FIX].value, options[KONFIG].value, cldrFactory);
467                     }
468                     if (DEBUG_PATHS != null) {
469                         System.out.println("Debug3 (" + test + "):\t" + k.toString(DEBUG_PATHS));
470                     }
471 //                    if (options[MINIMIZE].doesOccur) {
472 //                        if (pathHeaderFactory == null) {
473 //                            pathHeaderFactory = PathHeader.getFactory(cldrFactory.make("en", true));
474 //                        }
475 //                        // TODO, fix identity
476 //                        String parent = LocaleIDParser.getParent(test);
477 //                        if (parent != null) {
478 //                            CLDRFile toRemove = cldrFactory.make(parent, true);
479 //                            // remove the items that are language codes, script codes, or region codes
480 //                            // since they may be real translations.
481 //                            if (retainIfTrue == null) {
482 //                                retainIfTrue = new RetainWhenMinimizing();
483 //                            }
484 //                            retainIfTrue.setParentFile(toRemove);
485 //                            List<String> removed = DEBUG ? null : new ArrayList<String>();
486 //                            k.removeDuplicates(toRemove, COMMENT_REMOVALS, retainIfTrue, removed);
487 //                            if (removed != null && removed.size() != 0) {
488 //                                totalRemoved += removed.size();
489 //                                Set<PathHeader> sorted = new TreeSet<PathHeader>();
490 //                                for (String path : removed) {
491 //                                    sorted.add(pathHeaderFactory.fromPath(path));
492 //                                }
493 //                                for (PathHeader pathHeader : sorted) {
494 //                                    System.out.println("\t# " + test + "\t" + pathHeader + "\t" + pathHeader.getOriginalPath());
495 //                                }
496 //                                System.out.println("\t# " + test + "\t# Removed:\t" + removed.size());
497 //                            }
498 //                        }
499 //                    }
500                     // System.out.println(CLDRFile.getAttributeOrder());
501 
502                     /*
503                      * if (false) {
504                      * Map tempComments = k.getXpath_comments();
505                      *
506                      * for (Iterator it2 = tempComments.keySet().iterator(); it2.hasNext();) {
507                      * String key = (String) it2.next();
508                      * String comment = (String) tempComments.get(key);
509                      * Log.logln("Writing extra comment: " + key);
510                      * System.out.println(key + "\t comment: " + comment);
511                      * }
512                      * }
513                      */
514 
515                     if (DEBUG_PATHS != null) {
516                         System.out.println("Debug4 (" + test + "):\t" + k.toString(DEBUG_PATHS));
517                     }
518 
519                     PrintWriter pw = FileUtilities.openUTF8Writer(targetDir, test + ".xml");
520                     String testPath = "//ldml/dates/calendars/calendar[@type=\"persian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/month[@type=\"1\"]";
521                     if (false) {
522                         System.out.println("Printing Raw File:");
523                         testPath = "//ldml/dates/calendars/calendar[@type=\"persian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/alias";
524                         System.out.println(k.getStringValue(testPath));
525                         // System.out.println(k.getFullXPath(testPath));
526                         Iterator it4 = k.iterator();
527                         Set s = CollectionUtilities.addAll(it4, new TreeSet());
528 
529                         System.out.println(k.getStringValue(testPath));
530                         // if (true) return;
531                         Set orderedSet = new TreeSet(k.getComparator());
532                         CollectionUtilities.addAll(k.iterator(), orderedSet);
533                         for (Iterator it3 = orderedSet.iterator(); it3.hasNext();) {
534                             String path = (String) it3.next();
535                             // System.out.println(path);
536                             if (path.equals(testPath)) {
537                                 System.out.println("huh?");
538                             }
539                             String value = k.getStringValue(path);
540                             String fullpath = k.getFullXPath(path);
541                             System.out.println("\t=\t" + fullpath);
542                             System.out.println("\t=\t" + value);
543                         }
544                         System.out.println("Done Printing Raw File:");
545                     }
546 
547                     k.write(pw);
548                     // pw.println();
549                     pw.close();
550                     if (options[CHECK].doesOccur) {
551                         QuickCheck.check(new File(targetDir, test + ".xml"));
552                     }
553 
554                     // JCE: I don't think anyone really uses the .bat files from CLDRModify any more, since
555                     // Eclipse provides a decent file comparison program.  You can comment this back in if
556                     // you need it, but I found that sometimes having this here clobbers the real output
557                     // file, which we definitely don't want.
558                     // ToolUtilities.generateBat(sourceDir, test + ".xml", targetDir, test + ".xml", lineComparer);
559 
560                     /*
561                      * boolean ok = Utility.areFileIdentical(sourceDir + test + ".xml",
562                      * targetDir + test + ".xml", failureLines, Utility.TRIM + Utility.SKIP_SPACES);
563                      * if (!ok) {
564                      * System.out.println("Found differences at: ");
565                      * System.out.println("\t" + failureLines[0]);
566                      * System.out.println("\t" + failureLines[1]);
567                      * }
568                      */
569                 }
570                 if (totalSkeletons.size() != 0) {
571                     System.out.println("Total Skeletons" + totalSkeletons);
572                 }
573                 if (totalRemoved > 0) {
574                     System.out.println("# Removed:\t" + totalRemoved);
575                 }
576             } finally {
577                 fixList.handleCleanup();
578                 Log.close();
579                 System.out.println("Done -- Elapsed time: " + ((System.currentTimeMillis() - startTime) / 60000.0)
580                     + " minutes");
581             }
582         }
583     }
584 
removeSuffix(String value, String... suffices)585     private static String removeSuffix(String value, String... suffices) {
586         for (String suffix : suffices) {
587             if (value.endsWith(suffix)) {
588                 return value.substring(0, value.length() - suffix.length());
589             }
590         }
591         return value;
592     }
593 
594     /*
595      * Use the coverage to determine what we should keep in the case of a locale just below root.
596      */
597 
598     static class RetainWhenMinimizing implements CLDRFile.RetentionTest {
599         private CLDRFile file;
600         private CLDRLocale c;
601         private boolean isArabicSublocale;
602         // Status status = new Status(); // no need to have, was unused
603 
setParentFile(CLDRFile file)604         public RetainWhenMinimizing setParentFile(CLDRFile file) {
605             this.file = file;
606             this.c = CLDRLocale.getInstance(file.getLocaleIDFromIdentity());
607             isArabicSublocale = "ar".equals(c.getLanguage()) && !"001".equals(c.getCountry());
608             return this;
609         }
610 
611         @Override
getRetention(String path)612         public Retention getRetention(String path) {
613             if (path.startsWith("//ldml/identity/")) {
614                 return Retention.RETAIN;
615             }
616             // special case for Arabic
617             if (isArabicSublocale && path.startsWith("//ldml/numbers/defaultNumberingSystem")) {
618                 return Retention.RETAIN;
619             }
620             String localeId = file.getSourceLocaleID(path, null);
621             if ((c.isLanguageLocale() || c.equals(CLDRLocale.getInstance("pt_PT")))
622                 && (XMLSource.ROOT_ID.equals(localeId) || XMLSource.CODE_FALLBACK_ID.equals(localeId))) {
623                 return Retention.RETAIN;
624             }
625             return Retention.RETAIN_IF_DIFFERENT;
626         }
627     };
628 
// Splits on ',', ';' or '|', trimming each piece and dropping empty pieces.
static final Splitter COMMA_SEMI = Splitter.on(Pattern.compile("[,;|]")).trimResults().omitEmptyStrings();
// NOTE(review): purpose not visible in this part of the file -- presumably enables a numbering-system workaround; confirm at the use site.
protected static final boolean NUMBER_SYSTEM_HACK = true;
631 
632     /**
633      *
634      */
checkSuboptions(UOption givenOptions, UnicodeSet allowedOptions)635     private static void checkSuboptions(UOption givenOptions, UnicodeSet allowedOptions) {
636         if (givenOptions.doesOccur && !allowedOptions.containsAll(givenOptions.value)) {
637             throw new IllegalArgumentException("Illegal sub-options for "
638                 + givenOptions.shortName
639                 + ": "
640                 + new UnicodeSet().addAll(givenOptions.value).removeAll(allowedOptions)
641                 + CldrUtility.LINE_SEPARATOR + "Use -? for help.");
642         }
643     }
644 
645     /**
646      *
647      */
removePosix(CLDRFile toMergeIn)648     private static void removePosix(CLDRFile toMergeIn) {
649         Set<String> toRemove = new HashSet<String>();
650         for (String xpath : toMergeIn) {
651             if (xpath.startsWith("//ldml/posix")) toRemove.add(xpath);
652         }
653         toMergeIn.removeAll(toRemove, false);
654     }
655 
656     // private static class References {
657     // static Map<String,Map<String,String>> locale_oldref_newref = new TreeMap<String,Map<String,String>>();
658     //
659     // static String[][] keys = {{"standard", "S", "[@standard=\"true\"]"}, {"references", "R", ""}};
660     // UnicodeSet digits = new UnicodeSet("[0-9]");
661     // int referenceCounter = 0;
662     // Map references_token = new TreeMap();
663     // Set tokenSet = new HashSet();
664     // String[] keys2;
665     // boolean isStandard;
666     // References(boolean standard) {
667     // isStandard = standard;
668     // keys2 = standard ? keys[0] : keys[1];
669     // }
670     // /**
671     // *
672     // */
673     // public void reset(CLDRFile k) {
674     // }
675     // /**
676     // *
677     // */
678     // // Samples:
679     // // <language type="ain" references="RP1">阿伊努文</language>
680     // // <reference type="R1" uri="http://www.info.gov.hk/info/holiday_c.htm">二零零五年公眾假期刊登憲報</reference>
681     // private int fix(Map attributes, CLDRFile replacements) {
682     // // we have to have either a references element or attributes.
683     // String references = (String) attributes.get(keys2[0]);
684     // int result = 0;
685     // if (references != null) {
686     // references = references.trim();
687     // if (references.startsWith("S") || references.startsWith("R")) {
688     // if (digits.containsAll(references.substring(1))) return 0;
689     // }
690     // String token = (String) references_token.get(references);
691     // if (token == null) {
692     // while (true) {
693     // token = keys2[1] + (++referenceCounter);
694     // if (!tokenSet.contains(token)) break;
695     // }
696     // references_token.put(references, token);
697     // System.out.println("Adding: " + token + "\t" + references);
698     // replacements.add("//ldml/references/reference[@type=\"" + token + "\"]" + keys2[2], references);
699     // result = 1;
700     // }
701     // attributes.put(keys2[0], token);
702     // }
703     // return result;
704     // }
705     // }
706 
707     abstract static class CLDRFilter {
708         protected CLDRFile cldrFileToFilter;
709         protected CLDRFile cldrFileToFilterResolved;
710         private String localeID;
711         protected Set<String> availableChildren;
712         private Set<String> toBeRemoved;
713         private CLDRFile toBeReplaced;
714         protected Factory factory;
715 
setFile(CLDRFile k, Factory factory, Set<String> removal, CLDRFile replacements)716         public final void setFile(CLDRFile k, Factory factory, Set<String> removal, CLDRFile replacements) {
717             this.cldrFileToFilter = k;
718             cldrFileToFilterResolved = null;
719             this.factory = factory;
720             localeID = k.getLocaleID();
721             this.toBeRemoved = removal;
722             this.toBeReplaced = replacements;
723             handleStart();
724         }
725 
handleStart()726         public void handleStart() {
727         }
728 
handlePath(String xpath)729         public abstract void handlePath(String xpath);
730 
handleEnd()731         public void handleEnd() {
732         }
733 
getResolved()734         public CLDRFile getResolved() {
735             if (cldrFileToFilterResolved == null) {
736                 if (cldrFileToFilter.isResolved()) {
737                     cldrFileToFilterResolved = cldrFileToFilter;
738                 } else {
739                     cldrFileToFilterResolved = factory.make(cldrFileToFilter.getLocaleID(), true);
740                 }
741             }
742             return cldrFileToFilterResolved;
743 
744         }
show(String reason, String detail)745         public void show(String reason, String detail) {
746             System.out.println("%" + localeID + "\t" + reason + "\tConsidering " + detail);
747         }
748 
retain(String path, String reason)749         public void retain(String path, String reason) {
750             System.out.println("%" + localeID + "\t" + reason + "\tRetaining: " + cldrFileToFilter.getStringValue(path)
751             + "\t at: " + path);
752         }
753 
remove(String path)754         public void remove(String path) {
755             remove(path, "-");
756         }
757 
remove(String path, String reason)758         public void remove(String path, String reason) {
759             if (toBeRemoved.contains(path)) return;
760             toBeRemoved.add(path);
761 //            System.out.println("%" + localeID + "\t" + reason + "\tRemoving:\t«"
762 //                + cldrFileToFilter.getStringValue(path) + "»\t at:\t" + path);
763             String oldValueOldPath = cldrFileToFilter.getStringValue(path);
764             showAction(reason, "Removing", oldValueOldPath, null, null, path, path);
765         }
766 
replace(String oldFullPath, String newFullPath, String newValue)767         public void replace(String oldFullPath, String newFullPath, String newValue) {
768             replace(oldFullPath, newFullPath, newValue, "-");
769         }
770 
showAction(String reason, String action, String oldValueOldPath, String oldValueNewPath, String newValue, String oldFullPath, String newFullPath)771         public void showAction(String reason, String action, String oldValueOldPath, String oldValueNewPath,
772             String newValue, String oldFullPath, String newFullPath) {
773             System.out.println("%"
774                 + localeID
775                 + "\t"
776                 + action
777                 + "\t"
778                 + reason
779                 + "\t«"
780                 + oldValueOldPath
781                 + "»"
782                 + (newFullPath.equals(oldFullPath) || oldValueNewPath == null ? "" : oldValueNewPath
783                     .equals(oldValueOldPath) ? "/=" : "/«" + oldValueNewPath + "»")
784                 + "\t→\t" + (newValue == null ? "∅" : newValue.equals(oldValueOldPath) ? "≡" : "«" + newValue + "»")
785                 + "\t" + oldFullPath
786                 + (newFullPath.equals(oldFullPath) ? "" : "\t→\t" + newFullPath));
787         }
788 
789         /**
790          * There are the following cases, where:
791          *
792          * <pre>
793          * pathSame,    new value null:         Removing    v       p
794          * pathSame,    new value not null:     Replacing   v   v'  p
795          * pathChanges, nothing at new path:    Moving      v       p   p'
796          * pathChanges, same value at new path: Replacing   v   v'  p   p'
797          * pathChanges, value changes:          Overriding  v   v'  p   p'
798          *
799          * <pre>
800          * @param oldFullPath
801          * @param newFullPath
802          * @param newValue
803          * @param reason
804          */
replace(String oldFullPath, String newFullPath, String newValue, String reason)805         public void replace(String oldFullPath, String newFullPath, String newValue, String reason) {
806             String oldValueOldPath = cldrFileToFilter.getStringValue(oldFullPath);
807             String temp = cldrFileToFilter.getFullXPath(oldFullPath);
808             if (temp != null) {
809                 oldFullPath = temp;
810             }
811             boolean pathSame = oldFullPath.equals(newFullPath);
812 
813             if (pathSame) {
814                 if (newValue == null) {
815                     remove(oldFullPath, reason);
816                 } else if (oldValueOldPath == null) {
817                     toBeReplaced.add(oldFullPath, newValue);
818                     showAction(reason, "Adding", oldValueOldPath, null, newValue, oldFullPath, newFullPath);
819                 } else {
820                     toBeReplaced.add(oldFullPath, newValue);
821                     showAction(reason, "Replacing", oldValueOldPath, null, newValue, oldFullPath, newFullPath);
822                 }
823                 return;
824             }
825             String oldValueNewPath = cldrFileToFilter.getStringValue(newFullPath);
826             toBeRemoved.add(oldFullPath);
827             toBeReplaced.add(newFullPath, newValue);
828 
829             if (oldValueNewPath == null) {
830                 showAction(reason, "Moving", oldValueOldPath, oldValueNewPath, newValue, oldFullPath, newFullPath);
831             } else if (oldValueNewPath.equals(newValue)) {
832                 showAction(reason, "Redundant Value", oldValueOldPath, oldValueNewPath, newValue, oldFullPath, newFullPath);
833             } else {
834                 showAction(reason, "Overriding", oldValueOldPath, oldValueNewPath, newValue, oldFullPath, newFullPath);
835             }
836         }
837 
838         /**
839          * Adds a new path-value pair to the CLDRFile.
840          * @param path the new path
841          * @param value the value
842          * @param reason Reason for adding the path and value.
843          */
add(String path, String value, String reason)844         public void add(String path, String value, String reason) {
845             String oldValueOldPath = cldrFileToFilter.getStringValue(path);
846             if (oldValueOldPath == null) {
847                 toBeRemoved.remove(path);
848                 toBeReplaced.add(path, value);
849                 showAction(reason, "Adding", oldValueOldPath, null,
850                     value, path, path);
851             } else {
852                 replace(path, path, value);
853             }
854         }
855 
getReplacementFile()856         public CLDRFile getReplacementFile() {
857             return toBeReplaced;
858         }
859 
handleCleanup()860         public void handleCleanup() {
861         }
862 
handleSetup()863         public void handleSetup() {
864         }
865 
getLocaleID()866         public String getLocaleID() {
867             return localeID;
868         }
869     }
870 
871     static class FixList {
872         // simple class, so we use quick list
873         CLDRFilter[] filters = new CLDRFilter[128]; // only ascii
874         String[] helps = new String[128]; // only ascii
875         UnicodeSet options = new UnicodeSet();
876         String inputOptions = null;
877 
add(char letter, String help)878         void add(char letter, String help) {
879             add(letter, help, null);
880         }
881 
handleSetup()882         public void handleSetup() {
883             for (int i = 0; i < filters.length; ++i) {
884                 if (filters[i] != null) {
885                     filters[i].handleSetup();
886                 }
887             }
888         }
889 
handleCleanup()890         public void handleCleanup() {
891             for (int i = 0; i < filters.length; ++i) {
892                 if (filters[i] != null) {
893                     filters[i].handleCleanup();
894                 }
895             }
896         }
897 
getOptions()898         public UnicodeSet getOptions() {
899             return options;
900         }
901 
add(char letter, String help, CLDRFilter filter)902         void add(char letter, String help, CLDRFilter filter) {
903             if (helps[letter] != null) throw new IllegalArgumentException("Duplicate letter: " + letter);
904             filters[letter] = filter;
905             helps[letter] = help;
906             options.add(letter);
907         }
908 
setFile(CLDRFile file, String inputOptions, Factory factory, Set<String> removal, CLDRFile replacements)909         void setFile(CLDRFile file, String inputOptions, Factory factory, Set<String> removal, CLDRFile replacements) {
910             this.inputOptions = inputOptions;
911             for (int i = 0; i < inputOptions.length(); ++i) {
912                 char c = inputOptions.charAt(i);
913                 if (filters[c] != null) {
914                     try {
915                         filters[c].setFile(file, factory, removal, replacements);
916                     } catch (RuntimeException e) {
917                         System.err.println("Failure in " + filters[c].localeID + "\t START");
918                         throw e;
919                     }
920                 }
921             }
922         }
923 
handleStart()924         void handleStart() {
925             for (int i = 0; i < inputOptions.length(); ++i) {
926                 char c = inputOptions.charAt(i);
927                 if (filters[c] != null) {
928                     try {
929                         filters[c].handleStart();
930                     } catch (RuntimeException e) {
931                         System.err.println("Failure in " + filters[c].localeID + "\t START");
932                         throw e;
933                     }
934                 }
935             }
936         }
937 
handlePath(String xpath)938         void handlePath(String xpath) {
939             //options = options.toLowerCase();
940             for (int i = 0; i < inputOptions.length(); ++i) {
941                 char c = inputOptions.charAt(i);
942                 if (filters[c] != null) {
943                     try {
944                         filters[c].handlePath(xpath);
945                     } catch (RuntimeException e) {
946                         System.err.println("Failure in " + filters[c].localeID + "\t " + xpath);
947                         throw e;
948                     }
949                 }
950             }
951         }
952 
handleEnd()953         void handleEnd() {
954             for (int i = 0; i < inputOptions.length(); ++i) {
955                 char c = inputOptions.charAt(i);
956                 if (filters[c] != null) {
957                     try {
958                         filters[c].handleEnd();
959                     } catch (RuntimeException e) {
960                         System.err.println("Failure in " + filters[c].localeID + "\t START");
961                         throw e;
962                     }
963                 }
964             }
965         }
966 
showHelp()967         String showHelp() {
968             String result = "";
969             for (int i = 0; i < filters.length; ++i) {
970                 if (helps[i] != null) {
971                     result += "\t" + (char) i + "\t " + helps[i] + XPathParts.NEWLINE;
972                 }
973             }
974             return result;
975         }
976     }
977 
978     static Set<String> totalSkeletons = new HashSet<String>();
979 
980     static Map<String, String> rootUnitMap = new HashMap<String, String>();
981 
982     static {
983         rootUnitMap.put("second", "s");
984         rootUnitMap.put("minute", "min");
985         rootUnitMap.put("hour", "h");
986         rootUnitMap.put("day", "d");
987         rootUnitMap.put("week", "w");
988         rootUnitMap.put("month", "m");
989         rootUnitMap.put("year", "y");
990 
991         fixList.add('z', "Remove deprecated elements", new CLDRFilter() {
992 
993             public boolean isDeprecated(DtdType type, String element, String attribute, String value) {
994                 return DtdData.getInstance(type).isDeprecated(element, attribute, value);
995             }
996 
997             public boolean isDeprecated(DtdType type, String path) {
998 
999                 XPathParts parts = XPathParts.getFrozenInstance(path);
1000                 for (int i = 0; i < parts.size(); ++i) {
1001                     String element = parts.getElement(i);
1002                     if (isDeprecated(type, element, "*", "*")) {
1003                         return true;
1004                     }
1005                     for (Entry<String, String> entry : parts.getAttributes(i).entrySet()) {
1006                         String attribute = entry.getKey();
1007                         String value = entry.getValue();
1008                         if (isDeprecated(type, element, attribute, value)) {
1009                             return true;
1010                         }
1011                     }
1012                 }
1013                 return false;
1014             }
1015 
1016             @Override
1017             public void handlePath(String xpath) {
1018                 String fullPath = cldrFileToFilter.getFullXPath(xpath);
1019                 XPathParts parts = XPathParts.getFrozenInstance(fullPath);
1020                 for (int i = 0; i < parts.size(); ++i) {
1021                     String element = parts.getElement(i);
1022                     if (dtdData.isDeprecated(element, "*", "*")) {
1023                         remove(fullPath, "Deprecated element");
1024                         return;
1025                     }
1026                     for (Entry<String, String> entry : parts.getAttributes(i).entrySet()) {
1027                         String attribute = entry.getKey();
1028                         String value = entry.getValue();
1029                         if (dtdData.isDeprecated(element, attribute, value)) {
1030                             remove(fullPath, "Element with deprecated attribute(s)");
1031                         }
1032                     }
1033                 }
1034             }
1035         });
1036 
1037         fixList.add('e', "fix Interindic", new CLDRFilter() {
1038             public void handlePath(String xpath) {
1039                 if (xpath.indexOf("=\"InterIndic\"") < 0) return;
1040                 String v = cldrFileToFilter.getStringValue(xpath);
1041                 String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1042                 XPathParts fullparts = XPathParts.getFrozenInstance(fullXPath);
1043                 Map<String, String> attributes = fullparts.findAttributes("transform");
1044                 String oldValue = attributes.get("direction");
1045                 if ("both".equals(oldValue)) {
1046                     attributes.put("direction", "forward");
1047                     replace(xpath, fullparts.toString(), v);
1048                 }
1049             }
1050         });
1051 
1052         fixList.add('B', "fix bogus values", new CLDRFilter() {
1053             RegexLookup<Integer> paths = RegexLookup.<Integer> of()
1054                 .setPatternTransform(RegexLookup.RegexFinderTransformPath2)
1055                 .add("//ldml/localeDisplayNames/languages/language[@type='([^']*)']", 0)
1056                 .add("//ldml/localeDisplayNames/scripts/script[@type='([^']*)']", 0)
1057                 .add("//ldml/localeDisplayNames/territories/territory[@type='([^']*)']", 0)
1058                 .add("//ldml/dates/timeZoneNames/metazone[@type='([^']*)']", 0)
1059                 .add("//ldml/dates/timeZoneNames/zone[@type='([^']*)']/exemplarCity", 0)
1060                 .add("//ldml/numbers/currencies/currency[@type='([^']*)']/displayName", 0);
1061             Output<String[]> arguments = new Output<>();
1062             CLDRFile english = CLDRConfig.getInstance().getEnglish();
1063             boolean skip;
1064 
1065             @Override
1066             public void handleStart() {
1067                 CLDRFile resolved = factory.make(cldrFileToFilter.getLocaleID(), true);
1068                 UnicodeSet exemplars = resolved.getExemplarSet(ExemplarType.main, WinningChoice.WINNING);
1069                 skip = exemplars.containsSome('a', 'z');
1070                 // TODO add simpler way to skip file entirely
1071             }
1072 
1073             public void handlePath(String xpath) {
1074                 if (skip) {
1075                     return;
1076                 }
1077                 Integer lookupValue = paths.get(xpath, null, arguments);
1078                 if (lookupValue == null) {
1079                     return;
1080                 }
1081                 String type = arguments.value[1];
1082                 String value = cldrFileToFilter.getStringValue(xpath);
1083                 if (value.equals(type)) {
1084                     remove(xpath, "Matches code");
1085                     return;
1086                 }
1087                 String evalue = english.getStringValue(xpath);
1088                 if (value.equals(evalue)) {
1089                     remove(xpath, "Matches English");
1090                     return;
1091                 }
1092             }
1093         });
1094 
1095         fixList.add('s', "fix alt accounting", new CLDRFilter() {
1096             @Override
1097             public void handlePath(String xpath) {
1098                 XPathParts parts = XPathParts.getFrozenInstance(xpath);
1099                 if (!parts.containsAttributeValue("alt", "accounting")) {
1100                     return;
1101                 }
1102                 String oldFullXPath = cldrFileToFilter.getFullXPath(xpath);
1103                 String value = cldrFileToFilter.getStringValue(xpath);
1104                 XPathParts fullparts = XPathParts.getInstance(oldFullXPath); // not frozen, for removeAttribute
1105                 fullparts.removeAttribute("pattern", "alt");
1106                 fullparts.setAttribute("currencyFormat", "type", "accounting");
1107                 String newFullXPath = fullparts.toString();
1108                 replace(oldFullXPath, newFullXPath, value, "Move alt=accounting value to new path");
1109             }
1110         });
1111 
1112         fixList.add('n', "add unit displayName", new CLDRFilter() {
1113             @Override
1114             public void handlePath(String xpath) {
1115                 if (xpath.indexOf("/units/unitLength[@type=\"long\"]") < 0 || xpath.indexOf("/unitPattern[@count=\"other\"]") < 0 ||
1116                     xpath.indexOf("[@draft=\"unconfirmed\"]") >= 0) {
1117                     return;
1118                 }
1119                 String value = cldrFileToFilter.getStringValue(xpath);
1120                 String newValue = null;
1121                 if (value.startsWith("{0}")) {
1122                     newValue = value.substring(3).trim();
1123                 } else if (value.endsWith("{0}")) {
1124                     newValue = value.substring(0, value.length() - 3).trim();
1125                 } else {
1126                     System.out.println("unitPattern-other does not start or end with \"{0}\": \"" + value + "\"");
1127                     return;
1128                 }
1129 
1130                 String oldFullXPath = cldrFileToFilter.getFullXPath(xpath);
1131                 String newFullXPath = oldFullXPath.substring(0, oldFullXPath.indexOf("unitPattern")).concat("displayName[@draft=\"provisional\"]");
1132                 add(newFullXPath, newValue, "create unit displayName-long from unitPattern-long-other");
1133                 String newFullXPathShort = newFullXPath.replace("[@type=\"long\"]", "[@type=\"short\"]");
1134                 add(newFullXPathShort, newValue, "create unit displayName-short from unitPattern-long-other");
1135             }
1136         });
1137 
1138         fixList.add('x', "retain paths", new CLDRFilter() {
1139             Matcher m = null;
1140 
1141             public void handlePath(String xpath) {
1142                 if (m == null) {
1143                     m = PatternCache.get(options[PATH].value).matcher("");
1144                 }
1145                 //String v = cldrFileToFilter.getStringValue(xpath);
1146                 String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1147                 if (!m.reset(fullXPath).matches()) {
1148                     remove(xpath);
1149                 }
1150             }
1151         });
1152 
1153 //        fixList.add('_', "remove superfluous compound language translations", new CLDRFilter() {
1154 //            private CLDRFile resolved;
1155 //
1156 //            public void handleStart() {
1157 //                resolved = factory.make(cldrFileToFilter.getLocaleID(), true);
1158 //            }
1159 //
1160 //            public void handlePath(String xpath) {
1161 //                if (!xpath.contains("_")) return;
1162 //                if (!xpath.contains("/language")) return;
1163 //                XPathParts parts = XPathParts.getFrozenInstance(xpath);
1164 //                String languageCode = parts.findAttributeValue("language", "type");
1165 //                String v = resolved.getStringValue(xpath);
1166 //                if (v.equals(languageCode)) {
1167 //                    remove(xpath, "same as language code");
1168 //                    return;
1169 //                }
1170 //                String generatedTranslation = resolved.getName(languageCode, true);
1171 //                if (v.equals(generatedTranslation)) {
1172 //                    remove(xpath, "superfluous compound language");
1173 //                }
1174 //                String spacelessGeneratedTranslation = generatedTranslation.replace(" ", "");
1175 //                if (v.equals(spacelessGeneratedTranslation)) {
1176 //                    remove(xpath, "superfluous compound language (after removing space)");
1177 //                }
1178 //            }
1179 //        });
1180 
1181         fixList.add('l', "change language code", new CLDRFilter() {
1182             private CLDRFile resolved;
1183 
1184             public void handleStart() {
1185                 resolved = factory.make(cldrFileToFilter.getLocaleID(), true);
1186             }
1187 
1188             public void handlePath(String xpath) {
1189                 if (!xpath.contains("/language")) {
1190                     return;
1191                 }
1192                 XPathParts parts = XPathParts.getInstance(xpath); // not frozen, for setAttribute
1193                 String languageCode = parts.findAttributeValue("language", "type");
1194                 String v = resolved.getStringValue(xpath);
1195                 if (!languageCode.equals("swc")) {
1196                     return;
1197                 }
1198                 parts.setAttribute("language", "type", "sw_CD");
1199                 replace(xpath, parts.toString(), v);
1200             }
1201         });
1202 
1203 //        fixList.add('m', "remove multiple alt-variants", new CLDRFilter() {
1204 //
1205 //            public void handleStart() {
1206 //            }
1207 //
1208 //            public void handlePath(String xpath) {
1209 //                XPathParts parts = XPathParts.getFrozenInstance(xpath);
1210 //                if (!parts.containsAttributeValue("alt", "variant")) {
1211 //                    return;
1212 //                }
1213 //                String variantValue = cldrFileToFilter.getStringValue(xpath);
1214 //                String nonVariantXpath = xpath.replaceAll("\\[\\@alt=\"variant\"\\]", "");
1215 //                String nonVariantValue = cldrFileToFilter.getStringValue(nonVariantXpath);
1216 //                if (variantValue.equals(nonVariantValue)) {
1217 //                    remove(xpath, "removing superfluous alt-variant value");
1218 //                }
1219 //            }
1220 //        });
1221 
1222         fixList.add('g', "Swap alt/non-alt values for Czechia", new CLDRFilter() {
1223 
1224             public void handleStart() {
1225             }
1226 
1227             public void handlePath(String xpath) {
1228                 XPathParts parts = XPathParts.getFrozenInstance(xpath);
1229                 if (!parts.containsAttributeValue("alt", "variant") || !parts.containsAttributeValue("type", "CZ")) {
1230                     return;
1231                 }
1232                 String variantValue = cldrFileToFilter.getStringValue(xpath);
1233                 String nonVariantXpath = xpath.replaceAll("\\[\\@alt=\"variant\"\\]", "");
1234                 String nonVariantValue = cldrFileToFilter.getStringValue(nonVariantXpath);
1235                 replace(xpath, xpath, nonVariantValue);
1236                 replace(nonVariantXpath, nonVariantXpath, variantValue);
1237             }
1238         });
1239 
1240         fixList.add('u', "fix duration unit patterns", new CLDRFilter() {
1241 
1242             public void handlePath(String xpath) {
1243                 if (!xpath.contains("/units")) {
1244                     return;
1245                 }
1246                 if (!xpath.contains("/durationUnitPattern")) {
1247                     return;
1248                 }
1249 
1250                 String value = cldrFileToFilter.getStringValue(xpath);
1251                 String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1252 
1253                 XPathParts parts = XPathParts.getFrozenInstance(fullXPath);
1254                 String unittype = parts.findAttributeValue("durationUnit", "type");
1255 
1256                 String newFullXpath = "//ldml/units/durationUnit[@type=\"" + unittype + "\"]/durationUnitPattern";
1257                 replace(fullXPath, newFullXpath, value, "converting to new duration unit structure");
1258             }
1259         });
1260 
1261         fixList.add('a', "Fix 0/1", new CLDRFilter() {
1262             final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze();
1263             PluralInfo info;
1264 
1265             @Override
1266             public void handleStart() {
1267                 info = SupplementalDataInfo.getInstance().getPlurals(super.localeID);
1268             }
1269 
1270             @Override
1271             public void handlePath(String xpath) {
1272 
1273                 if (xpath.indexOf("count") < 0) {
1274                     return;
1275                 }
1276                 String fullpath = cldrFileToFilter.getFullXPath(xpath);
1277                 XPathParts parts = XPathParts.getInstance(fullpath); // not frozen, for setAttribute
1278                 String countValue = parts.getAttributeValue(-1, "count");
1279                 if (!DIGITS.containsAll(countValue)) {
1280                     return;
1281                 }
1282                 int intValue = Integer.parseInt(countValue);
1283                 Count count = info.getCount(intValue);
1284                 parts.setAttribute(-1, "count", count.toString());
1285                 String newPath = parts.toString();
1286                 String oldValue = cldrFileToFilter.getStringValue(newPath);
1287                 String value = cldrFileToFilter.getStringValue(xpath);
1288                 if (oldValue != null) {
1289                     String fixed = oldValue.replace("{0}", countValue);
1290                     if (value.equals(oldValue)
1291                         || value.equals(fixed)) {
1292                         remove(fullpath, "Superfluous given: "
1293                             + count + "→«" + oldValue + "»");
1294                     } else {
1295                         remove(fullpath, "Can’t replace: "
1296                             + count + "→«" + oldValue + "»");
1297                     }
1298                     return;
1299                 }
1300                 replace(fullpath, newPath, value, "Moving 0/1");
1301             }
1302         });
1303 
1304         fixList.add('b', "Prep for bulk import", new CLDRFilter() {
1305 
1306             public void handlePath(String xpath) {
1307                 if (!options[USER].doesOccur) {
1308                     return;
1309                 }
1310                 String userID = options[USER].value;
1311                 String fullpath = cldrFileToFilter.getFullXPath(xpath);
1312                 String value = cldrFileToFilter.getStringValue(xpath);
1313                 XPathParts parts = XPathParts.getInstance(fullpath); // not frozen, for addAttribute
1314                 parts.addAttribute("draft", "unconfirmed");
1315                 parts.addAttribute("alt", "proposed-u" + userID + "-implicit1.8");
1316                 String newPath = parts.toString();
1317                 replace(fullpath, newPath, value);
1318             }
1319         });
1320 
1321         fixList.add('c', "Fix transiton from an old currency code to a new one", new CLDRFilter() {
1322             public void handlePath(String xpath) {
1323                 String oldCurrencyCode = "VEF";
1324                 String newCurrencyCode = "VES";
1325                 int fromDate = 2008;
1326                 int toDate = 2018;
1327                 String leadingParenString = " (";
1328                 String trailingParenString = ")";
1329                 String separator = "\u2013";
1330                 String languageTag = "root";
1331 
1332                 if (xpath.indexOf("/currency[@type=\"" + oldCurrencyCode + "\"]/displayName") < 0) {
1333                     return;
1334                 }
1335                 String value = cldrFileToFilter.getStringValue(xpath);
1336                 String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1337                 String newFullXPath = fullXPath.replace(oldCurrencyCode, newCurrencyCode);
1338                 cldrFileToFilter.add(newFullXPath, value);
1339 
1340                 // Exceptions for locales that use an alternate numbering system or a different format for the dates at
1341                 // the end.
1342                 // Add additional ones as necessary
1343                 String localeID = cldrFileToFilter.getLocaleID();
1344                 if (localeID.equals("ne")) {
1345                     languageTag = "root-u-nu-deva";
1346                 } else if (localeID.equals("bn")) {
1347                     languageTag = "root-u-nu-beng";
1348                 } else if (localeID.equals("ar")) {
1349                     leadingParenString = " - ";
1350                     trailingParenString = "";
1351                 } else if (localeID.equals("fa")) {
1352                     languageTag = "root-u-nu-arabext";
1353                     separator = Utility.unescape(" \\u062A\\u0627 ");
1354                 }
1355 
1356                 NumberFormat nf = NumberFormat.getInstance(ULocale.forLanguageTag(languageTag));
1357                 nf.setGroupingUsed(false);
1358 
1359                 String tagString = leadingParenString + nf.format(fromDate) + separator + nf.format(toDate)
1360                 + trailingParenString;
1361 
1362                 replace(fullXPath, fullXPath, value + tagString);
1363 
1364             }
1365         });
1366 
1367         fixList.add('p', "input-processor", new CLDRFilter() {
1368             private DisplayAndInputProcessor inputProcessor;
1369 
1370             public void handleStart() {
1371                 inputProcessor = new DisplayAndInputProcessor(cldrFileToFilter, true);
1372             }
1373 
1374             public void handleEnd() {
1375                 inputProcessor = null; // clean up, just in case
1376             }
1377 
1378             public void handlePath(String xpath) {
1379                 String value = cldrFileToFilter.getStringValue(xpath);
1380                 if (!value.equals(value.trim())) {
1381                     value = value; // for debugging
1382                 }
1383                 String newValue = inputProcessor.processInput(xpath, value, null);
1384                 if (value.equals(newValue)) {
1385                     return;
1386                 }
1387                 String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1388                 replace(fullXPath, fullXPath, newValue);
1389             }
1390         });
1391 
1392         fixList.add('t', "Fix missing count values groups", new CLDRFilter() {
1393 
1394             public void handlePath(String xpath) {
1395                 if (xpath.indexOf("@count=\"other\"") < 0) {
1396                     return;
1397                 }
1398 
1399                 String value = cldrFileToFilter.getStringValue(xpath);
1400                 String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1401                 String[] missingCounts = { "one" };
1402                 for (String count : missingCounts) {
1403                     String newFullXPath = fullXPath.replace("other", count);
1404                     if (cldrFileToFilter.getWinningValue(newFullXPath) == null) {
1405                         add(newFullXPath, value, "Adding missing plural form");
1406                     }
1407                 }
1408 
1409             }
1410         });
1411 
1412         fixList.add('f', "NFC (all but transforms, exemplarCharacters, pc, sc, tc, qc, ic)", new CLDRFilter() {
1413             public void handlePath(String xpath) {
1414                 if (xpath.indexOf("/segmentation") >= 0
1415                     || xpath.indexOf("/transforms") >= 0
1416                     || xpath.indexOf("/exemplarCharacters") >= 0
1417                     || xpath.indexOf("/pc") >= 0
1418                     || xpath.indexOf("/sc") >= 0
1419                     || xpath.indexOf("/tc") >= 0
1420                     || xpath.indexOf("/qc") >= 0
1421                     || xpath.indexOf("/ic") >= 0) return;
1422                 String value = cldrFileToFilter.getStringValue(xpath);
1423                 String nfcValue = Normalizer.compose(value, false);
1424                 if (value.equals(nfcValue)) return;
1425                 String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1426                 replace(fullXPath, fullXPath, nfcValue);
1427             }
1428         });
1429 
1430         fixList.add('v', "remove illegal codes", new CLDRFilter() {
1431 
1432             /*
1433              * Set legalCurrencies;
1434              * }
1435              * {
1436              * StandardCodes sc = StandardCodes.make();
1437              * legalCurrencies = new TreeSet(sc.getAvailableCodes("currency"));
1438              * // first remove non-ISO
1439              * for (Iterator it = legalCurrencies.iterator(); it.hasNext();) {
1440              * String code = (String) it.next();
1441              * List data = sc.getFullData("currency", code);
1442              * if ("X".equals(data.get(3))) it.remove();
1443              * }
1444              * }
1445              */
1446             StandardCodes sc = StandardCodes.make();
1447             String[] codeTypes = { "language", "script", "territory", "currency" };
1448 
1449             public void handlePath(String xpath) {
1450                 if (xpath.indexOf("/currency") < 0
1451                     && xpath.indexOf("/timeZoneNames") < 0
1452                     && xpath.indexOf("/localeDisplayNames") < 0) return;
1453                 XPathParts parts = XPathParts.getFrozenInstance(xpath);
1454                 String code;
1455                 for (int i = 0; i < codeTypes.length; ++i) {
1456                     code = parts.findAttributeValue(codeTypes[i], "type");
1457                     if (code != null) {
1458                         if (!sc.getGoodAvailableCodes(codeTypes[i]).contains(code)) remove(xpath);
1459                         return;
1460                     }
1461                 }
1462                 code = parts.findAttributeValue("zone", "type");
1463                 if (code != null) {
1464                     if (code.indexOf("/GMT") >= 0) remove(xpath);
1465                 }
1466 
1467             }
1468         });
1469 
1470         fixList.add('w', "fix alt='...proposed' when there is no alternative", new CLDRFilter() {
1471             private Set<String> newFullXPathSoFar = new HashSet<String>();
1472 
1473             public void handlePath(String xpath) {
1474                 if (xpath.indexOf("proposed") < 0) return;
1475                 String fullXPath = cldrFileToFilter.getFullXPath(xpath);
1476                 XPathParts parts = XPathParts.getInstance(fullXPath); // not frozen, for removeProposed
1477                 String newFullXPath = parts.removeProposed().toString();
1478                 // now see if there is an uninherited value
1479                 String value = cldrFileToFilter.getStringValue(xpath);
1480                 String baseValue = cldrFileToFilter.getStringValue(newFullXPath);
1481                 if (baseValue != null) {
1482                     // if the value AND the fullxpath are the same as what we have, then delete
1483                     if (value.equals(baseValue)) {
1484                         String baseFullXPath = cldrFileToFilter.getFullXPath(newFullXPath);
1485                         if (baseFullXPath.equals(newFullXPath)) {
1486                             remove(xpath, "alt=base");
1487                         }
1488                     }
1489                     return; // there is, so skip
1490                 }
1491                 // there isn't, so modif if we haven't done so already
1492                 if (!newFullXPathSoFar.contains(newFullXPath)) {
1493                     replace(fullXPath, newFullXPath, value);
1494                     newFullXPathSoFar.add(newFullXPath);
1495                 }
1496             }
1497         });
1498         /*
1499          * Fix id to be identical to skeleton
1500          * Eliminate any single-field ids
1501          * Add "L" (stand-alone month), "?" (other stand-alones)
1502          * Remove any fields with both a date and a time
1503          * Test that datetime format is valid format (will have to fix by hand)
1504          * Map k, K to H, h
1505          *
1506          * In Survey Tool: don't show id; compute when item added or changed
1507          * test validity
1508          */
1509 
1510         fixList.add('d', "fix dates", new CLDRFilter() {
1511             DateTimePatternGenerator dateTimePatternGenerator = DateTimePatternGenerator.getEmptyInstance();
1512             DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser();
1513             Map<String, Set<String>> seenSoFar = new HashMap<String, Set<String>>();
1514 
1515             public void handleStart() {
1516                 seenSoFar.clear();
1517             }
1518 
1519             public void handlePath(String xpath) {
1520                 // timeFormatLength type="full"
1521                 if (xpath.contains("timeFormatLength") && xpath.contains("full")) {
1522                     String fullpath = cldrFileToFilter.getFullXPath(xpath);
1523                     String value = cldrFileToFilter.getStringValue(xpath);
1524                     boolean gotChange = false;
1525                     List<Object> list = formatParser.set(value).getItems();
1526                     for (int i = 0; i < list.size(); ++i) {
1527                         Object item = list.get(i);
1528                         if (item instanceof DateTimePatternGenerator.VariableField) {
1529                             String itemString = item.toString();
1530                             if (itemString.charAt(0) == 'z') {
1531                                 list.set(i, new VariableField(Utility.repeat("v", itemString.length())));
1532                                 gotChange = true;
1533                             }
1534                         }
1535                     }
1536                     if (gotChange) {
1537                         String newValue = toStringWorkaround();
1538                         if (value != newValue) {
1539                             replace(xpath, fullpath, newValue);
1540                         }
1541                     }
1542                 }
1543                 if (xpath.indexOf("/availableFormats") < 0) {
1544                     return;
1545                 }
1546                 String value = cldrFileToFilter.getStringValue(xpath);
1547                 if (value == null) {
1548                     return; // not in current file
1549                 }
1550 
1551                 String fullpath = cldrFileToFilter.getFullXPath(xpath);
1552                 XPathParts fullparts = XPathParts.getFrozenInstance(fullpath);
1553                 Map<String, String> attributes = fullparts.findAttributes("dateFormatItem");
1554                 String id = attributes.get("id");
1555                 String oldID = id;
1556                 try {
1557                     id = dateTimePatternGenerator.getBaseSkeleton(id);
1558                     if (id.equals(oldID)) {
1559                         return;
1560                     }
1561                     System.out.println(oldID + " => " + id);
1562                 } catch (RuntimeException e) {
1563                     id = "[error]";
1564                     return;
1565                 }
1566 
1567                 attributes.put("id", id);
1568                 totalSkeletons.add(id);
1569 
1570                 replace(xpath, fullparts.toString(), value);
1571             }
1572 
1573             private String toStringWorkaround() {
1574                 StringBuffer result = new StringBuffer();
1575                 List<Object> items = formatParser.getItems();
1576                 for (int i = 0; i < items.size(); ++i) {
1577                     Object item = items.get(i);
1578                     if (item instanceof String) {
1579                         result.append(formatParser.quoteLiteral((String) items.get(i)));
1580                     } else {
1581                         result.append(items.get(i).toString());
1582                     }
1583                 }
1584                 return result.toString();
1585             }
1586 
1587         });
1588 
1589         fixList.add('y', "fix years to be y (with exceptions)", new CLDRFilter() {
1590             DateTimeCanonicalizer dtc = new DateTimeCanonicalizer(true);
1591 
1592             DateTimePatternGenerator dateTimePatternGenerator = DateTimePatternGenerator.getEmptyInstance();
1593             DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser();
1594             Map<String, Set<String>> seenSoFar = new HashMap<String, Set<String>>();
1595 
1596             public void handleStart() {
1597                 seenSoFar.clear();
1598             }
1599 
1600             public void handlePath(String xpath) {
1601                 DateTimePatternType datetimePatternType = DateTimePatternType.fromPath(xpath);
1602 
1603                 // check to see if we need to change the value
1604 
1605                 if (!DateTimePatternType.STOCK_AVAILABLE_INTERVAL_PATTERNS.contains(datetimePatternType)) {
1606                     return;
1607                 }
1608                 String oldValue = cldrFileToFilter.getStringValue(xpath);
1609                 String value = dtc.getCanonicalDatePattern(xpath, oldValue, datetimePatternType);
1610 
1611                 String fullPath = cldrFileToFilter.getFullXPath(xpath);
1612                 // Deleted code to canonicalize id for availableFormats items (cldrbug 5760)
1613 
1614                 if (value.equals(oldValue)) {
1615                     return;
1616                 }
1617 
1618                 // made it through the gauntlet, so replace
1619 
1620                 replace(xpath, fullPath, value);
1621             }
1622         });
1623 
1624         // This should only be applied to specific locales, and the results checked manually afterward.
1625         // It will only create ranges using the same digits as in root, not script-specific digits.
1626         // Any pre-existing year ranges should use the range marker from the intervalFormats "y" item.
1627         // This make several assumptions and is somewhat *FRAGILE*.
1628         fixList.add('j', "add year ranges from root to Japanese calendar eras", new CLDRFilter() {
1629             private CLDRFile rootFile;
1630 
1631             public void handleStart() {
1632                 rootFile = factory.make("root", false);
1633             }
1634 
1635             public void handlePath(String xpath) {
1636                 // Skip paths we don't care about
1637                 if (xpath.indexOf("/calendar[@type=\"japanese\"]/eras/era") < 0) return;
1638                 // Get root name for the era, check it
1639                 String rootEraValue = rootFile.getStringValue(xpath);
1640                 int rootEraIndex = rootEraValue.indexOf(" (");
1641                 if (rootEraIndex < 0) return; // this era does not have a year range in root, no need to add one in this
1642                 // locale
1643                 // Get range marker from intervalFormat range for y
1644                 String yearIntervalFormat = cldrFileToFilter
1645                     .getStringValue(
1646                         "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"y\"]/greatestDifference[@id=\"y\"]");
1647                 if (yearIntervalFormat == null) return; // oops, no intervalFormat data for y
1648                 String rangeMarker = yearIntervalFormat.replaceAll("[.y\u5E74\uB144]", ""); // *FRAGILE* strip out
1649                 // everything except the
1650                 // range-indicating part
1651                 // Get current locale name for this era, check it
1652                 String eraValue = cldrFileToFilter.getStringValue(xpath);
1653                 if (eraValue.indexOf('(') >= 0 && eraValue.indexOf(rangeMarker) >= 0) return; // this eraValue already
1654                 // has a year range that
1655                 // uses the appropriate
1656                 // rangeMarker
1657                 // Now update the root year range it with the rangeMarker for this locale, and append it to this
1658                 // locale's name
1659                 String rootYearRange = rootEraValue.substring(rootEraIndex);
1660                 String appendYearRange = rootYearRange.replaceAll("[\u002D\u2013]", rangeMarker);
1661                 String newEraValue = eraValue.concat(appendYearRange);
1662                 String fullpath = cldrFileToFilter.getFullXPath(xpath);
1663                 replace(xpath, fullpath, newEraValue);
1664                 // System.out.println("CLDRModify fj: rootEraValue: \"" + rootEraValue + "\", eraValue: \"" + eraValue +
1665                 // "\", rangeMarker: \"" + rangeMarker + "\"");
1666             }
1667         });
1668 
1669         fixList.add('r', "fix references and standards", new CLDRFilter() {
1670             int currentRef = 500;
1671             Map<String, TreeMap<String, String>> locale_oldref_newref = new TreeMap<String, TreeMap<String, String>>();
1672             TreeMap<String, String> oldref_newref;
1673 
1674             //LanguageTagParser ltp = new LanguageTagParser();
1675 
1676             // References standards = new References(true);
1677             // References references = new References(false);
1678 
1679             public void handleStart() {
1680                 String locale = cldrFileToFilter.getLocaleID();
1681                 oldref_newref = locale_oldref_newref.get(locale);
1682                 if (oldref_newref == null) {
1683                     oldref_newref = new TreeMap<String, String>();
1684                     locale_oldref_newref.put(locale, oldref_newref);
1685                 }
1686             }
1687 
1688             // // Samples:
1689             // // <language type="ain" references="RP1">阿伊努文</language>
1690             // // <reference type="R1" uri="http://www.info.gov.hk/info/holiday_c.htm">二零零五年公眾假期刊登憲報</reference>
1691             public void handlePath(String xpath) {
1692                 // must be minimised for this to work.
1693                 String fullpath = cldrFileToFilter.getFullXPath(xpath);
1694                 if (!fullpath.contains("reference")) return;
1695                 String value = cldrFileToFilter.getStringValue(xpath);
1696                 XPathParts fullparts = XPathParts.getInstance(fullpath); // can't be frozen
1697                 if ("reference".equals(fullparts.getElement(-1))) {
1698                     fixType(value, "type", fullpath, fullparts);
1699                 } else if (fullparts.getAttributeValue(-1, "references") != null) {
1700                     fixType(value, "references", fullpath, fullparts);
1701                 } else {
1702                     System.out.println("CLDRModify: Skipping: " + xpath);
1703                 }
1704             }
1705 
1706             /**
1707              *
1708              * @param value
1709              * @param type
1710              * @param oldFullPath
1711              * @param fullparts the XPathParts -- must not be frozen, for addAttribute
1712              */
1713             private void fixType(String value, String type, String oldFullPath, XPathParts fullparts) {
1714                 String ref = fullparts.getAttributeValue(-1, type);
1715                 if (whitespace.containsSome(ref)) {
1716                     throw new IllegalArgumentException("Whitespace in references");
1717                 }
1718                 String newRef = getNewRef(ref);
1719                 fullparts.addAttribute(type, newRef);
1720                 replace(oldFullPath, fullparts.toString(), value);
1721             }
1722 
1723             private String getNewRef(String ref) {
1724                 String newRef = oldref_newref.get(ref);
1725                 if (newRef == null) {
1726                     newRef = String.valueOf(currentRef++);
1727                     newRef = "R" + Utility.repeat("0", (3 - newRef.length())) + newRef;
1728                     oldref_newref.put(ref, newRef);
1729                 }
1730                 return newRef;
1731             }
1732         });
1733 
        fixList.add('q', "fix annotation punctuation", new CLDRFilter() {
            /**
             * Normalizes a keyword annotation item: a single-character set in the cp attribute is replaced
             * by that character, an inline tts attribute is split off into a separate type="tts" item,
             * the value's separators are normalized to " | ", and the item's pre-block comment is
             * re-attached to the rewritten path with a hex label in front.
             *
             * @param xpath the path currently being visited
             */
            @Override
            public void handlePath(String xpath) {
                if (!xpath.contains("/annotation")) {
                    return;
                }
                String fullpath = cldrFileToFilter.getFullXPath(xpath);
                XPathParts parts = XPathParts.getInstance(fullpath); // not frozen, for putAttributeValue
                String cp = parts.getAttributeValue(2, "cp");
                String tts = parts.getAttributeValue(2, "tts");
                String type = parts.getAttributeValue(2, "type");
                if ("tts".equals(type)) {
                    return; // ok, skip
                }
                // NOTE(review): this default label is kept whenever cp is not written as a set; it looks
                // like a leftover placeholder -- confirm whether the hex of cp was intended instead.
                String hex = "1F600";
                if (cp.startsWith("[")) {
                    UnicodeSet us = new UnicodeSet(cp);
                    if (us.size() == 1) {
                        // A set with a single member is stored as the bare character.
                        cp = us.iterator().next();
                        hex = Utility.hex(cp);
                    } else {
                        hex = us.toString();
                    }
                    parts.putAttributeValue(2, "cp", cp);
                }
                // Drop tts before cloning so the clone below does not carry the attribute either.
                parts.removeAttribute(2, "tts");
                if (tts != null) {
                    String newTts = CldrUtility.join(COMMA_SEMI.splitToList(tts), ", ");
                    XPathParts parts2 = parts.cloneAsThawed();
                    parts2.putAttributeValue(2, "type", "tts");
                    add(parts2.toString(), newTts, "separate tts");
                }
                String value = cldrFileToFilter.getStringValue(xpath);
                String newValue = CldrUtility.join(COMMA_SEMI.splitToList(value), " | ");
                final String newFullPath = parts.toString();
                // The comment is moved and relabelled even when the path and value end up unchanged.
                Comments comments = cldrFileToFilter.getXpath_comments();
                String comment = comments.removeComment(CommentType.PREBLOCK, xpath);
                comment = hex + (comment == null ? "" : " " + comment);
                comments.addComment(CommentType.PREBLOCK, newFullPath, comment);
                if (!fullpath.equals(newFullPath) || !value.equals(newValue)) {
                    replace(fullpath, newFullPath, newValue);
                }
            }
        });
1778 
1779         fixList.add('Q', "add annotation names to keywords", new CLDRFilter() {
1780             Set<String> available = Annotations.getAvailable();
1781             TreeSet<String> sorted = new TreeSet<>(Collator.getInstance(ULocale.ROOT));
1782             CLDRFile resolved;
1783 
1784             @Override
1785             public void handleStart() {
1786                 String localeID = cldrFileToFilter.getLocaleID();
1787                 if (!available.contains(localeID)) {
1788                     throw new IllegalArgumentException("no annotations available, probably wrong directory");
1789                 }
1790                 ;
1791                 resolved = factory.make(localeID, true);
1792             }
1793 
1794             @Override
1795             public void handlePath(String xpath) {
1796                 if (!xpath.contains("/annotation")) {
1797                     return;
1798                 }
1799                 //      <annotation cp="��">100 | honderd | persent | telling | vol</annotation>
1800                 //      <annotation cp="��" type="tts">honderd punte</annotation>
1801                 //      we will copy honderd punte into the list of keywords.
1802                 String fullpath = cldrFileToFilter.getFullXPath(xpath);
1803                 XPathParts parts = XPathParts.getFrozenInstance(fullpath);
1804                 String type = parts.getAttributeValue(2, "type");
1805                 if (type == null) {
1806                     return; // no TTS, so keywords, skip
1807                 }
1808 
1809                 XPathParts keywordParts = parts.cloneAsThawed().removeAttribute(2, "type");
1810                 String keywordPath = keywordParts.toString();
1811                 String rawKeywordValue = cldrFileToFilter.getStringValue(keywordPath);
1812 
1813                 // skip if keywords AND name are inherited
1814                 if (rawKeywordValue == null || rawKeywordValue.equals(CldrUtility.INHERITANCE_MARKER)) {
1815                     String rawName = cldrFileToFilter.getStringValue(xpath);
1816                     if (rawName == null || rawName.equals(CldrUtility.INHERITANCE_MARKER)) {
1817                         return;
1818                     }
1819                 }
1820 
1821                 // skip if the name is not above root
1822                 String nameSourceLocale = resolved.getSourceLocaleID(xpath, null);
1823                 if ("root".equals(nameSourceLocale) || XMLSource.CODE_FALLBACK_ID.equals(nameSourceLocale)) {
1824                     return;
1825                 }
1826 
1827                 String name = resolved.getStringValue(xpath);
1828                 String keywordValue = resolved.getStringValue(keywordPath);
1829                 String sourceLocaleId = resolved.getSourceLocaleID(keywordPath, null);
1830                 sorted.clear();
1831                 sorted.add(name);
1832                 List<String> items;
1833                 if (!sourceLocaleId.equals(XMLSource.ROOT_ID) && !sourceLocaleId.equals(XMLSource.CODE_FALLBACK_ID)) {
1834                     items = Annotations.splitter.splitToList(keywordValue);
1835                     sorted.addAll(items);
1836                 } else {
1837                     int debug = 0;
1838                 }
1839                 DisplayAndInputProcessor.filterCoveredKeywords(sorted);
1840                 String newKeywordValue = CollectionUtilities.join(sorted, " | ");
1841                 if (!newKeywordValue.equals(keywordValue)) {
1842                     replace(keywordPath, keywordPath, newKeywordValue);
1843                 }
1844             }
1845         });
1846 
1847         fixList.add('N', "add number symbols to exemplars", new CLDRFilter() {
1848             CLDRFile resolved;
1849             UnicodeSet numberStuff = new UnicodeSet();
1850             Set<String> seen = new HashSet<>();
1851             Set<String> hackAllowOnly = new HashSet<>();
1852             boolean skip = false;
1853 
1854             @Override
1855             public void handleStart() {
1856                 String localeID = cldrFileToFilter.getLocaleID();
1857                 resolved = factory.make(localeID, true);
1858                 numberStuff.clear();
1859                 seen.clear();
1860                 skip = localeID.equals("root");
1861                 // TODO add return value to handleStart to skip calling handlePath
1862 
1863                 if (NUMBER_SYSTEM_HACK) {
1864                     hackAllowOnly.clear();
1865                     for (NumberingSystem system : NumberingSystem.values()) {
1866                         String numberingSystem = system.path == null ? "latn" : cldrFileToFilter.getStringValue(system.path);
1867                         if (numberingSystem != null) {
1868                             hackAllowOnly.add(numberingSystem);
1869                         }
1870                     }
1871                     int debug = 0;
1872                 }
1873             }
1874 
1875             @Override
1876             public void handlePath(String xpath) {
1877                 // the following doesn't work without NUMBER_SYSTEM_HACK, because there are spurious numbersystems in the data.
1878                 // http://unicode.org/cldr/trac/ticket/10648
1879                 // so using a hack for now in handleEnd
1880                 if (skip || !xpath.startsWith("//ldml/numbers/symbols")) {
1881                     return;
1882                 }
1883 
1884                 // //ldml/numbers/symbols[@numberSystem="latn"]/exponential
1885                 XPathParts parts = XPathParts.getFrozenInstance(xpath);
1886                 String system = parts.getAttributeValue(2, "numberSystem");
1887                 if (system == null) {
1888                     System.err.println("Bogus numberSystem:\t" + cldrFileToFilter.getLocaleID() + " \t" + xpath);
1889                     return;
1890                 } else if (seen.contains(system) || !hackAllowOnly.contains(system)) {
1891                     return;
1892                 }
1893                 seen.add(system);
1894                 UnicodeSet exemplars = resolved.getExemplarsNumeric(system);
1895                 System.out.println("# " + system + " ==> " + exemplars.toPattern(false));
1896                 for (String s : exemplars) {
1897                     numberStuff.addAll(s); // add individual characters
1898                 }
1899             }
1900 
1901             @Override
1902             public void handleEnd() {
1903                 if (!numberStuff.isEmpty()) {
1904                     UnicodeSet current = cldrFileToFilter.getExemplarSet(ExemplarType.numbers, WinningChoice.WINNING);
1905                     if (!numberStuff.equals(current)) {
1906                         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(cldrFileToFilter);
1907                         if (current != null && !current.isEmpty()) {
1908                             numberStuff.addAll(current);
1909                         }
1910                         String path = CLDRFile.getExemplarPath(ExemplarType.numbers);
1911                         String value = daip.getPrettyPrinter().format(numberStuff);
1912                         replace(path, path, value);
1913                     }
1914                 }
1915             }
1916         });
1917 
1918         fixList.add('k',
1919             "fix according to -k config file. Details on http://cldr.unicode.org/development/cldr-big-red-switch/cldrmodify-passes/cldrmodify-config",
1920             new CLDRFilter() {
1921             private Map<ConfigMatch, LinkedHashSet<Map<ConfigKeys, ConfigMatch>>> locale2keyValues;
1922             private LinkedHashSet<Map<ConfigKeys, ConfigMatch>> keyValues = new LinkedHashSet<Map<ConfigKeys, ConfigMatch>>();
1923             private Matcher draftMatcher = Pattern.compile("\\[@draft=\"[^\"]+\"]").matcher("");
1924 
1925             @Override
1926             public void handleStart() {
1927                 super.handleStart();
1928                 if (!options[FIX].doesOccur || !options[FIX].value.equals("k")) {
1929                     return;
1930                 }
1931                 if (locale2keyValues == null) {
1932                     fillCache();
1933                 }
1934                 // set up for the specific locale we are dealing with.
1935                 // a small optimization
1936                 String localeId = getLocaleID();
1937                 keyValues.clear();
1938                 for (Entry<ConfigMatch, LinkedHashSet<Map<ConfigKeys, ConfigMatch>>> localeMatcher : locale2keyValues
1939                     .entrySet()) {
1940                     if (localeMatcher.getKey().matches(localeId)) {
1941                         keyValues.addAll(localeMatcher.getValue());
1942                     }
1943                 }
1944                 System.out.println("# Checking entries & adding:\t" + keyValues.size());
1945                 for (Map<ConfigKeys, ConfigMatch> entry : keyValues) {
1946                     ConfigMatch action = entry.get(ConfigKeys.action);
1947                     //ConfigMatch locale = entry.get(ConfigKeys.locale);
1948                     ConfigMatch pathMatch = entry.get(ConfigKeys.path);
1949                     ConfigMatch valueMatch = entry.get(ConfigKeys.value);
1950                     ConfigMatch newPath = entry.get(ConfigKeys.new_path);
1951                     ConfigMatch newValue = entry.get(ConfigKeys.new_value);
1952                     switch (action.action) {
1953                     // we add all the values up front
1954                     case addNew:
1955                     case add:
1956                         if (pathMatch != null || valueMatch != null || newPath == null || newValue == null) {
1957                             throw new IllegalArgumentException(
1958                                 "Bad arguments, must have " +
1959                                     "path==null, value=null, new_path!=null, new_value!=null:\n\t"
1960                                     + entry);
1961                         }
1962                         String newPathString = newPath.getPath(getResolved());
1963                         if (action.action == ConfigAction.add
1964                             || cldrFileToFilter.getStringValue(newPathString) == null) {
1965                             replace(newPathString, newPathString, newValue.exactMatch, "config");
1966                         }
1967                         break;
1968                         // we just check
1969                     case replace:
1970                         if ((pathMatch == null && valueMatch == null) || (newPath == null && newValue == null)) {
1971                             throw new IllegalArgumentException(
1972                                 "Bad arguments, must have " +
1973                                     "(path!=null OR value=null) AND (new_path!=null OR new_value!=null):\n\t"
1974                                     + entry);
1975                         }
1976                         break;
1977                         // For delete, we just check; we'll remove later
1978                     case delete:
1979                         if (newPath != null || newValue != null) {
1980                             throw new IllegalArgumentException("Bad arguments, must have " +
1981                                 "newPath=null, newValue=null"
1982                                 + entry);
1983                         }
1984                         break;
1985                     default: // fall through
1986                         throw new IllegalArgumentException("Internal Error");
1987                     }
1988                 }
1989             }
1990 
1991             private void fillCache() {
1992                 locale2keyValues = new LinkedHashMap<ConfigMatch, LinkedHashSet<Map<ConfigKeys, ConfigMatch>>>();
1993                 String configFileName = options[KONFIG].value;
1994                 FileProcessor myReader = new FileProcessor() {
1995                     {
1996                         doHash = false;
1997                     }
1998 
1999                     @Override
2000                     protected boolean handleLine(int lineCount, String line) {
2001                         line = line.trim();
2002 //                            if (line.isEmpty()) {
2003 //                                return true;
2004 //                            }
2005                         String[] lineParts = line.split("\\s*;\\s*");
2006                         Map<ConfigKeys, ConfigMatch> keyValue = new EnumMap<ConfigKeys, ConfigMatch>(
2007                             ConfigKeys.class);
2008                         for (String linePart : lineParts) {
2009                             int pos = linePart.indexOf('=');
2010                             if (pos < 0) {
2011                                 throw new IllegalArgumentException(lineCount + ":\t No = in command: «" + linePart + "» in " + line);
2012                             }
2013                             ConfigKeys key = ConfigKeys.valueOf(linePart.substring(0, pos).trim());
2014                             if (keyValue.containsKey(key)) {
2015                                 throw new IllegalArgumentException("Must not have multiple keys: " + key);
2016                             }
2017                             String match = linePart.substring(pos + 1).trim();
2018                             keyValue.put(key, new ConfigMatch(key, match));
2019                         }
2020                         final ConfigMatch locale = keyValue.get(ConfigKeys.locale);
2021                         if (locale == null || keyValue.get(ConfigKeys.action) == null) {
2022                             throw new IllegalArgumentException();
2023                         }
2024 
2025                         LinkedHashSet<Map<ConfigKeys, ConfigMatch>> keyValues = locale2keyValues
2026                             .get(locale);
2027                         if (keyValues == null) {
2028                             locale2keyValues.put(locale,
2029                                 keyValues = new LinkedHashSet<Map<ConfigKeys, ConfigMatch>>());
2030                         }
2031                         keyValues.add(keyValue);
2032                         return true;
2033                     }
2034                 };
2035                 myReader.process(CLDRModify.class, configFileName);
2036             }
2037 
2038             @Override
2039             public void handlePath(String xpath) {
2040                 // slow method; could optimize
2041                 for (Map<ConfigKeys, ConfigMatch> entry : keyValues) {
2042                     ConfigMatch pathMatch = entry.get(ConfigKeys.path);
2043                     if (pathMatch != null && !pathMatch.matches(xpath)) {
2044                         continue;
2045                     }
2046                     ConfigMatch valueMatch = entry.get(ConfigKeys.value);
2047                     String value = cldrFileToFilter.getStringValue(xpath);
2048                     if (valueMatch != null && !valueMatch.matches(value)) {
2049                         continue;
2050                     }
2051                     ConfigMatch action = entry.get(ConfigKeys.action);
2052                     switch (action.action) {
2053                     case delete:
2054                         remove(xpath, "config");
2055                         break;
2056                     case replace:
2057                         ConfigMatch newPath = entry.get(ConfigKeys.new_path);
2058                         ConfigMatch newValue = entry.get(ConfigKeys.new_value);
2059 
2060                         String fullpath = cldrFileToFilter.getFullXPath(xpath);
2061                         String draft = "";
2062                         int loc = fullpath.indexOf("[@draft=");
2063                         if (loc >= 0) {
2064                             int loc2 = fullpath.indexOf(']', loc + 7);
2065                             draft = fullpath.substring(loc, loc2 + 1);
2066                         }
2067 
2068                         String modPath = ConfigMatch.getModified(pathMatch, xpath, newPath) + draft;
2069                         String modValue = ConfigMatch.getModified(valueMatch, value, newValue);
2070                         replace(xpath, modPath, modValue, "config");
2071                     }
2072                 }
2073             }
2074         });
2075         fixList.add('i', "fix Identical Children");
2076         fixList.add('o', "check attribute validity");
2077 
2078         /**
2079         Goal is: if value in vxml is ^^^, then add ^^^ to trunk IFF
2080         (a) if there is no value in trunk
2081         (b) the value in trunk = bailey.
2082          */
2083         fixList.add('^', "add inheritance-marked items from vxml to trunk", new CLDRFilter() {
2084             Factory VxmlFactory;
2085             final ArrayList<File> fileList = new ArrayList<>();
2086 
2087             @Override
2088             public void handleStart() {
2089                 if (fileList.isEmpty()) {
2090                     for (String top : Arrays.asList("common/", "seed/")) {
2091                         //for (String leaf : Arrays.asList("main/", "annotations/")) {
2092                         String leaf = sourceInput.contains("annotations") ? "annotations/" : "main/";
2093                         String key = top + leaf;
2094                         fileList.add(new File(CLDRPaths.AUX_DIRECTORY + "voting/" + CLDRFile.GEN_VERSION + "/vxml/" + key));
2095                     }
2096                     VxmlFactory = SimpleFactory.make(fileList.toArray(new File[fileList.size()]), ".*");
2097                 }
2098 
2099                 String localeID = cldrFileToFilter.getLocaleID();
2100 
2101                 CLDRFile vxmlCommonMainFile;
2102                 try {
2103                     vxmlCommonMainFile = VxmlFactory.make(localeID, false);
2104                 } catch (Exception e) {
2105                     System.out.println("#ERROR: VXML file not found for " + localeID + " in " + fileList);
2106                     return;
2107                 }
2108                 CLDRFile resolved = cldrFileToFilter;
2109 
2110                 if (!cldrFileToFilter.isResolved()) {
2111                     resolved = factory.make(cldrFileToFilter.getLocaleID(), true);
2112                 }
2113 
2114                 for (String xpath : vxmlCommonMainFile) {
2115                     if (xpath.contains("/language[@type=\"aa\"")) {
2116                         int debug = 0;
2117                     }
2118                     String vxmlValue = vxmlCommonMainFile.getStringValue(xpath);
2119                     if (vxmlValue == null) {
2120                         continue;
2121                     }
2122                     if (!CldrUtility.INHERITANCE_MARKER.equals(vxmlValue)) {
2123                         continue;
2124                     }
2125 
2126                     String trunkValue = resolved.getStringValue(xpath);
2127                     if (trunkValue != null) {
2128                         String baileyValue = resolved.getBaileyValue(xpath, null, null);
2129                         if (!trunkValue.equals(baileyValue)) {
2130                             continue;
2131                         }
2132                     }
2133                     // at this point, the vxmlValue is ^^^ and the trunk value is either null or == baileyValue
2134 
2135 
2136                     // special hack to avoid combined locale names like //ldml/localeDisplayNames/languages/language[@type="en_AU"][@draft="contributed"]
2137 
2138 //                    if (xpath.startsWith("//ldml/localeDisplayNames/languages/language[@type=") && xpath.contains("_")) {
2139 //                        continue;
2140 //                    }
2141 
2142                     String fullPath = resolved.getFullXPath(xpath); // get the draft status, etc.
2143                     if (fullPath == null) { // debugging
2144                         fullPath = vxmlCommonMainFile.getFullXPath(xpath);
2145                         if (fullPath == null) {
2146                             throw new ICUException("getFullXPath not working for " + localeID + ", " + xpath);
2147                         }
2148                     }
2149                     add(fullPath, vxmlValue, "Add or replace by " + CldrUtility.INHERITANCE_MARKER);
2150                 }
2151             }
2152             @Override
2153             public void handlePath(String xpath) {
2154                 // Everything done in handleStart
2155             }
2156         });
2157 
2158         fixList.add('L', "fix logical groups by adding all the bailey values", new CLDRFilter() {
2159             Set<String> seen = new HashSet<>();
2160             CLDRFile resolved;
2161             boolean skip;
2162             CoverageLevel2 coverageLeveler;
2163 
2164             @Override
2165             public void handleStart() {
2166                 seen.clear();
2167                 resolved = getResolved();
2168                 skip = false;
2169                 coverageLeveler = null;
2170 
2171                 String localeID = cldrFileToFilter.getLocaleID();
2172                 LanguageTagParser ltp = new LanguageTagParser().set(localeID);
2173                 if (!ltp.getRegion().isEmpty() || !ltp.getVariants().isEmpty()) {
2174                     skip = true;
2175                 } else {
2176                     coverageLeveler = CoverageLevel2.getInstance(localeID);
2177                 }
2178             }
2179             @Override
2180             public void handlePath(String xpath) {
2181                 if (skip
2182                     || seen.contains(xpath)
2183                     || coverageLeveler.getLevel(xpath) == Level.COMPREHENSIVE) {
2184                     return;
2185                 }
2186                 Set<String> paths = LogicalGrouping.getPaths(cldrFileToFilter, xpath);
2187                 if (paths.size() < 2) {
2188                     return;
2189                 }
2190                 Set<String> needed = new LinkedHashSet<>();
2191                 for (String path2 : paths) {
2192                     if (path2.equals(xpath)) {
2193                         continue;
2194                     }
2195                     if (cldrFileToFilter.isHere(path2)) {
2196                         continue;
2197                     }
2198                     if (LogicalGrouping.isOptional(cldrFileToFilter, path2)) {
2199                         continue;
2200                     }
2201                     // ok, we have a path missing a value
2202                     needed.add(path2);
2203                 }
2204                 if (needed.isEmpty()) {
2205                     return;
2206                 }
2207                 // we need at least one value
2208 
2209                 // flesh out by adding a bailey value
2210                 // TODO resolve the draft status in a better way
2211                 // For now, get the lowest draft status, and we'll reset everything to that.
2212 
2213                 DraftStatus worstStatus = DraftStatus.contributed; // don't ever add an approved.
2214                 for (String path2 : paths) {
2215                     XPathParts parts = XPathParts.getFrozenInstance(path2);
2216                     String rawStatus = parts.getAttributeValue(-1, "draft");
2217                     if (rawStatus == null) {
2218                         continue;
2219                     }
2220                     DraftStatus df = DraftStatus.forString(rawStatus);
2221                     if (df.compareTo(worstStatus) < 0) {
2222                         worstStatus = df;
2223                     }
2224                 }
2225 
2226                 for (String path2 : paths) {
2227                     String fullPath = resolved.getFullXPath(path2);
2228                     String value = resolved.getStringValue(path2);
2229                     if (LogicalGrouping.isOptional(cldrFileToFilter, path2)
2230                         && !cldrFileToFilter.isHere(path2)) {
2231                         continue;
2232                     }
2233 
2234                     XPathParts fullparts = XPathParts.getInstance(fullPath); // not frozen, for setAttribute
2235                     fullparts.setAttribute(-1, "draft", worstStatus.toString());
2236                     replace(fullPath, fullparts.toString(), value, "Fleshing out bailey to " + worstStatus);
2237                 }
2238                 seen.addAll(paths);
2239             }
2240         });
2241     }
2242 
getLast2Dirs(File sourceDir1)2243     public static String getLast2Dirs(File sourceDir1) {
2244         String[] pathElements = sourceDir1.toString().split("/");
2245         return pathElements[pathElements.length-2] + "/" + pathElements[pathElements.length-1] + "/";
2246     }
2247 
2248 
2249     // references="http://www.stat.fi/tk/tt/luokitukset/lk/kieli_02.html"
2250 
2251     private static class ValuePair {
2252         String value;
2253         String fullxpath;
2254     }
2255 
2256     /**
2257      * Find the set of xpaths that
2258      * (a) have all the same values (if present) in the children
2259      * (b) are absent in the parent,
2260      * (c) are different than what is in the fully resolved parent
2261      * and add them.
2262      */
fixIdenticalChildren(Factory cldrFactory, CLDRFile k, CLDRFile replacements)2263     static void fixIdenticalChildren(Factory cldrFactory, CLDRFile k, CLDRFile replacements) {
2264         String key = k.getLocaleID();
2265         if (key.equals("root")) return;
2266         Set<String> availableChildren = cldrFactory.getAvailableWithParent(key, true);
2267         if (availableChildren.size() == 0) return;
2268         Set<String> skipPaths = new HashSet<String>();
2269         Map<String, ValuePair> haveSameValues = new TreeMap<String, ValuePair>();
2270         CLDRFile resolvedFile = cldrFactory.make(key, true);
2271         // get only those paths that are not in "root"
2272         CollectionUtilities.addAll(resolvedFile.iterator(), skipPaths);
2273 
2274         // first, collect all the paths
2275         for (String locale : availableChildren) {
2276             if (locale.indexOf("POSIX") >= 0) continue;
2277             CLDRFile item = cldrFactory.make(locale, false);
2278             for (String xpath : item) {
2279                 if (skipPaths.contains(xpath)) continue;
2280                 // skip certain elements
2281                 if (xpath.indexOf("/identity") >= 0) continue;
2282                 if (xpath.startsWith("//ldml/numbers/currencies/currency")) continue;
2283                 if (xpath.startsWith("//ldml/dates/timeZoneNames/metazone[")) continue;
2284                 if (xpath.indexOf("[@alt") >= 0) continue;
2285                 if (xpath.indexOf("/alias") >= 0) continue;
2286 
2287                 // must be string vale
2288                 ValuePair v1 = new ValuePair();
2289                 v1.value = item.getStringValue(xpath);
2290                 v1.fullxpath = item.getFullXPath(xpath);
2291 
2292                 ValuePair vAlready = haveSameValues.get(xpath);
2293                 if (vAlready == null) {
2294                     haveSameValues.put(xpath, v1);
2295                 } else if (!v1.value.equals(vAlready.value) || !v1.fullxpath.equals(vAlready.fullxpath)) {
2296                     skipPaths.add(xpath);
2297                     haveSameValues.remove(xpath);
2298                 }
2299             }
2300         }
2301         // at this point, haveSameValues is all kosher, so add items
2302         for (String xpath : haveSameValues.keySet()) {
2303             ValuePair v = haveSameValues.get(xpath);
2304             // if (v.value.equals(resolvedFile.getStringValue(xpath))
2305             // && v.fullxpath.equals(resolvedFile.getFullXPath(xpath))) continue;
2306             replacements.add(v.fullxpath, v.value);
2307         }
2308     }
2309 
fixAltProposed()2310     static void fixAltProposed() {
2311         throw new IllegalArgumentException();
2312         // throw out any alt=proposed values that are the same as the main
2313         // HashSet toRemove = new HashSet();
2314         // for (Iterator it = dataSource.iterator(); it.hasNext();) {
2315         // String cpath = (String) it.next();
2316         // if (cpath.indexOf("[@alt=") < 0) continue;
2317         // String cpath2 = getNondraftNonaltXPath(cpath);
2318         // String value = getStringValue(cpath);
2319         // String value2 = getStringValue(cpath2);
2320         // if (!value.equals(value2)) continue;
2321         // // have to worry about cases where the info is not in the value!!
2322         // //fix this; values are the same!!
2323         // String fullpath = getNondraftNonaltXPath(getFullXPath(cpath));
2324         // String fullpath2 = getNondraftNonaltXPath(getFullXPath(cpath2));
2325         // if (!fullpath.equals(fullpath2)) continue;
2326         // Log.logln(getLocaleID() + "\tRemoving redundant alternate: " + getFullXPath(cpath) + " ;\t" + value);
2327         // Log.logln("\t\tBecause of: " + getFullXPath(cpath2) + " ;\t" + value2);
2328         // if (getFullXPath(cpath2).indexOf("[@references=") >= 0) {
2329         // System.out.println("Warning: removing references: " + getFullXPath(cpath2));
2330         // }
2331         // toRemove.add(cpath);
2332         // }
2333         // dataSource.removeAll(toRemove);
2334 
2335     }
2336 
2337     /**
2338      * Perform various fixes
2339      * TODO add options to pick which one.
2340      *
2341      * @param options
2342      * @param config
2343      * @param cldrFactory
2344      */
fix(CLDRFile k, String inputOptions, String config, Factory cldrFactory)2345     private static void fix(CLDRFile k, String inputOptions, String config, Factory cldrFactory) {
2346 
2347         // TODO before modifying, make sure that it is fully resolved.
2348         // then minimize against the NEW parents
2349 
2350         Set<String> removal = new TreeSet<String>(k.getComparator());
2351         CLDRFile replacements = SimpleFactory.makeFile("temp");
2352         fixList.setFile(k, inputOptions, cldrFactory, removal, replacements);
2353 
2354         for (String xpath : k) {
2355             fixList.handlePath(xpath);
2356         }
2357         fixList.handleEnd();
2358 
2359         // remove bad attributes
2360 
2361         if (inputOptions.indexOf('v') >= 0) {
2362             CLDRTest.checkAttributeValidity(k, null, removal);
2363         }
2364 
2365         // raise identical elements
2366 
2367         if (inputOptions.indexOf('i') >= 0) {
2368             fixIdenticalChildren(cldrFactory, k, replacements);
2369         }
2370 
2371         // now do the actions we collected
2372 
2373         if (SHOW_DETAILS) {
2374             if (removal.size() != 0 || !replacements.isEmpty()) {
2375                 if (!removal.isEmpty()) {
2376                     System.out.println("Removals:");
2377                     for (String path : removal) {
2378                         System.out.println(path + " =\t " + k.getStringValue(path));
2379                     }
2380                 }
2381                 if (!replacements.isEmpty()) {
2382                     System.out.println("Additions/Replacements:");
2383                     System.out.println(replacements.toString().replaceAll("\u00A0", "<NBSP>"));
2384                 }
2385             }
2386         }
2387         if (removal.size() != 0) {
2388             k.removeAll(removal, COMMENT_REMOVALS);
2389         }
2390         k.putAll(replacements, CLDRFile.MERGE_REPLACE_MINE);
2391     }
2392 
2393     /**
2394      * Internal
2395      */
testJavaSemantics()2396     public static void testJavaSemantics() {
2397         Collator caseInsensitive = Collator.getInstance(ULocale.ROOT);
2398         caseInsensitive.setStrength(Collator.SECONDARY);
2399         Set<String> setWithCaseInsensitive = new TreeSet<String>(caseInsensitive);
2400         setWithCaseInsensitive.addAll(Arrays.asList(new String[] { "a", "b", "c" }));
2401         Set<String> plainSet = new TreeSet<String>();
2402         plainSet.addAll(Arrays.asList(new String[] { "a", "b", "B" }));
2403         System.out.println("S1 equals S2?\t" + setWithCaseInsensitive.equals(plainSet));
2404         System.out.println("S2 equals S1?\t" + plainSet.equals(setWithCaseInsensitive));
2405         setWithCaseInsensitive.removeAll(plainSet);
2406         System.out.println("S1 removeAll S2 is empty?\t" + setWithCaseInsensitive.isEmpty());
2407     }
2408 
2409     // <localizedPatternChars>GyMdkHmsSEDFwWahKzYeugAZ</localizedPatternChars>
2410     /*
2411      * <localizedPattern>
2412      * <map type="era">G</map>
2413      * <map type="year">y</map>
2414      * <map type="year_iso">Y</map>
2415      * <map type="year_uniform">u</map>
2416      * <map type="month">M</map>
2417      * <map type="week_in_year">w</map>
2418      * <map type="week_in_month">W</map>
2419      * <map type="day">d</map>
2420      * <map type="day_of_year">D</map>
2421      * <map type="day_of_week_in_month">F</map>
2422      * <map type="day_julian">g</map>
2423      * <map type="day_of_week">E</map>
2424      * <map type="day_of_week_local">e</map>
2425      * <map type="period_in_day">a</map>
2426      * <map type="hour_1_12">h</map>
2427      * <map type="hour_0_23">H</map>
2428      * <map type="hour_0_11">K</map>
2429      * <map type="hour_1_24">k</map>
2430      * <map type="minute">m</map>
2431      * <map type="second">s</map>
2432      * <map type="fractions_of_second">S</map>
2433      * <map type="milliseconds_in_day">A</map>
2434      * <map type="timezone">z</map>
2435      * <map type="timezone_gmt">Z</map>
2436      * </localizedPattern>
2437      */
2438 
2439 }
2440