1 /* 2 ****************************************************************************** 3 * Copyright (C) 2004-2013, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ****************************************************************************** 6 */ 7 package org.unicode.cldr.tool; 8 9 import java.io.File; 10 import java.io.PrintWriter; 11 import java.util.ArrayList; 12 import java.util.Arrays; 13 import java.util.EnumMap; 14 import java.util.HashMap; 15 import java.util.HashSet; 16 import java.util.Iterator; 17 import java.util.LinkedHashMap; 18 import java.util.LinkedHashSet; 19 import java.util.List; 20 import java.util.Map; 21 import java.util.Map.Entry; 22 import java.util.Set; 23 import java.util.TreeMap; 24 import java.util.TreeSet; 25 import java.util.regex.Matcher; 26 import java.util.regex.Pattern; 27 28 import org.unicode.cldr.draft.FileUtilities; 29 import org.unicode.cldr.test.CLDRTest; 30 import org.unicode.cldr.test.DisplayAndInputProcessor; 31 import org.unicode.cldr.test.QuickCheck; 32 import org.unicode.cldr.util.Annotations; 33 import org.unicode.cldr.util.CLDRConfig; 34 import org.unicode.cldr.util.CLDRFile; 35 import org.unicode.cldr.util.CLDRFile.DraftStatus; 36 import org.unicode.cldr.util.CLDRFile.ExemplarType; 37 import org.unicode.cldr.util.CLDRFile.NumberingSystem; 38 import org.unicode.cldr.util.CLDRFile.WinningChoice; 39 import org.unicode.cldr.util.CLDRLocale; 40 import org.unicode.cldr.util.CLDRPaths; 41 import org.unicode.cldr.util.CLDRTool; 42 import org.unicode.cldr.util.CldrUtility; 43 import org.unicode.cldr.util.CldrUtility.SimpleLineComparator; 44 import org.unicode.cldr.util.DateTimeCanonicalizer; 45 import org.unicode.cldr.util.DateTimeCanonicalizer.DateTimePatternType; 46 import org.unicode.cldr.util.DtdData; 47 import org.unicode.cldr.util.DtdType; 48 import org.unicode.cldr.util.Factory; 49 import org.unicode.cldr.util.FileProcessor; 50 import org.unicode.cldr.util.LocaleIDParser; 51 
import org.unicode.cldr.util.Log; 52 import org.unicode.cldr.util.PathHeader; 53 import org.unicode.cldr.util.PatternCache; 54 import org.unicode.cldr.util.RegexLookup; 55 import org.unicode.cldr.util.SimpleFactory; 56 import org.unicode.cldr.util.StandardCodes; 57 import org.unicode.cldr.util.StringId; 58 import org.unicode.cldr.util.SupplementalDataInfo; 59 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 60 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 61 import org.unicode.cldr.util.UnicodeSetPrettyPrinter; 62 import org.unicode.cldr.util.XMLSource; 63 import org.unicode.cldr.util.XPathParts; 64 import org.unicode.cldr.util.XPathParts.Comments; 65 import org.unicode.cldr.util.XPathParts.Comments.CommentType; 66 67 import com.google.common.base.Splitter; 68 import com.ibm.icu.dev.tool.UOption; 69 import com.ibm.icu.dev.util.CollectionUtilities; 70 import com.ibm.icu.impl.Utility; 71 import com.ibm.icu.text.Collator; 72 import com.ibm.icu.text.DateTimePatternGenerator; 73 import com.ibm.icu.text.DateTimePatternGenerator.VariableField; 74 import com.ibm.icu.text.Normalizer; 75 import com.ibm.icu.text.NumberFormat; 76 import com.ibm.icu.text.UnicodeSet; 77 import com.ibm.icu.text.UnicodeSetIterator; 78 import com.ibm.icu.util.Output; 79 import com.ibm.icu.util.ULocale; 80 81 /** 82 * Tool for applying modifications to the CLDR files. Use -h to see the options. 83 * <p> 84 * There are some environment variables that can be used with the program <br> 85 * -DSHOW_FILES=<anything> shows all create/open of files. 86 */ 87 @CLDRTool(alias = "modify", 88 description = "Tool for applying modifications to the CLDR files. 
Use -h to see the options.") 89 public class CLDRModify { 90 private static final boolean DEBUG = false; 91 static final String DEBUG_PATHS = null; // ".*currency.*"; 92 static final boolean COMMENT_REMOVALS = false; // append removals as comments 93 static final UnicodeSet whitespace = new UnicodeSet("[:whitespace:]").freeze(); 94 static final UnicodeSet HEX = new UnicodeSet("[a-fA-F0-9]").freeze(); 95 private static final DtdData dtdData = DtdData.getInstance(DtdType.ldml); 96 97 // TODO make this into input option. 98 99 enum ConfigKeys { 100 action, locale, path, value, new_path, new_value 101 } 102 103 enum ConfigAction { 104 /** 105 * Remove a path 106 */ 107 delete, 108 /** 109 * Add a path/value 110 */ 111 add, 112 /** 113 * Replace a path/value. Equals 'add' but tests selected paths 114 */ 115 replace, 116 /** 117 * Add a a path/value. Equals 'add' but tests that path did NOT exist 118 */ 119 addNew, 120 } 121 122 static final class ConfigMatch { 123 final String exactMatch; 124 final Matcher regexMatch; // doesn't have to be thread safe 125 final ConfigAction action; 126 final boolean hexPath; 127 ConfigMatch(ConfigKeys key, String match)128 public ConfigMatch(ConfigKeys key, String match) { 129 if (key == ConfigKeys.action) { 130 exactMatch = null; 131 regexMatch = null; 132 action = ConfigAction.valueOf(match); 133 hexPath = false; 134 } else if (match.startsWith("/") && match.endsWith("/")) { 135 if (key != ConfigKeys.locale && key != ConfigKeys.path && key != ConfigKeys.value) { 136 throw new IllegalArgumentException("Regex only allowed for old path/value."); 137 } 138 exactMatch = null; 139 regexMatch = PatternCache.get(match.substring(1, match.length() - 1) 140 .replace("[@", "\\[@")).matcher(""); 141 action = null; 142 hexPath = false; 143 } else { 144 exactMatch = match; 145 regexMatch = null; 146 action = null; 147 hexPath = (key == ConfigKeys.new_path || key == ConfigKeys.path) 148 && HEX.containsAll(match); 149 } 150 151 } 152 matches(String 
other)153 public boolean matches(String other) { 154 if (exactMatch == null) { 155 return regexMatch.reset(other).find(); 156 } else if (hexPath) { 157 // convert path to id for comparison 158 return exactMatch.equals(StringId.getHexId(other)); 159 } else { 160 return exactMatch.equals(other); 161 } 162 } 163 toString()164 public String toString() { 165 return action != null ? action.toString() 166 : exactMatch == null ? regexMatch.toString() 167 : hexPath ? "*" + exactMatch + "*" 168 : exactMatch; 169 } 170 getPath(CLDRFile cldrFileToFilter)171 public String getPath(CLDRFile cldrFileToFilter) { 172 if (!hexPath) { 173 return exactMatch; 174 } 175 // ensure that we have all the possible paths cached 176 String path = StringId.getStringFromHexId(exactMatch); 177 if (path == null) { 178 for (String eachPath : cldrFileToFilter.fullIterable()) { 179 StringId.getHexId(eachPath); 180 } 181 path = StringId.getStringFromHexId(exactMatch); 182 if (path == null) { 183 throw new IllegalArgumentException("No path for hex id: " + exactMatch); 184 } 185 } 186 return path; 187 } 188 getModified(ConfigMatch valueMatch, String value, ConfigMatch newValue)189 public static String getModified(ConfigMatch valueMatch, String value, ConfigMatch newValue) { 190 if (valueMatch == null) { // match anything 191 if (newValue != null && newValue.exactMatch != null) { 192 return newValue.exactMatch; 193 } 194 if (value != null) { 195 return value; 196 } 197 throw new IllegalArgumentException("Can't have both old and new be null."); 198 } else if (valueMatch.exactMatch == null) { // regex 199 if (newValue == null || newValue.exactMatch == null) { 200 throw new IllegalArgumentException("Can't have regex without replacement."); 201 } 202 StringBuffer buffer = new StringBuffer(); 203 valueMatch.regexMatch.appendReplacement(buffer, newValue.exactMatch); 204 return buffer.toString(); 205 } else { 206 return newValue.exactMatch != null ? 
newValue.exactMatch : value; 207 } 208 } 209 } 210 211 static FixList fixList = new FixList(); 212 213 private static final int HELP1 = 0, 214 HELP2 = 1, 215 SOURCEDIR = 2, 216 DESTDIR = 3, 217 MATCH = 4, 218 JOIN = 5, 219 MINIMIZE = 6, 220 FIX = 7, 221 JOIN_ARGS = 8, 222 VET_ADD = 9, 223 RESOLVE = 10, 224 PATH = 11, 225 USER = 12, 226 ALL_DIRS = 13, 227 CHECK = 14, 228 KONFIG = 15; 229 230 private static final UOption[] options = { 231 UOption.HELP_H(), 232 UOption.HELP_QUESTION_MARK(), 233 UOption.SOURCEDIR().setDefault(CLDRPaths.MAIN_DIRECTORY), 234 UOption.DESTDIR().setDefault(CLDRPaths.GEN_DIRECTORY + "cldrModify/"), 235 UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"), 236 UOption.create("join", 'j', UOption.OPTIONAL_ARG), 237 UOption.create("minimize", 'r', UOption.NO_ARG), 238 UOption.create("fix", 'f', UOption.OPTIONAL_ARG), 239 UOption.create("join-args", 'i', UOption.OPTIONAL_ARG), 240 UOption.create("vet", 'v', UOption.OPTIONAL_ARG), 241 UOption.create("resolve", 'z', UOption.OPTIONAL_ARG), 242 UOption.create("path", 'p', UOption.REQUIRES_ARG), 243 UOption.create("user", 'u', UOption.REQUIRES_ARG), 244 UOption.create("all", 'a', UOption.REQUIRES_ARG), 245 UOption.create("check", 'c', UOption.NO_ARG), 246 UOption.create("konfig", 'k', UOption.OPTIONAL_ARG).setDefault("modify_config.txt"), 247 }; 248 249 private static final UnicodeSet allMergeOptions = new UnicodeSet("[rcd]"); 250 251 static final String HELP_TEXT1 = "Use the following options" 252 + XPathParts.NEWLINE 253 + "-h or -?\t for this message" 254 + XPathParts.NEWLINE 255 + "-" 256 + options[SOURCEDIR].shortName 257 + "\t source directory. Default = -s" 258 + CldrUtility.getCanonicalName(CLDRPaths.MAIN_DIRECTORY) 259 + XPathParts.NEWLINE 260 + "\tExample:-sC:\\Unicode-CVS2\\cldr\\common\\gen\\source\\" 261 + XPathParts.NEWLINE 262 + "-" 263 + options[DESTDIR].shortName 264 + "\t destination directory. 
Default = -d" 265 + CldrUtility.getCanonicalName(CLDRPaths.GEN_DIRECTORY + "main/") 266 + XPathParts.NEWLINE 267 + "-m<regex>\t to restrict the locales to what matches <regex>" 268 + XPathParts.NEWLINE 269 + "-j<merge_dir>/X'\t to merge two sets of files together (from <source_dir>/X and <merge_dir>/X', " 270 + XPathParts.NEWLINE 271 + "\twhere * in X' is replaced by X)." 272 + XPathParts.NEWLINE 273 + "\tExample:-jC:\\Unicode-CVS2\\cldr\\dropbox\\to_be_merged\\missing\\missing_*" 274 + XPathParts.NEWLINE 275 + "-i\t merge arguments:" 276 + XPathParts.NEWLINE 277 + "\tr\t replace contents (otherwise new data will be draft=\"unconfirmed\")" 278 + XPathParts.NEWLINE 279 + "\tc\t ignore comments in <merge_dir> files" 280 + XPathParts.NEWLINE 281 + "-r\t to minimize the results (removing items that inherit from parent)." 282 + XPathParts.NEWLINE 283 + "-v\t incorporate vetting information, and generate diff files." 284 + XPathParts.NEWLINE 285 + "-z\t generate resolved files" 286 + XPathParts.NEWLINE 287 + "-p\t set path for -fx" 288 + XPathParts.NEWLINE 289 + "-u\t set user for -fb" 290 + XPathParts.NEWLINE 291 + "-a\t pattern: recurse over all subdirectories that match pattern" 292 + XPathParts.NEWLINE 293 + "-c\t check that resulting xml files are valid. Requires that a dtd directory be copied to the output directory, in the appropriate location." 294 + XPathParts.NEWLINE 295 + "-k\t config_file\twith -fk perform modifications according to what is in the config file. For format details, see:" 296 + XPathParts.NEWLINE 297 + "\t\thttp://cldr.unicode.org/development/cldr-big-red-switch/cldrmodify-passes/cldrmodify-config." 298 + XPathParts.NEWLINE 299 + "-f\t to perform various fixes on the files (add following arguments to specify which ones, eg -fxi)" 300 + XPathParts.NEWLINE; 301 302 static final String HELP_TEXT2 = "Note: A set of bat files are also generated in <dest_dir>/diff. They will invoke a comparison program on the results." 
303 + XPathParts.NEWLINE; 304 private static final boolean SHOW_DETAILS = false; 305 private static boolean SHOW_PROCESSING = false; 306 307 /** 308 * Picks options and executes. Use -h to see options. 309 */ main(String[] args)310 public static void main(String[] args) throws Exception { 311 long startTime = System.currentTimeMillis(); 312 UOption.parseArgs(args, options); 313 if (options[HELP1].doesOccur || options[HELP2].doesOccur) { 314 System.out.println(HELP_TEXT1 + fixList.showHelp() + HELP_TEXT2); 315 return; 316 } 317 checkSuboptions(options[FIX], fixList.getOptions()); 318 checkSuboptions(options[JOIN_ARGS], allMergeOptions); 319 String recurseOnDirectories = options[ALL_DIRS].value; 320 boolean makeResolved = options[RESOLVE].doesOccur; // Utility.COMMON_DIRECTORY + "main/"; 321 322 // String sourceDir = "C:\\ICU4C\\locale\\common\\main\\"; 323 324 String sourceInput = options[SOURCEDIR].value; 325 String destInput = options[DESTDIR].value; 326 if (recurseOnDirectories != null) { 327 sourceInput = removeSuffix(sourceInput, "main/", "main"); 328 destInput = removeSuffix(destInput, "main/", "main"); 329 } 330 String sourceDirBase = CldrUtility.checkValidDirectory(sourceInput); // Utility.COMMON_DIRECTORY + "main/"; 331 String targetDirBase = CldrUtility.checkValidDirectory(destInput); // Utility.GEN_DIRECTORY + "main/"; 332 System.out.format("Source:\t%s\n", sourceDirBase); 333 System.out.format("Target:\t%s\n", targetDirBase); 334 335 Set<String> dirSet = new TreeSet<String>(); 336 if (recurseOnDirectories == null) { 337 dirSet.add(""); 338 } else { 339 String[] subdirs = new File(sourceDirBase).list(); 340 Matcher subdirMatch = PatternCache.get(recurseOnDirectories).matcher(""); 341 for (String subdir : subdirs) { 342 if (!subdirMatch.reset(subdir).find()) continue; 343 dirSet.add(subdir + "/"); 344 } 345 } 346 for (String dir : dirSet) { 347 String sourceDir = sourceDirBase + dir; 348 if (!new File(sourceDir).isDirectory()) continue; 349 String 
targetDir = targetDirBase + dir; 350 Log.setLog(targetDir + "/diff", "log.txt"); 351 try { // String[] failureLines = new String[2]; 352 SimpleLineComparator lineComparer = new SimpleLineComparator( 353 // SimpleLineComparator.SKIP_SPACES + 354 SimpleLineComparator.TRIM + 355 SimpleLineComparator.SKIP_EMPTY + 356 SimpleLineComparator.SKIP_CVS_TAGS); 357 358 Factory cldrFactory = Factory.make(sourceDir, ".*"); 359 360 if (options[VET_ADD].doesOccur) { 361 VettingAdder va = new VettingAdder(options[VET_ADD].value); 362 va.showFiles(cldrFactory, targetDir); 363 return; 364 } 365 366 Factory mergeFactory = null; 367 368 String join_prefix = "", join_postfix = ""; 369 if (options[JOIN].doesOccur) { 370 String mergeDir = options[JOIN].value; 371 File temp = new File(mergeDir); 372 mergeDir = CldrUtility.checkValidDirectory(temp.getParent() + File.separator); // Utility.COMMON_DIRECTORY 373 // + "main/"; 374 String filename = temp.getName(); 375 join_prefix = join_postfix = ""; 376 int pos = filename.indexOf("*"); 377 if (pos >= 0) { 378 join_prefix = filename.substring(0, pos); 379 join_postfix = filename.substring(pos + 1); 380 } 381 mergeFactory = Factory.make(mergeDir, ".*"); 382 } 383 /* 384 * Factory cldrFactory = Factory.make(sourceDir, ".*"); 385 * Set testSet = cldrFactory.getAvailable(); 386 * String[] quicktest = new String[] { 387 * "de" 388 * //"ar", "dz_BT", 389 * // "sv", "en", "de" 390 * }; 391 * if (quicktest.length > 0) { 392 * testSet = new TreeSet(Arrays.asList(quicktest)); 393 * } 394 */ 395 Set<String> locales = new TreeSet<String>(cldrFactory.getAvailable()); 396 if (mergeFactory != null) { 397 Set<String> temp = new TreeSet<String>(mergeFactory.getAvailable()); 398 Set<String> locales3 = new TreeSet<String>(); 399 for (String locale : temp) { 400 if (!locale.startsWith(join_prefix) || !locale.endsWith(join_postfix)) continue; 401 locales3.add(locale.substring(join_prefix.length(), locale.length() - join_postfix.length())); 402 } 403 
locales.retainAll(locales3); 404 System.out.println("Merging: " + locales3); 405 } 406 new CldrUtility.MatcherFilter(options[MATCH].value).retainAll(locales); 407 408 RetainWhenMinimizing retainIfTrue = null; 409 PathHeader.Factory pathHeaderFactory = null; 410 411 fixList.handleSetup(); 412 413 long lastTime = System.currentTimeMillis(); 414 int spin = 0; 415 System.out.format(locales.size() + " Locales:\t%s\n", locales.toString()); 416 int totalRemoved = 0; 417 for (String test : locales) { 418 spin++; 419 if (SHOW_PROCESSING) { 420 long now = System.currentTimeMillis(); 421 if (now - lastTime > 5000) { 422 System.out.println(" .. still processing " + test + " [" + spin + "/" + locales.size() 423 + "]"); 424 lastTime = now; 425 } 426 } 427 // testJavaSemantics(); 428 429 // TODO parameterize the directory and filter 430 // System.out.println("C:\\ICU4C\\locale\\common\\main\\fr.xml"); 431 432 CLDRFile k = cldrFactory.make(test, makeResolved).cloneAsThawed(); 433 // HashSet<String> set = Builder.with(new HashSet<String>()).addAll(k).get(); 434 // System.out.format("Locale\t%s, Size\t%s\n", test, set.size()); 435 // if (k.isNonInheriting()) continue; // for now, skip supplementals 436 if (DEBUG_PATHS != null) { 437 System.out.println("Debug1 (" + test + "):\t" + k.toString(DEBUG_PATHS)); 438 } 439 // System.out.println(k); 440 // String s1 = 441 // "//ldml/segmentations/segmentation[@type=\"LineBreak\"]/variables/variable[@_q=\"0061\"][@id=\"$CB\"] "; 442 // String s2 = 443 // "//ldml/segmentations/segmentation[@type=\"LineBreak\"]/variables/variable[@_q=\"003A\"][@id=\"$CB\"]"; 444 // System.out.println(k.ldmlComparator.compare(s1, s2)); 445 if (mergeFactory != null) { 446 int mergeOption = CLDRFile.MERGE_ADD_ALTERNATE; 447 CLDRFile toMergeIn = mergeFactory.make(join_prefix + test + join_postfix, false) 448 .cloneAsThawed(); 449 if (toMergeIn != null) { 450 if (options[JOIN_ARGS].doesOccur) { 451 if (options[JOIN_ARGS].value.indexOf("r") >= 0) 452 mergeOption = 
CLDRFile.MERGE_REPLACE_MY_DRAFT; 453 if (options[JOIN_ARGS].value.indexOf("d") >= 0) 454 mergeOption = CLDRFile.MERGE_REPLACE_MINE; 455 if (options[JOIN_ARGS].value.indexOf("c") >= 0) toMergeIn.clearComments(); 456 if (options[JOIN_ARGS].value.indexOf("x") >= 0) removePosix(toMergeIn); 457 } 458 toMergeIn.makeDraft(DraftStatus.contributed); 459 k.putAll(toMergeIn, mergeOption); 460 } 461 // special fix 462 k.removeComment( 463 " The following are strings that are not found in the locale (currently), but need valid translations for localizing timezones. "); 464 } 465 if (DEBUG_PATHS != null) { 466 System.out.println("Debug2 (" + test + "):\t" + k.toString(DEBUG_PATHS)); 467 } 468 if (options[FIX].doesOccur) { 469 fix(k, options[FIX].value, options[KONFIG].value, cldrFactory); 470 } 471 if (DEBUG_PATHS != null) { 472 System.out.println("Debug3 (" + test + "):\t" + k.toString(DEBUG_PATHS)); 473 } 474 if (options[MINIMIZE].doesOccur) { 475 if (pathHeaderFactory == null) { 476 pathHeaderFactory = PathHeader.getFactory(cldrFactory.make("en", true)); 477 } 478 // TODO, fix identity 479 String parent = LocaleIDParser.getParent(test); 480 if (parent != null) { 481 CLDRFile toRemove = cldrFactory.make(parent, true); 482 // remove the items that are language codes, script codes, or region codes 483 // since they may be real translations. 484 if (parent.equals("root")) { 485 if (k.getFullXPath("//ldml/alias", true) != null) { 486 System.out.println("Skipping completely aliased file: " + test); 487 } else { 488 // k.putRoot(toRemove); 489 } 490 } 491 if (retainIfTrue == null) { 492 retainIfTrue = new RetainWhenMinimizing(); 493 } 494 retainIfTrue.setParentFile(toRemove); 495 List<String> removed = DEBUG ? 
null : new ArrayList<String>(); 496 k.removeDuplicates(toRemove, COMMENT_REMOVALS, retainIfTrue, removed); 497 if (removed != null && removed.size() != 0) { 498 totalRemoved += removed.size(); 499 Set<PathHeader> sorted = new TreeSet<PathHeader>(); 500 for (String path : removed) { 501 sorted.add(pathHeaderFactory.fromPath(path)); 502 } 503 for (PathHeader pathHeader : sorted) { 504 System.out.println("\t# " + test + "\t" + pathHeader + "\t" + pathHeader.getOriginalPath()); 505 } 506 System.out.println("\t# " + test + "\t# Removed:\t" + removed.size()); 507 } 508 } 509 } 510 // System.out.println(CLDRFile.getAttributeOrder()); 511 512 /* 513 * if (false) { 514 * Map tempComments = k.getXpath_comments(); 515 * 516 * for (Iterator it2 = tempComments.keySet().iterator(); it2.hasNext();) { 517 * String key = (String) it2.next(); 518 * String comment = (String) tempComments.get(key); 519 * Log.logln("Writing extra comment: " + key); 520 * System.out.println(key + "\t comment: " + comment); 521 * } 522 * } 523 */ 524 525 if (DEBUG_PATHS != null) { 526 System.out.println("Debug4 (" + test + "):\t" + k.toString(DEBUG_PATHS)); 527 } 528 529 PrintWriter pw = FileUtilities.openUTF8Writer(targetDir, test + ".xml"); 530 String testPath = "//ldml/dates/calendars/calendar[@type=\"persian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/month[@type=\"1\"]"; 531 if (false) { 532 System.out.println("Printing Raw File:"); 533 testPath = "//ldml/dates/calendars/calendar[@type=\"persian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/alias"; 534 System.out.println(k.getStringValue(testPath)); 535 // System.out.println(k.getFullXPath(testPath)); 536 Iterator it4 = k.iterator(); 537 Set s = CollectionUtilities.addAll(it4, new TreeSet()); 538 539 System.out.println(k.getStringValue(testPath)); 540 // if (true) return; 541 Set orderedSet = new TreeSet(k.getComparator()); 542 CollectionUtilities.addAll(k.iterator(), orderedSet); 543 for 
(Iterator it3 = orderedSet.iterator(); it3.hasNext();) { 544 String path = (String) it3.next(); 545 // System.out.println(path); 546 if (path.equals(testPath)) { 547 System.out.println("huh?"); 548 } 549 String value = k.getStringValue(path); 550 String fullpath = k.getFullXPath(path); 551 System.out.println("\t=\t" + fullpath); 552 System.out.println("\t=\t" + value); 553 } 554 System.out.println("Done Printing Raw File:"); 555 } 556 557 k.write(pw); 558 // pw.println(); 559 pw.close(); 560 if (options[CHECK].doesOccur) { 561 QuickCheck.check(new File(targetDir, test + ".xml")); 562 } 563 564 // JCE: I don't think anyone really uses the .bat files from CLDRModify any more, since 565 // Eclipse provides a decent file comparison program. You can comment this back in if 566 // you need it, but I found that sometimes having this here clobbers the real output 567 // file, which we definitely don't want. 568 // ToolUtilities.generateBat(sourceDir, test + ".xml", targetDir, test + ".xml", lineComparer); 569 570 /* 571 * boolean ok = Utility.areFileIdentical(sourceDir + test + ".xml", 572 * targetDir + test + ".xml", failureLines, Utility.TRIM + Utility.SKIP_SPACES); 573 * if (!ok) { 574 * System.out.println("Found differences at: "); 575 * System.out.println("\t" + failureLines[0]); 576 * System.out.println("\t" + failureLines[1]); 577 * } 578 */ 579 } 580 if (totalSkeletons.size() != 0) { 581 System.out.println("Total Skeletons" + totalSkeletons); 582 } 583 if (totalRemoved > 0) { 584 System.out.println("# Removed:\t" + totalRemoved); 585 } 586 } finally { 587 fixList.handleCleanup(); 588 Log.close(); 589 System.out.println("Done -- Elapsed time: " + ((System.currentTimeMillis() - startTime) / 60000.0) 590 + " minutes"); 591 } 592 } 593 } 594 removeSuffix(String value, String... suffices)595 private static String removeSuffix(String value, String... 
suffices) { 596 for (String suffix : suffices) { 597 if (value.endsWith(suffix)) { 598 return value.substring(0, value.length() - suffix.length()); 599 } 600 } 601 return value; 602 } 603 604 /* 605 * Use the coverage to determine what we should keep in the case of a locale just below root. 606 */ 607 608 static class RetainWhenMinimizing implements CLDRFile.RetentionTest { 609 private CLDRFile file; 610 private CLDRLocale c; 611 private boolean isArabicSublocale; 612 // Status status = new Status(); // no need to have, was unused 613 setParentFile(CLDRFile file)614 public RetainWhenMinimizing setParentFile(CLDRFile file) { 615 this.file = file; 616 this.c = CLDRLocale.getInstance(file.getLocaleIDFromIdentity()); 617 isArabicSublocale = "ar".equals(c.getLanguage()) && !"001".equals(c.getCountry()); 618 return this; 619 } 620 621 @Override getRetention(String path)622 public Retention getRetention(String path) { 623 if (path.startsWith("//ldml/identity/")) { 624 return Retention.RETAIN; 625 } 626 // special case for Arabic 627 if (isArabicSublocale && path.startsWith("//ldml/numbers/defaultNumberingSystem")) { 628 return Retention.RETAIN; 629 } 630 String localeId = file.getSourceLocaleID(path, null); 631 if ((c.isLanguageLocale() || c.equals(CLDRLocale.getInstance("pt_PT"))) 632 && (XMLSource.ROOT_ID.equals(localeId) || XMLSource.CODE_FALLBACK_ID.equals(localeId))) { 633 return Retention.RETAIN; 634 } 635 return Retention.RETAIN_IF_DIFFERENT; 636 } 637 }; 638 639 static final Splitter COMMA_SEMI = Splitter.on(Pattern.compile("[,;|]")).trimResults().omitEmptyStrings(); 640 protected static final boolean NUMBER_SYSTEM_HACK = true; 641 642 /** 643 * 644 */ checkSuboptions(UOption givenOptions, UnicodeSet allowedOptions)645 private static void checkSuboptions(UOption givenOptions, UnicodeSet allowedOptions) { 646 if (givenOptions.doesOccur && !allowedOptions.containsAll(givenOptions.value)) { 647 throw new IllegalArgumentException("Illegal sub-options for " 648 + 
givenOptions.shortName 649 + ": " 650 + new UnicodeSet().addAll(givenOptions.value).removeAll(allowedOptions) 651 + CldrUtility.LINE_SEPARATOR + "Use -? for help."); 652 } 653 } 654 655 /** 656 * 657 */ removePosix(CLDRFile toMergeIn)658 private static void removePosix(CLDRFile toMergeIn) { 659 Set<String> toRemove = new HashSet<String>(); 660 for (String xpath : toMergeIn) { 661 if (xpath.startsWith("//ldml/posix")) toRemove.add(xpath); 662 } 663 toMergeIn.removeAll(toRemove, false); 664 } 665 666 // private static class References { 667 // static Map<String,Map<String,String>> locale_oldref_newref = new TreeMap<String,Map<String,String>>(); 668 // 669 // static String[][] keys = {{"standard", "S", "[@standard=\"true\"]"}, {"references", "R", ""}}; 670 // UnicodeSet digits = new UnicodeSet("[0-9]"); 671 // int referenceCounter = 0; 672 // Map references_token = new TreeMap(); 673 // Set tokenSet = new HashSet(); 674 // String[] keys2; 675 // boolean isStandard; 676 // References(boolean standard) { 677 // isStandard = standard; 678 // keys2 = standard ? keys[0] : keys[1]; 679 // } 680 // /** 681 // * 682 // */ 683 // public void reset(CLDRFile k) { 684 // } 685 // /** 686 // * 687 // */ 688 // // Samples: 689 // // <language type="ain" references="RP1">阿伊努文</language> 690 // // <reference type="R1" uri="http://www.info.gov.hk/info/holiday_c.htm">二零零五年公眾假期刊登憲報</reference> 691 // private int fix(Map attributes, CLDRFile replacements) { 692 // // we have to have either a references element or attributes. 
693 // String references = (String) attributes.get(keys2[0]); 694 // int result = 0; 695 // if (references != null) { 696 // references = references.trim(); 697 // if (references.startsWith("S") || references.startsWith("R")) { 698 // if (digits.containsAll(references.substring(1))) return 0; 699 // } 700 // String token = (String) references_token.get(references); 701 // if (token == null) { 702 // while (true) { 703 // token = keys2[1] + (++referenceCounter); 704 // if (!tokenSet.contains(token)) break; 705 // } 706 // references_token.put(references, token); 707 // System.out.println("Adding: " + token + "\t" + references); 708 // replacements.add("//ldml/references/reference[@type=\"" + token + "\"]" + keys2[2], references); 709 // result = 1; 710 // } 711 // attributes.put(keys2[0], token); 712 // } 713 // return result; 714 // } 715 // } 716 717 abstract static class CLDRFilter { 718 protected CLDRFile cldrFileToFilter; 719 private String localeID; 720 protected Set<String> availableChildren; 721 private Set<String> toBeRemoved; 722 private CLDRFile toBeReplaced; 723 protected XPathParts parts = new XPathParts(null, null); 724 protected XPathParts fullparts = new XPathParts(null, null); 725 protected Factory factory; 726 setFile(CLDRFile k, Factory factory, Set<String> removal, CLDRFile replacements)727 public final void setFile(CLDRFile k, Factory factory, Set<String> removal, CLDRFile replacements) { 728 this.cldrFileToFilter = k; 729 this.factory = factory; 730 localeID = k.getLocaleID(); 731 this.toBeRemoved = removal; 732 this.toBeReplaced = replacements; 733 handleStart(); 734 } 735 handleStart()736 public void handleStart() { 737 } 738 handlePath(String xpath)739 public abstract void handlePath(String xpath); 740 handleEnd()741 public void handleEnd() { 742 } 743 show(String reason, String detail)744 public void show(String reason, String detail) { 745 System.out.println("%" + localeID + "\t" + reason + "\tConsidering " + detail); 746 } 747 
// Logging and change-recording methods of the filter base class, followed by the FixList registry.

    /**
     * Logs that the value at {@code path} is being kept. Nothing is recorded for removal or replacement.
     *
     * @param path the path whose value is retained
     * @param reason short explanation included in the log line
     */
    public void retain(String path, String reason) {
        System.out.println("%" + localeID + "\t" + reason + "\tRetaining: " + cldrFileToFilter.getStringValue(path)
            + "\t at: " + path);
    }

    /**
     * Marks {@code path} for removal, using "-" as the reason.
     *
     * @param path the path to remove
     */
    public void remove(String path) {
        remove(path, "-");
    }

    /**
     * Marks {@code path} for removal and logs the action. A path that is already marked is ignored,
     * so the same removal is never logged twice.
     *
     * @param path the path to remove
     * @param reason short explanation included in the log line
     */
    public void remove(String path, String reason) {
        if (toBeRemoved.contains(path)) {
            return;
        }
        toBeRemoved.add(path);
        String oldValueOldPath = cldrFileToFilter.getStringValue(path);
        showAction(reason, "Removing", oldValueOldPath, null, null, path, path);
    }

    /**
     * Same as {@link #replace(String, String, String, String)} with "-" as the reason.
     *
     * @param oldFullPath the path currently holding the value
     * @param newFullPath the path that should hold the new value
     * @param newValue the new value, or null to remove
     */
    public void replace(String oldFullPath, String newFullPath, String newValue) {
        replace(oldFullPath, newFullPath, newValue, "-");
    }

    /**
     * Prints one tab-separated log line describing a change:
     * locale, action, reason, «old value», an optional note about the value already at the new path
     * ("/=" when it equals the old value), the new value ("∅" for none, "=" when unchanged),
     * the old path and, when the path changes, the new path.
     *
     * @param reason short explanation of the change
     * @param action label for the kind of change (for example "Removing" or "Moving")
     * @param oldValueOldPath value currently at the old path (may be null)
     * @param oldValueNewPath value currently at the new path (may be null)
     * @param newValue value being written (null when removing)
     * @param oldFullPath the path being changed
     * @param newFullPath the destination path; equal to {@code oldFullPath} when the path does not change
     */
    public void showAction(String reason, String action, String oldValueOldPath, String oldValueNewPath,
        String newValue, String oldFullPath, String newFullPath) {
        boolean samePath = newFullPath.equals(oldFullPath);

        // Only mention the value already present at the destination when the path actually moves.
        String destinationNote;
        if (samePath || oldValueNewPath == null) {
            destinationNote = "";
        } else if (oldValueNewPath.equals(oldValueOldPath)) {
            destinationNote = "/=";
        } else {
            destinationNote = "/«" + oldValueNewPath + "»";
        }

        String newValueNote;
        if (newValue == null) {
            newValueNote = "∅";
        } else if (newValue.equals(oldValueOldPath)) {
            newValueNote = "=";
        } else {
            newValueNote = "«" + newValue + "»";
        }

        System.out.println("%"
            + localeID
            + "\t" + action
            + "\t" + reason
            + "\t«" + oldValueOldPath + "»"
            + destinationNote
            + "\t→\t" + newValueNote
            + "\t" + oldFullPath
            + (samePath ? "" : "\t→\t" + newFullPath));
    }

    /**
     * Records a replacement and logs it. The old path is first resolved to its full path when the
     * file has one. The logged action depends on the situation:
     *
     * <pre>
     * path same,    new value null:              Removing         v      p
     * path same,    no old value:                Adding           v'     p
     * path same,    old value present:           Replacing        v v'   p
     * path changes, nothing at new path:         Moving           v      p p'
     * path changes, new path already has v':     Redundant Value  v v'   p p'
     * path changes, new path has another value:  Overriding       v v'   p p'
     * </pre>
     *
     * Whenever the path changes, the old path is marked for removal and the new path/value pair is
     * recorded, whichever of the last three labels is logged.
     *
     * @param oldFullPath the path currently holding the value
     * @param newFullPath the path that should hold the new value
     * @param newValue the new value, or null to remove (only meaningful when the path is unchanged)
     * @param reason short explanation included in the log line
     */
    public void replace(String oldFullPath, String newFullPath, String newValue, String reason) {
        String oldValueOldPath = cldrFileToFilter.getStringValue(oldFullPath);
        String temp = cldrFileToFilter.getFullXPath(oldFullPath);
        if (temp != null) {
            oldFullPath = temp;
        }
        boolean pathSame = oldFullPath.equals(newFullPath);

        if (pathSame) {
            if (newValue == null) {
                remove(oldFullPath, reason);
            } else if (oldValueOldPath == null) {
                toBeReplaced.add(oldFullPath, newValue);
                showAction(reason, "Adding", oldValueOldPath, null, newValue, oldFullPath, newFullPath);
            } else {
                toBeReplaced.add(oldFullPath, newValue);
                showAction(reason, "Replacing", oldValueOldPath, null, newValue, oldFullPath, newFullPath);
            }
            return;
        }

        String oldValueNewPath = cldrFileToFilter.getStringValue(newFullPath);
        toBeRemoved.add(oldFullPath);
        toBeReplaced.add(newFullPath, newValue);

        if (oldValueNewPath == null) {
            showAction(reason, "Moving", oldValueOldPath, oldValueNewPath, newValue, oldFullPath, newFullPath);
        } else if (oldValueNewPath.equals(newValue)) {
            showAction(reason, "Redundant Value", oldValueOldPath, oldValueNewPath, newValue, oldFullPath, newFullPath);
        } else {
            showAction(reason, "Overriding", oldValueOldPath, oldValueNewPath, newValue, oldFullPath, newFullPath);
        }
    }

    /**
     * Adds a new path-value pair to the CLDRFile. When the path already has a value, this behaves
     * like a replacement at the same path.
     *
     * @param path the new path
     * @param value the value
     * @param reason Reason for adding the path and value.
     */
    public void add(String path, String value, String reason) {
        String oldValueOldPath = cldrFileToFilter.getStringValue(path);
        if (oldValueOldPath == null) {
            // A pending removal of the same path must not cancel the value being added.
            toBeRemoved.remove(path);
            toBeReplaced.add(path, value);
            showAction(reason, "Adding", oldValueOldPath, null, value, path, path);
        } else {
            // Pass the caller's reason through; the 3-argument overload would log "-" instead.
            replace(path, path, value, reason);
        }
    }

    /**
     * @return the file that collects the replacement path/value pairs
     */
    public CLDRFile getReplacementFile() {
        return toBeReplaced;
    }

    /** Hook invoked by {@link FixList#handleCleanup()}; does nothing by default. */
    public void handleCleanup() {
    }

    /** Hook invoked by {@link FixList#handleSetup()}; does nothing by default. */
    public void handleSetup() {
    }

    /**
     * @return the locale ID this filter is currently working on
     */
    public String getLocaleID() {
        return localeID;
    }
}

/**
 * Registry of filters keyed by a single ASCII option letter. The option string passed to
 * {@link #setFile} selects which filters run, in the order the letters appear in that string.
 */
static class FixList {
    // simple class, so we use quick list
    CLDRFilter[] filters = new CLDRFilter[128]; // only ascii
    String[] helps = new String[128]; // only ascii
    UnicodeSet options = new UnicodeSet();
    String inputOptions = null;

    /**
     * Registers a help text for {@code letter} without an associated filter.
     *
     * @param letter ASCII option letter
     * @param help description shown by {@link #showHelp()}
     */
    void add(char letter, String help) {
        add(letter, help, null);
    }

    /** Calls {@code handleSetup()} on every registered filter, selected or not. */
    public void handleSetup() {
        for (CLDRFilter filter : filters) {
            if (filter != null) {
                filter.handleSetup();
            }
        }
    }

    /** Calls {@code handleCleanup()} on every registered filter, selected or not. */
    public void handleCleanup() {
        for (CLDRFilter filter : filters) {
            if (filter != null) {
                filter.handleCleanup();
            }
        }
    }

    /**
     * @return the set of all registered option letters
     */
    public UnicodeSet getOptions() {
        return options;
    }

    /**
     * Registers {@code filter} (which may be null) and its help text under {@code letter}.
     *
     * @param letter ASCII option letter; must be below 128
     * @param help description shown by {@link #showHelp()}
     * @param filter the filter to run for this letter, or null for a help-only entry
     * @throws IllegalArgumentException if the letter is not ASCII or is already registered
     */
    void add(char letter, String help, CLDRFilter filter) {
        if (letter >= helps.length) {
            throw new IllegalArgumentException("Letter must be ASCII: " + letter);
        }
        if (helps[letter] != null) {
            throw new IllegalArgumentException("Duplicate letter: " + letter);
        }
        filters[letter] = filter;
        helps[letter] = help;
        options.add(letter);
    }

    /**
     * Remembers {@code inputOptions} and hands the file, factory and result collectors to each
     * selected filter. A RuntimeException from a filter is logged to stderr and rethrown.
     *
     * @param file the file to be filtered
     * @param inputOptions option letters selecting the filters to run
     * @param factory factory passed through to the filters
     * @param removal collector for paths to remove
     * @param replacements collector for replacement path/value pairs
     */
    void setFile(CLDRFile file, String inputOptions, Factory factory, Set<String> removal, CLDRFile replacements) {
        this.inputOptions = inputOptions;
        for (int i = 0; i < inputOptions.length(); ++i) {
            CLDRFilter filter = filterFor(inputOptions.charAt(i));
            if (filter == null) {
                continue;
            }
            try {
                filter.setFile(file, factory, removal, replacements);
            } catch (RuntimeException e) {
                System.err.println("Failure in " + filter.localeID + "\t SETFILE");
                throw e;
            }
        }
    }

    /** Calls {@code handleStart()} on each selected filter; failures are logged and rethrown. */
    void handleStart() {
        for (int i = 0; i < inputOptions.length(); ++i) {
            CLDRFilter filter = filterFor(inputOptions.charAt(i));
            if (filter == null) {
                continue;
            }
            try {
                filter.handleStart();
            } catch (RuntimeException e) {
                System.err.println("Failure in " + filter.localeID + "\t START");
                throw e;
            }
        }
    }

    /**
     * Passes {@code xpath} to each selected filter; failures are logged with the path and rethrown.
     *
     * @param xpath the path to process
     */
    void handlePath(String xpath) {
        for (int i = 0; i < inputOptions.length(); ++i) {
            CLDRFilter filter = filterFor(inputOptions.charAt(i));
            if (filter == null) {
                continue;
            }
            try {
                filter.handlePath(xpath);
            } catch (RuntimeException e) {
                System.err.println("Failure in " + filter.localeID + "\t " + xpath);
                throw e;
            }
        }
    }

    /** Calls {@code handleEnd()} on each selected filter; failures are logged and rethrown. */
    void handleEnd() {
        for (int i = 0; i < inputOptions.length(); ++i) {
            CLDRFilter filter = filterFor(inputOptions.charAt(i));
            if (filter == null) {
                continue;
            }
            try {
                filter.handleEnd();
            } catch (RuntimeException e) {
                System.err.println("Failure in " + filter.localeID + "\t END");
                throw e;
            }
        }
    }

    /**
     * @return one line per registered letter: tab, letter, tab, space, help text, newline
     */
    String showHelp() {
        StringBuilder result = new StringBuilder();
        for (int i = 0; i < helps.length; ++i) {
            if (helps[i] != null) {
                result.append("\t").append((char) i).append("\t ").append(helps[i]).append(XPathParts.NEWLINE);
            }
        }
        return result.toString();
    }

    /**
     * Looks up the filter for an option character. Characters outside the table (non-ASCII)
     * simply have no filter instead of causing an ArrayIndexOutOfBoundsException.
     */
    private CLDRFilter filterFor(char letter) {
        return letter < filters.length ? filters[letter] : null;
    }
}

static Set<String> totalSkeletons = new HashSet<String>();
979 static Map<String, String> rootUnitMap = new HashMap<String, String>(); 980 981 static { 982 rootUnitMap.put("second", "s"); 983 rootUnitMap.put("minute", "min"); 984 rootUnitMap.put("hour", "h"); 985 rootUnitMap.put("day", "d"); 986 rootUnitMap.put("week", "w"); 987 rootUnitMap.put("month", "m"); 988 rootUnitMap.put("year", "y"); 989 990 fixList.add('z', "Remove deprecated elements", new CLDRFilter() { 991 992 public boolean isDeprecated(DtdType type, String element, String attribute, String value) { 993 return DtdData.getInstance(type).isDeprecated(element, attribute, value); 994 } 995 996 public boolean isDeprecated(DtdType type, String path) { 997 998 XPathParts parts = XPathParts.getInstance(path); 999 for (int i = 0; i < parts.size(); ++i) { 1000 String element = parts.getElement(i); 1001 if (isDeprecated(type, element, "*", "*")) { 1002 return true; 1003 } 1004 for (Entry<String, String> entry : parts.getAttributes(i).entrySet()) { 1005 String attribute = entry.getKey(); 1006 String value = entry.getValue(); 1007 if (isDeprecated(type, element, attribute, value)) { 1008 return true; 1009 } 1010 } 1011 } 1012 return false; 1013 } 1014 1015 @Override 1016 public void handlePath(String xpath) { 1017 String fullPath = cldrFileToFilter.getFullXPath(xpath); 1018 XPathParts parts = XPathParts.getInstance(fullPath); 1019 for (int i = 0; i < parts.size(); ++i) { 1020 String element = parts.getElement(i); 1021 if (dtdData.isDeprecated(element, "*", "*")) { 1022 remove(fullPath, "Deprecated element"); 1023 return; 1024 } 1025 for (Entry<String, String> entry : parts.getAttributes(i).entrySet()) { 1026 String attribute = entry.getKey(); 1027 String value = entry.getValue(); 1028 if (dtdData.isDeprecated(element, attribute, value)) { 1029 remove(fullPath, "Element with deprecated attribute(s)"); 1030 } 1031 } 1032 } 1033 } 1034 }); 1035 1036 fixList.add('e', "fix Interindic", new CLDRFilter() { 1037 public void handlePath(String xpath) { 1038 if 
(xpath.indexOf("=\"InterIndic\"") < 0) return; 1039 String v = cldrFileToFilter.getStringValue(xpath); 1040 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1041 fullparts.set(fullXPath); 1042 Map<String, String> attributes = fullparts.findAttributes("transform"); 1043 String oldValue = attributes.get("direction"); 1044 if ("both".equals(oldValue)) { 1045 attributes.put("direction", "forward"); 1046 replace(xpath, fullparts.toString(), v); 1047 } 1048 } 1049 }); 1050 1051 fixList.add('B', "fix bogus values", new CLDRFilter() { 1052 RegexLookup<Integer> paths = RegexLookup.<Integer> of() 1053 .setPatternTransform(RegexLookup.RegexFinderTransformPath2) 1054 .add("//ldml/localeDisplayNames/languages/language[@type='([^']*)']", 0) 1055 .add("//ldml/localeDisplayNames/scripts/script[@type='([^']*)']", 0) 1056 .add("//ldml/localeDisplayNames/territories/territory[@type='([^']*)']", 0) 1057 .add("//ldml/dates/timeZoneNames/metazone[@type='([^']*)']", 0) 1058 .add("//ldml/dates/timeZoneNames/zone[@type='([^']*)']/exemplarCity", 0) 1059 .add("//ldml/numbers/currencies/currency[@type='([^']*)']/displayName", 0); 1060 Output<String[]> arguments = new Output<>(); 1061 CLDRFile english = CLDRConfig.getInstance().getEnglish(); 1062 boolean skip; 1063 1064 @Override 1065 public void handleStart() { 1066 CLDRFile resolved = factory.make(cldrFileToFilter.getLocaleID(), true); 1067 UnicodeSet exemplars = resolved.getExemplarSet(ExemplarType.main, WinningChoice.WINNING); 1068 skip = exemplars.containsSome('a', 'z'); 1069 // TODO add simpler way to skip file entirely 1070 } 1071 1072 public void handlePath(String xpath) { 1073 if (skip) { 1074 return; 1075 } 1076 Integer lookupValue = paths.get(xpath, null, arguments); 1077 if (lookupValue == null) { 1078 return; 1079 } 1080 String type = arguments.value[1]; 1081 String value = cldrFileToFilter.getStringValue(xpath); 1082 if (value.equals(type)) { 1083 remove(xpath, "Matches code"); 1084 return; 1085 } 1086 String evalue = 
english.getStringValue(xpath); 1087 if (value.equals(evalue)) { 1088 remove(xpath, "Matches English"); 1089 return; 1090 } 1091 } 1092 }); 1093 1094 fixList.add('s', "fix alt accounting", new CLDRFilter() { 1095 @Override 1096 public void handlePath(String xpath) { 1097 parts.set(xpath); 1098 if (!parts.containsAttributeValue("alt", "accounting")) return; 1099 String oldFullXPath = cldrFileToFilter.getFullXPath(xpath); 1100 String value = cldrFileToFilter.getStringValue(xpath); 1101 fullparts.set(oldFullXPath); 1102 fullparts.removeAttribute("pattern", "alt"); 1103 fullparts.setAttribute("currencyFormat", "type", "accounting"); 1104 String newFullXPath = fullparts.toString(); 1105 replace(oldFullXPath, newFullXPath, value, "Move alt=accounting value to new path"); 1106 } 1107 }); 1108 1109 fixList.add('n', "add unit displayName", new CLDRFilter() { 1110 @Override 1111 public void handlePath(String xpath) { 1112 if (xpath.indexOf("/units/unitLength[@type=\"long\"]") < 0 || xpath.indexOf("/unitPattern[@count=\"other\"]") < 0 || 1113 xpath.indexOf("[@draft=\"unconfirmed\"]") >= 0) { 1114 return; 1115 } 1116 String value = cldrFileToFilter.getStringValue(xpath); 1117 String newValue = null; 1118 if (value.startsWith("{0}")) { 1119 newValue = value.substring(3).trim(); 1120 } else if (value.endsWith("{0}")) { 1121 newValue = value.substring(0, value.length() - 3).trim(); 1122 } else { 1123 System.out.println("unitPattern-other does not start or end with \"{0}\": \"" + value + "\""); 1124 return; 1125 } 1126 1127 String oldFullXPath = cldrFileToFilter.getFullXPath(xpath); 1128 String newFullXPath = oldFullXPath.substring(0, oldFullXPath.indexOf("unitPattern")).concat("displayName[@draft=\"provisional\"]"); 1129 add(newFullXPath, newValue, "create unit displayName-long from unitPattern-long-other"); 1130 String newFullXPathShort = newFullXPath.replace("[@type=\"long\"]", "[@type=\"short\"]"); 1131 add(newFullXPathShort, newValue, "create unit displayName-short from 
unitPattern-long-other"); 1132 } 1133 }); 1134 1135 fixList.add('x', "retain paths", new CLDRFilter() { 1136 Matcher m = null; 1137 1138 public void handlePath(String xpath) { 1139 if (m == null) { 1140 m = PatternCache.get(options[PATH].value).matcher(""); 1141 } 1142 //String v = cldrFileToFilter.getStringValue(xpath); 1143 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1144 if (!m.reset(fullXPath).matches()) { 1145 remove(xpath); 1146 } 1147 } 1148 }); 1149 1150 fixList.add('_', "remove superfluous compound language translations", new CLDRFilter() { 1151 private CLDRFile resolved; 1152 1153 public void handleStart() { 1154 resolved = factory.make(cldrFileToFilter.getLocaleID(), true); 1155 } 1156 1157 public void handlePath(String xpath) { 1158 if (!xpath.contains("_")) return; 1159 if (!xpath.contains("/language")) return; 1160 String languageCode = parts.set(xpath).findAttributeValue("language", "type"); 1161 String v = resolved.getStringValue(xpath); 1162 if (v.equals(languageCode)) { 1163 remove(xpath, "same as language code"); 1164 return; 1165 } 1166 String generatedTranslation = resolved.getName(languageCode, true); 1167 if (v.equals(generatedTranslation)) { 1168 remove(xpath, "superfluous compound language"); 1169 } 1170 String spacelessGeneratedTranslation = generatedTranslation.replace(" ", ""); 1171 if (v.equals(spacelessGeneratedTranslation)) { 1172 remove(xpath, "superfluous compound language (after removing space)"); 1173 } 1174 } 1175 }); 1176 1177 fixList.add('l', "change language code", new CLDRFilter() { 1178 private CLDRFile resolved; 1179 1180 public void handleStart() { 1181 resolved = factory.make(cldrFileToFilter.getLocaleID(), true); 1182 } 1183 1184 public void handlePath(String xpath) { 1185 if (!xpath.contains("/language")) return; 1186 String languageCode = parts.set(xpath).findAttributeValue("language", "type"); 1187 String v = resolved.getStringValue(xpath); 1188 if (!languageCode.equals("swc")) return; 1189 
parts.setAttribute("language", "type", "sw_CD"); 1190 replace(xpath, parts.toString(), v); 1191 } 1192 }); 1193 1194 if (false) fixList.add('s', "fix stand-alone narrows", new CLDRFilter() { 1195 public void handlePath(String xpath) { 1196 if (xpath.indexOf("[@type=\"narrow\"]") < 0) return; 1197 parts.set(xpath); 1198 String element = ""; 1199 if (parts.findElement("dayContext") >= 0) { 1200 element = "dayContext"; 1201 } else if (parts.findElement("monthContext") >= 0) { 1202 element = "monthContext"; 1203 } else 1204 return; 1205 1206 // change the element type UNLESS it conflicts 1207 parts.setAttribute(element, "type", "stand-alone"); 1208 if (cldrFileToFilter.getStringValue(parts.toString()) != null) return; 1209 1210 String v = cldrFileToFilter.getStringValue(xpath); 1211 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1212 fullparts.set(fullXPath); 1213 fullparts.setAttribute(element, "type", "stand-alone"); 1214 replace(xpath, fullparts.toString(), v); 1215 } 1216 }); 1217 1218 fixList.add('m', "remove multiple alt-variants", new CLDRFilter() { 1219 1220 public void handleStart() { 1221 } 1222 1223 public void handlePath(String xpath) { 1224 parts.set(xpath); 1225 if (!parts.containsAttributeValue("alt", "variant")) return; 1226 String variantValue = cldrFileToFilter.getStringValue(xpath); 1227 String nonVariantXpath = xpath.replaceAll("\\[\\@alt=\"variant\"\\]", ""); 1228 String nonVariantValue = cldrFileToFilter.getStringValue(nonVariantXpath); 1229 if (variantValue.equals(nonVariantValue)) { 1230 remove(xpath, "removing superfluous alt-variant value"); 1231 } 1232 } 1233 }); 1234 1235 fixList.add('g', "Swap alt/non-alt values for Czechia", new CLDRFilter() { 1236 1237 public void handleStart() { 1238 } 1239 1240 public void handlePath(String xpath) { 1241 parts.set(xpath); 1242 if (!parts.containsAttributeValue("alt", "variant") || !parts.containsAttributeValue("type", "CZ")) return; 1243 String variantValue = 
cldrFileToFilter.getStringValue(xpath); 1244 String nonVariantXpath = xpath.replaceAll("\\[\\@alt=\"variant\"\\]", ""); 1245 String nonVariantValue = cldrFileToFilter.getStringValue(nonVariantXpath); 1246 replace(xpath, xpath, nonVariantValue); 1247 replace(nonVariantXpath, nonVariantXpath, variantValue); 1248 } 1249 }); 1250 1251 fixList.add('u', "fix duration unit patterns", new CLDRFilter() { 1252 1253 public void handlePath(String xpath) { 1254 if (!xpath.contains("/units")) { 1255 return; 1256 } 1257 if (!xpath.contains("/durationUnitPattern")) { 1258 return; 1259 } 1260 1261 String value = cldrFileToFilter.getStringValue(xpath); 1262 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1263 1264 parts.set(fullXPath); 1265 1266 String unittype = parts.findAttributeValue("durationUnit", "type"); 1267 1268 String newFullXpath = "//ldml/units/durationUnit[@type=\"" + unittype + "\"]/durationUnitPattern"; 1269 replace(fullXPath, newFullXpath, value, "converting to new duration unit structure"); 1270 } 1271 }); 1272 1273 fixList.add('a', "Fix 0/1", new CLDRFilter() { 1274 final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze(); 1275 PluralInfo info; 1276 1277 @Override 1278 public void handleStart() { 1279 info = SupplementalDataInfo.getInstance().getPlurals(super.localeID); 1280 } 1281 1282 @Override 1283 public void handlePath(String xpath) { 1284 1285 if (xpath.indexOf("count") < 0) { 1286 return; 1287 } 1288 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1289 parts.set(fullpath); 1290 String countValue = parts.getAttributeValue(-1, "count"); 1291 if (!DIGITS.containsAll(countValue)) { 1292 return; 1293 } 1294 int intValue = Integer.parseInt(countValue); 1295 Count count = info.getCount(intValue); 1296 parts.setAttribute(-1, "count", count.toString()); 1297 String newPath = parts.toString(); 1298 String oldValue = cldrFileToFilter.getStringValue(newPath); 1299 String value = cldrFileToFilter.getStringValue(xpath); 1300 if (oldValue != null) { 
1301 String fixed = oldValue.replace("{0}", countValue); 1302 if (value.equals(oldValue) 1303 || value.equals(fixed)) { 1304 remove(fullpath, "Superfluous given: " 1305 + count + "→«" + oldValue + "»"); 1306 } else { 1307 remove(fullpath, "Can’t replace: " 1308 + count + "→«" + oldValue + "»"); 1309 } 1310 return; 1311 } 1312 replace(fullpath, newPath, value, "Moving 0/1"); 1313 } 1314 }); 1315 1316 fixList.add('b', "Prep for bulk import", new CLDRFilter() { 1317 1318 public void handlePath(String xpath) { 1319 1320 if (!options[USER].doesOccur) return; 1321 String userID = options[USER].value; 1322 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1323 String value = cldrFileToFilter.getStringValue(xpath); 1324 parts.set(fullpath); 1325 parts.addAttribute("draft", "unconfirmed"); 1326 parts.addAttribute("alt", "proposed-u" + userID + "-implicit1.8"); 1327 String newPath = parts.toString(); 1328 replace(fullpath, newPath, value); 1329 } 1330 }); 1331 1332 fixList.add('c', "Fix transiton from an old currency code to a new one", new CLDRFilter() { 1333 public void handlePath(String xpath) { 1334 String oldCurrencyCode = "VEF"; 1335 String newCurrencyCode = "VES"; 1336 int fromDate = 2008; 1337 int toDate = 2018; 1338 String leadingParenString = " ("; 1339 String trailingParenString = ")"; 1340 String separator = "\u2013"; 1341 String languageTag = "root"; 1342 1343 if (xpath.indexOf("/currency[@type=\"" + oldCurrencyCode + "\"]/displayName") < 0) { 1344 return; 1345 } 1346 String value = cldrFileToFilter.getStringValue(xpath); 1347 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1348 String newFullXPath = fullXPath.replace(oldCurrencyCode, newCurrencyCode); 1349 cldrFileToFilter.add(newFullXPath, value); 1350 1351 // Exceptions for locales that use an alternate numbering system or a different format for the dates at 1352 // the end. 
1353 // Add additional ones as necessary 1354 String localeID = cldrFileToFilter.getLocaleID(); 1355 if (localeID.equals("ne")) { 1356 languageTag = "root-u-nu-deva"; 1357 } else if (localeID.equals("bn")) { 1358 languageTag = "root-u-nu-beng"; 1359 } else if (localeID.equals("ar")) { 1360 leadingParenString = " - "; 1361 trailingParenString = ""; 1362 } else if (localeID.equals("fa")) { 1363 languageTag = "root-u-nu-arabext"; 1364 separator = Utility.unescape(" \\u062A\\u0627 "); 1365 } 1366 1367 NumberFormat nf = NumberFormat.getInstance(ULocale.forLanguageTag(languageTag)); 1368 nf.setGroupingUsed(false); 1369 1370 String tagString = leadingParenString + nf.format(fromDate) + separator + nf.format(toDate) 1371 + trailingParenString; 1372 1373 replace(fullXPath, fullXPath, value + tagString); 1374 1375 } 1376 }); 1377 1378 fixList.add('p', "input-processor", new CLDRFilter() { 1379 private DisplayAndInputProcessor inputProcessor; 1380 1381 public void handleStart() { 1382 inputProcessor = new DisplayAndInputProcessor(cldrFileToFilter, true); 1383 } 1384 1385 public void handleEnd() { 1386 inputProcessor = null; // clean up, just in case 1387 } 1388 1389 public void handlePath(String xpath) { 1390 String value = cldrFileToFilter.getStringValue(xpath); 1391 if (!value.equals(value.trim())) { 1392 value = value; // for debugging 1393 } 1394 String newValue = inputProcessor.processInput(xpath, value, null); 1395 if (value.equals(newValue)) { 1396 return; 1397 } 1398 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1399 replace(fullXPath, fullXPath, newValue); 1400 } 1401 }); 1402 1403 fixList.add('t', "Fix missing count values groups", new CLDRFilter() { 1404 1405 public void handlePath(String xpath) { 1406 if (xpath.indexOf("@count=\"other\"") < 0) { 1407 return; 1408 } 1409 1410 String value = cldrFileToFilter.getStringValue(xpath); 1411 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1412 String[] missingCounts = { "one" }; 1413 for (String count 
: missingCounts) { 1414 String newFullXPath = fullXPath.replace("other", count); 1415 if (cldrFileToFilter.getWinningValue(newFullXPath) == null) { 1416 add(newFullXPath, value, "Adding missing plural form"); 1417 } 1418 } 1419 1420 } 1421 }); 1422 1423 fixList.add('f', "NFC (all but transforms, exemplarCharacters, pc, sc, tc, qc, ic)", new CLDRFilter() { 1424 public void handlePath(String xpath) { 1425 if (xpath.indexOf("/segmentation") >= 0 1426 || xpath.indexOf("/transforms") >= 0 1427 || xpath.indexOf("/exemplarCharacters") >= 0 1428 || xpath.indexOf("/pc") >= 0 1429 || xpath.indexOf("/sc") >= 0 1430 || xpath.indexOf("/tc") >= 0 1431 || xpath.indexOf("/qc") >= 0 1432 || xpath.indexOf("/ic") >= 0) return; 1433 String value = cldrFileToFilter.getStringValue(xpath); 1434 String nfcValue = Normalizer.compose(value, false); 1435 if (value.equals(nfcValue)) return; 1436 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1437 replace(fullXPath, fullXPath, nfcValue); 1438 } 1439 }); 1440 1441 fixList.add('v', "remove illegal codes", new CLDRFilter() { 1442 1443 /* 1444 * Set legalCurrencies; 1445 * } 1446 * { 1447 * StandardCodes sc = StandardCodes.make(); 1448 * legalCurrencies = new TreeSet(sc.getAvailableCodes("currency")); 1449 * // first remove non-ISO 1450 * for (Iterator it = legalCurrencies.iterator(); it.hasNext();) { 1451 * String code = (String) it.next(); 1452 * List data = sc.getFullData("currency", code); 1453 * if ("X".equals(data.get(3))) it.remove(); 1454 * } 1455 * } 1456 */ 1457 StandardCodes sc = StandardCodes.make(); 1458 String[] codeTypes = { "language", "script", "territory", "currency" }; 1459 1460 public void handlePath(String xpath) { 1461 if (xpath.indexOf("/currency") < 0 1462 && xpath.indexOf("/timeZoneNames") < 0 1463 && xpath.indexOf("/localeDisplayNames") < 0) return; 1464 parts.set(xpath); 1465 String code; 1466 for (int i = 0; i < codeTypes.length; ++i) { 1467 code = parts.findAttributeValue(codeTypes[i], "type"); 1468 if (code 
!= null) { 1469 if (!sc.getGoodAvailableCodes(codeTypes[i]).contains(code)) remove(xpath); 1470 return; 1471 } 1472 } 1473 code = parts.findAttributeValue("zone", "type"); 1474 if (code != null) { 1475 if (code.indexOf("/GMT") >= 0) remove(xpath); 1476 } 1477 1478 } 1479 }); 1480 1481 if (false) fixList.add('q', "fix exemplars", new CLDRFilter() { 1482 Collator col; 1483 Collator spaceCol; 1484 UnicodeSet uppercase = new UnicodeSet("[[:Uppercase:]-[\u0130]]"); 1485 UnicodeSetIterator usi = new UnicodeSetIterator(); 1486 1487 public void handleStart() { 1488 String locale = cldrFileToFilter.getLocaleID(); 1489 col = Collator.getInstance(new ULocale(locale)); 1490 spaceCol = Collator.getInstance(new ULocale(locale)); 1491 spaceCol.setStrength(col.PRIMARY); 1492 } 1493 1494 public void handlePath(String xpath) { 1495 if (xpath.indexOf("/exemplarCharacters") < 0) return; 1496 String value = cldrFileToFilter.getStringValue(xpath); 1497 try { 1498 String fixedValue = value.replaceAll("- ", "-"); // TODO fix hack 1499 if (!fixedValue.equals(value)) { 1500 System.out.println("Changing: " + value); 1501 } 1502 fixedValue = "[" + fixedValue + "]"; // add parens in case forgotten 1503 UnicodeSet s1 = new UnicodeSet(fixedValue).removeAll(uppercase); 1504 UnicodeSet s = new UnicodeSet(); 1505 for (usi.reset(s1); usi.next();) { 1506 s.add(Normalizer.compose(usi.getString(), false)); 1507 } 1508 1509 String fixedExemplar1 = new UnicodeSetPrettyPrinter() 1510 .setOrdering(col != null ? col : Collator.getInstance(ULocale.ROOT)) 1511 .setSpaceComparator(col != null ? 
col : Collator.getInstance(ULocale.ROOT) 1512 .setStrength2(Collator.PRIMARY)) 1513 .setCompressRanges(true) 1514 .format(s); 1515 1516 if (!value.equals(fixedExemplar1)) { 1517 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1518 replace(fullXPath, fullXPath, fixedExemplar1); 1519 } 1520 } catch (RuntimeException e) { 1521 System.out.println("Illegal UnicodeSet: " + cldrFileToFilter.getLocaleID() + "\t" + value); 1522 } 1523 } 1524 }); 1525 1526 fixList.add('w', "fix alt='...proposed' when there is no alternative", new CLDRFilter() { 1527 private XPathParts parts = new XPathParts(); 1528 private Set<String> newFullXPathSoFar = new HashSet<String>(); 1529 1530 public void handlePath(String xpath) { 1531 if (xpath.indexOf("proposed") < 0) return; 1532 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1533 String newFullXPath = parts.set(fullXPath).removeProposed().toString(); 1534 // now see if there is an uninherited value 1535 String value = cldrFileToFilter.getStringValue(xpath); 1536 String baseValue = cldrFileToFilter.getStringValue(newFullXPath); 1537 if (baseValue != null) { 1538 // if the value AND the fullxpath are the same as what we have, then delete 1539 if (value.equals(baseValue)) { 1540 String baseFullXPath = cldrFileToFilter.getFullXPath(newFullXPath); 1541 if (baseFullXPath.equals(newFullXPath)) { 1542 remove(xpath, "alt=base"); 1543 } 1544 } 1545 return; // there is, so skip 1546 } 1547 // there isn't, so modif if we haven't done so already 1548 if (!newFullXPathSoFar.contains(newFullXPath)) { 1549 replace(fullXPath, newFullXPath, value); 1550 newFullXPathSoFar.add(newFullXPath); 1551 } 1552 } 1553 }); 1554 1555 // fixList.add('l', "Remove losing items", new CLDRFilter() { 1556 // public void handlePath(String xpath) { 1557 // String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1558 // if (fullXPath.indexOf("proposed-x10") < 0) return; 1559 // if (fullXPath.indexOf("unconfirmed") < 0) return; 1560 // remove(fullXPath, "Losing 
item"); 1561 // } 1562 // }); 1563 1564 if (false) fixList.add('z', "fix ZZ", new CLDRFilter() { 1565 public void handlePath(String xpath) { 1566 if (xpath.indexOf("/exemplarCharacters") < 0) return; 1567 String value = cldrFileToFilter.getStringValue(xpath); 1568 if (value.indexOf("[:") < 0) return; 1569 UnicodeSet s = new UnicodeSet(value); 1570 s.add(0xFFFF); 1571 s.remove(0xFFFF); // force flattening 1572 // at this point, we only have currency formats 1573 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1574 replace(fullXPath, fullXPath, s.toPattern(false)); 1575 } 1576 }); 1577 1578 // fixList.add('z', "GenerateIndex", new CLDRFilter() { 1579 // @Override 1580 // public void handleStart() { 1581 // // TODO Auto-generated method stub 1582 // super.handleStart(); 1583 // if (cldrFileToFilter.getExemplarSet("", WinningChoice.WINNING) != null) { 1584 // String indexPattern = GenerateIndexCharacters.getConstructedIndexSet(cldrFileToFilter.getLocaleID(), 1585 // cldrFileToFilter); 1586 // replace("//ldml/characters/exemplarCharacters[@type=\"index\"][@draft=\"unconfirmed\"]", 1587 // "//ldml/characters/exemplarCharacters[@type=\"index\"][@draft=\"unconfirmed\"]", indexPattern); 1588 // } 1589 // } 1590 // public void handlePath(String xpath) { 1591 // return; 1592 // } 1593 // }); 1594 1595 // fixList.add('k', "fix kk/KK", new CLDRFilter() { 1596 // DateTimePatternGenerator dtpg; 1597 // DateTimePatternGenerator.PatternInfo patternInfo = new DateTimePatternGenerator.PatternInfo(); 1598 // DateTimePatternGenerator.FormatParser fp = new DateTimePatternGenerator.FormatParser(); 1599 // Set dateFormatItems = new TreeSet(); 1600 // Set standardFormats = new TreeSet(); 1601 // 1602 // public void handleStart() { 1603 // dtpg = DateTimePatternGenerator.getEmptyInstance(); // should add clear() 1604 // dateFormatItems.clear(); 1605 // standardFormats.clear(); 1606 // } 1607 // 1608 // // <dateFormatItem id="KKmm" alt="proposed-u133-2" draft="provisional">hh:mm 
a</dateFormatItem> 1609 // public void handlePath(String xpath) { 1610 // if (xpath.indexOf("/dateFormatItem") >= 0) { 1611 // System.out.println(cldrFileToFilter.getStringValue(xpath) + "\t" + xpath); 1612 // dateFormatItems.add(xpath); 1613 // } 1614 // if (xpath.indexOf("gregorian") >= 0 && xpath.indexOf("pattern") >= 0) { 1615 // if (xpath.indexOf("dateFormat") >= 0 || xpath.indexOf("timeFormat") >= 0) { 1616 // standardFormats.add(xpath); 1617 // } 1618 // } 1619 // } 1620 // public void handleEnd() { 1621 // //if (dateFormatItems.size() == 0) return; // nothing to do 1622 // 1623 // // now add all the standard patterns 1624 // // algorithmically construct items from the standard formats 1625 // 1626 // Set standardSkeletons = new HashSet(); 1627 // List items = new ArrayList(); 1628 // for (Iterator it = standardFormats.iterator(); it.hasNext();) { 1629 // String xpath = (String) it.next(); 1630 // String value = cldrFileToFilter.getStringValue(xpath); 1631 // dtpg.addPattern(value, false, patternInfo); 1632 // standardSkeletons.add(dtpg.getSkeleton(value)); 1633 // if (false) { // code for adding guesses 1634 // fp.set(value); 1635 // items.clear(); 1636 // fp.getAutoPatterns(value, items); 1637 // for (int i = 0; i < items.size(); ++i) { 1638 // String autoItem = (String)items.get(i); 1639 // dtpg.addPattern(autoItem, false, patternInfo); 1640 // if (patternInfo.status == patternInfo.OK) show("generate", value + " ==> " + autoItem); 1641 // } 1642 // } 1643 // retain(xpath, "-(std)"); 1644 // } 1645 // 1646 // for (Iterator it = dateFormatItems.iterator(); it.hasNext();) { 1647 // String xpath = (String) it.next(); 1648 // String value = cldrFileToFilter.getStringValue(xpath); 1649 // String oldValue = value; 1650 // 1651 // String skeleton = dtpg.getSkeleton(value); 1652 // // remove if single field 1653 // if (dtpg.isSingleField(skeleton)) { 1654 // remove(xpath, "Single Field"); 1655 // continue; 1656 // } 1657 // // remove if date + time 1658 // 
fp.set(value); 1659 // // the following use fp, so make sure it is set 1660 // 1661 // if (fp.hasDateAndTimeFields()) { 1662 // remove(xpath, "Date + Time"); 1663 // continue; 1664 // } 1665 // 1666 // if (containsSS()) { 1667 // remove(xpath, "SS"); 1668 // continue; 1669 // } 1670 // 1671 // // see if we have a k or K & fix 1672 // value = fixKk(xpath, value); 1673 // 1674 // dtpg.addPattern(value, false, patternInfo); 1675 // 1676 // // // in case we changed value 1677 // // skeleton = dtpg.getSkeleton(value); 1678 // // String fullPath = cldrFileToFilter.getFullXPath(xpath); 1679 // // String oldFullPath = fullPath; 1680 // // parts.set(fullPath); 1681 // // Map attributes = parts.getAttributes(-1); 1682 // // String id = (String)attributes.get("id"); 1683 // // 1684 // // // fix the ID 1685 // // if (!id.equals(skeleton)) { 1686 // // attributes.put("id", skeleton); 1687 // // fullPath = parts.toString(); 1688 // // } 1689 // // 1690 // // // make the change 1691 // // boolean differentPath = !fullPath.equals(oldFullPath); 1692 // // if (differentPath || !value.equals(oldValue)) { 1693 // // String reason = "Fixed value"; 1694 // // if (differentPath) { 1695 // // reason = "Fixed id"; 1696 // // String collisionValue = cldrFileToFilter.getStringValue(fullPath); 1697 // // if (collisionValue != null) { 1698 // // if (!value.equals(collisionValue)) { 1699 // // System.out.println("Collision: not changing " + fullPath 1700 // // + " =\t " + value + ", old: " + collisionValue); 1701 // // } 1702 // // //skip if there was an old item with a different id 1703 // // remove(oldFullPath, "ID collision"); 1704 // // return; 1705 // // } 1706 // // } 1707 // // replace(oldFullPath, fullPath, value, reason); 1708 // // } 1709 // } 1710 // 1711 // // make a minimal set 1712 // Map skeleton_patterns = dtpg.getSkeletons(null); 1713 // 1714 // Collection redundants = dtpg.getRedundants(null); 1715 // for (Iterator it = redundants.iterator(); it.hasNext();) { 1716 // String 
skeleton = dtpg.getSkeleton((String) it.next()); 1717 // skeleton_patterns.remove(skeleton); 1718 // } 1719 // // remove all the standard IDs 1720 // for (Iterator it = standardSkeletons.iterator(); it.hasNext();) { 1721 // String standardSkeleton = (String) it.next(); 1722 // skeleton_patterns.remove(standardSkeleton); 1723 // } 1724 // // Now add them all back in. Preserve old paths if possible 1725 // for (Iterator it = dateFormatItems.iterator(); it.hasNext();) { 1726 // String xpath = (String) it.next(); 1727 // String oldValue = cldrFileToFilter.getStringValue(xpath); 1728 // String oldFullPath = cldrFileToFilter.getFullXPath(xpath); 1729 // String newFullPath = oldFullPath; 1730 // parts.set(newFullPath); 1731 // Map attributes = parts.getAttributes(-1); 1732 // String id = (String)attributes.get("id"); 1733 // String newValue = (String) skeleton_patterns.get(id); 1734 // if (newValue == null) { 1735 // remove(xpath, "redundant"); 1736 // continue; 1737 // } 1738 // String draft = (String)attributes.get("draft"); 1739 // if (draft == null || draft.equals("approved")) { 1740 // attributes.put("draft", "provisional"); 1741 // newFullPath = parts.toString(); 1742 // } 1743 // if (oldValue.equals(newValue) && newFullPath.equals(oldFullPath)) { 1744 // retain(xpath, "-"); 1745 // skeleton_patterns.remove(id); 1746 // continue; // skip, they are the same 1747 // } 1748 // // not redundant, but altered 1749 // replace(oldFullPath, newFullPath, newValue, "fixed"); 1750 // skeleton_patterns.remove(id); 1751 // } 1752 // parts.set("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/" + 1753 // "dateFormatItem"); 1754 // Map attributes = parts.getAttributes(-1); 1755 // //attributes.put("alt", "proposed-666"); 1756 // attributes.put("draft", "provisional"); 1757 // for (Iterator it = skeleton_patterns.keySet().iterator(); it.hasNext();) { 1758 // String skeleton = (String)it.next(); 1759 // String pattern = 
(String)skeleton_patterns.get(skeleton); 1760 // attributes.put("id", skeleton); 1761 // String fullPath = parts.toString(); 1762 // replace(fullPath, fullPath, pattern); 1763 // } 1764 // } 1765 // 1766 // private String fixKk(String xpath, String value) { 1767 // List fields = fp.getItems(); 1768 // for (int i = 0; i < fields.size(); ++i) { 1769 // Object field = fields.get(i); 1770 // if (field instanceof DateTimePatternGenerator.VariableField) { 1771 // char first = field.toString().charAt(0); 1772 // String replacement = null; 1773 // if (first == 'k') replacement = "H"; 1774 // else if (first == 'K') replacement = "h"; 1775 // if (replacement != null) { 1776 // field = new DateTimePatternGenerator.VariableField(Utility.repeat(replacement, field.toString().length())); 1777 // fields.set(i, field); 1778 // } 1779 // } 1780 // } 1781 // String newValue = fp.toString(); 1782 // if (!value.equals(newValue)) { 1783 // remove(xpath, value + " => " + newValue); 1784 // } 1785 // return newValue; 1786 // } 1787 // 1788 // private boolean containsSS() { 1789 // List fields = fp.getItems(); 1790 // for (int i = 0; i < fields.size(); ++i) { 1791 // Object field = fields.get(i); 1792 // if (field instanceof DateTimePatternGenerator.VariableField) { 1793 // char first = field.toString().charAt(0); 1794 // if (first == 'S') return true; 1795 // } 1796 // } 1797 // return false; 1798 // } 1799 // }); 1800 /* 1801 * Fix id to be identical to skeleton 1802 * Eliminate any single-field ids 1803 * Add "L" (stand-alone month), "?" 
(other stand-alones) 1804 * Remove any fields with both a date and a time 1805 * Test that datetime format is valid format (will have to fix by hand) 1806 * Map k, K to H, h 1807 * 1808 * In Survey Tool: don't show id; compute when item added or changed 1809 * test validity 1810 */ 1811 1812 fixList.add('d', "fix dates", new CLDRFilter() { 1813 DateTimePatternGenerator dateTimePatternGenerator = DateTimePatternGenerator.getEmptyInstance(); 1814 DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser(); 1815 Map<String, Set<String>> seenSoFar = new HashMap<String, Set<String>>(); 1816 1817 public void handleStart() { 1818 seenSoFar.clear(); 1819 } 1820 1821 public void handlePath(String xpath) { 1822 // timeFormatLength type="full" 1823 if (xpath.contains("timeFormatLength") && xpath.contains("full")) { 1824 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1825 String value = cldrFileToFilter.getStringValue(xpath); 1826 boolean gotChange = false; 1827 List<Object> list = formatParser.set(value).getItems(); 1828 for (int i = 0; i < list.size(); ++i) { 1829 Object item = list.get(i); 1830 if (item instanceof DateTimePatternGenerator.VariableField) { 1831 String itemString = item.toString(); 1832 if (itemString.charAt(0) == 'z') { 1833 list.set(i, new VariableField(Utility.repeat("v", itemString.length()))); 1834 gotChange = true; 1835 } 1836 } 1837 } 1838 if (gotChange) { 1839 String newValue = toStringWorkaround(); 1840 if (value != newValue) { 1841 replace(xpath, fullpath, newValue); 1842 } 1843 } 1844 } 1845 if (xpath.indexOf("/availableFormats") < 0) return; 1846 String value = cldrFileToFilter.getStringValue(xpath); 1847 if (value == null) return; // not in current file 1848 1849 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1850 fullparts.set(fullpath); 1851 1852 Map<String, String> attributes = fullparts.findAttributes("dateFormatItem"); 1853 String id = attributes.get("id"); 1854 String oldID = id; 1855 
try { 1856 id = dateTimePatternGenerator.getBaseSkeleton(id); 1857 if (id.equals(oldID)) return; 1858 System.out.println(oldID + " => " + id); 1859 } catch (RuntimeException e) { 1860 id = "[error]"; 1861 return; 1862 } 1863 1864 attributes.put("id", id); 1865 totalSkeletons.add(id); 1866 1867 replace(xpath, fullparts.toString(), value); 1868 } 1869 1870 private String toStringWorkaround() { 1871 StringBuffer result = new StringBuffer(); 1872 List<Object> items = formatParser.getItems(); 1873 for (int i = 0; i < items.size(); ++i) { 1874 Object item = items.get(i); 1875 if (item instanceof String) { 1876 result.append(formatParser.quoteLiteral((String) items.get(i))); 1877 } else { 1878 result.append(items.get(i).toString()); 1879 } 1880 } 1881 return result.toString(); 1882 } 1883 1884 }); 1885 1886 fixList.add('y', "fix years to be y (with exceptions)", new CLDRFilter() { 1887 DateTimeCanonicalizer dtc = new DateTimeCanonicalizer(true); 1888 1889 DateTimePatternGenerator dateTimePatternGenerator = DateTimePatternGenerator.getEmptyInstance(); 1890 DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser(); 1891 Map<String, Set<String>> seenSoFar = new HashMap<String, Set<String>>(); 1892 1893 public void handleStart() { 1894 seenSoFar.clear(); 1895 } 1896 1897 public void handlePath(String xpath) { 1898 DateTimePatternType datetimePatternType = DateTimePatternType.fromPath(xpath); 1899 1900 // check to see if we need to change the value 1901 1902 if (!DateTimePatternType.STOCK_AVAILABLE_INTERVAL_PATTERNS.contains(datetimePatternType)) { 1903 return; 1904 } 1905 String oldValue = cldrFileToFilter.getStringValue(xpath); 1906 String value = dtc.getCanonicalDatePattern(xpath, oldValue, datetimePatternType); 1907 1908 String fullPath = cldrFileToFilter.getFullXPath(xpath); 1909 // Deleted code to canonicalize id for availableFormats items (cldrbug 5760) 1910 1911 if (value.equals(oldValue)) { 1912 return; 1913 } 1914 1915 // made 
it through the gauntlet, so replace 1916 1917 replace(xpath, fullPath, value); 1918 } 1919 }); 1920 1921 // This should only be applied to specific locales, and the results checked manually afterward. 1922 // It will only create ranges using the same digits as in root, not script-specific digits. 1923 // Any pre-existing year ranges should use the range marker from the intervalFormats "y" item. 1924 // This make several assumptions and is somewhat *FRAGILE*. 1925 fixList.add('j', "add year ranges from root to Japanese calendar eras", new CLDRFilter() { 1926 private CLDRFile rootFile; 1927 1928 public void handleStart() { 1929 rootFile = factory.make("root", false); 1930 } 1931 1932 public void handlePath(String xpath) { 1933 // Skip paths we don't care about 1934 if (xpath.indexOf("/calendar[@type=\"japanese\"]/eras/era") < 0) return; 1935 // Get root name for the era, check it 1936 String rootEraValue = rootFile.getStringValue(xpath); 1937 int rootEraIndex = rootEraValue.indexOf(" ("); 1938 if (rootEraIndex < 0) return; // this era does not have a year range in root, no need to add one in this 1939 // locale 1940 // Get range marker from intervalFormat range for y 1941 String yearIntervalFormat = cldrFileToFilter 1942 .getStringValue( 1943 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"y\"]/greatestDifference[@id=\"y\"]"); 1944 if (yearIntervalFormat == null) return; // oops, no intervalFormat data for y 1945 String rangeMarker = yearIntervalFormat.replaceAll("[.y\u5E74\uB144]", ""); // *FRAGILE* strip out 1946 // everything except the 1947 // range-indicating part 1948 // Get current locale name for this era, check it 1949 String eraValue = cldrFileToFilter.getStringValue(xpath); 1950 if (eraValue.indexOf('(') >= 0 && eraValue.indexOf(rangeMarker) >= 0) return; // this eraValue already 1951 // has a year range that 1952 // uses the appropriate 1953 // rangeMarker 1954 // Now update the root year 
range it with the rangeMarker for this locale, and append it to this 1955 // locale's name 1956 String rootYearRange = rootEraValue.substring(rootEraIndex); 1957 String appendYearRange = rootYearRange.replaceAll("[\u002D\u2013]", rangeMarker); 1958 String newEraValue = eraValue.concat(appendYearRange); 1959 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1960 replace(xpath, fullpath, newEraValue); 1961 // System.out.println("CLDRModify fj: rootEraValue: \"" + rootEraValue + "\", eraValue: \"" + eraValue + 1962 // "\", rangeMarker: \"" + rangeMarker + "\""); 1963 } 1964 }); 1965 1966 fixList.add('r', "fix references and standards", new CLDRFilter() { 1967 int currentRef = 500; 1968 Map<String, TreeMap<String, String>> locale_oldref_newref = new TreeMap<String, TreeMap<String, String>>(); 1969 TreeMap<String, String> oldref_newref; 1970 1971 //LanguageTagParser ltp = new LanguageTagParser(); 1972 1973 // References standards = new References(true); 1974 // References references = new References(false); 1975 1976 public void handleStart() { 1977 String locale = cldrFileToFilter.getLocaleID(); 1978 oldref_newref = locale_oldref_newref.get(locale); 1979 if (oldref_newref == null) { 1980 oldref_newref = new TreeMap<String, String>(); 1981 locale_oldref_newref.put(locale, oldref_newref); 1982 } 1983 } 1984 1985 // // Samples: 1986 // // <language type="ain" references="RP1">阿伊努文</language> 1987 // // <reference type="R1" uri="http://www.info.gov.hk/info/holiday_c.htm">二零零五年公眾假期刊登憲報</reference> 1988 public void handlePath(String xpath) { 1989 // must be minimised for this to work. 
1990 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1991 if (!fullpath.contains("reference")) return; 1992 String value = cldrFileToFilter.getStringValue(xpath); 1993 fullparts.set(fullpath); 1994 if ("reference".equals(fullparts.getElement(-1))) { 1995 fixType(value, "type", fullpath); 1996 } else if (fullparts.getAttributeValue(-1, "references") != null) { 1997 fixType(value, "references", fullpath); 1998 } else { 1999 System.out.println("CLDRModify: Skipping: " + xpath); 2000 } 2001 } 2002 2003 private void fixType(String value, String type, String oldFullPath) { 2004 String ref = fullparts.getAttributeValue(-1, type); 2005 if (whitespace.containsSome(ref)) throw new IllegalArgumentException("Whitespace in references"); 2006 String newRef = getNewRef(ref); 2007 fullparts.addAttribute(type, newRef); 2008 replace(oldFullPath, fullparts.toString(), value); 2009 } 2010 2011 private String getNewRef(String ref) { 2012 String newRef = oldref_newref.get(ref); 2013 if (newRef == null) { 2014 newRef = String.valueOf(currentRef++); 2015 newRef = "R" + Utility.repeat("0", (3 - newRef.length())) + newRef; 2016 oldref_newref.put(ref, newRef); 2017 } 2018 return newRef; 2019 } 2020 }); 2021 2022 fixList.add('q', "fix annotation punctuation", new CLDRFilter() { 2023 @Override 2024 public void handlePath(String xpath) { 2025 if (!xpath.contains("/annotation")) { 2026 return; 2027 } 2028 String fullpath = cldrFileToFilter.getFullXPath(xpath); 2029 XPathParts parts = XPathParts.getInstance(fullpath); 2030 String cp = parts.getAttributeValue(2, "cp"); 2031 String tts = parts.getAttributeValue(2, "tts"); 2032 String type = parts.getAttributeValue(2, "type"); 2033 if ("tts".equals(type)) { 2034 return; // ok, skip 2035 } 2036 String hex = "1F600"; 2037 if (cp.startsWith("[")) { 2038 UnicodeSet us = new UnicodeSet(cp); 2039 if (us.size() == 1) { 2040 cp = us.iterator().next(); 2041 hex = Utility.hex(cp); 2042 } else { 2043 hex = us.toString(); 2044 } 2045 
parts.putAttributeValue(2, "cp", cp); 2046 } 2047 parts.removeAttribute(2, "tts"); 2048 if (tts != null) { 2049 String newTts = CldrUtility.join(COMMA_SEMI.splitToList(tts), ", "); 2050 XPathParts parts2 = parts.cloneAsThawed(); 2051 parts2.putAttributeValue(2, "type", "tts"); 2052 add(parts2.toString(), newTts, "separate tts"); 2053 } 2054 String value = cldrFileToFilter.getStringValue(xpath); 2055 String newValue = CldrUtility.join(COMMA_SEMI.splitToList(value), " | "); 2056 final String newFullPath = parts.toString(); 2057 Comments comments = cldrFileToFilter.getXpath_comments(); 2058 String comment = comments.removeComment(CommentType.PREBLOCK, xpath); 2059 comment = hex + (comment == null ? "" : " " + comment); 2060 comments.addComment(CommentType.PREBLOCK, newFullPath, comment); 2061 if (!fullpath.equals(newFullPath) || !value.equals(newValue)) { 2062 replace(fullpath, newFullPath, newValue); 2063 } 2064 } 2065 }); 2066 2067 fixList.add('Q', "add annotation names to keywords", new CLDRFilter() { 2068 Set<String> available = Annotations.getAvailable(); 2069 TreeSet<String> sorted = new TreeSet<>(Collator.getInstance(ULocale.ROOT)); 2070 CLDRFile resolved; 2071 2072 @Override 2073 public void handleStart() { 2074 String localeID = cldrFileToFilter.getLocaleID(); 2075 if (!available.contains(localeID)) { 2076 throw new IllegalArgumentException("no annotations available, probably wrong directory"); 2077 } 2078 ; 2079 resolved = factory.make(localeID, true); 2080 } 2081 2082 @Override 2083 public void handlePath(String xpath) { 2084 if (!xpath.contains("/annotation")) { 2085 return; 2086 } 2087 // <annotation cp="">100 | honderd | persent | telling | vol</annotation> 2088 // <annotation cp="" type="tts">honderd punte</annotation> 2089 // we will copy honderd punte into the list of keywords. 
2090 String fullpath = cldrFileToFilter.getFullXPath(xpath); 2091 XPathParts parts = XPathParts.getFrozenInstance(fullpath); 2092 String type = parts.getAttributeValue(2, "type"); 2093 if (type == null) { 2094 return; // no TTS, so keywords, skip 2095 } 2096 String name = cldrFileToFilter.getStringValue(xpath); 2097 XPathParts keywordParts = parts.cloneAsThawed().removeAttribute(2, "type"); 2098 String keywordPath = keywordParts.toString(); 2099 String keywordValue = resolved.getStringValue(keywordPath); 2100 String sourceLocaleId = resolved.getSourceLocaleID(keywordPath, null); 2101 sorted.clear(); 2102 sorted.add(name); 2103 List<String> items; 2104 if (!sourceLocaleId.equals(XMLSource.ROOT_ID) && !sourceLocaleId.equals(XMLSource.CODE_FALLBACK_ID)) { 2105 items = Annotations.splitter.splitToList(keywordValue); 2106 sorted.addAll(items); 2107 } else { 2108 int debug = 0; 2109 } 2110 DisplayAndInputProcessor.filterCoveredKeywords(sorted); 2111 String newKeywordValue = CollectionUtilities.join(sorted, " | "); 2112 if (!newKeywordValue.equals(keywordValue)) { 2113 replace(keywordPath, keywordPath, newKeywordValue); 2114 } 2115 } 2116 }); 2117 2118 fixList.add('N', "add number symbols to exemplars", new CLDRFilter() { 2119 CLDRFile resolved; 2120 UnicodeSet numberStuff = new UnicodeSet(); 2121 Set<String> seen = new HashSet<>(); 2122 Set<String> hackAllowOnly = new HashSet<>(); 2123 boolean skip = false; 2124 2125 @Override 2126 public void handleStart() { 2127 String localeID = cldrFileToFilter.getLocaleID(); 2128 resolved = factory.make(localeID, true); 2129 numberStuff.clear(); 2130 seen.clear(); 2131 skip = localeID.equals("root"); 2132 // TODO add return value to handleStart to skip calling handlePath 2133 2134 if (NUMBER_SYSTEM_HACK) { 2135 hackAllowOnly.clear(); 2136 for (NumberingSystem system : NumberingSystem.values()) { 2137 String numberingSystem = system.path == null ? 
"latn" : cldrFileToFilter.getStringValue(system.path); 2138 if (numberingSystem != null) { 2139 hackAllowOnly.add(numberingSystem); 2140 } 2141 } 2142 int debug = 0; 2143 } 2144 } 2145 2146 @Override 2147 public void handlePath(String xpath) { 2148 // the following doesn't work without NUMBER_SYSTEM_HACK, because there are spurious numbersystems in the data. 2149 // http://unicode.org/cldr/trac/ticket/10648 2150 // so using a hack for now in handleEnd 2151 if (skip || !xpath.startsWith("//ldml/numbers/symbols")) { 2152 return; 2153 } 2154 2155 // //ldml/numbers/symbols[@numberSystem="latn"]/exponential 2156 parts = XPathParts.getFrozenInstance(xpath); 2157 String system = parts.getAttributeValue(2, "numberSystem"); 2158 if (system == null) { 2159 System.err.println("Bogus numberSystem:\t" + cldrFileToFilter.getLocaleID() + " \t" + xpath); 2160 return; 2161 } else if (seen.contains(system) || !hackAllowOnly.contains(system)) { 2162 return; 2163 } 2164 seen.add(system); 2165 UnicodeSet exemplars = resolved.getExemplarsNumeric(system); 2166 System.out.println("# " + system + " ==> " + exemplars.toPattern(false)); 2167 for (String s : exemplars) { 2168 numberStuff.addAll(s); // add individual characters 2169 } 2170 } 2171 2172 @Override 2173 public void handleEnd() { 2174 if (!numberStuff.isEmpty()) { 2175 UnicodeSet current = cldrFileToFilter.getExemplarSet(ExemplarType.numbers, WinningChoice.WINNING); 2176 if (!numberStuff.equals(current)) { 2177 DisplayAndInputProcessor daip = new DisplayAndInputProcessor(cldrFileToFilter); 2178 if (current != null && !current.isEmpty()) { 2179 numberStuff.addAll(current); 2180 } 2181 String path = CLDRFile.getExemplarPath(ExemplarType.numbers); 2182 String value = daip.getPrettyPrinter().format(numberStuff); 2183 replace(path, path, value); 2184 } 2185 } 2186 } 2187 }); 2188 2189 fixList.add('k', 2190 "fix according to -k config file. 
Details on http://cldr.unicode.org/development/cldr-big-red-switch/cldrmodify-passes/cldrmodify-config", 2191 new CLDRFilter() { 2192 private Map<ConfigMatch, LinkedHashSet<Map<ConfigKeys, ConfigMatch>>> locale2keyValues; 2193 private LinkedHashSet<Map<ConfigKeys, ConfigMatch>> keyValues = new LinkedHashSet<Map<ConfigKeys, ConfigMatch>>(); 2194 private Matcher draftMatcher = Pattern.compile("\\[@draft=\"[^\"]+\"]").matcher(""); 2195 2196 @Override 2197 public void handleStart() { 2198 super.handleStart(); 2199 if (!options[FIX].doesOccur || !options[FIX].value.equals("k")) { 2200 return; 2201 } 2202 if (locale2keyValues == null) { 2203 fillCache(); 2204 } 2205 // set up for the specific locale we are dealing with. 2206 // a small optimization 2207 String localeId = getLocaleID(); 2208 keyValues.clear(); 2209 for (Entry<ConfigMatch, LinkedHashSet<Map<ConfigKeys, ConfigMatch>>> localeMatcher : locale2keyValues 2210 .entrySet()) { 2211 if (localeMatcher.getKey().matches(localeId)) { 2212 keyValues.addAll(localeMatcher.getValue()); 2213 } 2214 } 2215 System.out.println("# Checking entries & adding:\t" + keyValues.size()); 2216 for (Map<ConfigKeys, ConfigMatch> entry : keyValues) { 2217 ConfigMatch action = entry.get(ConfigKeys.action); 2218 //ConfigMatch locale = entry.get(ConfigKeys.locale); 2219 ConfigMatch pathMatch = entry.get(ConfigKeys.path); 2220 ConfigMatch valueMatch = entry.get(ConfigKeys.value); 2221 ConfigMatch newPath = entry.get(ConfigKeys.new_path); 2222 ConfigMatch newValue = entry.get(ConfigKeys.new_value); 2223 switch (action.action) { 2224 // we add all the values up front 2225 case addNew: 2226 case add: 2227 if (pathMatch != null || valueMatch != null || newPath == null || newValue == null) { 2228 throw new IllegalArgumentException( 2229 "Bad arguments, must have " + 2230 "path==null, value=null, new_path!=null, new_value!=null:\n\t" 2231 + entry); 2232 } 2233 String newPathString = newPath.getPath(cldrFileToFilter); 2234 if (action.action == 
ConfigAction.add 2235 || cldrFileToFilter.getStringValue(newPathString) == null) { 2236 replace(newPathString, newPathString, newValue.exactMatch, "config"); 2237 } 2238 break; 2239 // we just check 2240 case replace: 2241 if ((pathMatch == null && valueMatch == null) || (newPath == null && newValue == null)) { 2242 throw new IllegalArgumentException( 2243 "Bad arguments, must have " + 2244 "(path!=null OR value=null) AND (new_path!=null OR new_value!=null):\n\t" 2245 + entry); 2246 } 2247 break; 2248 // For delete, we just check; we'll remove later 2249 case delete: 2250 if (newPath != null || newValue != null) { 2251 throw new IllegalArgumentException("Bad arguments, must have " + 2252 "newPath=null, newValue=null" 2253 + entry); 2254 } 2255 break; 2256 default: // fall through 2257 throw new IllegalArgumentException("Internal Error"); 2258 } 2259 } 2260 } 2261 2262 private void fillCache() { 2263 locale2keyValues = new LinkedHashMap<ConfigMatch, LinkedHashSet<Map<ConfigKeys, ConfigMatch>>>(); 2264 String configFileName = options[KONFIG].value; 2265 FileProcessor myReader = new FileProcessor() { 2266 { 2267 doHash = false; 2268 } 2269 2270 @Override 2271 protected boolean handleLine(int lineCount, String line) { 2272 line = line.trim(); 2273 // if (line.isEmpty()) { 2274 // return true; 2275 // } 2276 String[] lineParts = line.split("\\s*;\\s*"); 2277 Map<ConfigKeys, ConfigMatch> keyValue = new EnumMap<ConfigKeys, ConfigMatch>( 2278 ConfigKeys.class); 2279 for (String linePart : lineParts) { 2280 int pos = linePart.indexOf('='); 2281 if (pos < 0) { 2282 throw new IllegalArgumentException(lineCount + ":\t No = in command: «" + linePart + "» in " + line); 2283 } 2284 ConfigKeys key = ConfigKeys.valueOf(linePart.substring(0, pos).trim()); 2285 if (keyValue.containsKey(key)) { 2286 throw new IllegalArgumentException("Must not have multiple keys: " + key); 2287 } 2288 String match = linePart.substring(pos + 1).trim(); 2289 keyValue.put(key, new ConfigMatch(key, 
match)); 2290 } 2291 final ConfigMatch locale = keyValue.get(ConfigKeys.locale); 2292 if (locale == null || keyValue.get(ConfigKeys.action) == null) { 2293 throw new IllegalArgumentException(); 2294 } 2295 2296 LinkedHashSet<Map<ConfigKeys, ConfigMatch>> keyValues = locale2keyValues 2297 .get(locale); 2298 if (keyValues == null) { 2299 locale2keyValues.put(locale, 2300 keyValues = new LinkedHashSet<Map<ConfigKeys, ConfigMatch>>()); 2301 } 2302 keyValues.add(keyValue); 2303 return true; 2304 } 2305 }; 2306 myReader.process(CLDRModify.class, configFileName); 2307 } 2308 2309 @Override 2310 public void handlePath(String xpath) { 2311 // slow method; could optimize 2312 for (Map<ConfigKeys, ConfigMatch> entry : keyValues) { 2313 ConfigMatch pathMatch = entry.get(ConfigKeys.path); 2314 if (pathMatch != null && !pathMatch.matches(xpath)) { 2315 continue; 2316 } 2317 ConfigMatch valueMatch = entry.get(ConfigKeys.value); 2318 String value = cldrFileToFilter.getStringValue(xpath); 2319 if (valueMatch != null && !valueMatch.matches(value)) { 2320 continue; 2321 } 2322 ConfigMatch action = entry.get(ConfigKeys.action); 2323 switch (action.action) { 2324 case delete: 2325 remove(xpath, "config"); 2326 break; 2327 case replace: 2328 ConfigMatch newPath = entry.get(ConfigKeys.new_path); 2329 ConfigMatch newValue = entry.get(ConfigKeys.new_value); 2330 2331 String fullpath = cldrFileToFilter.getFullXPath(xpath); 2332 String draft = ""; 2333 int loc = fullpath.indexOf("[@draft="); 2334 if (loc >= 0) { 2335 int loc2 = fullpath.indexOf(']', loc + 7); 2336 draft = fullpath.substring(loc, loc2 + 1); 2337 } 2338 2339 String modPath = ConfigMatch.getModified(pathMatch, xpath, newPath) + draft; 2340 String modValue = ConfigMatch.getModified(valueMatch, value, newValue); 2341 replace(xpath, modPath, modValue, "config"); 2342 } 2343 } 2344 } 2345 }); 2346 2347 // fixList.add('q', "fix numbering system", new CLDRFilter() { 2348 // private final UnicodeSet dotEquivalents =(UnicodeSet) new 
UnicodeSet("[..․﹒ 。。︒۔٬]").freeze(); 2349 // private final UnicodeSet commaEquivalents = (UnicodeSet) new UnicodeSet("[,,﹐ ، ٫ 、﹑、،]").freeze(); 2350 // private final UnicodeSet apostropheEquivalent = (UnicodeSet) new UnicodeSet("[︐︑ '' ‘ ’ ]").freeze(); 2351 // private final UnicodeSet spaces = (UnicodeSet) new UnicodeSet("[:whitespace:]").freeze(); 2352 // 2353 // private final UnicodeSet ALLOWED_IN_NUMBER_SYMBOLS = (UnicodeSet) new 2354 // UnicodeSet("[\\u0000-\\u00FF ’ ‰ ∞ −]").freeze(); 2355 // 2356 // private final UnicodeMap map = new UnicodeMap(); 2357 // { 2358 // map.putAll(dotEquivalents, "."); 2359 // map.putAll(commaEquivalents, ","); 2360 // map.putAll(apostropheEquivalent, "’"); 2361 // map.putAll(spaces, "\u00a0"); 2362 // map.put('٪', "%"); 2363 // map.put('؛', ";"); 2364 // map.put('؉', "‰"); 2365 // map.putAll(new UnicodeSet("\\p{dash}"), "-"); 2366 // } 2367 // 2368 // private String system; 2369 // private CLDRFile resolved; 2370 // 2371 // /* 2372 // <decimal>.</decimal> 2373 // <group>,</group> 2374 // <list>;</list> 2375 // <percentSign>%</percentSign> 2376 // <nativeZeroDigit>0</nativeZeroDigit> 2377 // <patternDigit>#</patternDigit> 2378 // <plusSign>+</plusSign> 2379 // <minusSign>-</minusSign> 2380 // <exponential>E</exponential> 2381 // <perMille>‰</perMille> 2382 // <infinity>∞</infinity> 2383 // <nan>NaN</nan> 2384 // */ 2385 // public void handleStart() { 2386 // resolved = cldrFileToFilter.make(cldrFileToFilter.getLocaleID(), true); 2387 // system = "????"; 2388 // String zero = resolved.getStringValue("//ldml/numbers/symbols/nativeZeroDigit"); 2389 // int firstChar = zero.codePointAt(0); 2390 // switch(firstChar) { 2391 // case '0': system = "????"; break; 2392 // case '٠': system = "arab"; break; 2393 // case '۰': system = "arabext"; break; 2394 // default: 2395 // int script = UScript.getScript(zero.codePointAt(0)); 2396 // if (script != UScript.UNKNOWN) { 2397 // system = UScript.getShortName(script).toLowerCase(Locale.ENGLISH); 
2398 // } 2399 // break; 2400 // } 2401 // } 2402 // public void handlePath(String xpath) { 2403 // String fullpath = cldrFileToFilter.getFullXPath(xpath); 2404 // if (!fullpath.startsWith("//ldml/numbers/symbols/")) return; 2405 // String value = cldrFileToFilter.getStringValue(xpath); 2406 // if (ALLOWED_IN_NUMBER_SYMBOLS.contains(value)) return; 2407 // parts.set(xpath); 2408 // String alt = parts.getAttributeValue(-1, "alt"); 2409 // if (alt != null) { 2410 // show("*** Non-empty alt on " + xpath + "\t\t" + value,"???"); 2411 // return; 2412 // } 2413 // String last = parts.getElement(-1); 2414 // String newValue = getLatinSeparator(value, last); 2415 // if (newValue == null) { 2416 // throw new IllegalArgumentException("Can't handle " + xpath + "\t\t" + value); 2417 // } 2418 // if (newValue.equals(value)) { 2419 // return; 2420 // } 2421 // replace(fullpath, fullpath, newValue); 2422 // parts.set(fullpath); 2423 // parts.addAttribute("alt", system); 2424 // String newPath = parts.toString(); 2425 // replace(newPath, newPath, value); 2426 // } 2427 // 2428 // String getLatinSeparator(String value, String last) { 2429 // String newValue = map.transform(value); 2430 // if (ALLOWED_IN_NUMBER_SYMBOLS.containsAll(newValue)) { 2431 // return newValue; 2432 // } 2433 // if (last.equals("nativeZeroDigit")) { 2434 // return "0"; 2435 // } 2436 // if (last.equals("exponential")) { 2437 // return "E"; 2438 // } 2439 // if (last.equals("nan")) { 2440 // return "NaN"; 2441 // } 2442 // if (last.equals("infinity")) { 2443 // return "∞"; 2444 // } 2445 // if (last.equals("list")) { 2446 // return ";"; 2447 // } 2448 // if (last.equals("percentSign")) { 2449 // return "%"; 2450 // } 2451 // if (last.equals("group")) { 2452 // return "’"; 2453 // } 2454 // return null; 2455 // } 2456 // }); 2457 2458 fixList.add('i', "fix Identical Children"); 2459 fixList.add('o', "check attribute validity"); 2460 } 2461 2462 // 
references="http://www.stat.fi/tk/tt/luokitukset/lk/kieli_02.html" 2463 2464 private static class ValuePair { 2465 String value; 2466 String fullxpath; 2467 } 2468 2469 /** 2470 * Find the set of xpaths that 2471 * (a) have all the same values (if present) in the children 2472 * (b) are absent in the parent, 2473 * (c) are different than what is in the fully resolved parent 2474 * and add them. 2475 */ fixIdenticalChildren(Factory cldrFactory, CLDRFile k, CLDRFile replacements)2476 static void fixIdenticalChildren(Factory cldrFactory, CLDRFile k, CLDRFile replacements) { 2477 String key = k.getLocaleID(); 2478 if (key.equals("root")) return; 2479 Set<String> availableChildren = cldrFactory.getAvailableWithParent(key, true); 2480 if (availableChildren.size() == 0) return; 2481 Set<String> skipPaths = new HashSet<String>(); 2482 Map<String, ValuePair> haveSameValues = new TreeMap<String, ValuePair>(); 2483 CLDRFile resolvedFile = cldrFactory.make(key, true); 2484 // get only those paths that are not in "root" 2485 CollectionUtilities.addAll(resolvedFile.iterator(), skipPaths); 2486 2487 // first, collect all the paths 2488 for (String locale : availableChildren) { 2489 if (locale.indexOf("POSIX") >= 0) continue; 2490 CLDRFile item = cldrFactory.make(locale, false); 2491 for (String xpath : item) { 2492 if (skipPaths.contains(xpath)) continue; 2493 // skip certain elements 2494 if (xpath.indexOf("/identity") >= 0) continue; 2495 if (xpath.startsWith("//ldml/numbers/currencies/currency")) continue; 2496 if (xpath.startsWith("//ldml/dates/timeZoneNames/metazone[")) continue; 2497 if (xpath.indexOf("[@alt") >= 0) continue; 2498 if (xpath.indexOf("/alias") >= 0) continue; 2499 2500 // must be string vale 2501 ValuePair v1 = new ValuePair(); 2502 v1.value = item.getStringValue(xpath); 2503 v1.fullxpath = item.getFullXPath(xpath); 2504 2505 ValuePair vAlready = haveSameValues.get(xpath); 2506 if (vAlready == null) { 2507 haveSameValues.put(xpath, v1); 2508 } else if 
(!v1.value.equals(vAlready.value) || !v1.fullxpath.equals(vAlready.fullxpath)) { 2509 skipPaths.add(xpath); 2510 haveSameValues.remove(xpath); 2511 } 2512 } 2513 } 2514 // at this point, haveSameValues is all kosher, so add items 2515 for (String xpath : haveSameValues.keySet()) { 2516 ValuePair v = haveSameValues.get(xpath); 2517 // if (v.value.equals(resolvedFile.getStringValue(xpath)) 2518 // && v.fullxpath.equals(resolvedFile.getFullXPath(xpath))) continue; 2519 replacements.add(v.fullxpath, v.value); 2520 } 2521 } 2522 fixAltProposed()2523 static void fixAltProposed() { 2524 throw new IllegalArgumentException(); 2525 // throw out any alt=proposed values that are the same as the main 2526 // HashSet toRemove = new HashSet(); 2527 // for (Iterator it = dataSource.iterator(); it.hasNext();) { 2528 // String cpath = (String) it.next(); 2529 // if (cpath.indexOf("[@alt=") < 0) continue; 2530 // String cpath2 = getNondraftNonaltXPath(cpath); 2531 // String value = getStringValue(cpath); 2532 // String value2 = getStringValue(cpath2); 2533 // if (!value.equals(value2)) continue; 2534 // // have to worry about cases where the info is not in the value!! 2535 // //fix this; values are the same!! 2536 // String fullpath = getNondraftNonaltXPath(getFullXPath(cpath)); 2537 // String fullpath2 = getNondraftNonaltXPath(getFullXPath(cpath2)); 2538 // if (!fullpath.equals(fullpath2)) continue; 2539 // Log.logln(getLocaleID() + "\tRemoving redundant alternate: " + getFullXPath(cpath) + " ;\t" + value); 2540 // Log.logln("\t\tBecause of: " + getFullXPath(cpath2) + " ;\t" + value2); 2541 // if (getFullXPath(cpath2).indexOf("[@references=") >= 0) { 2542 // System.out.println("Warning: removing references: " + getFullXPath(cpath2)); 2543 // } 2544 // toRemove.add(cpath); 2545 // } 2546 // dataSource.removeAll(toRemove); 2547 2548 } 2549 2550 /** 2551 * Perform various fixes 2552 * TODO add options to pick which one. 
2553 * 2554 * @param options 2555 * @param config 2556 * @param cldrFactory 2557 */ fix(CLDRFile k, String inputOptions, String config, Factory cldrFactory)2558 private static void fix(CLDRFile k, String inputOptions, String config, Factory cldrFactory) { 2559 2560 // TODO before modifying, make sure that it is fully resolved. 2561 // then minimize against the NEW parents 2562 2563 Set<String> removal = new TreeSet<String>(k.getComparator()); 2564 CLDRFile replacements = SimpleFactory.makeFile("temp"); 2565 fixList.setFile(k, inputOptions, cldrFactory, removal, replacements); 2566 2567 for (String xpath : k) { 2568 fixList.handlePath(xpath); 2569 } 2570 fixList.handleEnd(); 2571 2572 // remove bad attributes 2573 2574 if (inputOptions.indexOf('v') >= 0) { 2575 CLDRTest.checkAttributeValidity(k, null, removal); 2576 } 2577 2578 // raise identical elements 2579 2580 if (inputOptions.indexOf('i') >= 0) { 2581 fixIdenticalChildren(cldrFactory, k, replacements); 2582 } 2583 2584 // now do the actions we collected 2585 2586 if (SHOW_DETAILS) { 2587 if (removal.size() != 0 || !replacements.isEmpty()) { 2588 if (!removal.isEmpty()) { 2589 System.out.println("Removals:"); 2590 for (String path : removal) { 2591 System.out.println(path + " =\t " + k.getStringValue(path)); 2592 } 2593 } 2594 if (!replacements.isEmpty()) { 2595 System.out.println("Additions/Replacements:"); 2596 System.out.println(replacements.toString().replaceAll("\u00A0", "<NBSP>")); 2597 } 2598 } 2599 } 2600 if (removal.size() != 0) { 2601 k.removeAll(removal, COMMENT_REMOVALS); 2602 } 2603 k.putAll(replacements, CLDRFile.MERGE_REPLACE_MINE); 2604 } 2605 2606 /** 2607 * Internal 2608 */ testJavaSemantics()2609 public static void testJavaSemantics() { 2610 Collator caseInsensitive = Collator.getInstance(ULocale.ROOT); 2611 caseInsensitive.setStrength(Collator.SECONDARY); 2612 Set<String> setWithCaseInsensitive = new TreeSet<String>(caseInsensitive); 2613 setWithCaseInsensitive.addAll(Arrays.asList(new 
String[] { "a", "b", "c" })); 2614 Set<String> plainSet = new TreeSet<String>(); 2615 plainSet.addAll(Arrays.asList(new String[] { "a", "b", "B" })); 2616 System.out.println("S1 equals S2?\t" + setWithCaseInsensitive.equals(plainSet)); 2617 System.out.println("S2 equals S1?\t" + plainSet.equals(setWithCaseInsensitive)); 2618 setWithCaseInsensitive.removeAll(plainSet); 2619 System.out.println("S1 removeAll S2 is empty?\t" + setWithCaseInsensitive.isEmpty()); 2620 } 2621 2622 // <localizedPatternChars>GyMdkHmsSEDFwWahKzYeugAZ</localizedPatternChars> 2623 /* 2624 * <localizedPattern> 2625 * <map type="era">G</map> 2626 * <map type="year">y</map> 2627 * <map type="year_iso">Y</map> 2628 * <map type="year_uniform">u</map> 2629 * <map type="month">M</map> 2630 * <map type="week_in_year">w</map> 2631 * <map type="week_in_month">W</map> 2632 * <map type="day">d</map> 2633 * <map type="day_of_year">D</map> 2634 * <map type="day_of_week_in_month">F</map> 2635 * <map type="day_julian">g</map> 2636 * <map type="day_of_week">E</map> 2637 * <map type="day_of_week_local">e</map> 2638 * <map type="period_in_day">a</map> 2639 * <map type="hour_1_12">h</map> 2640 * <map type="hour_0_23">H</map> 2641 * <map type="hour_0_11">K</map> 2642 * <map type="hour_1_24">k</map> 2643 * <map type="minute">m</map> 2644 * <map type="second">s</map> 2645 * <map type="fractions_of_second">S</map> 2646 * <map type="milliseconds_in_day">A</map> 2647 * <map type="timezone">z</map> 2648 * <map type="timezone_gmt">Z</map> 2649 * </localizedPattern> 2650 */ 2651 2652 } 2653