1 /* 2 ****************************************************************************** 3 * Copyright (C) 2004-2013, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ****************************************************************************** 6 */ 7 package org.unicode.cldr.tool; 8 9 import java.io.File; 10 import java.io.PrintWriter; 11 import java.util.ArrayList; 12 import java.util.Arrays; 13 import java.util.EnumMap; 14 import java.util.HashMap; 15 import java.util.HashSet; 16 import java.util.Iterator; 17 import java.util.LinkedHashMap; 18 import java.util.LinkedHashSet; 19 import java.util.List; 20 import java.util.Map; 21 import java.util.Map.Entry; 22 import java.util.Set; 23 import java.util.TreeMap; 24 import java.util.TreeSet; 25 import java.util.regex.Matcher; 26 import java.util.regex.Pattern; 27 28 import org.unicode.cldr.draft.FileUtilities; 29 import org.unicode.cldr.test.CLDRTest; 30 import org.unicode.cldr.test.CoverageLevel2; 31 import org.unicode.cldr.test.DisplayAndInputProcessor; 32 import org.unicode.cldr.test.QuickCheck; 33 import org.unicode.cldr.util.Annotations; 34 import org.unicode.cldr.util.CLDRConfig; 35 import org.unicode.cldr.util.CLDRFile; 36 import org.unicode.cldr.util.CLDRFile.DraftStatus; 37 import org.unicode.cldr.util.CLDRFile.ExemplarType; 38 import org.unicode.cldr.util.CLDRFile.NumberingSystem; 39 import org.unicode.cldr.util.CLDRFile.WinningChoice; 40 import org.unicode.cldr.util.CLDRLocale; 41 import org.unicode.cldr.util.CLDRPaths; 42 import org.unicode.cldr.util.CLDRTool; 43 import org.unicode.cldr.util.CldrUtility; 44 import org.unicode.cldr.util.DateTimeCanonicalizer; 45 import org.unicode.cldr.util.DateTimeCanonicalizer.DateTimePatternType; 46 import org.unicode.cldr.util.DtdData; 47 import org.unicode.cldr.util.DtdType; 48 import org.unicode.cldr.util.Factory; 49 import org.unicode.cldr.util.FileProcessor; 50 import org.unicode.cldr.util.LanguageTagParser; 51 import org.unicode.cldr.util.Level; 52 import org.unicode.cldr.util.Log; 53 import org.unicode.cldr.util.LogicalGrouping; 54 import org.unicode.cldr.util.PathHeader; 55 import org.unicode.cldr.util.PatternCache; 56 import org.unicode.cldr.util.RegexLookup; 57 import org.unicode.cldr.util.SimpleFactory; 58 import org.unicode.cldr.util.StandardCodes; 59 import org.unicode.cldr.util.StringId; 60 import org.unicode.cldr.util.SupplementalDataInfo; 61 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 62 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 63 import org.unicode.cldr.util.XMLSource; 64 import org.unicode.cldr.util.XPathParts; 65 import org.unicode.cldr.util.XPathParts.Comments; 66 import org.unicode.cldr.util.XPathParts.Comments.CommentType; 67 68 import com.google.common.base.Splitter; 69 import com.ibm.icu.dev.tool.UOption; 70 import com.ibm.icu.dev.util.CollectionUtilities; 71 import com.ibm.icu.impl.Utility; 72 import com.ibm.icu.text.Collator; 73 import com.ibm.icu.text.DateTimePatternGenerator; 74 import com.ibm.icu.text.DateTimePatternGenerator.VariableField; 75 import com.ibm.icu.text.Normalizer; 76 import com.ibm.icu.text.NumberFormat; 77 import com.ibm.icu.text.UnicodeSet; 78 import com.ibm.icu.util.ICUException; 79 import com.ibm.icu.util.Output; 80 import com.ibm.icu.util.ULocale; 81 82 /** 83 * Tool for applying modifications to the CLDR files. Use -h to see the options. 84 * <p> 85 * There are some environment variables that can be used with the program <br> 86 * -DSHOW_FILES=<anything> shows all create/open of files. 87 */ 88 @CLDRTool(alias = "modify", 89 description = "Tool for applying modifications to the CLDR files. Use -h to see the options.") 90 public class CLDRModify { 91 private static final boolean DEBUG = false; 92 static final String DEBUG_PATHS = null; // ".*currency.*"; 93 static final boolean COMMENT_REMOVALS = false; // append removals as comments 94 static final UnicodeSet whitespace = new UnicodeSet("[:whitespace:]").freeze(); 95 static final UnicodeSet HEX = new UnicodeSet("[a-fA-F0-9]").freeze(); 96 private static final DtdData dtdData = DtdData.getInstance(DtdType.ldml); 97 98 // TODO make this into input option. 99 100 enum ConfigKeys { 101 action, locale, path, value, new_path, new_value 102 } 103 104 enum ConfigAction { 105 /** 106 * Remove a path 107 */ 108 delete, 109 /** 110 * Add a path/value 111 */ 112 add, 113 /** 114 * Replace a path/value. Equals 'add' but tests selected paths 115 */ 116 replace, 117 /** 118 * Add a a path/value. Equals 'add' but tests that path did NOT exist 119 */ 120 addNew, 121 } 122 123 static final class ConfigMatch { 124 final String exactMatch; 125 final Matcher regexMatch; // doesn't have to be thread safe 126 final ConfigAction action; 127 final boolean hexPath; 128 ConfigMatch(ConfigKeys key, String match)129 public ConfigMatch(ConfigKeys key, String match) { 130 if (key == ConfigKeys.action) { 131 exactMatch = null; 132 regexMatch = null; 133 action = ConfigAction.valueOf(match); 134 hexPath = false; 135 } else if (match.startsWith("/") && match.endsWith("/")) { 136 if (key != ConfigKeys.locale && key != ConfigKeys.path && key != ConfigKeys.value) { 137 throw new IllegalArgumentException("Regex only allowed for old path/value."); 138 } 139 exactMatch = null; 140 regexMatch = PatternCache.get(match.substring(1, match.length() - 1) 141 .replace("[@", "\\[@")).matcher(""); 142 action = null; 143 hexPath = false; 144 } else { 145 exactMatch = match; 146 regexMatch = null; 147 action = null; 148 hexPath = (key == ConfigKeys.new_path || key == ConfigKeys.path) 149 && HEX.containsAll(match); 150 } 151 152 } 153 matches(String other)154 public boolean matches(String other) { 155 if (exactMatch == null) { 156 return regexMatch.reset(other).find(); 157 } else if (hexPath) { 158 // convert path to id for comparison 159 return exactMatch.equals(StringId.getHexId(other)); 160 } else { 161 return exactMatch.equals(other); 162 } 163 } 164 toString()165 public String toString() { 166 return action != null ? action.toString() 167 : exactMatch == null ? regexMatch.toString() 168 : hexPath ? "*" + exactMatch + "*" 169 : exactMatch; 170 } 171 getPath(CLDRFile cldrFileToFilter)172 public String getPath(CLDRFile cldrFileToFilter) { 173 if (!hexPath) { 174 return exactMatch; 175 } 176 // ensure that we have all the possible paths cached 177 String path = StringId.getStringFromHexId(exactMatch); 178 if (path == null) { 179 for (String eachPath : cldrFileToFilter.fullIterable()) { 180 StringId.getHexId(eachPath); 181 } 182 path = StringId.getStringFromHexId(exactMatch); 183 if (path == null) { 184 throw new IllegalArgumentException("No path for hex id: " + exactMatch); 185 } 186 } 187 return path; 188 } 189 getModified(ConfigMatch valueMatch, String value, ConfigMatch newValue)190 public static String getModified(ConfigMatch valueMatch, String value, ConfigMatch newValue) { 191 if (valueMatch == null) { // match anything 192 if (newValue != null && newValue.exactMatch != null) { 193 return newValue.exactMatch; 194 } 195 if (value != null) { 196 return value; 197 } 198 throw new IllegalArgumentException("Can't have both old and new be null."); 199 } else if (valueMatch.exactMatch == null) { // regex 200 if (newValue == null || newValue.exactMatch == null) { 201 throw new IllegalArgumentException("Can't have regex without replacement."); 202 } 203 StringBuffer buffer = new StringBuffer(); 204 valueMatch.regexMatch.appendReplacement(buffer, newValue.exactMatch); 205 return buffer.toString(); 206 } else { 207 return newValue.exactMatch != null ? newValue.exactMatch : value; 208 } 209 } 210 } 211 212 static FixList fixList = new FixList(); 213 214 private static final int HELP1 = 0, 215 HELP2 = 1, 216 SOURCEDIR = 2, 217 DESTDIR = 3, 218 MATCH = 4, 219 JOIN = 5, 220 MINIMIZE = 6, 221 FIX = 7, 222 JOIN_ARGS = 8, 223 VET_ADD = 9, 224 RESOLVE = 10, 225 PATH = 11, 226 USER = 12, 227 ALL_DIRS = 13, 228 CHECK = 14, 229 KONFIG = 15; 230 231 private static final UOption[] options = { 232 UOption.HELP_H(), 233 UOption.HELP_QUESTION_MARK(), 234 UOption.SOURCEDIR().setDefault(CLDRPaths.MAIN_DIRECTORY), 235 UOption.DESTDIR().setDefault(CLDRPaths.GEN_DIRECTORY + "cldrModify/"), 236 UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"), 237 UOption.create("join", 'j', UOption.OPTIONAL_ARG), 238 UOption.create("minimize", 'r', UOption.NO_ARG), 239 UOption.create("fix", 'f', UOption.OPTIONAL_ARG), 240 UOption.create("join-args", 'i', UOption.OPTIONAL_ARG), 241 UOption.create("vet", 'v', UOption.OPTIONAL_ARG), 242 UOption.create("resolve", 'z', UOption.OPTIONAL_ARG), 243 UOption.create("path", 'p', UOption.REQUIRES_ARG), 244 UOption.create("user", 'u', UOption.REQUIRES_ARG), 245 UOption.create("all", 'a', UOption.REQUIRES_ARG), 246 UOption.create("check", 'c', UOption.NO_ARG), 247 UOption.create("konfig", 'k', UOption.OPTIONAL_ARG).setDefault("modify_config.txt"), 248 }; 249 250 private static final UnicodeSet allMergeOptions = new UnicodeSet("[rcd]"); 251 252 static final String HELP_TEXT1 = "Use the following options" 253 + XPathParts.NEWLINE 254 + "-h or -?\t for this message" 255 + XPathParts.NEWLINE 256 + "-" 257 + options[SOURCEDIR].shortName 258 + "\t source directory. Default = -s" 259 + CldrUtility.getCanonicalName(CLDRPaths.MAIN_DIRECTORY) 260 + XPathParts.NEWLINE 261 + "\tExample:-sC:\\Unicode-CVS2\\cldr\\common\\gen\\source\\" 262 + XPathParts.NEWLINE 263 + "-" 264 + options[DESTDIR].shortName 265 + "\t destination directory. Default = -d" 266 + CldrUtility.getCanonicalName(CLDRPaths.GEN_DIRECTORY + "main/") 267 + XPathParts.NEWLINE 268 + "-m<regex>\t to restrict the locales to what matches <regex>" 269 + XPathParts.NEWLINE 270 + "-j<merge_dir>/X'\t to merge two sets of files together (from <source_dir>/X and <merge_dir>/X', " 271 + XPathParts.NEWLINE 272 + "\twhere * in X' is replaced by X)." 273 + XPathParts.NEWLINE 274 + "\tExample:-jC:\\Unicode-CVS2\\cldr\\dropbox\\to_be_merged\\missing\\missing_*" 275 + XPathParts.NEWLINE 276 + "-i\t merge arguments:" 277 + XPathParts.NEWLINE 278 + "\tr\t replace contents (otherwise new data will be draft=\"unconfirmed\")" 279 + XPathParts.NEWLINE 280 + "\tc\t ignore comments in <merge_dir> files" 281 // + XPathParts.NEWLINE 282 // + "-r\t to minimize the results (removing items that inherit from parent)." 283 + XPathParts.NEWLINE 284 + "-v\t incorporate vetting information, and generate diff files." 285 + XPathParts.NEWLINE 286 + "-z\t generate resolved files" 287 + XPathParts.NEWLINE 288 + "-p\t set path for -fx" 289 + XPathParts.NEWLINE 290 + "-u\t set user for -fb" 291 + XPathParts.NEWLINE 292 + "-a\t pattern: recurse over all subdirectories that match pattern" 293 + XPathParts.NEWLINE 294 + "-c\t check that resulting xml files are valid. Requires that a dtd directory be copied to the output directory, in the appropriate location." 295 + XPathParts.NEWLINE 296 + "-k\t config_file\twith -fk perform modifications according to what is in the config file. For format details, see:" 297 + XPathParts.NEWLINE 298 + "\t\thttp://cldr.unicode.org/development/cldr-big-red-switch/cldrmodify-passes/cldrmodify-config." 299 + XPathParts.NEWLINE 300 + "-f\t to perform various fixes on the files (add following arguments to specify which ones, eg -fxi)" 301 + XPathParts.NEWLINE; 302 303 static final String HELP_TEXT2 = "Note: A set of bat files are also generated in <dest_dir>/diff. They will invoke a comparison program on the results." 304 + XPathParts.NEWLINE; 305 private static final boolean SHOW_DETAILS = false; 306 private static boolean SHOW_PROCESSING = false; 307 308 static String sourceInput; 309 310 /** 311 * Picks options and executes. Use -h to see options. 312 */ main(String[] args)313 public static void main(String[] args) throws Exception { 314 long startTime = System.currentTimeMillis(); 315 UOption.parseArgs(args, options); 316 if (options[HELP1].doesOccur || options[HELP2].doesOccur) { 317 System.out.println(HELP_TEXT1 + fixList.showHelp() + HELP_TEXT2); 318 return; 319 } 320 checkSuboptions(options[FIX], fixList.getOptions()); 321 checkSuboptions(options[JOIN_ARGS], allMergeOptions); 322 String recurseOnDirectories = options[ALL_DIRS].value; 323 boolean makeResolved = options[RESOLVE].doesOccur; // Utility.COMMON_DIRECTORY + "main/"; 324 325 // String sourceDir = "C:\\ICU4C\\locale\\common\\main\\"; 326 327 sourceInput = options[SOURCEDIR].value; 328 String destInput = options[DESTDIR].value; 329 if (recurseOnDirectories != null) { 330 sourceInput = removeSuffix(sourceInput, "main/", "main"); 331 destInput = removeSuffix(destInput, "main/", "main"); 332 } 333 String sourceDirBase = CldrUtility.checkValidDirectory(sourceInput); // Utility.COMMON_DIRECTORY + "main/"; 334 String targetDirBase = CldrUtility.checkValidDirectory(destInput); // Utility.GEN_DIRECTORY + "main/"; 335 System.out.format("Source:\t%s\n", sourceDirBase); 336 System.out.format("Target:\t%s\n", targetDirBase); 337 338 Set<String> dirSet = new TreeSet<String>(); 339 if (recurseOnDirectories == null) { 340 dirSet.add(""); 341 } else { 342 String[] subdirs = new File(sourceDirBase).list(); 343 Matcher subdirMatch = PatternCache.get(recurseOnDirectories).matcher(""); 344 for (String subdir : subdirs) { 345 if (!subdirMatch.reset(subdir).find()) continue; 346 dirSet.add(subdir + "/"); 347 } 348 } 349 for (String dir : dirSet) { 350 String sourceDir = sourceDirBase + dir; 351 if (!new File(sourceDir).isDirectory()) continue; 352 String targetDir = targetDirBase + dir; 353 Log.setLog(targetDir + "/diff", "log.txt"); 354 try { 355 Factory cldrFactory = Factory.make(sourceDir, ".*"); 356 357 if (options[VET_ADD].doesOccur) { 358 VettingAdder va = new VettingAdder(options[VET_ADD].value); 359 va.showFiles(cldrFactory, targetDir); 360 return; 361 } 362 363 Factory mergeFactory = null; 364 365 String join_prefix = "", join_postfix = ""; 366 if (options[JOIN].doesOccur) { 367 String mergeDir = options[JOIN].value; 368 File temp = new File(mergeDir); 369 mergeDir = CldrUtility.checkValidDirectory(temp.getParent() + File.separator); // Utility.COMMON_DIRECTORY 370 // + "main/"; 371 String filename = temp.getName(); 372 join_prefix = join_postfix = ""; 373 int pos = filename.indexOf("*"); 374 if (pos >= 0) { 375 join_prefix = filename.substring(0, pos); 376 join_postfix = filename.substring(pos + 1); 377 } 378 mergeFactory = Factory.make(mergeDir, ".*"); 379 } 380 /* 381 * Factory cldrFactory = Factory.make(sourceDir, ".*"); 382 * Set testSet = cldrFactory.getAvailable(); 383 * String[] quicktest = new String[] { 384 * "de" 385 * //"ar", "dz_BT", 386 * // "sv", "en", "de" 387 * }; 388 * if (quicktest.length > 0) { 389 * testSet = new TreeSet(Arrays.asList(quicktest)); 390 * } 391 */ 392 Set<String> locales = new TreeSet<String>(cldrFactory.getAvailable()); 393 if (mergeFactory != null) { 394 Set<String> temp = new TreeSet<String>(mergeFactory.getAvailable()); 395 Set<String> locales3 = new TreeSet<String>(); 396 for (String locale : temp) { 397 if (!locale.startsWith(join_prefix) || !locale.endsWith(join_postfix)) continue; 398 locales3.add(locale.substring(join_prefix.length(), locale.length() - join_postfix.length())); 399 } 400 locales.retainAll(locales3); 401 System.out.println("Merging: " + locales3); 402 } 403 new CldrUtility.MatcherFilter(options[MATCH].value).retainAll(locales); 404 405 RetainWhenMinimizing retainIfTrue = null; 406 PathHeader.Factory pathHeaderFactory = null; 407 408 fixList.handleSetup(); 409 410 long lastTime = System.currentTimeMillis(); 411 int spin = 0; 412 System.out.format(locales.size() + " Locales:\t%s\n", locales.toString()); 413 int totalRemoved = 0; 414 for (String test : locales) { 415 spin++; 416 if (SHOW_PROCESSING) { 417 long now = System.currentTimeMillis(); 418 if (now - lastTime > 5000) { 419 System.out.println(" .. still processing " + test + " [" + spin + "/" + locales.size() 420 + "]"); 421 lastTime = now; 422 } 423 } 424 // testJavaSemantics(); 425 426 // TODO parameterize the directory and filter 427 // System.out.println("C:\\ICU4C\\locale\\common\\main\\fr.xml"); 428 429 CLDRFile k = cldrFactory.make(test, makeResolved).cloneAsThawed(); 430 // HashSet<String> set = Builder.with(new HashSet<String>()).addAll(k).get(); 431 // System.out.format("Locale\t%s, Size\t%s\n", test, set.size()); 432 // if (k.isNonInheriting()) continue; // for now, skip supplementals 433 if (DEBUG_PATHS != null) { 434 System.out.println("Debug1 (" + test + "):\t" + k.toString(DEBUG_PATHS)); 435 } 436 // System.out.println(k); 437 // String s1 = 438 // "//ldml/segmentations/segmentation[@type=\"LineBreak\"]/variables/variable[@_q=\"0061\"][@id=\"$CB\"] "; 439 // String s2 = 440 // "//ldml/segmentations/segmentation[@type=\"LineBreak\"]/variables/variable[@_q=\"003A\"][@id=\"$CB\"]"; 441 // System.out.println(k.ldmlComparator.compare(s1, s2)); 442 if (mergeFactory != null) { 443 int mergeOption = CLDRFile.MERGE_ADD_ALTERNATE; 444 CLDRFile toMergeIn = mergeFactory.make(join_prefix + test + join_postfix, false) 445 .cloneAsThawed(); 446 if (toMergeIn != null) { 447 if (options[JOIN_ARGS].doesOccur) { 448 if (options[JOIN_ARGS].value.indexOf("r") >= 0) 449 mergeOption = CLDRFile.MERGE_REPLACE_MY_DRAFT; 450 if (options[JOIN_ARGS].value.indexOf("d") >= 0) 451 mergeOption = CLDRFile.MERGE_REPLACE_MINE; 452 if (options[JOIN_ARGS].value.indexOf("c") >= 0) toMergeIn.clearComments(); 453 if (options[JOIN_ARGS].value.indexOf("x") >= 0) removePosix(toMergeIn); 454 } 455 toMergeIn.makeDraft(DraftStatus.contributed); 456 k.putAll(toMergeIn, mergeOption); 457 } 458 // special fix 459 k.removeComment( 460 " The following are strings that are not found in the locale (currently), but need valid translations for localizing timezones. "); 461 } 462 if (DEBUG_PATHS != null) { 463 System.out.println("Debug2 (" + test + "):\t" + k.toString(DEBUG_PATHS)); 464 } 465 if (options[FIX].doesOccur) { 466 fix(k, options[FIX].value, options[KONFIG].value, cldrFactory); 467 } 468 if (DEBUG_PATHS != null) { 469 System.out.println("Debug3 (" + test + "):\t" + k.toString(DEBUG_PATHS)); 470 } 471 // if (options[MINIMIZE].doesOccur) { 472 // if (pathHeaderFactory == null) { 473 // pathHeaderFactory = PathHeader.getFactory(cldrFactory.make("en", true)); 474 // } 475 // // TODO, fix identity 476 // String parent = LocaleIDParser.getParent(test); 477 // if (parent != null) { 478 // CLDRFile toRemove = cldrFactory.make(parent, true); 479 // // remove the items that are language codes, script codes, or region codes 480 // // since they may be real translations. 481 // if (retainIfTrue == null) { 482 // retainIfTrue = new RetainWhenMinimizing(); 483 // } 484 // retainIfTrue.setParentFile(toRemove); 485 // List<String> removed = DEBUG ? null : new ArrayList<String>(); 486 // k.removeDuplicates(toRemove, COMMENT_REMOVALS, retainIfTrue, removed); 487 // if (removed != null && removed.size() != 0) { 488 // totalRemoved += removed.size(); 489 // Set<PathHeader> sorted = new TreeSet<PathHeader>(); 490 // for (String path : removed) { 491 // sorted.add(pathHeaderFactory.fromPath(path)); 492 // } 493 // for (PathHeader pathHeader : sorted) { 494 // System.out.println("\t# " + test + "\t" + pathHeader + "\t" + pathHeader.getOriginalPath()); 495 // } 496 // System.out.println("\t# " + test + "\t# Removed:\t" + removed.size()); 497 // } 498 // } 499 // } 500 // System.out.println(CLDRFile.getAttributeOrder()); 501 502 /* 503 * if (false) { 504 * Map tempComments = k.getXpath_comments(); 505 * 506 * for (Iterator it2 = tempComments.keySet().iterator(); it2.hasNext();) { 507 * String key = (String) it2.next(); 508 * String comment = (String) tempComments.get(key); 509 * Log.logln("Writing extra comment: " + key); 510 * System.out.println(key + "\t comment: " + comment); 511 * } 512 * } 513 */ 514 515 if (DEBUG_PATHS != null) { 516 System.out.println("Debug4 (" + test + "):\t" + k.toString(DEBUG_PATHS)); 517 } 518 519 PrintWriter pw = FileUtilities.openUTF8Writer(targetDir, test + ".xml"); 520 String testPath = "//ldml/dates/calendars/calendar[@type=\"persian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/month[@type=\"1\"]"; 521 if (false) { 522 System.out.println("Printing Raw File:"); 523 testPath = "//ldml/dates/calendars/calendar[@type=\"persian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/alias"; 524 System.out.println(k.getStringValue(testPath)); 525 // System.out.println(k.getFullXPath(testPath)); 526 Iterator it4 = k.iterator(); 527 Set s = CollectionUtilities.addAll(it4, new TreeSet()); 528 529 System.out.println(k.getStringValue(testPath)); 530 // if (true) return; 531 Set orderedSet = new TreeSet(k.getComparator()); 532 CollectionUtilities.addAll(k.iterator(), orderedSet); 533 for (Iterator it3 = orderedSet.iterator(); it3.hasNext();) { 534 String path = (String) it3.next(); 535 // System.out.println(path); 536 if (path.equals(testPath)) { 537 System.out.println("huh?"); 538 } 539 String value = k.getStringValue(path); 540 String fullpath = k.getFullXPath(path); 541 System.out.println("\t=\t" + fullpath); 542 System.out.println("\t=\t" + value); 543 } 544 System.out.println("Done Printing Raw File:"); 545 } 546 547 k.write(pw); 548 // pw.println(); 549 pw.close(); 550 if (options[CHECK].doesOccur) { 551 QuickCheck.check(new File(targetDir, test + ".xml")); 552 } 553 554 // JCE: I don't think anyone really uses the .bat files from CLDRModify any more, since 555 // Eclipse provides a decent file comparison program. You can comment this back in if 556 // you need it, but I found that sometimes having this here clobbers the real output 557 // file, which we definitely don't want. 558 // ToolUtilities.generateBat(sourceDir, test + ".xml", targetDir, test + ".xml", lineComparer); 559 560 /* 561 * boolean ok = Utility.areFileIdentical(sourceDir + test + ".xml", 562 * targetDir + test + ".xml", failureLines, Utility.TRIM + Utility.SKIP_SPACES); 563 * if (!ok) { 564 * System.out.println("Found differences at: "); 565 * System.out.println("\t" + failureLines[0]); 566 * System.out.println("\t" + failureLines[1]); 567 * } 568 */ 569 } 570 if (totalSkeletons.size() != 0) { 571 System.out.println("Total Skeletons" + totalSkeletons); 572 } 573 if (totalRemoved > 0) { 574 System.out.println("# Removed:\t" + totalRemoved); 575 } 576 } finally { 577 fixList.handleCleanup(); 578 Log.close(); 579 System.out.println("Done -- Elapsed time: " + ((System.currentTimeMillis() - startTime) / 60000.0) 580 + " minutes"); 581 } 582 } 583 } 584 removeSuffix(String value, String... suffices)585 private static String removeSuffix(String value, String... suffices) { 586 for (String suffix : suffices) { 587 if (value.endsWith(suffix)) { 588 return value.substring(0, value.length() - suffix.length()); 589 } 590 } 591 return value; 592 } 593 594 /* 595 * Use the coverage to determine what we should keep in the case of a locale just below root. 596 */ 597 598 static class RetainWhenMinimizing implements CLDRFile.RetentionTest { 599 private CLDRFile file; 600 private CLDRLocale c; 601 private boolean isArabicSublocale; 602 // Status status = new Status(); // no need to have, was unused 603 setParentFile(CLDRFile file)604 public RetainWhenMinimizing setParentFile(CLDRFile file) { 605 this.file = file; 606 this.c = CLDRLocale.getInstance(file.getLocaleIDFromIdentity()); 607 isArabicSublocale = "ar".equals(c.getLanguage()) && !"001".equals(c.getCountry()); 608 return this; 609 } 610 611 @Override getRetention(String path)612 public Retention getRetention(String path) { 613 if (path.startsWith("//ldml/identity/")) { 614 return Retention.RETAIN; 615 } 616 // special case for Arabic 617 if (isArabicSublocale && path.startsWith("//ldml/numbers/defaultNumberingSystem")) { 618 return Retention.RETAIN; 619 } 620 String localeId = file.getSourceLocaleID(path, null); 621 if ((c.isLanguageLocale() || c.equals(CLDRLocale.getInstance("pt_PT"))) 622 && (XMLSource.ROOT_ID.equals(localeId) || XMLSource.CODE_FALLBACK_ID.equals(localeId))) { 623 return Retention.RETAIN; 624 } 625 return Retention.RETAIN_IF_DIFFERENT; 626 } 627 }; 628 629 static final Splitter COMMA_SEMI = Splitter.on(Pattern.compile("[,;|]")).trimResults().omitEmptyStrings(); 630 protected static final boolean NUMBER_SYSTEM_HACK = true; 631 632 /** 633 * 634 */ checkSuboptions(UOption givenOptions, UnicodeSet allowedOptions)635 private static void checkSuboptions(UOption givenOptions, UnicodeSet allowedOptions) { 636 if (givenOptions.doesOccur && !allowedOptions.containsAll(givenOptions.value)) { 637 throw new IllegalArgumentException("Illegal sub-options for " 638 + givenOptions.shortName 639 + ": " 640 + new UnicodeSet().addAll(givenOptions.value).removeAll(allowedOptions) 641 + CldrUtility.LINE_SEPARATOR + "Use -? for help."); 642 } 643 } 644 645 /** 646 * 647 */ removePosix(CLDRFile toMergeIn)648 private static void removePosix(CLDRFile toMergeIn) { 649 Set<String> toRemove = new HashSet<String>(); 650 for (String xpath : toMergeIn) { 651 if (xpath.startsWith("//ldml/posix")) toRemove.add(xpath); 652 } 653 toMergeIn.removeAll(toRemove, false); 654 } 655 656 // private static class References { 657 // static Map<String,Map<String,String>> locale_oldref_newref = new TreeMap<String,Map<String,String>>(); 658 // 659 // static String[][] keys = {{"standard", "S", "[@standard=\"true\"]"}, {"references", "R", ""}}; 660 // UnicodeSet digits = new UnicodeSet("[0-9]"); 661 // int referenceCounter = 0; 662 // Map references_token = new TreeMap(); 663 // Set tokenSet = new HashSet(); 664 // String[] keys2; 665 // boolean isStandard; 666 // References(boolean standard) { 667 // isStandard = standard; 668 // keys2 = standard ? keys[0] : keys[1]; 669 // } 670 // /** 671 // * 672 // */ 673 // public void reset(CLDRFile k) { 674 // } 675 // /** 676 // * 677 // */ 678 // // Samples: 679 // // <language type="ain" references="RP1">阿伊努文</language> 680 // // <reference type="R1" uri="http://www.info.gov.hk/info/holiday_c.htm">二零零五年公眾假期刊登憲報</reference> 681 // private int fix(Map attributes, CLDRFile replacements) { 682 // // we have to have either a references element or attributes. 683 // String references = (String) attributes.get(keys2[0]); 684 // int result = 0; 685 // if (references != null) { 686 // references = references.trim(); 687 // if (references.startsWith("S") || references.startsWith("R")) { 688 // if (digits.containsAll(references.substring(1))) return 0; 689 // } 690 // String token = (String) references_token.get(references); 691 // if (token == null) { 692 // while (true) { 693 // token = keys2[1] + (++referenceCounter); 694 // if (!tokenSet.contains(token)) break; 695 // } 696 // references_token.put(references, token); 697 // System.out.println("Adding: " + token + "\t" + references); 698 // replacements.add("//ldml/references/reference[@type=\"" + token + "\"]" + keys2[2], references); 699 // result = 1; 700 // } 701 // attributes.put(keys2[0], token); 702 // } 703 // return result; 704 // } 705 // } 706 707 abstract static class CLDRFilter { 708 protected CLDRFile cldrFileToFilter; 709 protected CLDRFile cldrFileToFilterResolved; 710 private String localeID; 711 protected Set<String> availableChildren; 712 private Set<String> toBeRemoved; 713 private CLDRFile toBeReplaced; 714 protected Factory factory; 715 setFile(CLDRFile k, Factory factory, Set<String> removal, CLDRFile replacements)716 public final void setFile(CLDRFile k, Factory factory, Set<String> removal, CLDRFile replacements) { 717 this.cldrFileToFilter = k; 718 cldrFileToFilterResolved = null; 719 this.factory = factory; 720 localeID = k.getLocaleID(); 721 this.toBeRemoved = removal; 722 this.toBeReplaced = replacements; 723 handleStart(); 724 } 725 handleStart()726 public void handleStart() { 727 } 728 handlePath(String xpath)729 public abstract void handlePath(String xpath); 730 handleEnd()731 public void handleEnd() { 732 } 733 getResolved()734 public CLDRFile getResolved() { 735 if (cldrFileToFilterResolved == null) { 736 if (cldrFileToFilter.isResolved()) { 737 cldrFileToFilterResolved = cldrFileToFilter; 738 } else { 739 cldrFileToFilterResolved = factory.make(cldrFileToFilter.getLocaleID(), true); 740 } 741 } 742 return cldrFileToFilterResolved; 743 744 } show(String reason, String detail)745 public void show(String reason, String detail) { 746 System.out.println("%" + localeID + "\t" + reason + "\tConsidering " + detail); 747 } 748 retain(String path, String reason)749 public void retain(String path, String reason) { 750 System.out.println("%" + localeID + "\t" + reason + "\tRetaining: " + cldrFileToFilter.getStringValue(path) 751 + "\t at: " + path); 752 } 753 remove(String path)754 public void remove(String path) { 755 remove(path, "-"); 756 } 757 remove(String path, String reason)758 public void remove(String path, String reason) { 759 if (toBeRemoved.contains(path)) return; 760 toBeRemoved.add(path); 761 // System.out.println("%" + localeID + "\t" + reason + "\tRemoving:\t«" 762 // + cldrFileToFilter.getStringValue(path) + "»\t at:\t" + path); 763 String oldValueOldPath = cldrFileToFilter.getStringValue(path); 764 showAction(reason, "Removing", oldValueOldPath, null, null, path, path); 765 } 766 replace(String oldFullPath, String newFullPath, String newValue)767 public void replace(String oldFullPath, String newFullPath, String newValue) { 768 replace(oldFullPath, newFullPath, newValue, "-"); 769 } 770 showAction(String reason, String action, String oldValueOldPath, String oldValueNewPath, String newValue, String oldFullPath, String newFullPath)771 public void showAction(String reason, String action, String oldValueOldPath, String oldValueNewPath, 772 String newValue, String oldFullPath, String newFullPath) { 773 System.out.println("%" 774 + localeID 775 + "\t" 776 + action 777 + "\t" 778 + reason 779 + "\t«" 780 + oldValueOldPath 781 + "»" 782 + (newFullPath.equals(oldFullPath) || oldValueNewPath == null ? "" : oldValueNewPath 783 .equals(oldValueOldPath) ? "/=" : "/«" + oldValueNewPath + "»") 784 + "\t→\t" + (newValue == null ? "∅" : newValue.equals(oldValueOldPath) ? "≡" : "«" + newValue + "»") 785 + "\t" + oldFullPath 786 + (newFullPath.equals(oldFullPath) ? "" : "\t→\t" + newFullPath)); 787 } 788 789 /** 790 * There are the following cases, where: 791 * 792 * <pre> 793 * pathSame, new value null: Removing v p 794 * pathSame, new value not null: Replacing v v' p 795 * pathChanges, nothing at new path: Moving v p p' 796 * pathChanges, same value at new path: Replacing v v' p p' 797 * pathChanges, value changes: Overriding v v' p p' 798 * 799 * <pre> 800 * @param oldFullPath 801 * @param newFullPath 802 * @param newValue 803 * @param reason 804 */ replace(String oldFullPath, String newFullPath, String newValue, String reason)805 public void replace(String oldFullPath, String newFullPath, String newValue, String reason) { 806 String oldValueOldPath = cldrFileToFilter.getStringValue(oldFullPath); 807 String temp = cldrFileToFilter.getFullXPath(oldFullPath); 808 if (temp != null) { 809 oldFullPath = temp; 810 } 811 boolean pathSame = oldFullPath.equals(newFullPath); 812 813 if (pathSame) { 814 if (newValue == null) { 815 remove(oldFullPath, reason); 816 } else if (oldValueOldPath == null) { 817 toBeReplaced.add(oldFullPath, newValue); 818 showAction(reason, "Adding", oldValueOldPath, null, newValue, oldFullPath, newFullPath); 819 } else { 820 toBeReplaced.add(oldFullPath, newValue); 821 showAction(reason, "Replacing", oldValueOldPath, null, newValue, oldFullPath, newFullPath); 822 } 823 return; 824 } 825 String oldValueNewPath = cldrFileToFilter.getStringValue(newFullPath); 826 toBeRemoved.add(oldFullPath); 827 toBeReplaced.add(newFullPath, newValue); 828 829 if (oldValueNewPath == null) { 830 showAction(reason, "Moving", oldValueOldPath, oldValueNewPath, newValue, oldFullPath, newFullPath); 831 } else if (oldValueNewPath.equals(newValue)) { 832 showAction(reason, "Redundant Value", oldValueOldPath, oldValueNewPath, newValue, oldFullPath, newFullPath); 833 } else { 834 showAction(reason, "Overriding", oldValueOldPath, oldValueNewPath, newValue, oldFullPath, newFullPath); 835 } 836 } 837 838 /** 839 * Adds a new path-value pair to the CLDRFile. 840 * @param path the new path 841 * @param value the value 842 * @param reason Reason for adding the path and value. 843 */ add(String path, String value, String reason)844 public void add(String path, String value, String reason) { 845 String oldValueOldPath = cldrFileToFilter.getStringValue(path); 846 if (oldValueOldPath == null) { 847 toBeRemoved.remove(path); 848 toBeReplaced.add(path, value); 849 showAction(reason, "Adding", oldValueOldPath, null, 850 value, path, path); 851 } else { 852 replace(path, path, value); 853 } 854 } 855 getReplacementFile()856 public CLDRFile getReplacementFile() { 857 return toBeReplaced; 858 } 859 handleCleanup()860 public void handleCleanup() { 861 } 862 handleSetup()863 public void handleSetup() { 864 } 865 getLocaleID()866 public String getLocaleID() { 867 return localeID; 868 } 869 } 870 871 static class FixList { 872 // simple class, so we use quick list 873 CLDRFilter[] filters = new CLDRFilter[128]; // only ascii 874 String[] helps = new String[128]; // only ascii 875 UnicodeSet options = new UnicodeSet(); 876 String inputOptions = null; 877 add(char letter, String help)878 void add(char letter, String help) { 879 add(letter, help, null); 880 } 881 handleSetup()882 public void handleSetup() { 883 for (int i = 0; i < filters.length; ++i) { 884 if (filters[i] != null) { 885 filters[i].handleSetup(); 886 } 887 } 888 } 889 handleCleanup()890 public void handleCleanup() { 891 for (int i = 0; i < filters.length; ++i) { 892 if (filters[i] != null) { 893 filters[i].handleCleanup(); 894 } 895 } 896 } 897 getOptions()898 public UnicodeSet getOptions() { 899 return options; 900 } 901 add(char letter, String help, CLDRFilter filter)902 void add(char letter, String help, CLDRFilter filter) { 903 if (helps[letter] != null) throw new IllegalArgumentException("Duplicate letter: " + letter); 904 filters[letter] = filter; 905 helps[letter] = help; 906 options.add(letter); 907 } 908 setFile(CLDRFile file, String inputOptions, Factory factory, Set<String> removal, CLDRFile replacements)909 void setFile(CLDRFile file, String inputOptions, Factory factory, Set<String> removal, CLDRFile replacements) { 910 this.inputOptions = inputOptions; 911 for (int i = 0; i < inputOptions.length(); ++i) { 912 char c = inputOptions.charAt(i); 913 if (filters[c] != null) { 914 try { 915 filters[c].setFile(file, factory, removal, replacements); 916 } catch (RuntimeException e) { 917 System.err.println("Failure in " + filters[c].localeID + "\t START"); 918 throw e; 919 } 920 } 921 } 922 } 923 handleStart()924 void handleStart() { 925 for (int i = 0; i < inputOptions.length(); ++i) { 926 char c = inputOptions.charAt(i); 927 if (filters[c] != null) { 928 try { 929 filters[c].handleStart(); 930 } catch (RuntimeException e) { 931 System.err.println("Failure in " + filters[c].localeID + "\t START"); 932 throw e; 933 } 934 } 935 } 936 } 937 handlePath(String xpath)938 void handlePath(String xpath) { 939 //options = options.toLowerCase(); 940 for (int i = 0; i < inputOptions.length(); ++i) { 941 char c = inputOptions.charAt(i); 942 if (filters[c] != null) { 943 try { 944 filters[c].handlePath(xpath); 945 } catch (RuntimeException e) { 946 System.err.println("Failure in " + filters[c].localeID + "\t " + xpath); 947 throw e; 948 } 949 } 950 } 951 } 952 handleEnd()953 void handleEnd() { 954 for (int i = 0; i < inputOptions.length(); ++i) { 955 char c = inputOptions.charAt(i); 956 if (filters[c] != null) { 957 try { 958 filters[c].handleEnd(); 959 } catch (RuntimeException e) { 960 System.err.println("Failure in " + filters[c].localeID + "\t START"); 961 throw e; 962 } 963 } 964 } 965 } 966 showHelp()967 String showHelp() { 968 String result = ""; 969 for (int i = 0; i < filters.length; ++i) { 970 if (helps[i] != null) { 971 result += "\t" + (char) i + "\t " + helps[i] + XPathParts.NEWLINE; 972 } 973 } 974 return result; 975 } 976 } 977 978 static Set<String> totalSkeletons = new HashSet<String>(); 979 980 static Map<String, String> rootUnitMap = new HashMap<String, String>(); 981 982 static { 983 rootUnitMap.put("second", "s"); 984 rootUnitMap.put("minute", "min"); 985 rootUnitMap.put("hour", "h"); 986 rootUnitMap.put("day", "d"); 987 rootUnitMap.put("week", "w"); 988 rootUnitMap.put("month", "m"); 989 rootUnitMap.put("year", "y"); 990 991 fixList.add('z', "Remove deprecated elements", new CLDRFilter() { 992 993 public boolean isDeprecated(DtdType type, String element, String attribute, String value) { 994 return DtdData.getInstance(type).isDeprecated(element, attribute, value); 995 } 996 997 public boolean isDeprecated(DtdType type, String path) { 998 999 XPathParts parts = XPathParts.getFrozenInstance(path); 1000 for (int i = 0; i < parts.size(); ++i) { 1001 String element = parts.getElement(i); 1002 if (isDeprecated(type, element, "*", "*")) { 1003 return true; 1004 } 1005 for (Entry<String, String> entry : parts.getAttributes(i).entrySet()) { 1006 String attribute = entry.getKey(); 1007 String value = entry.getValue(); 1008 if (isDeprecated(type, element, attribute, value)) { 1009 return true; 1010 } 1011 } 1012 } 1013 return false; 1014 } 1015 1016 @Override 1017 public void handlePath(String xpath) { 1018 String fullPath = cldrFileToFilter.getFullXPath(xpath); 1019 XPathParts parts = XPathParts.getFrozenInstance(fullPath); 1020 for (int i = 0; i < parts.size(); ++i) { 1021 String element = parts.getElement(i); 1022 if (dtdData.isDeprecated(element, "*", "*")) { 1023 remove(fullPath, "Deprecated element"); 1024 return; 1025 } 1026 for (Entry<String, String> entry : parts.getAttributes(i).entrySet()) { 1027 String attribute = entry.getKey(); 1028 String value = entry.getValue(); 1029 if (dtdData.isDeprecated(element, attribute, value)) { 1030 remove(fullPath, "Element with deprecated attribute(s)"); 1031 } 1032 } 1033 } 1034 } 1035 }); 1036 1037 fixList.add('e', "fix Interindic", new CLDRFilter() { 1038 public void handlePath(String xpath) { 1039 if (xpath.indexOf("=\"InterIndic\"") < 0) return; 1040 String v = cldrFileToFilter.getStringValue(xpath); 1041 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1042 XPathParts fullparts = XPathParts.getFrozenInstance(fullXPath); 1043 Map<String, String> attributes = fullparts.findAttributes("transform"); 1044 String oldValue = attributes.get("direction"); 1045 if ("both".equals(oldValue)) { 1046 attributes.put("direction", "forward"); 1047 replace(xpath, fullparts.toString(), v); 1048 } 1049 } 1050 }); 1051 1052 fixList.add('B', "fix bogus values", new CLDRFilter() { 1053 RegexLookup<Integer> paths = RegexLookup.<Integer> of() 1054 .setPatternTransform(RegexLookup.RegexFinderTransformPath2) 1055 .add("//ldml/localeDisplayNames/languages/language[@type='([^']*)']", 0) 1056 .add("//ldml/localeDisplayNames/scripts/script[@type='([^']*)']", 0) 1057 .add("//ldml/localeDisplayNames/territories/territory[@type='([^']*)']", 0) 1058 .add("//ldml/dates/timeZoneNames/metazone[@type='([^']*)']", 0) 1059 .add("//ldml/dates/timeZoneNames/zone[@type='([^']*)']/exemplarCity", 0) 1060 .add("//ldml/numbers/currencies/currency[@type='([^']*)']/displayName", 0); 1061 Output<String[]> arguments = new Output<>(); 1062 CLDRFile english = CLDRConfig.getInstance().getEnglish(); 1063 boolean skip; 1064 1065 @Override 1066 public void handleStart() { 1067 CLDRFile resolved = factory.make(cldrFileToFilter.getLocaleID(), true); 1068 UnicodeSet exemplars = resolved.getExemplarSet(ExemplarType.main, WinningChoice.WINNING); 1069 skip = exemplars.containsSome('a', 'z'); 1070 // TODO add simpler way to skip file entirely 1071 } 1072 1073 public void handlePath(String xpath) { 1074 if (skip) { 1075 return; 1076 } 1077 Integer lookupValue = paths.get(xpath, null, arguments); 1078 if (lookupValue == null) { 1079 return; 1080 } 1081 String type = arguments.value[1]; 1082 String value = cldrFileToFilter.getStringValue(xpath); 1083 if (value.equals(type)) { 1084 remove(xpath, "Matches code"); 1085 return; 1086 } 1087 String evalue = english.getStringValue(xpath); 1088 if (value.equals(evalue)) { 1089 remove(xpath, "Matches English"); 1090 return; 1091 } 1092 } 1093 }); 1094 1095 fixList.add('s', "fix alt accounting", new CLDRFilter() { 1096 @Override 1097 public void handlePath(String xpath) { 1098 XPathParts parts = XPathParts.getFrozenInstance(xpath); 1099 if (!parts.containsAttributeValue("alt", "accounting")) { 1100 return; 1101 } 1102 String oldFullXPath = cldrFileToFilter.getFullXPath(xpath); 1103 String value = cldrFileToFilter.getStringValue(xpath); 1104 XPathParts fullparts = XPathParts.getInstance(oldFullXPath); // not frozen, for removeAttribute 1105 fullparts.removeAttribute("pattern", "alt"); 1106 fullparts.setAttribute("currencyFormat", "type", "accounting"); 1107 String newFullXPath = fullparts.toString(); 1108 replace(oldFullXPath, newFullXPath, value, "Move alt=accounting value to new path"); 1109 } 1110 }); 1111 1112 fixList.add('n', "add unit displayName", new CLDRFilter() { 1113 @Override 1114 public void handlePath(String xpath) { 1115 if (xpath.indexOf("/units/unitLength[@type=\"long\"]") < 0 || xpath.indexOf("/unitPattern[@count=\"other\"]") < 0 || 1116 xpath.indexOf("[@draft=\"unconfirmed\"]") >= 0) { 1117 return; 1118 } 1119 String value = cldrFileToFilter.getStringValue(xpath); 1120 String newValue = null; 1121 if (value.startsWith("{0}")) { 1122 newValue = value.substring(3).trim(); 1123 } else if (value.endsWith("{0}")) { 1124 newValue = value.substring(0, value.length() - 3).trim(); 1125 } else { 1126 System.out.println("unitPattern-other does not start or end with \"{0}\": \"" + value + "\""); 1127 return; 1128 } 1129 1130 String oldFullXPath = cldrFileToFilter.getFullXPath(xpath); 1131 String newFullXPath = oldFullXPath.substring(0, oldFullXPath.indexOf("unitPattern")).concat("displayName[@draft=\"provisional\"]"); 1132 add(newFullXPath, newValue, "create unit displayName-long from unitPattern-long-other"); 1133 String newFullXPathShort = newFullXPath.replace("[@type=\"long\"]", "[@type=\"short\"]"); 1134 add(newFullXPathShort, newValue, "create unit displayName-short from unitPattern-long-other"); 1135 } 1136 }); 1137 1138 fixList.add('x', "retain paths", new CLDRFilter() { 1139 Matcher m = null; 1140 1141 public void handlePath(String xpath) { 1142 if (m == null) { 1143 m = PatternCache.get(options[PATH].value).matcher(""); 1144 } 1145 //String v = cldrFileToFilter.getStringValue(xpath); 1146 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1147 if (!m.reset(fullXPath).matches()) { 1148 remove(xpath); 1149 } 1150 } 1151 }); 1152 1153 // fixList.add('_', "remove superfluous compound language translations", new CLDRFilter() { 1154 // private CLDRFile resolved; 1155 // 1156 // public void handleStart() { 1157 // resolved = factory.make(cldrFileToFilter.getLocaleID(), true); 1158 // } 1159 // 1160 // public void handlePath(String xpath) { 1161 // if (!xpath.contains("_")) return; 1162 // if (!xpath.contains("/language")) return; 1163 // XPathParts parts = XPathParts.getFrozenInstance(xpath); 1164 // String languageCode = parts.findAttributeValue("language", "type"); 1165 // String v = resolved.getStringValue(xpath); 1166 // if (v.equals(languageCode)) { 1167 // remove(xpath, "same as language code"); 1168 // return; 1169 // } 1170 // String generatedTranslation = resolved.getName(languageCode, true); 1171 // if (v.equals(generatedTranslation)) { 1172 // remove(xpath, "superfluous compound language"); 1173 // } 1174 // String spacelessGeneratedTranslation = generatedTranslation.replace(" ", ""); 1175 // if (v.equals(spacelessGeneratedTranslation)) { 1176 // remove(xpath, "superfluous compound language (after removing space)"); 1177 // } 1178 // } 1179 // }); 1180 1181 fixList.add('l', "change language code", new CLDRFilter() { 1182 private CLDRFile resolved; 1183 1184 public void handleStart() { 1185 resolved = factory.make(cldrFileToFilter.getLocaleID(), true); 1186 } 1187 1188 public void handlePath(String xpath) { 1189 if (!xpath.contains("/language")) { 1190 return; 1191 } 1192 XPathParts parts = XPathParts.getInstance(xpath); // not frozen, for setAttribute 1193 String languageCode = parts.findAttributeValue("language", "type"); 1194 String v = resolved.getStringValue(xpath); 1195 if (!languageCode.equals("swc")) { 1196 return; 1197 } 1198 parts.setAttribute("language", "type", "sw_CD"); 1199 replace(xpath, parts.toString(), v); 1200 } 1201 }); 1202 1203 // fixList.add('m', "remove multiple alt-variants", new CLDRFilter() { 1204 // 1205 // public void handleStart() { 1206 // } 1207 // 1208 // public void handlePath(String xpath) { 1209 // XPathParts parts = XPathParts.getFrozenInstance(xpath); 1210 // if (!parts.containsAttributeValue("alt", "variant")) { 1211 // return; 1212 // } 1213 // String variantValue = cldrFileToFilter.getStringValue(xpath); 1214 // String nonVariantXpath = xpath.replaceAll("\\[\\@alt=\"variant\"\\]", ""); 1215 // String nonVariantValue = cldrFileToFilter.getStringValue(nonVariantXpath); 1216 // if (variantValue.equals(nonVariantValue)) { 1217 // remove(xpath, "removing superfluous alt-variant value"); 1218 // } 1219 // } 1220 // }); 1221 1222 fixList.add('g', "Swap alt/non-alt values for Czechia", new CLDRFilter() { 1223 1224 public void handleStart() { 1225 } 1226 1227 public void handlePath(String xpath) { 1228 XPathParts parts = XPathParts.getFrozenInstance(xpath); 1229 if (!parts.containsAttributeValue("alt", "variant") || !parts.containsAttributeValue("type", "CZ")) { 1230 return; 1231 } 1232 String variantValue = cldrFileToFilter.getStringValue(xpath); 1233 String nonVariantXpath = xpath.replaceAll("\\[\\@alt=\"variant\"\\]", ""); 1234 String nonVariantValue = cldrFileToFilter.getStringValue(nonVariantXpath); 1235 replace(xpath, xpath, nonVariantValue); 1236 replace(nonVariantXpath, nonVariantXpath, variantValue); 1237 } 1238 }); 1239 1240 fixList.add('u', "fix duration unit patterns", new CLDRFilter() { 1241 1242 public void handlePath(String xpath) { 1243 if (!xpath.contains("/units")) { 1244 return; 1245 } 1246 if (!xpath.contains("/durationUnitPattern")) { 1247 return; 1248 } 1249 1250 String value = cldrFileToFilter.getStringValue(xpath); 1251 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1252 1253 XPathParts parts = XPathParts.getFrozenInstance(fullXPath); 1254 String unittype = parts.findAttributeValue("durationUnit", "type"); 1255 1256 String newFullXpath = "//ldml/units/durationUnit[@type=\"" + unittype + "\"]/durationUnitPattern"; 1257 replace(fullXPath, newFullXpath, value, "converting to new duration unit structure"); 1258 } 1259 }); 1260 1261 fixList.add('a', "Fix 0/1", new CLDRFilter() { 1262 final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze(); 1263 PluralInfo info; 1264 1265 @Override 1266 public void handleStart() { 1267 info = SupplementalDataInfo.getInstance().getPlurals(super.localeID); 1268 } 1269 1270 @Override 1271 public void handlePath(String xpath) { 1272 1273 if (xpath.indexOf("count") < 0) { 1274 return; 1275 } 1276 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1277 XPathParts parts = XPathParts.getInstance(fullpath); // not frozen, for setAttribute 1278 String countValue = parts.getAttributeValue(-1, "count"); 1279 if (!DIGITS.containsAll(countValue)) { 1280 return; 1281 } 1282 int intValue = Integer.parseInt(countValue); 1283 Count count = info.getCount(intValue); 1284 parts.setAttribute(-1, "count", count.toString()); 1285 String newPath = parts.toString(); 1286 String oldValue = cldrFileToFilter.getStringValue(newPath); 1287 String value = cldrFileToFilter.getStringValue(xpath); 1288 if (oldValue != null) { 1289 String fixed = oldValue.replace("{0}", countValue); 1290 if (value.equals(oldValue) 1291 || value.equals(fixed)) { 1292 remove(fullpath, "Superfluous given: " 1293 + count + "→«" + oldValue + "»"); 1294 } else { 1295 remove(fullpath, "Can’t replace: " 1296 + count + "→«" + oldValue + "»"); 1297 } 1298 return; 1299 } 1300 replace(fullpath, newPath, value, "Moving 0/1"); 1301 } 1302 }); 1303 1304 fixList.add('b', "Prep for bulk import", new CLDRFilter() { 1305 1306 public void handlePath(String xpath) { 1307 if (!options[USER].doesOccur) { 1308 return; 1309 } 1310 String userID = options[USER].value; 1311 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1312 String value = cldrFileToFilter.getStringValue(xpath); 1313 XPathParts parts = XPathParts.getInstance(fullpath); // not frozen, for addAttribute 1314 parts.addAttribute("draft", "unconfirmed"); 1315 parts.addAttribute("alt", "proposed-u" + userID + "-implicit1.8"); 1316 String newPath = parts.toString(); 1317 replace(fullpath, newPath, value); 1318 } 1319 }); 1320 1321 fixList.add('c', "Fix transiton from an old currency code to a new one", new CLDRFilter() { 1322 public void handlePath(String xpath) { 1323 String oldCurrencyCode = "VEF"; 1324 String newCurrencyCode = "VES"; 1325 int fromDate = 2008; 1326 int toDate = 2018; 1327 String leadingParenString = " ("; 1328 String trailingParenString = ")"; 1329 String separator = "\u2013"; 1330 String languageTag = "root"; 1331 1332 if (xpath.indexOf("/currency[@type=\"" + oldCurrencyCode + "\"]/displayName") < 0) { 1333 return; 1334 } 1335 String value = cldrFileToFilter.getStringValue(xpath); 1336 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1337 String newFullXPath = fullXPath.replace(oldCurrencyCode, newCurrencyCode); 1338 cldrFileToFilter.add(newFullXPath, value); 1339 1340 // Exceptions for locales that use an alternate numbering system or a different format for the dates at 1341 // the end. 1342 // Add additional ones as necessary 1343 String localeID = cldrFileToFilter.getLocaleID(); 1344 if (localeID.equals("ne")) { 1345 languageTag = "root-u-nu-deva"; 1346 } else if (localeID.equals("bn")) { 1347 languageTag = "root-u-nu-beng"; 1348 } else if (localeID.equals("ar")) { 1349 leadingParenString = " - "; 1350 trailingParenString = ""; 1351 } else if (localeID.equals("fa")) { 1352 languageTag = "root-u-nu-arabext"; 1353 separator = Utility.unescape(" \\u062A\\u0627 "); 1354 } 1355 1356 NumberFormat nf = NumberFormat.getInstance(ULocale.forLanguageTag(languageTag)); 1357 nf.setGroupingUsed(false); 1358 1359 String tagString = leadingParenString + nf.format(fromDate) + separator + nf.format(toDate) 1360 + trailingParenString; 1361 1362 replace(fullXPath, fullXPath, value + tagString); 1363 1364 } 1365 }); 1366 1367 fixList.add('p', "input-processor", new CLDRFilter() { 1368 private DisplayAndInputProcessor inputProcessor; 1369 1370 public void handleStart() { 1371 inputProcessor = new DisplayAndInputProcessor(cldrFileToFilter, true); 1372 } 1373 1374 public void handleEnd() { 1375 inputProcessor = null; // clean up, just in case 1376 } 1377 1378 public void handlePath(String xpath) { 1379 String value = cldrFileToFilter.getStringValue(xpath); 1380 if (!value.equals(value.trim())) { 1381 value = value; // for debugging 1382 } 1383 String newValue = inputProcessor.processInput(xpath, value, null); 1384 if (value.equals(newValue)) { 1385 return; 1386 } 1387 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1388 replace(fullXPath, fullXPath, newValue); 1389 } 1390 }); 1391 1392 fixList.add('t', "Fix missing count values groups", new CLDRFilter() { 1393 1394 public void handlePath(String xpath) { 1395 if (xpath.indexOf("@count=\"other\"") < 0) { 1396 return; 1397 } 1398 1399 String value = cldrFileToFilter.getStringValue(xpath); 1400 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1401 String[] missingCounts = { "one" }; 1402 for (String count : missingCounts) { 1403 String newFullXPath = fullXPath.replace("other", count); 1404 if (cldrFileToFilter.getWinningValue(newFullXPath) == null) { 1405 add(newFullXPath, value, "Adding missing plural form"); 1406 } 1407 } 1408 1409 } 1410 }); 1411 1412 fixList.add('f', "NFC (all but transforms, exemplarCharacters, pc, sc, tc, qc, ic)", new CLDRFilter() { 1413 public void handlePath(String xpath) { 1414 if (xpath.indexOf("/segmentation") >= 0 1415 || xpath.indexOf("/transforms") >= 0 1416 || xpath.indexOf("/exemplarCharacters") >= 0 1417 || xpath.indexOf("/pc") >= 0 1418 || xpath.indexOf("/sc") >= 0 1419 || xpath.indexOf("/tc") >= 0 1420 || xpath.indexOf("/qc") >= 0 1421 || xpath.indexOf("/ic") >= 0) return; 1422 String value = cldrFileToFilter.getStringValue(xpath); 1423 String nfcValue = Normalizer.compose(value, false); 1424 if (value.equals(nfcValue)) return; 1425 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1426 replace(fullXPath, fullXPath, nfcValue); 1427 } 1428 }); 1429 1430 fixList.add('v', "remove illegal codes", new CLDRFilter() { 1431 1432 /* 1433 * Set legalCurrencies; 1434 * } 1435 * { 1436 * StandardCodes sc = StandardCodes.make(); 1437 * legalCurrencies = new TreeSet(sc.getAvailableCodes("currency")); 1438 * // first remove non-ISO 1439 * for (Iterator it = legalCurrencies.iterator(); it.hasNext();) { 1440 * String code = (String) it.next(); 1441 * List data = sc.getFullData("currency", code); 1442 * if ("X".equals(data.get(3))) it.remove(); 1443 * } 1444 * } 1445 */ 1446 StandardCodes sc = StandardCodes.make(); 1447 String[] codeTypes = { "language", "script", "territory", "currency" }; 1448 1449 public void handlePath(String xpath) { 1450 if (xpath.indexOf("/currency") < 0 1451 && xpath.indexOf("/timeZoneNames") < 0 1452 && xpath.indexOf("/localeDisplayNames") < 0) return; 1453 XPathParts parts = XPathParts.getFrozenInstance(xpath); 1454 String code; 1455 for (int i = 0; i < codeTypes.length; ++i) { 1456 code = parts.findAttributeValue(codeTypes[i], "type"); 1457 if (code != null) { 1458 if (!sc.getGoodAvailableCodes(codeTypes[i]).contains(code)) remove(xpath); 1459 return; 1460 } 1461 } 1462 code = parts.findAttributeValue("zone", "type"); 1463 if (code != null) { 1464 if (code.indexOf("/GMT") >= 0) remove(xpath); 1465 } 1466 1467 } 1468 }); 1469 1470 fixList.add('w', "fix alt='...proposed' when there is no alternative", new CLDRFilter() { 1471 private Set<String> newFullXPathSoFar = new HashSet<String>(); 1472 1473 public void handlePath(String xpath) { 1474 if (xpath.indexOf("proposed") < 0) return; 1475 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1476 XPathParts parts = XPathParts.getInstance(fullXPath); // not frozen, for removeProposed 1477 String newFullXPath = parts.removeProposed().toString(); 1478 // now see if there is an uninherited value 1479 String value = cldrFileToFilter.getStringValue(xpath); 1480 String baseValue = cldrFileToFilter.getStringValue(newFullXPath); 1481 if (baseValue != null) { 1482 // if the value AND the fullxpath are the same as what we have, then delete 1483 if (value.equals(baseValue)) { 1484 String baseFullXPath = cldrFileToFilter.getFullXPath(newFullXPath); 1485 if (baseFullXPath.equals(newFullXPath)) { 1486 remove(xpath, "alt=base"); 1487 } 1488 } 1489 return; // there is, so skip 1490 } 1491 // there isn't, so modif if we haven't done so already 1492 if (!newFullXPathSoFar.contains(newFullXPath)) { 1493 replace(fullXPath, newFullXPath, value); 1494 newFullXPathSoFar.add(newFullXPath); 1495 } 1496 } 1497 }); 1498 /* 1499 * Fix id to be identical to skeleton 1500 * Eliminate any single-field ids 1501 * Add "L" (stand-alone month), "?" (other stand-alones) 1502 * Remove any fields with both a date and a time 1503 * Test that datetime format is valid format (will have to fix by hand) 1504 * Map k, K to H, h 1505 * 1506 * In Survey Tool: don't show id; compute when item added or changed 1507 * test validity 1508 */ 1509 1510 fixList.add('d', "fix dates", new CLDRFilter() { 1511 DateTimePatternGenerator dateTimePatternGenerator = DateTimePatternGenerator.getEmptyInstance(); 1512 DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser(); 1513 Map<String, Set<String>> seenSoFar = new HashMap<String, Set<String>>(); 1514 1515 public void handleStart() { 1516 seenSoFar.clear(); 1517 } 1518 1519 public void handlePath(String xpath) { 1520 // timeFormatLength type="full" 1521 if (xpath.contains("timeFormatLength") && xpath.contains("full")) { 1522 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1523 String value = cldrFileToFilter.getStringValue(xpath); 1524 boolean gotChange = false; 1525 List<Object> list = formatParser.set(value).getItems(); 1526 for (int i = 0; i < list.size(); ++i) { 1527 Object item = list.get(i); 1528 if (item instanceof DateTimePatternGenerator.VariableField) { 1529 String itemString = item.toString(); 1530 if (itemString.charAt(0) == 'z') { 1531 list.set(i, new VariableField(Utility.repeat("v", itemString.length()))); 1532 gotChange = true; 1533 } 1534 } 1535 } 1536 if (gotChange) { 1537 String newValue = toStringWorkaround(); 1538 if (value != newValue) { 1539 replace(xpath, fullpath, newValue); 1540 } 1541 } 1542 } 1543 if (xpath.indexOf("/availableFormats") < 0) { 1544 return; 1545 } 1546 String value = cldrFileToFilter.getStringValue(xpath); 1547 if (value == null) { 1548 return; // not in current file 1549 } 1550 1551 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1552 XPathParts fullparts = XPathParts.getFrozenInstance(fullpath); 1553 Map<String, String> attributes = fullparts.findAttributes("dateFormatItem"); 1554 String id = attributes.get("id"); 1555 String oldID = id; 1556 try { 1557 id = dateTimePatternGenerator.getBaseSkeleton(id); 1558 if (id.equals(oldID)) { 1559 return; 1560 } 1561 System.out.println(oldID + " => " + id); 1562 } catch (RuntimeException e) { 1563 id = "[error]"; 1564 return; 1565 } 1566 1567 attributes.put("id", id); 1568 totalSkeletons.add(id); 1569 1570 replace(xpath, fullparts.toString(), value); 1571 } 1572 1573 private String toStringWorkaround() { 1574 StringBuffer result = new StringBuffer(); 1575 List<Object> items = formatParser.getItems(); 1576 for (int i = 0; i < items.size(); ++i) { 1577 Object item = items.get(i); 1578 if (item instanceof String) { 1579 result.append(formatParser.quoteLiteral((String) items.get(i))); 1580 } else { 1581 result.append(items.get(i).toString()); 1582 } 1583 } 1584 return result.toString(); 1585 } 1586 1587 }); 1588 1589 fixList.add('y', "fix years to be y (with exceptions)", new CLDRFilter() { 1590 DateTimeCanonicalizer dtc = new DateTimeCanonicalizer(true); 1591 1592 DateTimePatternGenerator dateTimePatternGenerator = DateTimePatternGenerator.getEmptyInstance(); 1593 DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser(); 1594 Map<String, Set<String>> seenSoFar = new HashMap<String, Set<String>>(); 1595 1596 public void handleStart() { 1597 seenSoFar.clear(); 1598 } 1599 1600 public void handlePath(String xpath) { 1601 DateTimePatternType datetimePatternType = DateTimePatternType.fromPath(xpath); 1602 1603 // check to see if we need to change the value 1604 1605 if (!DateTimePatternType.STOCK_AVAILABLE_INTERVAL_PATTERNS.contains(datetimePatternType)) { 1606 return; 1607 } 1608 String oldValue = cldrFileToFilter.getStringValue(xpath); 1609 String value = dtc.getCanonicalDatePattern(xpath, oldValue, datetimePatternType); 1610 1611 String fullPath = cldrFileToFilter.getFullXPath(xpath); 1612 // Deleted code to canonicalize id for availableFormats items (cldrbug 5760) 1613 1614 if (value.equals(oldValue)) { 1615 return; 1616 } 1617 1618 // made it through the gauntlet, so replace 1619 1620 replace(xpath, fullPath, value); 1621 } 1622 }); 1623 1624 // This should only be applied to specific locales, and the results checked manually afterward. 1625 // It will only create ranges using the same digits as in root, not script-specific digits. 1626 // Any pre-existing year ranges should use the range marker from the intervalFormats "y" item. 1627 // This make several assumptions and is somewhat *FRAGILE*. 1628 fixList.add('j', "add year ranges from root to Japanese calendar eras", new CLDRFilter() { 1629 private CLDRFile rootFile; 1630 1631 public void handleStart() { 1632 rootFile = factory.make("root", false); 1633 } 1634 1635 public void handlePath(String xpath) { 1636 // Skip paths we don't care about 1637 if (xpath.indexOf("/calendar[@type=\"japanese\"]/eras/era") < 0) return; 1638 // Get root name for the era, check it 1639 String rootEraValue = rootFile.getStringValue(xpath); 1640 int rootEraIndex = rootEraValue.indexOf(" ("); 1641 if (rootEraIndex < 0) return; // this era does not have a year range in root, no need to add one in this 1642 // locale 1643 // Get range marker from intervalFormat range for y 1644 String yearIntervalFormat = cldrFileToFilter 1645 .getStringValue( 1646 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"y\"]/greatestDifference[@id=\"y\"]"); 1647 if (yearIntervalFormat == null) return; // oops, no intervalFormat data for y 1648 String rangeMarker = yearIntervalFormat.replaceAll("[.y\u5E74\uB144]", ""); // *FRAGILE* strip out 1649 // everything except the 1650 // range-indicating part 1651 // Get current locale name for this era, check it 1652 String eraValue = cldrFileToFilter.getStringValue(xpath); 1653 if (eraValue.indexOf('(') >= 0 && eraValue.indexOf(rangeMarker) >= 0) return; // this eraValue already 1654 // has a year range that 1655 // uses the appropriate 1656 // rangeMarker 1657 // Now update the root year range it with the rangeMarker for this locale, and append it to this 1658 // locale's name 1659 String rootYearRange = rootEraValue.substring(rootEraIndex); 1660 String appendYearRange = rootYearRange.replaceAll("[\u002D\u2013]", rangeMarker); 1661 String newEraValue = eraValue.concat(appendYearRange); 1662 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1663 replace(xpath, fullpath, newEraValue); 1664 // System.out.println("CLDRModify fj: rootEraValue: \"" + rootEraValue + "\", eraValue: \"" + eraValue + 1665 // "\", rangeMarker: \"" + rangeMarker + "\""); 1666 } 1667 }); 1668 1669 fixList.add('r', "fix references and standards", new CLDRFilter() { 1670 int currentRef = 500; 1671 Map<String, TreeMap<String, String>> locale_oldref_newref = new TreeMap<String, TreeMap<String, String>>(); 1672 TreeMap<String, String> oldref_newref; 1673 1674 //LanguageTagParser ltp = new LanguageTagParser(); 1675 1676 // References standards = new References(true); 1677 // References references = new References(false); 1678 1679 public void handleStart() { 1680 String locale = cldrFileToFilter.getLocaleID(); 1681 oldref_newref = locale_oldref_newref.get(locale); 1682 if (oldref_newref == null) { 1683 oldref_newref = new TreeMap<String, String>(); 1684 locale_oldref_newref.put(locale, oldref_newref); 1685 } 1686 } 1687 1688 // // Samples: 1689 // // <language type="ain" references="RP1">阿伊努文</language> 1690 // // <reference type="R1" uri="http://www.info.gov.hk/info/holiday_c.htm">二零零五年公眾假期刊登憲報</reference> 1691 public void handlePath(String xpath) { 1692 // must be minimised for this to work. 1693 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1694 if (!fullpath.contains("reference")) return; 1695 String value = cldrFileToFilter.getStringValue(xpath); 1696 XPathParts fullparts = XPathParts.getInstance(fullpath); // can't be frozen 1697 if ("reference".equals(fullparts.getElement(-1))) { 1698 fixType(value, "type", fullpath, fullparts); 1699 } else if (fullparts.getAttributeValue(-1, "references") != null) { 1700 fixType(value, "references", fullpath, fullparts); 1701 } else { 1702 System.out.println("CLDRModify: Skipping: " + xpath); 1703 } 1704 } 1705 1706 /** 1707 * 1708 * @param value 1709 * @param type 1710 * @param oldFullPath 1711 * @param fullparts the XPathParts -- must not be frozen, for addAttribute 1712 */ 1713 private void fixType(String value, String type, String oldFullPath, XPathParts fullparts) { 1714 String ref = fullparts.getAttributeValue(-1, type); 1715 if (whitespace.containsSome(ref)) { 1716 throw new IllegalArgumentException("Whitespace in references"); 1717 } 1718 String newRef = getNewRef(ref); 1719 fullparts.addAttribute(type, newRef); 1720 replace(oldFullPath, fullparts.toString(), value); 1721 } 1722 1723 private String getNewRef(String ref) { 1724 String newRef = oldref_newref.get(ref); 1725 if (newRef == null) { 1726 newRef = String.valueOf(currentRef++); 1727 newRef = "R" + Utility.repeat("0", (3 - newRef.length())) + newRef; 1728 oldref_newref.put(ref, newRef); 1729 } 1730 return newRef; 1731 } 1732 }); 1733 1734 fixList.add('q', "fix annotation punctuation", new CLDRFilter() { 1735 @Override 1736 public void handlePath(String xpath) { 1737 if (!xpath.contains("/annotation")) { 1738 return; 1739 } 1740 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1741 XPathParts parts = XPathParts.getInstance(fullpath); // not frozen, for putAttributeValue 1742 String cp = parts.getAttributeValue(2, "cp"); 1743 String tts = parts.getAttributeValue(2, "tts"); 1744 String type = parts.getAttributeValue(2, "type"); 1745 if ("tts".equals(type)) { 1746 return; // ok, skip 1747 } 1748 String hex = "1F600"; 1749 if (cp.startsWith("[")) { 1750 UnicodeSet us = new UnicodeSet(cp); 1751 if (us.size() == 1) { 1752 cp = us.iterator().next(); 1753 hex = Utility.hex(cp); 1754 } else { 1755 hex = us.toString(); 1756 } 1757 parts.putAttributeValue(2, "cp", cp); 1758 } 1759 parts.removeAttribute(2, "tts"); 1760 if (tts != null) { 1761 String newTts = CldrUtility.join(COMMA_SEMI.splitToList(tts), ", "); 1762 XPathParts parts2 = parts.cloneAsThawed(); 1763 parts2.putAttributeValue(2, "type", "tts"); 1764 add(parts2.toString(), newTts, "separate tts"); 1765 } 1766 String value = cldrFileToFilter.getStringValue(xpath); 1767 String newValue = CldrUtility.join(COMMA_SEMI.splitToList(value), " | "); 1768 final String newFullPath = parts.toString(); 1769 Comments comments = cldrFileToFilter.getXpath_comments(); 1770 String comment = comments.removeComment(CommentType.PREBLOCK, xpath); 1771 comment = hex + (comment == null ? "" : " " + comment); 1772 comments.addComment(CommentType.PREBLOCK, newFullPath, comment); 1773 if (!fullpath.equals(newFullPath) || !value.equals(newValue)) { 1774 replace(fullpath, newFullPath, newValue); 1775 } 1776 } 1777 }); 1778 1779 fixList.add('Q', "add annotation names to keywords", new CLDRFilter() { 1780 Set<String> available = Annotations.getAvailable(); 1781 TreeSet<String> sorted = new TreeSet<>(Collator.getInstance(ULocale.ROOT)); 1782 CLDRFile resolved; 1783 1784 @Override 1785 public void handleStart() { 1786 String localeID = cldrFileToFilter.getLocaleID(); 1787 if (!available.contains(localeID)) { 1788 throw new IllegalArgumentException("no annotations available, probably wrong directory"); 1789 } 1790 ; 1791 resolved = factory.make(localeID, true); 1792 } 1793 1794 @Override 1795 public void handlePath(String xpath) { 1796 if (!xpath.contains("/annotation")) { 1797 return; 1798 } 1799 // <annotation cp="">100 | honderd | persent | telling | vol</annotation> 1800 // <annotation cp="" type="tts">honderd punte</annotation> 1801 // we will copy honderd punte into the list of keywords. 1802 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1803 XPathParts parts = XPathParts.getFrozenInstance(fullpath); 1804 String type = parts.getAttributeValue(2, "type"); 1805 if (type == null) { 1806 return; // no TTS, so keywords, skip 1807 } 1808 1809 XPathParts keywordParts = parts.cloneAsThawed().removeAttribute(2, "type"); 1810 String keywordPath = keywordParts.toString(); 1811 String rawKeywordValue = cldrFileToFilter.getStringValue(keywordPath); 1812 1813 // skip if keywords AND name are inherited 1814 if (rawKeywordValue == null || rawKeywordValue.equals(CldrUtility.INHERITANCE_MARKER)) { 1815 String rawName = cldrFileToFilter.getStringValue(xpath); 1816 if (rawName == null || rawName.equals(CldrUtility.INHERITANCE_MARKER)) { 1817 return; 1818 } 1819 } 1820 1821 // skip if the name is not above root 1822 String nameSourceLocale = resolved.getSourceLocaleID(xpath, null); 1823 if ("root".equals(nameSourceLocale) || XMLSource.CODE_FALLBACK_ID.equals(nameSourceLocale)) { 1824 return; 1825 } 1826 1827 String name = resolved.getStringValue(xpath); 1828 String keywordValue = resolved.getStringValue(keywordPath); 1829 String sourceLocaleId = resolved.getSourceLocaleID(keywordPath, null); 1830 sorted.clear(); 1831 sorted.add(name); 1832 List<String> items; 1833 if (!sourceLocaleId.equals(XMLSource.ROOT_ID) && !sourceLocaleId.equals(XMLSource.CODE_FALLBACK_ID)) { 1834 items = Annotations.splitter.splitToList(keywordValue); 1835 sorted.addAll(items); 1836 } else { 1837 int debug = 0; 1838 } 1839 DisplayAndInputProcessor.filterCoveredKeywords(sorted); 1840 String newKeywordValue = CollectionUtilities.join(sorted, " | "); 1841 if (!newKeywordValue.equals(keywordValue)) { 1842 replace(keywordPath, keywordPath, newKeywordValue); 1843 } 1844 } 1845 }); 1846 1847 fixList.add('N', "add number symbols to exemplars", new CLDRFilter() { 1848 CLDRFile resolved; 1849 UnicodeSet numberStuff = new UnicodeSet(); 1850 Set<String> seen = new HashSet<>(); 1851 Set<String> hackAllowOnly = new HashSet<>(); 1852 boolean skip = false; 1853 1854 @Override 1855 public void handleStart() { 1856 String localeID = cldrFileToFilter.getLocaleID(); 1857 resolved = factory.make(localeID, true); 1858 numberStuff.clear(); 1859 seen.clear(); 1860 skip = localeID.equals("root"); 1861 // TODO add return value to handleStart to skip calling handlePath 1862 1863 if (NUMBER_SYSTEM_HACK) { 1864 hackAllowOnly.clear(); 1865 for (NumberingSystem system : NumberingSystem.values()) { 1866 String numberingSystem = system.path == null ? "latn" : cldrFileToFilter.getStringValue(system.path); 1867 if (numberingSystem != null) { 1868 hackAllowOnly.add(numberingSystem); 1869 } 1870 } 1871 int debug = 0; 1872 } 1873 } 1874 1875 @Override 1876 public void handlePath(String xpath) { 1877 // the following doesn't work without NUMBER_SYSTEM_HACK, because there are spurious numbersystems in the data. 1878 // http://unicode.org/cldr/trac/ticket/10648 1879 // so using a hack for now in handleEnd 1880 if (skip || !xpath.startsWith("//ldml/numbers/symbols")) { 1881 return; 1882 } 1883 1884 // //ldml/numbers/symbols[@numberSystem="latn"]/exponential 1885 XPathParts parts = XPathParts.getFrozenInstance(xpath); 1886 String system = parts.getAttributeValue(2, "numberSystem"); 1887 if (system == null) { 1888 System.err.println("Bogus numberSystem:\t" + cldrFileToFilter.getLocaleID() + " \t" + xpath); 1889 return; 1890 } else if (seen.contains(system) || !hackAllowOnly.contains(system)) { 1891 return; 1892 } 1893 seen.add(system); 1894 UnicodeSet exemplars = resolved.getExemplarsNumeric(system); 1895 System.out.println("# " + system + " ==> " + exemplars.toPattern(false)); 1896 for (String s : exemplars) { 1897 numberStuff.addAll(s); // add individual characters 1898 } 1899 } 1900 1901 @Override 1902 public void handleEnd() { 1903 if (!numberStuff.isEmpty()) { 1904 UnicodeSet current = cldrFileToFilter.getExemplarSet(ExemplarType.numbers, WinningChoice.WINNING); 1905 if (!numberStuff.equals(current)) { 1906 DisplayAndInputProcessor daip = new DisplayAndInputProcessor(cldrFileToFilter); 1907 if (current != null && !current.isEmpty()) { 1908 numberStuff.addAll(current); 1909 } 1910 String path = CLDRFile.getExemplarPath(ExemplarType.numbers); 1911 String value = daip.getPrettyPrinter().format(numberStuff); 1912 replace(path, path, value); 1913 } 1914 } 1915 } 1916 }); 1917 1918 fixList.add('k', 1919 "fix according to -k config file. Details on http://cldr.unicode.org/development/cldr-big-red-switch/cldrmodify-passes/cldrmodify-config", 1920 new CLDRFilter() { 1921 private Map<ConfigMatch, LinkedHashSet<Map<ConfigKeys, ConfigMatch>>> locale2keyValues; 1922 private LinkedHashSet<Map<ConfigKeys, ConfigMatch>> keyValues = new LinkedHashSet<Map<ConfigKeys, ConfigMatch>>(); 1923 private Matcher draftMatcher = Pattern.compile("\\[@draft=\"[^\"]+\"]").matcher(""); 1924 1925 @Override 1926 public void handleStart() { 1927 super.handleStart(); 1928 if (!options[FIX].doesOccur || !options[FIX].value.equals("k")) { 1929 return; 1930 } 1931 if (locale2keyValues == null) { 1932 fillCache(); 1933 } 1934 // set up for the specific locale we are dealing with. 1935 // a small optimization 1936 String localeId = getLocaleID(); 1937 keyValues.clear(); 1938 for (Entry<ConfigMatch, LinkedHashSet<Map<ConfigKeys, ConfigMatch>>> localeMatcher : locale2keyValues 1939 .entrySet()) { 1940 if (localeMatcher.getKey().matches(localeId)) { 1941 keyValues.addAll(localeMatcher.getValue()); 1942 } 1943 } 1944 System.out.println("# Checking entries & adding:\t" + keyValues.size()); 1945 for (Map<ConfigKeys, ConfigMatch> entry : keyValues) { 1946 ConfigMatch action = entry.get(ConfigKeys.action); 1947 //ConfigMatch locale = entry.get(ConfigKeys.locale); 1948 ConfigMatch pathMatch = entry.get(ConfigKeys.path); 1949 ConfigMatch valueMatch = entry.get(ConfigKeys.value); 1950 ConfigMatch newPath = entry.get(ConfigKeys.new_path); 1951 ConfigMatch newValue = entry.get(ConfigKeys.new_value); 1952 switch (action.action) { 1953 // we add all the values up front 1954 case addNew: 1955 case add: 1956 if (pathMatch != null || valueMatch != null || newPath == null || newValue == null) { 1957 throw new IllegalArgumentException( 1958 "Bad arguments, must have " + 1959 "path==null, value=null, new_path!=null, new_value!=null:\n\t" 1960 + entry); 1961 } 1962 String newPathString = newPath.getPath(getResolved()); 1963 if (action.action == ConfigAction.add 1964 || cldrFileToFilter.getStringValue(newPathString) == null) { 1965 replace(newPathString, newPathString, newValue.exactMatch, "config"); 1966 } 1967 break; 1968 // we just check 1969 case replace: 1970 if ((pathMatch == null && valueMatch == null) || (newPath == null && newValue == null)) { 1971 throw new IllegalArgumentException( 1972 "Bad arguments, must have " + 1973 "(path!=null OR value=null) AND (new_path!=null OR new_value!=null):\n\t" 1974 + entry); 1975 } 1976 break; 1977 // For delete, we just check; we'll remove later 1978 case delete: 1979 if (newPath != null || newValue != null) { 1980 throw new IllegalArgumentException("Bad arguments, must have " + 1981 "newPath=null, newValue=null" 1982 + entry); 1983 } 1984 break; 1985 default: // fall through 1986 throw new IllegalArgumentException("Internal Error"); 1987 } 1988 } 1989 } 1990 1991 private void fillCache() { 1992 locale2keyValues = new LinkedHashMap<ConfigMatch, LinkedHashSet<Map<ConfigKeys, ConfigMatch>>>(); 1993 String configFileName = options[KONFIG].value; 1994 FileProcessor myReader = new FileProcessor() { 1995 { 1996 doHash = false; 1997 } 1998 1999 @Override 2000 protected boolean handleLine(int lineCount, String line) { 2001 line = line.trim(); 2002 // if (line.isEmpty()) { 2003 // return true; 2004 // } 2005 String[] lineParts = line.split("\\s*;\\s*"); 2006 Map<ConfigKeys, ConfigMatch> keyValue = new EnumMap<ConfigKeys, ConfigMatch>( 2007 ConfigKeys.class); 2008 for (String linePart : lineParts) { 2009 int pos = linePart.indexOf('='); 2010 if (pos < 0) { 2011 throw new IllegalArgumentException(lineCount + ":\t No = in command: «" + linePart + "» in " + line); 2012 } 2013 ConfigKeys key = ConfigKeys.valueOf(linePart.substring(0, pos).trim()); 2014 if (keyValue.containsKey(key)) { 2015 throw new IllegalArgumentException("Must not have multiple keys: " + key); 2016 } 2017 String match = linePart.substring(pos + 1).trim(); 2018 keyValue.put(key, new ConfigMatch(key, match)); 2019 } 2020 final ConfigMatch locale = keyValue.get(ConfigKeys.locale); 2021 if (locale == null || keyValue.get(ConfigKeys.action) == null) { 2022 throw new IllegalArgumentException(); 2023 } 2024 2025 LinkedHashSet<Map<ConfigKeys, ConfigMatch>> keyValues = locale2keyValues 2026 .get(locale); 2027 if (keyValues == null) { 2028 locale2keyValues.put(locale, 2029 keyValues = new LinkedHashSet<Map<ConfigKeys, ConfigMatch>>()); 2030 } 2031 keyValues.add(keyValue); 2032 return true; 2033 } 2034 }; 2035 myReader.process(CLDRModify.class, configFileName); 2036 } 2037 2038 @Override 2039 public void handlePath(String xpath) { 2040 // slow method; could optimize 2041 for (Map<ConfigKeys, ConfigMatch> entry : keyValues) { 2042 ConfigMatch pathMatch = entry.get(ConfigKeys.path); 2043 if (pathMatch != null && !pathMatch.matches(xpath)) { 2044 continue; 2045 } 2046 ConfigMatch valueMatch = entry.get(ConfigKeys.value); 2047 String value = cldrFileToFilter.getStringValue(xpath); 2048 if (valueMatch != null && !valueMatch.matches(value)) { 2049 continue; 2050 } 2051 ConfigMatch action = entry.get(ConfigKeys.action); 2052 switch (action.action) { 2053 case delete: 2054 remove(xpath, "config"); 2055 break; 2056 case replace: 2057 ConfigMatch newPath = entry.get(ConfigKeys.new_path); 2058 ConfigMatch newValue = entry.get(ConfigKeys.new_value); 2059 2060 String fullpath = cldrFileToFilter.getFullXPath(xpath); 2061 String draft = ""; 2062 int loc = fullpath.indexOf("[@draft="); 2063 if (loc >= 0) { 2064 int loc2 = fullpath.indexOf(']', loc + 7); 2065 draft = fullpath.substring(loc, loc2 + 1); 2066 } 2067 2068 String modPath = ConfigMatch.getModified(pathMatch, xpath, newPath) + draft; 2069 String modValue = ConfigMatch.getModified(valueMatch, value, newValue); 2070 replace(xpath, modPath, modValue, "config"); 2071 } 2072 } 2073 } 2074 }); 2075 fixList.add('i', "fix Identical Children"); 2076 fixList.add('o', "check attribute validity"); 2077 2078 /** 2079 Goal is: if value in vxml is ^^^, then add ^^^ to trunk IFF 2080 (a) if there is no value in trunk 2081 (b) the value in trunk = bailey. 2082 */ 2083 fixList.add('^', "add inheritance-marked items from vxml to trunk", new CLDRFilter() { 2084 Factory VxmlFactory; 2085 final ArrayList<File> fileList = new ArrayList<>(); 2086 2087 @Override 2088 public void handleStart() { 2089 if (fileList.isEmpty()) { 2090 for (String top : Arrays.asList("common/", "seed/")) { 2091 //for (String leaf : Arrays.asList("main/", "annotations/")) { 2092 String leaf = sourceInput.contains("annotations") ? "annotations/" : "main/"; 2093 String key = top + leaf; 2094 fileList.add(new File(CLDRPaths.AUX_DIRECTORY + "voting/" + CLDRFile.GEN_VERSION + "/vxml/" + key)); 2095 } 2096 VxmlFactory = SimpleFactory.make(fileList.toArray(new File[fileList.size()]), ".*"); 2097 } 2098 2099 String localeID = cldrFileToFilter.getLocaleID(); 2100 2101 CLDRFile vxmlCommonMainFile; 2102 try { 2103 vxmlCommonMainFile = VxmlFactory.make(localeID, false); 2104 } catch (Exception e) { 2105 System.out.println("#ERROR: VXML file not found for " + localeID + " in " + fileList); 2106 return; 2107 } 2108 CLDRFile resolved = cldrFileToFilter; 2109 2110 if (!cldrFileToFilter.isResolved()) { 2111 resolved = factory.make(cldrFileToFilter.getLocaleID(), true); 2112 } 2113 2114 for (String xpath : vxmlCommonMainFile) { 2115 if (xpath.contains("/language[@type=\"aa\"")) { 2116 int debug = 0; 2117 } 2118 String vxmlValue = vxmlCommonMainFile.getStringValue(xpath); 2119 if (vxmlValue == null) { 2120 continue; 2121 } 2122 if (!CldrUtility.INHERITANCE_MARKER.equals(vxmlValue)) { 2123 continue; 2124 } 2125 2126 String trunkValue = resolved.getStringValue(xpath); 2127 if (trunkValue != null) { 2128 String baileyValue = resolved.getBaileyValue(xpath, null, null); 2129 if (!trunkValue.equals(baileyValue)) { 2130 continue; 2131 } 2132 } 2133 // at this point, the vxmlValue is ^^^ and the trunk value is either null or == baileyValue 2134 2135 2136 // special hack to avoid combined locale names like //ldml/localeDisplayNames/languages/language[@type="en_AU"][@draft="contributed"] 2137 2138 // if (xpath.startsWith("//ldml/localeDisplayNames/languages/language[@type=") && xpath.contains("_")) { 2139 // continue; 2140 // } 2141 2142 String fullPath = resolved.getFullXPath(xpath); // get the draft status, etc. 2143 if (fullPath == null) { // debugging 2144 fullPath = vxmlCommonMainFile.getFullXPath(xpath); 2145 if (fullPath == null) { 2146 throw new ICUException("getFullXPath not working for " + localeID + ", " + xpath); 2147 } 2148 } 2149 add(fullPath, vxmlValue, "Add or replace by " + CldrUtility.INHERITANCE_MARKER); 2150 } 2151 } 2152 @Override 2153 public void handlePath(String xpath) { 2154 // Everything done in handleStart 2155 } 2156 }); 2157 2158 fixList.add('L', "fix logical groups by adding all the bailey values", new CLDRFilter() { 2159 Set<String> seen = new HashSet<>(); 2160 CLDRFile resolved; 2161 boolean skip; 2162 CoverageLevel2 coverageLeveler; 2163 2164 @Override 2165 public void handleStart() { 2166 seen.clear(); 2167 resolved = getResolved(); 2168 skip = false; 2169 coverageLeveler = null; 2170 2171 String localeID = cldrFileToFilter.getLocaleID(); 2172 LanguageTagParser ltp = new LanguageTagParser().set(localeID); 2173 if (!ltp.getRegion().isEmpty() || !ltp.getVariants().isEmpty()) { 2174 skip = true; 2175 } else { 2176 coverageLeveler = CoverageLevel2.getInstance(localeID); 2177 } 2178 } 2179 @Override 2180 public void handlePath(String xpath) { 2181 if (skip 2182 || seen.contains(xpath) 2183 || coverageLeveler.getLevel(xpath) == Level.COMPREHENSIVE) { 2184 return; 2185 } 2186 Set<String> paths = LogicalGrouping.getPaths(cldrFileToFilter, xpath); 2187 if (paths.size() < 2) { 2188 return; 2189 } 2190 Set<String> needed = new LinkedHashSet<>(); 2191 for (String path2 : paths) { 2192 if (path2.equals(xpath)) { 2193 continue; 2194 } 2195 if (cldrFileToFilter.isHere(path2)) { 2196 continue; 2197 } 2198 if (LogicalGrouping.isOptional(cldrFileToFilter, path2)) { 2199 continue; 2200 } 2201 // ok, we have a path missing a value 2202 needed.add(path2); 2203 } 2204 if (needed.isEmpty()) { 2205 return; 2206 } 2207 // we need at least one value 2208 2209 // flesh out by adding a bailey value 2210 // TODO resolve the draft status in a better way 2211 // For now, get the lowest draft status, and we'll reset everything to that. 2212 2213 DraftStatus worstStatus = DraftStatus.contributed; // don't ever add an approved. 2214 for (String path2 : paths) { 2215 XPathParts parts = XPathParts.getFrozenInstance(path2); 2216 String rawStatus = parts.getAttributeValue(-1, "draft"); 2217 if (rawStatus == null) { 2218 continue; 2219 } 2220 DraftStatus df = DraftStatus.forString(rawStatus); 2221 if (df.compareTo(worstStatus) < 0) { 2222 worstStatus = df; 2223 } 2224 } 2225 2226 for (String path2 : paths) { 2227 String fullPath = resolved.getFullXPath(path2); 2228 String value = resolved.getStringValue(path2); 2229 if (LogicalGrouping.isOptional(cldrFileToFilter, path2) 2230 && !cldrFileToFilter.isHere(path2)) { 2231 continue; 2232 } 2233 2234 XPathParts fullparts = XPathParts.getInstance(fullPath); // not frozen, for setAttribute 2235 fullparts.setAttribute(-1, "draft", worstStatus.toString()); 2236 replace(fullPath, fullparts.toString(), value, "Fleshing out bailey to " + worstStatus); 2237 } 2238 seen.addAll(paths); 2239 } 2240 }); 2241 } 2242 getLast2Dirs(File sourceDir1)2243 public static String getLast2Dirs(File sourceDir1) { 2244 String[] pathElements = sourceDir1.toString().split("/"); 2245 return pathElements[pathElements.length-2] + "/" + pathElements[pathElements.length-1] + "/"; 2246 } 2247 2248 2249 // references="http://www.stat.fi/tk/tt/luokitukset/lk/kieli_02.html" 2250 2251 private static class ValuePair { 2252 String value; 2253 String fullxpath; 2254 } 2255 2256 /** 2257 * Find the set of xpaths that 2258 * (a) have all the same values (if present) in the children 2259 * (b) are absent in the parent, 2260 * (c) are different than what is in the fully resolved parent 2261 * and add them. 2262 */ fixIdenticalChildren(Factory cldrFactory, CLDRFile k, CLDRFile replacements)2263 static void fixIdenticalChildren(Factory cldrFactory, CLDRFile k, CLDRFile replacements) { 2264 String key = k.getLocaleID(); 2265 if (key.equals("root")) return; 2266 Set<String> availableChildren = cldrFactory.getAvailableWithParent(key, true); 2267 if (availableChildren.size() == 0) return; 2268 Set<String> skipPaths = new HashSet<String>(); 2269 Map<String, ValuePair> haveSameValues = new TreeMap<String, ValuePair>(); 2270 CLDRFile resolvedFile = cldrFactory.make(key, true); 2271 // get only those paths that are not in "root" 2272 CollectionUtilities.addAll(resolvedFile.iterator(), skipPaths); 2273 2274 // first, collect all the paths 2275 for (String locale : availableChildren) { 2276 if (locale.indexOf("POSIX") >= 0) continue; 2277 CLDRFile item = cldrFactory.make(locale, false); 2278 for (String xpath : item) { 2279 if (skipPaths.contains(xpath)) continue; 2280 // skip certain elements 2281 if (xpath.indexOf("/identity") >= 0) continue; 2282 if (xpath.startsWith("//ldml/numbers/currencies/currency")) continue; 2283 if (xpath.startsWith("//ldml/dates/timeZoneNames/metazone[")) continue; 2284 if (xpath.indexOf("[@alt") >= 0) continue; 2285 if (xpath.indexOf("/alias") >= 0) continue; 2286 2287 // must be string vale 2288 ValuePair v1 = new ValuePair(); 2289 v1.value = item.getStringValue(xpath); 2290 v1.fullxpath = item.getFullXPath(xpath); 2291 2292 ValuePair vAlready = haveSameValues.get(xpath); 2293 if (vAlready == null) { 2294 haveSameValues.put(xpath, v1); 2295 } else if (!v1.value.equals(vAlready.value) || !v1.fullxpath.equals(vAlready.fullxpath)) { 2296 skipPaths.add(xpath); 2297 haveSameValues.remove(xpath); 2298 } 2299 } 2300 } 2301 // at this point, haveSameValues is all kosher, so add items 2302 for (String xpath : haveSameValues.keySet()) { 2303 ValuePair v = haveSameValues.get(xpath); 2304 // if (v.value.equals(resolvedFile.getStringValue(xpath)) 2305 // && v.fullxpath.equals(resolvedFile.getFullXPath(xpath))) continue; 2306 replacements.add(v.fullxpath, v.value); 2307 } 2308 } 2309 fixAltProposed()2310 static void fixAltProposed() { 2311 throw new IllegalArgumentException(); 2312 // throw out any alt=proposed values that are the same as the main 2313 // HashSet toRemove = new HashSet(); 2314 // for (Iterator it = dataSource.iterator(); it.hasNext();) { 2315 // String cpath = (String) it.next(); 2316 // if (cpath.indexOf("[@alt=") < 0) continue; 2317 // String cpath2 = getNondraftNonaltXPath(cpath); 2318 // String value = getStringValue(cpath); 2319 // String value2 = getStringValue(cpath2); 2320 // if (!value.equals(value2)) continue; 2321 // // have to worry about cases where the info is not in the value!! 2322 // //fix this; values are the same!! 2323 // String fullpath = getNondraftNonaltXPath(getFullXPath(cpath)); 2324 // String fullpath2 = getNondraftNonaltXPath(getFullXPath(cpath2)); 2325 // if (!fullpath.equals(fullpath2)) continue; 2326 // Log.logln(getLocaleID() + "\tRemoving redundant alternate: " + getFullXPath(cpath) + " ;\t" + value); 2327 // Log.logln("\t\tBecause of: " + getFullXPath(cpath2) + " ;\t" + value2); 2328 // if (getFullXPath(cpath2).indexOf("[@references=") >= 0) { 2329 // System.out.println("Warning: removing references: " + getFullXPath(cpath2)); 2330 // } 2331 // toRemove.add(cpath); 2332 // } 2333 // dataSource.removeAll(toRemove); 2334 2335 } 2336 2337 /** 2338 * Perform various fixes 2339 * TODO add options to pick which one. 2340 * 2341 * @param options 2342 * @param config 2343 * @param cldrFactory 2344 */ fix(CLDRFile k, String inputOptions, String config, Factory cldrFactory)2345 private static void fix(CLDRFile k, String inputOptions, String config, Factory cldrFactory) { 2346 2347 // TODO before modifying, make sure that it is fully resolved. 2348 // then minimize against the NEW parents 2349 2350 Set<String> removal = new TreeSet<String>(k.getComparator()); 2351 CLDRFile replacements = SimpleFactory.makeFile("temp"); 2352 fixList.setFile(k, inputOptions, cldrFactory, removal, replacements); 2353 2354 for (String xpath : k) { 2355 fixList.handlePath(xpath); 2356 } 2357 fixList.handleEnd(); 2358 2359 // remove bad attributes 2360 2361 if (inputOptions.indexOf('v') >= 0) { 2362 CLDRTest.checkAttributeValidity(k, null, removal); 2363 } 2364 2365 // raise identical elements 2366 2367 if (inputOptions.indexOf('i') >= 0) { 2368 fixIdenticalChildren(cldrFactory, k, replacements); 2369 } 2370 2371 // now do the actions we collected 2372 2373 if (SHOW_DETAILS) { 2374 if (removal.size() != 0 || !replacements.isEmpty()) { 2375 if (!removal.isEmpty()) { 2376 System.out.println("Removals:"); 2377 for (String path : removal) { 2378 System.out.println(path + " =\t " + k.getStringValue(path)); 2379 } 2380 } 2381 if (!replacements.isEmpty()) { 2382 System.out.println("Additions/Replacements:"); 2383 System.out.println(replacements.toString().replaceAll("\u00A0", "<NBSP>")); 2384 } 2385 } 2386 } 2387 if (removal.size() != 0) { 2388 k.removeAll(removal, COMMENT_REMOVALS); 2389 } 2390 k.putAll(replacements, CLDRFile.MERGE_REPLACE_MINE); 2391 } 2392 2393 /** 2394 * Internal 2395 */ testJavaSemantics()2396 public static void testJavaSemantics() { 2397 Collator caseInsensitive = Collator.getInstance(ULocale.ROOT); 2398 caseInsensitive.setStrength(Collator.SECONDARY); 2399 Set<String> setWithCaseInsensitive = new TreeSet<String>(caseInsensitive); 2400 setWithCaseInsensitive.addAll(Arrays.asList(new String[] { "a", "b", "c" })); 2401 Set<String> plainSet = new TreeSet<String>(); 2402 plainSet.addAll(Arrays.asList(new String[] { "a", "b", "B" })); 2403 System.out.println("S1 equals S2?\t" + setWithCaseInsensitive.equals(plainSet)); 2404 System.out.println("S2 equals S1?\t" + plainSet.equals(setWithCaseInsensitive)); 2405 setWithCaseInsensitive.removeAll(plainSet); 2406 System.out.println("S1 removeAll S2 is empty?\t" + setWithCaseInsensitive.isEmpty()); 2407 } 2408 2409 // <localizedPatternChars>GyMdkHmsSEDFwWahKzYeugAZ</localizedPatternChars> 2410 /* 2411 * <localizedPattern> 2412 * <map type="era">G</map> 2413 * <map type="year">y</map> 2414 * <map type="year_iso">Y</map> 2415 * <map type="year_uniform">u</map> 2416 * <map type="month">M</map> 2417 * <map type="week_in_year">w</map> 2418 * <map type="week_in_month">W</map> 2419 * <map type="day">d</map> 2420 * <map type="day_of_year">D</map> 2421 * <map type="day_of_week_in_month">F</map> 2422 * <map type="day_julian">g</map> 2423 * <map type="day_of_week">E</map> 2424 * <map type="day_of_week_local">e</map> 2425 * <map type="period_in_day">a</map> 2426 * <map type="hour_1_12">h</map> 2427 * <map type="hour_0_23">H</map> 2428 * <map type="hour_0_11">K</map> 2429 * <map type="hour_1_24">k</map> 2430 * <map type="minute">m</map> 2431 * <map type="second">s</map> 2432 * <map type="fractions_of_second">S</map> 2433 * <map type="milliseconds_in_day">A</map> 2434 * <map type="timezone">z</map> 2435 * <map type="timezone_gmt">Z</map> 2436 * </localizedPattern> 2437 */ 2438 2439 } 2440