1 /* 2 ****************************************************************************** 3 * Copyright (C) 2004-2013, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ****************************************************************************** 6 */ 7 package org.unicode.cldr.tool; 8 9 import com.google.common.base.Joiner; 10 import com.google.common.base.Splitter; 11 import com.ibm.icu.dev.util.UOption; 12 import com.ibm.icu.impl.Utility; 13 import com.ibm.icu.text.Collator; 14 import com.ibm.icu.text.DateTimePatternGenerator; 15 import com.ibm.icu.text.DateTimePatternGenerator.VariableField; 16 import com.ibm.icu.text.Normalizer; 17 import com.ibm.icu.text.NumberFormat; 18 import com.ibm.icu.text.UnicodeSet; 19 import com.ibm.icu.util.ICUException; 20 import com.ibm.icu.util.Output; 21 import com.ibm.icu.util.ULocale; 22 import java.io.File; 23 import java.io.PrintWriter; 24 import java.util.ArrayList; 25 import java.util.Arrays; 26 import java.util.EnumMap; 27 import java.util.HashMap; 28 import java.util.HashSet; 29 import java.util.Iterator; 30 import java.util.LinkedHashMap; 31 import java.util.LinkedHashSet; 32 import java.util.List; 33 import java.util.Map; 34 import java.util.Map.Entry; 35 import java.util.Objects; 36 import java.util.Set; 37 import java.util.TreeMap; 38 import java.util.TreeSet; 39 import java.util.regex.Matcher; 40 import java.util.regex.Pattern; 41 import org.unicode.cldr.draft.FileUtilities; 42 import org.unicode.cldr.test.CLDRTest; 43 import org.unicode.cldr.test.CoverageLevel2; 44 import org.unicode.cldr.test.DisplayAndInputProcessor; 45 import org.unicode.cldr.test.QuickCheck; 46 import org.unicode.cldr.test.SubmissionLocales; 47 import org.unicode.cldr.util.Annotations; 48 import org.unicode.cldr.util.CLDRConfig; 49 import org.unicode.cldr.util.CLDRFile; 50 import org.unicode.cldr.util.CLDRFile.DraftStatus; 51 import org.unicode.cldr.util.CLDRFile.ExemplarType; 52 import 
org.unicode.cldr.util.CLDRFile.NumberingSystem; 53 import org.unicode.cldr.util.CLDRFile.WinningChoice; 54 import org.unicode.cldr.util.CLDRLocale; 55 import org.unicode.cldr.util.CLDRPaths; 56 import org.unicode.cldr.util.CLDRTool; 57 import org.unicode.cldr.util.CldrUtility; 58 import org.unicode.cldr.util.DateTimeCanonicalizer; 59 import org.unicode.cldr.util.DateTimeCanonicalizer.DateTimePatternType; 60 import org.unicode.cldr.util.DowngradePaths; 61 import org.unicode.cldr.util.DtdData; 62 import org.unicode.cldr.util.DtdType; 63 import org.unicode.cldr.util.Factory; 64 import org.unicode.cldr.util.FileProcessor; 65 import org.unicode.cldr.util.GlossonymConstructor; 66 import org.unicode.cldr.util.LanguageTagParser; 67 import org.unicode.cldr.util.Level; 68 import org.unicode.cldr.util.LocaleIDParser; 69 import org.unicode.cldr.util.LocaleNames; 70 import org.unicode.cldr.util.LogicalGrouping; 71 import org.unicode.cldr.util.PathChecker; 72 import org.unicode.cldr.util.PatternCache; 73 import org.unicode.cldr.util.RegexLookup; 74 import org.unicode.cldr.util.RegexUtilities; 75 import org.unicode.cldr.util.SimpleFactory; 76 import org.unicode.cldr.util.StandardCodes; 77 import org.unicode.cldr.util.StringId; 78 import org.unicode.cldr.util.SupplementalDataInfo; 79 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 80 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 81 import org.unicode.cldr.util.VoteResolver; 82 import org.unicode.cldr.util.XMLSource; 83 import org.unicode.cldr.util.XPathParts; 84 import org.unicode.cldr.util.XPathParts.Comments; 85 import org.unicode.cldr.util.XPathParts.Comments.CommentType; 86 87 /** 88 * Tool for applying modifications to the CLDR files. Use -h to see the options. 89 * 90 * <p>There are some environment variables that can be used with the program <br> 91 * -DSHOW_FILES=<anything> shows all create/open of files. 
92 */ 93 @CLDRTool( 94 alias = "modify", 95 description = 96 "Tool for applying modifications to the CLDR files. Use -h to see the options.") 97 public class CLDRModify { 98 private static final Splitter SPLIT_ON_SEMI = Splitter.onPattern("\\s*;\\s+"); 99 static final String DEBUG_PATHS = null; // ".*currency.*"; 100 static final boolean COMMENT_REMOVALS = false; // append removals as comments 101 static final UnicodeSet whitespace = new UnicodeSet("[:whitespace:]").freeze(); 102 static final UnicodeSet HEX = new UnicodeSet("[a-fA-F0-9]").freeze(); 103 private static final DtdData dtdData = DtdData.getInstance(DtdType.ldml); 104 105 // TODO make this into input option. 106 107 enum ConfigKeys { 108 action, 109 locale, 110 path, 111 value, 112 new_path, 113 new_value 114 } 115 116 enum ConfigAction { 117 /** Remove a path */ 118 delete, 119 /** Add a path/value */ 120 add, 121 /** Replace a path/value. Equals 'add' but tests selected paths */ 122 replace, 123 /** Add a a path/value. Equals 'add' but tests that path did NOT exist */ 124 addNew, 125 } 126 127 static final class ConfigMatch { 128 final String exactMatch; 129 final Matcher regexMatch; // doesn't have to be thread safe 130 final ConfigAction action; 131 final boolean hexPath; 132 ConfigMatch(ConfigKeys key, String match)133 public ConfigMatch(ConfigKeys key, String match) { 134 if (key == ConfigKeys.action) { 135 exactMatch = null; 136 regexMatch = null; 137 action = ConfigAction.valueOf(match); 138 hexPath = false; 139 } else if (match.startsWith("/") && match.endsWith("/")) { 140 if (key != ConfigKeys.locale && key != ConfigKeys.path && key != ConfigKeys.value) { 141 throw new IllegalArgumentException("Regex only allowed for old path/value."); 142 } 143 exactMatch = null; 144 regexMatch = 145 PatternCache.get( 146 match.substring(1, match.length() - 1) 147 .replace("[@", "\\[@")) 148 .matcher(""); 149 action = null; 150 hexPath = false; 151 } else { 152 exactMatch = match; 153 regexMatch = null; 154 
action = null; 155 hexPath = 156 (key == ConfigKeys.new_path || key == ConfigKeys.path) 157 && HEX.containsAll(match); 158 } 159 } 160 matches(String other)161 public boolean matches(String other) { 162 if (exactMatch == null) { 163 return regexMatch.reset(other).find(); 164 } else if (hexPath) { 165 // convert path to id for comparison 166 return exactMatch.equals(StringId.getHexId(other)); 167 } else { 168 return exactMatch.equals(other); 169 } 170 } 171 172 @Override toString()173 public String toString() { 174 return action != null 175 ? action.toString() 176 : exactMatch == null 177 ? regexMatch.toString() 178 : hexPath ? "*" + exactMatch + "*" : exactMatch; 179 } 180 getPath(CLDRFile cldrFileToFilter)181 public String getPath(CLDRFile cldrFileToFilter) { 182 if (!hexPath) { 183 return exactMatch; 184 } 185 // ensure that we have all the possible paths cached 186 String path = StringId.getStringFromHexId(exactMatch); 187 if (path == null) { 188 for (String eachPath : cldrFileToFilter.fullIterable()) { 189 StringId.getHexId(eachPath); 190 } 191 path = StringId.getStringFromHexId(exactMatch); 192 if (path == null) { 193 throw new IllegalArgumentException("No path for hex id: " + exactMatch); 194 } 195 } 196 return path; 197 } 198 getModified( ConfigMatch valueMatch, String value, ConfigMatch newValue)199 public static String getModified( 200 ConfigMatch valueMatch, String value, ConfigMatch newValue) { 201 if (valueMatch == null) { // match anything 202 if (newValue != null && newValue.exactMatch != null) { 203 return newValue.exactMatch; 204 } 205 if (value != null) { 206 return value; 207 } 208 throw new IllegalArgumentException("Can't have both old and new be null."); 209 } else if (valueMatch.exactMatch == null) { // regex 210 if (newValue == null || newValue.exactMatch == null) { 211 throw new IllegalArgumentException("Can't have regex without replacement."); 212 } 213 StringBuffer buffer = new StringBuffer(); 214 
valueMatch.regexMatch.appendReplacement(buffer, newValue.exactMatch); 215 return buffer.toString(); 216 } else { 217 return newValue.exactMatch != null ? newValue.exactMatch : value; 218 } 219 } 220 } 221 222 static FixList fixList = new FixList(); 223 224 private static final int HELP1 = 0, 225 HELP2 = 1, 226 SOURCEDIR = 2, 227 DESTDIR = 3, 228 MATCH = 4, 229 JOIN = 5, 230 MINIMIZE = 6, 231 FIX = 7, 232 JOIN_ARGS = 8, 233 VET_ADD = 9, 234 RESOLVE = 10, 235 PATH = 11, 236 USER = 12, 237 ALL_DIRS = 13, 238 CHECK = 14, 239 KONFIG = 15, 240 RETAIN = 16; 241 242 private static final UOption[] options = { 243 UOption.HELP_H(), 244 UOption.HELP_QUESTION_MARK(), 245 UOption.SOURCEDIR().setDefault(CLDRPaths.MAIN_DIRECTORY), 246 UOption.DESTDIR().setDefault(CLDRPaths.GEN_DIRECTORY + "cldrModify/"), 247 UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"), 248 UOption.create("join", 'j', UOption.OPTIONAL_ARG), 249 UOption.create("minimize", 'r', UOption.NO_ARG), 250 UOption.create("fix", 'f', UOption.OPTIONAL_ARG), 251 UOption.create("join-args", 'i', UOption.OPTIONAL_ARG), 252 UOption.create("vet", 'v', UOption.OPTIONAL_ARG), 253 UOption.create("resolve", 'z', UOption.OPTIONAL_ARG), 254 UOption.create("path", 'p', UOption.REQUIRES_ARG), 255 UOption.create("user", 'u', UOption.REQUIRES_ARG), 256 UOption.create("all", 'a', UOption.REQUIRES_ARG), 257 UOption.create("check", 'c', UOption.NO_ARG), 258 UOption.create("konfig", 'k', UOption.OPTIONAL_ARG).setDefault("modify_config.txt"), 259 UOption.create("Retain", 'R', UOption.NO_ARG), 260 }; 261 262 private static final UnicodeSet allMergeOptions = new UnicodeSet("[rcd]"); 263 264 static final String HELP_TEXT1 = 265 "Use the following options" 266 + XPathParts.NEWLINE 267 + "-h or -?\t for this message" 268 + XPathParts.NEWLINE 269 + "-" 270 + options[SOURCEDIR].shortName 271 + "\t source directory. 
Default = -s" 272 + CldrUtility.getCanonicalName(CLDRPaths.MAIN_DIRECTORY) 273 + XPathParts.NEWLINE 274 + "\tExample:-sC:\\Unicode-CVS2\\cldr\\common\\gen\\source\\" 275 + XPathParts.NEWLINE 276 + "-" 277 + options[DESTDIR].shortName 278 + "\t destination directory. Default = -d" 279 + CldrUtility.getCanonicalName(CLDRPaths.GEN_DIRECTORY + "main/") 280 + XPathParts.NEWLINE 281 + "-m<regex>\t to restrict the locales to what matches <regex>" 282 + XPathParts.NEWLINE 283 + "-j<merge_dir>/X'\t to merge two sets of files together (from <source_dir>/X and <merge_dir>/X', " 284 + XPathParts.NEWLINE 285 + "\twhere * in X' is replaced by X)." 286 + XPathParts.NEWLINE 287 + "\tExample:-jC:\\Unicode-CVS2\\cldr\\dropbox\\to_be_merged\\missing\\missing_*" 288 + XPathParts.NEWLINE 289 + "-i\t merge arguments:" 290 + XPathParts.NEWLINE 291 + "\tr\t replace contents (otherwise new data will be draft=\"unconfirmed\")" 292 + XPathParts.NEWLINE 293 + "\tc\t ignore comments in <merge_dir> files" 294 + XPathParts.NEWLINE 295 + "-v\t incorporate vetting information, and generate diff files." 296 + XPathParts.NEWLINE 297 + "-z\t generate resolved files" 298 + XPathParts.NEWLINE 299 + "-p\t set path for -fx" 300 + XPathParts.NEWLINE 301 + "-u\t set user for -fb" 302 + XPathParts.NEWLINE 303 + "-a\t pattern: recurse over all subdirectories that match pattern" 304 + XPathParts.NEWLINE 305 + "-c\t check that resulting xml files are valid. Requires that a dtd directory be copied to the output directory, in the appropriate location." 306 + XPathParts.NEWLINE 307 + "-k\t config_file\twith -fk perform modifications according to what is in the config file. For format details, see:" 308 + XPathParts.NEWLINE 309 + "\t\thttp://cldr.unicode.org/development/cldr-big-red-switch/cldrmodify-passes/cldrmodify-config." 
310 + XPathParts.NEWLINE 311 + "-R\t retain unchanged files" 312 + XPathParts.NEWLINE 313 + "-f\t to perform various fixes on the files (add following arguments to specify which ones, eg -fxi)" 314 + XPathParts.NEWLINE; 315 316 static final String HELP_TEXT2 = 317 "Note: A set of bat files are also generated in <dest_dir>/diff. They will invoke a comparison program on the results." 318 + XPathParts.NEWLINE; 319 private static final boolean SHOW_DETAILS = false; 320 private static boolean SHOW_PROCESSING = false; 321 322 static String sourceInput; 323 324 /** Picks options and executes. Use -h to see options. */ main(String[] args)325 public static void main(String[] args) throws Exception { 326 long startTime = System.currentTimeMillis(); 327 UOption.parseArgs(args, options); 328 if (options[HELP1].doesOccur || options[HELP2].doesOccur) { 329 System.out.println(HELP_TEXT1 + fixList.showHelp() + HELP_TEXT2); 330 return; 331 } 332 checkSuboptions(FIX, fixList.getOptions()); 333 checkSuboptions(JOIN_ARGS, allMergeOptions); 334 String recurseOnDirectories = options[ALL_DIRS].value; 335 boolean makeResolved = options[RESOLVE].doesOccur; // Utility.COMMON_DIRECTORY + "main/"; 336 337 sourceInput = options[SOURCEDIR].value; 338 String destInput = options[DESTDIR].value; 339 if (recurseOnDirectories != null) { 340 sourceInput = removeSuffix(sourceInput, "main/", "main"); 341 destInput = removeSuffix(destInput, "main/", "main"); 342 } 343 String sourceDirBase = 344 CldrUtility.checkValidDirectory(sourceInput); // Utility.COMMON_DIRECTORY + "main/"; 345 String targetDirBase = 346 CldrUtility.checkValidDirectory(destInput); // Utility.GEN_DIRECTORY + "main/"; 347 System.out.format("Source:\t%s\n", sourceDirBase); 348 System.out.format("Target:\t%s\n", targetDirBase); 349 350 boolean retainUnchangedFiles = options[RETAIN].doesOccur; 351 352 Set<String> dirSet = new TreeSet<>(); 353 if (recurseOnDirectories == null) { 354 dirSet.add(""); 355 } else { 356 String[] subdirs = new 
File(sourceDirBase).list(); 357 Matcher subdirMatch = PatternCache.get(recurseOnDirectories).matcher(""); 358 for (String subdir : subdirs) { 359 if (!subdirMatch.reset(subdir).find()) continue; 360 dirSet.add(subdir + "/"); 361 } 362 } 363 for (String dir : dirSet) { 364 String sourceDir = sourceDirBase + dir; 365 if (!new File(sourceDir).isDirectory()) continue; 366 String targetDir = targetDirBase + dir; 367 try { 368 Factory cldrFactoryForAvailable = Factory.make(sourceDir, ".*"); 369 Factory cldrFactory = cldrFactoryForAvailable; 370 // Need root.xml or else cannot load resolved locales. 371 /* 372 * TODO: when seed and common are merged per https://unicode-org.atlassian.net/browse/CLDR-6396 373 * this will become moot; in the meantime it became necessary to do this not only for "Q" 374 * but also for "p" per https://unicode-org.atlassian.net/browse/CLDR-15054 375 */ 376 if (sourceDir.endsWith("/seed/annotations/") && "Q".equals(options[FIX].value)) { 377 System.err.println( 378 "Correcting factory so that annotations can load, including " 379 + CLDRPaths.ANNOTATIONS_DIRECTORY); 380 final File[] paths = { 381 new File(sourceDir), 382 new File(CLDRPaths.ANNOTATIONS_DIRECTORY) // common/annotations - to load 383 // root.xml 384 }; 385 cldrFactory = SimpleFactory.make(paths, ".*"); 386 } else if (sourceDir.contains("/seed/") && "p".equals(options[FIX].value)) { 387 System.err.println("Correcting factory to enable getting root"); 388 final File[] paths = { 389 new File(sourceDir), 390 new File(CLDRPaths.ANNOTATIONS_DIRECTORY), // to load 391 // common/annotations/root.xml 392 new File(CLDRPaths.MAIN_DIRECTORY) // to load common/main/root.xml 393 }; 394 cldrFactory = SimpleFactory.make(paths, ".*"); 395 } else { 396 System.err.println("!!! 
" + sourceDir); 397 } 398 399 if (options[VET_ADD].doesOccur) { 400 VettingAdder va = new VettingAdder(options[VET_ADD].value); 401 va.showFiles(cldrFactory, targetDir); 402 return; 403 } 404 405 Factory mergeFactory = null; 406 407 String join_prefix = "", join_postfix = ""; 408 if (options[JOIN].doesOccur) { 409 String mergeDir = options[JOIN].value; 410 File temp = new File(mergeDir); 411 mergeDir = 412 CldrUtility.checkValidDirectory( 413 temp.getParent() + File.separator); // Utility.COMMON_DIRECTORY 414 // + "main/"; 415 String filename = temp.getName(); 416 join_prefix = join_postfix = ""; 417 int pos = filename.indexOf("*"); 418 if (pos >= 0) { 419 join_prefix = filename.substring(0, pos); 420 join_postfix = filename.substring(pos + 1); 421 } 422 mergeFactory = Factory.make(mergeDir, ".*"); 423 } 424 Set<String> locales = new TreeSet<>(cldrFactoryForAvailable.getAvailable()); 425 if (mergeFactory != null) { 426 Set<String> temp = new TreeSet<>(mergeFactory.getAvailable()); 427 Set<String> locales3 = new TreeSet<>(); 428 for (String locale : temp) { 429 if (!locale.startsWith(join_prefix) || !locale.endsWith(join_postfix)) 430 continue; 431 locales3.add( 432 locale.substring( 433 join_prefix.length(), 434 locale.length() - join_postfix.length())); 435 } 436 locales.retainAll(locales3); 437 System.out.println("Merging: " + locales3); 438 } 439 new CldrUtility.MatcherFilter(options[MATCH].value).retainAll(locales); 440 441 fixList.handleSetup(); 442 443 long lastTime = System.currentTimeMillis(); 444 int spin = 0; 445 System.out.format(locales.size() + " Locales:\t%s\n", locales.toString()); 446 int totalRemoved = 0; 447 for (String test : locales) { 448 spin++; 449 if (SHOW_PROCESSING) { 450 long now = System.currentTimeMillis(); 451 if (now - lastTime > 5000) { 452 System.out.println( 453 " .. 
still processing " 454 + test 455 + " [" 456 + spin 457 + "/" 458 + locales.size() 459 + "]"); 460 lastTime = now; 461 } 462 } 463 464 // TODO parameterize the directory and filter 465 466 final CLDRFile originalCldrFile = cldrFactory.make(test, makeResolved); 467 CLDRFile k = originalCldrFile.cloneAsThawed(); 468 if (DEBUG_PATHS != null) { 469 System.out.println("Debug1 (" + test + "):\t" + k.toString(DEBUG_PATHS)); 470 } 471 if (mergeFactory != null) { 472 int mergeOption = CLDRFile.MERGE_ADD_ALTERNATE; 473 CLDRFile toMergeIn = 474 mergeFactory 475 .make(join_prefix + test + join_postfix, false) 476 .cloneAsThawed(); 477 if (toMergeIn != null) { 478 if (options[JOIN_ARGS].doesOccur) { 479 if (options[JOIN_ARGS].value.indexOf("r") >= 0) 480 mergeOption = CLDRFile.MERGE_REPLACE_MY_DRAFT; 481 if (options[JOIN_ARGS].value.indexOf("d") >= 0) 482 mergeOption = CLDRFile.MERGE_REPLACE_MINE; 483 if (options[JOIN_ARGS].value.indexOf("c") >= 0) 484 toMergeIn.clearComments(); 485 if (options[JOIN_ARGS].value.indexOf("x") >= 0) 486 removePosix(toMergeIn); 487 } 488 toMergeIn.makeDraft(DraftStatus.contributed); 489 k.putAll(toMergeIn, mergeOption); 490 } 491 // special fix 492 k.removeComment( 493 " The following are strings that are not found in the locale (currently), but need valid translations for localizing timezones. 
"); 494 } 495 if (DEBUG_PATHS != null) { 496 System.out.println("Debug2 (" + test + "):\t" + k.toString(DEBUG_PATHS)); 497 } 498 if (options[FIX].doesOccur) { 499 fix(k, options[FIX].value, options[KONFIG].value, cldrFactory); 500 System.out.println("#TOTAL\tItems changed: " + fixList.totalChanged); 501 } 502 if (DEBUG_PATHS != null) { 503 System.out.println("Debug3 (" + test + "):\t" + k.toString(DEBUG_PATHS)); 504 } 505 if (DEBUG_PATHS != null) { 506 System.out.println("Debug4 (" + test + "):\t" + k.toString(DEBUG_PATHS)); 507 } 508 509 PrintWriter pw = FileUtilities.openUTF8Writer(targetDir, test + ".xml"); 510 String testPath = 511 "//ldml/dates/calendars/calendar[@type=\"persian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/month[@type=\"1\"]"; 512 if (false) { 513 System.out.println("Printing Raw File:"); 514 testPath = 515 "//ldml/dates/calendars/calendar[@type=\"persian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/alias"; 516 System.out.println(k.getStringValue(testPath)); 517 TreeSet s = new TreeSet(); 518 k.forEach(s::add); 519 520 System.out.println(k.getStringValue(testPath)); 521 Set orderedSet = new TreeSet(k.getComparator()); 522 k.forEach(orderedSet::add); 523 for (Iterator it3 = orderedSet.iterator(); it3.hasNext(); ) { 524 String path = (String) it3.next(); 525 if (path.equals(testPath)) { 526 System.out.println("huh?"); 527 } 528 String value = k.getStringValue(path); 529 String fullpath = k.getFullXPath(path); 530 System.out.println("\t=\t" + fullpath); 531 System.out.println("\t=\t" + value); 532 } 533 System.out.println("Done Printing Raw File:"); 534 } 535 536 k.write(pw); 537 pw.close(); 538 539 File oldFile = new File(sourceDir, test + ".xml"); 540 File newFile = new File(targetDir, test + ".xml"); 541 if (!retainUnchangedFiles 542 && !oldFile.equals( 543 newFile) // only skip if the source & target are different. 
544 && equalsSkippingCopyright(oldFile, newFile)) { 545 newFile.delete(); 546 continue; 547 } 548 549 if (options[CHECK].doesOccur) { 550 QuickCheck.check(new File(targetDir, test + ".xml")); 551 } 552 } 553 if (totalSkeletons.size() != 0) { 554 System.out.println("Total Skeletons" + totalSkeletons); 555 } 556 if (totalRemoved > 0) { 557 System.out.println("# Removed:\t" + totalRemoved); 558 } 559 } finally { 560 fixList.handleCleanup(); 561 System.out.println( 562 "Done -- Elapsed time: " 563 + ((System.currentTimeMillis() - startTime) / 60000.0) 564 + " minutes"); 565 } 566 } 567 } 568 equalsSkippingCopyright(File oldFile, File newFile)569 public static boolean equalsSkippingCopyright(File oldFile, File newFile) { 570 Iterator<String> oldIterator = FileUtilities.in(oldFile).iterator(); 571 Iterator<String> newIterator = FileUtilities.in(newFile).iterator(); 572 while (true) { 573 boolean oldHasNext = oldIterator.hasNext(); 574 boolean newHasNext = newIterator.hasNext(); 575 if (oldHasNext != newHasNext) { 576 return false; 577 } 578 if (!oldHasNext) { 579 return true; 580 } 581 String oldLine = oldIterator.next(); 582 String newLine = newIterator.next(); 583 if (!oldLine.equals(newLine)) { 584 if (oldLine.startsWith("<!-- Copyright ©") 585 && newLine.startsWith("<!-- Copyright ©")) { 586 continue; 587 } 588 return false; 589 } 590 } 591 } 592 removeSuffix(String value, String... suffices)593 private static String removeSuffix(String value, String... suffices) { 594 for (String suffix : suffices) { 595 if (value.endsWith(suffix)) { 596 return value.substring(0, value.length() - suffix.length()); 597 } 598 } 599 return value; 600 } 601 602 /* 603 * Use the coverage to determine what we should keep in the case of a locale just below root. 
604 */ 605 606 static class RetainWhenMinimizing implements CLDRFile.RetentionTest { 607 private CLDRFile file; 608 private CLDRLocale c; 609 private boolean isArabicSublocale; 610 setParentFile(CLDRFile file)611 public RetainWhenMinimizing setParentFile(CLDRFile file) { 612 this.file = file; 613 this.c = CLDRLocale.getInstance(file.getLocaleIDFromIdentity()); 614 isArabicSublocale = "ar".equals(c.getLanguage()) && !"001".equals(c.getCountry()); 615 return this; 616 } 617 618 @Override getRetention(String path)619 public Retention getRetention(String path) { 620 if (path.startsWith("//ldml/identity/")) { 621 return Retention.RETAIN; 622 } 623 // special case for Arabic 624 if (isArabicSublocale && path.startsWith("//ldml/numbers/defaultNumberingSystem")) { 625 return Retention.RETAIN; 626 } 627 String localeId = file.getSourceLocaleID(path, null); 628 if ((c.isLanguageLocale() || c.equals(CLDRLocale.getInstance("pt_PT"))) 629 && (XMLSource.ROOT_ID.equals(localeId) 630 || XMLSource.CODE_FALLBACK_ID.equals(localeId))) { 631 return Retention.RETAIN; 632 } 633 return Retention.RETAIN_IF_DIFFERENT; 634 } 635 } 636 637 static final Splitter COMMA_SEMI = 638 Splitter.on(Pattern.compile("[,;|]")).trimResults().omitEmptyStrings(); 639 protected static final boolean NUMBER_SYSTEM_HACK = true; 640 checkSuboptions(int i, UnicodeSet allowedOptions)641 private static void checkSuboptions(int i, UnicodeSet allowedOptions) { 642 UOption givenOptions = options[i]; 643 if (givenOptions.doesOccur && !allowedOptions.containsAll(givenOptions.value)) { 644 throw new IllegalArgumentException( 645 "Illegal sub-options for " 646 + givenOptions.shortName 647 + ": " 648 + new UnicodeSet().addAll(givenOptions.value).removeAll(allowedOptions) 649 + CldrUtility.LINE_SEPARATOR 650 + "Use -? 
for help."); 651 } 652 if (i == FIX && givenOptions.value != null) { 653 final UnicodeSet allowedFilters = new UnicodeSet().add('P').add('k'); 654 for (char c : givenOptions.value.toCharArray()) { 655 if (!allowedFilters.contains(c)) { 656 throw new IllegalArgumentException( 657 "The filter " 658 + c 659 + " is currently disabled, see CLDR-17144 and CLDR-17765"); 660 } 661 } 662 } 663 } 664 removePosix(CLDRFile toMergeIn)665 private static void removePosix(CLDRFile toMergeIn) { 666 Set<String> toRemove = new HashSet<>(); 667 for (String xpath : toMergeIn) { 668 if (xpath.startsWith("//ldml/posix")) toRemove.add(xpath); 669 } 670 toMergeIn.removeAll(toRemove, false); 671 } 672 673 static PathChecker pathChecker = new PathChecker(); 674 675 /** Implementation for a certain type of filter. Each filter has a letter associated with it. */ 676 abstract static class CLDRFilter { 677 protected CLDRFile cldrFileToFilter; 678 protected CLDRFile cldrFileToFilterResolved; 679 private String localeID; 680 protected Set<String> availableChildren; 681 private Set<String> toBeRemoved; 682 private CLDRFile toBeReplaced; 683 protected Factory factory; 684 protected int countChanges; 685 686 /** 687 * Called when a new locale is being processed 688 * 689 * @param k 690 * @param factory 691 * @param removal 692 * @param replacements 693 */ setFile( CLDRFile k, Factory factory, Set<String> removal, CLDRFile replacements)694 public final void setFile( 695 CLDRFile k, Factory factory, Set<String> removal, CLDRFile replacements) { 696 this.cldrFileToFilter = k; 697 cldrFileToFilterResolved = null; 698 this.factory = factory; 699 localeID = k.getLocaleID(); 700 this.toBeRemoved = removal; 701 this.toBeReplaced = replacements; 702 countChanges = 0; 703 handleStart(); 704 } 705 706 /** Called by setFile() before all processing for a file */ handleStart()707 public void handleStart() {} 708 709 /** 710 * Called for each xpath 711 * 712 * @param xpath 713 */ handlePath(String xpath)714 public 
abstract void handlePath(String xpath); 715 716 /** Called after all xpaths in this file are handled */ handleEnd()717 public void handleEnd() {} 718 getResolved()719 public CLDRFile getResolved() { 720 if (cldrFileToFilterResolved == null) { 721 if (cldrFileToFilter.isResolved()) { 722 cldrFileToFilterResolved = cldrFileToFilter; 723 } else { 724 cldrFileToFilterResolved = factory.make(cldrFileToFilter.getLocaleID(), true); 725 } 726 } 727 return cldrFileToFilterResolved; 728 } 729 show(String reason, String detail)730 public void show(String reason, String detail) { 731 System.out.println("%" + localeID + "\t" + reason + "\tConsidering " + detail); 732 } 733 retain(String path, String reason)734 public void retain(String path, String reason) { 735 System.out.println( 736 "%" 737 + localeID 738 + "\t" 739 + reason 740 + "\tRetaining: " 741 + cldrFileToFilter.getStringValue(path) 742 + "\t at: " 743 + path); 744 } 745 remove(String path)746 public void remove(String path) { 747 remove(path, "-"); 748 } 749 remove(String path, String reason)750 public void remove(String path, String reason) { 751 if (toBeRemoved.contains(path)) return; 752 toBeRemoved.add(path); 753 String oldValueOldPath = cldrFileToFilter.getStringValue(path); 754 showAction(reason, "Removing", oldValueOldPath, null, null, path, path); 755 } 756 replace(String oldFullPath, String newFullPath, String newValue)757 public void replace(String oldFullPath, String newFullPath, String newValue) { 758 replace(oldFullPath, newFullPath, newValue, "-"); 759 } 760 showAction( String reason, String action, String oldValueOldPath, String oldValueNewPath, String newValue, String oldFullPath, String newFullPath)761 public void showAction( 762 String reason, 763 String action, 764 String oldValueOldPath, 765 String oldValueNewPath, 766 String newValue, 767 String oldFullPath, 768 String newFullPath) { 769 System.out.println( 770 "%" 771 + localeID 772 + "\t" 773 + action 774 + "\t" 775 + reason 776 + "\t«" 777 + 
oldValueOldPath 778 + "»" 779 + (newFullPath.equals(oldFullPath) || oldValueNewPath == null 780 ? "" 781 : oldValueNewPath.equals(oldValueOldPath) 782 ? "/=" 783 : "/«" + oldValueNewPath + "»") 784 + "\t→\t" 785 + (newValue == null 786 ? "∅" 787 : newValue.equals(oldValueOldPath) ? "≡" : "«" + newValue + "»") 788 + "\t" 789 + oldFullPath 790 + (newFullPath.equals(oldFullPath) ? "" : "\t→\t" + newFullPath)); 791 ++countChanges; 792 } 793 794 /** 795 * There are the following cases, where: 796 * 797 * <pre> 798 * pathSame, new value null: Removing v p 799 * pathSame, new value not null: Replacing v v' p 800 * pathChanges, nothing at new path: Moving v p p' 801 * pathChanges, same value at new path: Replacing v v' p p' 802 * pathChanges, value changes: Overriding v v' p p' 803 * 804 * <pre> 805 * @param oldFullPath 806 * @param newFullPath 807 * @param newValue 808 * @param reason 809 */ replace( String oldFullPath, String newFullPath, String newValue, String reason)810 public void replace( 811 String oldFullPath, String newFullPath, String newValue, String reason) { 812 String oldValueOldPath = cldrFileToFilter.getStringValue(oldFullPath); 813 String temp = cldrFileToFilter.getFullXPath(oldFullPath); 814 if (temp != null) { 815 oldFullPath = temp; 816 } 817 boolean pathSame = oldFullPath.equals(newFullPath); 818 819 if (!pathChecker.checkPath(newFullPath)) { 820 throw new IllegalArgumentException("Bad path: " + newFullPath); 821 } 822 823 if (pathSame) { 824 if (newValue == null) { 825 remove(oldFullPath, reason); 826 } else if (oldValueOldPath == null) { 827 toBeReplaced.add(oldFullPath, newValue); 828 showAction( 829 reason, 830 "Adding", 831 oldValueOldPath, 832 null, 833 newValue, 834 oldFullPath, 835 newFullPath); 836 } else { 837 toBeReplaced.add(oldFullPath, newValue); 838 showAction( 839 reason, 840 "Replacing", 841 oldValueOldPath, 842 null, 843 newValue, 844 oldFullPath, 845 newFullPath); 846 } 847 return; 848 } 849 String oldValueNewPath = 
cldrFileToFilter.getStringValue(newFullPath); 850 toBeRemoved.add(oldFullPath); 851 toBeReplaced.add(newFullPath, newValue); 852 853 if (oldValueNewPath == null) { 854 showAction( 855 reason, 856 "Moving", 857 oldValueOldPath, 858 oldValueNewPath, 859 newValue, 860 oldFullPath, 861 newFullPath); 862 } else if (oldValueNewPath.equals(newValue)) { 863 showAction( 864 reason, 865 "Unchanged Value", 866 oldValueOldPath, 867 oldValueNewPath, 868 newValue, 869 oldFullPath, 870 newFullPath); 871 } else { 872 showAction( 873 reason, 874 "Overriding", 875 oldValueOldPath, 876 oldValueNewPath, 877 newValue, 878 oldFullPath, 879 newFullPath); 880 } 881 } 882 883 /** 884 * Adds a new path-value pair to the CLDRFile. 885 * 886 * @param path the new path 887 * @param value the value 888 * @param reason Reason for adding the path and value. 889 */ add(String path, String value, String reason)890 public void add(String path, String value, String reason) { 891 String oldValueOldPath = cldrFileToFilter.getStringValue(path); 892 if (oldValueOldPath == null) { 893 toBeRemoved.remove(path); 894 toBeReplaced.add(path, value); 895 showAction(reason, "Adding", oldValueOldPath, null, value, path, path); 896 } else { 897 replace(path, path, value); 898 } 899 } 900 getReplacementFile()901 public CLDRFile getReplacementFile() { 902 return toBeReplaced; 903 } 904 905 /** 906 * Called before all files are processed. Note: TODO: This is called unconditionally, 907 * whether the filter is enabled or not. It should only be called if the filter is enabled. 908 * Reference: https://unicode-org.atlassian.net/browse/CLDR-16343 909 */ handleSetup()910 public void handleSetup() {} 911 912 /** 913 * Called after all files are processed. Note: TODO: This is called unconditionally, whether 914 * the filter is enabled or not. It should only be called if the filter is enabled. 
915 * Reference: https://unicode-org.atlassian.net/browse/CLDR-16343 916 */ handleCleanup()917 public void handleCleanup() {} 918 getLocaleID()919 public String getLocaleID() { 920 return localeID; 921 } 922 } 923 924 static class FixList { 925 // simple class, so we use quick list 926 CLDRFilter[] filters = new CLDRFilter[128]; // only ascii 927 String[] helps = new String[128]; // only ascii 928 UnicodeSet options = new UnicodeSet(); 929 String inputOptions = null; 930 int totalChanged = 0; 931 add(char letter, String help)932 void add(char letter, String help) { 933 add(letter, help, null); 934 } 935 handleSetup()936 public void handleSetup() { 937 for (int i = 0; i < filters.length; ++i) { 938 if (filters[i] != null) { 939 filters[i].handleSetup(); 940 } 941 } 942 } 943 handleCleanup()944 public void handleCleanup() { 945 for (int i = 0; i < filters.length; ++i) { 946 if (filters[i] != null) { 947 filters[i].handleCleanup(); 948 } 949 } 950 } 951 getOptions()952 public UnicodeSet getOptions() { 953 return options; 954 } 955 add(char letter, String help, CLDRFilter filter)956 void add(char letter, String help, CLDRFilter filter) { 957 if (helps[letter] != null) 958 throw new IllegalArgumentException("Duplicate letter: " + letter); 959 filters[letter] = filter; 960 helps[letter] = help; 961 options.add(letter); 962 } 963 setFile( CLDRFile file, String inputOptions, Factory factory, Set<String> removal, CLDRFile replacements)964 void setFile( 965 CLDRFile file, 966 String inputOptions, 967 Factory factory, 968 Set<String> removal, 969 CLDRFile replacements) { 970 this.inputOptions = inputOptions; 971 for (int i = 0; i < inputOptions.length(); ++i) { 972 char c = inputOptions.charAt(i); 973 if (filters[c] != null) { 974 try { 975 filters[c].setFile(file, factory, removal, replacements); 976 } catch (RuntimeException e) { 977 System.err.println("Failure in " + filters[c].localeID + "\t START"); 978 throw e; 979 } 980 } 981 } 982 } 983 handleStart()984 void 
handleStart() { 985 for (int i = 0; i < inputOptions.length(); ++i) { 986 char c = inputOptions.charAt(i); 987 if (filters[c] != null) { 988 try { 989 filters[c].handleStart(); 990 } catch (RuntimeException e) { 991 System.err.println("Failure in " + filters[c].localeID + "\t START"); 992 throw e; 993 } 994 } 995 } 996 } 997 handlePath(String xpath)998 void handlePath(String xpath) { 999 for (int i = 0; i < inputOptions.length(); ++i) { 1000 char c = inputOptions.charAt(i); 1001 if (filters[c] != null) { 1002 try { 1003 filters[c].handlePath(xpath); 1004 } catch (RuntimeException e) { 1005 System.err.println("Failure in " + filters[c].localeID + "\t " + xpath); 1006 throw e; 1007 } 1008 } 1009 } 1010 } 1011 handleEnd()1012 void handleEnd() { 1013 for (int i = 0; i < inputOptions.length(); ++i) { 1014 char c = inputOptions.charAt(i); 1015 if (filters[c] != null) { 1016 try { 1017 filters[c].handleEnd(); 1018 if (filters[c].countChanges != 0) { 1019 totalChanged += filters[c].countChanges; 1020 System.out.println( 1021 "#" 1022 + filters[c].localeID 1023 + "\tItems changed: " 1024 + filters[c].countChanges); 1025 } 1026 } catch (RuntimeException e) { 1027 System.err.println("Failure in " + filters[c].localeID + "\t START"); 1028 throw e; 1029 } 1030 } 1031 } 1032 } 1033 showHelp()1034 String showHelp() { 1035 String result = ""; 1036 for (int i = 0; i < filters.length; ++i) { 1037 if (helps[i] != null) { 1038 result += "\t" + (char) i + "\t " + helps[i] + XPathParts.NEWLINE; 1039 } 1040 } 1041 return result; 1042 } 1043 } 1044 1045 static Set<String> totalSkeletons = new HashSet<>(); 1046 1047 static Map<String, String> rootUnitMap = new HashMap<>(); 1048 1049 static { 1050 rootUnitMap.put("second", "s"); 1051 rootUnitMap.put("minute", "min"); 1052 rootUnitMap.put("hour", "h"); 1053 rootUnitMap.put("day", "d"); 1054 rootUnitMap.put("week", "w"); 1055 rootUnitMap.put("month", "m"); 1056 rootUnitMap.put("year", "y"); 1057 1058 fixList.add( 1059 'z', 1060 "Remove 
deprecated elements", 1061 new CLDRFilter() { 1062 1063 public boolean isDeprecated( 1064 DtdType type, String element, String attribute, String value) { 1065 return DtdData.getInstance(type).isDeprecated(element, attribute, value); 1066 } 1067 1068 public boolean isDeprecated(DtdType type, String path) { 1069 1070 XPathParts parts = XPathParts.getFrozenInstance(path); 1071 for (int i = 0; i < parts.size(); ++i) { 1072 String element = parts.getElement(i); 1073 if (isDeprecated(type, element, "*", "*")) { 1074 return true; 1075 } 1076 for (Entry<String, String> entry : parts.getAttributes(i).entrySet()) { 1077 String attribute = entry.getKey(); 1078 String value = entry.getValue(); 1079 if (isDeprecated(type, element, attribute, value)) { 1080 return true; 1081 } 1082 } 1083 } 1084 return false; 1085 } 1086 1087 @Override 1088 public void handlePath(String xpath) { 1089 String fullPath = cldrFileToFilter.getFullXPath(xpath); 1090 XPathParts parts = XPathParts.getFrozenInstance(fullPath); 1091 for (int i = 0; i < parts.size(); ++i) { 1092 String element = parts.getElement(i); 1093 if (dtdData.isDeprecated(element, "*", "*")) { 1094 remove(fullPath, "Deprecated element"); 1095 return; 1096 } 1097 for (Entry<String, String> entry : parts.getAttributes(i).entrySet()) { 1098 String attribute = entry.getKey(); 1099 String value = entry.getValue(); 1100 if (dtdData.isDeprecated(element, attribute, value)) { 1101 remove(fullPath, "Element with deprecated attribute(s)"); 1102 } 1103 } 1104 } 1105 } 1106 }); 1107 1108 fixList.add( 1109 'e', 1110 "fix Interindic", 1111 new CLDRFilter() { 1112 @Override 1113 public void handlePath(String xpath) { 1114 if (xpath.indexOf("=\"InterIndic\"") < 0) return; 1115 String v = cldrFileToFilter.getStringValue(xpath); 1116 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1117 XPathParts fullparts = XPathParts.getFrozenInstance(fullXPath); 1118 Map<String, String> attributes = fullparts.findAttributes("transform"); 1119 String 
oldValue = attributes.get("direction"); 1120 if ("both".equals(oldValue)) { 1121 attributes.put("direction", "forward"); 1122 replace(xpath, fullparts.toString(), v); 1123 } 1124 } 1125 }); 1126 1127 fixList.add( 1128 'B', 1129 "fix bogus values", 1130 new CLDRFilter() { 1131 RegexLookup<Integer> paths = 1132 RegexLookup.<Integer>of() 1133 .setPatternTransform(RegexLookup.RegexFinderTransformPath2) 1134 .add( 1135 "//ldml/localeDisplayNames/languages/language[@type='([^']*)']", 1136 0) 1137 .add( 1138 "//ldml/localeDisplayNames/scripts/script[@type='([^']*)']", 1139 0) 1140 .add( 1141 "//ldml/localeDisplayNames/territories/territory[@type='([^']*)']", 1142 0) 1143 .add("//ldml/dates/timeZoneNames/metazone[@type='([^']*)']", 0) 1144 .add( 1145 "//ldml/dates/timeZoneNames/zone[@type='([^']*)']/exemplarCity", 1146 0) 1147 .add( 1148 "//ldml/numbers/currencies/currency[@type='([^']*)']/displayName", 1149 0); 1150 Output<String[]> arguments = new Output<>(); 1151 CLDRFile english = CLDRConfig.getInstance().getEnglish(); 1152 boolean skip; 1153 1154 @Override 1155 public void handleStart() { 1156 CLDRFile resolved = factory.make(cldrFileToFilter.getLocaleID(), true); 1157 UnicodeSet exemplars = 1158 resolved.getExemplarSet(ExemplarType.main, WinningChoice.WINNING); 1159 skip = exemplars.containsSome('a', 'z'); 1160 // TODO add simpler way to skip file entirely 1161 } 1162 1163 @Override 1164 public void handlePath(String xpath) { 1165 if (skip) { 1166 return; 1167 } 1168 Integer lookupValue = paths.get(xpath, null, arguments); 1169 if (lookupValue == null) { 1170 return; 1171 } 1172 String type = arguments.value[1]; 1173 String value = cldrFileToFilter.getStringValue(xpath); 1174 if (value.equals(type)) { 1175 remove(xpath, "Matches code"); 1176 return; 1177 } 1178 String evalue = english.getStringValue(xpath); 1179 if (value.equals(evalue)) { 1180 remove(xpath, "Matches English"); 1181 return; 1182 } 1183 } 1184 }); 1185 1186 fixList.add( 1187 's', 1188 "fix alt 
accounting", 1189 new CLDRFilter() { 1190 @Override 1191 public void handlePath(String xpath) { 1192 XPathParts parts = XPathParts.getFrozenInstance(xpath); 1193 if (!parts.containsAttributeValue("alt", "accounting")) { 1194 return; 1195 } 1196 String oldFullXPath = cldrFileToFilter.getFullXPath(xpath); 1197 String value = cldrFileToFilter.getStringValue(xpath); 1198 XPathParts fullparts = 1199 XPathParts.getFrozenInstance(oldFullXPath) 1200 .cloneAsThawed(); // not frozen, for removeAttribute 1201 fullparts.removeAttribute("pattern", "alt"); 1202 fullparts.setAttribute("currencyFormat", "type", "accounting"); 1203 String newFullXPath = fullparts.toString(); 1204 replace( 1205 oldFullXPath, 1206 newFullXPath, 1207 value, 1208 "Move alt=accounting value to new path"); 1209 } 1210 }); 1211 1212 fixList.add( 1213 'n', 1214 "add unit displayName", 1215 new CLDRFilter() { 1216 @Override 1217 public void handlePath(String xpath) { 1218 if (xpath.indexOf("/units/unitLength[@type=\"long\"]") < 0 1219 || xpath.indexOf("/unitPattern[@count=\"other\"]") < 0 1220 || xpath.indexOf("[@draft=\"unconfirmed\"]") >= 0) { 1221 return; 1222 } 1223 String value = cldrFileToFilter.getStringValue(xpath); 1224 String newValue = null; 1225 if (value.startsWith("{0}")) { 1226 newValue = value.substring(3).trim(); 1227 } else if (value.endsWith("{0}")) { 1228 newValue = value.substring(0, value.length() - 3).trim(); 1229 } else { 1230 System.out.println( 1231 "unitPattern-other does not start or end with \"{0}\": \"" 1232 + value 1233 + "\""); 1234 return; 1235 } 1236 1237 String oldFullXPath = cldrFileToFilter.getFullXPath(xpath); 1238 String newFullXPath = 1239 oldFullXPath 1240 .substring(0, oldFullXPath.indexOf("unitPattern")) 1241 .concat("displayName[@draft=\"provisional\"]"); 1242 add( 1243 newFullXPath, 1244 newValue, 1245 "create unit displayName-long from unitPattern-long-other"); 1246 String newFullXPathShort = 1247 newFullXPath.replace("[@type=\"long\"]", "[@type=\"short\"]"); 
1248 add( 1249 newFullXPathShort, 1250 newValue, 1251 "create unit displayName-short from unitPattern-long-other"); 1252 } 1253 }); 1254 1255 fixList.add( 1256 'x', 1257 "retain paths", 1258 new CLDRFilter() { 1259 Matcher m = null; 1260 1261 @Override 1262 public void handlePath(String xpath) { 1263 if (m == null) { 1264 m = PatternCache.get(options[PATH].value).matcher(""); 1265 } 1266 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1267 if (!m.reset(fullXPath).matches()) { 1268 remove(xpath); 1269 } 1270 } 1271 }); 1272 1273 fixList.add( 1274 'l', 1275 "change language code", 1276 new CLDRFilter() { 1277 private CLDRFile resolved; 1278 1279 @Override 1280 public void handleStart() { 1281 resolved = factory.make(cldrFileToFilter.getLocaleID(), true); 1282 } 1283 1284 @Override 1285 public void handlePath(String xpath) { 1286 if (!xpath.contains("/language")) { 1287 return; 1288 } 1289 XPathParts parts = XPathParts.getFrozenInstance(xpath); 1290 String languageCode = parts.findAttributeValue("language", "type"); 1291 String v = resolved.getStringValue(xpath); 1292 if (!languageCode.equals("swc")) { 1293 return; 1294 } 1295 parts = parts.cloneAsThawed(); 1296 parts.setAttribute("language", "type", "sw_CD"); 1297 replace(xpath, parts.toString(), v); 1298 } 1299 }); 1300 1301 fixList.add( 1302 'g', 1303 "Swap alt/non-alt values for Czechia", 1304 new CLDRFilter() { 1305 1306 @Override 1307 public void handleStart() {} 1308 1309 @Override 1310 public void handlePath(String xpath) { 1311 XPathParts parts = XPathParts.getFrozenInstance(xpath); 1312 if (!parts.containsAttributeValue("alt", "variant") 1313 || !parts.containsAttributeValue("type", "CZ")) { 1314 return; 1315 } 1316 String variantValue = cldrFileToFilter.getStringValue(xpath); 1317 String nonVariantXpath = xpath.replaceAll("\\[\\@alt=\"variant\"\\]", ""); 1318 String nonVariantValue = cldrFileToFilter.getStringValue(nonVariantXpath); 1319 replace(xpath, xpath, nonVariantValue); 1320 
replace(nonVariantXpath, nonVariantXpath, variantValue); 1321 } 1322 }); 1323 1324 fixList.add( 1325 'u', 1326 "fix duration unit patterns", 1327 new CLDRFilter() { 1328 1329 @Override 1330 public void handlePath(String xpath) { 1331 if (!xpath.contains("/units")) { 1332 return; 1333 } 1334 if (!xpath.contains("/durationUnitPattern")) { 1335 return; 1336 } 1337 1338 String value = cldrFileToFilter.getStringValue(xpath); 1339 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1340 1341 XPathParts parts = XPathParts.getFrozenInstance(fullXPath); 1342 String unittype = parts.findAttributeValue("durationUnit", "type"); 1343 1344 String newFullXpath = 1345 "//ldml/units/durationUnit[@type=\"" 1346 + unittype 1347 + "\"]/durationUnitPattern"; 1348 replace( 1349 fullXPath, 1350 newFullXpath, 1351 value, 1352 "converting to new duration unit structure"); 1353 } 1354 }); 1355 1356 fixList.add( 1357 'a', 1358 "Fix 0/1", 1359 new CLDRFilter() { 1360 final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze(); 1361 PluralInfo info; 1362 1363 @Override 1364 public void handleStart() { 1365 info = SupplementalDataInfo.getInstance().getPlurals(super.localeID); 1366 } 1367 1368 @Override 1369 public void handlePath(String xpath) { 1370 if (xpath.indexOf("count") < 0) { 1371 return; 1372 } 1373 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1374 XPathParts parts = 1375 XPathParts.getFrozenInstance(fullpath) 1376 .cloneAsThawed(); // not frozen, for setAttribute 1377 String countValue = parts.getAttributeValue(-1, "count"); 1378 if (!DIGITS.containsAll(countValue)) { 1379 return; 1380 } 1381 int intValue = Integer.parseInt(countValue); 1382 Count count = info.getCount(intValue); 1383 parts.setAttribute(-1, "count", count.toString()); 1384 String newPath = parts.toString(); 1385 String oldValue = cldrFileToFilter.getStringValue(newPath); 1386 String value = cldrFileToFilter.getStringValue(xpath); 1387 if (oldValue != null) { 1388 String fixed = oldValue.replace("{0}", 
countValue); 1389 if (value.equals(oldValue) || value.equals(fixed)) { 1390 remove( 1391 fullpath, 1392 "Superfluous given: " + count + "→«" + oldValue + "»"); 1393 } else { 1394 remove(fullpath, "Can’t replace: " + count + "→«" + oldValue + "»"); 1395 } 1396 return; 1397 } 1398 replace(fullpath, newPath, value, "Moving 0/1"); 1399 } 1400 }); 1401 1402 fixList.add( 1403 'b', 1404 "Prep for bulk import", 1405 new CLDRFilter() { 1406 1407 @Override 1408 public void handlePath(String xpath) { 1409 if (!options[USER].doesOccur) { 1410 return; 1411 } 1412 String userID = options[USER].value; 1413 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1414 String value = cldrFileToFilter.getStringValue(xpath); 1415 XPathParts parts = 1416 XPathParts.getFrozenInstance(fullpath) 1417 .cloneAsThawed(); // not frozen, for addAttribute 1418 parts.addAttribute("draft", "unconfirmed"); 1419 parts.addAttribute("alt", "proposed-u" + userID + "-implicit1.8"); 1420 String newPath = parts.toString(); 1421 replace(fullpath, newPath, value); 1422 } 1423 }); 1424 1425 fixList.add( 1426 'c', 1427 "Fix transiton from an old currency code to a new one", 1428 new CLDRFilter() { 1429 @Override 1430 public void handlePath(String xpath) { 1431 String oldCurrencyCode = "VEF"; 1432 String newCurrencyCode = "VES"; 1433 int fromDate = 2008; 1434 int toDate = 2018; 1435 String leadingParenString = " ("; 1436 String trailingParenString = ")"; 1437 String separator = "\u2013"; 1438 String languageTag = "root"; 1439 1440 if (xpath.indexOf( 1441 "/currency[@type=\"" + oldCurrencyCode + "\"]/displayName") 1442 < 0) { 1443 return; 1444 } 1445 String value = cldrFileToFilter.getStringValue(xpath); 1446 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1447 String newFullXPath = fullXPath.replace(oldCurrencyCode, newCurrencyCode); 1448 cldrFileToFilter.add(newFullXPath, value); 1449 1450 // Exceptions for locales that use an alternate numbering system or a 1451 // different format for the dates at 
1452 // the end. 1453 // Add additional ones as necessary 1454 String localeID = cldrFileToFilter.getLocaleID(); 1455 if (localeID.equals("ne")) { 1456 languageTag = "root-u-nu-deva"; 1457 } else if (localeID.equals("bn")) { 1458 languageTag = "root-u-nu-beng"; 1459 } else if (localeID.equals("ar")) { 1460 leadingParenString = " - "; 1461 trailingParenString = ""; 1462 } else if (localeID.equals("fa")) { 1463 languageTag = "root-u-nu-arabext"; 1464 separator = Utility.unescape(" \\u062A\\u0627 "); 1465 } 1466 1467 NumberFormat nf = 1468 NumberFormat.getInstance(ULocale.forLanguageTag(languageTag)); 1469 nf.setGroupingUsed(false); 1470 1471 String tagString = 1472 leadingParenString 1473 + nf.format(fromDate) 1474 + separator 1475 + nf.format(toDate) 1476 + trailingParenString; 1477 1478 replace(fullXPath, fullXPath, value + tagString); 1479 } 1480 }); 1481 1482 fixList.add( 1483 'p', 1484 "input-processor", 1485 new CLDRFilter() { 1486 private DisplayAndInputProcessor inputProcessor; 1487 1488 @Override 1489 public void handleStart() { 1490 inputProcessor = new DisplayAndInputProcessor(cldrFileToFilter, true); 1491 inputProcessor.enableInheritanceReplacement(getResolved()); 1492 } 1493 1494 @Override 1495 public void handleEnd() { 1496 inputProcessor = null; // clean up, just in case 1497 } 1498 1499 @Override 1500 public void handlePath(String xpath) { 1501 String value = cldrFileToFilter.getStringValue(xpath); 1502 String newValue = inputProcessor.processInput(xpath, value, null); 1503 if (value.equals(newValue)) { 1504 return; 1505 } 1506 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1507 replace(fullXPath, fullXPath, newValue); 1508 } 1509 }); 1510 1511 // 'P' Process, like 'p' but without inheritance replacement 1512 fixList.add( 1513 'P', 1514 "input-Processor-no-inheritance-replacement", 1515 new CLDRFilter() { 1516 private DisplayAndInputProcessor inputProcessor; 1517 1518 @Override 1519 public void handleStart() { 1520 inputProcessor = new 
DisplayAndInputProcessor(cldrFileToFilter, true); 1521 } 1522 1523 @Override 1524 public void handleEnd() { 1525 inputProcessor = null; // clean up, just in case 1526 } 1527 1528 @Override 1529 public void handlePath(String xpath) { 1530 String value = cldrFileToFilter.getStringValue(xpath); 1531 String newValue = inputProcessor.processInput(xpath, value, null); 1532 if (value.equals(newValue)) { 1533 return; 1534 } 1535 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1536 replace(fullXPath, fullXPath, newValue); 1537 } 1538 }); 1539 1540 // use DAIP for one thing only: replaceBaileyWithInheritanceMarker 1541 fixList.add( 1542 'I', 1543 "Inheritance-substitution", 1544 new CLDRFilter() { 1545 private DisplayAndInputProcessor inputProcessor; 1546 private final int STEPS_FROM_ROOT = 1547 1; // only process if locale's level matches; root = 0, en = 1, ... 1548 1549 @Override 1550 public void handleStart() { 1551 int steps = stepsFromRoot(cldrFileToFilter.getLocaleID()); 1552 if (steps == STEPS_FROM_ROOT) { 1553 inputProcessor = new DisplayAndInputProcessor(cldrFileToFilter, true); 1554 inputProcessor.enableInheritanceReplacement(getResolved()); 1555 } else { 1556 inputProcessor = null; 1557 } 1558 } 1559 1560 @Override 1561 public void handleEnd() { 1562 inputProcessor = null; // clean up, just in case 1563 } 1564 1565 @Override 1566 public void handlePath(String xpath) { 1567 if (inputProcessor == null) { 1568 return; 1569 } 1570 String value = cldrFileToFilter.getStringValue(xpath); 1571 String newValue = 1572 inputProcessor.replaceBaileyWithInheritanceMarker(xpath, value); 1573 if (value.equals(newValue)) { 1574 return; 1575 } 1576 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1577 replace(fullXPath, fullXPath, newValue); 1578 } 1579 }); 1580 1581 // Un-drop hard inheritance: revert INHERITANCE_MARKER to pre-drop-hard-inheritance values 1582 fixList.add( 1583 'U', 1584 "Un-drop inheritance", 1585 new CLDRFilter() { 1586 // baseDir needs to be 
the "pre-drop" path of an existing copy of old 1587 // common/main 1588 // For example, 2022_10_07_pre folder gets xml from pull request 2433, commit 1589 // 80029f1 1590 // Also ldml.dtd is required; for example: 1591 // mkdir ../2022_10_07_pre/common/dtd 1592 // cp common/dtd/ldml.dtd ../2022_10_07_pre/common/dtd 1593 private final String baseDir = "../2022_10_07_pre/"; 1594 private final File[] list = 1595 new File[] { 1596 new File(baseDir + "common/main/"), 1597 new File(baseDir + "common/annotations/") 1598 }; 1599 private Factory preFactory = null; 1600 private CLDRFile preFile = null; 1601 1602 @Override 1603 public void handleStart() { 1604 if (preFactory == null) { 1605 preFactory = SimpleFactory.make(list, ".*"); 1606 } 1607 String localeID = cldrFileToFilter.getLocaleID(); 1608 try { 1609 preFile = preFactory.make(localeID, false /* not resolved */); 1610 } catch (Exception e) { 1611 System.out.println("Skipping " + localeID + " due to " + e); 1612 preFile = null; 1613 } 1614 } 1615 1616 @Override 1617 public void handlePath(String xpath) { 1618 if (preFile == null) { 1619 return; 1620 } 1621 if (xpath.contains("personName")) { 1622 return; 1623 } 1624 String value = cldrFileToFilter.getStringValue(xpath); 1625 if (CldrUtility.INHERITANCE_MARKER.equals(value)) { 1626 String preValue = preFile.getStringValue(xpath); 1627 if (!CldrUtility.INHERITANCE_MARKER.equals(preValue)) { 1628 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1629 replace(fullXPath, fullXPath, preValue); 1630 } 1631 } 1632 } 1633 }); 1634 1635 fixList.add( 1636 't', 1637 "Fix missing count values groups", 1638 new CLDRFilter() { 1639 1640 @Override 1641 public void handlePath(String xpath) { 1642 if (xpath.indexOf("@count=\"other\"") < 0) { 1643 return; 1644 } 1645 1646 String value = cldrFileToFilter.getStringValue(xpath); 1647 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1648 String[] missingCounts = {"one"}; 1649 for (String count : missingCounts) { 1650 String 
newFullXPath = fullXPath.replace("other", count); 1651 if (cldrFileToFilter.getWinningValue(newFullXPath) == null) { 1652 add(newFullXPath, value, "Adding missing plural form"); 1653 } 1654 } 1655 } 1656 }); 1657 1658 fixList.add( 1659 'f', 1660 "NFC (all but transforms, exemplarCharacters, pc, sc, tc, qc, ic)", 1661 new CLDRFilter() { 1662 @Override 1663 public void handlePath(String xpath) { 1664 if (xpath.indexOf("/segmentation") >= 0 1665 || xpath.indexOf("/transforms") >= 0 1666 || xpath.indexOf("/exemplarCharacters") >= 0 1667 || xpath.indexOf("/pc") >= 0 1668 || xpath.indexOf("/sc") >= 0 1669 || xpath.indexOf("/tc") >= 0 1670 || xpath.indexOf("/qc") >= 0 1671 || xpath.indexOf("/ic") >= 0) return; 1672 String value = cldrFileToFilter.getStringValue(xpath); 1673 String nfcValue = Normalizer.compose(value, false); 1674 if (value.equals(nfcValue)) return; 1675 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1676 replace(fullXPath, fullXPath, nfcValue); 1677 } 1678 }); 1679 1680 fixList.add( 1681 'v', 1682 "remove illegal codes", 1683 new CLDRFilter() { 1684 StandardCodes sc = StandardCodes.make(); 1685 String[] codeTypes = {"language", "script", "territory", "currency"}; 1686 1687 @Override 1688 public void handlePath(String xpath) { 1689 if (xpath.indexOf("/currency") < 0 1690 && xpath.indexOf("/timeZoneNames") < 0 1691 && xpath.indexOf("/localeDisplayNames") < 0) return; 1692 XPathParts parts = XPathParts.getFrozenInstance(xpath); 1693 String code; 1694 for (int i = 0; i < codeTypes.length; ++i) { 1695 code = parts.findAttributeValue(codeTypes[i], "type"); 1696 if (code != null) { 1697 if (!sc.getGoodAvailableCodes(codeTypes[i]).contains(code)) 1698 remove(xpath); 1699 return; 1700 } 1701 } 1702 code = parts.findAttributeValue("zone", "type"); 1703 if (code != null) { 1704 if (code.indexOf("/GMT") >= 0) remove(xpath); 1705 } 1706 } 1707 }); 1708 1709 fixList.add( 1710 'w', 1711 "fix alt='...proposed' when there is no alternative", 1712 new 
CLDRFilter() { 1713 private Set<String> newFullXPathSoFar = new HashSet<>(); 1714 1715 @Override 1716 public void handlePath(String xpath) { 1717 if (xpath.indexOf("proposed") < 0) return; 1718 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1719 XPathParts parts = 1720 XPathParts.getFrozenInstance(fullXPath) 1721 .cloneAsThawed(); // not frozen, for removeProposed 1722 String newFullXPath = parts.removeProposed().toString(); 1723 // now see if there is an uninherited value 1724 String value = cldrFileToFilter.getStringValue(xpath); 1725 String baseValue = cldrFileToFilter.getStringValue(newFullXPath); 1726 if (baseValue != null) { 1727 // if the value AND the fullxpath are the same as what we have, then 1728 // delete 1729 if (value.equals(baseValue)) { 1730 String baseFullXPath = cldrFileToFilter.getFullXPath(newFullXPath); 1731 if (baseFullXPath.equals(newFullXPath)) { 1732 remove(xpath, "alt=base"); 1733 } 1734 } 1735 return; // there is, so skip 1736 } 1737 // there isn't, so modif if we haven't done so already 1738 if (!newFullXPathSoFar.contains(newFullXPath)) { 1739 replace(fullXPath, newFullXPath, value); 1740 newFullXPathSoFar.add(newFullXPath); 1741 } 1742 } 1743 }); 1744 1745 fixList.add( 1746 'S', 1747 "add datetimeSkeleton to dateFormat,timeFormat", 1748 new CLDRFilter() { 1749 DateTimePatternGenerator dateTimePatternGenerator = 1750 DateTimePatternGenerator.getEmptyInstance(); 1751 1752 @Override 1753 public void handlePath(String xpath) { 1754 // desired xpaths are like 1755 // //ldml/dates/calendars/calendar[@type="..."]/dateFormats/dateFormatLength[@type="..."]/dateFormat[@type="standard"]/pattern[@type="standard"] 1756 // //ldml/dates/calendars/calendar[@type="..."]/dateFormats/dateFormatLength[@type="..."]/dateFormat[@type="standard"]/pattern[@type="standard"][@draft="..."] 1757 // //ldml/dates/calendars/calendar[@type="..."]/dateFormats/dateFormatLength[@type="..."]/dateFormat[@type="standard"]/pattern[@type="standard"][@numbers="..."] 
1758 // //ldml/dates/calendars/calendar[@type="..."]/dateFormats/dateFormatLength[@type="..."]/dateFormat[@type="standard"]/pattern[@type="standard"][@numbers="..."][@draft="..."] 1759 // //ldml/dates/calendars/calendar[@type="..."]/dateFormats/dateFormatLength[@type="..."]/dateFormat[@type="standard"]/pattern[@type="standard"][@alt="variant"] 1760 // //ldml/dates/calendars/calendar[@type="..."]/timeFormats/timeFormatLength[@type="..."]/timeFormat[@type="standard"]/pattern[@type="standard"] 1761 // //ldml/dates/calendars/calendar[@type="..."]/timeFormats/timeFormatLength[@type="..."]/timeFormat[@type="standard"]/pattern[@type="standard"][@draft="..."] 1762 if (xpath.indexOf("/dateFormat[@type=\"standard\"]/pattern") < 0 1763 && xpath.indexOf("/timeFormat[@type=\"standard\"]/pattern") < 0) { 1764 return; 1765 } 1766 String patternValue = cldrFileToFilter.getStringValue(xpath); 1767 String skeletonValue = patternValue; 1768 if (!patternValue.equals("↑↑↑")) { 1769 skeletonValue = dateTimePatternGenerator.getSkeleton(patternValue); 1770 if (skeletonValue == null || skeletonValue.length() < 1) { 1771 show( 1772 "empty skeleton for datetime pattern \"" 1773 + patternValue 1774 + "\"", 1775 "path " + xpath); 1776 return; 1777 } 1778 } 1779 1780 String patternFullXPath = cldrFileToFilter.getFullXPath(xpath); 1781 // Replace pattern[@type="standard"] with datetimeSkeleton, preserve other 1782 // attributes (including numbers per TC discussion). 1783 // Note that for the alt="variant" patterns there are corresponding 1784 // alt="variant" availableFormats that must be used. 
1785 String skeletonFullXPath = 1786 patternFullXPath.replace( 1787 "/pattern[@type=\"standard\"]", 1788 "/datetimeSkeleton"); // .replaceAll("\\[@numbers=\"[^\"]+\"\\]", "") 1789 add( 1790 skeletonFullXPath, 1791 skeletonValue, 1792 "create datetimeSkeleton from dateFormat/pattern or timeFormat/pattern"); 1793 } 1794 }); 1795 1796 /* 1797 * Fix id to be identical to skeleton 1798 * Eliminate any single-field ids 1799 * Add "L" (stand-alone month), "?" (other stand-alones) 1800 * Remove any fields with both a date and a time 1801 * Test that datetime format is valid format (will have to fix by hand) 1802 * Map k, K to H, h 1803 * 1804 * In Survey Tool: don't show id; compute when item added or changed 1805 * test validity 1806 */ 1807 fixList.add( 1808 'd', 1809 "fix dates", 1810 new CLDRFilter() { 1811 DateTimePatternGenerator dateTimePatternGenerator = 1812 DateTimePatternGenerator.getEmptyInstance(); 1813 DateTimePatternGenerator.FormatParser formatParser = 1814 new DateTimePatternGenerator.FormatParser(); 1815 Map<String, Set<String>> seenSoFar = new HashMap<>(); 1816 1817 @Override 1818 public void handleStart() { 1819 seenSoFar.clear(); 1820 } 1821 1822 @Override 1823 public void handlePath(String xpath) { 1824 if (xpath.contains("timeFormatLength") && xpath.contains("full")) { 1825 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1826 String value = cldrFileToFilter.getStringValue(xpath); 1827 boolean gotChange = false; 1828 List<Object> list = formatParser.set(value).getItems(); 1829 for (int i = 0; i < list.size(); ++i) { 1830 Object item = list.get(i); 1831 if (item instanceof DateTimePatternGenerator.VariableField) { 1832 String itemString = item.toString(); 1833 if (itemString.charAt(0) == 'z') { 1834 list.set( 1835 i, 1836 new VariableField( 1837 Utility.repeat("v", itemString.length()))); 1838 gotChange = true; 1839 } 1840 } 1841 } 1842 if (gotChange) { 1843 String newValue = toStringWorkaround(); 1844 if (value != newValue) { 1845 
replace(xpath, fullpath, newValue); 1846 } 1847 } 1848 } 1849 if (xpath.indexOf("/availableFormats") < 0) { 1850 return; 1851 } 1852 String value = cldrFileToFilter.getStringValue(xpath); 1853 if (value == null) { 1854 return; // not in current file 1855 } 1856 1857 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1858 XPathParts fullparts = XPathParts.getFrozenInstance(fullpath); 1859 Map<String, String> attributes = fullparts.findAttributes("dateFormatItem"); 1860 String id = attributes.get("id"); 1861 String oldID = id; 1862 try { 1863 id = dateTimePatternGenerator.getBaseSkeleton(id); 1864 if (id.equals(oldID)) { 1865 return; 1866 } 1867 System.out.println(oldID + " => " + id); 1868 } catch (RuntimeException e) { 1869 id = "[error]"; 1870 return; 1871 } 1872 1873 attributes.put("id", id); 1874 totalSkeletons.add(id); 1875 1876 replace(xpath, fullparts.toString(), value); 1877 } 1878 1879 private String toStringWorkaround() { 1880 StringBuffer result = new StringBuffer(); 1881 List<Object> items = formatParser.getItems(); 1882 for (int i = 0; i < items.size(); ++i) { 1883 Object item = items.get(i); 1884 if (item instanceof String) { 1885 result.append(formatParser.quoteLiteral((String) items.get(i))); 1886 } else { 1887 result.append(items.get(i).toString()); 1888 } 1889 } 1890 return result.toString(); 1891 } 1892 }); 1893 1894 fixList.add( 1895 'y', 1896 "fix years to be y (with exceptions)", 1897 new CLDRFilter() { 1898 DateTimeCanonicalizer dtc = new DateTimeCanonicalizer(true); 1899 Map<String, Set<String>> seenSoFar = new HashMap<>(); 1900 1901 @Override 1902 public void handleStart() { 1903 seenSoFar.clear(); 1904 } 1905 1906 @Override 1907 public void handlePath(String xpath) { 1908 DateTimePatternType datetimePatternType = 1909 DateTimePatternType.fromPath(xpath); 1910 1911 // check to see if we need to change the value 1912 if (!DateTimePatternType.STOCK_AVAILABLE_INTERVAL_PATTERNS.contains( 1913 datetimePatternType)) { 1914 return; 1915 } 1916 
String oldValue = cldrFileToFilter.getStringValue(xpath); 1917 String value = 1918 dtc.getCanonicalDatePattern(xpath, oldValue, datetimePatternType); 1919 String fullPath = cldrFileToFilter.getFullXPath(xpath); 1920 if (value.equals(oldValue)) { 1921 return; 1922 } 1923 // made it through the gauntlet, so replace 1924 replace(xpath, fullPath, value); 1925 } 1926 }); 1927 1928 // This should only be applied to specific locales, and the results checked manually 1929 // afterward. 1930 // It will only create ranges using the same digits as in root, not script-specific digits. 1931 // Any pre-existing year ranges should use the range marker from the intervalFormats "y" 1932 // item. 1933 // This make several assumptions and is somewhat *FRAGILE*. 1934 fixList.add( 1935 'j', 1936 "add year ranges from root to Japanese calendar eras", 1937 new CLDRFilter() { 1938 private CLDRFile rootFile; 1939 1940 @Override 1941 public void handleStart() { 1942 rootFile = factory.make("root", false); 1943 } 1944 1945 @Override 1946 public void handlePath(String xpath) { 1947 // Skip paths we don't care about 1948 if (xpath.indexOf("/calendar[@type=\"japanese\"]/eras/era") < 0) return; 1949 // Get root name for the era, check it 1950 String rootEraValue = rootFile.getStringValue(xpath); 1951 int rootEraIndex = rootEraValue.indexOf(" ("); 1952 if (rootEraIndex < 0) 1953 return; // this era does not have a year range in root, no need to add 1954 // one in this 1955 // locale 1956 // Get range marker from intervalFormat range for y 1957 String yearIntervalFormat = 1958 cldrFileToFilter.getStringValue( 1959 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"y\"]/greatestDifference[@id=\"y\"]"); 1960 if (yearIntervalFormat == null) 1961 return; // oops, no intervalFormat data for y 1962 String rangeMarker = 1963 yearIntervalFormat.replaceAll( 1964 "[.y\u5E74\uB144]", ""); // *FRAGILE* strip out 1965 // everything except the 1966 // 
range-indicating part 1967 // Get current locale name for this era, check it 1968 String eraValue = cldrFileToFilter.getStringValue(xpath); 1969 if (eraValue.indexOf('(') >= 0 && eraValue.indexOf(rangeMarker) >= 0) 1970 return; // this eraValue already 1971 // has a year range that 1972 // uses the appropriate 1973 // rangeMarker 1974 // Now update the root year range it with the rangeMarker for this locale, 1975 // and append it to this 1976 // locale's name 1977 String rootYearRange = rootEraValue.substring(rootEraIndex); 1978 String appendYearRange = 1979 rootYearRange.replaceAll("[\u002D\u2013]", rangeMarker); 1980 String newEraValue = eraValue.concat(appendYearRange); 1981 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1982 replace(xpath, fullpath, newEraValue); 1983 } 1984 }); 1985 1986 fixList.add( 1987 'r', 1988 "fix references and standards", 1989 new CLDRFilter() { 1990 int currentRef = 500; 1991 Map<String, TreeMap<String, String>> locale_oldref_newref = new TreeMap<>(); 1992 TreeMap<String, String> oldref_newref; 1993 1994 @Override 1995 public void handleStart() { 1996 String locale = cldrFileToFilter.getLocaleID(); 1997 oldref_newref = locale_oldref_newref.get(locale); 1998 if (oldref_newref == null) { 1999 oldref_newref = new TreeMap<>(); 2000 locale_oldref_newref.put(locale, oldref_newref); 2001 } 2002 } 2003 2004 @Override 2005 public void handlePath(String xpath) { 2006 // must be minimized for this to work. 
String fullpath = cldrFileToFilter.getFullXPath(xpath);
                        // Only full paths that mention "reference" are of interest.
                        if (!fullpath.contains("reference")) return;
                        String value = cldrFileToFilter.getStringValue(xpath);
                        XPathParts fullparts =
                                XPathParts.getFrozenInstance(fullpath)
                                        .cloneAsThawed(); // can't be frozen
                        if ("reference".equals(fullparts.getElement(-1))) {
                            // a <reference> element: its id is in the "type" attribute
                            fixType(value, "type", fullpath, fullparts);
                        } else if (fullparts.getAttributeValue(-1, "references") != null) {
                            // an element pointing at a reference through "references"
                            fixType(value, "references", fullpath, fullparts);
                        } else {
                            System.out.println("CLDRModify: Skipping: " + xpath);
                        }
                    }

                    /**
                     * Rewrites the reference id held in attribute {@code type} of the last path
                     * element to its renumbered id and queues the path replacement.
                     *
                     * @param value the value at the path; carried over unchanged
                     * @param type name of the attribute on the last element holding the id
                     * @param oldFullPath the full path being replaced
                     * @param fullparts the XPathParts -- must not be frozen, for addAttribute
                     * @throws IllegalArgumentException if {@code whitespace.containsSome} is true
                     *     for the current id
                     */
                    private void fixType(
                            String value, String type, String oldFullPath, XPathParts fullparts) {
                        String ref = fullparts.getAttributeValue(-1, type);
                        if (whitespace.containsSome(ref)) {
                            throw new IllegalArgumentException("Whitespace in references");
                        }
                        String newRef = getNewRef(ref);
                        fullparts.addAttribute(type, newRef);
                        replace(oldFullPath, fullparts.toString(), value);
                    }

                    /**
                     * Returns the renumbered id for {@code ref} in the current locale, allocating
                     * the next one ("R" followed by the counter) the first time it is seen.
                     */
                    private String getNewRef(String ref) {
                        String newRef = oldref_newref.get(ref);
                        if (newRef == null) {
                            newRef = String.valueOf(currentRef++);
                            // left-pad with zeros to three digits
                            newRef = "R" + Utility.repeat("0", (3 - newRef.length())) + newRef;
                            oldref_newref.put(ref, newRef);
                        }
                        return newRef;
                    }
                });

        fixList.add(
                'q',
                "fix annotation punctuation",
                new CLDRFilter() {
                    /**
                     * Normalizes one annotation path: a bracketed single-character {@code cp} set
                     * becomes the character itself, a {@code tts} attribute is moved out into a
                     * separate {@code type="tts"} path, and the keyword value is re-joined with
                     * " | ". A comment starting with {@code hex} is attached to the new path.
                     */
                    @Override
                    public void handlePath(String xpath) {
                        if (!xpath.contains("/annotation")) {
                            return;
                        }
                        String fullpath = cldrFileToFilter.getFullXPath(xpath);
                        XPathParts parts = XPathParts.getFrozenInstance(fullpath);
                        String cp = parts.getAttributeValue(2, "cp");
                        String tts = parts.getAttributeValue(2, "tts");
                        String type = parts.getAttributeValue(2, "type");
                        if ("tts".equals(type)) {
                            return; // ok, skip
                        }
                        parts = parts.cloneAsThawed();
                        // NOTE(review): hex stays at this literal whenever cp is not a bracketed
                        // set -- looks like a placeholder; confirm it is intended.
                        String hex = "1F600";
                        // NOTE(review): cp is dereferenced without a null check.
                        if (cp.startsWith("[")) {
                            UnicodeSet us = new UnicodeSet(cp);
                            if (us.size() == 1) {
                                cp = us.iterator().next();
                                hex = Utility.hex(cp);
                            } else {
                                hex = us.toString();
                            }
                            parts.putAttributeValue(2, "cp", cp);
                        }
                        parts.removeAttribute(2, "tts");
                        if (tts != null) {
                            // move the old tts attribute into its own type="tts" path
                            String newTts = CldrUtility.join(COMMA_SEMI.splitToList(tts), ", ");
                            XPathParts parts2 = parts.cloneAsThawed();
                            parts2.putAttributeValue(2, "type", "tts");
                            add(parts2.toString(), newTts, "separate tts");
                        }
                        String value = cldrFileToFilter.getStringValue(xpath);
                        String newValue = CldrUtility.join(COMMA_SEMI.splitToList(value), " | ");
                        final String newFullPath = parts.toString();
                        // Move any PREBLOCK comment to the new path, prefixed with hex. This
                        // happens even when the path and value turn out to be unchanged.
                        Comments comments = cldrFileToFilter.getXpath_comments();
                        String comment = comments.removeComment(CommentType.PREBLOCK, xpath);
                        comment = hex + (comment == null ? "" : " " + comment);
                        comments.addComment(CommentType.PREBLOCK, newFullPath, comment);
                        if (!fullpath.equals(newFullPath) || !value.equals(newValue)) {
                            replace(fullpath, newFullPath, newValue);
                        }
                    }
                });

        fixList.add(
                'Q',
                "add annotation names to keywords",
                new CLDRFilter() {
                    // Locales for which Annotations reports data.
                    Set<String> available = Annotations.getAllAvailable();
                    // Scratch set, reused per path; ordered by the root collator.
                    TreeSet<String> sorted = new TreeSet<>(Collator.getInstance(ULocale.ROOT));
                    // Resolved view of the locale being filtered.
                    CLDRFile resolved;
                    // cp values already processed. NOTE(review): never cleared in handleStart, so
                    // it appears to carry over between locales -- confirm this is intended.
                    Set<String> handledCharacters = new HashSet<>();
                    // True when the parent of the current locale is root.
                    boolean isTop;

                    /**
                     * @throws IllegalArgumentException if the locale is not in {@code available}
                     */
                    @Override
                    public void handleStart() {
                        String localeID = cldrFileToFilter.getLocaleID();
                        if (!available.contains(localeID)) {
                            throw new IllegalArgumentException(
                                    "no annotations available, probably wrong directory");
                        }
                        resolved = factory.make(localeID, true);
                        CLDRLocale parent = CLDRLocale.getInstance(localeID).getParent();
                        isTop = CLDRLocale.ROOT.equals(parent);
                    }

                    /**
                     * Merges the annotation name (the {@code type="tts"} value) into the keyword
                     * list of the same character and replaces the keyword value when that changes
                     * it.
                     */
                    @Override
                    public void handlePath(String xpath) {
                        if (!xpath.contains("/annotation")) {
                            return;
                        }
                        // <annotation cp="">100 | honderd | persent | telling |
                        // vol</annotation>
                        // <annotation cp="" type="tts">honderd punte</annotation>
                        // we will copy honderd punte into the list of keywords.
                        String fullpath = cldrFileToFilter.getFullXPath(xpath);
                        XPathParts parts = XPathParts.getFrozenInstance(fullpath);
                        String cp = parts.getAttributeValue(2, "cp");
                        String type = parts.getAttributeValue(2, "type");
                        if (!isTop) {
                            // If we run into the keyword first (or only the keywords)
                            // we construct the tts version for consistent processing
                            // and mark it as handled. We only do this for non-top locales,
                            // because if the top locales don't have a tts we're not going to add
                            // anyway.
if (handledCharacters.contains(cp)) {
                                return; // already handled
                            }
                            // repeat the above, but for the tts path
                            xpath = parts.cloneAsThawed().setAttribute(2, "type", "tts").toString();
                            // NOTE(review): if this file has no tts path for cp, getFullXPath
                            // presumably returns null here -- confirm getFrozenInstance copes.
                            fullpath = cldrFileToFilter.getFullXPath(xpath);
                            parts = XPathParts.getFrozenInstance(fullpath);
                            type = parts.getAttributeValue(2, "type");
                            // mark the character as seen
                            handledCharacters.add(cp);
                        } else if (type == null) {
                            return; // no TTS, and top level, so skip
                        }
                        // From here on xpath is the tts (name) path and keywordPath its sibling.
                        String keywordPath =
                                parts.cloneAsThawed()
                                        .removeAttribute(2, "type")
                                        .toString(); // construct the path without tts
                        String distinguishingKeywordPath =
                                CLDRFile.getDistinguishingXPath(keywordPath, null);
                        String rawKeywordValue = cldrFileToFilter.getStringValue(keywordPath);

                        // skip if keywords AND name are inherited
                        if (rawKeywordValue == null
                                || rawKeywordValue.equals(CldrUtility.INHERITANCE_MARKER)) {
                            String rawName = cldrFileToFilter.getStringValue(xpath);
                            if (rawName == null || rawName.equals(CldrUtility.INHERITANCE_MARKER)) {
                                return;
                            }
                        }

                        // skip if the name is not above root
                        String nameSourceLocale = resolved.getSourceLocaleID(xpath, null);
                        if (XMLSource.ROOT_ID.equals(nameSourceLocale)
                                || XMLSource.CODE_FALLBACK_ID.equals(nameSourceLocale)) {
                            return;
                        }

                        String name = resolved.getStringValue(xpath);
                        String keywordValue = resolved.getStringValue(keywordPath);
                        String sourceLocaleId =
                                resolved.getSourceLocaleID(distinguishingKeywordPath, null);
                        // The new keyword list always starts from the name ...
                        sorted.clear();
                        sorted.add(name);

                        // ... and keeps the existing keywords only when they do not come from
                        // root or code-fallback.
                        // NOTE(review): unlike the check above, this dereferences sourceLocaleId
                        // directly; verify it can never be null.
                        List<String> items;
                        if (!sourceLocaleId.equals(XMLSource.ROOT_ID)
                                && !sourceLocaleId.equals(XMLSource.CODE_FALLBACK_ID)) {
                            items = Annotations.splitter.splitToList(keywordValue);
                            sorted.addAll(items);
                        }

                        DisplayAndInputProcessor.filterCoveredKeywords(sorted);
                        DisplayAndInputProcessor.filterKeywordsDifferingOnlyInCase(sorted);
                        String newKeywordValue = Joiner.on(" | ").join(sorted);
                        if (!newKeywordValue.equals(keywordValue)) {
                            replace(keywordPath, keywordPath, newKeywordValue);
                        }
                    }
                });

        fixList.add(
                'N',
                "add number symbols to exemplars",
                new CLDRFilter() {
                    // Resolved view of the locale being filtered.
                    CLDRFile resolved;
                    // Characters collected from the numeric exemplars of each number system.
                    UnicodeSet numberStuff = new UnicodeSet();
                    // Number systems already processed for this locale.
                    Set<String> seen = new HashSet<>();
                    // Number systems the locale actually declares (filled when
                    // NUMBER_SYSTEM_HACK is on); all others are ignored in handlePath.
                    Set<String> hackAllowOnly = new HashSet<>();
                    // True for root, which is never modified.
                    boolean skip = false;

                    /** Resets the per-locale state and records the locale's number systems. */
                    @Override
                    public void handleStart() {
                        String localeID = cldrFileToFilter.getLocaleID();
                        resolved = factory.make(localeID, true);
                        numberStuff.clear();
                        seen.clear();
                        skip = localeID.equals("root");
                        // TODO add return value to handleStart to skip calling handlePath

                        if (NUMBER_SYSTEM_HACK) {
                            hackAllowOnly.clear();
                            for (NumberingSystem system : NumberingSystem.values()) {
                                // an entry without a path stands for "latn"
                                String numberingSystem =
                                        system.path == null
                                                ? "latn"
                                                : cldrFileToFilter.getStringValue(system.path);
                                if (numberingSystem != null) {
                                    hackAllowOnly.add(numberingSystem);
                                }
                            }
                        }
                    }

                    @Override
                    public void handlePath(String xpath) {
                        // the following doesn't work without NUMBER_SYSTEM_HACK, because there are
                        // spurious numbersystems in the data.
// http://unicode.org/cldr/trac/ticket/10648
                        // so using a hack for now in handleEnd
                        if (skip || !xpath.startsWith("//ldml/numbers/symbols")) {
                            return;
                        }

                        // //ldml/numbers/symbols[@numberSystem="latn"]/exponential
                        XPathParts parts = XPathParts.getFrozenInstance(xpath);
                        String system = parts.getAttributeValue(2, "numberSystem");
                        if (system == null) {
                            System.err.println(
                                    "Bogus numberSystem:\t"
                                            + cldrFileToFilter.getLocaleID()
                                            + " \t"
                                            + xpath);
                            return;
                        } else if (seen.contains(system) || !hackAllowOnly.contains(system)) {
                            // each declared number system is processed once per locale
                            return;
                        }
                        seen.add(system);
                        UnicodeSet exemplars = resolved.getExemplarsNumeric(system);
                        System.out.println("# " + system + " ==> " + exemplars.toPattern(false));
                        for (String s : exemplars) {
                            numberStuff.addAll(s); // add individual characters
                        }
                    }

                    /**
                     * Writes the collected characters, merged with the current numbers exemplar
                     * set, back as the numbers exemplar value if the two sets differ.
                     */
                    @Override
                    public void handleEnd() {
                        if (!numberStuff.isEmpty()) {
                            UnicodeSet current =
                                    cldrFileToFilter.getExemplarSet(
                                            ExemplarType.numbers, WinningChoice.WINNING);
                            if (!numberStuff.equals(current)) {
                                DisplayAndInputProcessor daip =
                                        new DisplayAndInputProcessor(cldrFileToFilter);
                                if (current != null && !current.isEmpty()) {
                                    numberStuff.addAll(current);
                                }
                                String path = CLDRFile.getExemplarPath(ExemplarType.numbers);
                                String value = daip.getPrettyPrinter().format(numberStuff);
                                replace(path, path, value);
                            }
                        }
                    }
                });

        fixList.add(
                'k',
                "fix according to -k config file. Details on http://cldr.unicode.org/development/cldr-big-red-switch/cldrmodify-passes/cldrmodify-config",
                new CLDRFilter() {
                    // All config entries, grouped by their locale matcher; filled lazily once.
                    private Map<ConfigMatch, LinkedHashSet<Map<ConfigKeys, ConfigMatch>>>
                            locale2keyValues;
                    // The entries whose locale matcher accepts the current locale.
                    private LinkedHashSet<Map<ConfigKeys, ConfigMatch>> keyValues =
                            new LinkedHashSet<>();

                    /**
                     * Selects the config entries for the current locale, validates the key
                     * combination of each entry, and applies the add/addNew entries right away.
                     * Does nothing unless the fix option value is exactly "k".
                     *
                     * @throws IllegalArgumentException if an entry has a key combination that its
                     *     action does not allow
                     */
                    @Override
                    public void handleStart() {
                        super.handleStart();
                        if (!options[FIX].doesOccur || !options[FIX].value.equals("k")) {
                            return;
                        }
                        if (locale2keyValues == null) {
                            fillCache();
                        }
                        // set up for the specific locale we are dealing with.
                        // a small optimization
                        String localeId = getLocaleID();
                        keyValues.clear();
                        for (Entry<ConfigMatch, LinkedHashSet<Map<ConfigKeys, ConfigMatch>>>
                                localeMatcher : locale2keyValues.entrySet()) {
                            if (localeMatcher.getKey().matches(localeId)) {
                                keyValues.addAll(localeMatcher.getValue());
                            }
                        }
                        System.out.println("# Checking entries & changing:\t" + keyValues.size());
                        for (Map<ConfigKeys, ConfigMatch> entry : keyValues) {
                            ConfigMatch action = entry.get(ConfigKeys.action);
                            ConfigMatch pathMatch = entry.get(ConfigKeys.path);
                            ConfigMatch valueMatch = entry.get(ConfigKeys.value);
                            ConfigMatch newPath = entry.get(ConfigKeys.new_path);
                            ConfigMatch newValue = entry.get(ConfigKeys.new_value);
                            switch (action.action) {
                                    // we add all the values up front
                                case addNew:
                                case add:
                                    // needs new_path and new_value, and neither path nor value
                                    if (pathMatch != null
                                            || valueMatch != null
                                            || newPath == null
                                            || newValue == null) {
                                        throw new IllegalArgumentException(
                                                action.action
                                                        + ": must have no path nor value = null AND new_path or new_value:\n\t"
                                                        + entry);
                                    }
                                    String newPathString = newPath.getPath(getResolved());
                                    // "add" always writes; "addNew" only when nothing is there yet
                                    if (action.action == ConfigAction.add
                                            || cldrFileToFilter.getStringValue(newPathString)
                                                    == null) {
                                        replace(
                                                newPathString,
                                                newPathString,
                                                newValue.exactMatch,
                                                "config");
                                    }
                                    break;
                                    // we just check
                                case replace:
                                    if ((pathMatch == null && valueMatch == null)
                                            || (newPath == null && newValue == null)) {
                                        throw new IllegalArgumentException(
                                                action.action
                                                        + ": must have (path or value) AND (new_path or new_value):\n\t"
                                                        + entry);
                                    }
                                    break;
                                    // For delete, we just check; we'll remove later
                                case delete:
                                    if (newPath != null || newValue != null) {
                                        throw new IllegalArgumentException(
                                                action.action
                                                        + ": must have no new_path nor new_value:\n\t"
                                                        + entry);
                                    }
                                    break;
                                default: // an action this switch does not know about
                                    throw new IllegalArgumentException("Internal Error");
                            }
                        }
                    }

                    /**
                     * Reads the config file named by the KONFIG option into {@code
                     * locale2keyValues}. Each line is split with SPLIT_ON_SEMI into key=value
                     * parts.
                     */
                    private void fillCache() {
                        locale2keyValues = new LinkedHashMap<>();
                        String configFileName = options[KONFIG].value;
                        FileProcessor myReader =
                                new FileProcessor() {
                                    {
                                        doHash = false;
                                    }

                                    @Override
                                    protected boolean handleLine(int lineCount, String line) {
                                        line = line.trim();
                                        Iterable<String> lineParts = SPLIT_ON_SEMI.split(line);
                                        Map<ConfigKeys, ConfigMatch> keyValue =
                                                new EnumMap<>(ConfigKeys.class);
                                        for (String linePart : lineParts) {
                                            int pos = linePart.indexOf('=');
                                            if (pos < 0) {
                                                // WARNING; the code doesn't allow for ; within
                                                // values; need to restructure for that.
2388 throw new IllegalArgumentException( 2389 lineCount 2390 + ":\t No = in command: «" 2391 + linePart 2392 + "» in " 2393 + line); 2394 } 2395 ConfigKeys key = 2396 ConfigKeys.valueOf( 2397 linePart.substring(0, pos).trim()); 2398 if (keyValue.containsKey(key)) { 2399 throw new IllegalArgumentException( 2400 "Must not have multiple keys: " + key); 2401 } 2402 String match = linePart.substring(pos + 1).trim(); 2403 keyValue.put(key, new ConfigMatch(key, match)); 2404 } 2405 final ConfigMatch locale = keyValue.get(ConfigKeys.locale); 2406 if (locale == null 2407 || keyValue.get(ConfigKeys.action) == null) { 2408 throw new IllegalArgumentException(); 2409 } 2410 2411 // validate new path 2412 LinkedHashSet<Map<ConfigKeys, ConfigMatch>> keyValues = 2413 locale2keyValues.get(locale); 2414 if (keyValues == null) { 2415 locale2keyValues.put( 2416 locale, keyValues = new LinkedHashSet<>()); 2417 } 2418 keyValues.add(keyValue); 2419 return true; 2420 } 2421 }; 2422 myReader.process(CLDRModify.class, configFileName); 2423 } 2424 2425 static final String DEBUG_PATH = 2426 "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"long\"][@usage=\"referring\"][@formality=\"formal\"]/namePattern"; 2427 2428 @Override 2429 public void handlePath(String xpath) { 2430 // slow method; could optimize 2431 if (DEBUG_PATH != null && DEBUG_PATH.equals(xpath)) { 2432 System.out.println(xpath); 2433 } 2434 for (Map<ConfigKeys, ConfigMatch> entry : keyValues) { 2435 ConfigMatch pathMatch = entry.get(ConfigKeys.path); 2436 if (pathMatch != null && !pathMatch.matches(xpath)) { 2437 if (DEBUG_PATH != null 2438 && pathMatch != null 2439 && pathMatch.regexMatch != null) { 2440 System.out.println( 2441 RegexUtilities.showMismatch( 2442 pathMatch.regexMatch, xpath)); 2443 } 2444 continue; 2445 } 2446 ConfigMatch valueMatch = entry.get(ConfigKeys.value); 2447 final String value = cldrFileToFilter.getStringValue(xpath); 2448 if (valueMatch != null && !valueMatch.matches(value)) { 2449 
continue; 2450 } 2451 ConfigMatch action = entry.get(ConfigKeys.action); 2452 switch (action.action) { 2453 case delete: 2454 remove(xpath, "config"); 2455 break; 2456 case replace: 2457 ConfigMatch newPath = entry.get(ConfigKeys.new_path); 2458 ConfigMatch newValue = entry.get(ConfigKeys.new_value); 2459 2460 String fullpath = cldrFileToFilter.getFullXPath(xpath); 2461 String draft = ""; 2462 int loc = fullpath.indexOf("[@draft="); 2463 if (loc >= 0) { 2464 int loc2 = fullpath.indexOf(']', loc + 7); 2465 draft = fullpath.substring(loc, loc2 + 1); 2466 } 2467 2468 String modPath = 2469 ConfigMatch.getModified(pathMatch, xpath, newPath) 2470 + draft; 2471 String modValue = 2472 ConfigMatch.getModified(valueMatch, value, newValue); 2473 replace(xpath, modPath, modValue, "config"); 2474 } 2475 } 2476 } 2477 }); 2478 fixList.add('i', "fix Identical Children"); 2479 fixList.add('o', "check attribute validity"); 2480 2481 /** 2482 * Goal is: if value in vxml is ^^^, then add ^^^ to trunk IFF (a) if there is no value in 2483 * trunk (b) the value in trunk = bailey. 2484 */ 2485 fixList.add( 2486 '^', 2487 "add inheritance-marked items from vxml to trunk", 2488 new CLDRFilter() { 2489 Factory VxmlFactory; 2490 final ArrayList<File> fileList = new ArrayList<>(); 2491 2492 @Override 2493 public void handleStart() { 2494 if (fileList.isEmpty()) { 2495 for (String top : Arrays.asList("common/", "seed/")) { 2496 // for (String leaf : Arrays.asList("main/", "annotations/")) { 2497 String leaf = 2498 sourceInput.contains("annotations") 2499 ? 
"annotations/" 2500 : "main/"; 2501 String key = top + leaf; 2502 fileList.add( 2503 new File( 2504 CLDRPaths.AUX_DIRECTORY 2505 + "voting/" 2506 + CLDRFile.GEN_VERSION 2507 + "/vxml/" 2508 + key)); 2509 } 2510 VxmlFactory = 2511 SimpleFactory.make( 2512 fileList.toArray(new File[fileList.size()]), ".*"); 2513 } 2514 2515 String localeID = cldrFileToFilter.getLocaleID(); 2516 2517 CLDRFile vxmlCommonMainFile; 2518 try { 2519 vxmlCommonMainFile = VxmlFactory.make(localeID, false); 2520 } catch (Exception e) { 2521 System.out.println( 2522 "#ERROR: VXML file not found for " 2523 + localeID 2524 + " in " 2525 + fileList); 2526 return; 2527 } 2528 CLDRFile resolved = cldrFileToFilter; 2529 2530 if (!cldrFileToFilter.isResolved()) { 2531 resolved = factory.make(cldrFileToFilter.getLocaleID(), true); 2532 } 2533 2534 for (String xpath : vxmlCommonMainFile) { 2535 String vxmlValue = vxmlCommonMainFile.getStringValue(xpath); 2536 if (vxmlValue == null) { 2537 continue; 2538 } 2539 if (!CldrUtility.INHERITANCE_MARKER.equals(vxmlValue)) { 2540 continue; 2541 } 2542 2543 String trunkValue = resolved.getStringValue(xpath); 2544 if (trunkValue != null) { 2545 String baileyValue = resolved.getBaileyValue(xpath, null, null); 2546 if (!trunkValue.equals(baileyValue)) { 2547 continue; 2548 } 2549 } 2550 // at this point, the vxmlValue is ^^^ and the trunk value is either 2551 // null or == baileyValue 2552 String fullPath = 2553 resolved.getFullXPath(xpath); // get the draft status, etc. 
2554 if (fullPath == null) { // debugging 2555 fullPath = vxmlCommonMainFile.getFullXPath(xpath); 2556 if (fullPath == null) { 2557 throw new ICUException( 2558 "getFullXPath not working for " 2559 + localeID 2560 + ", " 2561 + xpath); 2562 } 2563 } 2564 add( 2565 fullPath, 2566 vxmlValue, 2567 "Add or replace by " + CldrUtility.INHERITANCE_MARKER); 2568 } 2569 } 2570 2571 @Override 2572 public void handlePath(String xpath) { 2573 // Everything done in handleStart 2574 } 2575 }); 2576 2577 fixList.add( 2578 'L', 2579 "fix logical groups by adding all the bailey values", 2580 new CLDRFilter() { 2581 Set<String> seen = new HashSet<>(); 2582 CLDRFile resolved; 2583 boolean skip; 2584 CoverageLevel2 coverageLeveler; 2585 2586 @Override 2587 public void handleStart() { 2588 seen.clear(); 2589 resolved = getResolved(); 2590 skip = false; 2591 coverageLeveler = null; 2592 2593 String localeID = cldrFileToFilter.getLocaleID(); 2594 LanguageTagParser ltp = new LanguageTagParser().set(localeID); 2595 if (!ltp.getRegion().isEmpty() || !ltp.getVariants().isEmpty()) { 2596 skip = true; 2597 } else { 2598 coverageLeveler = CoverageLevel2.getInstance(localeID); 2599 } 2600 } 2601 2602 @Override 2603 public void handlePath(String xpath) { 2604 if (skip 2605 || seen.contains(xpath) 2606 || coverageLeveler.getLevel(xpath) == Level.COMPREHENSIVE) { 2607 return; 2608 } 2609 Set<String> paths = LogicalGrouping.getPaths(cldrFileToFilter, xpath); 2610 if (paths == null || paths.size() < 2) { 2611 return; 2612 } 2613 Set<String> needed = new LinkedHashSet<>(); 2614 for (String path2 : paths) { 2615 if (path2.equals(xpath)) { 2616 continue; 2617 } 2618 if (cldrFileToFilter.isHere(path2)) { 2619 continue; 2620 } 2621 if (LogicalGrouping.isOptional(cldrFileToFilter, path2)) { 2622 continue; 2623 } 2624 // ok, we have a path missing a value 2625 needed.add(path2); 2626 } 2627 if (needed.isEmpty()) { 2628 return; 2629 } 2630 // we need at least one value 2631 2632 // flesh out by adding a 
bailey value 2633 // TODO resolve the draft status in a better way 2634 // For now, get the lowest draft status, and we'll reset everything to that. 2635 2636 DraftStatus worstStatus = 2637 DraftStatus.contributed; // don't ever add an approved. 2638 for (String path2 : paths) { 2639 XPathParts parts = XPathParts.getFrozenInstance(path2); 2640 String rawStatus = parts.getAttributeValue(-1, "draft"); 2641 if (rawStatus == null) { 2642 continue; 2643 } 2644 DraftStatus df = DraftStatus.forString(rawStatus); 2645 if (df.compareTo(worstStatus) < 0) { 2646 worstStatus = df; 2647 } 2648 } 2649 2650 for (String path2 : paths) { 2651 String fullPath = resolved.getFullXPath(path2); 2652 String value = resolved.getStringValue(path2); 2653 if (LogicalGrouping.isOptional(cldrFileToFilter, path2) 2654 && !cldrFileToFilter.isHere(path2)) { 2655 continue; 2656 } 2657 2658 XPathParts fullparts = 2659 XPathParts.getFrozenInstance(fullPath) 2660 .cloneAsThawed(); // not frozen, for setAttribute 2661 fullparts.setAttribute(-1, "draft", worstStatus.toString()); 2662 replace( 2663 fullPath, 2664 fullparts.toString(), 2665 value, 2666 "Fleshing out bailey to " + worstStatus); 2667 } 2668 seen.addAll(paths); 2669 } 2670 }); 2671 2672 // 'R' = Revert to baseline version under certain conditions 2673 fixList.add( 2674 'R', 2675 "Revert under certain conditions", 2676 new CLDRFilter() { 2677 // vxmlDir needs to be the "plain" (without post-processing) path of an existing 2678 // copy of common/main 2679 // For example, vetdata-2023-01-23-plain-dropfalse ... 
see 2680 // https://github.com/unicode-org/cldr/pull/2659 2681 // Also ldml.dtd is required -- and should already have been created by ST when 2682 // generating vxml 2683 private final String vxmlDir = "../vetdata-2023-01-23-plain-dropfalse/vxml/"; 2684 private Factory vxmlFactory = null; 2685 private CLDRFile vxmlFile = null; 2686 private CLDRFile baselineFileUnresolved = null; 2687 private CLDRFile baselineFileResolved = null; 2688 private File[] list = null; 2689 2690 @Override 2691 public void handleSetup() { 2692 final String vxmlSubPath = 2693 vxmlDir + "common/" + new File(options[SOURCEDIR].value).getName(); 2694 // System.out.println(vxmlSubPath); 2695 list = new File[] {new File(vxmlSubPath)}; 2696 } 2697 2698 @Override 2699 public void handleStart() { 2700 if (vxmlFactory == null) { 2701 vxmlFactory = SimpleFactory.make(list, ".*"); 2702 if (!pathHasError( 2703 "zh_Hant", 2704 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-core\"]")) { 2705 throw new RuntimeException("pathHasError wrong?"); 2706 } 2707 } 2708 String localeID = cldrFileToFilter.getLocaleID(); 2709 if (cldrFileToFilter 2710 .isResolved()) { // true only if "-z" added to command line 2711 baselineFileResolved = cldrFileToFilter; 2712 baselineFileUnresolved = cldrFileToFilter.getUnresolved(); 2713 } else { // true unless "-z" added to command line 2714 baselineFileResolved = getResolved(); 2715 baselineFileUnresolved = cldrFileToFilter; 2716 } 2717 try { 2718 vxmlFile = vxmlFactory.make(localeID, false /* not resolved */); 2719 } catch (Exception e) { 2720 System.out.println("Skipping " + localeID + " due to " + e); 2721 vxmlFile = null; 2722 } 2723 } 2724 2725 @Override 2726 public void handlePath(String xpath) { 2727 boolean debugging = false; // xpath.contains("Ciudad_Juarez"); 2728 if (debugging) { 2729 System.out.println("handlePath: got Ciudad_Juarez"); 2730 } 2731 if (vxmlFile == null) { 2732 if (debugging) { 2733 System.out.println("handlePath: 
vxmlFile is null"); 2734 } 2735 return; // use baseline 2736 } 2737 String vxmlValue = vxmlFile.getStringValue(xpath); 2738 if (vxmlValue == null) { 2739 throw new RuntimeException( 2740 this.getLocaleID() + ":" + xpath + ": vxmlValue == null"); 2741 } 2742 if (!wantRevertToBaseline(xpath, vxmlValue)) { 2743 if (debugging) { 2744 System.out.println("handlePath: wantRevertToBaseline false"); 2745 } 2746 String fullXPath = vxmlFile.getFullXPath(xpath); 2747 replace(fullXPath, fullXPath, vxmlValue); 2748 } else { 2749 if (debugging) { 2750 System.out.println("handlePath: wantRevertToBaseline true"); 2751 } 2752 } 2753 } 2754 2755 private boolean wantRevertToBaseline(String xpath, String vxmlValue) { 2756 String localeID = cldrFileToFilter.getLocaleID(); 2757 boolean debugging = false; // xpath.contains("Ciudad_Juarez"); 2758 // boolean deb = 2759 // "//ldml/dates/timeZoneNames/zone[@type=\"America/Ciudad_Juarez\"]/exemplarCity".equals(xpath); 2760 // boolean deb = ("ru".equals(localeID) && 2761 // "//ldml/dates/timeZoneNames/zone[@type=\"America/Ciudad_Juarez\"]/exemplarCity".equals(xpath)); 2762 if (debugging) { 2763 System.out.println("wantRevertToBaseline: got Ciudad_Juarez"); 2764 } 2765 String fullXPath = vxmlFile.getFullXPath(xpath); 2766 if (!changesWereAllowed(localeID, xpath, fullXPath)) { 2767 // criterion 2: if Survey Tool did NOT allow changes in the locale/path 2768 // in v43, MUST revert to baseline 2769 if (debugging) { 2770 System.out.println( 2771 "wantRevertToBaseline: return true since changes not allowed"); 2772 } 2773 return true; 2774 } 2775 if (!CldrUtility.INHERITANCE_MARKER.equals(vxmlValue)) { 2776 // criterion zero: if vxml value is not ↑↑↑, don't revert to baseline 2777 if (debugging) { 2778 System.out.println("wantRevertToBaseline: return for 0"); 2779 } 2780 return false; 2781 } 2782 // String baselineValue = baselineFileResolved.getStringValue(xpath); 2783 String baselineValue = baselineFileUnresolved.getStringValue(xpath); 2784 if 
(baselineValue == null 2785 || CldrUtility.INHERITANCE_MARKER.equals(baselineValue)) { 2786 // criterion 1: if baseline value is not a hard value, don't revert to 2787 // baseline 2788 if (debugging) { 2789 System.out.println( 2790 "wantRevertToBaseline: return for 1; baselineValue = " 2791 + baselineValue); 2792 } 2793 return false; 2794 } 2795 Output<String> inheritancePathWhereFound = new Output<>(); 2796 Output<String> localeWhereFound = new Output<>(); 2797 baselineFileResolved.getBaileyValue( 2798 xpath, inheritancePathWhereFound, localeWhereFound); 2799 if (localeID.equals(localeWhereFound.value) 2800 || xpath.equals(inheritancePathWhereFound.value)) { 2801 // criterion 3: if bailey value is not from different path and locale, 2802 // don't revert to baseline 2803 if (debugging) { 2804 System.out.println( 2805 "wantRevertToBaseline: found at " 2806 + localeWhereFound.value 2807 + " " 2808 + inheritancePathWhereFound.value); 2809 System.out.println("wantRevertToBaseline: return for 3"); 2810 } 2811 return false; 2812 } 2813 if (debugging) { 2814 System.out.println("wantRevertToBaseline: return true"); 2815 } 2816 return true; 2817 } 2818 2819 private boolean changesWereAllowed( 2820 String localeID, String xpath, String fullXPath) { 2821 boolean isError = pathHasError(localeID, xpath); 2822 String oldValue = baselineFileUnresolved.getWinningValue(xpath); 2823 boolean isMissing = 2824 (oldValue == null 2825 || CLDRFile.DraftStatus.forXpath(fullXPath).ordinal() 2826 <= CLDRFile.DraftStatus.provisional.ordinal()); 2827 String locOrAncestor = localeID; 2828 while (!"root".equals(locOrAncestor)) { 2829 if (SubmissionLocales.allowEvenIfLimited( 2830 locOrAncestor, xpath, isError, isMissing)) { 2831 return true; 2832 } 2833 locOrAncestor = LocaleIDParser.getParent(locOrAncestor); 2834 } 2835 return false; 2836 } 2837 2838 /** 2839 * These were derived from all errors found running this command: java 2840 * -DCLDR_DIR=$(pwd) -jar tools/cldr-code/target/cldr-code.jar 
check -S 2841 * common,seed -e -z FINAL_TESTING >> org.unicode.cldr.test.ConsoleCheckCLDR 2842 * 2843 * <p>TODO: this is incomplete? Should include some "errors" that are not in 2844 * personNames?? 2845 */ 2846 private final String[] ERR_LOCALES_PATHS = 2847 new String[] { 2848 "ja", 2849 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-prefix\"]", 2850 "nl_BE", 2851 "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname\"]", 2852 "yue", 2853 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"title\"]", 2854 "yue", 2855 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-prefix\"]", 2856 "yue", 2857 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-core\"]", 2858 "zh", 2859 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"title\"]", 2860 "zh", 2861 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-prefix\"]", 2862 "zh", 2863 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-core\"]", 2864 "zh_Hant", 2865 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"title\"]", 2866 "zh_Hant", 2867 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-prefix\"]", 2868 "zh_Hant", 2869 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-core\"]", 2870 }; 2871 2872 private boolean pathHasError(String localeID, String xpath) { 2873 for (int i = 0; i < ERR_LOCALES_PATHS.length; i += 2) { 2874 String errLoc = ERR_LOCALES_PATHS[i]; 2875 String errPath = ERR_LOCALES_PATHS[i + 1]; 2876 if (localeID.equals(errLoc) && xpath.equals(errPath)) { 2877 return true; 2878 } 2879 } 2880 return false; 2881 } 2882 2883 @Override 2884 public void handleEnd() { 2885 // look for paths in vxmlFile that aren't in baselineFileUnresolved 2886 final Set<String> vPaths = new HashSet<>(); 2887 final Set<String> bPaths = 
new HashSet<>(); 2888 vxmlFile.getPaths("", null, vPaths); 2889 baselineFileUnresolved.getPaths("", null, bPaths); 2890 vPaths.removeAll(bPaths); 2891 for (final String dPath : vPaths) { 2892 // System.out.println(">!> " + dPath); 2893 final String fPath = vxmlFile.getFullXPath(dPath); 2894 add( 2895 fPath, 2896 vxmlFile.getWinningValue(fPath), 2897 "in vxmlFile, missing from baseline"); 2898 } 2899 } 2900 }); 2901 2902 fixList.add( 2903 'V', 2904 "Fix values that would inherit laterally", 2905 new CLDRFilter() { 2906 boolean skip = false; 2907 boolean isL1 = false; 2908 String parentId = null; 2909 CLDRFile parentFile = null; 2910 Set<String> pathsHandled = new HashSet<>(); 2911 String onlyValues = null; 2912 String message = null; 2913 2914 @Override 2915 public void handleStart() { 2916 // skip if the locale is root. 2917 skip = getLocaleID().equals(XMLSource.ROOT_ID); 2918 if (!skip) { 2919 parentId = LocaleIDParser.getParent(getLocaleID()); 2920 // This locale is "L1" (level one) if its parent is root. 
isL1 = parentId.equals(XMLSource.ROOT_ID);
                            parentFile = null; // lazy evaluate
                        }
                        pathsHandled.clear();
                        // First pass (the normal path walk): only look at inheritance markers.
                        onlyValues = CldrUtility.INHERITANCE_MARKER;
                        message = "fix ↑↑↑ lateral";
                    }

                    /**
                     * For a path whose value equals {@code onlyValues}, checks whether its bailey
                     * value is found at a different path (lateral inheritance) and, if so,
                     * decides whether to replace the value with the bailey value.
                     */
                    @Override
                    public void handlePath(String xpath) {
                        if (skip) {
                            return;
                        }
                        String value = cldrFileToFilter.getStringValue(xpath);
                        if (!Objects.equals(onlyValues, value)) {
                            return;
                        }

                        // remember which paths we handle, so we can skip them in handleEnd
                        pathsHandled.add(xpath);

                        Output<String> pathWhereFound = new Output<>();
                        Output<String> localeWhereFound = new Output<>();
                        String baileyValue =
                                getResolved()
                                        .getBaileyValue(xpath, pathWhereFound, localeWhereFound);
                        if (baileyValue != null
                                && !xpath.equals(pathWhereFound.value)
                                && !GlossonymConstructor.PSEUDO_PATH.equals(pathWhereFound.value)) {

                            // we have lateral inheritance, so we decide whether to harden.

                            boolean harden = false;
                            String message2 = "";

                            // if we are L1, then we make a hard value, to protect higher values

                            if (isL1) {
                                harden = true;
                                message2 = "; L1";
                            } else {
                                // for all others, we check to see if the parent's lateral value is
                                // the same as ours
                                // If it is, we are ok, since one of that parent's parents will be
                                // hardened

                                if (parentFile == null) {
                                    parentFile = factory.make(parentId, true);
                                }
                                String parentValue = parentFile.getStringValueWithBailey(xpath);
                                if (!baileyValue.equals(parentValue)) {
                                    harden = true; // true if parentValue == null, see comment below
                                }
                                message2 = "; L2+";

                                // Problem case: the parent value is null (not inheritance marker)
                                // but the child value is ^^^.
                                // See if we need to fix that.
2979 // Currently harden is true if parentValue is null, which, as of 2980 // 2023-09-20, happens here for only two paths, both in locale 2981 // en_AU: 2982 // //ldml/dates/calendars/calendar[@type="islamic"]/dateTimeFormats/availableFormats/dateFormatItem[@id="yMEd"] 2983 // //ldml/dates/calendars/calendar[@type="islamic"]/dateTimeFormats/availableFormats/dateFormatItem[@id="yMd"] 2984 } 2985 if (harden) { 2986 String fullPath = cldrFileToFilter.getFullXPath(xpath); 2987 replace(fullPath, fullPath, baileyValue, message + message2); 2988 } 2989 } 2990 } 2991 2992 @Override 2993 public void handleEnd() { 2994 if (skip || isL1) { 2995 return; 2996 } 2997 // Handle all the null cases that are in the L1 value. 2998 onlyValues = null; 2999 message = "fix null lateral"; 3000 3001 List<String> parentChain = LocaleIDParser.getParentChain(getLocaleID()); 3002 String localeL1 = 3003 parentChain.get(parentChain.size() - 2); // get last before root 3004 CLDRFile fileL1 = factory.make(localeL1, false); // only unresolved paths 3005 for (String path : fileL1) { 3006 if (!pathsHandled.contains(path)) { 3007 handlePath(path); 3008 } 3009 } 3010 } 3011 }); 3012 3013 fixList.add( 3014 'D', 3015 "Downgrade paths", 3016 new CLDRFilter() { 3017 3018 boolean skipLocale = false; 3019 3020 @Override 3021 public void handleStart() { 3022 // TODO Auto-generated method stub 3023 super.handleSetup(); 3024 String locale = getLocaleID(); 3025 skipLocale = 3026 locale.equals("en") 3027 || locale.equals("root") 3028 || !DowngradePaths.lookingAt(locale); 3029 } 3030 3031 @Override 3032 public void handlePath(String xpath) { 3033 if (skipLocale) { // fast path 3034 return; 3035 } 3036 String value = cldrFileToFilter.getStringValue(xpath); 3037 if (!DowngradePaths.lookingAt(getLocaleID(), xpath, value)) { 3038 return; 3039 } 3040 String fullPath = cldrFileToFilter.getFullXPath(xpath); 3041 XPathParts fullParts = XPathParts.getFrozenInstance(fullPath); 3042 String oldDraft = 
fullParts.getAttributeValue(-1, "draft"); 3043 if (oldDraft != null) { 3044 DraftStatus oldDraftEnum = DraftStatus.forString(oldDraft); 3045 if (oldDraftEnum == DraftStatus.provisional 3046 || oldDraftEnum == DraftStatus.unconfirmed) { 3047 return; 3048 } 3049 } 3050 fullParts = fullParts.cloneAsThawed(); 3051 fullParts.setAttribute(-1, "draft", "provisional"); 3052 replace(fullPath, fullParts.toString(), value, "Downgrade to provisional"); 3053 } 3054 }); 3055 3056 fixList.add( 3057 'G', 3058 "upGrade basic paths to contributed", 3059 new CLDRFilter() { 3060 3061 // boolean skipLocale = false; 3062 CoverageLevel2 coverageLeveler; 3063 final CLDRFile.DraftStatus TARGET_STATUS = DraftStatus.contributed; 3064 final Level TARGET_LEVEL = Level.BASIC; 3065 3066 @Override 3067 public void handleStart() { 3068 super.handleSetup(); 3069 String locale = getLocaleID(); 3070 // skipLocale = false; 3071 final CLDRConfig config = CLDRConfig.getInstance(); 3072 coverageLeveler = 3073 CoverageLevel2.getInstance( 3074 config.getSupplementalDataInfo(), locale); 3075 } 3076 3077 @Override 3078 public void handlePath(String xpath) { 3079 // if (skipLocale) { // fast path 3080 // return; 3081 // } 3082 if (!TARGET_LEVEL.isAtLeast(coverageLeveler.getLevel(xpath))) { 3083 return; // skip 3084 } 3085 String fullPath = cldrFileToFilter.getFullXPath(xpath); 3086 final CLDRFile.DraftStatus oldDraft = 3087 CLDRFile.DraftStatus.forXpath(fullPath); 3088 if (oldDraft.compareTo(TARGET_STATUS) > 0) { 3089 return; // already at contributed or better 3090 } 3091 // Now we need the value 3092 final String value = cldrFileToFilter.getStringValue(xpath); 3093 final String newPath = TARGET_STATUS.updateXPath(fullPath); 3094 replace(fullPath, newPath, value, "Upgrade to " + TARGET_STATUS.name()); 3095 } 3096 }); 3097 3098 fixList.add( 3099 'Z', 3100 "Zero lateral: convert inheritance marker to specific value if inheritance would be lateral/problematic", 3101 new CLDRFilter() { 3102 @Override 3103 public 
void handlePath(String xpath) { 3104 String value = cldrFileToFilter.getStringValue(xpath); 3105 if (!CldrUtility.INHERITANCE_MARKER.equals(value)) { 3106 return; 3107 } 3108 String newValue = 3109 VoteResolver.reviseInheritanceAsNeeded(xpath, value, getResolved()); 3110 if (value.equals(newValue)) { 3111 return; 3112 } 3113 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 3114 replace(fullXPath, fullXPath, newValue); 3115 } 3116 }); 3117 } 3118 getLast2Dirs(File sourceDir1)3119 public static String getLast2Dirs(File sourceDir1) { 3120 String[] pathElements = sourceDir1.toString().split("/"); 3121 return pathElements[pathElements.length - 2] 3122 + "/" 3123 + pathElements[pathElements.length - 1] 3124 + "/"; 3125 } 3126 3127 // references="http://www.stat.fi/tk/tt/luokitukset/lk/kieli_02.html" 3128 3129 private static class ValuePair { 3130 String value; 3131 String fullxpath; 3132 } 3133 3134 /** 3135 * Find the set of xpaths that (a) have all the same values (if present) in the children (b) are 3136 * absent in the parent, (c) are different than what is in the fully resolved parent and add 3137 * them. 
3138 */ fixIdenticalChildren(Factory cldrFactory, CLDRFile k, CLDRFile replacements)3139 static void fixIdenticalChildren(Factory cldrFactory, CLDRFile k, CLDRFile replacements) { 3140 String key = k.getLocaleID(); 3141 if (key.equals("root")) return; 3142 Set<String> availableChildren = cldrFactory.getAvailableWithParent(key, true); 3143 if (availableChildren.size() == 0) return; 3144 Set<String> skipPaths = new HashSet<>(); 3145 Map<String, ValuePair> haveSameValues = new TreeMap<>(); 3146 CLDRFile resolvedFile = cldrFactory.make(key, true); 3147 // get only those paths that are not in "root" 3148 resolvedFile.forEach(skipPaths::add); 3149 3150 // first, collect all the paths 3151 for (String locale : availableChildren) { 3152 if (locale.indexOf("POSIX") >= 0) continue; 3153 CLDRFile item = cldrFactory.make(locale, false); 3154 for (String xpath : item) { 3155 if (skipPaths.contains(xpath)) continue; 3156 // skip certain elements 3157 if (xpath.indexOf("/identity") >= 0) continue; 3158 if (xpath.startsWith("//ldml/numbers/currencies/currency")) continue; 3159 if (xpath.startsWith("//ldml/dates/timeZoneNames/metazone[")) continue; 3160 if (xpath.indexOf("[@alt") >= 0) continue; 3161 if (xpath.indexOf("/alias") >= 0) continue; 3162 3163 // must be string vale 3164 ValuePair v1 = new ValuePair(); 3165 v1.value = item.getStringValue(xpath); 3166 v1.fullxpath = item.getFullXPath(xpath); 3167 3168 ValuePair vAlready = haveSameValues.get(xpath); 3169 if (vAlready == null) { 3170 haveSameValues.put(xpath, v1); 3171 } else if (!v1.value.equals(vAlready.value) 3172 || !v1.fullxpath.equals(vAlready.fullxpath)) { 3173 skipPaths.add(xpath); 3174 haveSameValues.remove(xpath); 3175 } 3176 } 3177 } 3178 // at this point, haveSameValues is all kosher, so add items 3179 for (String xpath : haveSameValues.keySet()) { 3180 ValuePair v = haveSameValues.get(xpath); 3181 // if (v.value.equals(resolvedFile.getStringValue(xpath)) 3182 // && 
v.fullxpath.equals(resolvedFile.getFullXPath(xpath))) continue; 3183 replacements.add(v.fullxpath, v.value); 3184 } 3185 } 3186 fixAltProposed()3187 static void fixAltProposed() { 3188 throw new IllegalArgumentException(); 3189 } 3190 3191 /** Perform various fixes TODO add options to pick which one. */ fix(CLDRFile k, String inputOptions, String config, Factory cldrFactory)3192 private static void fix(CLDRFile k, String inputOptions, String config, Factory cldrFactory) { 3193 3194 // TODO before modifying, make sure that it is fully resolved. 3195 // then minimize against the NEW parents 3196 3197 Set<String> removal = new TreeSet<>(k.getComparator()); 3198 CLDRFile replacements = SimpleFactory.makeFile("temp"); 3199 fixList.setFile(k, inputOptions, cldrFactory, removal, replacements); 3200 3201 for (String xpath : k) { 3202 fixList.handlePath(xpath); 3203 } 3204 fixList.handleEnd(); 3205 3206 // remove bad attributes 3207 3208 if (inputOptions.indexOf('v') >= 0) { 3209 CLDRTest.checkAttributeValidity(k, null, removal); 3210 } 3211 3212 // raise identical elements 3213 3214 if (inputOptions.indexOf('i') >= 0) { 3215 fixIdenticalChildren(cldrFactory, k, replacements); 3216 } 3217 3218 // now do the actions we collected 3219 3220 if (SHOW_DETAILS) { 3221 if (removal.size() != 0 || !replacements.isEmpty()) { 3222 if (!removal.isEmpty()) { 3223 System.out.println("Removals:"); 3224 for (String path : removal) { 3225 System.out.println(path + " =\t " + k.getStringValue(path)); 3226 } 3227 } 3228 if (!replacements.isEmpty()) { 3229 System.out.println("Additions/Replacements:"); 3230 System.out.println(replacements.toString().replaceAll("\u00A0", "<NBSP>")); 3231 } 3232 } 3233 } 3234 if (removal.size() != 0) { 3235 k.removeAll(removal, COMMENT_REMOVALS); 3236 } 3237 k.putAll(replacements, CLDRFile.MERGE_REPLACE_MINE); 3238 } 3239 3240 /** 3241 * How many steps from root is the given locale? 
3242 * 3243 * @param origLoc 3244 * @return the number of steps; e.g., 0 for "root", -1 for "code-fallback", 1 for "fr", 2 for 3245 * "fr_CA", ... 3246 */ stepsFromRoot(String origLoc)3247 private static int stepsFromRoot(String origLoc) { 3248 int steps = 0; 3249 String loc = origLoc; 3250 while (!LocaleNames.ROOT.equals(loc)) { 3251 loc = LocaleIDParser.getParent(loc); 3252 if (loc == null) { 3253 throw new IllegalArgumentException("Missing root in inheritance chain"); 3254 } 3255 ++steps; 3256 } 3257 System.out.println("stepsFromRoot = " + steps + " for " + origLoc); 3258 return steps; 3259 } 3260 3261 /** Internal */ testJavaSemantics()3262 public static void testJavaSemantics() { 3263 Collator caseInsensitive = Collator.getInstance(ULocale.ROOT); 3264 caseInsensitive.setStrength(Collator.SECONDARY); 3265 Set<String> setWithCaseInsensitive = new TreeSet<>(caseInsensitive); 3266 setWithCaseInsensitive.addAll(Arrays.asList(new String[] {"a", "b", "c"})); 3267 Set<String> plainSet = new TreeSet<>(); 3268 plainSet.addAll(Arrays.asList(new String[] {"a", "b", "B"})); 3269 System.out.println("S1 equals S2?\t" + setWithCaseInsensitive.equals(plainSet)); 3270 System.out.println("S2 equals S1?\t" + plainSet.equals(setWithCaseInsensitive)); 3271 setWithCaseInsensitive.removeAll(plainSet); 3272 System.out.println("S1 removeAll S2 is empty?\t" + setWithCaseInsensitive.isEmpty()); 3273 } 3274 } 3275