package org.unicode.cldr.tool;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import com.google.common.io.Files;
import com.ibm.icu.util.ICUUncheckedIOException;
import com.ibm.icu.util.Output;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.cldr.tool.Option.Options;
import org.unicode.cldr.tool.Option.Params;
import org.unicode.cldr.util.AnnotationUtil;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.DtdType;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.GlossonymConstructor;
import org.unicode.cldr.util.InternalCldrException;
import org.unicode.cldr.util.Level;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.LocaleNames;
import org.unicode.cldr.util.LogicalGrouping;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.ParentLocaleComponent;
import org.unicode.cldr.util.XMLSource;
import org.unicode.cldr.util.XPathParts;

/**
 * Command-line tool that copies a source "common" directory tree to a destination tree. Files in
 * ldml directories (other than the {@link #COPY_ANYWAY} ones, rbnf, and main/root.xml) are
 * rewritten with a reduced set of paths; everything else is copied as-is. See {@link MyOptions}
 * for the switches that control which paths are dropped or restored.
 */
public class GenerateProductionData {
    private static boolean DEBUG = false;
    private static boolean VERBOSE = false;
    /** Optional filter on file base names / locale IDs; null means "process everything". */
    private static Matcher FILE_MATCH = null;

    private static String SOURCE_COMMON_DIR = null;
    private static String DEST_COMMON_DIR = null;

    private static boolean ADD_LOGICAL_GROUPS = false;
    private static boolean ADD_DATETIME = false;
    private static boolean ADD_SIDEWAYS = false;
    private static boolean ADD_ROOT = false;
    private static boolean INCLUDE_COMPREHENSIVE = false;
    private static boolean CONSTRAINED_RESTORATION = false;

    /** Directories under "common" that are copied in addition to the DtdType directories. */
    private static final Set<String> NON_XML =
            ImmutableSet.of("dtd", "properties", "testData", "uca");

    private static final Set<String> COPY_ANYWAY =
            ImmutableSet.of(
                    "casing",
                    "collation"); // don't want to "clean up", makes format difficult to use
    private static final SupplementalDataInfo SDI =
            CLDRConfig.getInstance().getSupplementalDataInfo();

    /**
     * Command-line options. The constant names are part of the command-line interface (they are
     * handed to {@link Option}), so they must not be renamed.
     */
    enum MyOptions {
        sourceDirectory(
                new Params()
                        .setHelp("source common directory")
                        .setDefault(CLDRPaths.COMMON_DIRECTORY)
                        .setMatch(".*")),
        destinationDirectory(
                new Params()
                        .setHelp("destination common directory")
                        .setDefault(CLDRPaths.STAGING_DIRECTORY + "production/common")
                        .setMatch(".*")),
        logicalGroups(
                new Params()
                        .setHelp("add path/values for logical groups")
                        .setDefault("true")
                        .setMatch("true|false")),
        time(
                new Params()
                        .setHelp("add path/values for stock date/time/datetime")
                        .setDefault("true")
                        .setMatch("true|false")),
        Sideways(
                new Params()
                        .setHelp("add path/values for sideways inheritance")
                        .setDefault("true")
                        .setMatch("true|false")),
        root(
                new Params()
                        .setHelp("add path/values for root and code-fallback")
                        .setDefault("true")
                        .setMatch("true|false")),
        constrainedRestoration(
                new Params()
                        .setHelp("only add inherited paths that were in original file")
                        .setDefault("true")
                        .setMatch("true|false")),
        includeComprehensive(
                new Params()
                        // Help text fixed: when this is "true" comprehensive paths are KEPT;
                        // when "false" they are removed (see minimizeLdmlFile).
                        .setHelp("include comprehensive paths — otherwise just to modern level")
                        .setDefault("true")
                        .setMatch("true|false")),
        verbose(new Params().setHelp("verbose debugging messages")),
        Debug(new Params().setHelp("debug")),
        fileMatch(new Params().setHelp("regex to match patterns").setMatch(".*")),
        ;

        // BOILERPLATE TO COPY
        final Option option;

        MyOptions(Params params) {
            option = new Option(this, params);
        }

        private static Options myOptions = new Options();

        static {
            for (MyOptions option : MyOptions.values()) {
                myOptions.add(option, option.option);
            }
        }

        /**
         * Parses the command line into the option values.
         *
         * @param args the command-line arguments
         * @param showArguments forwarded to {@link Options#parse} (previously ignored and
         *     hard-coded to true)
         * @return whatever {@link Options#parse} returns
         */
        private static Set<String> parse(String[] args, boolean showArguments) {
            return myOptions.parse(MyOptions.values()[0], args, showArguments);
        }
    }

    /**
     * Entry point: reads the options into the static settings, copies every directory of every
     * {@link DtdType} from the source to the destination, then double-checks the "special"
     * directories.
     *
     * @param args command-line arguments, see {@link MyOptions}
     */
    public static void main(String[] args) {
        // TODO rbnf and segments don't have modern coverage; fix there.

        MyOptions.parse(args, true);
        SOURCE_COMMON_DIR = MyOptions.sourceDirectory.option.getValue();
        DEST_COMMON_DIR = MyOptions.destinationDirectory.option.getValue();

        // debugging
        VERBOSE = MyOptions.verbose.option.doesOccur();
        DEBUG = MyOptions.Debug.option.doesOccur();
        String fileMatch = MyOptions.fileMatch.option.getValue();
        if (fileMatch != null) {
            FILE_MATCH = Pattern.compile(fileMatch).matcher("");
        }

        // controls for minimization
        ADD_LOGICAL_GROUPS = "true".equalsIgnoreCase(MyOptions.logicalGroups.option.getValue());
        ADD_DATETIME = "true".equalsIgnoreCase(MyOptions.time.option.getValue());
        ADD_SIDEWAYS = "true".equalsIgnoreCase(MyOptions.Sideways.option.getValue());
        ADD_ROOT = "true".equalsIgnoreCase(MyOptions.root.option.getValue());

        // constraints
        INCLUDE_COMPREHENSIVE =
                "true".equalsIgnoreCase(MyOptions.includeComprehensive.option.getValue());
        CONSTRAINED_RESTORATION =
                "true".equalsIgnoreCase(MyOptions.constrainedRestoration.option.getValue());

        // get directories

        Map<File, File> specialDirectories = new TreeMap<>();

        for (DtdType type : DtdType.values()) {
            boolean isLdmlDtdType = type == DtdType.ldml;

            // bit of a hack, using the ldmlICU — otherwise unused! — to get the
            // nonXML files.
            Set<String> directories = (type == DtdType.ldmlICU) ? NON_XML : type.directories;

            for (String dir : directories) {
                File sourceDir = new File(SOURCE_COMMON_DIR, dir);
                File destinationDir = new File(DEST_COMMON_DIR, dir);
                Stats stats = new Stats();
                copyFilesAndReturnIsEmpty(sourceDir, destinationDir, null, isLdmlDtdType, stats);
                if (directoryIsSpecial(sourceDir.getAbsolutePath())) {
                    specialDirectories.put(sourceDir, destinationDir);
                }
            }
        }

        for (Map.Entry<File, File> entry : specialDirectories.entrySet()) {
            doubleCheckSpecialPaths(entry.getKey(), entry.getValue());
        }
    }

    /** Counters accumulated while processing the files of one directory. */
    private static class Stats {
        /** Number of files written or copied. */
        long files;
        /** Number of paths removed from the written ldml files. */
        long removed;
        /** Number of paths restored ("retained") in the written ldml files. */
        long retained;
        /** Number of paths present in the written ldml files. */
        long remaining;

        Stats clear() {
            files = removed = retained = remaining = 0;
            return this;
        }

        @Override
        public String toString() {
            return "files="
                    + files
                    + (removed + retained + remaining == 0
                            ? ""
                            : "; removed="
                                    + removed
                                    + "; retained="
                                    + retained
                                    + "; remaining="
                                    + remaining);
        }

        /** Prints {@code label} followed by the counters, unless all path counters are zero. */
        public void showNonZero(String label) {
            if (removed + retained + remaining != 0) {
                System.out.println(label + toString());
            }
        }
    }

    /**
     * Copy files in directories, recursively.
     *
     * @param sourceFile the source file or directory
     * @param destinationFile the corresponding destination file or directory
     * @param factory for a plain file: if non-null the file is treated as ldml and minimized, if
     *     null it is copied as-is; ignored for directories (each directory builds its own)
     * @param isLdmlDtdType whether the enclosing DtdType directory is an ldml directory
     * @param stats counters updated when {@code sourceFile} is a plain file
     * @return true if the file is an ldml file with empty content.
     */
    private static boolean copyFilesAndReturnIsEmpty(
            File sourceFile,
            File destinationFile,
            Factory factory,
            boolean isLdmlDtdType,
            final Stats stats) {
        if (sourceFile.isDirectory()) {
            copyDirectory(sourceFile, destinationFile, isLdmlDtdType);
            return false;
        } else if (factory != null) {
            return minimizeLdmlFile(sourceFile, destinationFile, factory, stats);
        } else {
            copyPlainFile(sourceFile, destinationFile, stats);
            return false;
        }
    }

    /**
     * Returns the sorted entry names of a directory.
     *
     * @throws ICUUncheckedIOException if the directory cannot be listed ({@link File#list()}
     *     returned null)
     */
    private static Set<String> sortedNames(File directory) {
        String[] names = directory.list();
        if (names == null) {
            throw new ICUUncheckedIOException("Can't list directory " + directory);
        }
        return new TreeSet<>(Arrays.asList(names));
    }

    /**
     * Processes one directory: prepares the destination directory, processes every entry
     * (recursively), and afterwards deletes written ldml files that turned out empty and have no
     * non-empty descendants (never in "main").
     */
    private static void copyDirectory(
            File sourceFile, File destinationFile, boolean isLdmlDtdType) {
        System.out.println(sourceFile + " => " + destinationFile);
        if (!destinationFile.mkdirs()) {
            // mkdirs() returns false when the directory was not newly created; normally that
            // means it already existed, so remove its old contents.
            File[] oldContents = destinationFile.listFiles();
            if (oldContents == null) {
                // neither created nor listable (e.g. a plain file is in the way)
                throw new ICUUncheckedIOException(
                        "Can't create or list destination directory " + destinationFile);
            }
            for (File old : oldContents) {
                old.delete();
            }
        }

        Set<String> sorted = sortedNames(sourceFile);

        if (COPY_ANYWAY.contains(sourceFile.getName())) { // special cases
            isLdmlDtdType = false;
        }
        // The factory is per-directory; when it is null, we just copy files.
        final Factory factory = isLdmlDtdType ? Factory.make(sourceFile.toString(), ".*") : null;
        boolean isMainDir = factory != null && sourceFile.getName().contentEquals("main");
        boolean isRbnfDir = factory != null && sourceFile.getName().contentEquals("rbnf");
        boolean isAnnotationsDir =
                factory != null && sourceFile.getName().startsWith("annotations");

        Set<String> emptyLocales = new HashSet<>();
        final Stats directoryStats = new Stats();
        for (String file : sorted) {
            File sourceFile2 = new File(sourceFile, file);
            File destinationFile2 = new File(destinationFile, file);
            if (VERBOSE) {
                System.out.println("\t" + file);
            }

            // special step to just copy certain files like main/root.xml file
            Factory currFactory = factory;
            if (isMainDir) {
                if (file.equals("root.xml")) {
                    currFactory = null;
                }
            } else if (isRbnfDir) {
                currFactory = null;
            }

            // when the currFactory is null, we just copy files as-is
            boolean isEmpty =
                    copyFilesAndReturnIsEmpty(
                            sourceFile2,
                            destinationFile2,
                            currFactory,
                            isLdmlDtdType,
                            directoryStats);
            if (isEmpty) { // only happens for ldml
                emptyLocales.add(getLocaleIdFromFileName(file));
            }
        }
        directoryStats.showNonZero("\tTOTAL:\t");
        // if there are empty ldml files, AND we aren't in /main/,
        // then remove any without children
        // (emptyLocales can only be non-empty when a non-null factory was used above)
        if (!emptyLocales.isEmpty() && !isMainDir) {
            Set<String> childless =
                    getChildless(emptyLocales, factory.getAvailable(), isAnnotationsDir);
            if (!childless.isEmpty()) {
                if (VERBOSE) {
                    System.out.println(
                            "\t" + destinationFile + "\tRemoving empty locales:" + childless);
                }
                for (String locale : childless) {
                    new File(destinationFile, locale + ".xml").delete();
                }
            }
        }
    }

    /**
     * Writes a reduced copy of one ldml file: paths whose value is missing, an inheritance marker,
     * or equal to the inherited (bailey) value are removed, subject to the ADD_* and
     * INCLUDE_COMPREHENSIVE settings; paths needed for logical groups, stock date/time patterns
     * and "special" paths are restored with their resolved values.
     *
     * @return true if no path of the source file survived the filtering (the file is still
     *     written; the directory level may delete it), false otherwise or if the file was skipped
     */
    private static boolean minimizeLdmlFile(
            File sourceFile, File destinationFile, Factory factory, Stats stats) {
        String file = sourceFile.getName();
        if (!file.endsWith(".xml")) {
            return false;
        }
        String localeId = getLocaleIdFromFileName(file);
        if (FILE_MATCH != null && !FILE_MATCH.reset(localeId).matches()) {
            return false;
        }
        boolean isRoot = localeId.equals(LocaleNames.ROOT);

        CLDRFile cldrFileUnresolved = factory.make(localeId, false);
        CLDRFile cldrFileResolved = factory.make(localeId, true);
        boolean gotOne = false;
        Set<String> toRemove = new TreeSet<>(); // TreeSet just makes debugging easier
        Set<String> toRetain = new TreeSet<>();
        Set<String> toRetainSpecial = new TreeSet<>();
        Output<String> pathWhereFound = new Output<>();
        Output<String> localeWhereFound = new Output<>();

        final boolean specialPathsAreRequired =
                areSpecialPathsRequired(localeId, sourceFile.toString());

        ImmutableSet<String> sortedPaths =
                ImmutableSortedSet.copyOf(cldrFileUnresolved); // sort for debugging

        for (String xpath : sortedPaths) {
            if (xpath.startsWith("//ldml/identity")) {
                continue;
            }

            String value = cldrFileUnresolved.getStringValue(xpath);
            if (value == null || CldrUtility.INHERITANCE_MARKER.equals(value)) {
                toRemove.add(xpath);
                continue;
            }

            // special-case the root values that are only for Survey Tool use

            if (isRoot) {
                if (AnnotationUtil.pathIsAnnotation(xpath)) {
                    toRemove.add(xpath);
                    continue;
                }
            }

            if (specialPathsAreRequired && pathIsSpecial(xpath)) {
                toRetainSpecial.add(xpath);
            }

            // Remove items that are the same as their bailey values.
            // However, two optional parameters change what happens
            // if ADD_SIDEWAYS is true, then we check for paths equal (conditionally, see the
            // method doc)
            // if ADD_ROOT is true, then we check for the found locale being root

            String bailey =
                    cldrFileResolved.getBaileyValue(xpath, pathWhereFound, localeWhereFound);
            if (value.equals(bailey)
                    && (!ADD_SIDEWAYS
                            || pathEqualsOrIsOkAltVariantOf(
                                    cldrFileResolved,
                                    xpath,
                                    pathWhereFound.value,
                                    localeId,
                                    localeWhereFound.value))
                    && (!ADD_ROOT
                            || (!Objects.equals(XMLSource.ROOT_ID, localeWhereFound.value)
                                    && !Objects.equals(
                                            XMLSource.CODE_FALLBACK_ID,
                                            localeWhereFound.value)))) {
                toRemove.add(xpath);
                continue;
            }

            // remove level=comprehensive (under setting)

            if (!INCLUDE_COMPREHENSIVE) {
                Level coverage = SDI.getCoverageLevel(xpath, localeId);
                if (coverage == Level.COMPREHENSIVE) {
                    toRemove.add(xpath);
                    continue;
                }
            }

            // if we got all the way to here, we have a non-empty result

            // check to see if we might need to flesh out logical groups
            // TODO Should be done in the converter tool!!
            if (ADD_LOGICAL_GROUPS && !LogicalGrouping.isOptional(cldrFileResolved, xpath)) {
                Set<String> paths = LogicalGrouping.getPaths(cldrFileResolved, xpath);
                if (paths != null && paths.size() > 1) {
                    for (String possiblePath : paths) {
                        // Unclear from API whether we need to do this filtering
                        if (!LogicalGrouping.isOptional(cldrFileResolved, possiblePath)) {
                            toRetain.add(possiblePath);
                        }
                    }
                }
            }

            // check to see if we might need to flesh out datetime.
            // TODO Should be done in the converter tool!!
            if (ADD_DATETIME && isDateTimePath(xpath)) {
                toRetain.addAll(dateTimePaths(xpath));
            }

            // past the gauntlet
            gotOne = true;
        }
        if (specialPathsAreRequired) {
            addSpecialPathsIfMissing(toRetainSpecial);
        }

        // we even add empty files, but can delete them back on the directory level.
        // The charset is explicit so the output does not depend on the platform default.
        // NOTE(review): assumes CLDRFile.write emits a UTF-8 XML declaration — confirm.
        try (PrintWriter pw = new PrintWriter(destinationFile, StandardCharsets.UTF_8.name())) {
            CLDRFile outCldrFile = cldrFileUnresolved.cloneAsThawed();

            // Remove paths, but pull out the ones to retain
            // example:
            // toRemove == {a b c} // c may have ^^^ value
            // toRetain == {b c d} // d may have ^^^ value

            if (DEBUG) {
                showIfNonZero(localeId, "removing", toRemove);
                showIfNonZero(localeId, "retaining", toRetain);
                showIfNonZero(localeId, "retaining for special paths", toRetainSpecial);
            }
            if (CONSTRAINED_RESTORATION) {
                toRetain.retainAll(toRemove); // only add paths that were there already
                // toRetain == {b c}
                if (DEBUG) {
                    showIfNonZero(localeId, "constrained retaining", toRetain);
                }
            }
            // add "special" paths even if CONSTRAINED_RESTORATION
            toRetain.addAll(toRetainSpecial);

            boolean changed0 = toRemove.removeAll(toRetain);
            // toRemove == {a}
            if (DEBUG && changed0) {
                showIfNonZero(localeId, "final removing", toRemove);
            }

            boolean changed = toRetain.removeAll(toRemove);
            // toRetain = {b c d} or if constrained, {b c}
            if (DEBUG && changed) {
                showIfNonZero(localeId, "final retaining", toRetain);
            }

            outCldrFile.removeAll(toRemove, false);
            if (DEBUG) {
                for (String xpath : toRemove) {
                    System.out.println(
                            localeId
                                    + ": removing: «"
                                    + cldrFileUnresolved.getStringValue(xpath)
                                    + "», "
                                    + xpath);
                }
            }

            // now set any null values to bailey values if not present
            for (String xpath : toRetain) {
                String value = cldrFileResolved.getStringValue(xpath);
                if (value == null || value.equals(CldrUtility.INHERITANCE_MARKER)) {
                    throw new IllegalArgumentException(
                            localeId + ": " + value + " in value for " + xpath);
                } else {
                    if (DEBUG) {
                        String oldValue = cldrFileUnresolved.getStringValue(xpath);
                        System.out.println(
                                "Restoring: «" + oldValue + "» ⇒ «" + value + "»\t" + xpath);
                    }
                    outCldrFile.add(xpath, value);
                }
            }

            // double-check results, counting the paths that remain in the output
            int count = 0;
            for (String xpath : outCldrFile) {
                String value = outCldrFile.getStringValue(xpath);
                if (value == null || value.equals(CldrUtility.INHERITANCE_MARKER)) {
                    throw new IllegalArgumentException(
                            localeId + ": " + value + " in value for " + xpath);
                }
                ++count; // was never incremented, so "remaining" was always reported as 0
            }

            outCldrFile.write(pw);
            ++stats.files;
            stats.removed += toRemove.size();
            stats.retained += toRetain.size();
            stats.remaining += count;
        } catch (IOException e) {
            // FileNotFoundException (can't open) or UnsupportedEncodingException
            throw new ICUUncheckedIOException(
                    "Can't copy " + sourceFile + " to " + destinationFile + " — ", e);
        }
        return !gotOne;
    }

    /**
     * Copies a non-ldml file as-is, unless FILE_MATCH is set and does not match the file name
     * without its last extension.
     */
    private static void copyPlainFile(File sourceFile, File destinationFile, Stats stats) {
        if (FILE_MATCH != null) {
            String file = sourceFile.getName();
            int dotPos = file.lastIndexOf('.');
            // Fixed: the base name ends AT the last dot (was file.length() - dotPos, which
            // only works by coincidence, e.g. "en_US.xml" gave "en_U").
            String baseName = dotPos >= 0 ? file.substring(0, dotPos) : file;
            if (!FILE_MATCH.reset(baseName).matches()) {
                return;
            }
        }
        // for now, just copy
        ++stats.files;
        copyFiles(sourceFile, destinationFile);
    }

    /**
     * Strips the last four characters (expected to be ".xml") from a file name.
     *
     * @param file a file name ending in ".xml"; callers check or assume the suffix
     * @return the name without the suffix, used as the locale ID
     */
    private static String getLocaleIdFromFileName(String file) {
        return file.substring(0, file.length() - 4); // drop ".xml"
    }

    /**
     * Are any "special paths" required to be explicitly included for this locale, in this
     * directory?
     *
     * <p>Currently this requirement applies only to Arabic defaultNumberingSystem, only in
     * common/main
     *
     * @param localeId the locale ID such as "ar_KM"
     * @param directory the string describing the source directory; currently only "common/main" has
     *     special paths
     * @return true if required, else false
     */
    private static boolean areSpecialPathsRequired(String localeId, String directory) {
        return localeIsSpecial(localeId) && directoryIsSpecial(directory);
    }

    /** Returns true if the directory string contains "common/main". */
    private static boolean directoryIsSpecial(String directory) {
        return directory.contains("common/main");
    }

    /** Returns true for "ar" and for every "ar_…" locale except "ar_001". */
    private static boolean localeIsSpecial(String localeId) {
        return localeId.equals("ar") || (localeId.startsWith("ar_") && !"ar_001".equals(localeId));
    }

    private static final String[] SPECIAL_PATHS =
            new String[] {
                "//ldml/numbers/defaultNumberingSystem",
                "//ldml/numbers/defaultNumberingSystem[@alt=\"latn\"]"
            };
    private static final Set<String> SPECIAL_PATH_SET = new TreeSet<>(Arrays.asList(SPECIAL_PATHS));

    /**
     * Is the given path a "special path" required to be explicitly included?
     *
     * @param xpath the path
     * @return true if this particular path is required, else false
     */
    private static boolean pathIsSpecial(String xpath) {
        return SPECIAL_PATH_SET.contains(xpath);
    }

    /**
     * Ensures that every special path is in the given set.
     *
     * @param toRetainSpecial the set to complete; modified in place
     */
    private static void addSpecialPathsIfMissing(Set<String> toRetainSpecial) {
        // Set.addAll only adds the elements that are not already present
        toRetainSpecial.addAll(SPECIAL_PATH_SET);
    }

    /**
     * Confirm that a file (in the destination) is present for each "special" locale (in the
     * source), and that each such destination file contains all the required "special" paths
     *
     * @param sourceDir a directory like ".../common/main"
     * @param destDir a directory
     */
    private static void doubleCheckSpecialPaths(File sourceDir, File destDir) {
        Set<String> sorted = sortedNames(sourceDir);
        Factory factory = Factory.make(destDir.toString(), ".*");
        for (String file : sorted) {
            doubleCheckLocale(sourceDir, destDir, file, factory);
        }
    }

    /**
     * Checks one source file name: if it is the ".xml" file of a special locale, the destination
     * file must exist and must have an explicit value for every special path.
     *
     * @param sourceDir the source directory (not read here)
     * @param destDir the destination directory
     * @param file the file name, as listed in the source directory
     * @param factory a factory over the destination directory
     * @throws InternalCldrException if the destination file or a required value is missing
     */
    private static void doubleCheckLocale(
            File sourceDir, File destDir, String file, Factory factory) {
        if (!file.endsWith(".xml")) {
            return;
        }
        String localeId = getLocaleIdFromFileName(file);
        if (!localeIsSpecial(localeId)) {
            return;
        }
        File destFile = new File(destDir, file);
        if (!destFile.exists()) {
            throw new InternalCldrException("doubleCheckLocale FILE NOT FOUND: " + destFile);
        }
        // Note: factory.make will fail here unless ../common/dtd/ldml.dtd exists in relation to the
        // destination folder
        CLDRFile cldrFileUnresolved = factory.make(localeId, false);
        for (String xpath : SPECIAL_PATH_SET) {
            String value = cldrFileUnresolved.getStringValue(xpath);
            if (value == null) {
                throw new InternalCldrException(
                        "Locale " + localeId + " missing required special path " + xpath);
            }
            if (CldrUtility.INHERITANCE_MARKER.equals(value)) {
                throw new InternalCldrException(
                        "Locale "
                                + localeId
                                + " has INHERITANCE_MARKER for required special path "
                                + xpath);
            }
        }
    }

    /** Prints "locale: title: size" if the set is not empty. */
    private static void showIfNonZero(String localeId, String title, Set<String> toRemove) {
        if (!toRemove.isEmpty()) {
            System.out.println(localeId + ": " + title + ": " + toRemove.size());
        }
    }

    /**
     * Exceptions for generating production data, because the results would not pass
     * CompareResolved.
     */
    // NOTE(review): the oc_ES entry is listed twice; harmless for containsEntry(), kept as-is.
    static final Multimap<String, String> LOCALE_TO_PATH_EXCEPTIONS =
            ImmutableListMultimap.<String, String>builder()
                    .put(
                            "oc_ES",
                            "//ldml/localeDisplayNames/territories/territory[@type=\"HK\"][@alt=\"short\"]")
                    .put(
                            "zh_Hant_MO",
                            "//ldml/localeDisplayNames/languages/language[@type=\"yue\"][@alt=\"menu\"]")
                    .put(
                            "zh_Hant_MO",
                            "//ldml/localeDisplayNames/territories/territory[@type=\"CI\"][@alt=\"variant\"]")
                    .put(
                            "zh_Hant_HK",
                            "//ldml/localeDisplayNames/languages/language[@type=\"yue\"][@alt=\"menu\"]")
                    .put(
                            "zh_Hant_HK",
                            "//ldml/localeDisplayNames/territories/territory[@type=\"CI\"][@alt=\"variant\"]")
                    .put(
                            "ru_BY",
                            "//ldml/numbers/currencies/currency[@type=\"RUR\"]/symbol[@alt=\"narrow\"]")
                    .put(
                            "oc_ES",
                            "//ldml/localeDisplayNames/territories/territory[@type=\"HK\"][@alt=\"short\"]")
                    .put(
                            "el_POLYTON",
                            "//ldml/localeDisplayNames/territories/territory[@type=\"CI\"][@alt=\"variant\"]")
                    .put(
                            "be_TARASK",
                            "//ldml/localeDisplayNames/languages/language[@type=\"az\"][@alt=\"short\"]")
                    .build();

    /**
     * Check if a path is equal, or if it is a suitable alt variant. If it returns true, the value
     * will be removed; false will retain it.
     *
     * @param cldrFileResolved the resolved file (not read here)
     * @param desiredPath the path being examined
     * @param foundPath the path where the bailey value was found; may be null
     * @param localeId the locale being processed
     * @param foundLocaleId the locale where the bailey value was found
     */
    private static boolean pathEqualsOrIsOkAltVariantOf(
            CLDRFile cldrFileResolved,
            String desiredPath,
            String foundPath,
            String localeId,
            String foundLocaleId) {
        if (LOCALE_TO_PATH_EXCEPTIONS.containsEntry(localeId, desiredPath)) {
            return false;
        }
        /*
         * Protect against bad case, such as:
         *
         * pt //ldml/localeDisplayNames/languages/language[@type="en_US"] ↑↑↑ (= inglês americano)
         * pt //ldml/localeDisplayNames/languages/language[@type="en_US"][@alt="short"] inglês (EUA)
         *
         * pt_AO //ldml/localeDisplayNames/languages/language[@type="en_US"][@alt="short"] inglês (EUA)
         *
         * When processing pt, its short value disappears, because its value = lateral inherited (constructed) value from pt
         * When processing pt_AO, its short value is also removed, because it is the same as the pt
         * But then when it is constructed, its value =
         *
         */
        if (desiredPath.equals(foundPath)) {
            // TODO for a full fix, we need to check that the foundLocaleId/foundPath will not
            // disappear when it is processed.
            // For now, we are using the LOCALE_TO_PATH_EXCEPTIONS.
            return true;
        }
        // extra condition on alt values; has to be found in the same locale
        // (null-safe: an unknown found locale keeps the value)
        if (!Objects.equals(foundLocaleId, localeId)) {
            return false;
        }
        if (foundPath == null || foundPath.equals(GlossonymConstructor.PSEUDO_PATH)) {
            if (!LocaleIDParser.isL1(localeId)) {
                return true;
            }
            // We can do this, because the bailey value has already been checked.
            // Since it isn't null, a null or PSEUDO_PATH indicates a constructed alt value.
            return false;
        }
        // Otherwise the two paths must be identical except for "alt" attributes.
        XPathParts desiredPathParts = XPathParts.getFrozenInstance(desiredPath);
        XPathParts foundPathParts = XPathParts.getFrozenInstance(foundPath);
        if (desiredPathParts.size() != foundPathParts.size()) {
            return false;
        }
        for (int e = 0; e < desiredPathParts.size(); ++e) {
            String element1 = desiredPathParts.getElement(e);
            String element2 = foundPathParts.getElement(e);
            if (!element1.equals(element2)) {
                return false;
            }
            Map<String, String> attr1 = desiredPathParts.getAttributes(e);
            Map<String, String> attr2 = foundPathParts.getAttributes(e);
            if (attr1.equals(attr2)) {
                continue;
            }
            Set<String> keys1 = attr1.keySet();
            Set<String> keys2 = attr2.keySet();
            for (String attr : Sets.union(keys1, keys2)) {
                if (attr.equals("alt")) {
                    continue;
                }
                if (!Objects.equals(attr1.get(attr), attr2.get(attr))) {
                    return false;
                }
            }
        }
        return true;
    }

    /** Returns true for calendar paths that contain a "…FormatLength[@type=" element. */
    private static boolean isDateTimePath(String xpath) {
        return xpath.startsWith("//ldml/dates/calendars/calendar")
                && xpath.contains("FormatLength[@type=");
    }

    /**
     * generate full dateTimePaths from any element
     * //ldml/dates/calendars/calendar[@type="gregorian"]/dateFormats/dateFormatLength[@type=".*"]/dateFormat[@type="standard"]/pattern[@type="standard"]
     * //ldml/dates/calendars/calendar[@type="gregorian"]/timeFormats/timeFormatLength[@type=".*"]/timeFormat[@type="standard"]/pattern[@type="standard"]
     * //ldml/dates/calendars/calendar[@type="gregorian"]/dateTimeFormats/dateTimeFormatLength[@type=".*"]/dateTimeFormat[@type="standard"]/pattern[@type="standard"]
     *
     * @param xpath a path for which {@link #isDateTimePath} is true
     * @return the 12 paths (date, time, dateTime × full, long, medium, short) that share the
     *     prefix of {@code xpath} up to and including the first "]/"
     */
    private static Set<String> dateTimePaths(String xpath) {
        LinkedHashSet<String> result = new LinkedHashSet<>();
        String prefix = xpath.substring(0, xpath.indexOf(']') + 2); // get after ]/
        for (String type : Arrays.asList("date", "time", "dateTime")) {
            String pattern =
                    prefix
                            + "$XFormats/$XFormatLength[@type=\"$Y\"]/$XFormat[@type=\"standard\"]/pattern[@type=\"standard\"]"
                                    .replace("$X", type);
            for (String width : Arrays.asList("full", "long", "medium", "short")) {
                result.add(pattern.replace("$Y", width));
            }
        }
        return result;
    }

    /**
     * Returns the empty locales whose descendants (if any) are all empty too.
     *
     * @param emptyLocales locale IDs whose written files are empty
     * @param available all locale IDs in the directory
     * @param isAnnotationsDir if true, the parent according to {@link
     *     ParentLocaleComponent#collations} is also treated as a parent
     */
    private static Set<String> getChildless(
            Set<String> emptyLocales, Set<String> available, boolean isAnnotationsDir) {
        // first build the parent2child map
        Multimap<String, String> parent2child = HashMultimap.create();
        for (String locale : available) {
            String parent = LocaleIDParser.getParent(locale);
            if (parent != null) {
                parent2child.put(parent, locale);
            }
            if (isAnnotationsDir) {
                String simpleParent =
                        LocaleIDParser.getParent(locale, ParentLocaleComponent.collations);
                // compare by value (was a reference comparison with !=)
                if (simpleParent != null && !simpleParent.equals(parent)) {
                    parent2child.put(simpleParent, locale);
                }
            }
        }

        // now cycle through the empties
        Set<String> result = new HashSet<>();
        for (String empty : emptyLocales) {
            if (allChildrenAreEmpty(empty, emptyLocales, parent2child)) {
                result.add(empty);
            }
        }
        return result;
    }

    /**
     * Recursively checks that all children are empty (including that there are no children)
     *
     * @param locale the locale whose descendants are checked
     * @param emptyLocales the locale IDs known to be empty
     * @param parent2child map from a locale to its direct children
     * @return true if every descendant of {@code locale} is in {@code emptyLocales}
     */
    private static boolean allChildrenAreEmpty(
            String locale, Set<String> emptyLocales, Multimap<String, String> parent2child) {

        Collection<String> children = parent2child.get(locale);
        for (String child : children) {
            if (!emptyLocales.contains(child)) {
                return false;
            }
            if (!allChildrenAreEmpty(child, emptyLocales, parent2child)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Copies one file. Best effort: a failure is reported on System.err and processing continues.
     */
    private static void copyFiles(File sourceFile, File destinationFile) {
        try {
            Files.copy(sourceFile, destinationFile);
        } catch (IOException e) {
            System.err.println("Can't copy " + sourceFile + " to " + destinationFile + " — " + e);
        }
    }
}