1 package org.unicode.cldr.tool; 2 3 import java.io.File; 4 import java.io.FileNotFoundException; 5 import java.io.IOException; 6 import java.io.PrintWriter; 7 import java.io.UncheckedIOException; 8 import java.util.Arrays; 9 import java.util.Collection; 10 import java.util.HashSet; 11 import java.util.LinkedHashSet; 12 import java.util.Map; 13 import java.util.Map.Entry; 14 import java.util.Objects; 15 import java.util.Set; 16 import java.util.TreeSet; 17 import java.util.regex.Matcher; 18 import java.util.regex.Pattern; 19 20 import org.unicode.cldr.tool.Option.Options; 21 import org.unicode.cldr.tool.Option.Params; 22 import org.unicode.cldr.util.CLDRConfig; 23 import org.unicode.cldr.util.CLDRFile; 24 import org.unicode.cldr.util.CLDRPaths; 25 import org.unicode.cldr.util.CldrUtility; 26 import org.unicode.cldr.util.DtdType; 27 import org.unicode.cldr.util.Factory; 28 import org.unicode.cldr.util.Level; 29 import org.unicode.cldr.util.LocaleIDParser; 30 import org.unicode.cldr.util.LogicalGrouping; 31 import org.unicode.cldr.util.Pair; 32 import org.unicode.cldr.util.SupplementalDataInfo; 33 import org.unicode.cldr.util.XMLSource; 34 import org.unicode.cldr.util.XPathParts; 35 36 import com.google.common.collect.HashMultimap; 37 import com.google.common.collect.ImmutableSet; 38 import com.google.common.collect.Multimap; 39 import com.google.common.collect.Sets; 40 import com.google.common.collect.TreeMultimap; 41 import com.google.common.io.Files; 42 import com.ibm.icu.util.Output; 43 44 public class GenerateProductionData { 45 static boolean DEBUG = false; 46 static boolean VERBOSE = false; 47 static Matcher FILE_MATCH = null; 48 49 static String SOURCE_COMMON_DIR = null; 50 static String DEST_COMMON_DIR = null; 51 52 static boolean ADD_LOGICAL_GROUPS = false; 53 static boolean ADD_DATETIME = false; 54 static boolean ADD_SIDEWAYS = false; 55 static boolean ADD_ROOT = false; 56 static boolean INCLUDE_COMPREHENSIVE = false; 57 static boolean CONSTRAINED_RESTORATION = false; 58 59 static final Set<String> NON_XML = ImmutableSet.of("dtd", "properties", "testData", "uca"); 60 static final Set<String> COPY_ANYWAY = ImmutableSet.of("casing", "collation"); // don't want to "clean up", makes format difficult to use 61 static final SupplementalDataInfo SDI = CLDRConfig.getInstance().getSupplementalDataInfo(); 62 63 static final Multimap<String, Pair<String, String>> localeToSubdivisionsToMigrate = TreeMultimap.create(); 64 65 enum MyOptions { 66 sourceDirectory(new Params() 67 .setHelp("source common directory") 68 .setDefault(CLDRPaths.COMMON_DIRECTORY) 69 .setMatch(".*")), 70 destinationDirectory(new Params() 71 .setHelp("destination common directory") 72 .setDefault(CLDRPaths.STAGING_DIRECTORY + "production/common") 73 .setMatch(".*")), 74 logicalGroups(new Params() 75 .setHelp("add path/values for logical groups") 76 .setDefault("true") 77 .setMatch("true|false")), 78 time(new Params() 79 .setHelp("add path/values for stock date/time/datetime") 80 .setDefault("true") 81 .setMatch("true|false")), 82 Sideways(new Params() 83 .setHelp("add path/values for sideways inheritance") 84 .setDefault("true") 85 .setMatch("true|false")), 86 root(new Params() 87 .setHelp("add path/values for root and code-fallback") 88 .setDefault("true") 89 .setMatch("true|false")), 90 constrainedRestoration(new Params() 91 .setHelp("only add inherited paths that were in original file") 92 .setDefault("true") 93 .setMatch("true|false")), 94 includeComprehensive(new Params() 95 .setHelp("exclude comprehensive paths — otherwise just to modern level") 96 .setDefault("true") 97 .setMatch("true|false")), 98 verbose(new Params() 99 .setHelp("verbose debugging messages")), 100 Debug(new Params() 101 .setHelp("debug")), 102 fileMatch(new Params() 103 .setHelp("regex to match patterns") 104 .setMatch(".*")), 105 ; 106 107 // BOILERPLATE TO COPY 108 final Option option; 109 MyOptions(Params params)110 private MyOptions(Params params) { 111 option = new Option(this, params); 112 } 113 114 private static Options myOptions = new Options(); 115 static { 116 for (MyOptions option : MyOptions.values()) { myOptions.add(option, option.option)117 myOptions.add(option, option.option); 118 } 119 } 120 parse(String[] args, boolean showArguments)121 private static Set<String> parse(String[] args, boolean showArguments) { 122 return myOptions.parse(MyOptions.values()[0], args, true); 123 } 124 } 125 main(String[] args)126 public static void main(String[] args) { 127 // TODO rbnf and segments don't have modern coverage; fix there. 128 129 MyOptions.parse(args, true); 130 SOURCE_COMMON_DIR = MyOptions.sourceDirectory.option.getValue(); 131 DEST_COMMON_DIR = MyOptions.destinationDirectory.option.getValue(); 132 133 // debugging 134 VERBOSE = MyOptions.verbose.option.doesOccur(); 135 DEBUG = MyOptions.Debug.option.doesOccur(); 136 String fileMatch = MyOptions.fileMatch.option.getValue(); 137 if (fileMatch != null) { 138 FILE_MATCH = Pattern.compile(fileMatch).matcher(""); 139 } 140 141 // controls for minimization 142 ADD_LOGICAL_GROUPS = "true".equalsIgnoreCase(MyOptions.logicalGroups.option.getValue()); 143 ADD_DATETIME = "true".equalsIgnoreCase(MyOptions.time.option.getValue()); 144 ADD_SIDEWAYS = "true".equalsIgnoreCase(MyOptions.Sideways.option.getValue()); 145 ADD_ROOT = "true".equalsIgnoreCase(MyOptions.root.option.getValue()); 146 147 // constraints 148 INCLUDE_COMPREHENSIVE = "true".equalsIgnoreCase(MyOptions.includeComprehensive.option.getValue()); 149 CONSTRAINED_RESTORATION = "true".equalsIgnoreCase(MyOptions.constrainedRestoration.option.getValue()); 150 151 // get directories 152 153 for (DtdType type : DtdType.values()) { 154 boolean isLdmlDtdType = type == DtdType.ldml; 155 156 // bit of a hack, using the ldmlICU — otherwise unused! — to get the nonXML files. 157 Set<String> directories = (type == DtdType.ldmlICU) ? NON_XML : type.directories; 158 159 for (String dir : directories) { 160 File sourceDir = new File(SOURCE_COMMON_DIR, dir); 161 File destinationDir = new File(DEST_COMMON_DIR, dir); 162 Stats stats = new Stats(); 163 copyFilesAndReturnIsEmpty(sourceDir, destinationDir, null, isLdmlDtdType, stats); 164 } 165 } 166 if (!localeToSubdivisionsToMigrate.isEmpty()) { 167 System.err.println("WARNING: Subdivision files not written"); 168 for (Entry<String, Pair<String, String>> entry : localeToSubdivisionsToMigrate.entries()) { 169 System.err.println(entry.getKey() + " \t" + entry.getValue()); 170 } 171 } 172 } 173 174 private static class Stats { 175 long files; 176 long removed; 177 long retained; 178 long remaining; clear()179 Stats clear() { 180 files = removed = retained = remaining = 0; 181 return this; 182 } 183 @Override toString()184 public String toString() { 185 return 186 "files=" + files 187 + (removed + retained + remaining == 0 ? "" 188 : "; removed=" + removed 189 + "; retained=" + retained 190 + "; remaining=" + remaining); 191 } showNonZero(String label)192 public void showNonZero(String label) { 193 if (removed + retained + remaining != 0) { 194 System.out.println(label + toString()); 195 } 196 } 197 } 198 199 /** 200 * Copy files in directories, recursively. 201 * @param sourceFile 202 * @param destinationFile 203 * @param factory 204 * @param isLdmlDtdType 205 * @param stats 206 * @param hasChildren 207 * @return true if the file is an ldml file with empty content. 208 */ copyFilesAndReturnIsEmpty(File sourceFile, File destinationFile, Factory factory, boolean isLdmlDtdType, Stats stats)209 private static boolean copyFilesAndReturnIsEmpty(File sourceFile, File destinationFile, 210 Factory factory, boolean isLdmlDtdType, Stats stats) { 211 if (sourceFile.isDirectory()) { 212 213 System.out.println(sourceFile + " => " + destinationFile); 214 if (!destinationFile.mkdirs()) { 215 // if created, remove old contents 216 Arrays.stream(destinationFile.listFiles()).forEach(File::delete); 217 } 218 219 Set<String> sorted = new TreeSet<>(); 220 sorted.addAll(Arrays.asList(sourceFile.list())); 221 222 if (COPY_ANYWAY.contains(sourceFile.getName())) { // special cases 223 isLdmlDtdType = false; 224 } 225 // reset factory for directory 226 factory = null; 227 if (isLdmlDtdType) { 228 // if the factory is empty, then we just copy files 229 factory = Factory.make(sourceFile.toString(), ".*"); 230 } 231 boolean isMainDir = factory != null && sourceFile.getName().contentEquals("main"); 232 boolean isRbnfDir = factory != null && sourceFile.getName().contentEquals("rbnf"); 233 234 Set<String> emptyLocales = new HashSet<>(); 235 stats = new Stats(); 236 for (String file : sorted) { 237 File sourceFile2 = new File(sourceFile, file); 238 File destinationFile2 = new File(destinationFile, file); 239 if (VERBOSE) System.out.println("\t" + file); 240 241 // special step to just copy certain files like main/root.xml file 242 Factory currFactory = factory; 243 if (isMainDir) { 244 if (file.equals("root.xml")) { 245 currFactory = null; 246 } 247 } else if (isRbnfDir) { 248 currFactory = null; 249 } 250 251 // when the currFactory is null, we just copy files as-is 252 boolean isEmpty = copyFilesAndReturnIsEmpty(sourceFile2, destinationFile2, currFactory, isLdmlDtdType, stats); 253 if (isEmpty) { // only happens for ldml 254 emptyLocales.add(file.substring(0,file.length()-4)); // remove .xml for localeId 255 } 256 } 257 stats.showNonZero("\tTOTAL:\t"); 258 // if there are empty ldml files, AND we aren't in /main/, 259 // then remove any without children 260 if (!emptyLocales.isEmpty() && !sourceFile.getName().equals("main")) { 261 Set<String> childless = getChildless(emptyLocales, factory.getAvailable()); 262 if (!childless.isEmpty()) { 263 if (VERBOSE) System.out.println("\t" + destinationFile + "\tRemoving empty locales:" + childless); 264 childless.stream().forEach(locale -> new File(destinationFile, locale + ".xml").delete()); 265 } 266 } 267 return false; 268 } else if (factory != null) { 269 String file = sourceFile.getName(); 270 if (!file.endsWith(".xml")) { 271 return false; 272 } 273 String localeId = file.substring(0, file.length()-4); 274 if (FILE_MATCH != null) { 275 if (!FILE_MATCH.reset(localeId).matches()) { 276 return false; 277 } 278 } 279 boolean isRoot = localeId.equals("root"); 280 String directoryName = sourceFile.getParentFile().getName(); 281 boolean isSubdivisionDirectory = "subdivisions".equals(directoryName); 282 283 CLDRFile cldrFileUnresolved = factory.make(localeId, false); 284 CLDRFile cldrFileResolved = factory.make(localeId, true); 285 boolean gotOne = false; 286 Set<String> toRemove = new TreeSet<>(); // TreeSet just makes debugging easier 287 Set<String> toRetain = new TreeSet<>(); 288 Output<String> pathWhereFound = new Output<>(); 289 Output<String> localeWhereFound = new Output<>(); 290 291 boolean isArabicSpecial = localeId.equals("ar") || localeId.startsWith("ar_"); 292 293 String debugPath = null; // "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"power-kilowatt\"]/displayName"; 294 String debugLocale = "af"; 295 296 for (String xpath : cldrFileUnresolved) { 297 if (xpath.startsWith("//ldml/identity")) { 298 continue; 299 } 300 if (debugPath != null && localeId.equals(debugLocale) && xpath.equals(debugPath)) { 301 int debug = 0; 302 } 303 304 String value = cldrFileUnresolved.getStringValue(xpath); 305 if (value == null || CldrUtility.INHERITANCE_MARKER.equals(value)) { 306 toRemove.add(xpath); 307 continue; 308 } 309 310 // special-case the root values that are only for Survey Tool use 311 312 if (isRoot) { 313 if (xpath.startsWith("//ldml/annotations/annotation")) { 314 toRemove.add(xpath); 315 continue; 316 } 317 } 318 319 // special case for Arabic defaultNumberingSystem 320 if (isArabicSpecial && xpath.contains("/defaultNumberingSystem")) { 321 toRetain.add(xpath); 322 } 323 324 // remove items that are the same as their bailey values. This also catches Inheritance Marker 325 326 String bailey = cldrFileResolved.getConstructedBaileyValue(xpath, pathWhereFound, localeWhereFound); 327 if (value.equals(bailey) 328 && (!ADD_SIDEWAYS 329 || pathEqualsOrIsAltVariantOf(xpath, pathWhereFound.value)) 330 && (!ADD_ROOT 331 || (!Objects.equals(XMLSource.ROOT_ID, localeWhereFound.value) 332 && !Objects.equals(XMLSource.CODE_FALLBACK_ID, localeWhereFound.value)))) { 333 toRemove.add(xpath); 334 continue; 335 } 336 337 // Move subdivisions elsewhere 338 if (!isSubdivisionDirectory && xpath.startsWith("//ldml/localeDisplayNames/subdivisions/subdivision")) { 339 localeToSubdivisionsToMigrate.put(localeId, Pair.of(xpath, value)); 340 toRemove.add(xpath); 341 continue; 342 } 343 // remove level=comprehensive (under setting) 344 345 if (!INCLUDE_COMPREHENSIVE) { 346 Level coverage = SDI.getCoverageLevel(xpath, localeId); 347 if (coverage == Level.COMPREHENSIVE) { 348 toRemove.add(xpath); 349 continue; 350 } 351 } 352 353 // if we got all the way to here, we have a non-empty result 354 355 // check to see if we might need to flesh out logical groups 356 // TODO Should be done in the converter tool!! 357 if (ADD_LOGICAL_GROUPS && !LogicalGrouping.isOptional(cldrFileResolved, xpath)) { 358 Set<String> paths = LogicalGrouping.getPaths(cldrFileResolved, xpath); 359 if (paths != null && paths.size() > 1) { 360 for (String possiblePath : paths) { 361 // Unclear from API whether we need to do this filtering 362 if (!LogicalGrouping.isOptional(cldrFileResolved, possiblePath)) { 363 toRetain.add(possiblePath); 364 } 365 } 366 } 367 } 368 369 // check to see if we might need to flesh out datetime. 370 // TODO Should be done in the converter tool!! 371 if (ADD_DATETIME && isDateTimePath(xpath)) { 372 toRetain.addAll(dateTimePaths(xpath)); 373 } 374 375 // past the gauntlet 376 gotOne = true; 377 } 378 379 // we even add empty files, but can delete them back on the directory level. 380 try (PrintWriter pw = new PrintWriter(destinationFile)) { 381 CLDRFile outCldrFile = cldrFileUnresolved.cloneAsThawed(); 382 if (isSubdivisionDirectory) { 383 Collection<Pair<String, String>> path_values = localeToSubdivisionsToMigrate.get(localeId); 384 if (path_values != null) { 385 for (Pair<String, String>path_value : path_values) { 386 outCldrFile.add(path_value.getFirst(), path_value.getSecond()); 387 } 388 localeToSubdivisionsToMigrate.removeAll(localeId); 389 } 390 } 391 392 // Remove paths, but pull out the ones to retain 393 // example: 394 // toRemove == {a b c} // c may have ^^^ value 395 // toRetain == {b c d} // d may have ^^^ value 396 397 if (DEBUG) { 398 showIfNonZero(localeId, "removing", toRemove); 399 showIfNonZero(localeId, "retaining", toRetain); 400 401 } 402 if (CONSTRAINED_RESTORATION) { 403 toRetain.retainAll(toRemove); // only add paths that were there already 404 // toRetain == {b c} 405 if (DEBUG) { 406 showIfNonZero(localeId, "constrained retaining", toRetain); 407 } 408 } 409 410 boolean changed0 = toRemove.removeAll(toRetain); 411 // toRemove == {a} 412 if (DEBUG && changed0) { 413 showIfNonZero(localeId, "final removing", toRemove); 414 } 415 416 boolean changed = toRetain.removeAll(toRemove); 417 // toRetain = {b c d} or if constrained, {b c} 418 if (DEBUG && changed) { 419 showIfNonZero(localeId, "final retaining", toRetain); 420 } 421 422 outCldrFile.removeAll(toRemove, false); 423 if (DEBUG) { 424 for (String xpath : toRemove) { 425 System.out.println(localeId + ": removing: «" 426 + cldrFileUnresolved.getStringValue(xpath) 427 + "», " + xpath); 428 } 429 } 430 431 // now set any null values to bailey values if not present 432 for (String xpath : toRetain) { 433 if (debugPath != null && localeId.equals(debugLocale) && xpath.equals(debugPath)) { 434 int debug = 0; 435 } 436 String value = cldrFileResolved.getStringValue(xpath); 437 if (value == null || value.equals(CldrUtility.INHERITANCE_MARKER)) { 438 throw new IllegalArgumentException(localeId + ": " + value + " in value for " + xpath); 439 } else { 440 if (DEBUG) { 441 String oldValue = cldrFileUnresolved.getStringValue(xpath); 442 System.out.println("Restoring: «" + oldValue + "» ⇒ «" + value 443 + "»\t" + xpath); 444 } 445 outCldrFile.add(xpath, value); 446 } 447 } 448 449 // double-check results 450 int count = 0; 451 for (String xpath : outCldrFile) { 452 if (debugPath != null && localeId.equals(debugLocale) && xpath.equals(debugPath)) { 453 int debug = 0; 454 } 455 String value = outCldrFile.getStringValue(xpath); 456 if (value == null || value.equals(CldrUtility.INHERITANCE_MARKER)) { 457 throw new IllegalArgumentException(localeId + ": " + value + " in value for " + xpath); 458 } 459 } 460 461 outCldrFile.write(pw); 462 ++stats.files; 463 stats.removed += toRemove.size(); 464 stats.retained += toRetain.size(); 465 stats.remaining += count; 466 } catch (FileNotFoundException e) { 467 throw new UncheckedIOException("Can't copy " + sourceFile + " to " + destinationFile + " — ", e); 468 } 469 return !gotOne; 470 } else { 471 if (FILE_MATCH != null) { 472 String file = sourceFile.getName(); 473 int dotPos = file.lastIndexOf('.'); 474 String baseName = dotPos >= 0 ? file.substring(0, file.length()-dotPos) : file; 475 if (!FILE_MATCH.reset(baseName).matches()) { 476 return false; 477 } 478 } 479 // for now, just copy 480 ++stats.files; 481 copyFiles(sourceFile, destinationFile); 482 return false; 483 } 484 } 485 showIfNonZero(String localeId, String title, Set<String> toRemove)486 private static void showIfNonZero(String localeId, String title, Set<String> toRemove) { 487 if (toRemove.size() != 0) { 488 System.out.println(localeId + ": " 489 + title 490 + ": " + toRemove.size()); 491 } 492 } 493 pathEqualsOrIsAltVariantOf(String desiredPath, String foundPath)494 private static boolean pathEqualsOrIsAltVariantOf(String desiredPath, String foundPath) { 495 if (desiredPath.equals(foundPath)) { 496 return true; 497 } 498 if (desiredPath.contains("type=\"en_GB\"") && desiredPath.contains("alt=")) { 499 int debug = 0; 500 } 501 if (foundPath == null) { 502 // We can do this, because the bailey value has already been checked 503 // Since it isn't null, a null indicates a constructed alt value 504 return true; 505 } 506 XPathParts desiredPathParts = XPathParts.getFrozenInstance(desiredPath); 507 XPathParts foundPathParts = XPathParts.getFrozenInstance(foundPath); 508 if (desiredPathParts.size() != foundPathParts.size()) { 509 return false; 510 } 511 for (int e = 0; e < desiredPathParts.size(); ++e) { 512 String element1 = desiredPathParts.getElement(e); 513 String element2 = foundPathParts.getElement(e); 514 if (!element1.equals(element2)) { 515 return false; 516 } 517 Map<String, String> attr1 = desiredPathParts.getAttributes(e); 518 Map<String, String> attr2 = foundPathParts.getAttributes(e); 519 if (attr1.equals(attr2)) { 520 continue; 521 } 522 Set<String> keys1 = attr1.keySet(); 523 Set<String> keys2 = attr2.keySet(); 524 for (String attr : Sets.union(keys1, keys2)) { 525 if (attr.equals("alt")) { 526 continue; 527 } 528 if (!Objects.equals(attr1.get(attr), attr2.get(attr))) { 529 return false; 530 } 531 } 532 } 533 return true; 534 } 535 isDateTimePath(String xpath)536 private static boolean isDateTimePath(String xpath) { 537 return xpath.startsWith("//ldml/dates/calendars/calendar") 538 && xpath.contains("FormatLength[@type="); 539 } 540 541 /** generate full dateTimePaths from any element 542 //ldml/dates/calendars/calendar[@type="gregorian"]/dateFormats/dateFormatLength[@type=".*"]/dateFormat[@type="standard"]/pattern[@type="standard"] 543 //ldml/dates/calendars/calendar[@type="gregorian"]/timeFormats/timeFormatLength[@type=".*"]/timeFormat[@type="standard"]/pattern[@type="standard"] 544 //ldml/dates/calendars/calendar[@type="gregorian"]/dateTimeFormats/dateTimeFormatLength[@type=".*"]/dateTimeFormat[@type="standard"]/pattern[@type="standard"] 545 */ dateTimePaths(String xpath)546 private static Set<String> dateTimePaths(String xpath) { 547 LinkedHashSet<String> result = new LinkedHashSet<>(); 548 String prefix = xpath.substring(0,xpath.indexOf(']') + 2); // get after ]/ 549 for (String type : Arrays.asList("date", "time", "dateTime")) { 550 String pattern = prefix + "$XFormats/$XFormatLength[@type=\"$Y\"]/$XFormat[@type=\"standard\"]/pattern[@type=\"standard\"]".replace("$X", type); 551 for (String width : Arrays.asList("full", "long", "medium", "short")) { 552 result.add(pattern.replace("$Y", width)); 553 } 554 } 555 return result; 556 } 557 getChildless(Set<String> emptyLocales, Set<String> available)558 private static Set<String> getChildless(Set<String> emptyLocales, Set<String> available) { 559 // first build the parent2child map 560 Multimap<String,String> parent2child = HashMultimap.create(); 561 for (String locale : available) { 562 String parent = LocaleIDParser.getParent(locale); 563 if (parent != null) { 564 parent2child.put(parent, locale); 565 } 566 } 567 568 // now cycle through the empties 569 Set<String> result = new HashSet<>(); 570 for (String empty : emptyLocales) { 571 if (allChildrenAreEmpty(empty, emptyLocales, parent2child)) { 572 result.add(empty); 573 } 574 } 575 return result; 576 } 577 578 /** 579 * Recursively checks that all children are empty (including that there are no children) 580 * @param name 581 * @param emptyLocales 582 * @param parent2child 583 * @return 584 */ allChildrenAreEmpty( String locale, Set<String> emptyLocales, Multimap<String, String> parent2child)585 private static boolean allChildrenAreEmpty( 586 String locale, 587 Set<String> emptyLocales, 588 Multimap<String, String> parent2child) { 589 590 Collection<String> children = parent2child.get(locale); 591 for (String child : children) { 592 if (!emptyLocales.contains(child)) { 593 return false; 594 } 595 if (!allChildrenAreEmpty(child, emptyLocales, parent2child)) { 596 return false; 597 } 598 } 599 return true; 600 } 601 copyFiles(File sourceFile, File destinationFile)602 private static void copyFiles(File sourceFile, File destinationFile) { 603 try { 604 Files.copy(sourceFile, destinationFile); 605 } catch (IOException e) { 606 System.err.println("Can't copy " + sourceFile + " to " + destinationFile + " — " + e); 607 } 608 } 609 } 610