1 package org.unicode.cldr.tool; 2 3 import java.io.File; 4 import java.io.IOException; 5 import java.io.PrintWriter; 6 import java.util.ArrayList; 7 import java.util.Arrays; 8 import java.util.Collection; 9 import java.util.Collections; 10 import java.util.HashSet; 11 import java.util.List; 12 import java.util.Map; 13 import java.util.Map.Entry; 14 import java.util.Objects; 15 import java.util.Set; 16 import java.util.TreeMap; 17 import java.util.TreeSet; 18 import java.util.regex.Matcher; 19 import java.util.regex.Pattern; 20 21 import org.unicode.cldr.draft.FileUtilities; 22 import org.unicode.cldr.test.DisplayAndInputProcessor; 23 import org.unicode.cldr.test.SubmissionLocales; 24 import org.unicode.cldr.tool.FormattedFileWriter.Anchors; 25 import org.unicode.cldr.tool.Option.Options; 26 import org.unicode.cldr.tool.Option.Params; 27 import org.unicode.cldr.util.CLDRConfig; 28 import org.unicode.cldr.util.CLDRFile; 29 import org.unicode.cldr.util.CLDRFile.Status; 30 import org.unicode.cldr.util.CLDRPaths; 31 import org.unicode.cldr.util.CldrUtility; 32 import org.unicode.cldr.util.Counter; 33 import org.unicode.cldr.util.DtdData; 34 import org.unicode.cldr.util.DtdType; 35 import org.unicode.cldr.util.Factory; 36 import org.unicode.cldr.util.LanguageTagParser; 37 import org.unicode.cldr.util.Level; 38 import org.unicode.cldr.util.LocaleIDParser; 39 import org.unicode.cldr.util.Organization; 40 import org.unicode.cldr.util.Pair; 41 import org.unicode.cldr.util.PathHeader; 42 import org.unicode.cldr.util.PathHeader.PageId; 43 import org.unicode.cldr.util.PathStarrer; 44 import org.unicode.cldr.util.PatternCache; 45 import org.unicode.cldr.util.SimpleXMLSource; 46 import org.unicode.cldr.util.StandardCodes; 47 import org.unicode.cldr.util.SupplementalDataInfo; 48 import org.unicode.cldr.util.SupplementalDataInfo.CoverageVariableInfo; 49 import org.unicode.cldr.util.TransliteratorUtilities; 50 import org.unicode.cldr.util.XMLFileReader; 51 import 
org.unicode.cldr.util.XPathParts; 52 53 import com.google.common.base.Joiner; 54 import com.google.common.base.Splitter; 55 import com.google.common.collect.Multimap; 56 import com.google.common.collect.TreeMultimap; 57 import com.ibm.icu.impl.Relation; 58 import com.ibm.icu.impl.Row.R2; 59 import com.ibm.icu.impl.Row.R3; 60 import com.ibm.icu.impl.Row.R4; 61 import com.ibm.icu.text.NumberFormat; 62 import com.ibm.icu.text.UnicodeSet; 63 import com.ibm.icu.util.ICUUncheckedIOException; 64 import com.ibm.icu.util.Output; 65 66 public class ChartDelta extends Chart { 67 private static final boolean verbose_skipping = false; 68 69 private static final String DEFAULT_DELTA_DIR_NAME = "delta"; 70 private static final String DEFAULT_CHURN_DIR_NAME = "churn"; 71 72 private static final boolean SKIP_REFORMAT_ANNOTATIONS = ToolConstants.PREV_CHART_VERSION.compareTo("30") >= 0; 73 74 private static final PageId DEBUG_PAGE_ID = PageId.DayPeriod; 75 76 private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = CLDRConfig.getInstance().getSupplementalDataInfo(); 77 78 private enum MyOptions { 79 fileFilter(new Params().setHelp("filter files by dir/locale, eg: ^main/en$ or .*/en").setMatch(".*")), 80 orgFilter(new Params().setHelp("filter files by organization").setMatch(".*")), 81 Vxml(new Params().setHelp("use cldr-aux for the base directory")), 82 coverageFilter(new Params().setHelp("filter files by coverage").setMatch(".*")), 83 directory(new Params().setHelp("Set the output directory name").setDefault(DEFAULT_DELTA_DIR_NAME).setMatch(".*")), 84 verbose(new Params().setHelp("verbose debugging messages")), 85 highLevelOnly(new Params().setHelp("check high-level paths (churn) only").setFlag('H')), 86 ; 87 88 // BOILERPLATE TO COPY 89 final Option option; 90 MyOptions(Params params)91 private MyOptions(Params params) { 92 option = new Option(this, params); 93 } 94 95 private static Options myOptions = new Options(); 96 static { 97 for (MyOptions option : 
MyOptions.values()) { myOptions.add(option, option.option)98 myOptions.add(option, option.option); 99 } 100 } 101 parse(String[] args)102 private static Set<String> parse(String[] args) { 103 return myOptions.parse(MyOptions.values()[0], args, true); 104 } 105 } 106 107 private final Matcher fileFilter; 108 private final String dirName; // "delta" or "churn" or set as option 109 private final String chartNameCap; // capitalized, e.g., "Delta" or "Churn" 110 private final String DIR; // full path of output folder 111 private final Level minimumPathCoverage; 112 private final boolean verbose; 113 114 /** 115 * If true, check only high-level paths, i.e., paths for which any changes 116 * have high potential to cause disruptive "churn" 117 */ 118 private final boolean highLevelOnly; 119 main(String[] args)120 public static void main(String[] args) { 121 main(args, false); 122 } 123 main(String[] args, boolean highLevelOnly)124 public static void main(String[] args, boolean highLevelOnly) { 125 System.out.println("use -DCHART_VERSION=36.0 -DPREV_CHART_VERSION=34.0 to generate the differences between v36 and v34."); 126 MyOptions.parse(args); 127 Matcher fileFilter = !MyOptions.fileFilter.option.doesOccur() ? null : PatternCache.get(MyOptions.fileFilter.option.getValue()).matcher(""); 128 if (MyOptions.orgFilter.option.doesOccur()) { 129 if (MyOptions.fileFilter.option.doesOccur()) { 130 throw new IllegalArgumentException("Can't have both fileFilter and orgFilter"); 131 } 132 String rawOrg = MyOptions.orgFilter.option.getValue(); 133 Organization org = Organization.fromString(rawOrg); 134 Set<String> locales = StandardCodes.make().getLocaleCoverageLocales(org); 135 fileFilter = PatternCache.get("^(main|annotations)/(" + Joiner.on("|").join(locales) + ")$").matcher(""); 136 } 137 Level coverage = !MyOptions.coverageFilter.option.doesOccur() ? 
/**
 * Creates a chart generator that writes into the sub-directory {@code dirName} of
 * {@code CLDRPaths.CHART_DIRECTORY}.
 *
 * @param fileFilter matcher applied to "dir/locale" names, or null to accept every file
 * @param coverage minimum path coverage level, or null for no coverage filtering
 * @param dirName output directory name (e.g. "delta" or "churn"); also used, capitalized,
 *            as the chart title prefix
 * @param verbose whether to print progress messages
 * @param highLevelOnly whether to check only high-level ("churn") paths
 * @throws IllegalArgumentException if {@code dirName} is null or empty
 */
private ChartDelta(Matcher fileFilter, Level coverage, String dirName, boolean verbose, boolean highLevelOnly) {
    // The directory option accepts any string (".*"), including the empty one; fail with a
    // clear message instead of a StringIndexOutOfBoundsException from substring below.
    if (dirName == null || dirName.isEmpty()) {
        throw new IllegalArgumentException("Output directory name must not be empty");
    }
    this.fileFilter = fileFilter;
    this.verbose = verbose;
    this.highLevelOnly = highLevelOnly;
    this.dirName = dirName;
    // Locale.ROOT keeps the capitalization independent of the JVM default locale
    // (e.g. a Turkish default locale would otherwise turn "i" into a dotted capital I).
    this.chartNameCap = dirName.substring(0, 1).toUpperCase(java.util.Locale.ROOT) + dirName.substring(1);
    this.DIR = CLDRPaths.CHART_DIRECTORY + dirName;
    this.minimumPathCoverage = coverage;
}
public String getTitle() { 184 return chartNameCap + " Charts"; 185 } 186 187 @Override getFileName()188 public String getFileName() { 189 return "index"; 190 } 191 192 @Override getExplanation()193 public String getExplanation() { 194 return "<p>Charts showing the differences from the last version. " 195 + "Titles prefixed by ¤ are special: either the locale data summary or supplemental data. " 196 + "Not all changed data is charted yet. For details see each chart.</p>"; 197 } 198 199 @Override writeContents(FormattedFileWriter pw)200 public void writeContents(FormattedFileWriter pw) throws IOException { 201 FormattedFileWriter.Anchors anchors = new FormattedFileWriter.Anchors(); 202 FileUtilities.copyFile(ChartDelta.class, "index.css", getDirectory()); 203 FormattedFileWriter.copyIncludeHtmls(getDirectory(), true); 204 counter.clear(); 205 fileCounters.clear(); 206 writeNonLdmlPlain(anchors); 207 writeLdml(anchors); 208 pw.setIndex("Main Chart Index", "../index.html"); 209 pw.write(anchors.toString()); 210 } 211 212 private static class PathHeaderSegment extends R3<PathHeader, Integer, String> { PathHeaderSegment(PathHeader b, int elementIndex, String attribute)213 public PathHeaderSegment(PathHeader b, int elementIndex, String attribute) { 214 super(b, elementIndex, attribute); 215 } 216 } 217 218 private static class PathDiff extends R4<PathHeaderSegment, String, String, String> { PathDiff(String locale, PathHeaderSegment pathHeaderSegment, String oldValue, String newValue)219 public PathDiff(String locale, PathHeaderSegment pathHeaderSegment, String oldValue, String newValue) { 220 super(pathHeaderSegment, locale, oldValue, newValue); 221 } 222 } 223 224 private static final CLDRFile EMPTY_CLDR = new CLDRFile(new SimpleXMLSource("und").freeze()); 225 226 private static final File CLDR_BASE_DIR = CLDRConfig.getInstance().getCldrBaseDirectory(); 227 228 private enum ChangeType { 229 added, deleted, changed, same; get(String oldValue, String currentValue)230 
public static ChangeType get(String oldValue, String currentValue) { 231 return oldValue == null ? added 232 : currentValue == null ? deleted 233 : oldValue.equals(currentValue) ? same 234 : changed; 235 } 236 } 237 238 private Counter<ChangeType> counter = new Counter<>(); 239 private Map<String, Counter<ChangeType>> fileCounters = new TreeMap<>(); 240 private Set<String> badHeaders = new TreeSet<>(); 241 242 /** 243 * Add the count of changed items 244 */ addChange(String file, ChangeType changeType, int count)245 private void addChange(String file, ChangeType changeType, int count) { 246 counter.add(changeType, count); // unified add 247 Counter<ChangeType> fileCounter = fileCounters.get(file); 248 if (fileCounter == null) { 249 fileCounters.put(file, fileCounter = new Counter<>()); 250 } 251 fileCounter.add(changeType, count); 252 } 253 showTotals()254 private void showTotals() { 255 try (PrintWriter pw = FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + "_summary.tsv")) { 256 // pw.println("# percentages are of *new* total"); 257 pw.print("# dir\tfile"); 258 for (ChangeType item : ChangeType.values()) { 259 pw.print("\t" + (item == ChangeType.same ? 
/**
 * Compares the LDML data of the current chart version against the previous chart version
 * and writes the per-base-locale difference tables plus two TSV files
 * ({@code dirName + ".tsv"} and {@code dirName + "_count.tsv"}) into
 * {@code getTsvDir(DIR, dirName)}.
 *
 * @param anchors passed through to the per-base-locale {@code writeDiffs} call
 * @throws IOException declared for the writers opened here
 *
 * TODO: shorten the function using subroutines
 */
private void writeLdml(Anchors anchors) throws IOException {
    try (PrintWriter tsvFile = FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + ".tsv");
        PrintWriter tsvCountFile = FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + "_count.tsv");
        ) {
        tsvFile.println("# Section\tPage\tHeader\tCode\tLocale\tOld\tNew\tLevel");

        // set up factories
        // The two lists are kept index-aligned: factories.get(i) and oldFactories.get(i)
        // always refer to the same directory name.
        List<Factory> factories = new ArrayList<>();
        List<Factory> oldFactories = new ArrayList<>();

        Counter<PathHeader> counts = new Counter<>();

        String dirBase = ToolConstants.getBaseDirectory(ToolConstants.CHART_VERSION);
        String prevDirBase = ToolConstants.getBaseDirectory(ToolConstants.PREV_CHART_VERSION);

        for (String dir : DtdType.ldml.directories) {
            // these two directories are never compared
            if (dir.equals("annotationsDerived") || dir.equals("casing")) {
                continue;
            }
            String current = dirBase + "common/" + dir;
            String past = prevDirBase + "common/" + dir;
            try {
                factories.add(Factory.make(current, ".*"));
            } catch (Exception e1) {
                // nothing was added to either list, so the lists stay aligned
                System.out.println("Skipping: " + dir + "\t" + e1.getMessage());
                continue; // skip where the directories don't exist in old versions
            }
            try {
                oldFactories.add(Factory.make(past, ".*"));
            } catch (Exception e) {
                System.out.println("Couldn't open factory: " + past);
                past = null;
                // null placeholder keeps the index alignment; makeWithFallback maps a null
                // factory to EMPTY_CLDR
                oldFactories.add(null);
            }
            System.out.println("Will compare: " + dir + "\t\t" + current + "\t\t" + past);
        }
        if (factories.isEmpty()) {
            throw new IllegalArgumentException("No factories found for "
                + dirBase + ": " + DtdType.ldml.directories);
        }
        // get a list of all the locales to cycle over
        // Only the locales available in the FIRST factory are considered; they are grouped
        // under the value of getLanguageScript() for their minimized form ("root" stays "root").

        Relation<String, String> baseToLocales = Relation.of(new TreeMap<String, Set<String>>(), HashSet.class);
        Matcher m = fileMatcher.matcher("");
        Set<String> defaultContents = SDI.getDefaultContentLocales();
        LanguageTagParser ltp = new LanguageTagParser();
        LikelySubtags ls = new LikelySubtags();
        for (String file : factories.get(0).getAvailable()) {
            if (defaultContents.contains(file)) {
                continue;
            }
            if (!m.reset(file).matches()) {
                continue;
            }
            String base = file.equals("root") ? "root" : ltp.set(ls.minimize(file)).getLanguageScript();
            baseToLocales.put(base, file);
        }

        // do keyboards later

        // Scratch objects reused across all iterations below.
        Status currentStatus = new Status();
        Status oldStatus = new Status();
        Set<PathDiff> diff = new TreeSet<>();
        Set<String> paths = new HashSet<>();

        // Accumulates, over all bases, which locales differ for each path header.
        Relation<PathHeader, String> diffAll = Relation.of(new TreeMap<PathHeader, Set<String>>(), TreeSet.class);
        for (Entry<String, Set<String>> baseNLocale : baseToLocales.keyValuesSet()) {
            String base = baseNLocale.getKey();
            for (int i = 0; i < factories.size(); ++i) {
                Factory factory = factories.get(i);
                Factory oldFactory = oldFactories.get(i);
                List<File> sourceDirs = Arrays.asList(factory.getSourceDirectories());
                if (sourceDirs.size() != 1) {
                    throw new IllegalArgumentException("Internal error: expect single source dir");
                }
                File sourceDir = sourceDirs.get(0);
                String sourceDirLeaf = sourceDir.getName();
                // files are loaded unresolved for the subdivisions and transforms directories
                boolean resolving = !sourceDirLeaf.contains("subdivisions")
                    && !sourceDirLeaf.contains("transforms");

                for (String locale : baseNLocale.getValue()) {
                    String nameAndLocale = sourceDirLeaf + "/" + locale;
                    // fileFilter uses find(), i.e. a partial match of "dir/locale" is enough
                    if (fileFilter != null && !fileFilter.reset(nameAndLocale).find()) {
                        if (verbose && verbose_skipping) {
                            System.out.println("SKIPPING: " + nameAndLocale);
                        }
                        continue;
                    }
                    if (verbose) {
                        System.out.println(nameAndLocale);
                    }
                    CLDRFile current = makeWithFallback(factory, locale, resolving);
                    CLDRFile old = makeWithFallback(oldFactory, locale, resolving);
                    // used further down to normalize old values before comparing them
                    DisplayAndInputProcessor daip = new DisplayAndInputProcessor(old);

                    // both versions fell back all the way to root for a non-root locale
                    if (!locale.equals("root") && current.getLocaleID().equals("root") && old.getLocaleID().equals("root")) {
                        continue;
                    }
                    // neither version could be loaded at all
                    if (old == EMPTY_CLDR && current == EMPTY_CLDR) {
                        continue;
                    }
                    if (highLevelOnly && !HighLevelPaths.localeIsHighLevel(locale)) {
                        continue;
                    }
                    // collect the union of allowed paths from the current and the old file
                    paths.clear();
                    for (String path : current.fullIterable()) {
                        if (allowPath(locale, path)) {
                            paths.add(path);
                        }
                    }
                    for (String path : old.fullIterable()) {
                        if (!paths.contains(path) && allowPath(locale, path)) {
                            paths.add(path);
                        }
                    }

                    // out-parameters filled by getReformattedPath for each path
                    Output<String> reformattedValue = new Output<>();
                    Output<Boolean> hasReformattedValue = new Output<>();

                    for (String path : paths) {
                        if (path.startsWith("//ldml/identity")
                            || path.endsWith("/alias")
                            || path.startsWith("//ldml/segmentations") // do later
                            || path.startsWith("//ldml/rbnf") // do later
                            ) {
                            continue;
                        }
                        // null means the path has no usable PathHeader (it was recorded in badHeaders)
                        PathHeader ph = getPathHeader(path);
                        if (ph == null) {
                            continue;
                        }

                        String oldValue;
                        String currentValue;

                        {
                            // NOTE(review): the status objects passed here are read right below via
                            // pathWhereFound, so these calls presumably populate them -- confirm.
                            String sourceLocaleCurrent = current.getSourceLocaleID(path, currentStatus);
                            String sourceLocaleOld = getReformattedPath(oldStatus, old, path, reformattedValue, hasReformattedValue);

                            // filter out stuff that differs at a higher level
                            if (!sourceLocaleCurrent.equals(locale)
                                && !sourceLocaleOld.equals(locale)) {
                                continue;
                            }
                            if (!path.equals(currentStatus.pathWhereFound)
                                && !path.equals(oldStatus.pathWhereFound)) {
                                continue;
                            }
                            // fix some incorrect cases?

                            // an inheritance marker is replaced by the value from getBaileyValue
                            currentValue = current.getStringValue(path);
                            if (CldrUtility.INHERITANCE_MARKER.equals(currentValue)) {
                                currentValue = current.getBaileyValue(path, null, null);
                            }

                            String oldRawValue = hasReformattedValue.value ? reformattedValue.value : old.getStringValue(path);
                            if (CldrUtility.INHERITANCE_MARKER.equals(oldRawValue)) {
                                oldRawValue = old.getBaileyValue(path, null, null);
                            }
                            // ignore differences due to old DAIP
                            oldValue = dontDaipValue(oldRawValue, path) ? oldRawValue : daip.processInput(path, oldRawValue, null);
                        }
                        if (highLevelOnly && new SuspiciousChange(oldValue, currentValue, path, locale).isDisruptive() == false) {
                            continue;
                        }
                        // handle non-distinguishing attributes
                        addPathDiff(sourceDir, old, current, locale, ph, diff);

                        addValueDiff(sourceDir, oldValue, currentValue, locale, ph, diff, diffAll);
                    }
                }
            }
            // one table per base, covering every directory and locale of that base
            writeDiffs(anchors, base, diff, tsvFile, counts);
            diff.clear();
        }
        writeDiffs(diffAll);

        writeCounter(tsvCountFile, "Count", counts);
    }
}
parts.removeAttribute(-1, "type"); 508 } 509 String cp = parts.getAttributeValue(-1, "cp"); 510 parts.setAttribute(-1, "cp", "[" + cp + "]"); 511 512 String oldStylePath = parts.toString(); 513 String temp = old.getStringValue(oldStylePath); 514 if (temp == null) { 515 hasReformattedValue.value = Boolean.FALSE; 516 } else if (isTts) { 517 String temp2 = old.getFullXPath(oldStylePath); 518 value.value = XPathParts.getFrozenInstance(temp2).getAttributeValue(-1, "tts"); 519 hasReformattedValue.value = Boolean.TRUE; 520 } else { 521 value.value = temp.replaceAll("\\s*;\\s*", " | "); 522 hasReformattedValue.value = Boolean.TRUE; 523 } 524 return old.getSourceLocaleID(oldStylePath, oldStatus); 525 } 526 527 PathStarrer starrer = new PathStarrer().setSubstitutionPattern("%A"); 528 getPathHeader(String path)529 private PathHeader getPathHeader(String path) { 530 try { 531 PathHeader ph = phf.fromPath(path); 532 if (ph.getPageId() == PageId.Unknown) { 533 String star = starrer.set(path); 534 badHeaders.add(star); 535 return null; 536 } 537 return ph; 538 } catch (Exception e) { 539 String star = starrer.set(path); 540 badHeaders.add(star); 541 // System.err.println("Skipping path with bad PathHeader: " + path); 542 return null; 543 } 544 } 545 makeWithFallback(Factory oldFactory, String locale, boolean resolving)546 private CLDRFile makeWithFallback(Factory oldFactory, String locale, boolean resolving) { 547 if (oldFactory == null) { 548 return EMPTY_CLDR; 549 } 550 CLDRFile old; 551 String oldLocale = locale; 552 while (true) { // fall back for old, maybe to root 553 try { 554 old = oldFactory.make(oldLocale, resolving); 555 break; 556 } catch (Exception e) { 557 oldLocale = LocaleIDParser.getParent(oldLocale); 558 if (oldLocale == null) { 559 return EMPTY_CLDR; 560 } 561 } 562 } 563 return old; 564 } 565 addPathDiff(File sourceDir, CLDRFile old, CLDRFile current, String locale, PathHeader ph, Set<PathDiff> diff2)566 private void addPathDiff(File sourceDir, CLDRFile old, 
CLDRFile current, String locale, PathHeader ph, Set<PathDiff> diff2) { 567 String path = ph.getOriginalPath(); 568 String fullPathCurrent = current.getFullXPath(path); 569 String fullPathOld = old.getFullXPath(path); 570 if (Objects.equals(fullPathCurrent, fullPathOld)) { 571 return; 572 } 573 XPathParts pathPlain = XPathParts.getFrozenInstance(path); 574 XPathParts pathCurrent = fullPathCurrent == null ? pathPlain : XPathParts.getFrozenInstance(fullPathCurrent); 575 XPathParts pathOld = fullPathOld == null ? pathPlain : XPathParts.getFrozenInstance(fullPathOld); 576 TreeSet<String> fullAttributes = null; 577 int size = pathCurrent.size(); 578 String parentAndName = parentAndName(sourceDir, locale); 579 for (int elementIndex = 0; elementIndex < size; ++elementIndex) { // will have same size 580 Collection<String> distinguishing = pathPlain.getAttributeKeys(elementIndex); 581 Collection<String> attributesCurrent = pathCurrent.getAttributeKeys(elementIndex); 582 Collection<String> attributesOld = pathCurrent.getAttributeKeys(elementIndex); 583 if (attributesCurrent.isEmpty() && attributesOld.isEmpty()) { 584 continue; 585 } 586 if (fullAttributes == null) { 587 fullAttributes = new TreeSet<>(); 588 } else { 589 fullAttributes.clear(); 590 } 591 fullAttributes.addAll(attributesCurrent); 592 fullAttributes.addAll(attributesOld); 593 fullAttributes.removeAll(distinguishing); 594 fullAttributes.removeAll(DONT_CARE); 595 596 // at this point we only have non-distinguishing 597 for (String attribute : fullAttributes) { 598 String attributeValueOld = pathOld.getAttributeValue(elementIndex, attribute); 599 String attributeValueCurrent = pathCurrent.getAttributeValue(elementIndex, attribute); 600 if (Objects.equals(attributeValueOld, attributeValueCurrent)) { 601 addChange(parentAndName, ChangeType.same, 1); 602 continue; 603 } 604 addChange(parentAndName, ChangeType.get(attributeValueOld, attributeValueCurrent), 1); 605 606 PathDiff row = new PathDiff( 607 locale, 608 new 
PathHeaderSegment(ph, size - elementIndex - 1, attribute), 609 attributeValueOld, 610 attributeValueCurrent); 611 if (DEBUG) { 612 System.out.println(row); 613 } 614 diff2.add(row); 615 } 616 } 617 } 618 parentAndName(File sourceDir, String locale)619 private String parentAndName(File sourceDir, String locale) { 620 return sourceDir.getName() + "/" + locale + ".xml"; 621 } 622 addValueDiff(File sourceDir, String valueOld, String valueCurrent, String locale, PathHeader ph, Set<PathDiff> diff, Relation<PathHeader, String> diffAll)623 private void addValueDiff(File sourceDir, String valueOld, String valueCurrent, String locale, PathHeader ph, Set<PathDiff> diff, 624 Relation<PathHeader, String> diffAll) { 625 // handle stuff that can be split specially 626 Splitter splitter = getSplitter(ph.getOriginalPath(), valueOld, valueCurrent); 627 int count = 1; 628 String parentAndName = parentAndName(sourceDir, locale); 629 if (Objects.equals(valueCurrent, valueOld)) { 630 if (splitter != null && valueCurrent != null) { 631 count = splitHandlingNull(splitter, valueCurrent).size(); 632 } 633 addChange(parentAndName, ChangeType.same, count); 634 } else { 635 if (splitter != null) { 636 List<String> setOld = splitHandlingNull(splitter, valueOld); 637 List<String> setNew = splitHandlingNull(splitter, valueCurrent); 638 int[] sameAndNotInSecond = new int[2]; 639 valueOld = getFilteredValue(setOld, setNew, sameAndNotInSecond); 640 addChange(parentAndName, ChangeType.same, sameAndNotInSecond[0]); 641 addChange(parentAndName, ChangeType.deleted, sameAndNotInSecond[1]); 642 sameAndNotInSecond[0] = sameAndNotInSecond[1] = 0; 643 valueCurrent = getFilteredValue(setNew, setOld, sameAndNotInSecond); 644 addChange(parentAndName, ChangeType.added, sameAndNotInSecond[1]); 645 } else if (hasUnicodeSetValue(ph.getOriginalPath())) { 646 UnicodeSet usOld = valueOld == null ? UnicodeSet.EMPTY : new UnicodeSet(valueOld); 647 UnicodeSet usCurrent = valueCurrent == null ? 
UnicodeSet.EMPTY : new UnicodeSet(valueCurrent); 648 UnicodeSet oldOnly = new UnicodeSet(usOld).removeAll(usCurrent); 649 UnicodeSet currentOnly = new UnicodeSet(usCurrent).removeAll(usOld); 650 addChange(parentAndName, ChangeType.same, usOld.size()-oldOnly.size()); 651 addChange(parentAndName, ChangeType.deleted, oldOnly.size()); 652 addChange(parentAndName, ChangeType.added, currentOnly.size()); 653 valueOld = usOld.size()==oldOnly.size() ? oldOnly.toPattern(false) : "…" + oldOnly + "…"; 654 valueCurrent = usCurrent.size()==currentOnly.size() ? currentOnly.toPattern(false) : "…" + currentOnly + "…"; 655 } else { 656 addChange(parentAndName, ChangeType.get(valueOld, valueCurrent), count); 657 } 658 PathDiff row = new PathDiff(locale, new PathHeaderSegment(ph, -1, ""), valueOld, valueCurrent); 659 diff.add(row); 660 diffAll.put(ph, locale); 661 } 662 } 663 hasUnicodeSetValue(String xpath)664 private boolean hasUnicodeSetValue(String xpath) { 665 return xpath.startsWith("//ldml/characters/exemplar"); 666 } 667 splitHandlingNull(Splitter splitter, String value)668 private List<String> splitHandlingNull(Splitter splitter, String value) { 669 return value == null ? 
null : splitter.splitToList(value); 670 } 671 getSplitter(String path, String valueOld, String valueCurrent)672 private Splitter getSplitter(String path, String valueOld, String valueCurrent) { 673 if (path.contains("/annotation") && !path.contains("tts")) { 674 return DtdData.BAR_SPLITTER; 675 } else if (valueOld != null && valueOld.contains("\n") || valueCurrent != null && valueCurrent.contains("\n")) { 676 return DtdData.CR_SPLITTER; 677 } else { 678 return null; 679 } 680 } 681 682 /** 683 * Return string with all lines from linesToRemove removed 684 * @param toGetStringFor 685 * @param linesToRemove 686 * @return 687 */ getFilteredValue(Collection<String> toGetStringFor, Collection<String> linesToRemove, int[] sameAndDiff)688 private String getFilteredValue(Collection<String> toGetStringFor, Collection<String> linesToRemove, 689 int[] sameAndDiff) { 690 if (toGetStringFor == null) { 691 return null; 692 } 693 StringBuilder buf = new StringBuilder(); 694 Set<String> toRemove = linesToRemove == null ? 
Collections.emptySet() : new HashSet<>(linesToRemove); 695 boolean removed = false; 696 for (String old : toGetStringFor) { 697 if (toRemove.contains(old)) { 698 removed = true; 699 sameAndDiff[0]++; 700 } else { 701 sameAndDiff[1]++; 702 if (removed) { 703 buf.append("…\n"); 704 removed = false; 705 } 706 buf.append(old).append('\n'); 707 } 708 } 709 if (removed) { 710 buf.append("…"); 711 } else if (buf.length() > 0) { 712 buf.setLength(buf.length() - 1); // remove final \n 713 } 714 return buf.toString(); 715 } 716 writeDiffs(Anchors anchors, String file, String title, Multimap<PathHeader, String> bcp, PrintWriter tsvFile)717 private void writeDiffs(Anchors anchors, String file, String title, Multimap<PathHeader, String> bcp, PrintWriter tsvFile) { 718 if (bcp.isEmpty()) { 719 System.out.println("\tDeleting: " + DIR + "/" + file); 720 new File(DIR + file).delete(); 721 return; 722 } 723 TablePrinter tablePrinter = new TablePrinter() 724 .addColumn("Section", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 725 .addColumn("Page", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)//.setRepeatDivider(true) 726 .addColumn("Header", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 727 .addColumn("Code", "class='source'", null, "class='source'", false) 728 .addColumn("Old", "class='target'", null, "class='target'", false) // width='20%' 729 .addColumn("New", "class='target'", null, "class='target'", false); // width='20%' 730 PathHeader ph1 = phf.fromPath("//supplementalData/metadata/alias/subdivisionAlias[@type=\"TW-TXQ\"]/_reason"); 731 PathHeader ph2 = phf.fromPath("//supplementalData/metadata/alias/subdivisionAlias[@type=\"LA-XN\"]/_replacement"); 732 ph1.compareTo(ph2); 733 for (Entry<PathHeader, Collection<String>> entry : bcp.asMap().entrySet()) { 734 PathHeader ph = entry.getKey(); 735 if (ph.getPageId() == DEBUG_PAGE_ID) { 736 System.out.println(ph + "\t" + ph.getOriginalPath()); 737 } 
738 for (String value : entry.getValue()) { 739 String[] oldNew = value.split(SEP); 740 tablePrinter.addRow() 741 .addCell(ph.getSectionId()) 742 .addCell(ph.getPageId()) 743 .addCell(ph.getHeader()) 744 .addCell(ph.getCode()) 745 .addCell(oldNew[0]) 746 .addCell(oldNew[1]) 747 .finishRow(); 748 } 749 } 750 writeTable(anchors, file, tablePrinter, title, tsvFile); 751 } 752 writeDiffs(Relation<PathHeader, String> diffAll)753 private void writeDiffs(Relation<PathHeader, String> diffAll) { 754 TablePrinter tablePrinter = new TablePrinter() 755 .addColumn("Section", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 756 .addColumn("Page", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 757 .addColumn("Header", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 758 .addColumn("Code", "class='source'", null, "class='source'", true) 759 .addColumn("Locales where different", "class='target'", null, "class='target'", true); 760 for (Entry<PathHeader, Set<String>> row : diffAll.keyValuesSet()) { 761 PathHeader ph = row.getKey(); 762 Set<String> locales = row.getValue(); 763 tablePrinter.addRow() 764 .addCell(ph.getSectionId()) 765 .addCell(ph.getPageId()) 766 .addCell(ph.getHeader()) 767 .addCell(ph.getCode()) 768 .addCell(Joiner.on(" ").join(locales)) 769 .finishRow(); 770 } 771 } 772 writeDiffs(Anchors anchors, String file, Set<PathDiff> diff, PrintWriter tsvFile, Counter<PathHeader> counts)773 private void writeDiffs(Anchors anchors, String file, Set<PathDiff> diff, PrintWriter tsvFile, Counter<PathHeader> counts) { 774 if (diff.isEmpty()) { 775 return; 776 } 777 TablePrinter tablePrinter = new TablePrinter() 778 .addColumn("Section", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 779 .addColumn("Page", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 780 .addColumn("Header", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 
781 .addColumn("Code", "class='source'", null, "class='source'", true) 782 .addColumn("Locale", "class='source'", null, "class='source'", true) 783 .addColumn("Old", "class='target'", null, "class='target'", true) // width='20%' 784 .addColumn("New", "class='target'", null, "class='target'", true) // width='20%' 785 .addColumn("Level", "class='target'", null, "class='target'", true); 786 787 for (PathDiff row : diff) { 788 PathHeaderSegment phs = row.get0(); 789 counts.add(phs.get0(), 1); 790 String locale = row.get1(); 791 String oldValue = row.get2(); 792 String currentValue = row.get3(); 793 794 PathHeader ph = phs.get0(); 795 Integer pathIndex = phs.get1(); 796 String attribute = phs.get2(); 797 String specialCode = ph.getCode(); 798 799 if (!attribute.isEmpty()) { 800 specialCode += "_" + attribute; 801 if (pathIndex != 0) { 802 specialCode += "|" + pathIndex; 803 } 804 } 805 Level coverageLevel = SUPPLEMENTAL_DATA_INFO.getCoverageLevel(ph.getOriginalPath(), locale); 806 String fixedOldValue = oldValue == null ? "▷missing◁" : TransliteratorUtilities.toHTML.transform(oldValue); 807 String fixedNewValue = currentValue == null ? 
"▷removed◁" : TransliteratorUtilities.toHTML.transform(currentValue); 808 809 tablePrinter.addRow() 810 .addCell(ph.getSectionId()) 811 .addCell(ph.getPageId()) 812 .addCell(ph.getHeader()) 813 .addCell(specialCode) 814 .addCell(locale) 815 .addCell(fixedOldValue) 816 .addCell(fixedNewValue) 817 .addCell(coverageLevel) 818 .finishRow(); 819 820 } 821 String title = ENGLISH.getName(file) + " " + chartNameCap; 822 writeTable(anchors, file, tablePrinter, title, tsvFile); 823 824 diff.clear(); 825 } 826 827 private class ChartDeltaSub extends Chart { 828 private String title; 829 private String file; 830 private TablePrinter tablePrinter; 831 private PrintWriter tsvFile; 832 ChartDeltaSub(String title, String file, TablePrinter tablePrinter, PrintWriter tsvFile)833 private ChartDeltaSub(String title, String file, TablePrinter tablePrinter, PrintWriter tsvFile) { 834 super(); 835 this.title = title; 836 this.file = file; 837 this.tablePrinter = tablePrinter; 838 this.tsvFile = tsvFile; 839 } 840 841 @Override getDirectory()842 public String getDirectory() { 843 return DIR; 844 } 845 846 @Override getShowDate()847 public boolean getShowDate() { 848 return false; 849 } 850 851 @Override getTitle()852 public String getTitle() { 853 return title; 854 } 855 856 @Override getFileName()857 public String getFileName() { 858 return file; 859 } 860 861 @Override getExplanation()862 public String getExplanation() { 863 return "<p>Lists data fields that differ from the last major version (see versions above)." 864 + " Inherited differences in locales are suppressed, except where the source locales are different. 
" 865 + "<p>"; 866 } 867 868 @Override writeContents(FormattedFileWriter pw)869 public void writeContents(FormattedFileWriter pw) throws IOException { 870 pw.write(tablePrinter.toTable()); 871 tablePrinter.toTsv(tsvFile); 872 } 873 } 874 writeTable(Anchors anchors, String file, TablePrinter tablePrinter, String title, PrintWriter tsvFile)875 private void writeTable(Anchors anchors, String file, TablePrinter tablePrinter, String title, PrintWriter tsvFile) { 876 ChartDeltaSub chartDeltaSub = new ChartDeltaSub(title, file, tablePrinter, tsvFile); 877 chartDeltaSub.writeChart(anchors); 878 } 879 writeNonLdmlPlain(Anchors anchors)880 private void writeNonLdmlPlain(Anchors anchors) throws IOException { 881 try (PrintWriter tsvFile = FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + "_supp.tsv"); 882 PrintWriter tsvCountFile = FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + "_supp_count.tsv"); 883 ) { 884 tsvFile.println("# Section\tPage\tHeader\tCode\tOld\tNew"); 885 886 Multimap<PathHeader, String> bcp = TreeMultimap.create(); 887 Multimap<PathHeader, String> supplemental = TreeMultimap.create(); 888 Multimap<PathHeader, String> transforms = TreeMultimap.create(); 889 890 Counter<PathHeader> countSame = new Counter<>(); 891 Counter<PathHeader> countAdded = new Counter<>(); 892 Counter<PathHeader> countDeleted = new Counter<>(); 893 894 for (String dir : new File(CLDRPaths.BASE_DIRECTORY + "common/").list()) { 895 if (DtdType.ldml.directories.contains(dir) 896 || dir.equals(".DS_Store") 897 || dir.equals("dtd") // TODO as flat files 898 || dir.equals("properties") // TODO as flat files 899 || dir.equals("uca") // TODO as flat files 900 ) { 901 continue; 902 } 903 File dirOld = new File(PREV_CHART_VERSION_DIRECTORY + "common/" + dir); 904 System.out.println("\tLast dir: " + dirOld); 905 File dir2 = new File(CHART_VERSION_DIRECTORY + "common/" + dir); 906 System.out.println("\tCurr dir: " + dir2); 907 908 for (String file : dir2.list()) { 909 
if (!file.endsWith(".xml")) { 910 continue; 911 } 912 String parentAndFile = dir + "/" + file; 913 String base = file.substring(0, file.length() - 4); 914 if (fileFilter != null && !fileFilter.reset(dir + "/" + base).find()) { 915 if (verbose) { // && verbose_skipping 916 System.out.println("SKIPPING: " + dir + "/" + base); 917 } 918 continue; 919 } 920 if (highLevelOnly && !HighLevelPaths.localeIsHighLevel(base)) { 921 continue; 922 } 923 if (verbose) { 924 System.out.println(file); 925 } 926 Relation<PathHeader, String> contentsOld = fillData(dirOld.toString() + "/", file, base); 927 Relation<PathHeader, String> contents2 = fillData(dir2.toString() + "/", file, base); 928 929 Set<PathHeader> keys = new TreeSet<>(CldrUtility.ifNull(contentsOld.keySet(), Collections.<PathHeader> emptySet())); 930 keys.addAll(CldrUtility.ifNull(contents2.keySet(), Collections.<PathHeader> emptySet())); 931 DtdType dtdType = null; 932 for (PathHeader key : keys) { 933 String originalPath = key.getOriginalPath(); 934 if (highLevelOnly && !HighLevelPaths.pathIsHighLevel(originalPath, base)) { 935 continue; 936 } 937 boolean isTransform = originalPath.contains("/tRule"); 938 if (dtdType == null) { 939 dtdType = DtdType.fromPath(originalPath); 940 } 941 Multimap<PathHeader, String> target = dtdType == DtdType.ldmlBCP47 ? bcp 942 : isTransform ? 
transforms 943 : supplemental; 944 Set<String> setOld = contentsOld.get(key); 945 Set<String> set2 = contents2.get(key); 946 947 if (Objects.equals(setOld, set2)) { 948 if (file.equals(DEBUG_FILE)) { // for debugging 949 System.out.println("**Same: " + key + "\t" + setOld); 950 } 951 addChange(parentAndFile, ChangeType.same, setOld.size()); 952 countSame.add(key, 1); 953 continue; 954 } 955 if (setOld == null) { 956 addChange(parentAndFile, ChangeType.added, set2.size()); 957 for (String s : set2) { 958 addRow(target, key, "▷missing◁", s); 959 countAdded.add(key, 1); 960 } 961 } else if (set2 == null) { 962 addChange(parentAndFile, ChangeType.deleted, setOld.size()); 963 for (String s : setOld) { 964 addRow(target, key, s, "▷removed◁"); 965 countDeleted.add(key, 1); 966 } 967 } else { 968 Set<String> s1MOld = setOld; 969 Set<String> s2M1 = set2; 970 if (s1MOld.isEmpty()) { 971 addRow(target, key, "▷missing◁", Joiner.on(", ").join(s2M1)); 972 addChange(parentAndFile, ChangeType.added, s2M1.size()); 973 countAdded.add(key, 1); 974 } else if (s2M1.isEmpty()) { 975 addRow(target, key, Joiner.on(", ").join(s1MOld), "▷removed◁"); 976 addChange(parentAndFile, ChangeType.deleted, s1MOld.size()); 977 countDeleted.add(key, 1); 978 } else { 979 String valueOld; 980 String valueCurrent; 981 982 int[] sameAndNotInSecond = new int[2]; 983 valueOld = getFilteredValue(s1MOld, s1MOld, sameAndNotInSecond); 984 addChange(parentAndFile, ChangeType.same, sameAndNotInSecond[0]); 985 countSame.add(key, 1); 986 addChange(parentAndFile, ChangeType.deleted, sameAndNotInSecond[1]); 987 sameAndNotInSecond[1] = 0; 988 countDeleted.add(key, 1); 989 valueCurrent = getFilteredValue(s2M1, s1MOld, sameAndNotInSecond); 990 addChange(parentAndFile, ChangeType.added, sameAndNotInSecond[1]); 991 addRow(target, key, valueOld, valueCurrent); 992 countAdded.add(key, 1); 993 } 994 } 995 } 996 } 997 } 998 writeDiffs(anchors, "bcp47", "¤¤BCP47 " + chartNameCap, bcp, tsvFile); 999 writeDiffs(anchors, 
"supplemental-data", "¤¤Supplemental " + chartNameCap, supplemental, tsvFile); 1000 writeDiffs(anchors, "transforms", "¤¤Transforms " + chartNameCap, transforms, tsvFile); 1001 1002 writeCounter(tsvCountFile, "CountSame", countSame); 1003 tsvCountFile.println(); 1004 writeCounter(tsvCountFile, "CountAdded", countAdded); 1005 tsvCountFile.println(); 1006 writeCounter(tsvCountFile, "CountDeleted", countDeleted); 1007 1008 //tsvFile.println("# EOF"); 1009 //tsvCountFile.println("# EOF"); 1010 } 1011 } 1012 writeCounter(PrintWriter tsvFile, String title, Counter<PathHeader> countDeleted)1013 private void writeCounter(PrintWriter tsvFile, String title, Counter<PathHeader> countDeleted) { 1014 tsvFile.append("# " 1015 + title 1016 + "\tSection\tPage\tSubhead\tCode\n\n"); 1017 for (R2<Long, PathHeader> entry : countDeleted.getEntrySetSortedByCount(false, null)) { 1018 tsvFile.println(entry.get0() + "\t" + entry.get1()); 1019 } 1020 } 1021 addRow(Multimap<PathHeader, String> target, PathHeader key, String oldItem, String newItem)1022 private void addRow(Multimap<PathHeader, String> target, PathHeader key, String oldItem, String newItem) { 1023 if (oldItem.isEmpty() || newItem.isEmpty()) { 1024 throw new IllegalArgumentException(); 1025 } 1026 target.put(key, oldItem + SEP + newItem); 1027 } 1028 1029 /** 1030 * Fill in the chart data for the specified file 1031 * 1032 * @param directory 1033 * @param file like "xx.xml" where "xx" may be a locale name 1034 * @param fileBase like "xx", same as file without ".xml" 1035 * @return the Relation 1036 */ fillData(String directory, String file, String fileBase)1037 private Relation<PathHeader, String> fillData(String directory, String file, String fileBase) { 1038 Relation<PathHeader, String> results = Relation.of(new TreeMap<PathHeader, Set<String>>(), TreeSet.class); 1039 1040 List<Pair<String, String>> contents1; 1041 try { 1042 contents1 = XMLFileReader.loadPathValues(directory + file, new ArrayList<Pair<String, String>>(), 
true); 1043 } catch (Exception e) { 1044 /* 1045 * This happens with e = ICUException, file = grammaticalFeatures.xml in cldr-36.0 1046 */ 1047 return results; 1048 } 1049 DtdType dtdType = null; 1050 DtdData dtdData = null; 1051 Multimap<String, String> extras = TreeMultimap.create(); 1052 1053 for (Pair<String, String> s : contents1) { 1054 String path = s.getFirst(); 1055 if (highLevelOnly && !HighLevelPaths.pathIsHighLevel(path, fileBase /* locale, or not */)) { 1056 continue; 1057 } 1058 String value = s.getSecond(); 1059 if (dtdType == null) { 1060 /* 1061 * Note: although dtdType and dtdData depend on path, they are the same for all paths 1062 * in the same file, so they only need to be set the first time through this loop. 1063 * 1064 * Note: the current DTD in CLDR_BASE_DIR is supposed to be backward-compatible, that is, to support 1065 * paths from all archived versions. Any exception to that rule (e.g., for "grammaticalState") is a bug. 1066 */ 1067 dtdType = DtdType.fromPath(path); 1068 dtdData = DtdData.getInstance(dtdType, CLDR_BASE_DIR); 1069 } 1070 XPathParts pathPlain = XPathParts.getFrozenInstance(path); 1071 try { 1072 if (dtdData.isMetadata(pathPlain)) { 1073 continue; 1074 } 1075 } catch (NullPointerException e) { 1076 /* 1077 * TODO: this happens for "grammaticalState" in this path from version 37: 1078 * //supplementalData/grammaticalData/grammaticalFeatures[@targets="nominal"][@locales="he"]/grammaticalState[@values="definite indefinite construct"] 1079 * Reference: https://unicode-org.atlassian.net/browse/CLDR-13306 1080 */ 1081 System.out.println("Caught NullPointerException in fillData calling isMetadata, path = " + path); 1082 continue; 1083 } 1084 Set<String> pathForValues = dtdData.getRegularizedPaths(pathPlain, extras); 1085 if (pathForValues != null) { 1086 for (String pathForValue : pathForValues) { 1087 PathHeader pathHeader = phf.fromPath(pathForValue); 1088 if (pathHeader.getPageId() == PageId.Suppress) { 1089 continue; 1090 } 
1091 Splitter splitter = DtdData.getValueSplitter(pathPlain); 1092 for (String line : splitter.split(value)) { 1093 // special case # in transforms 1094 if (isComment(pathPlain, line)) { 1095 continue; 1096 } 1097 results.put(pathHeader, line); 1098 } 1099 } 1100 } 1101 for (Entry<String, Collection<String>> entry : extras.asMap().entrySet()) { 1102 final String extraPath = entry.getKey(); 1103 final PathHeader pathHeaderExtra = phf.fromPath(extraPath); 1104 if (pathHeaderExtra.getPageId() == PageId.Suppress) { 1105 continue; 1106 } 1107 final Collection<String> extraValue = entry.getValue(); 1108 if (isExtraSplit(extraPath)) { 1109 for (String items : extraValue) { 1110 results.putAll(pathHeaderExtra, DtdData.SPACE_SPLITTER.splitToList(items)); 1111 } 1112 } else { 1113 results.putAll(pathHeaderExtra, extraValue); 1114 } 1115 } 1116 if (pathForValues == null && !value.isEmpty()) { 1117 System.err.println("Shouldn't happen"); 1118 } 1119 } 1120 return results; 1121 } 1122 isExtraSplit(String extraPath)1123 private boolean isExtraSplit(String extraPath) { 1124 if (extraPath.endsWith("/_type") && extraPath.startsWith("//supplementalData/metaZones/mapTimezones")) { 1125 return true; 1126 } 1127 return false; 1128 } 1129 isComment(XPathParts pathPlain, String line)1130 private static boolean isComment(XPathParts pathPlain, String line) { 1131 if (pathPlain.contains("transform")) { 1132 if (line.startsWith("#")) { 1133 return true; 1134 } 1135 } 1136 return false; 1137 } 1138 1139 /** 1140 * Determine when changes to the values for paths should be treated as 1141 * potentially "disruptive" for the purpose of "churn" reporting 1142 */ 1143 private class SuspiciousChange { 1144 /** 1145 * the old and new values, such as "HH:mm–HH:mm v" and "HH:mm – HH:mm v" 1146 */ 1147 private String oldValue, newValue; 1148 1149 /** 1150 * the path, such as 
//ldml/dates/calendars/calendar[@type="gregorian"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id="Hmv"]/greatestDifference[@id="H"] 1151 */ 1152 private String path; 1153 1154 /** 1155 * the locale (such as "doi") in which the path was found, or null, or possibly 1156 * the base file name without extension, like "xx" if the file name is "xx.xml", 1157 * where "xx" may or may not be a locale; e.g., "supplementalData" 1158 */ 1159 private String locale; 1160 SuspiciousChange(String oldValue, String newValue, String path, String locale)1161 SuspiciousChange(String oldValue, String newValue, String path, String locale) { 1162 this.oldValue = oldValue; 1163 this.newValue = newValue; 1164 this.path = path; 1165 this.locale = locale; 1166 } 1167 1168 /** 1169 * Is the change from the old value to the new value, for this path and locale, potentially disruptive? 1170 * 1171 * @return true or false 1172 */ isDisruptive()1173 public boolean isDisruptive() { 1174 /* 1175 * OR, not AND: certain changes in value are disruptive even for paths not 1176 * otherwise treated as high-level, and changes for high-level paths are 1177 * disruptive even if the changes in values themselves are not identified 1178 * as disruptive. 1179 */ 1180 return valueChangeIsDisruptive() || HighLevelPaths.pathIsHighLevel(path, locale); 1181 } 1182 1183 /** 1184 * Is the change from the old value to the current value potentially disruptive, based (primarily) on 1185 * the values themselves? 1186 * 1187 * @return true or false 1188 */ valueChangeIsDisruptive()1189 private boolean valueChangeIsDisruptive() { 1190 if (oldValue == null || newValue == null || oldValue.equals(newValue)) { 1191 return false; 1192 } 1193 if (valueChangeIsDisruptiveWhitespaceOnly()) { 1194 return true; 1195 } 1196 return false; 1197 } 1198 1199 /** 1200 * Is the change disruptive whitespace only? 
1201 * Per design doc, "Format changes: second to none on the disruptiveness scale are changes involving spaces such as SPACE -> NBSP 1202 * or NBSP -> Narrow NBSP. Or adding a space somewhere in the format where previously there was none." 1203 * 1204 * @return true or false 1205 */ valueChangeIsDisruptiveWhitespaceOnly()1206 private boolean valueChangeIsDisruptiveWhitespaceOnly() { 1207 /* 1208 * annotations often have changes like "pop gorn", "popgorn", not treated as disruptive 1209 */ 1210 if (path.startsWith("//ldml/annotations")) { 1211 return false; 1212 } 1213 if (removeWhitespace(oldValue).equals(removeWhitespace(newValue))) { 1214 return true; 1215 } 1216 return false; 1217 } 1218 1219 /** 1220 * Remove whitespace from the given string 1221 * 1222 * Remove whitespace as defined by regex \s, and also 1223 * U+00A0 NO-BREAK SPACE 1224 * U+2007 FIGURE SPACE 1225 * U+202F NARROW NO-BREAK SPACE 1226 * 1227 * @param s the string 1228 * @return the modified string 1229 */ removeWhitespace(String s)1230 private String removeWhitespace(String s) { 1231 return s.replaceAll("[\\s\\u00A0\\u2007\\u202F]", ""); 1232 } 1233 } 1234 1235 /** 1236 * Determine which paths are considered "high-level" paths, i.e., 1237 * paths for which any changes have high potential to cause disruptive "churn". 1238 * Whether a path is high-level sometimes depends on the locale or xml file in 1239 * which it occurs. 1240 * Some paths are high-level regardless of the locale in which they are located. 1241 * Other paths are high-level for some locales but not others. For example, 1242 * //ldml/localeDisplayNames/languages/language[@type="xx"] 1243 * is high level in locale "xx", and maybe "en", but not for all locales. 1244 */ 1245 private static class HighLevelPaths { 1246 /** 1247 * A set of paths to be treated as "high-level". 1248 * These are complete paths to be matched exactly. 1249 * Other paths are recognized by special functions like isHighLevelTerritoryName. 
1250 * 1251 * The ordering and comments are based on the design spec. 1252 */ 1253 final private static Set<String> highLevelPaths = new HashSet<>(Arrays.asList( 1254 /* 1255 * Core data 1256 */ 1257 "//ldml/characters/exemplarCharacters", 1258 "//ldml/numbers/defaultNumberingSystem", 1259 "//ldml/numbers/otherNumberingSystems/native", 1260 /* 1261 * Territory and Language names 1262 * Country/Region names (English and Native names) -- see isHighLevelTerritoryName 1263 * //ldml/localeDisplayName/territories/territory/... 1264 * Language names (English and Native) -- see isHighLevelLangName 1265 * //ldml/localeDisplayNames/languages/language/... 1266 */ 1267 /* 1268 * Date 1269 * Note: "year", "month", etc., below, form a subset (eight) of all possible values for type, 1270 * excluding, for example, "fri" and "zone". If we use starred paths, we would need further complication 1271 * to filter out "fri", "zone", etc. 1272 */ 1273 "//ldml/dates/fields/field[@type=\"year\"]/displayName", 1274 "//ldml/dates/fields/field[@type=\"month\"]/displayName", 1275 "//ldml/dates/fields/field[@type=\"week\"]/displayName", 1276 "//ldml/dates/fields/field[@type=\"day\"]/displayName", 1277 "//ldml/dates/fields/field[@type=\"hour\"]/displayName", 1278 "//ldml/dates/fields/field[@type=\"era\"]/displayName", 1279 "//ldml/dates/fields/field[@type=\"minute\"]/displayName", 1280 "//ldml/dates/fields/field[@type=\"second\"]/displayName", 1281 /* 1282 * First day of week: firstDay in supplementalData.xml; see isHighLevelFirstDay 1283 * First week of year: see isHighLevelWeekOfPreference 1284 */ 1285 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"full\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", 1286 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"long\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", 1287 
"//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"medium\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", 1288 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"short\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", 1289 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"MMMEd\"]", 1290 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"MEd\"]", 1291 /* 1292 * Time 1293 */ 1294 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"full\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", 1295 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"long\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", 1296 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"medium\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", 1297 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"short\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", 1298 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"am\"]", 1299 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=\"am\"]", 1300 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"pm\"]", 1301 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=\"pm\"]", 1302 /* 1303 * Currency (English and Native) -- see 
isHighLevelCurrencyName 1304 * E.g., //ldml/numbers/currencies/currency[@type=\"KRW\"]/displayName" 1305 * 1306 * ISO Currency Code: SupplementalData.xml match <region iso3166> -- see isHighLevelCurrencyCode 1307 */ 1308 /* 1309 * Currency Formats 1310 * a. Currency thousand separator 1311 * b. Currency decimal separator 1312 * c. Currency Symbol //ldml/numbers/currencies/currency[@type="CNY"]/symbol 1313 * d. Currency Symbol Narrow //ldml/numbers/currencies/currency[@type=\"CNY\"]/symbol[@alt=\"narrow\"]" 1314 * 1315 * See isHighLevelCurrencySeparatorOrSymbol 1316 */ 1317 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", 1318 "//ldml/numbers/currencyFormats[@numberSystem=\"arab\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", 1319 /* 1320 * Number Symbols 1321 */ 1322 "//ldml/numbers/minimumGroupingDigits", 1323 "//ldml/numbers/symbols[@numberSystem=\"latn\"]/decimal", 1324 "//ldml/numbers/symbols[@numberSystem=\"latn\"]/group", 1325 "//ldml/numbers/symbols[@numberSystem=\"arab\"]/decimal", 1326 "//ldml/numbers/symbols[@numberSystem=\"arab\"]/group", 1327 /* 1328 * Number formats 1329 */ 1330 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength/decimalFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", 1331 "//ldml/numbers/percentFormats[@numberSystem=\"latn\"]/percentFormatLength/percentFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", 1332 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength/currencyFormat[@type=\"accounting\"]/pattern[@type=\"standard\"]", 1333 "//ldml/numbers/decimalFormats[@numberSystem=\"arab\"]/decimalFormatLength/decimalFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", 1334 "//ldml/numbers/percentFormats[@numberSystem=\"arab\"]/percentFormatLength/percentFormat[@type=\"standard\"]/pattern[@type=\"standard\"]" 1335 /* 1336 * "Complementary 
Observations" 1337 */ 1338 /* 1339 * Changes to language aliases (supplementalMetaData) -- see isHighLevelLangAlias 1340 * E.g., //supplementalData/metadata/alias/languageAlias[@type="aar"] 1341 */ 1342 /* 1343 * Changes in the containment graph -- see isHighLevelTerritoryContainment 1344 * Data mostly (or entirely?) from M49 standard, thus CLDR has limited control. 1345 * Users use the containment graph in a variety of ways. 1346 * E.g., //supplementalData/territoryContainment/group[@type="003"][@contains="013 021 029"] 1347 */ 1348 /* 1349 * Format changes: second to none on the disruptiveness scale are changes involving spaces such as SPACE -> NBSP 1350 * or NBSP -> Narrow NBSP. Or adding a space somewhere in the format where previously there was none. 1351 * -- see SuspiciousChange.valueChangeIsDisruptiveWhitespaceOnly 1352 */ 1353 /* 1354 * TODO: per design doc, "Adding a timezone" 1355 * TODO: per design doc, "Changes of symbols or codes that are cross-locale in some way such as the unknown 1356 * currency symbol change '???' -> '¤'." 1357 * TODO: per design doc, "Change in character properties (not a CLDR but a Unicode change), and here especially 1358 * newly adding or removing punctuation. Frequently irritates parsers." 1359 */ 1360 )); 1361 1362 static Pattern currencyPattern = Pattern.compile("^//ldml/numbers/currencies/currency.*/displayName.*"); 1363 1364 /** 1365 * Should the given path in the given locale be taken into account for generating "churn" reports? 
1366 * 1367 * @param path the path of interest 1368 * @param locale the locale in which the path was found, or null, or possibly 1369 * the base file name without extension, like "xx" if the file name is "xx.xml", 1370 * where "xx" may or may not be a locale; e.g., "supplementalData" 1371 * @return true if it counts, else false to ignore 1372 */ pathIsHighLevel(String path, String locale)1373 private static boolean pathIsHighLevel(String path, String locale) { 1374 if (path == null || locale == null) { 1375 return false; 1376 } 1377 if (!localeIsHighLevel(locale)) { // for efficiency, this should be caught at a higher level 1378 System.out.println("locale [" + locale + "] failed localeIsHighLevel in pathIsHighLevel; path = " + path); 1379 return false; 1380 } 1381 if (pathIsReallyHighLevel(path, locale)) { 1382 if (verboseHighLevelReporting) { 1383 recordHighLevelMatch(path); 1384 } 1385 return true; 1386 } 1387 return false; 1388 } 1389 pathIsReallyHighLevel(String path, String locale)1390 private static boolean pathIsReallyHighLevel(String path, String locale) { 1391 if (highLevelPaths.contains(path)) { 1392 return true; 1393 } else if (isHighLevelTerritoryName(path, locale)) { 1394 return true; 1395 } else if (isHighLevelLangName(path, locale)) { 1396 return true; 1397 } else if (isHighLevelCurrencyName(path, locale)) { 1398 return true; 1399 } else if (isHighLevelCurrencyCode(path, locale)) { 1400 return true; 1401 } else if (isHighLevelCurrencySeparatorOrSymbol(path, locale)) { 1402 return true; 1403 } else if (isHighLevelLangAlias(path, locale)) { 1404 return true; 1405 } else if (isHighLevelTerritoryContainment(path, locale)) { 1406 return true; 1407 } else if (isHighLevelFirstDay(path, locale)) { 1408 return true; 1409 } else if (isHighLevelWeekOfPreference(path, locale)) { 1410 return true; 1411 } 1412 return false; 1413 } 1414 1415 /** 1416 * Is the given locale, or base name, to be considered for "high level" churn report? 
1417 * 1418 * @param locale the locale string, or base name like "supplementalData" as in "supplementalData.xml" 1419 * @return true or false 1420 */ localeIsHighLevel(String locale)1421 private static boolean localeIsHighLevel(String locale) { 1422 return SubmissionLocales.CLDR_OR_HIGH_LEVEL_LOCALES.contains(locale) 1423 || "supplementalData".equals(locale); 1424 } 1425 1426 /** 1427 * Changes to language aliases (supplemental metadata) 1428 * E.g., //supplementalData/metadata/alias/languageAlias[@type="aar"] 1429 * 1430 * @param path 1431 * @param locale must be "supplementalData" to match 1432 * @return true or false 1433 */ isHighLevelLangAlias(String path, String locale)1434 private static boolean isHighLevelLangAlias(String path, String locale) { 1435 if ("supplementalData".equals(locale)) { 1436 if (path.startsWith("//supplementalData/metadata/alias/languageAlias")) { 1437 return true; 1438 } 1439 } 1440 return false; 1441 } 1442 1443 /** 1444 * Changes in the containment graph 1445 * Data mostly (or entirely?) from M49 standard, thus CLDR has limited control. 1446 * Users use the containment graph in a variety of ways. 1447 * E.g., //supplementalData/territoryContainment/group[@type="003"][@contains="013 021 029"] 1448 * 1449 * @param path 1450 * @param locale must be "supplementalData" to match 1451 * @return true or false 1452 */ isHighLevelTerritoryContainment(String path, String locale)1453 private static boolean isHighLevelTerritoryContainment(String path, String locale) { 1454 if ("supplementalData".equals(locale)) { 1455 if (path.startsWith("//supplementalData/territoryContainment")) { 1456 return true; 1457 } 1458 } 1459 return false; 1460 } 1461 1462 /** 1463 * Is the given path a high-level territory name path in the given locale? 
1464 * 1465 * E.g., //ldml/localeDisplayNames/territories/territory[@type="NNN"] 1466 * if type "NNN" CORRESPONDS TO the locale or the locale is "en" 1467 * 1468 * English names (en.xml): match all types 1469 * Native: check each territory type NNN corresponding to the given locale 1470 * 1471 * Exclude "alt" 1472 * 1473 * @param path 1474 * @param locale 1475 * @return true or false 1476 */ isHighLevelTerritoryName(String path, String locale)1477 private static boolean isHighLevelTerritoryName(String path, String locale) { 1478 if (path.startsWith("//ldml/localeDisplayNames/territories/territory") 1479 && !path.contains("[@alt=")) { 1480 if ("en".equals(locale)) { 1481 return true; 1482 } 1483 CoverageVariableInfo cvi = SUPPLEMENTAL_DATA_INFO.getCoverageVariableInfo(locale); 1484 if (cvi != null) { 1485 for (String type : cvi.targetTerritories) { 1486 if (path.contains("[@type=\"" + type + "\"]")) { 1487 return true; 1488 } 1489 } 1490 } 1491 } 1492 return false; 1493 } 1494 1495 /** 1496 * Is the given path a high-level language name path in the given locale? 1497 * 1498 * E.g., //ldml/localeDisplayNames/languages/language[@type="xx"] 1499 * if type "xx" matches the locale or the locale is "en" 1500 * 1501 * Exclude "alt" 1502 * 1503 * @param path 1504 * @param locale 1505 * @return true or false 1506 */ isHighLevelLangName(String path, String locale)1507 private static boolean isHighLevelLangName(String path, String locale) { 1508 if (path.startsWith("//ldml/localeDisplayNames/languages/language") 1509 && !path.contains("[@alt=")) { 1510 if ("en".equals(locale)) { 1511 /* 1512 * English names (en.xml): match all types 1513 */ 1514 return true; 1515 } else if (path.contains("[@type=\"" + locale + "\"]")) { 1516 /* 1517 * Native names: match the type=”xx” of each xml file to identify the Native. 
E.g., type=ko if ko.xml 1518 */ 1519 return true; 1520 } 1521 } 1522 return false; 1523 } 1524 1525 /** 1526 * Is the given path a high-level currency name path in the given locale? 1527 * 1528 * E.g., //ldml/numbers/currencies/currency[@type=\"AAA\"]/displayName 1529 * if type "AAA" CORRESPONDS TO the locale or the locale is "en" 1530 * 1531 * English names (en.xml): match all types 1532 * Native: check each currency type AAA corresponding to the given locale 1533 * 1534 * Do NOT exclude "alt"; e.g., 1535 * //ldml/numbers/currencies/currency[@type="ADP"]/displayName[@alt="proposed-u167-1"] 1536 * 1537 * @param path 1538 * @param locale 1539 * @return true or false 1540 */ isHighLevelCurrencyName(String path, String locale)1541 private static boolean isHighLevelCurrencyName(String path, String locale) { 1542 if (currencyPattern.matcher(path).matches()) { 1543 if ("en".equals(locale)) { 1544 return true; 1545 } 1546 CoverageVariableInfo cvi = SUPPLEMENTAL_DATA_INFO.getCoverageVariableInfo(locale); 1547 if (cvi != null) { 1548 for (String type : cvi.targetCurrencies) { 1549 if (path.contains("[@type=\"" + type + "\"]")) { 1550 return true; 1551 } 1552 } 1553 } 1554 } 1555 return false; 1556 } 1557 1558 /** 1559 * Is the given path a high-level currency code path in the given locale? 1560 * 1561 * E.g., //supplementalData/currencyData/region[@iso3166="AC"]/currency[@iso4217="SHP"][@from="1976-01-01"] 1562 * 1563 * @param path 1564 * @param locale must be "supplementalData" to match 1565 * @return true or false 1566 */ isHighLevelCurrencyCode(String path, String locale)1567 private static boolean isHighLevelCurrencyCode(String path, String locale) { 1568 if ("supplementalData".equals(locale)) { 1569 if (path.contains("iso3166")) { 1570 return true; 1571 } 1572 } 1573 return false; 1574 } 1575 1576 /** 1577 * Is the given path a high-level currency thousands-separator or decimal-separator path in the given locale? 
1578 * 1579 * E.g., //ldml/numbers/currencies/currency[@type="ESP"]/group 1580 * //ldml/numbers/currencies/currency[@type="ESP"]/decimal 1581 * //ldml/numbers/currencies/currency[@type="CNY"]/symbol 1582 * //ldml/numbers/currencies/currency[@type="CNY"]/symbol[@alt="narrow"]" 1583 * 1584 * @param path 1585 * @param locale 1586 * @return true or false 1587 */ isHighLevelCurrencySeparatorOrSymbol(String path, String locale)1588 private static boolean isHighLevelCurrencySeparatorOrSymbol(String path, String locale) { 1589 if (path.startsWith("//ldml/numbers/currencies/currency") 1590 && (path.contains("group") || path.contains("decimal") || path.contains("symbol"))) { 1591 return true; 1592 } 1593 return false; 1594 } 1595 1596 /** 1597 * Is the given path a high-level weekData/firstDay in the given locale? 1598 * 1599 * E.g.,//supplementalData/weekData/firstDay[@day="fri"][@territories="MV"] 1600 * 1601 * @param path 1602 * @param locale must be "supplementalData" to match 1603 * @return true or false 1604 */ isHighLevelFirstDay(String path, String locale)1605 private static boolean isHighLevelFirstDay(String path, String locale) { 1606 if ("supplementalData".equals(locale)) { 1607 if (path.startsWith("//supplementalData/weekData/firstDay")) { 1608 return true; 1609 } 1610 } 1611 return false; 1612 } 1613 1614 /** 1615 * Is the given path a high-level weekOfPreference in the given locale? 
1616 * 1617 * E.g., //supplementalData/weekData/weekOfPreference[@ordering="weekOfYear"][@locales="und"] 1618 * 1619 * @param path 1620 * @param locale must be "supplementalData" to match 1621 * @return true or false 1622 */ isHighLevelWeekOfPreference(String path, String locale)1623 private static boolean isHighLevelWeekOfPreference(String path, String locale) { 1624 if ("supplementalData".equals(locale)) { 1625 if (path.startsWith("//supplementalData/weekData/weekOfPreference")) { 1626 return true; 1627 } 1628 } 1629 return false; 1630 } 1631 1632 /** 1633 * For debugging, testing 1634 */ 1635 private static Set<String> highLevelPathMatched = null; 1636 private static boolean verboseHighLevelReporting = false; 1637 recordHighLevelMatch(String path)1638 private static void recordHighLevelMatch(String path) { 1639 if (highLevelPathMatched == null) { 1640 highLevelPathMatched = new HashSet<>(); 1641 } 1642 highLevelPathMatched.add(path); 1643 } 1644 1645 /** 1646 * For debugging, report on any paths in highLevelPaths that never matched 1647 */ reportHighLevelPathUsage()1648 private static void reportHighLevelPathUsage() { 1649 if (!verboseHighLevelReporting) { 1650 return; 1651 } 1652 if (highLevelPathMatched == null) { 1653 System.out.println("Zero high-level paths were matched!"); 1654 return; 1655 } 1656 for (String path : highLevelPaths) { 1657 if (!highLevelPathMatched.contains(path)) { 1658 System.out.println("Unmatched high-level path: " + path); 1659 } 1660 } 1661 for (String path : highLevelPathMatched) { 1662 if (!highLevelPaths.contains(path)) { 1663 System.out.println("Special matched high-level path: " + path); 1664 } 1665 } 1666 } 1667 } 1668 } 1669