• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import java.io.File;
4 import java.io.IOException;
5 import java.io.PrintWriter;
6 import java.util.ArrayList;
7 import java.util.Arrays;
8 import java.util.Collection;
9 import java.util.Collections;
10 import java.util.HashSet;
11 import java.util.List;
12 import java.util.Map;
13 import java.util.Map.Entry;
14 import java.util.Objects;
15 import java.util.Set;
16 import java.util.TreeMap;
17 import java.util.TreeSet;
18 import java.util.regex.Matcher;
19 import java.util.regex.Pattern;
20 
21 import org.unicode.cldr.draft.FileUtilities;
22 import org.unicode.cldr.test.DisplayAndInputProcessor;
23 import org.unicode.cldr.test.SubmissionLocales;
24 import org.unicode.cldr.tool.FormattedFileWriter.Anchors;
25 import org.unicode.cldr.tool.Option.Options;
26 import org.unicode.cldr.tool.Option.Params;
27 import org.unicode.cldr.util.CLDRConfig;
28 import org.unicode.cldr.util.CLDRFile;
29 import org.unicode.cldr.util.CLDRFile.Status;
30 import org.unicode.cldr.util.CLDRPaths;
31 import org.unicode.cldr.util.CldrUtility;
32 import org.unicode.cldr.util.Counter;
33 import org.unicode.cldr.util.DtdData;
34 import org.unicode.cldr.util.DtdType;
35 import org.unicode.cldr.util.Factory;
36 import org.unicode.cldr.util.LanguageTagParser;
37 import org.unicode.cldr.util.Level;
38 import org.unicode.cldr.util.LocaleIDParser;
39 import org.unicode.cldr.util.Organization;
40 import org.unicode.cldr.util.Pair;
41 import org.unicode.cldr.util.PathHeader;
42 import org.unicode.cldr.util.PathHeader.PageId;
43 import org.unicode.cldr.util.PathStarrer;
44 import org.unicode.cldr.util.PatternCache;
45 import org.unicode.cldr.util.SimpleXMLSource;
46 import org.unicode.cldr.util.StandardCodes;
47 import org.unicode.cldr.util.SupplementalDataInfo;
48 import org.unicode.cldr.util.SupplementalDataInfo.CoverageVariableInfo;
49 import org.unicode.cldr.util.TransliteratorUtilities;
50 import org.unicode.cldr.util.XMLFileReader;
51 import org.unicode.cldr.util.XPathParts;
52 
53 import com.google.common.base.Joiner;
54 import com.google.common.base.Splitter;
55 import com.google.common.collect.Multimap;
56 import com.google.common.collect.TreeMultimap;
57 import com.ibm.icu.impl.Relation;
58 import com.ibm.icu.impl.Row.R2;
59 import com.ibm.icu.impl.Row.R3;
60 import com.ibm.icu.impl.Row.R4;
61 import com.ibm.icu.text.NumberFormat;
62 import com.ibm.icu.text.UnicodeSet;
63 import com.ibm.icu.util.ICUUncheckedIOException;
64 import com.ibm.icu.util.Output;
65 
66 public class ChartDelta extends Chart {
67     private static final boolean verbose_skipping = false;
68 
69     private static final String DEFAULT_DELTA_DIR_NAME = "delta";
70     private static final String DEFAULT_CHURN_DIR_NAME = "churn";
71 
72     private static final boolean SKIP_REFORMAT_ANNOTATIONS = ToolConstants.PREV_CHART_VERSION.compareTo("30") >= 0;
73 
74     private static final PageId DEBUG_PAGE_ID = PageId.DayPeriod;
75 
76     private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = CLDRConfig.getInstance().getSupplementalDataInfo();
77 
78     private enum MyOptions {
79         fileFilter(new Params().setHelp("filter files by dir/locale, eg: ^main/en$ or .*/en").setMatch(".*")),
80         orgFilter(new Params().setHelp("filter files by organization").setMatch(".*")),
81         Vxml(new Params().setHelp("use cldr-aux for the base directory")),
82         coverageFilter(new Params().setHelp("filter files by coverage").setMatch(".*")),
83         directory(new Params().setHelp("Set the output directory name").setDefault(DEFAULT_DELTA_DIR_NAME).setMatch(".*")),
84         verbose(new Params().setHelp("verbose debugging messages")),
85         highLevelOnly(new Params().setHelp("check high-level paths (churn) only").setFlag('H')),
86         ;
87 
88         // BOILERPLATE TO COPY
89         final Option option;
90 
MyOptions(Params params)91         private MyOptions(Params params) {
92             option = new Option(this, params);
93         }
94 
95         private static Options myOptions = new Options();
96         static {
97             for (MyOptions option : MyOptions.values()) {
myOptions.add(option, option.option)98                 myOptions.add(option, option.option);
99             }
100         }
101 
parse(String[] args)102         private static Set<String> parse(String[] args) {
103             return myOptions.parse(MyOptions.values()[0], args, true);
104         }
105     }
106 
107     private final Matcher fileFilter;
108     private final String dirName; // "delta" or "churn" or set as option
109     private final String chartNameCap; // capitalized, e.g., "Delta" or "Churn"
110     private final String DIR; // full path of output folder
111     private final Level minimumPathCoverage;
112     private final boolean verbose;
113 
114     /**
115      * If true, check only high-level paths, i.e., paths for which any changes
116      * have high potential to cause disruptive "churn"
117      */
118     private final boolean highLevelOnly;
119 
main(String[] args)120     public static void main(String[] args) {
121         main(args, false);
122     }
123 
main(String[] args, boolean highLevelOnly)124     public static void main(String[] args, boolean highLevelOnly) {
125         System.out.println("use -DCHART_VERSION=36.0 -DPREV_CHART_VERSION=34.0 to generate the differences between v36 and v34.");
126         MyOptions.parse(args);
127         Matcher fileFilter = !MyOptions.fileFilter.option.doesOccur() ? null : PatternCache.get(MyOptions.fileFilter.option.getValue()).matcher("");
128         if (MyOptions.orgFilter.option.doesOccur()) {
129             if (MyOptions.fileFilter.option.doesOccur()) {
130                 throw new IllegalArgumentException("Can't have both fileFilter and orgFilter");
131             }
132             String rawOrg = MyOptions.orgFilter.option.getValue();
133             Organization org = Organization.fromString(rawOrg);
134             Set<String> locales = StandardCodes.make().getLocaleCoverageLocales(org);
135             fileFilter = PatternCache.get("^(main|annotations)/(" + Joiner.on("|").join(locales) + ")$").matcher("");
136         }
137         Level coverage = !MyOptions.coverageFilter.option.doesOccur() ? null : Level.fromString(MyOptions.coverageFilter.option.getValue());
138         boolean verbose = MyOptions.verbose.option.doesOccur();
139         if (MyOptions.highLevelOnly.option.doesOccur()) {
140             highLevelOnly = true;
141         }
142         String dirName = MyOptions.directory.option.getValue();
143         if (highLevelOnly && DEFAULT_DELTA_DIR_NAME.equals(dirName)) {
144             System.out.println("For highLevelOnly, changing directory from " + DEFAULT_DELTA_DIR_NAME
145                     + " to " + DEFAULT_CHURN_DIR_NAME);
146             dirName = DEFAULT_CHURN_DIR_NAME;
147         }
148         ChartDelta temp = new ChartDelta(fileFilter, coverage, dirName, verbose, highLevelOnly);
149         temp.writeChart(null);
150         temp.showTotals();
151         if (highLevelOnly) {
152             HighLevelPaths.reportHighLevelPathUsage();
153         }
154         System.out.println("Finished. Files may have been created in these directories:");
155         System.out.println(temp.DIR);
156         System.out.println(getTsvDir(temp.DIR, temp.dirName));
157     }
158 
ChartDelta(Matcher fileFilter, Level coverage, String dirName, boolean verbose, boolean highLevelOnly)159     private ChartDelta(Matcher fileFilter, Level coverage, String dirName, boolean verbose, boolean highLevelOnly) {
160         this.fileFilter = fileFilter;
161         this.verbose = verbose;
162         this.highLevelOnly = highLevelOnly;
163         this.dirName = dirName;
164         this.chartNameCap = dirName.substring(0, 1).toUpperCase() + dirName.substring(1);
165         this.DIR = CLDRPaths.CHART_DIRECTORY + dirName;
166         this.minimumPathCoverage = coverage;
167     }
168 
169     private static final String SEP = "\u0001";
170     private static final boolean DEBUG = false;
171     private static final String DEBUG_FILE = null; // "windowsZones.xml";
172     static Pattern fileMatcher = PatternCache.get(".*");
173 
174     static PathHeader.Factory phf = PathHeader.getFactory(ENGLISH);
175     static final Set<String> DONT_CARE = new HashSet<>(Arrays.asList("draft", "standard", "reference"));
176 
177     @Override
getDirectory()178     public String getDirectory() {
179         return DIR;
180     }
181 
182     @Override
getTitle()183     public String getTitle() {
184         return chartNameCap + " Charts";
185     }
186 
187     @Override
getFileName()188     public String getFileName() {
189         return "index";
190     }
191 
192     @Override
getExplanation()193     public String getExplanation() {
194         return "<p>Charts showing the differences from the last version. "
195             + "Titles prefixed by ¤ are special: either the locale data summary or supplemental data. "
196             + "Not all changed data is charted yet. For details see each chart.</p>";
197     }
198 
199     @Override
writeContents(FormattedFileWriter pw)200     public void writeContents(FormattedFileWriter pw) throws IOException {
201         FormattedFileWriter.Anchors anchors = new FormattedFileWriter.Anchors();
202         FileUtilities.copyFile(ChartDelta.class, "index.css", getDirectory());
203         FormattedFileWriter.copyIncludeHtmls(getDirectory(), true);
204         counter.clear();
205         fileCounters.clear();
206         writeNonLdmlPlain(anchors);
207         writeLdml(anchors);
208         pw.setIndex("Main Chart Index", "../index.html");
209         pw.write(anchors.toString());
210     }
211 
212     private static class PathHeaderSegment extends R3<PathHeader, Integer, String> {
PathHeaderSegment(PathHeader b, int elementIndex, String attribute)213         public PathHeaderSegment(PathHeader b, int elementIndex, String attribute) {
214             super(b, elementIndex, attribute);
215         }
216     }
217 
218     private static class PathDiff extends R4<PathHeaderSegment, String, String, String> {
PathDiff(String locale, PathHeaderSegment pathHeaderSegment, String oldValue, String newValue)219         public PathDiff(String locale, PathHeaderSegment pathHeaderSegment, String oldValue, String newValue) {
220             super(pathHeaderSegment, locale, oldValue, newValue);
221         }
222     }
223 
224     private static final CLDRFile EMPTY_CLDR = new CLDRFile(new SimpleXMLSource("und").freeze());
225 
226     private static final File CLDR_BASE_DIR = CLDRConfig.getInstance().getCldrBaseDirectory();
227 
228     private enum ChangeType {
229         added, deleted, changed, same;
get(String oldValue, String currentValue)230         public static ChangeType get(String oldValue, String currentValue) {
231             return oldValue == null ? added
232                 : currentValue == null ? deleted
233                     : oldValue.equals(currentValue) ? same
234                         : changed;
235         }
236     }
237 
238     private Counter<ChangeType> counter = new Counter<>();
239     private Map<String, Counter<ChangeType>> fileCounters = new TreeMap<>();
240     private Set<String> badHeaders = new TreeSet<>();
241 
242     /**
243      * Add the count of changed items
244      */
addChange(String file, ChangeType changeType, int count)245     private void addChange(String file, ChangeType changeType, int count) {
246         counter.add(changeType, count); // unified add
247         Counter<ChangeType> fileCounter = fileCounters.get(file);
248         if (fileCounter == null) {
249             fileCounters.put(file, fileCounter = new Counter<>());
250         }
251         fileCounter.add(changeType, count);
252     }
253 
showTotals()254     private void showTotals() {
255         try (PrintWriter pw = FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + "_summary.tsv")) {
256             // pw.println("# percentages are of *new* total");
257             pw.print("# dir\tfile");
258             for (ChangeType item : ChangeType.values()) {
259                 pw.print("\t" + (item == ChangeType.same ? "total" : item.toString()));
260             }
261             pw.println();
262             showTotal(pw, "TOTAL/", counter);
263 
264             for (Entry<String, Counter<ChangeType>> entry : fileCounters.entrySet()) {
265                 showTotal(pw, entry.getKey(), entry.getValue());
266             }
267             for (String s : badHeaders) {
268                 pw.println(s);
269             }
270             // pw.println("# EOF");
271         } catch (IOException e) {
272             throw new ICUUncheckedIOException(e);
273         }
274     }
275 
showTotal(PrintWriter pw, String title2, Counter<ChangeType> counter2)276     private void showTotal(PrintWriter pw, String title2, Counter<ChangeType> counter2) {
277         long total = counter2.getTotal();
278         NumberFormat pf = NumberFormat.getPercentInstance();
279         pf.setMinimumFractionDigits(2);
280         NumberFormat nf = NumberFormat.getIntegerInstance();
281         pw.print(title2.replace("/", "\t"));
282         for (ChangeType item : ChangeType.values()) {
283             if (item == ChangeType.same) {
284                 pw.print("\t" + nf.format(total));
285             } else {
286                 final long current = counter2.getCount(item);
287                 pw.print("\t" + nf.format(current));
288             }
289         }
290         pw.println();
291     }
292 
293     /**
294      *
295      * @param anchors
296      * @throws IOException
297      *
298      * TODO: shorten the function using subroutines
299      */
writeLdml(Anchors anchors)300     private void writeLdml(Anchors anchors)  throws IOException {
301         try (PrintWriter tsvFile = FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + ".tsv");
302             PrintWriter tsvCountFile = FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + "_count.tsv");
303             ) {
304             tsvFile.println("# Section\tPage\tHeader\tCode\tLocale\tOld\tNew\tLevel");
305 
306             // set up factories
307             List<Factory> factories = new ArrayList<>();
308             List<Factory> oldFactories = new ArrayList<>();
309 
310             Counter<PathHeader> counts = new Counter<>();
311 
312             String dirBase = ToolConstants.getBaseDirectory(ToolConstants.CHART_VERSION);
313             String prevDirBase = ToolConstants.getBaseDirectory(ToolConstants.PREV_CHART_VERSION);
314 
315             for (String dir : DtdType.ldml.directories) {
316                 if (dir.equals("annotationsDerived") || dir.equals("casing")) {
317                     continue;
318                 }
319                 String current = dirBase + "common/" + dir;
320                 String past = prevDirBase + "common/" + dir;
321                 try {
322                     factories.add(Factory.make(current, ".*"));
323                 } catch (Exception e1) {
324                     System.out.println("Skipping: " + dir + "\t" + e1.getMessage());
325                     continue; // skip where the directories don't exist in old versions
326                 }
327                 try {
328                     oldFactories.add(Factory.make(past, ".*"));
329                 } catch (Exception e) {
330                     System.out.println("Couldn't open factory: " + past);
331                     past = null;
332                     oldFactories.add(null);
333                 }
334                 System.out.println("Will compare: " + dir + "\t\t" + current + "\t\t" + past);
335             }
336             if (factories.isEmpty()) {
337                 throw new IllegalArgumentException("No factories found for "
338                     + dirBase + ": " + DtdType.ldml.directories);
339             }
340             // get a list of all the locales to cycle over
341 
342             Relation<String, String> baseToLocales = Relation.of(new TreeMap<String, Set<String>>(), HashSet.class);
343             Matcher m = fileMatcher.matcher("");
344             Set<String> defaultContents = SDI.getDefaultContentLocales();
345             LanguageTagParser ltp = new LanguageTagParser();
346             LikelySubtags ls = new LikelySubtags();
347             for (String file : factories.get(0).getAvailable()) {
348                 if (defaultContents.contains(file)) {
349                     continue;
350                 }
351                 if (!m.reset(file).matches()) {
352                     continue;
353                 }
354                 String base = file.equals("root") ? "root" : ltp.set(ls.minimize(file)).getLanguageScript();
355                 baseToLocales.put(base, file);
356             }
357 
358             // do keyboards later
359 
360             Status currentStatus = new Status();
361             Status oldStatus = new Status();
362             Set<PathDiff> diff = new TreeSet<>();
363             Set<String> paths = new HashSet<>();
364 
365             Relation<PathHeader, String> diffAll = Relation.of(new TreeMap<PathHeader, Set<String>>(), TreeSet.class);
366             for (Entry<String, Set<String>> baseNLocale : baseToLocales.keyValuesSet()) {
367                 String base = baseNLocale.getKey();
368                 for (int i = 0; i < factories.size(); ++i) {
369                     Factory factory = factories.get(i);
370                     Factory oldFactory = oldFactories.get(i);
371                     List<File> sourceDirs = Arrays.asList(factory.getSourceDirectories());
372                     if (sourceDirs.size() != 1) {
373                         throw new IllegalArgumentException("Internal error: expect single source dir");
374                     }
375                     File sourceDir = sourceDirs.get(0);
376                     String sourceDirLeaf = sourceDir.getName();
377                     boolean resolving = !sourceDirLeaf.contains("subdivisions")
378                         && !sourceDirLeaf.contains("transforms");
379 
380                     for (String locale : baseNLocale.getValue()) {
381                         String nameAndLocale = sourceDirLeaf + "/" + locale;
382                         if (fileFilter != null && !fileFilter.reset(nameAndLocale).find()) {
383                             if (verbose && verbose_skipping) {
384                                 System.out.println("SKIPPING: " + nameAndLocale);
385                             }
386                             continue;
387                         }
388                         if (verbose) {
389                             System.out.println(nameAndLocale);
390                         }
391                         CLDRFile current = makeWithFallback(factory, locale, resolving);
392                         CLDRFile old = makeWithFallback(oldFactory, locale, resolving);
393                         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(old);
394 
395                         if (!locale.equals("root") && current.getLocaleID().equals("root") && old.getLocaleID().equals("root")) {
396                             continue;
397                         }
398                         if (old == EMPTY_CLDR && current == EMPTY_CLDR) {
399                             continue;
400                         }
401                         if (highLevelOnly && !HighLevelPaths.localeIsHighLevel(locale)) {
402                             continue;
403                         }
404                         paths.clear();
405                         for (String path : current.fullIterable()) {
406                             if (allowPath(locale, path)) {
407                                 paths.add(path);
408                             }
409                         }
410                         for (String path : old.fullIterable()) {
411                             if (!paths.contains(path) && allowPath(locale, path)) {
412                                 paths.add(path);
413                             }
414                         }
415 
416                         Output<String> reformattedValue = new Output<>();
417                         Output<Boolean> hasReformattedValue = new Output<>();
418 
419                         for (String path : paths) {
420                             if (path.startsWith("//ldml/identity")
421                                 || path.endsWith("/alias")
422                                 || path.startsWith("//ldml/segmentations") // do later
423                                 || path.startsWith("//ldml/rbnf") // do later
424                                 ) {
425                                 continue;
426                             }
427                             PathHeader ph = getPathHeader(path);
428                             if (ph == null) {
429                                 continue;
430                             }
431 
432                             String oldValue;
433                             String currentValue;
434 
435                             {
436                                 String sourceLocaleCurrent = current.getSourceLocaleID(path, currentStatus);
437                                 String sourceLocaleOld = getReformattedPath(oldStatus, old, path, reformattedValue, hasReformattedValue);
438 
439                                 // filter out stuff that differs at a higher level
440                                 if (!sourceLocaleCurrent.equals(locale)
441                                     && !sourceLocaleOld.equals(locale)) {
442                                     continue;
443                                 }
444                                 if (!path.equals(currentStatus.pathWhereFound)
445                                     && !path.equals(oldStatus.pathWhereFound)) {
446                                     continue;
447                                 }
448                                 // fix some incorrect cases?
449 
450                                 currentValue = current.getStringValue(path);
451                                 if (CldrUtility.INHERITANCE_MARKER.equals(currentValue)) {
452                                     currentValue = current.getBaileyValue(path, null, null);
453                                 }
454 
455                                 String oldRawValue = hasReformattedValue.value ? reformattedValue.value : old.getStringValue(path);
456                                 if (CldrUtility.INHERITANCE_MARKER.equals(oldRawValue)) {
457                                     oldRawValue = old.getBaileyValue(path, null, null);
458                                 }
459                                 // ignore differences due to old DAIP
460                                 oldValue = dontDaipValue(oldRawValue, path) ? oldRawValue : daip.processInput(path, oldRawValue, null);
461                             }
462                             if (highLevelOnly && new SuspiciousChange(oldValue, currentValue, path, locale).isDisruptive() == false) {
463                                 continue;
464                             }
465                             // handle non-distinguishing attributes
466                             addPathDiff(sourceDir, old, current, locale, ph, diff);
467 
468                             addValueDiff(sourceDir, oldValue, currentValue, locale, ph, diff, diffAll);
469                         }
470                     }
471                 }
472                 writeDiffs(anchors, base, diff, tsvFile, counts);
473                 diff.clear();
474             }
475             writeDiffs(diffAll);
476 
477             writeCounter(tsvCountFile, "Count", counts);
478         }
479     }
480 
dontDaipValue(String oldRawValue, String path)481     public boolean dontDaipValue(String oldRawValue, String path) {
482         return oldRawValue == null || path.startsWith("//ldml/collations");
483     }
484 
allowPath(String locale, String path)485     private boolean allowPath(String locale, String path) {
486         if (minimumPathCoverage != null) {
487             Level pathLevel = SUPPLEMENTAL_DATA_INFO.getCoverageLevel(path, locale);
488             if (minimumPathCoverage.compareTo(pathLevel) < 0) {
489                 return false;
490             }
491         }
492         return true;
493     }
494 
getReformattedPath(Status oldStatus, CLDRFile old, String path, Output<String> value, Output<Boolean> hasReformattedValue)495     private String getReformattedPath(Status oldStatus, CLDRFile old, String path, Output<String> value, Output<Boolean> hasReformattedValue) {
496         if (SKIP_REFORMAT_ANNOTATIONS || !path.startsWith("//ldml/annotations/")) {
497             hasReformattedValue.value = Boolean.FALSE;
498             return old.getSourceLocaleID(path, oldStatus);
499         }
500         // OLD:     <annotation cp='[��]' tts='grinning face'>face; grin</annotation>
501         // NEW:     <annotation cp="��">face | grin</annotation>
502         //          <annotation cp="��" type="tts">grinning face</annotation>
503         // from the NEW paths, get the OLD values
504         XPathParts parts = XPathParts.getFrozenInstance(path).cloneAsThawed(); // not frozen, for removeAttribute
505         boolean isTts = parts.getAttributeValue(-1, "type") != null;
506         if (isTts) {
507             parts.removeAttribute(-1, "type");
508         }
509         String cp = parts.getAttributeValue(-1, "cp");
510         parts.setAttribute(-1, "cp", "[" + cp + "]");
511 
512         String oldStylePath = parts.toString();
513         String temp = old.getStringValue(oldStylePath);
514         if (temp == null) {
515             hasReformattedValue.value = Boolean.FALSE;
516         } else if (isTts) {
517             String temp2 = old.getFullXPath(oldStylePath);
518             value.value = XPathParts.getFrozenInstance(temp2).getAttributeValue(-1, "tts");
519             hasReformattedValue.value = Boolean.TRUE;
520         } else {
521             value.value = temp.replaceAll("\\s*;\\s*", " | ");
522             hasReformattedValue.value = Boolean.TRUE;
523         }
524         return old.getSourceLocaleID(oldStylePath, oldStatus);
525     }
526 
527     PathStarrer starrer = new PathStarrer().setSubstitutionPattern("%A");
528 
getPathHeader(String path)529     private PathHeader getPathHeader(String path) {
530         try {
531             PathHeader ph = phf.fromPath(path);
532             if (ph.getPageId() == PageId.Unknown) {
533                 String star = starrer.set(path);
534                 badHeaders.add(star);
535                 return null;
536             }
537             return ph;
538         } catch (Exception e) {
539             String star = starrer.set(path);
540             badHeaders.add(star);
541             // System.err.println("Skipping path with bad PathHeader: " + path);
542             return null;
543         }
544     }
545 
makeWithFallback(Factory oldFactory, String locale, boolean resolving)546     private CLDRFile makeWithFallback(Factory oldFactory, String locale, boolean resolving) {
547         if (oldFactory == null) {
548             return EMPTY_CLDR;
549         }
550         CLDRFile old;
551         String oldLocale = locale;
552         while (true) { // fall back for old, maybe to root
553             try {
554                 old = oldFactory.make(oldLocale, resolving);
555                 break;
556             } catch (Exception e) {
557                 oldLocale = LocaleIDParser.getParent(oldLocale);
558                 if (oldLocale == null) {
559                     return EMPTY_CLDR;
560                 }
561             }
562         }
563         return old;
564     }
565 
addPathDiff(File sourceDir, CLDRFile old, CLDRFile current, String locale, PathHeader ph, Set<PathDiff> diff2)566     private void addPathDiff(File sourceDir, CLDRFile old, CLDRFile current, String locale, PathHeader ph, Set<PathDiff> diff2) {
567         String path = ph.getOriginalPath();
568         String fullPathCurrent = current.getFullXPath(path);
569         String fullPathOld = old.getFullXPath(path);
570         if (Objects.equals(fullPathCurrent, fullPathOld)) {
571             return;
572         }
573         XPathParts pathPlain = XPathParts.getFrozenInstance(path);
574         XPathParts pathCurrent = fullPathCurrent == null ? pathPlain : XPathParts.getFrozenInstance(fullPathCurrent);
575         XPathParts pathOld = fullPathOld == null ? pathPlain : XPathParts.getFrozenInstance(fullPathOld);
576         TreeSet<String> fullAttributes = null;
577         int size = pathCurrent.size();
578         String parentAndName = parentAndName(sourceDir, locale);
579         for (int elementIndex = 0; elementIndex < size; ++elementIndex) { // will have same size
580             Collection<String> distinguishing = pathPlain.getAttributeKeys(elementIndex);
581             Collection<String> attributesCurrent = pathCurrent.getAttributeKeys(elementIndex);
582             Collection<String> attributesOld = pathCurrent.getAttributeKeys(elementIndex);
583             if (attributesCurrent.isEmpty() && attributesOld.isEmpty()) {
584                 continue;
585             }
586             if (fullAttributes == null) {
587                 fullAttributes = new TreeSet<>();
588             } else {
589                 fullAttributes.clear();
590             }
591             fullAttributes.addAll(attributesCurrent);
592             fullAttributes.addAll(attributesOld);
593             fullAttributes.removeAll(distinguishing);
594             fullAttributes.removeAll(DONT_CARE);
595 
596             // at this point we only have non-distinguishing
597             for (String attribute : fullAttributes) {
598                 String attributeValueOld = pathOld.getAttributeValue(elementIndex, attribute);
599                 String attributeValueCurrent = pathCurrent.getAttributeValue(elementIndex, attribute);
600                 if (Objects.equals(attributeValueOld, attributeValueCurrent)) {
601                     addChange(parentAndName, ChangeType.same, 1);
602                     continue;
603                 }
604                 addChange(parentAndName, ChangeType.get(attributeValueOld, attributeValueCurrent), 1);
605 
606                 PathDiff row = new PathDiff(
607                     locale,
608                     new PathHeaderSegment(ph, size - elementIndex - 1, attribute),
609                     attributeValueOld,
610                     attributeValueCurrent);
611                 if (DEBUG) {
612                     System.out.println(row);
613                 }
614                 diff2.add(row);
615             }
616         }
617     }
618 
parentAndName(File sourceDir, String locale)619     private String parentAndName(File sourceDir, String locale) {
620         return sourceDir.getName() + "/" + locale + ".xml";
621     }
622 
addValueDiff(File sourceDir, String valueOld, String valueCurrent, String locale, PathHeader ph, Set<PathDiff> diff, Relation<PathHeader, String> diffAll)623     private void addValueDiff(File sourceDir, String valueOld, String valueCurrent, String locale, PathHeader ph, Set<PathDiff> diff,
624         Relation<PathHeader, String> diffAll) {
625         // handle stuff that can be split specially
626         Splitter splitter = getSplitter(ph.getOriginalPath(), valueOld, valueCurrent);
627         int count = 1;
628         String parentAndName = parentAndName(sourceDir, locale);
629         if (Objects.equals(valueCurrent, valueOld)) {
630             if (splitter != null && valueCurrent != null) {
631                 count = splitHandlingNull(splitter, valueCurrent).size();
632             }
633             addChange(parentAndName, ChangeType.same, count);
634         } else {
635             if (splitter != null) {
636                 List<String> setOld = splitHandlingNull(splitter, valueOld);
637                 List<String> setNew = splitHandlingNull(splitter, valueCurrent);
638                 int[] sameAndNotInSecond = new int[2];
639                 valueOld = getFilteredValue(setOld, setNew, sameAndNotInSecond);
640                 addChange(parentAndName, ChangeType.same, sameAndNotInSecond[0]);
641                 addChange(parentAndName, ChangeType.deleted, sameAndNotInSecond[1]);
642                 sameAndNotInSecond[0] = sameAndNotInSecond[1] = 0;
643                 valueCurrent = getFilteredValue(setNew, setOld, sameAndNotInSecond);
644                 addChange(parentAndName, ChangeType.added, sameAndNotInSecond[1]);
645             } else if (hasUnicodeSetValue(ph.getOriginalPath())) {
646                 UnicodeSet usOld = valueOld == null ? UnicodeSet.EMPTY : new UnicodeSet(valueOld);
647                 UnicodeSet usCurrent = valueCurrent == null ? UnicodeSet.EMPTY : new UnicodeSet(valueCurrent);
648                 UnicodeSet oldOnly = new UnicodeSet(usOld).removeAll(usCurrent);
649                 UnicodeSet currentOnly = new UnicodeSet(usCurrent).removeAll(usOld);
650                 addChange(parentAndName, ChangeType.same, usOld.size()-oldOnly.size());
651                 addChange(parentAndName, ChangeType.deleted, oldOnly.size());
652                 addChange(parentAndName, ChangeType.added, currentOnly.size());
653                 valueOld = usOld.size()==oldOnly.size() ? oldOnly.toPattern(false) : "…" + oldOnly + "…";
654                 valueCurrent = usCurrent.size()==currentOnly.size() ? currentOnly.toPattern(false) : "…" + currentOnly + "…";
655             } else {
656                 addChange(parentAndName, ChangeType.get(valueOld, valueCurrent), count);
657             }
658             PathDiff row = new PathDiff(locale, new PathHeaderSegment(ph, -1, ""), valueOld, valueCurrent);
659             diff.add(row);
660             diffAll.put(ph, locale);
661         }
662     }
663 
hasUnicodeSetValue(String xpath)664     private boolean hasUnicodeSetValue(String xpath) {
665         return xpath.startsWith("//ldml/characters/exemplar");
666     }
667 
splitHandlingNull(Splitter splitter, String value)668     private List<String> splitHandlingNull(Splitter splitter, String value) {
669         return value == null ? null : splitter.splitToList(value);
670     }
671 
getSplitter(String path, String valueOld, String valueCurrent)672     private Splitter getSplitter(String path, String valueOld, String valueCurrent) {
673         if (path.contains("/annotation") && !path.contains("tts")) {
674             return DtdData.BAR_SPLITTER;
675         } else if (valueOld != null && valueOld.contains("\n") || valueCurrent != null && valueCurrent.contains("\n")) {
676             return DtdData.CR_SPLITTER;
677         } else {
678             return null;
679         }
680     }
681 
682     /**
683      * Return string with all lines from linesToRemove removed
684      * @param toGetStringFor
685      * @param linesToRemove
686      * @return
687      */
getFilteredValue(Collection<String> toGetStringFor, Collection<String> linesToRemove, int[] sameAndDiff)688     private String getFilteredValue(Collection<String> toGetStringFor, Collection<String> linesToRemove,
689         int[] sameAndDiff) {
690         if (toGetStringFor == null) {
691             return null;
692         }
693         StringBuilder buf = new StringBuilder();
694         Set<String> toRemove = linesToRemove == null ? Collections.emptySet() : new HashSet<>(linesToRemove);
695         boolean removed = false;
696         for (String old : toGetStringFor) {
697             if (toRemove.contains(old)) {
698                 removed = true;
699                 sameAndDiff[0]++;
700             } else {
701                 sameAndDiff[1]++;
702                 if (removed) {
703                     buf.append("…\n");
704                     removed = false;
705                 }
706                 buf.append(old).append('\n');
707             }
708         }
709         if (removed) {
710             buf.append("…");
711         } else if (buf.length() > 0) {
712             buf.setLength(buf.length() - 1); // remove final \n
713         }
714         return buf.toString();
715     }
716 
writeDiffs(Anchors anchors, String file, String title, Multimap<PathHeader, String> bcp, PrintWriter tsvFile)717     private void writeDiffs(Anchors anchors, String file, String title, Multimap<PathHeader, String> bcp, PrintWriter tsvFile) {
718         if (bcp.isEmpty()) {
719             System.out.println("\tDeleting: " + DIR + "/" + file);
720             new File(DIR + file).delete();
721             return;
722         }
723         TablePrinter tablePrinter = new TablePrinter()
724             .addColumn("Section", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
725             .addColumn("Page", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)//.setRepeatDivider(true)
726             .addColumn("Header", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
727             .addColumn("Code", "class='source'", null, "class='source'", false)
728             .addColumn("Old", "class='target'", null, "class='target'", false) //  width='20%'
729             .addColumn("New", "class='target'", null, "class='target'", false); //  width='20%'
730         PathHeader ph1 = phf.fromPath("//supplementalData/metadata/alias/subdivisionAlias[@type=\"TW-TXQ\"]/_reason");
731         PathHeader ph2 = phf.fromPath("//supplementalData/metadata/alias/subdivisionAlias[@type=\"LA-XN\"]/_replacement");
732         ph1.compareTo(ph2);
733         for (Entry<PathHeader, Collection<String>> entry : bcp.asMap().entrySet()) {
734             PathHeader ph = entry.getKey();
735             if (ph.getPageId() == DEBUG_PAGE_ID) {
736                 System.out.println(ph + "\t" + ph.getOriginalPath());
737             }
738             for (String value : entry.getValue()) {
739                 String[] oldNew = value.split(SEP);
740                 tablePrinter.addRow()
741                 .addCell(ph.getSectionId())
742                 .addCell(ph.getPageId())
743                 .addCell(ph.getHeader())
744                 .addCell(ph.getCode())
745                 .addCell(oldNew[0])
746                 .addCell(oldNew[1])
747                 .finishRow();
748             }
749         }
750         writeTable(anchors, file, tablePrinter, title, tsvFile);
751     }
752 
writeDiffs(Relation<PathHeader, String> diffAll)753     private void writeDiffs(Relation<PathHeader, String> diffAll) {
754         TablePrinter tablePrinter = new TablePrinter()
755             .addColumn("Section", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
756             .addColumn("Page", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
757             .addColumn("Header", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
758             .addColumn("Code", "class='source'", null, "class='source'", true)
759             .addColumn("Locales where different", "class='target'", null, "class='target'", true);
760         for (Entry<PathHeader, Set<String>> row : diffAll.keyValuesSet()) {
761             PathHeader ph = row.getKey();
762             Set<String> locales = row.getValue();
763             tablePrinter.addRow()
764             .addCell(ph.getSectionId())
765             .addCell(ph.getPageId())
766             .addCell(ph.getHeader())
767             .addCell(ph.getCode())
768             .addCell(Joiner.on(" ").join(locales))
769             .finishRow();
770         }
771     }
772 
writeDiffs(Anchors anchors, String file, Set<PathDiff> diff, PrintWriter tsvFile, Counter<PathHeader> counts)773     private void writeDiffs(Anchors anchors, String file, Set<PathDiff> diff, PrintWriter tsvFile, Counter<PathHeader> counts) {
774         if (diff.isEmpty()) {
775             return;
776         }
777         TablePrinter tablePrinter = new TablePrinter()
778             .addColumn("Section", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
779             .addColumn("Page", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
780             .addColumn("Header", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
781             .addColumn("Code", "class='source'", null, "class='source'", true)
782             .addColumn("Locale", "class='source'", null, "class='source'", true)
783             .addColumn("Old", "class='target'", null, "class='target'", true) //  width='20%'
784             .addColumn("New", "class='target'", null, "class='target'", true) //  width='20%'
785             .addColumn("Level", "class='target'", null, "class='target'", true);
786 
787         for (PathDiff row : diff) {
788             PathHeaderSegment phs = row.get0();
789             counts.add(phs.get0(), 1);
790             String locale = row.get1();
791             String oldValue = row.get2();
792             String currentValue = row.get3();
793 
794             PathHeader ph = phs.get0();
795             Integer pathIndex = phs.get1();
796             String attribute = phs.get2();
797             String specialCode = ph.getCode();
798 
799             if (!attribute.isEmpty()) {
800                 specialCode += "_" + attribute;
801                 if (pathIndex != 0) {
802                     specialCode += "|" + pathIndex;
803                 }
804             }
805             Level coverageLevel = SUPPLEMENTAL_DATA_INFO.getCoverageLevel(ph.getOriginalPath(), locale);
806             String fixedOldValue = oldValue == null ? "▷missing◁" : TransliteratorUtilities.toHTML.transform(oldValue);
807             String fixedNewValue = currentValue == null ? "▷removed◁" : TransliteratorUtilities.toHTML.transform(currentValue);
808 
809             tablePrinter.addRow()
810             .addCell(ph.getSectionId())
811             .addCell(ph.getPageId())
812             .addCell(ph.getHeader())
813             .addCell(specialCode)
814             .addCell(locale)
815             .addCell(fixedOldValue)
816             .addCell(fixedNewValue)
817             .addCell(coverageLevel)
818             .finishRow();
819 
820         }
821         String title = ENGLISH.getName(file) + " " + chartNameCap;
822         writeTable(anchors, file, tablePrinter, title, tsvFile);
823 
824         diff.clear();
825     }
826 
827     private class ChartDeltaSub extends Chart {
828         private String title;
829         private String file;
830         private TablePrinter tablePrinter;
831         private PrintWriter tsvFile;
832 
ChartDeltaSub(String title, String file, TablePrinter tablePrinter, PrintWriter tsvFile)833         private ChartDeltaSub(String title, String file, TablePrinter tablePrinter, PrintWriter tsvFile) {
834             super();
835             this.title = title;
836             this.file = file;
837             this.tablePrinter = tablePrinter;
838             this.tsvFile = tsvFile;
839         }
840 
841         @Override
getDirectory()842         public String getDirectory() {
843             return DIR;
844         }
845 
846         @Override
getShowDate()847         public boolean getShowDate() {
848             return false;
849         }
850 
851         @Override
getTitle()852         public String getTitle() {
853             return title;
854         }
855 
856         @Override
getFileName()857         public String getFileName() {
858             return file;
859         }
860 
861         @Override
getExplanation()862         public String getExplanation() {
863             return "<p>Lists data fields that differ from the last major version (see versions above)."
864                 + " Inherited differences in locales are suppressed, except where the source locales are different. "
865                 + "<p>";
866         }
867 
868         @Override
writeContents(FormattedFileWriter pw)869         public void writeContents(FormattedFileWriter pw) throws IOException {
870             pw.write(tablePrinter.toTable());
871             tablePrinter.toTsv(tsvFile);
872         }
873     }
874 
writeTable(Anchors anchors, String file, TablePrinter tablePrinter, String title, PrintWriter tsvFile)875     private void writeTable(Anchors anchors, String file, TablePrinter tablePrinter, String title, PrintWriter tsvFile) {
876         ChartDeltaSub chartDeltaSub = new ChartDeltaSub(title, file, tablePrinter, tsvFile);
877         chartDeltaSub.writeChart(anchors);
878     }
879 
writeNonLdmlPlain(Anchors anchors)880     private void writeNonLdmlPlain(Anchors anchors) throws IOException {
881         try (PrintWriter tsvFile = FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + "_supp.tsv");
882             PrintWriter tsvCountFile = FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + "_supp_count.tsv");
883             ) {
884             tsvFile.println("# Section\tPage\tHeader\tCode\tOld\tNew");
885 
886             Multimap<PathHeader, String> bcp = TreeMultimap.create();
887             Multimap<PathHeader, String> supplemental = TreeMultimap.create();
888             Multimap<PathHeader, String> transforms = TreeMultimap.create();
889 
890             Counter<PathHeader> countSame = new Counter<>();
891             Counter<PathHeader> countAdded = new Counter<>();
892             Counter<PathHeader> countDeleted = new Counter<>();
893 
894             for (String dir : new File(CLDRPaths.BASE_DIRECTORY + "common/").list()) {
895                 if (DtdType.ldml.directories.contains(dir)
896                     || dir.equals(".DS_Store")
897                     || dir.equals("dtd") // TODO as flat files
898                     || dir.equals("properties") // TODO as flat files
899                     || dir.equals("uca") // TODO as flat files
900                     ) {
901                     continue;
902                 }
903                 File dirOld = new File(PREV_CHART_VERSION_DIRECTORY + "common/" + dir);
904                 System.out.println("\tLast dir: " + dirOld);
905                 File dir2 = new File(CHART_VERSION_DIRECTORY + "common/" + dir);
906                 System.out.println("\tCurr dir: " + dir2);
907 
908                 for (String file : dir2.list()) {
909                     if (!file.endsWith(".xml")) {
910                         continue;
911                     }
912                     String parentAndFile = dir + "/" + file;
913                     String base = file.substring(0, file.length() - 4);
914                     if (fileFilter != null && !fileFilter.reset(dir + "/" + base).find()) {
915                         if (verbose) { //  && verbose_skipping
916                             System.out.println("SKIPPING: " + dir + "/" + base);
917                         }
918                         continue;
919                     }
920                     if (highLevelOnly && !HighLevelPaths.localeIsHighLevel(base)) {
921                         continue;
922                     }
923                     if (verbose) {
924                         System.out.println(file);
925                     }
926                     Relation<PathHeader, String> contentsOld = fillData(dirOld.toString() + "/", file, base);
927                     Relation<PathHeader, String> contents2 = fillData(dir2.toString() + "/", file, base);
928 
929                     Set<PathHeader> keys = new TreeSet<>(CldrUtility.ifNull(contentsOld.keySet(), Collections.<PathHeader> emptySet()));
930                     keys.addAll(CldrUtility.ifNull(contents2.keySet(), Collections.<PathHeader> emptySet()));
931                     DtdType dtdType = null;
932                     for (PathHeader key : keys) {
933                         String originalPath = key.getOriginalPath();
934                         if (highLevelOnly && !HighLevelPaths.pathIsHighLevel(originalPath, base)) {
935                             continue;
936                         }
937                         boolean isTransform = originalPath.contains("/tRule");
938                         if (dtdType == null) {
939                             dtdType = DtdType.fromPath(originalPath);
940                         }
941                         Multimap<PathHeader, String> target = dtdType == DtdType.ldmlBCP47 ? bcp
942                             : isTransform ? transforms
943                                 : supplemental;
944                         Set<String> setOld = contentsOld.get(key);
945                         Set<String> set2 = contents2.get(key);
946 
947                         if (Objects.equals(setOld, set2)) {
948                             if (file.equals(DEBUG_FILE)) { // for debugging
949                                 System.out.println("**Same: " + key + "\t" + setOld);
950                             }
951                             addChange(parentAndFile, ChangeType.same, setOld.size());
952                             countSame.add(key, 1);
953                             continue;
954                         }
955                         if (setOld == null) {
956                             addChange(parentAndFile, ChangeType.added, set2.size());
957                             for (String s : set2) {
958                                 addRow(target, key, "▷missing◁", s);
959                                 countAdded.add(key, 1);
960                             }
961                         } else if (set2 == null) {
962                             addChange(parentAndFile, ChangeType.deleted, setOld.size());
963                             for (String s : setOld) {
964                                 addRow(target, key, s, "▷removed◁");
965                                 countDeleted.add(key, 1);
966                             }
967                         } else {
968                             Set<String> s1MOld = setOld;
969                             Set<String> s2M1 = set2;
970                             if (s1MOld.isEmpty()) {
971                                 addRow(target, key, "▷missing◁", Joiner.on(", ").join(s2M1));
972                                 addChange(parentAndFile, ChangeType.added, s2M1.size());
973                                 countAdded.add(key, 1);
974                             } else if (s2M1.isEmpty()) {
975                                 addRow(target, key, Joiner.on(", ").join(s1MOld), "▷removed◁");
976                                 addChange(parentAndFile, ChangeType.deleted, s1MOld.size());
977                                 countDeleted.add(key, 1);
978                             } else {
979                                 String valueOld;
980                                 String valueCurrent;
981 
982                                 int[] sameAndNotInSecond = new int[2];
983                                 valueOld = getFilteredValue(s1MOld, s1MOld, sameAndNotInSecond);
984                                 addChange(parentAndFile, ChangeType.same, sameAndNotInSecond[0]);
985                                 countSame.add(key, 1);
986                                 addChange(parentAndFile, ChangeType.deleted, sameAndNotInSecond[1]);
987                                 sameAndNotInSecond[1] = 0;
988                                 countDeleted.add(key, 1);
989                                 valueCurrent = getFilteredValue(s2M1, s1MOld, sameAndNotInSecond);
990                                 addChange(parentAndFile, ChangeType.added, sameAndNotInSecond[1]);
991                                 addRow(target, key, valueOld, valueCurrent);
992                                 countAdded.add(key, 1);
993                             }
994                         }
995                     }
996                 }
997             }
998             writeDiffs(anchors, "bcp47", "¤¤BCP47 " + chartNameCap, bcp, tsvFile);
999             writeDiffs(anchors, "supplemental-data", "¤¤Supplemental " + chartNameCap, supplemental, tsvFile);
1000             writeDiffs(anchors, "transforms", "¤¤Transforms " + chartNameCap, transforms, tsvFile);
1001 
1002             writeCounter(tsvCountFile, "CountSame", countSame);
1003             tsvCountFile.println();
1004             writeCounter(tsvCountFile, "CountAdded", countAdded);
1005             tsvCountFile.println();
1006             writeCounter(tsvCountFile, "CountDeleted", countDeleted);
1007 
1008             //tsvFile.println("# EOF");
1009             //tsvCountFile.println("# EOF");
1010         }
1011     }
1012 
writeCounter(PrintWriter tsvFile, String title, Counter<PathHeader> countDeleted)1013     private void writeCounter(PrintWriter tsvFile, String title, Counter<PathHeader> countDeleted) {
1014         tsvFile.append("# "
1015             + title
1016             + "\tSection\tPage\tSubhead\tCode\n\n");
1017         for (R2<Long, PathHeader> entry : countDeleted.getEntrySetSortedByCount(false, null)) {
1018             tsvFile.println(entry.get0() + "\t" + entry.get1());
1019         }
1020     }
1021 
addRow(Multimap<PathHeader, String> target, PathHeader key, String oldItem, String newItem)1022     private void addRow(Multimap<PathHeader, String> target, PathHeader key, String oldItem, String newItem) {
1023         if (oldItem.isEmpty() || newItem.isEmpty()) {
1024             throw new IllegalArgumentException();
1025         }
1026         target.put(key, oldItem + SEP + newItem);
1027     }
1028 
1029     /**
1030      * Fill in the chart data for the specified file
1031      *
1032      * @param directory
1033      * @param file like "xx.xml" where "xx" may be a locale name
1034      * @param fileBase like "xx", same as file without ".xml"
1035      * @return the Relation
1036      */
fillData(String directory, String file, String fileBase)1037     private Relation<PathHeader, String> fillData(String directory, String file, String fileBase) {
1038         Relation<PathHeader, String> results = Relation.of(new TreeMap<PathHeader, Set<String>>(), TreeSet.class);
1039 
1040         List<Pair<String, String>> contents1;
1041         try {
1042             contents1 = XMLFileReader.loadPathValues(directory + file, new ArrayList<Pair<String, String>>(), true);
1043         } catch (Exception e) {
1044             /*
1045              * This happens with e = ICUException, file = grammaticalFeatures.xml in cldr-36.0
1046              */
1047             return results;
1048         }
1049         DtdType dtdType = null;
1050         DtdData dtdData = null;
1051         Multimap<String, String> extras = TreeMultimap.create();
1052 
1053         for (Pair<String, String> s : contents1) {
1054             String path = s.getFirst();
1055             if (highLevelOnly && !HighLevelPaths.pathIsHighLevel(path, fileBase /* locale, or not */)) {
1056                 continue;
1057             }
1058             String value = s.getSecond();
1059             if (dtdType == null) {
1060                 /*
1061                  * Note: although dtdType and dtdData depend on path, they are the same for all paths
1062                  * in the same file, so they only need to be set the first time through this loop.
1063                  *
1064                  * Note: the current DTD in CLDR_BASE_DIR is supposed to be backward-compatible, that is, to support
1065                  * paths from all archived versions. Any exception to that rule (e.g., for "grammaticalState") is a bug.
1066                  */
1067                 dtdType = DtdType.fromPath(path);
1068                 dtdData = DtdData.getInstance(dtdType, CLDR_BASE_DIR);
1069             }
1070             XPathParts pathPlain = XPathParts.getFrozenInstance(path);
1071             try {
1072                 if (dtdData.isMetadata(pathPlain)) {
1073                     continue;
1074                 }
1075             } catch (NullPointerException e) {
1076                 /*
1077                  * TODO: this happens for "grammaticalState" in this path from version 37:
1078                  * //supplementalData/grammaticalData/grammaticalFeatures[@targets="nominal"][@locales="he"]/grammaticalState[@values="definite indefinite construct"]
1079                  * Reference: https://unicode-org.atlassian.net/browse/CLDR-13306
1080                  */
1081                 System.out.println("Caught NullPointerException in fillData calling isMetadata, path = " + path);
1082                 continue;
1083             }
1084             Set<String> pathForValues = dtdData.getRegularizedPaths(pathPlain, extras);
1085             if (pathForValues != null) {
1086                 for (String pathForValue : pathForValues) {
1087                     PathHeader pathHeader = phf.fromPath(pathForValue);
1088                     if (pathHeader.getPageId() == PageId.Suppress) {
1089                         continue;
1090                     }
1091                     Splitter splitter = DtdData.getValueSplitter(pathPlain);
1092                     for (String line : splitter.split(value)) {
1093                         // special case # in transforms
1094                         if (isComment(pathPlain, line)) {
1095                             continue;
1096                         }
1097                         results.put(pathHeader, line);
1098                     }
1099                 }
1100             }
1101             for (Entry<String, Collection<String>> entry : extras.asMap().entrySet()) {
1102                 final String extraPath = entry.getKey();
1103                 final PathHeader pathHeaderExtra = phf.fromPath(extraPath);
1104                 if (pathHeaderExtra.getPageId() == PageId.Suppress) {
1105                     continue;
1106                 }
1107                 final Collection<String> extraValue = entry.getValue();
1108                 if (isExtraSplit(extraPath)) {
1109                     for (String items : extraValue) {
1110                         results.putAll(pathHeaderExtra, DtdData.SPACE_SPLITTER.splitToList(items));
1111                     }
1112                 } else {
1113                     results.putAll(pathHeaderExtra, extraValue);
1114                 }
1115             }
1116             if (pathForValues == null && !value.isEmpty()) {
1117                 System.err.println("Shouldn't happen");
1118             }
1119         }
1120         return results;
1121     }
1122 
isExtraSplit(String extraPath)1123     private boolean isExtraSplit(String extraPath) {
1124         if (extraPath.endsWith("/_type") && extraPath.startsWith("//supplementalData/metaZones/mapTimezones")) {
1125             return true;
1126         }
1127         return false;
1128     }
1129 
isComment(XPathParts pathPlain, String line)1130     private static boolean isComment(XPathParts pathPlain, String line) {
1131         if (pathPlain.contains("transform")) {
1132             if (line.startsWith("#")) {
1133                 return true;
1134             }
1135         }
1136         return false;
1137     }
1138 
1139     /**
1140      * Determine when changes to the values for paths should be treated as
1141      * potentially "disruptive" for the purpose of "churn" reporting
1142      */
1143     private class SuspiciousChange {
1144         /**
1145          * the old and new values, such as "HH:mm–HH:mm v" and "HH:mm – HH:mm v"
1146          */
1147         private String oldValue, newValue;
1148 
1149         /**
1150          * the path, such as //ldml/dates/calendars/calendar[@type="gregorian"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id="Hmv"]/greatestDifference[@id="H"]
1151          */
1152         private String path;
1153 
1154         /**
1155          * the locale (such as "doi") in which the path was found, or null, or possibly
1156          * the base file name without extension, like "xx" if the file name is "xx.xml",
1157          * where "xx" may or may not be a locale; e.g., "supplementalData"
1158          */
1159         private String locale;
1160 
SuspiciousChange(String oldValue, String newValue, String path, String locale)1161         SuspiciousChange(String oldValue, String newValue, String path, String locale) {
1162             this.oldValue = oldValue;
1163             this.newValue = newValue;
1164             this.path = path;
1165             this.locale = locale;
1166         }
1167 
1168         /**
1169          * Is the change from the old value to the new value, for this path and locale, potentially disruptive?
1170          *
1171          * @return true or false
1172          */
isDisruptive()1173         public boolean isDisruptive() {
1174             /*
1175              * OR, not AND: certain changes in value are disruptive even for paths not
1176              * otherwise treated as high-level, and changes for high-level paths are
1177              * disruptive even if the changes in values themselves are not identified
1178              * as disruptive.
1179              */
1180             return valueChangeIsDisruptive() || HighLevelPaths.pathIsHighLevel(path, locale);
1181         }
1182 
1183         /**
1184          * Is the change from the old value to the current value potentially disruptive, based (primarily) on
1185          * the values themselves?
1186          *
1187          * @return true or false
1188          */
valueChangeIsDisruptive()1189         private boolean valueChangeIsDisruptive() {
1190             if (oldValue == null || newValue == null || oldValue.equals(newValue)) {
1191                 return false;
1192             }
1193             if (valueChangeIsDisruptiveWhitespaceOnly()) {
1194                 return true;
1195             }
1196             return false;
1197         }
1198 
1199         /**
1200          * Is the change disruptive whitespace only?
1201          * Per design doc, "Format changes: second to none on the disruptiveness scale are changes involving spaces such as SPACE -> NBSP
1202          * or NBSP -> Narrow NBSP. Or adding a space somewhere in the format where previously there was none."
1203          *
1204          * @return true or false
1205          */
valueChangeIsDisruptiveWhitespaceOnly()1206         private boolean valueChangeIsDisruptiveWhitespaceOnly() {
1207             /*
1208              * annotations often have changes like "pop gorn", "popgorn", not treated as disruptive
1209              */
1210             if (path.startsWith("//ldml/annotations")) {
1211                 return false;
1212             }
1213             if (removeWhitespace(oldValue).equals(removeWhitespace(newValue))) {
1214                 return true;
1215             }
1216             return false;
1217         }
1218 
1219         /**
1220          * Remove whitespace from the given string
1221          *
1222          * Remove whitespace as defined by regex \s, and also
1223          * U+00A0 NO-BREAK SPACE
1224          * U+2007 FIGURE SPACE
1225          * U+202F NARROW NO-BREAK SPACE
1226          *
1227          * @param s the string
1228          * @return the modified string
1229          */
removeWhitespace(String s)1230         private String removeWhitespace(String s) {
1231             return s.replaceAll("[\\s\\u00A0\\u2007\\u202F]", "");
1232         }
1233     }
1234 
1235     /**
1236      * Determine which paths are considered "high-level" paths, i.e.,
1237      * paths for which any changes have high potential to cause disruptive "churn".
1238      * Whether a path is high-level sometimes depends on the locale or xml file in
1239      * which it occurs.
1240      * Some paths are high-level regardless of the locale in which they are located.
1241      * Other paths are high-level for some locales but not others. For example,
1242      *    //ldml/localeDisplayNames/languages/language[@type="xx"]
1243      * is high level in locale "xx", and maybe "en", but not for all locales.
1244      */
1245     private static class HighLevelPaths {
1246         /**
1247          * A set of paths to be treated as "high-level".
1248          * These are complete paths to be matched exactly.
1249          * Other paths are recognized by special functions like isHighLevelTerritoryName.
1250          *
1251          * The ordering and comments are based on the design spec.
1252          */
1253         final private static Set<String> highLevelPaths = new HashSet<>(Arrays.asList(
1254             /*
1255              * Core data
1256              */
1257             "//ldml/characters/exemplarCharacters",
1258             "//ldml/numbers/defaultNumberingSystem",
1259             "//ldml/numbers/otherNumberingSystems/native",
1260             /*
1261              * Territory and Language names
1262              *  Country/Region names (English and Native names) -- see isHighLevelTerritoryName
1263              *   //ldml/localeDisplayName/territories/territory/...
1264              *  Language names (English and Native) -- see isHighLevelLangName
1265              *   //ldml/localeDisplayNames/languages/language/...
1266              */
1267             /*
1268              * Date
1269              * Note: "year", "month", etc., below, form a subset (eight) of all possible values for type,
1270              * excluding, for example, "fri" and "zone". If we use starred paths, we would need further complication
1271              * to filter out "fri", "zone", etc.
1272              */
1273             "//ldml/dates/fields/field[@type=\"year\"]/displayName",
1274             "//ldml/dates/fields/field[@type=\"month\"]/displayName",
1275             "//ldml/dates/fields/field[@type=\"week\"]/displayName",
1276             "//ldml/dates/fields/field[@type=\"day\"]/displayName",
1277             "//ldml/dates/fields/field[@type=\"hour\"]/displayName",
1278             "//ldml/dates/fields/field[@type=\"era\"]/displayName",
1279             "//ldml/dates/fields/field[@type=\"minute\"]/displayName",
1280             "//ldml/dates/fields/field[@type=\"second\"]/displayName",
1281             /*
1282              * First day of week: firstDay in supplementalData.xml; see isHighLevelFirstDay
1283              * First week of year: see isHighLevelWeekOfPreference
1284              */
1285             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"full\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1286             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"long\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1287             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"medium\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1288             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"short\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1289             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"MMMEd\"]",
1290             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"MEd\"]",
1291             /*
1292              * Time
1293              */
1294             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"full\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1295             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"long\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1296             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"medium\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1297             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"short\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1298             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"am\"]",
1299             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=\"am\"]",
1300             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"pm\"]",
1301             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=\"pm\"]",
1302             /*
1303              * Currency (English and Native) -- see isHighLevelCurrencyName
1304              * E.g., //ldml/numbers/currencies/currency[@type=\"KRW\"]/displayName"
1305              *
1306              * ISO Currency Code: SupplementalData.xml match <region iso3166> -- see isHighLevelCurrencyCode
1307              */
1308             /*
1309              * Currency Formats
1310              *  a. Currency thousand separator
1311              *  b. Currency decimal separator
1312              *  c. Currency Symbol //ldml/numbers/currencies/currency[@type="CNY"]/symbol
1313              *  d. Currency Symbol Narrow //ldml/numbers/currencies/currency[@type=\"CNY\"]/symbol[@alt=\"narrow\"]"
1314              *
1315              * See isHighLevelCurrencySeparatorOrSymbol
1316              */
1317             "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1318             "//ldml/numbers/currencyFormats[@numberSystem=\"arab\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1319             /*
1320              * Number Symbols
1321              */
1322             "//ldml/numbers/minimumGroupingDigits",
1323             "//ldml/numbers/symbols[@numberSystem=\"latn\"]/decimal",
1324             "//ldml/numbers/symbols[@numberSystem=\"latn\"]/group",
1325             "//ldml/numbers/symbols[@numberSystem=\"arab\"]/decimal",
1326             "//ldml/numbers/symbols[@numberSystem=\"arab\"]/group",
1327             /*
1328              * Number formats
1329              */
1330             "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength/decimalFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1331             "//ldml/numbers/percentFormats[@numberSystem=\"latn\"]/percentFormatLength/percentFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1332             "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength/currencyFormat[@type=\"accounting\"]/pattern[@type=\"standard\"]",
1333             "//ldml/numbers/decimalFormats[@numberSystem=\"arab\"]/decimalFormatLength/decimalFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1334             "//ldml/numbers/percentFormats[@numberSystem=\"arab\"]/percentFormatLength/percentFormat[@type=\"standard\"]/pattern[@type=\"standard\"]"
1335             /*
1336              * "Complementary Observations"
1337              */
1338             /*
1339              * Changes to language aliases (supplementalMetaData) -- see isHighLevelLangAlias
1340              * E.g., //supplementalData/metadata/alias/languageAlias[@type="aar"]
1341              */
1342             /*
1343              * Changes in the containment graph -- see isHighLevelTerritoryContainment
1344              * Data mostly (or entirely?) from M49 standard, thus CLDR has limited control.
1345              * Users use the containment graph in a variety of ways.
1346              * E.g., //supplementalData/territoryContainment/group[@type="003"][@contains="013 021 029"]
1347              */
1348             /*
1349              * Format changes: second to none on the disruptiveness scale are changes involving spaces such as SPACE -> NBSP
1350              *  or NBSP -> Narrow NBSP. Or adding a space somewhere in the format where previously there was none.
1351              *  -- see SuspiciousChange.valueChangeIsDisruptiveWhitespaceOnly
1352              */
1353             /*
1354              * TODO: per design doc, "Adding a timezone"
1355              * TODO: per design doc, "Changes of symbols or codes that are cross-locale in some way such as the unknown
1356              *  currency symbol change '???' -> '¤'."
1357              * TODO: per design doc, "Change in character properties (not a CLDR but a Unicode change), and here especially
1358              *  newly adding or removing punctuation. Frequently irritates parsers."
1359              */
1360         ));
1361 
1362         static Pattern currencyPattern = Pattern.compile("^//ldml/numbers/currencies/currency.*/displayName.*");
1363 
1364         /**
1365          * Should the given path in the given locale be taken into account for generating "churn" reports?
1366          *
1367          * @param path the path of interest
1368          * @param locale the locale in which the path was found, or null, or possibly
1369          *     the base file name without extension, like "xx" if the file name is "xx.xml",
1370          *     where "xx" may or may not be a locale; e.g., "supplementalData"
1371          * @return true if it counts, else false to ignore
1372          */
pathIsHighLevel(String path, String locale)1373         private static boolean pathIsHighLevel(String path, String locale) {
1374             if (path == null || locale == null) {
1375                 return false;
1376             }
1377             if (!localeIsHighLevel(locale)) { // for efficiency, this should be caught at a higher level
1378                 System.out.println("locale [" + locale + "] failed localeIsHighLevel in pathIsHighLevel; path = " + path);
1379                 return false;
1380             }
1381             if (pathIsReallyHighLevel(path, locale)) {
1382                 if (verboseHighLevelReporting) {
1383                     recordHighLevelMatch(path);
1384                 }
1385                 return true;
1386             }
1387             return false;
1388         }
1389 
pathIsReallyHighLevel(String path, String locale)1390         private static boolean pathIsReallyHighLevel(String path, String locale) {
1391             if (highLevelPaths.contains(path)) {
1392                 return true;
1393             } else if (isHighLevelTerritoryName(path, locale)) {
1394                 return true;
1395             } else if (isHighLevelLangName(path, locale)) {
1396                 return true;
1397             } else if (isHighLevelCurrencyName(path, locale)) {
1398                 return true;
1399             } else if (isHighLevelCurrencyCode(path, locale)) {
1400                 return true;
1401             } else if (isHighLevelCurrencySeparatorOrSymbol(path, locale)) {
1402                 return true;
1403             } else if (isHighLevelLangAlias(path, locale)) {
1404                 return true;
1405             } else if (isHighLevelTerritoryContainment(path, locale)) {
1406                 return true;
1407             } else if (isHighLevelFirstDay(path, locale)) {
1408                 return true;
1409             } else if (isHighLevelWeekOfPreference(path, locale)) {
1410                 return true;
1411             }
1412             return false;
1413         }
1414 
1415         /**
1416          * Is the given locale, or base name, to be considered for "high level" churn report?
1417          *
1418          * @param locale the locale string, or base name like "supplementalData" as in "supplementalData.xml"
1419          * @return true or false
1420          */
localeIsHighLevel(String locale)1421         private static boolean localeIsHighLevel(String locale) {
1422             return SubmissionLocales.CLDR_OR_HIGH_LEVEL_LOCALES.contains(locale)
1423                 || "supplementalData".equals(locale);
1424         }
1425 
1426         /**
1427          * Changes to language aliases (supplemental metadata)
1428          * E.g., //supplementalData/metadata/alias/languageAlias[@type="aar"]
1429          *
1430          * @param path
1431          * @param locale must be "supplementalData" to match
1432          * @return true or false
1433          */
isHighLevelLangAlias(String path, String locale)1434         private static boolean isHighLevelLangAlias(String path, String locale) {
1435             if ("supplementalData".equals(locale)) {
1436                 if (path.startsWith("//supplementalData/metadata/alias/languageAlias")) {
1437                     return true;
1438                 }
1439             }
1440             return false;
1441         }
1442 
1443         /**
1444          * Changes in the containment graph
1445          * Data mostly (or entirely?) from M49 standard, thus CLDR has limited control.
1446          * Users use the containment graph in a variety of ways.
1447          * E.g., //supplementalData/territoryContainment/group[@type="003"][@contains="013 021 029"]
1448          *
1449          * @param path
1450          * @param locale must be "supplementalData" to match
1451          * @return true or false
1452          */
isHighLevelTerritoryContainment(String path, String locale)1453         private static boolean isHighLevelTerritoryContainment(String path, String locale) {
1454             if ("supplementalData".equals(locale)) {
1455                 if (path.startsWith("//supplementalData/territoryContainment")) {
1456                     return true;
1457                 }
1458             }
1459             return false;
1460         }
1461 
1462         /**
1463          * Is the given path a high-level territory name path in the given locale?
1464          *
1465          * E.g., //ldml/localeDisplayNames/territories/territory[@type="NNN"]
1466          * if type "NNN" CORRESPONDS TO the locale or the locale is "en"
1467          *
1468          * English names (en.xml): match all types
1469          * Native: check each territory type NNN corresponding to the given locale
1470          *
1471          * Exclude "alt"
1472          *
1473          * @param path
1474          * @param locale
1475          * @return true or false
1476          */
isHighLevelTerritoryName(String path, String locale)1477         private static boolean isHighLevelTerritoryName(String path, String locale) {
1478             if (path.startsWith("//ldml/localeDisplayNames/territories/territory")
1479                     && !path.contains("[@alt=")) {
1480                 if ("en".equals(locale)) {
1481                     return true;
1482                 }
1483                 CoverageVariableInfo cvi = SUPPLEMENTAL_DATA_INFO.getCoverageVariableInfo(locale);
1484                 if (cvi != null) {
1485                     for (String type : cvi.targetTerritories) {
1486                         if (path.contains("[@type=\"" + type + "\"]")) {
1487                             return true;
1488                         }
1489                     }
1490                 }
1491             }
1492             return false;
1493         }
1494 
1495         /**
1496          * Is the given path a high-level language name path in the given locale?
1497          *
1498          * E.g., //ldml/localeDisplayNames/languages/language[@type="xx"]
1499          * if type "xx" matches the locale or the locale is "en"
1500          *
1501          * Exclude "alt"
1502          *
1503          * @param path
1504          * @param locale
1505          * @return true or false
1506          */
isHighLevelLangName(String path, String locale)1507         private static boolean isHighLevelLangName(String path, String locale) {
1508             if (path.startsWith("//ldml/localeDisplayNames/languages/language")
1509                     && !path.contains("[@alt=")) {
1510                 if ("en".equals(locale)) {
1511                     /*
1512                      * English names (en.xml): match all types
1513                      */
1514                     return true;
1515                 } else if (path.contains("[@type=\"" + locale + "\"]")) {
1516                     /*
1517                      * Native names: match the type=”xx” of each xml file to identify the Native. E.g., type=ko if ko.xml
1518                      */
1519                     return true;
1520                 }
1521             }
1522             return false;
1523         }
1524 
1525         /**
1526          * Is the given path a high-level currency name path in the given locale?
1527          *
1528          * E.g., //ldml/numbers/currencies/currency[@type=\"AAA\"]/displayName
1529          * if type "AAA" CORRESPONDS TO the locale or the locale is "en"
1530          *
1531          * English names (en.xml): match all types
1532          * Native: check each currency type AAA corresponding to the given locale
1533          *
1534          * Do NOT exclude "alt"; e.g.,
1535          * //ldml/numbers/currencies/currency[@type="ADP"]/displayName[@alt="proposed-u167-1"]
1536          *
1537          * @param path
1538          * @param locale
1539          * @return true or false
1540          */
isHighLevelCurrencyName(String path, String locale)1541         private static boolean isHighLevelCurrencyName(String path, String locale) {
1542             if (currencyPattern.matcher(path).matches()) {
1543                 if ("en".equals(locale)) {
1544                     return true;
1545                 }
1546                 CoverageVariableInfo cvi = SUPPLEMENTAL_DATA_INFO.getCoverageVariableInfo(locale);
1547                 if (cvi != null) {
1548                     for (String type : cvi.targetCurrencies) {
1549                         if (path.contains("[@type=\"" + type + "\"]")) {
1550                             return true;
1551                         }
1552                     }
1553                 }
1554             }
1555             return false;
1556         }
1557 
1558         /**
1559          * Is the given path a high-level currency code path in the given locale?
1560          *
1561          * E.g., //supplementalData/currencyData/region[@iso3166="AC"]/currency[@iso4217="SHP"][@from="1976-01-01"]
1562          *
1563          * @param path
1564          * @param locale must be "supplementalData" to match
1565          * @return true or false
1566          */
isHighLevelCurrencyCode(String path, String locale)1567         private static boolean isHighLevelCurrencyCode(String path, String locale) {
1568             if ("supplementalData".equals(locale)) {
1569                 if (path.contains("iso3166")) {
1570                     return true;
1571                 }
1572             }
1573             return false;
1574         }
1575 
1576         /**
1577          * Is the given path a high-level currency thousands-separator or decimal-separator path in the given locale?
1578          *
1579          * E.g., //ldml/numbers/currencies/currency[@type="ESP"]/group
1580          *       //ldml/numbers/currencies/currency[@type="ESP"]/decimal
1581          *       //ldml/numbers/currencies/currency[@type="CNY"]/symbol
1582          *       //ldml/numbers/currencies/currency[@type="CNY"]/symbol[@alt="narrow"]"
1583          *
1584          * @param path
1585          * @param locale
1586          * @return true or false
1587          */
isHighLevelCurrencySeparatorOrSymbol(String path, String locale)1588         private static boolean isHighLevelCurrencySeparatorOrSymbol(String path, String locale) {
1589             if (path.startsWith("//ldml/numbers/currencies/currency")
1590                 && (path.contains("group") || path.contains("decimal") || path.contains("symbol"))) {
1591                 return true;
1592             }
1593             return false;
1594         }
1595 
1596         /**
1597          * Is the given path a high-level weekData/firstDay in the given locale?
1598          *
1599          * E.g.,//supplementalData/weekData/firstDay[@day="fri"][@territories="MV"]
1600          *
1601          * @param path
1602          * @param locale must be "supplementalData" to match
1603          * @return true or false
1604          */
isHighLevelFirstDay(String path, String locale)1605         private static boolean isHighLevelFirstDay(String path, String locale) {
1606             if ("supplementalData".equals(locale)) {
1607                 if (path.startsWith("//supplementalData/weekData/firstDay")) {
1608                     return true;
1609                 }
1610             }
1611             return false;
1612         }
1613 
1614         /**
1615          * Is the given path a high-level weekOfPreference in the given locale?
1616          *
1617          * E.g., //supplementalData/weekData/weekOfPreference[@ordering="weekOfYear"][@locales="und"]
1618          *
1619          * @param path
1620          * @param locale must be "supplementalData" to match
1621          * @return true or false
1622          */
isHighLevelWeekOfPreference(String path, String locale)1623         private static boolean isHighLevelWeekOfPreference(String path, String locale) {
1624             if ("supplementalData".equals(locale)) {
1625                 if (path.startsWith("//supplementalData/weekData/weekOfPreference")) {
1626                     return true;
1627                 }
1628             }
1629             return false;
1630         }
1631 
1632         /**
1633          * For debugging, testing
1634          */
1635         private static Set<String> highLevelPathMatched = null;
1636         private static boolean verboseHighLevelReporting = false;
1637 
recordHighLevelMatch(String path)1638         private static void recordHighLevelMatch(String path) {
1639             if (highLevelPathMatched == null) {
1640                 highLevelPathMatched = new HashSet<>();
1641             }
1642             highLevelPathMatched.add(path);
1643         }
1644 
1645         /**
1646          * For debugging, report on any paths in highLevelPaths that never matched
1647          */
reportHighLevelPathUsage()1648         private static void reportHighLevelPathUsage() {
1649             if (!verboseHighLevelReporting) {
1650                 return;
1651             }
1652             if (highLevelPathMatched == null) {
1653                 System.out.println("Zero high-level paths were matched!");
1654                 return;
1655             }
1656             for (String path : highLevelPaths) {
1657                 if (!highLevelPathMatched.contains(path)) {
1658                     System.out.println("Unmatched high-level path: " + path);
1659                 }
1660             }
1661             for (String path : highLevelPathMatched) {
1662                 if (!highLevelPaths.contains(path)) {
1663                     System.out.println("Special matched high-level path: " + path);
1664                 }
1665             }
1666         }
1667     }
1668 }
1669