• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.unittest;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.base.Objects;
5 import com.google.common.collect.ImmutableMultimap;
6 import com.google.common.collect.ImmutableSet;
7 import com.google.common.collect.Multimap;
8 import com.google.common.collect.TreeMultimap;
9 import com.ibm.icu.impl.Relation;
10 import com.ibm.icu.impl.Row;
11 import com.ibm.icu.impl.Row.R2;
12 import com.ibm.icu.impl.Row.R3;
13 import com.ibm.icu.impl.Utility;
14 import com.ibm.icu.lang.UCharacter;
15 import com.ibm.icu.text.Collator;
16 import com.ibm.icu.text.DecimalFormat;
17 import com.ibm.icu.text.Normalizer;
18 import com.ibm.icu.text.NumberFormat;
19 import com.ibm.icu.text.UTF16;
20 import com.ibm.icu.text.UnicodeSet;
21 import com.ibm.icu.text.UnicodeSetIterator;
22 import com.ibm.icu.util.Currency;
23 import com.ibm.icu.util.ULocale;
24 import java.io.File;
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.io.PrintWriter;
28 import java.io.StringWriter;
29 import java.util.ArrayList;
30 import java.util.Arrays;
31 import java.util.Collection;
32 import java.util.Collections;
33 import java.util.Comparator;
34 import java.util.EnumSet;
35 import java.util.HashSet;
36 import java.util.Iterator;
37 import java.util.LinkedHashSet;
38 import java.util.List;
39 import java.util.Map;
40 import java.util.Map.Entry;
41 import java.util.Set;
42 import java.util.TreeMap;
43 import java.util.TreeSet;
44 import org.unicode.cldr.test.DisplayAndInputProcessor;
45 import org.unicode.cldr.tool.CldrVersion;
46 import org.unicode.cldr.tool.LikelySubtags;
47 import org.unicode.cldr.util.Builder;
48 import org.unicode.cldr.util.CLDRConfig;
49 import org.unicode.cldr.util.CLDRFile;
50 import org.unicode.cldr.util.CLDRFile.DraftStatus;
51 import org.unicode.cldr.util.CLDRFile.Status;
52 import org.unicode.cldr.util.CLDRFile.WinningChoice;
53 import org.unicode.cldr.util.CLDRPaths;
54 import org.unicode.cldr.util.ChainedMap;
55 import org.unicode.cldr.util.ChainedMap.M4;
56 import org.unicode.cldr.util.CharacterFallbacks;
57 import org.unicode.cldr.util.CldrUtility;
58 import org.unicode.cldr.util.Counter;
59 import org.unicode.cldr.util.DiscreteComparator;
60 import org.unicode.cldr.util.DiscreteComparator.Ordering;
61 import org.unicode.cldr.util.DoctypeXmlStreamWrapper;
62 import org.unicode.cldr.util.DtdData;
63 import org.unicode.cldr.util.DtdData.Attribute;
64 import org.unicode.cldr.util.DtdData.Element;
65 import org.unicode.cldr.util.DtdData.ElementType;
66 import org.unicode.cldr.util.DtdType;
67 import org.unicode.cldr.util.DtdType.DtdStatus;
68 import org.unicode.cldr.util.ElementAttributeInfo;
69 import org.unicode.cldr.util.Factory;
70 import org.unicode.cldr.util.InputStreamFactory;
71 import org.unicode.cldr.util.LanguageTagParser;
72 import org.unicode.cldr.util.Level;
73 import org.unicode.cldr.util.LocaleIDParser;
74 import org.unicode.cldr.util.Pair;
75 import org.unicode.cldr.util.PathHeader;
76 import org.unicode.cldr.util.PathUtilities;
77 import org.unicode.cldr.util.StandardCodes;
78 import org.unicode.cldr.util.SupplementalDataInfo;
79 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
80 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
81 import org.unicode.cldr.util.TestCLDRPaths;
82 import org.unicode.cldr.util.XMLFileReader;
83 import org.unicode.cldr.util.XPathParts;
84 import org.xml.sax.ErrorHandler;
85 import org.xml.sax.InputSource;
86 import org.xml.sax.SAXException;
87 import org.xml.sax.SAXParseException;
88 import org.xml.sax.XMLReader;
89 
90 public class TestBasic extends TestFmwkPlus {
91 
92     private static final boolean DEBUG = false;
93 
94     static CLDRConfig testInfo = CLDRConfig.getInstance();
95 
96     private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO =
97             testInfo.getSupplementalDataInfo();
98 
99     private static final ImmutableSet<Pair<String, String>> knownElementExceptions =
100             ImmutableSet.of(Pair.of("ldml", "usesMetazone"), Pair.of("ldmlICU", "usesMetazone"));
101 
102     private static final ImmutableSet<Pair<String, String>> knownAttributeExceptions =
103             ImmutableSet.of(
104                     Pair.of("ldml", "version"),
105                     Pair.of("supplementalData", "version"),
106                     Pair.of("ldmlICU", "version"),
107                     Pair.of("layout", "standard"),
108                     Pair.of("currency", "id"), // for v1.1.1
109                     Pair.of("monthNames", "type"), // for v1.1.1
110                     Pair.of("alias", "type") // for v1.1.1
111                     );
112 
113     private static final ImmutableSet<Pair<String, String>> knownChildExceptions =
114             ImmutableSet.of(
115                     Pair.of("abbreviationFallback", "special"),
116                     Pair.of("inList", "special"),
117                     Pair.of("preferenceOrdering", "special"));
118 
119     /**
120      * Simple test that loads each file in the cldr directory, thus verifying that the DTD works,
121      * and also checks that the PrettyPaths work.
122      *
123      * @author markdavis
124      */
main(String[] args)125     public static void main(String[] args) {
126         new TestBasic().run(args);
127     }
128 
129     private static final ImmutableSet<String> skipAttributes =
130             ImmutableSet.of("alt", "draft", "references");
131 
132     private final ImmutableSet<String> eightPointLocales =
133             ImmutableSet.of(
134                     "ar", "ca", "cs", "da", "de", "el", "es", "fi", "fr", "he", "hi", "hr", "hu",
135                     "id", "it", "ja", "ko", "lt", "lv", "nl", "no", "pl", "pt", "pt_PT", "ro", "ru",
136                     "sk", "sl", "sr", "sv", "th", "tr", "uk", "vi", "zh", "zh_Hant");
137 
138     // private final boolean showForceZoom = Utility.getProperty("forcezoom",
139     // false);
140 
141     private final boolean resolved = CldrUtility.getProperty("resolved", false);
142 
143     private final Exception[] internalException = new Exception[1];
144 
TestDtds()145     public void TestDtds() throws IOException {
146         Relation<Row.R2<DtdType, String>, String> foundAttributes =
147                 Relation.of(new TreeMap<Row.R2<DtdType, String>, Set<String>>(), TreeSet.class);
148         final CLDRConfig config = CLDRConfig.getInstance();
149         final File basedir = config.getCldrBaseDirectory();
150         List<TimingInfo> data = new ArrayList<>();
151 
152         for (String subdir : CLDRConfig.getCLDRDataDirectories()) {
153             checkDtds(new File(basedir, subdir), 0, foundAttributes, data);
154         }
155         if (foundAttributes.size() > 0) {
156             showFoundElements(foundAttributes);
157         }
158         if (isVerbose()) {
159             long totalBytes = 0;
160             long totalNanos = 0;
161             for (TimingInfo i : data) {
162                 long length = i.file.length();
163                 totalBytes += length;
164                 totalNanos += i.nanos;
165                 logln(i.nanos + "\t" + length + "\t" + i.file);
166             }
167             logln(totalNanos + "\t" + totalBytes);
168         }
169     }
170 
checkDtds( File directoryFile, int level, Relation<R2<DtdType, String>, String> foundAttributes, List<TimingInfo> data)171     private void checkDtds(
172             File directoryFile,
173             int level,
174             Relation<R2<DtdType, String>, String> foundAttributes,
175             List<TimingInfo> data)
176             throws IOException {
177         boolean deepCheck = getInclusion() >= 10;
178         if (directoryFile.getName().equals("import")
179                 && directoryFile.getParentFile().getName().equals("keyboards")) {
180             return; // skip imports
181         }
182         File[] listFiles = directoryFile.listFiles();
183         String normalizedPath = PathUtilities.getNormalizedPathString(directoryFile);
184         String indent = Utility.repeat("\t", level);
185         if (listFiles == null) {
186             throw new IllegalArgumentException(indent + "Empty directory: " + normalizedPath);
187         }
188         logln("Checking files for DTD errors in: " + indent + normalizedPath);
189         for (File fileName : listFiles) {
190             String name = fileName.getName();
191             if (CLDRConfig.isJunkFile(name)) {
192                 continue;
193             } else if (fileName.isDirectory()) {
194                 checkDtds(fileName, level + 1, foundAttributes, data);
195             } else if (fileName.getPath().contains("/keyboards/3.0/")
196                     && logKnownIssue(
197                             "CLDR-17574", "With v46, parsing issues for keyboard xml files")) {
198                 ; // do nothing, skip test
199             } else if (name.endsWith(".xml")) {
200                 data.add(check(fileName));
201                 if (deepCheck // takes too long to do all the time
202                 ) {
203                     CLDRFile cldrfile =
204                             CLDRFile.loadFromFile(fileName, "temp", DraftStatus.unconfirmed);
205                     for (String xpath : cldrfile) {
206                         String fullPath = cldrfile.getFullXPath(xpath);
207                         if (fullPath == null) {
208                             fullPath = cldrfile.getFullXPath(xpath);
209                             assertNotNull("", fullPath);
210                             continue;
211                         }
212                         XPathParts parts = XPathParts.getFrozenInstance(fullPath);
213                         DtdType type = parts.getDtdData().dtdType;
214                         for (int i = 0; i < parts.size(); ++i) {
215                             String element = parts.getElement(i);
216                             R2<DtdType, String> typeElement = Row.of(type, element);
217                             if (parts.getAttributeCount(i) == 0) {
218                                 foundAttributes.put(typeElement, "NONE");
219                             } else {
220                                 for (String attribute : parts.getAttributeKeys(i)) {
221                                     foundAttributes.put(typeElement, attribute);
222                                 }
223                             }
224                         }
225                     }
226                 }
227             }
228         }
229     }
230 
showFoundElements(Relation<Row.R2<DtdType, String>, String> foundAttributes)231     public void showFoundElements(Relation<Row.R2<DtdType, String>, String> foundAttributes) {
232         Relation<Row.R2<DtdType, String>, String> theoryAttributes =
233                 Relation.of(new TreeMap<Row.R2<DtdType, String>, Set<String>>(), TreeSet.class);
234         for (DtdType type : DtdType.values()) {
235             if (type.getStatus() != DtdType.DtdStatus.active) {
236                 continue;
237             }
238             DtdData dtdData = DtdData.getInstance(type);
239             for (Element element : dtdData.getElementFromName().values()) {
240                 String name = element.getName();
241                 Set<Attribute> attributes = element.getAttributes().keySet();
242                 R2<DtdType, String> typeElement = Row.of(type, name);
243                 if (attributes.isEmpty()) {
244                     theoryAttributes.put(typeElement, "NONE");
245                 } else {
246                     for (Attribute attribute : attributes) {
247                         theoryAttributes.put(typeElement, attribute.name);
248                     }
249                 }
250             }
251         }
252         Relation<String, R3<Boolean, DtdType, String>> attributesToTypeElementUsed =
253                 Relation.of(
254                         new TreeMap<String, Set<R3<Boolean, DtdType, String>>>(),
255                         LinkedHashSet.class);
256 
257         for (Entry<R2<DtdType, String>, Set<String>> s : theoryAttributes.keyValuesSet()) {
258             R2<DtdType, String> typeElement = s.getKey();
259             Set<String> theoryAttributeSet = s.getValue();
260             DtdType type = typeElement.get0();
261             String element = typeElement.get1();
262             if (element.equals("ANY") || element.equals("#PCDATA")) {
263                 continue;
264             }
265             boolean deprecatedElement =
266                     SUPPLEMENTAL_DATA_INFO.isDeprecated(type, element, "*", "*");
267             String header = type + "\t" + element + "\t" + (deprecatedElement ? "X" : "") + "\t";
268             Set<String> usedAttributes = foundAttributes.get(typeElement);
269             Set<String> unusedAttributes = new LinkedHashSet<>(theoryAttributeSet);
270             if (usedAttributes == null) {
271                 logln(
272                         header
273                                 + "<NOT-FOUND>\t\t"
274                                 + siftDeprecated(
275                                         type,
276                                         element,
277                                         unusedAttributes,
278                                         attributesToTypeElementUsed,
279                                         false));
280                 continue;
281             }
282             unusedAttributes.removeAll(usedAttributes);
283             logln(
284                     header
285                             + siftDeprecated(
286                                     type,
287                                     element,
288                                     usedAttributes,
289                                     attributesToTypeElementUsed,
290                                     true)
291                             + "\t"
292                             + siftDeprecated(
293                                     type,
294                                     element,
295                                     unusedAttributes,
296                                     attributesToTypeElementUsed,
297                                     false));
298         }
299 
300         logln("Undeprecated Attributes\t");
301         for (Entry<String, R3<Boolean, DtdType, String>> s :
302                 attributesToTypeElementUsed.keyValueSet()) {
303             R3<Boolean, DtdType, String> typeElementUsed = s.getValue();
304             logln(
305                     s.getKey()
306                             + "\t"
307                             + typeElementUsed.get0()
308                             + "\t"
309                             + typeElementUsed.get1()
310                             + "\t"
311                             + typeElementUsed.get2());
312         }
313     }
314 
siftDeprecated( DtdType type, String element, Set<String> attributeSet, Relation<String, R3<Boolean, DtdType, String>> attributesToTypeElementUsed, boolean used)315     private String siftDeprecated(
316             DtdType type,
317             String element,
318             Set<String> attributeSet,
319             Relation<String, R3<Boolean, DtdType, String>> attributesToTypeElementUsed,
320             boolean used) {
321         StringBuilder b = new StringBuilder();
322         StringBuilder bdep = new StringBuilder();
323         for (String attribute : attributeSet) {
324             String attributeName =
325                     "«"
326                             + attribute
327                             + (!"NONE".equals(attribute)
328                                             && CLDRFile.isDistinguishing(type, element, attribute)
329                                     ? "*"
330                                     : "")
331                             + "»";
332             if (!"NONE".equals(attribute)
333                     && SUPPLEMENTAL_DATA_INFO.isDeprecated(type, element, attribute, "*")) {
334                 if (bdep.length() != 0) {
335                     bdep.append(" ");
336                 }
337                 bdep.append(attributeName);
338             } else {
339                 if (b.length() != 0) {
340                     b.append(" ");
341                 }
342                 b.append(attributeName);
343                 if (!"NONE".equals(attribute)) {
344                     attributesToTypeElementUsed.put(attribute, Row.of(used, type, element));
345                 }
346             }
347         }
348         return b.toString() + "\t" + bdep.toString();
349     }
350 
351     class MyErrorHandler implements ErrorHandler {
352         @Override
error(SAXParseException exception)353         public void error(SAXParseException exception) throws SAXException {
354             errln("error: " + XMLFileReader.showSAX(exception));
355             throw exception;
356         }
357 
358         @Override
fatalError(SAXParseException exception)359         public void fatalError(SAXParseException exception) throws SAXException {
360             errln("fatalError: " + XMLFileReader.showSAX(exception));
361             throw exception;
362         }
363 
364         @Override
warning(SAXParseException exception)365         public void warning(SAXParseException exception) throws SAXException {
366             errln("warning: " + XMLFileReader.showSAX(exception));
367             throw exception;
368         }
369     }
370 
371     private class TimingInfo {
372         File file;
373         long nanos;
374     }
375 
check(File systemID)376     public TimingInfo check(File systemID) {
377         long start = System.nanoTime();
378         try (InputStream fis = InputStreamFactory.createInputStream(systemID)) {
379             // FileInputStream fis = new FileInputStream(systemID);
380             XMLReader xmlReader = XMLFileReader.createXMLReader(true);
381             xmlReader.setErrorHandler(new MyErrorHandler());
382             InputSource is = new InputSource(fis);
383             is.setSystemId(systemID.toString());
384             DoctypeXmlStreamWrapper.wrap(is);
385             xmlReader.parse(is);
386             // fis.close();
387         } catch (SAXException | IOException e) {
388             errln("\t" + "Can't read " + systemID + "\t" + e.getClass() + "\t" + e.getMessage());
389         }
390         // catch (SAXParseException e) {
391         // errln("\t" + "Can't read " + systemID + "\t" + e.getClass() + "\t" +
392         // e.getMessage());
393         // } catch (IOException e) {
394         // errln("\t" + "Can't read " + systemID + "\t" + e.getClass() + "\t" +
395         // e.getMessage());
396         // }
397         TimingInfo timingInfo = new TimingInfo();
398         timingInfo.nanos = System.nanoTime() - start;
399         timingInfo.file = systemID;
400         return timingInfo;
401     }
402 
TestCurrencyFallback()403     public void TestCurrencyFallback() {
404         Factory cldrFactory = testInfo.getCldrFactory();
405         Set<String> currencies = StandardCodes.make().getAvailableCodes("currency");
406 
407         final UnicodeSet CHARACTERS_THAT_SHOULD_HAVE_FALLBACKS =
408                 new UnicodeSet("[[:sc:]-[\\u0000-\\u00FF]]").freeze();
409 
410         CharacterFallbacks fallbacks = CharacterFallbacks.make();
411 
412         for (String locale : cldrFactory.getAvailable()) {
413             if (!StandardCodes.isLocaleAtLeastBasic(locale)) {
414                 continue;
415             }
416             CLDRFile file = testInfo.getCLDRFile(locale, false);
417             if (file.isNonInheriting()) continue;
418 
419             final UnicodeSet OK_CURRENCY_FALLBACK =
420                     new UnicodeSet("[\\u0000-\\u00FF]")
421                             .addAll(safeExemplars(file, ""))
422                             .addAll(safeExemplars(file, "auxiliary"))
423                             .freeze();
424             UnicodeSet badSoFar = new UnicodeSet();
425 
426             for (Iterator<String> it = file.iterator(); it.hasNext(); ) {
427                 String path = it.next();
428                 if (path.endsWith("/alias")) {
429                     continue;
430                 }
431                 String value = file.getStringValue(path);
432 
433                 // check for special characters
434                 if (CHARACTERS_THAT_SHOULD_HAVE_FALLBACKS.containsSome(value)) {
435                     XPathParts parts = XPathParts.getFrozenInstance(path);
436                     if (!parts.getElement(-1).equals("symbol")) {
437                         continue;
438                     }
439                     // We don't care about fallbacks for narrow currency symbols
440                     if ("narrow".equals(parts.getAttributeValue(-1, "alt"))) {
441                         continue;
442                     }
443                     String currencyType = parts.getAttributeValue(-2, "type");
444 
445                     UnicodeSet fishy =
446                             new UnicodeSet()
447                                     .addAll(value)
448                                     .retainAll(CHARACTERS_THAT_SHOULD_HAVE_FALLBACKS)
449                                     .removeAll(badSoFar);
450                     for (UnicodeSetIterator it2 = new UnicodeSetIterator(fishy); it2.next(); ) {
451                         final int fishyCodepoint = it2.codepoint;
452                         List<String> fallbackList = fallbacks.getSubstitutes(fishyCodepoint);
453 
454                         String nfkc = Normalizer.normalize(fishyCodepoint, Normalizer.NFKC);
455                         if (!nfkc.equals(UTF16.valueOf(fishyCodepoint))) {
456                             if (fallbackList == null) {
457                                 fallbackList = new ArrayList<>();
458                             } else {
459                                 fallbackList = new ArrayList<>(fallbackList); // writable
460                             }
461                             fallbackList.add(nfkc);
462                         }
463                         // later test for all Latin-1
464                         if (fallbackList == null) {
465                             if (locale.equals("nqo")
466                                     && logKnownIssue("CLDR-16987", "fishy fallback test")) {
467                                 continue;
468                             }
469                             errln(
470                                     "Locale:\t"
471                                             + locale
472                                             + ";\tCharacter with no fallback:\t"
473                                             + it2.getString()
474                                             + "\t"
475                                             + UCharacter.getName(fishyCodepoint));
476                             badSoFar.add(fishyCodepoint);
477                         } else {
478                             String fallback = null;
479                             for (String fb : fallbackList) {
480                                 if (OK_CURRENCY_FALLBACK.containsAll(fb)) {
481                                     if (!fb.equals(currencyType) && currencies.contains(fb)) {
482                                         errln(
483                                                 "Locale:\t"
484                                                         + locale
485                                                         + ";\tCurrency:\t"
486                                                         + currencyType
487                                                         + ";\tFallback converts to different code!:\t"
488                                                         + fb
489                                                         + "\t"
490                                                         + it2.getString()
491                                                         + "\t"
492                                                         + UCharacter.getName(fishyCodepoint));
493                                     }
494                                     if (fallback == null) {
495                                         fallback = fb;
496                                     }
497                                 }
498                             }
499                             if (fallback == null) {
500                                 errln(
501                                         "Locale:\t"
502                                                 + locale
503                                                 + ";\tCharacter with no good fallback (exemplars+Latin1):\t"
504                                                 + it2.getString()
505                                                 + "\t"
506                                                 + UCharacter.getName(fishyCodepoint));
507                                 badSoFar.add(fishyCodepoint);
508                             } else {
509                                 logln(
510                                         "Locale:\t"
511                                                 + locale
512                                                 + ";\tCharacter with good fallback:\t"
513                                                 + it2.getString()
514                                                 + " "
515                                                 + UCharacter.getName(fishyCodepoint)
516                                                 + " => "
517                                                 + fallback);
518                                 // badSoFar.add(fishyCodepoint);
519                             }
520                         }
521                     }
522                 }
523             }
524         }
525     }
526 
TestAbstractPaths()527     public void TestAbstractPaths() {
528         Factory cldrFactory = testInfo.getCldrFactory();
529         CLDRFile english = testInfo.getEnglish();
530         Map<String, Counter<Level>> abstactPaths = new TreeMap<>();
531         RegexTransform abstractPathTransform =
532                 new RegexTransform(RegexTransform.Processing.ONE_PASS)
533                         .add("//ldml/", "")
534                         .add("\\[@alt=\"[^\"]*\"\\]", "")
535                         .add("=\"[^\"]*\"", "=\"*\"")
536                         .add("([^]])\\[", "$1\t[")
537                         .add("([^]])/", "$1\t/")
538                         .add("/", "\t");
539 
540         for (String locale : getInclusion() <= 5 ? eightPointLocales : cldrFactory.getAvailable()) {
541             CLDRFile file = testInfo.getCLDRFile(locale, resolved);
542             if (file.isNonInheriting()) continue;
543             logln(locale + "\t-\t" + english.getName(locale));
544 
545             for (Iterator<String> it = file.iterator(); it.hasNext(); ) {
546                 String path = it.next();
547                 if (path.endsWith("/alias")) {
548                     continue;
549                 }
550                 // collect abstracted paths
551                 String abstractPath = abstractPathTransform.transform(path);
552                 Level level = SUPPLEMENTAL_DATA_INFO.getCoverageLevel(path, locale);
553                 if (level == Level.OPTIONAL) {
554                     level = Level.COMPREHENSIVE;
555                 }
556                 Counter<Level> row = abstactPaths.get(abstractPath);
557                 if (row == null) {
558                     abstactPaths.put(abstractPath, row = new Counter<>());
559                 }
560                 row.add(level, 1);
561             }
562         }
563         logln(CldrUtility.LINE_SEPARATOR + "Abstract Paths");
564         for (Entry<String, Counter<Level>> pathInfo : abstactPaths.entrySet()) {
565             String path = pathInfo.getKey();
566             Counter<Level> counter = pathInfo.getValue();
567             logln(counter.getTotal() + "\t" + getCoverage(counter) + "\t" + path);
568         }
569     }
570 
getCoverage(Counter<Level> counter)571     private CharSequence getCoverage(Counter<Level> counter) {
572         StringBuilder result = new StringBuilder();
573         boolean first = true;
574         for (Level level : counter.getKeysetSortedByKey()) {
575             if (first) {
576                 first = false;
577             } else {
578                 result.append(' ');
579             }
580             result.append("L").append(level.ordinal()).append("=").append(counter.get(level));
581         }
582         return result;
583     }
584 
585     // public void TestCLDRFileCache() {
586     // long start = System.nanoTime();
587     // Factory cldrFactory = testInfo.getCldrFactory();
588     // String unusualLocale = "hi";
589     // CLDRFile file = cldrFactory.make(unusualLocale, true);
590     // long afterOne = System.nanoTime();
591     // logln("First: " + (afterOne-start));
592     // CLDRFile file2 = cldrFactory.make(unusualLocale, true);
593     // long afterTwo = System.nanoTime();
594     // logln("Second: " + (afterTwo-afterOne));
595     // }
596     //
TestPaths()597     public void TestPaths() {
598         Relation<String, String> distinguishing =
599                 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
600         Relation<String, String> nonDistinguishing =
601                 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
602         Factory cldrFactory = testInfo.getCldrFactory();
603         CLDRFile english = testInfo.getEnglish();
604 
605         Relation<String, String> pathToLocale =
606                 Relation.of(
607                         new TreeMap<String, Set<String>>(CLDRFile.getComparator(DtdType.ldml)),
608                         TreeSet.class,
609                         null);
610         Set<String> localesToTest =
611                 getInclusion() <= 5 ? eightPointLocales : cldrFactory.getAvailable();
612         for (String locale : localesToTest) {
613             CLDRFile file = testInfo.getCLDRFile(locale, resolved);
614             DtdType dtdType = null;
615             if (file.isNonInheriting()) continue;
616             DisplayAndInputProcessor displayAndInputProcessor =
617                     new DisplayAndInputProcessor(file, false);
618 
619             logln(locale + "\t-\t" + english.getName(locale));
620 
621             for (Iterator<String> it = file.iterator(); it.hasNext(); ) {
622                 String path = it.next();
623                 if (dtdType == null) {
624                     dtdType = DtdType.fromPath(path);
625                 }
626 
627                 if (path.endsWith("/alias")) {
628                     continue;
629                 }
630                 String value = file.getStringValue(path);
631                 if (value == null) {
632                     throw new IllegalArgumentException(
633                             locale + "\tError: in null value at " + path);
634                 }
635 
636                 String displayValue = displayAndInputProcessor.processForDisplay(path, value);
637                 if (!displayValue.equals(value)) {
638                     logln(
639                             "\t"
640                                     + locale
641                                     + "\tdisplayAndInputProcessor changes display value <"
642                                     + value
643                                     + ">\t=>\t<"
644                                     + displayValue
645                                     + ">\t\t"
646                                     + path);
647                 }
648                 String inputValue =
649                         displayAndInputProcessor.processInput(
650                                 path, displayValue, internalException);
651                 if (internalException[0] != null) {
652                     errln(
653                             "\t"
654                                     + locale
655                                     + "\tdisplayAndInputProcessor internal error <"
656                                     + value
657                                     + ">\t=>\t<"
658                                     + inputValue
659                                     + ">\t\t"
660                                     + path);
661                     internalException[0].printStackTrace(System.out);
662                 }
663                 if (isVerbose() && !inputValue.equals(value)) {
664                     displayAndInputProcessor.processInput(path, value, internalException); // for
665                     // debugging
666                     logln(
667                             "\t"
668                                     + locale
669                                     + "\tdisplayAndInputProcessor changes input value <"
670                                     + value
671                                     + ">\t=>\t<"
672                                     + inputValue
673                                     + ">\t\t"
674                                     + path);
675                 }
676 
677                 pathToLocale.put(path, locale);
678 
679                 // also check for non-distinguishing attributes
680                 if (path.contains("/identity")) continue;
681 
682                 String fullPath = file.getFullXPath(path);
683                 XPathParts parts = XPathParts.getFrozenInstance(fullPath);
684                 for (int i = 0; i < parts.size(); ++i) {
685                     if (parts.getAttributeCount(i) == 0) {
686                         continue;
687                     }
688                     String element = parts.getElement(i);
689                     for (String attribute : parts.getAttributeKeys(i)) {
690                         if (skipAttributes.contains(attribute)) continue;
691                         if (CLDRFile.isDistinguishing(dtdType, element, attribute)) {
692                             distinguishing.put(element, attribute);
693                         } else {
694                             nonDistinguishing.put(element, attribute);
695                         }
696                     }
697                 }
698             }
699         }
700 
701         if (isVerbose()) {
702             System.out.format(
703                     "Distinguishing Elements: %s" + CldrUtility.LINE_SEPARATOR, distinguishing);
704             System.out.format(
705                     "Nondistinguishing Elements: %s" + CldrUtility.LINE_SEPARATOR,
706                     nonDistinguishing);
707             System.out.format("Skipped %s" + CldrUtility.LINE_SEPARATOR, skipAttributes);
708         }
709     }
710 
711     /** The verbose output shows the results of 1..3 \u00a4 signs. */
checkCurrency()712     public void checkCurrency() {
713         Map<String, Set<R2<String, Integer>>> results =
714                 new TreeMap<>(Collator.getInstance(ULocale.ENGLISH));
715         for (ULocale locale : ULocale.getAvailableLocales()) {
716             if (locale.getCountry().length() != 0) {
717                 continue;
718             }
719             for (int i = 1; i < 4; ++i) {
720                 NumberFormat format = getCurrencyInstance(locale, i);
721                 for (Currency c :
722                         new Currency[] {
723                             Currency.getInstance("USD"),
724                             Currency.getInstance("EUR"),
725                             Currency.getInstance("INR")
726                         }) {
727                     format.setCurrency(c);
728                     final String formatted = format.format(12345.67);
729                     Set<R2<String, Integer>> set = results.get(formatted);
730                     if (set == null) {
731                         results.put(formatted, set = new TreeSet<>());
732                     }
733                     set.add(Row.of(locale.toString(), i));
734                 }
735             }
736         }
737         for (String formatted : results.keySet()) {
738             logln(formatted + "\t" + results.get(formatted));
739         }
740     }
741 
getCurrencyInstance(ULocale locale, int type)742     private static NumberFormat getCurrencyInstance(ULocale locale, int type) {
743         NumberFormat format = NumberFormat.getCurrencyInstance(locale);
744         if (type > 1) {
745             DecimalFormat format2 = (DecimalFormat) format;
746             String pattern = format2.toPattern();
747             String replacement = "\u00a4\u00a4";
748             for (int i = 2; i < type; ++i) {
749                 replacement += "\u00a4";
750             }
751             pattern = pattern.replace("\u00a4", replacement);
752             format2.applyPattern(pattern);
753         }
754         return format;
755     }
756 
safeExemplars(CLDRFile file, String string)757     private UnicodeSet safeExemplars(CLDRFile file, String string) {
758         final UnicodeSet result = file.getExemplarSet(string, WinningChoice.NORMAL);
759         return result != null ? result : new UnicodeSet();
760     }
761 
TestAPath()762     public void TestAPath() {
763         // <month type="1">1</month>
764         String path =
765                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/month[@type=\"1\"]";
766         CLDRFile root = testInfo.getRoot();
767         logln("path: " + path);
768         String fullpath = root.getFullXPath(path);
769         logln("fullpath: " + fullpath);
770         String value = root.getStringValue(path);
771         logln("value: " + value);
772         Status status = new Status();
773         String source = root.getSourceLocaleID(path, status);
774         logln("locale: " + source);
775         logln("status: " + status);
776     }
777 
TestDefaultContents()778     public void TestDefaultContents() {
779         Set<String> defaultContents = Inheritance.defaultContents;
780         Multimap<String, String> parentToChildren = Inheritance.parentToChildren;
781 
782         // Put a list of locales that should be default content here.
783         final String expectDC[] = {
784             "os_GE" // see CLDR-14118
785         };
786         for (final String locale : expectDC) {
787             assertTrue(
788                     "expect " + locale + " to be a default content locale",
789                     defaultContents.contains(locale));
790         }
791 
792         if (DEBUG) {
793             Inheritance.showChain("", "", "root");
794         }
795 
796         for (String locale : defaultContents) {
797             CLDRFile cldrFile;
798             try {
799                 cldrFile = testInfo.getCLDRFile(locale, false);
800             } catch (RuntimeException e) {
801                 logln("Can't open default content file:\t" + locale);
802                 continue;
803             }
804             // we check that the default content locale is always empty
805             for (Iterator<String> it = cldrFile.iterator(); it.hasNext(); ) {
806                 String path = it.next();
807                 if (path.contains("/identity")) {
808                     continue;
809                 }
810                 errln("Default content file not empty:\t" + locale);
811                 showDifferences(locale);
812                 break;
813             }
814         }
815 
816         // check that if a locale has any children, that exactly one of them is
817         // the default content. Ignore locales with variants
818 
819         for (Entry<String, Collection<String>> localeAndKids :
820                 parentToChildren.asMap().entrySet()) {
821             String locale = localeAndKids.getKey();
822             if (locale.equals("root")) {
823                 continue;
824             }
825 
826             Collection<String> rawChildren = localeAndKids.getValue();
827 
828             // remove variant children
829             Set<String> children = new LinkedHashSet<>();
830             for (String child : rawChildren) {
831                 if (new LocaleIDParser().set(child).getVariants().length == 0) {
832                     children.add(child);
833                 }
834             }
835             if (children.isEmpty()) {
836                 continue;
837             }
838 
839             Set<String> defaultContentChildren = new LinkedHashSet<>(children);
840             defaultContentChildren.retainAll(defaultContents);
841             if (defaultContentChildren.size() == 1) {
842                 continue;
843                 // If we're already down to the region level then it's OK not to have
844                 // default contents.
845             } else if (!new LocaleIDParser().set(locale).getRegion().isEmpty()) {
846                 continue;
847             } else if (defaultContentChildren.isEmpty()) {
848                 Object possible = highestShared(locale, children);
849                 errln(
850                         "Locale has children but is missing default contents locale: "
851                                 + locale
852                                 + ", children: "
853                                 + children
854                                 + "; possible fixes for children:\n"
855                                 + possible);
856             } else {
857                 errln(
858                         "Locale has too many defaultContent locales!!: "
859                                 + locale
860                                 + ", defaultContents: "
861                                 + defaultContentChildren);
862             }
863         }
864 
865         // check that each default content locale is likely-subtag equivalent to
866         // its parent.
867 
868         for (String locale : defaultContents) {
869             String maxLocale = LikelySubtags.maximize(locale, likelyData);
870             String localeParent = LocaleIDParser.getParent(locale);
871             String maxLocaleParent = LikelySubtags.maximize(localeParent, likelyData);
872             if (locale.equals("ar_001") || locale.equals("nb")) {
873                 logln(
874                         "Known exception to likelyMax(locale="
875                                 + locale
876                                 + ")"
877                                 + " == "
878                                 + "likelyMax(defaultContent="
879                                 + localeParent
880                                 + ")");
881                 continue;
882             }
883             assertEquals(
884                     "likelyMax(locale="
885                             + locale
886                             + ")"
887                             + " == "
888                             + "likelyMax(defaultContent="
889                             + localeParent
890                             + ")",
891                     maxLocaleParent,
892                     maxLocale);
893         }
894     }
895 
highestShared(String parent, Set<String> children)896     private String highestShared(String parent, Set<String> children) {
897         M4<PathHeader, String, String, Boolean> data =
898                 ChainedMap.of(
899                         new TreeMap<PathHeader, Object>(),
900                         new TreeMap<String, Object>(),
901                         new TreeMap<String, Object>(),
902                         Boolean.class);
903         CLDRFile parentFile = testInfo.getCLDRFile(parent, true);
904         PathHeader.Factory phf = PathHeader.getFactory(testInfo.getEnglish());
905         for (String child : children) {
906             CLDRFile cldrFile = testInfo.getCLDRFile(child, false);
907             for (String path : cldrFile) {
908                 if (path.contains("/identity")) {
909                     continue;
910                 }
911                 if (path.contains("provisional") || path.contains("unconfirmed")) {
912                     continue;
913                 }
914                 String value = cldrFile.getStringValue(path);
915                 // double-check
916                 String parentValue = parentFile.getStringValue(path);
917                 if (value.equals(parentValue)) {
918                     continue;
919                 }
920                 PathHeader ph = phf.fromPath(path);
921                 data.put(ph, value, child, Boolean.TRUE);
922                 data.put(ph, parentValue == null ? "∅∅∅" : parentValue, child, Boolean.TRUE);
923             }
924         }
925         StringBuilder result = new StringBuilder();
926         for (Entry<PathHeader, Map<String, Map<String, Boolean>>> entry : data) {
927             for (Entry<String, Map<String, Boolean>> item : entry.getValue().entrySet()) {
928                 result.append("\n")
929                         .append(entry.getKey())
930                         .append("\t")
931                         .append(item.getKey() + "\t" + item.getValue().keySet());
932             }
933         }
934         return result.toString();
935     }
936 
937     public static class Inheritance {
938         public static final Set<String> defaultContents =
939                 SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales();
940         public static final Multimap<String, String> parentToChildren;
941 
942         static {
943             Multimap<String, String> _parentToChildren = TreeMultimap.create();
944             for (String child : testInfo.getCldrFactory().getAvailable()) {
945                 if (child.equals("root")) {
946                     continue;
947                 }
948                 String localeParent = LocaleIDParser.getParent(child);
_parentToChildren.put(localeParent, child)949                 _parentToChildren.put(localeParent, child);
950             }
951             parentToChildren = ImmutableMultimap.copyOf(_parentToChildren);
952         }
953 
showChain(String prefix, String gparent, String current)954         public static void showChain(String prefix, String gparent, String current) {
955             Collection<String> children = parentToChildren.get(current);
956             if (children == null) {
957                 throw new IllegalArgumentException();
958             }
959             prefix +=
960                     current
961                             + (defaultContents.contains(current) ? "*" : "")
962                             + (isLikelyEquivalent(gparent, current) ? "~" : "")
963                             + "\t";
964 
965             // find leaves
966             Set<String> parents = new LinkedHashSet<>(children);
967             parents.retainAll(parentToChildren.keySet());
968             Set<String> leaves = new LinkedHashSet<>(children);
969             leaves.removeAll(parentToChildren.keySet());
970             if (!leaves.isEmpty()) {
971                 List<String> presentation = new ArrayList<>();
972                 boolean gotDc = false;
973                 for (String s : leaves) {
974                     String shown = s;
975                     if (isLikelyEquivalent(current, s)) {
976                         shown += "~";
977                     }
978                     if (defaultContents.contains(s)) {
979                         gotDc = true;
980                         shown += "*";
981                     }
982                     if (!shown.equals(s)) {
983                         presentation.add(0, shown);
984                     } else {
985                         presentation.add(shown);
986                     }
987                 }
988                 if (!gotDc) {
989                     int debug = 0;
990                 }
991                 if (leaves.size() == 1) {
992                     System.out.println(prefix + Joiner.on(" ").join(presentation));
993                 } else {
994                     System.out.println(prefix + "{" + Joiner.on(" ").join(presentation) + "}");
995                 }
996             }
997             for (String parent : parents) {
998                 showChain(prefix, current, parent);
999             }
1000         }
1001 
isLikelyEquivalent(String locale1, String locale2)1002         static boolean isLikelyEquivalent(String locale1, String locale2) {
1003             if (locale1.equals(locale2)) {
1004                 return true;
1005             }
1006             try {
1007                 String maxLocale1 = LikelySubtags.maximize(locale1, likelyData);
1008                 String maxLocale2 = LikelySubtags.maximize(locale2, likelyData);
1009                 return maxLocale1 != null && Objects.equal(maxLocale1, maxLocale2);
1010             } catch (Exception e) {
1011                 return false;
1012             }
1013         }
1014     }
1015 
1016     static final Map<String, String> likelyData = SUPPLEMENTAL_DATA_INFO.getLikelySubtags();
1017 
1018     private static final EnumSet<CldrVersion> badLdmlICUVersions =
1019             EnumSet.of(
1020                     CldrVersion.v1_1_1, CldrVersion.v1_2, CldrVersion.v1_4_1, CldrVersion.v1_5_1);
1021 
TestLikelySubtagsComplete()1022     public void TestLikelySubtagsComplete() {
1023         LanguageTagParser ltp = new LanguageTagParser();
1024         for (String locale : testInfo.getCldrFactory().getAvailable()) {
1025             if (locale.equals("root")) {
1026                 continue;
1027             }
1028             String maxLocale = LikelySubtags.maximize(locale, likelyData);
1029             if (maxLocale == null) {
1030                 errln("Locale missing likely subtag: " + locale);
1031                 continue;
1032             }
1033             ltp.set(maxLocale);
1034             if (ltp.getLanguage().isEmpty()
1035                     || ltp.getScript().isEmpty()
1036                     || ltp.getRegion().isEmpty()) {
1037                 errln("Locale has defective likely subtag: " + locale + " => " + maxLocale);
1038             }
1039         }
1040     }
1041 
showDifferences(String locale)1042     private void showDifferences(String locale) {
1043         CLDRFile cldrFile = testInfo.getCLDRFile(locale, false);
1044         final String localeParent = LocaleIDParser.getParent(locale);
1045         CLDRFile parentFile = testInfo.getCLDRFile(localeParent, true);
1046         int funnyCount = 0;
1047         for (Iterator<String> it = cldrFile.iterator("", cldrFile.getComparator());
1048                 it.hasNext(); ) {
1049             String path = it.next();
1050             if (path.contains("/identity")) {
1051                 continue;
1052             }
1053             final String fullXPath = cldrFile.getFullXPath(path);
1054             if (fullXPath.contains("[@draft=\"unconfirmed\"]")
1055                     || fullXPath.contains("[@draft=\"provisional\"]")) {
1056                 funnyCount++;
1057                 continue;
1058             }
1059             logln("\tpath:\t" + path);
1060             logln("\t\t" + locale + " value:\t<" + cldrFile.getStringValue(path) + ">");
1061             final String parentFullPath = parentFile.getFullXPath(path);
1062             logln("\t\t" + localeParent + " value:\t<" + parentFile.getStringValue(path) + ">");
1063             logln("\t\t" + locale + " fullpath:\t" + fullXPath);
1064             logln("\t\t" + localeParent + " fullpath:\t" + parentFullPath);
1065         }
1066         logln("\tCount of non-approved:\t" + funnyCount);
1067     }
1068 
1069     enum MissingType {
1070         plurals,
1071         main_exemplars,
1072         no_main,
1073         collation,
1074         index_exemplars,
1075         punct_exemplars
1076     }
1077 
TestCoreData()1078     public void TestCoreData() {
1079         Set<String> availableLanguages = testInfo.getCldrFactory().getAvailableLanguages();
1080         PluralInfo rootRules = SUPPLEMENTAL_DATA_INFO.getPlurals(PluralType.cardinal, "root");
1081         Multimap<MissingType, Comparable> errors = TreeMultimap.create();
1082         errors.put(MissingType.collation, "?");
1083 
1084         Multimap<MissingType, Comparable> warnings = TreeMultimap.create();
1085         warnings.put(MissingType.collation, "?");
1086         warnings.put(MissingType.index_exemplars, "?");
1087         warnings.put(MissingType.punct_exemplars, "?");
1088 
1089         Set<String> collations = new HashSet<>();
1090 
1091         // collect collation info
1092         Factory collationFactory =
1093                 Factory.make(CLDRPaths.COLLATION_DIRECTORY, ".*", DraftStatus.contributed);
1094         for (String localeID : collationFactory.getAvailable()) {
1095             if (isTopLevel(localeID)) {
1096                 collations.add(localeID);
1097             }
1098         }
1099         logln(collations.toString());
1100 
1101         Set<String> allLanguages =
1102                 Builder.with(new TreeSet<String>())
1103                         .addAll(collations)
1104                         .addAll(availableLanguages)
1105                         .freeze();
1106 
1107         for (String localeID : allLanguages) {
1108             if (localeID.equals("root")) {
1109                 continue; // skip script locales
1110             }
1111             if (!isTopLevel(localeID)) {
1112                 continue;
1113             }
1114             if (!StandardCodes.isLocaleAtLeastBasic(localeID)) {
1115                 continue;
1116             }
1117             errors.clear();
1118             warnings.clear();
1119 
1120             String name =
1121                     "Locale:" + localeID + " (" + testInfo.getEnglish().getName(localeID) + ")";
1122 
1123             if (!collations.contains(localeID)) {
1124                 warnings.put(MissingType.collation, "missing");
1125                 logln(name + " is missing " + MissingType.collation.toString());
1126             }
1127 
1128             try {
1129                 CLDRFile cldrFile =
1130                         testInfo.getCldrFactory().make(localeID, false, DraftStatus.contributed);
1131 
1132                 String wholeFileAlias = cldrFile.getStringValue("//ldml/alias");
1133                 if (wholeFileAlias != null) {
1134                     logln("Whole-file alias:" + name);
1135                     continue;
1136                 }
1137 
1138                 PluralInfo pluralInfo =
1139                         SUPPLEMENTAL_DATA_INFO.getPlurals(PluralType.cardinal, localeID);
1140                 if (pluralInfo == rootRules) {
1141                     logln(name + " is missing " + MissingType.plurals.toString());
1142                     warnings.put(MissingType.plurals, "missing");
1143                 }
1144                 UnicodeSet main = cldrFile.getExemplarSet("", WinningChoice.WINNING);
1145                 if (main == null || main.isEmpty()) {
1146                     errln("  " + name + " is missing " + MissingType.main_exemplars.toString());
1147                     errors.put(MissingType.main_exemplars, "missing");
1148                 }
1149                 UnicodeSet index = cldrFile.getExemplarSet("index", WinningChoice.WINNING);
1150                 if (index == null || index.isEmpty()) {
1151                     logln(name + " is missing " + MissingType.index_exemplars.toString());
1152                     warnings.put(MissingType.index_exemplars, "missing");
1153                 }
1154                 UnicodeSet punctuation =
1155                         cldrFile.getExemplarSet("punctuation", WinningChoice.WINNING);
1156                 if (punctuation == null || punctuation.isEmpty()) {
1157                     logln(name + " is missing " + MissingType.punct_exemplars.toString());
1158                     warnings.put(MissingType.punct_exemplars, "missing");
1159                 }
1160             } catch (Exception e) {
1161                 StringWriter x = new StringWriter();
1162                 PrintWriter pw = new PrintWriter(x);
1163                 e.printStackTrace(pw);
1164                 pw.flush();
1165                 errln("  " + name + " is missing main locale data." + x);
1166                 errors.put(MissingType.no_main, x.toString());
1167             }
1168 
1169             // report errors
1170 
1171             if (errors.isEmpty() && warnings.isEmpty()) {
1172                 logln(name + ": No problems...");
1173             }
1174         }
1175     }
1176 
isTopLevel(String localeID)1177     private boolean isTopLevel(String localeID) {
1178         return "root".equals(LocaleIDParser.getParent(localeID));
1179     }
1180 
1181     /** Tests that every dtd item is connected from root */
TestDtdCompleteness()1182     public void TestDtdCompleteness() {
1183         for (DtdType type : DtdType.values()) {
1184             if (type.getStatus() != DtdType.DtdStatus.active) {
1185                 continue;
1186             }
1187             DtdData dtdData = DtdData.getInstance(type);
1188             Set<Element> descendents = new LinkedHashSet<>();
1189             dtdData.getDescendents(dtdData.ROOT, descendents);
1190             Set<Element> elements = dtdData.getElements();
1191             if (!elements.equals(descendents)) {
1192                 for (Element e : elements) {
1193                     if (!descendents.contains(e)
1194                             && !e.equals(dtdData.PCDATA)
1195                             && !e.equals(dtdData.ANY)) {
1196                         errln(type + ": Element " + e + " not contained in descendents of ROOT.");
1197                     }
1198                 }
1199                 for (Element e : descendents) {
1200                     if (!elements.contains(e)) {
1201                         errln(type + ": Element " + e + ", descendent of ROOT, not in elements.");
1202                     }
1203                 }
1204             }
1205             LinkedHashSet<Element> all = new LinkedHashSet<>(descendents);
1206             all.addAll(elements);
1207             Set<Attribute> attributes = dtdData.getAttributes();
1208             for (Attribute a : attributes) {
1209                 if (!elements.contains(a.element)) {
1210                     errln(type + ": Attribute " + a + " isn't for any element.");
1211                 }
1212             }
1213         }
1214     }
1215 
TestBasicDTDCompatibility()1216     public void TestBasicDTDCompatibility() {
1217 
1218         if (!TestCLDRPaths.canUseArchiveDirectory()) {
1219             return;
1220         }
1221 
1222         final String oldCommon = CldrVersion.LAST_RELEASE_VERSION.getBaseDirectory() + "/common";
1223 
1224         // set up exceptions
1225         Set<String> changedToEmpty =
1226                 new HashSet<>(
1227                         Arrays.asList(
1228                                 new String[] {
1229                                     "version",
1230                                     "languageCoverage",
1231                                     "scriptCoverage",
1232                                     "territoryCoverage",
1233                                     "currencyCoverage",
1234                                     "timezoneCoverage",
1235                                     "skipDefaultLocale"
1236                                 }));
1237         Set<String> PCDATA = new HashSet<>();
1238         PCDATA.add("PCDATA");
1239         Set<String> EMPTY = new HashSet<>();
1240         EMPTY.add("EMPTY");
1241         Set<String> VERSION = new HashSet<>();
1242         VERSION.add("version");
1243 
1244         // test all DTDs
1245         for (DtdType dtd : DtdType.values()) {
1246             if (dtd.getStatus() != DtdType.DtdStatus.active) {
1247                 continue;
1248             }
1249             if (dtd.firstVersion != null
1250                     && CldrVersion.LAST_RELEASE_VERSION.isOlderThan(
1251                             CldrVersion.from(dtd.firstVersion))) {
1252                 continue; // DTD didn't exist in last release
1253             }
1254             if (dtd == DtdType.ldmlICU) continue;
1255             try {
1256                 ElementAttributeInfo oldDtd = ElementAttributeInfo.getInstance(oldCommon, dtd);
1257                 ElementAttributeInfo newDtd = ElementAttributeInfo.getInstance(dtd);
1258 
1259                 if (oldDtd == newDtd) {
1260                     continue;
1261                 }
1262                 Relation<String, String> oldElement2Children = oldDtd.getElement2Children();
1263                 Relation<String, String> newElement2Children = newDtd.getElement2Children();
1264 
1265                 Relation<String, String> oldElement2Attributes = oldDtd.getElement2Attributes();
1266                 Relation<String, String> newElement2Attributes = newDtd.getElement2Attributes();
1267 
1268                 for (String element : oldElement2Children.keySet()) {
1269                     Set<String> oldChildren = oldElement2Children.getAll(element);
1270                     Set<String> newChildren = newElement2Children.getAll(element);
1271                     if (newChildren == null) {
1272                         if (!knownElementExceptions.contains(Pair.of(dtd.toString(), element))) {
1273                             errln("Old " + dtd + " contains element not in new: <" + element + ">");
1274                         }
1275                         continue;
1276                     }
1277                     Set<String> funny = containsInOrder(newChildren, oldChildren);
1278                     if (funny != null) {
1279                         if (changedToEmpty.contains(element)
1280                                 && oldChildren.equals(PCDATA)
1281                                 && newChildren.equals(EMPTY)) {
1282                             // ok, skip
1283                         } else {
1284                             errln(
1285                                     "Old "
1286                                             + dtd
1287                                             + " element <"
1288                                             + element
1289                                             + "> has children Missing/Misordered:\t"
1290                                             + funny
1291                                             + "\n\t\tOld:\t"
1292                                             + oldChildren
1293                                             + "\n\t\tNew:\t"
1294                                             + newChildren);
1295                         }
1296                     }
1297 
1298                     Set<String> oldAttributes = oldElement2Attributes.getAll(element);
1299                     if (oldAttributes == null) {
1300                         oldAttributes = Collections.emptySet();
1301                     }
1302                     Set<String> newAttributes = newElement2Attributes.getAll(element);
1303                     if (newAttributes == null) {
1304                         newAttributes = Collections.emptySet();
1305                     }
1306                     if (!newAttributes.containsAll(oldAttributes)) {
1307                         LinkedHashSet<String> missing = new LinkedHashSet<>(oldAttributes);
1308                         missing.removeAll(newAttributes);
1309                         if (element.equals(dtd.toString()) && missing.equals(VERSION)) {
1310                             // ok, skip
1311                         } else {
1312                             errln(
1313                                     "Old "
1314                                             + dtd
1315                                             + " element <"
1316                                             + element
1317                                             + "> has attributes Missing:\t"
1318                                             + missing
1319                                             + "\n\t\tOld:\t"
1320                                             + oldAttributes
1321                                             + "\n\t\tNew:\t"
1322                                             + newAttributes);
1323                         }
1324                     }
1325                 }
1326             } catch (Exception e) {
1327                 e.printStackTrace();
1328                 errln("Failure with " + dtd);
1329             }
1330         }
1331     }
1332 
containsInOrder(Set<T> superset, Set<T> subset)1333     private <T> Set<T> containsInOrder(Set<T> superset, Set<T> subset) {
1334         if (!superset.containsAll(subset)) {
1335             LinkedHashSet<T> missing = new LinkedHashSet<>(subset);
1336             missing.removeAll(superset);
1337             return missing;
1338         }
1339         // ok, we know that they are subsets, try order
1340         Set<T> result = null;
1341         DiscreteComparator<T> comp =
1342                 new DiscreteComparator.Builder<T>(Ordering.ARBITRARY).add(superset).get();
1343         T last = null;
1344         for (T item : subset) {
1345             if (last != null) {
1346                 int order = comp.compare(last, item);
1347                 if (order != -1) {
1348                     if (result == null) {
1349                         result = new HashSet<>();
1350                         result.add(last);
1351                         result.add(item);
1352                     }
1353                 }
1354             }
1355             last = item;
1356         }
1357         return result;
1358     }
1359 
TestDtdCompatibility()1360     public void TestDtdCompatibility() {
1361 
1362         for (DtdType type : DtdType.values()) {
1363             if (type.getStatus() != DtdType.DtdStatus.active) {
1364                 continue;
1365             }
1366             DtdData dtdData = DtdData.getInstance(type);
1367             Map<String, Element> currentElementFromName = dtdData.getElementFromName();
1368 
1369             // current has no orphan
1370             Set<Element> orphans = new LinkedHashSet<>(dtdData.getElementFromName().values());
1371             orphans.remove(dtdData.ROOT);
1372             orphans.remove(dtdData.PCDATA);
1373             orphans.remove(dtdData.ANY);
1374             Set<String> elementsWithoutAlt = new TreeSet<>();
1375             Set<String> elementsWithoutDraft = new TreeSet<>();
1376             Set<String> elementsWithoutAlias = new TreeSet<>();
1377             Set<String> elementsWithoutSpecial = new TreeSet<>();
1378 
1379             for (Element element : dtdData.getElementFromName().values()) {
1380                 Set<Element> children = element.getChildren().keySet();
1381                 orphans.removeAll(children);
1382                 if (type == DtdType.ldml
1383                         && !SUPPLEMENTAL_DATA_INFO.isDeprecated(type, element.name, "*", "*")) {
1384                     if (element.getType() == ElementType.PCDATA) {
1385                         if (element.getAttributeNamed("alt") == null) {
1386                             elementsWithoutAlt.add(element.name);
1387                         }
1388                         if (element.getAttributeNamed("draft") == null) {
1389                             elementsWithoutDraft.add(element.name);
1390                         }
1391                     } else {
1392                         if (children.size() != 0 && !"alias".equals(element.name)) {
1393                             if (element.getChildNamed("alias") == null) {
1394                                 elementsWithoutAlias.add(element.name);
1395                             }
1396                             if (element.getChildNamed("special") == null) {
1397                                 elementsWithoutSpecial.add(element.name);
1398                             }
1399                         }
1400                     }
1401                 }
1402             }
1403             assertEquals(
1404                     type + " DTD Must not have orphan elements", Collections.EMPTY_SET, orphans);
1405             assertEquals(
1406                     type + " DTD elements with PCDATA must have 'alt' attributes",
1407                     Collections.EMPTY_SET,
1408                     elementsWithoutAlt);
1409             assertEquals(
1410                     type + " DTD elements with PCDATA must have 'draft' attributes",
1411                     Collections.EMPTY_SET,
1412                     elementsWithoutDraft);
1413             assertEquals(
1414                     type + " DTD elements with children must have 'alias' elements",
1415                     Collections.EMPTY_SET,
1416                     elementsWithoutAlias);
1417             assertEquals(
1418                     type + " DTD elements with children must have 'special' elements",
1419                     Collections.EMPTY_SET,
1420                     elementsWithoutSpecial);
1421 
1422             if (!TestCLDRPaths.canUseArchiveDirectory()) {
1423                 return;
1424             }
1425 
1426             for (CldrVersion version : CldrVersion.CLDR_VERSIONS_DESCENDING) {
1427                 if (version == CldrVersion.unknown || version == CldrVersion.baseline) {
1428                     continue;
1429                 }
1430                 if (type.getStatus() != DtdStatus.active) {
1431                     continue; // not active
1432                 }
1433                 if (type.firstVersion != null
1434                         && version.isOlderThan(CldrVersion.from(type.firstVersion))) {
1435                     continue; // didn't exist at that point
1436                 }
1437                 DtdData dtdDataOld;
1438                 try {
1439                     dtdDataOld = DtdData.getInstance(type, version.toString());
1440                 } catch (IllegalArgumentException e) {
1441                     boolean tooOld = false;
1442                     switch (type) {
1443                         case ldmlICU:
1444                             tooOld = badLdmlICUVersions.contains(version);
1445                             break;
1446                         case ldmlBCP47:
1447                         case keyboard3:
1448                             if (type.firstVersion != null) {
1449                                 tooOld = version.isOlderThan(CldrVersion.from(type.firstVersion));
1450                             }
1451                             break;
1452                         default:
1453                             break;
1454                     }
1455                     if (tooOld) {
1456                         continue;
1457                     } else {
1458                         errln(
1459                                 "v"
1460                                         + version
1461                                         + ": "
1462                                         + e.getClass().getSimpleName()
1463                                         + ", "
1464                                         + e.getMessage());
1465                         continue;
1466                     }
1467                 }
1468                 // verify that if E is in dtdDataOld, then it is in dtdData, and
1469                 // has at least the same children and attributes
1470                 for (Entry<String, Element> entry : dtdDataOld.getElementFromName().entrySet()) {
1471                     Element oldElement = entry.getValue();
1472                     Element newElement = currentElementFromName.get(entry.getKey());
1473                     if (knownElementExceptions.contains(
1474                             Pair.of(type.toString(), oldElement.getName()))) {
1475                         continue;
1476                     }
1477                     if (assertNotNull(
1478                             type
1479                                     + " DTD for trunk must be superset of v"
1480                                     + version
1481                                     + ", and must contain «"
1482                                     + oldElement.getName()
1483                                     + "»",
1484                             newElement)) {
1485                         // TODO Check order also
1486                         for (Element oldChild : oldElement.getChildren().keySet()) {
1487                             if (oldChild == null) {
1488                                 continue;
1489                             }
1490                             Element newChild = newElement.getChildNamed(oldChild.getName());
1491                             // skip certain items
1492                             if (version.isOlderThan(CldrVersion.v1_6_1)
1493                                     && newElement.getName().equals("zone")
1494                                     && oldChild.getName().equals("usesMetazone")) {
1495                                 if (logKnownIssue(
1496                                         "CLDR-17054",
1497                                         "Breakage with items older than 1.6.1: "
1498                                                 + newElement.getName()
1499                                                 + " / "
1500                                                 + oldChild.getName())) {
1501                                     continue;
1502                                 }
1503                             }
1504 
1505                             if (knownChildExceptions.contains(
1506                                     Pair.of(newElement.getName(), oldChild.getName()))) {
1507                                 continue;
1508                             }
1509                             assertNotNull(
1510                                     type
1511                                             + " DTD - Trunk children of «"
1512                                             + newElement.getName()
1513                                             + "» must be superset of v"
1514                                             + version
1515                                             + ", and must contain «"
1516                                             + oldChild.getName()
1517                                             + "»",
1518                                     newChild);
1519                         }
1520                         for (Attribute oldAttribute : oldElement.getAttributes().keySet()) {
1521                             Attribute newAttribute =
1522                                     newElement.getAttributeNamed(oldAttribute.getName());
1523 
1524                             if (knownAttributeExceptions.contains(
1525                                     Pair.of(newElement.getName(), oldAttribute.getName()))) {
1526                                 continue;
1527                             }
1528                             assertNotNull(
1529                                     type
1530                                             + " DTD - Trunk attributes of «"
1531                                             + newElement.getName()
1532                                             + "» must be superset of v"
1533                                             + version
1534                                             + ", and must contain «"
1535                                             + oldAttribute.getName()
1536                                             + "»",
1537                                     newAttribute);
1538                         }
1539                     }
1540                 }
1541             }
1542         }
1543     }
1544 
1545     /** Compare each path to each other path for every single file in CLDR */
TestDtdComparison()1546     public void TestDtdComparison() {
1547         // try some simple paths for regression
1548 
1549         sortPaths(
1550                 DtdData.getInstance(DtdType.ldml).getDtdComparator(null),
1551                 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/dateTimeFormatLength[@type=\"full\"]/dateTimeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1552                 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats");
1553 
1554         sortPaths(
1555                 DtdData.getInstance(DtdType.supplementalData).getDtdComparator(null),
1556                 "//supplementalData/territoryContainment/group[@type=\"419\"][@contains=\"013 029 005\"][@grouping=\"true\"]",
1557                 "//supplementalData/territoryContainment/group[@type=\"003\"][@contains=\"021 013 029\"][@grouping=\"true\"]");
1558     }
1559 
TestDtdComparisonsAll()1560     public void TestDtdComparisonsAll() {
1561         if (getInclusion() <= 5) { // Only run this test in exhaustive mode.
1562             return;
1563         }
1564         for (File file : CLDRConfig.getInstance().getAllCLDRFilesEndingWith(".xml")) {
1565             if (file.getParentFile().getName().equals("import")
1566                     && file.getParentFile().getParentFile().getName().equals("keyboards")) {
1567                 return; // skip imports
1568             }
1569             if (file.getPath().contains("/keyboards/3.0/")
1570                     && logKnownIssue(
1571                             "CLDR-17574", "With v46, parsing issues for keyboard xml files")) {
1572                 continue;
1573             }
1574             checkDtdComparatorFor(file, null);
1575         }
1576     }
1577 
checkDtdComparatorForResource(String fileToRead, DtdType overrideDtdType)1578     public void checkDtdComparatorForResource(String fileToRead, DtdType overrideDtdType) {
1579         MyHandler myHandler = new MyHandler(overrideDtdType);
1580         XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
1581         try {
1582             myHandler.fileName = fileToRead;
1583             xfr.read(myHandler.fileName, TestBasic.class, -1, true);
1584             logln(myHandler.fileName);
1585         } catch (Exception e) {
1586             Throwable t = e;
1587             StringBuilder b = new StringBuilder();
1588             String indent = "";
1589             while (t != null) {
1590                 b.append(indent).append(t.getMessage());
1591                 indent = indent.isEmpty() ? "\n\t\t" : indent + "\t";
1592                 t = t.getCause();
1593             }
1594             errln(b.toString());
1595             return;
1596         }
1597         DtdData dtdData = DtdData.getInstance(myHandler.dtdType);
1598         sortPaths(dtdData.getDtdComparator(null), myHandler.data);
1599     }
1600 
checkDtdComparatorFor(File fileToRead, DtdType overrideDtdType)1601     public void checkDtdComparatorFor(File fileToRead, DtdType overrideDtdType) {
1602         MyHandler myHandler = new MyHandler(overrideDtdType);
1603         XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
1604         try {
1605             myHandler.fileName = PathUtilities.getNormalizedPathString(fileToRead);
1606             xfr.read(myHandler.fileName, -1, true);
1607             logln(myHandler.fileName);
1608         } catch (Exception e) {
1609             e.printStackTrace();
1610             Throwable t = e;
1611             StringBuilder b = new StringBuilder();
1612             String indent = "";
1613             while (t != null) {
1614                 b.append(indent).append(t.getMessage());
1615                 indent = indent.isEmpty() ? "\n\t\t" : indent + "\t";
1616                 t = t.getCause();
1617             }
1618             errln(b.toString());
1619             return;
1620         }
1621         DtdData dtdData = DtdData.getInstance(myHandler.dtdType);
1622         sortPaths(dtdData.getDtdComparator(null), myHandler.data);
1623     }
1624 
1625     static class MyHandler extends XMLFileReader.SimpleHandler {
1626         private String fileName;
1627         private DtdType dtdType;
1628         private final Set<String> data = new LinkedHashSet<>();
1629 
MyHandler(DtdType overrideDtdType)1630         public MyHandler(DtdType overrideDtdType) {
1631             dtdType = overrideDtdType;
1632         }
1633 
1634         @Override
handlePathValue(String path, @SuppressWarnings("unused") String value)1635         public void handlePathValue(String path, @SuppressWarnings("unused") String value) {
1636             if (dtdType == null) {
1637                 try {
1638                     dtdType = DtdType.fromPath(path);
1639                 } catch (Exception e) {
1640                     throw new IllegalArgumentException("Can't read " + fileName, e);
1641                 }
1642             }
1643             data.add(path);
1644         }
1645     }
1646 
sortPaths(Comparator<String> dc, Collection<String> paths)1647     public void sortPaths(Comparator<String> dc, Collection<String> paths) {
1648         String[] array = paths.toArray(new String[paths.size()]);
1649         sortPaths(dc, array);
1650     }
1651 
sortPaths(Comparator<String> dc, String... array)1652     public void sortPaths(Comparator<String> dc, String... array) {
1653         Arrays.sort(array, 0, array.length, dc);
1654     }
1655     // public void TestNewDtdData() moved to TestDtdData
1656 }
1657