• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.unittest;
2 
3 import java.io.File;
4 import java.util.ArrayList;
5 import java.util.Arrays;
6 import java.util.Collection;
7 import java.util.HashMap;
8 import java.util.HashSet;
9 import java.util.Iterator;
10 import java.util.LinkedHashSet;
11 import java.util.Map;
12 import java.util.Map.Entry;
13 import java.util.Set;
14 import java.util.TreeSet;
15 
16 import org.unicode.cldr.util.CLDRConfig;
17 import org.unicode.cldr.util.CLDRFile;
18 import org.unicode.cldr.util.CLDRFile.Status;
19 import org.unicode.cldr.util.CLDRPaths;
20 import org.unicode.cldr.util.ChainedMap;
21 import org.unicode.cldr.util.ChainedMap.M3;
22 import org.unicode.cldr.util.ChainedMap.M4;
23 import org.unicode.cldr.util.ChainedMap.M5;
24 import org.unicode.cldr.util.DtdData;
25 import org.unicode.cldr.util.DtdData.Attribute;
26 import org.unicode.cldr.util.DtdData.Element;
27 import org.unicode.cldr.util.DtdData.ElementType;
28 import org.unicode.cldr.util.DtdType;
29 import org.unicode.cldr.util.Pair;
30 import org.unicode.cldr.util.PathHeader;
31 import org.unicode.cldr.util.PathHeader.Factory;
32 import org.unicode.cldr.util.PathHeader.PageId;
33 import org.unicode.cldr.util.PathHeader.SectionId;
34 import org.unicode.cldr.util.PathStarrer;
35 import org.unicode.cldr.util.XMLFileReader;
36 import org.unicode.cldr.util.XPathParts;
37 
38 import com.google.common.collect.ImmutableSet;
39 
40 public class TestPaths extends TestFmwkPlus {
41     static CLDRConfig testInfo = CLDRConfig.getInstance();
42 
main(String[] args)43     public static void main(String[] args) {
44         new TestPaths().run(args);
45     }
46 
VerifyEnglishVsRoot()47     public void VerifyEnglishVsRoot() {
48         HashSet<String> rootPaths = new HashSet<String>();
49         testInfo.getRoot().forEach(rootPaths::add);
50         HashSet<String> englishPaths = new HashSet<String>();
51         testInfo.getEnglish().forEach(englishPaths::add);
52         englishPaths.removeAll(rootPaths);
53         if (englishPaths.size() == 0) {
54             return;
55         }
56         Factory phf = PathHeader.getFactory(testInfo.getEnglish());
57         Status status = new Status();
58         Set<PathHeader> suspiciousPaths = new TreeSet<PathHeader>();
59         Set<PathHeader> errorPaths = new TreeSet<PathHeader>();
60         ImmutableSet<String> SKIP_VARIANT = ImmutableSet.of(
61             "ps-variant", "ug-variant", "ky-variant", "az-short",
62             "Arab-variant", "am-variant", "pm-variant");
63         for (String path : englishPaths) {
64             // skip aliases, other counts
65             if (!status.pathWhereFound.equals(path)
66                 || path.contains("[@count=\"one\"]")) {
67                 continue;
68             }
69             PathHeader ph = phf.fromPath(path);
70             if (ph.getSectionId() == SectionId.Special
71                 || ph.getCode().endsWith("-name-other")) {
72                 continue;
73             }
74             if (path.contains("@alt") && !SKIP_VARIANT.contains(ph.getCode())
75                 && ph.getPageId() != PageId.Alphabetic_Information) {
76                 errorPaths.add(ph);
77             } else {
78                 suspiciousPaths.add(ph);
79             }
80         }
81         if (errorPaths.size() != 0) {
82             errln("Error: paths in English but not root:"
83                 + getPaths(errorPaths));
84         }
85         logln("Suspicious: paths in English but not root:"
86             + getPaths(suspiciousPaths));
87     }
88 
getPaths(Set<PathHeader> altPaths)89     private String getPaths(Set<PathHeader> altPaths) {
90         StringBuilder b = new StringBuilder();
91         for (PathHeader path : altPaths) {
92             b.append("\n\t\t")
93             .append(path)
94             .append(":\t")
95             .append(testInfo.getEnglish().getStringValue(
96                 path.getOriginalPath()));
97         }
98         return b.toString();
99     }
100 
101     /**
102      * For each locale to test, loop through all the paths, including "extra" paths,
103      * checking for each path: checkFullpathValue; checkPrettyPaths
104      */
TestPathHeadersAndValues()105     public void TestPathHeadersAndValues() {
106         /*
107          * Use the pathsSeen hash to keep track of which paths have
108          * already been seen. Since the test checkPrettyPaths isn't really
109          * locale-dependent, run it only once for each path, for the first
110          * locale in which the path occurs.
111          */
112         Set<String> pathsSeen = new HashSet<String>();
113         CLDRFile englishFile = testInfo.getCldrFactory().make("en", true);
114         PathHeader.Factory phf = PathHeader.getFactory(englishFile);
115         Status status = new Status();
116         for (String locale : getLocalesToTest()) {
117             CLDRFile file = testInfo.getCLDRFile(locale, true);
118             logln("Testing path headers and values for locale => " + locale);
119             for (Iterator<String> it = file.iterator(); it.hasNext();) {
120                 String path = it.next();
121                 checkFullpathValue(path, file, locale, status, false /* not extra path */);
122                 if (!pathsSeen.contains(path)) {
123                     pathsSeen.add(path);
124                     checkPrettyPaths(path, phf);
125                 }
126             }
127             for (String path : file.getExtraPaths()) {
128                 checkFullpathValue(path, file, locale, status, true /* extra path */);
129                 if (!pathsSeen.contains(path)) {
130                     pathsSeen.add(path);
131                     checkPrettyPaths(path, phf);
132                 }
133             }
134         }
135     }
136 
137     /**
138      * For the given path and CLDRFile, check that fullPath, value, and source are all non-null.
139      *
140      * Allow null value for some exceptional extra paths.
141      *
142      * @param path the path, such as '//ldml/dates/fields/field[@type="tue"]/relative[@type="1"]'
143      * @param file the CLDRFile
144      * @param locale the locale string
145      * @param status the Status to be used/set by getSourceLocaleID
146      * @param isExtraPath true if the path is an "extra" path, else false
147      */
checkFullpathValue(String path, CLDRFile file, String locale, Status status, boolean isExtraPath)148     private void checkFullpathValue(String path, CLDRFile file, String locale, Status status, boolean isExtraPath) {
149         String fullPath = file.getFullXPath(path);
150         String value = file.getStringValue(path);
151         String source = file.getSourceLocaleID(path, status);
152 
153         assertEquals("CanonicalOrder", XPathParts.getFrozenInstance(path).toString(), path);
154 
155         if (fullPath == null) {
156             errln("Locale: " + locale + ",\t Null FullPath: " + path);
157         } else if (!path.equals(fullPath)) {
158             assertEquals("CanonicalOrder (FP)", XPathParts.getFrozenInstance(fullPath).toString(), fullPath);
159         }
160 
161         if (value == null) {
162             if (isExtraPath && extraPathAllowsNullValue(path)) {
163                 return;
164             }
165             errln("Locale: " + locale + ",\t Null Value: " + path);
166         }
167 
168         if (source == null) {
169             errln("Locale: " + locale + ",\t Null Source: " + path);
170         }
171 
172         if (status.pathWhereFound == null) {
173             errln("Locale: " + locale + ",\t Null Found Path: " + path);
174         }
175     }
176 
177     /**
178      * Is the given extra path exceptional in the sense that null value is allowed?
179      *
180      * @param path the extra path
181      * @return true if null value is allowed for path, else false
182      *
183      * As of 2019-08-09, null values are found for many "metazone" paths like:
184      * //ldml/dates/timeZoneNames/metazone[@type="Galapagos"]/long/standard
185      * for many locales. Also for some "zone" paths like:
186      * //ldml/dates/timeZoneNames/zone[@type="Pacific/Honolulu"]/short/generic
187      * for locales including root, ja, and ar. Also for some "dayPeriods" paths like
188      * //ldml/dates/calendars/calendar[@type="gregorian"]/dayPeriods/dayPeriodContext[@type="stand-alone"]/dayPeriodWidth[@type="wide"]/dayPeriod[@type="midnight"]
189      * only for these six locales: bs_Cyrl, bs_Cyrl_BA, pa_Arab, pa_Arab_PK, uz_Arab, uz_Arab_AF.
190      *
191      * This function is nearly identical to the JavaScript function with the same name.
192      * Keep the two functions consistent with each other. It would be more ideal if this
193      * knowledge were encapsulated on the server and the client didn't need to know about it.
194      * The server could send the client special fallback values instead of null.
195      *
196      * Extra paths are generated by CLDRFile.getRawExtraPathsPrivate; this function may need
197      * updating (to allow null for other paths) if that function changes.
198      *
199      * Reference: https://unicode-org.atlassian.net/browse/CLDR-11238
200      */
extraPathAllowsNullValue(String path)201     private boolean extraPathAllowsNullValue(String path) {
202         if (path.contains("/timeZoneNames/metazone")
203             || path.contains("/timeZoneNames/zone")
204             || path.contains("/dayPeriods/dayPeriodContext")
205             || path.contains("/unitPattern")
206             || path.contains("/gender")
207             || path.contains("/caseMinimalPairs")
208             || path.contains("/genderMinimalPairs")
209             ) {
210             return true;
211         }
212         return false;
213     }
214 
215     /**
216      * Check that the given path and PathHeader.Factory undergo correct
217      * roundtrip conversion between original and pretty paths.
218      *
219      * @param path the path string
220      * @param phf the PathHeader.Factory
221      */
checkPrettyPaths(String path, PathHeader.Factory phf)222     private void checkPrettyPaths(String path, PathHeader.Factory phf) {
223         if (path.endsWith("/alias")) {
224             return;
225         }
226         logln("Testing ==> " + path);
227         String prettied = phf.fromPath(path).toString();
228         String unprettied = phf.fromPath(path).getOriginalPath();
229         if (!path.equals(unprettied)) {
230             errln("Path Header doesn't roundtrip:\t" + path + "\t" + prettied
231                 + "\t" + unprettied);
232         } else {
233             logln(prettied + "\t" + path);
234         }
235     }
236 
getLocalesToTest()237     private Collection<String> getLocalesToTest() {
238         return params.inclusion <= 5 ? Arrays.asList("root", "en", "ja", "ar", "de", "ru")
239             : params.inclusion < 10 ? testInfo.getCldrFactory().getAvailableLanguages()
240                 : testInfo.getCldrFactory().getAvailable();
241     }
242 
243     /**
244      * find all the items that are deprecated, but appear in paths
245      * and the items that aren't deprecated, but don't appear in paths
246      */
247 
248     static final class CheckDeprecated {
249         M5<DtdType, String, String, String, Boolean> data = ChainedMap.of(
250             new HashMap<DtdType, Object>(),
251             new HashMap<String, Object>(),
252             new HashMap<String, Object>(),
253             new HashMap<String, Object>(),
254             Boolean.class);
255         private TestPaths testPaths;
256 
CheckDeprecated(TestPaths testPaths)257         public CheckDeprecated(TestPaths testPaths) {
258             this.testPaths = testPaths;
259         }
260 
261         static final Set<String> ALLOWED = new HashSet<>(Arrays.asList("postalCodeData", "postCodeRegex"));
262         static final Set<String> OK_IF_MISSING = new HashSet<>(Arrays.asList("alt", "draft", "references"));
263 
check(DtdData dtdData, XPathParts parts, String fullName)264         public boolean check(DtdData dtdData, XPathParts parts, String fullName) {
265             for (int i = 0; i < parts.size(); ++i) {
266                 String elementName = parts.getElement(i);
267                 if (dtdData.isDeprecated(elementName, "*", "*")) {
268                     if (ALLOWED.contains(elementName)) {
269                         return false;
270                     }
271                     testPaths.errln("Deprecated element in data: "
272                         + dtdData.dtdType
273                         + ":" + elementName
274                         + " \t;" + fullName);
275                     return true;
276                 }
277                 data.put(dtdData.dtdType, elementName, "*", "*", true);
278                 for (Entry<String, String> attributeNValue : parts.getAttributes(i).entrySet()) {
279                     String attributeName = attributeNValue.getKey();
280                     if (dtdData.isDeprecated(elementName, attributeName, "*")) {
281                         if (attributeName.equals("draft")) {
282                             testPaths.errln("Deprecated attribute in data: "
283                                             + dtdData.dtdType
284                                             + ":" + elementName
285                                             + ":" + attributeName
286                                             + " \t;" + fullName +
287                                             " - consider adding to DtdData.DRAFT_ON_NON_LEAF_ALLOWED if you are sure this is ok.");
288                         } else {
289                             testPaths.errln("Deprecated attribute in data: "
290                                             + dtdData.dtdType
291                                             + ":" + elementName
292                                             + ":" + attributeName
293                                             + " \t;" + fullName);
294                         }
295                         return true;
296                     }
297                     String attributeValue = attributeNValue.getValue();
298                     if (dtdData.isDeprecated(elementName, attributeName, attributeValue)) {
299                         testPaths.errln("Deprecated attribute value in data: "
300                             + dtdData.dtdType
301                             + ":" + elementName
302                             + ":" + attributeName
303                             + ":" + attributeValue
304                             + " \t;" + fullName);
305                         return true;
306                     }
307                     data.put(dtdData.dtdType, elementName, attributeName, "*", true);
308                     data.put(dtdData.dtdType, elementName, attributeName, attributeValue, true);
309                 }
310             }
311             return false;
312         }
313 
show(int inclusion)314         public void show(int inclusion) {
315             for (DtdType dtdType : DtdType.values()) {
316                 if (dtdType == DtdType.ldmlICU ||
317                     (inclusion <= 5 && dtdType == DtdType.platform)) { // keyboards/*/_platform.xml won't be in the list for non-exhaustive runs
318                     continue;
319                 }
320                 M4<String, String, String, Boolean> infoEAV = data.get(dtdType);
321                 if (infoEAV == null) {
322                     testPaths.warnln("Data doesn't contain: "
323                         + dtdType);
324                     continue;
325                 }
326                 DtdData dtdData = DtdData.getInstance(dtdType);
327                 for (Element element : dtdData.getElements()) {
328                     if (element.isDeprecated() || element == dtdData.ANY || element == dtdData.PCDATA) {
329                         continue;
330                     }
331                     M3<String, String, Boolean> infoAV = infoEAV.get(element.name);
332                     if (infoAV == null) {
333                         testPaths.logln("Data doesn't contain: "
334                             + dtdType
335                             + ":" + element.name);
336                         continue;
337                     }
338 
339                     for (Attribute attribute : element.getAttributes().keySet()) {
340                         if (attribute.isDeprecated() || OK_IF_MISSING.contains(attribute.name)) {
341                             continue;
342                         }
343                         Map<String, Boolean> infoV = infoAV.get(attribute.name);
344                         if (infoV == null) {
345                             testPaths.logln("Data doesn't contain: "
346                                 + dtdType
347                                 + ":" + element.name
348                                 + ":" + attribute.name);
349                             continue;
350                         }
351                         for (String value : attribute.values.keySet()) {
352                             if (attribute.isDeprecatedValue(value)) {
353                                 continue;
354                             }
355                             if (!infoV.containsKey(value)) {
356                                 testPaths.logln("Data doesn't contain: "
357                                     + dtdType
358                                     + ":" + element.name
359                                     + ":" + attribute.name
360                                     + ":" + value);
361                             }
362                         }
363                     }
364                 }
365             }
366         }
367     }
368 
TestNonLdml()369     public void TestNonLdml() {
370         int maxPerDirectory = getInclusion() <= 5 ? 20 : Integer.MAX_VALUE;
371         CheckDeprecated checkDeprecated = new CheckDeprecated(this);
372         PathStarrer starrer = new PathStarrer();
373         StringBuilder removed = new StringBuilder();
374         Set<String> nonFinalValues = new LinkedHashSet<>();
375         Set<String> skipLast = new HashSet(Arrays.asList("version", "generation"));
376         String[] normalizedPath = { "" };
377 
378         int counter = 0;
379         for (String directory : Arrays.asList("keyboards/", "common/", "seed/", "exemplars/")) {
380             String dirPath = CLDRPaths.BASE_DIRECTORY + directory;
381             for (String fileName : new File(dirPath).list()) {
382                 File dir2 = new File(dirPath + fileName);
383                 if (!dir2.isDirectory()
384                     || fileName.equals("properties") // TODO as flat files
385 //                    || fileName.equals(".DS_Store")
386 //                    || ChartDelta.LDML_DIRECTORIES.contains(dir)
387 //                    || fileName.equals("dtd")  // TODO as flat files
388 //                    || fileName.equals(".project")  // TODO as flat files
389 //                    //|| dir.equals("uca") // TODO as flat files
390                     ) {
391                     continue;
392                 }
393 
394                 Set<Pair<String, String>> seen = new HashSet<>();
395                 Set<String> seenStarred = new HashSet<>();
396                 int count = 0;
397                 Set<Element> haveErrorsAlready = new HashSet<>();
398                 for (String file : dir2.list()) {
399                     if (!file.endsWith(".xml")) {
400                         continue;
401                     }
402                     if (++count > maxPerDirectory) {
403                         break;
404                     }
405                     DtdType type = null;
406                     DtdData dtdData = null;
407                     String fullName = dir2 + "/" + file;
408                     for (Pair<String, String> pathValue : XMLFileReader.loadPathValues(fullName, new ArrayList<Pair<String, String>>(), true)) {
409                         String path = pathValue.getFirst();
410                         final String value = pathValue.getSecond();
411                         XPathParts parts = XPathParts.getFrozenInstance(path);
412                         if (dtdData == null) {
413                             type = DtdType.valueOf(parts.getElement(0));
414                             dtdData = DtdData.getInstance(type);
415                         }
416 
417                         XPathParts pathParts = XPathParts.getFrozenInstance(path);
418                         String finalElementString = pathParts.getElement(-1);
419                         Element finalElement = dtdData.getElementFromName().get(finalElementString);
420                         if (!haveErrorsAlready.contains(finalElement)) {
421                             ElementType elementType = finalElement.getType();
422                             // HACK!!
423                             if (pathParts.size() > 1 && "identity".equals(pathParts.getElement(1))) {
424                                 elementType = ElementType.EMPTY;
425                                 logKnownIssue("cldrbug:9784", "fix TODO's in Attribute validity tests");
426                             } else if (pathParts.size() > 2
427                                 && "validity".equals(pathParts.getElement(2))
428                                 && value.isEmpty()) {
429                                 String typeValue = pathParts.getAttributeValue(-1, "type");
430                                 if ("TODO".equals(typeValue)
431                                     || "locale".equals(typeValue)) {
432                                     elementType = ElementType.EMPTY;
433                                     logKnownIssue("cldrbug:9784", "fix TODO's in Attribute validity tests");
434                                 }
435                             }
436                             if ((elementType == ElementType.PCDATA) == (value.isEmpty())) {
437                                 errln("Inconsistency:"
438                                     + "\tfile=" + fileName + "/" + file
439                                     + "\telementType=" + elementType
440                                     + "\tvalue=«" + value + "»"
441                                     + "\tpath=" + path);
442                                 haveErrorsAlready.add(finalElement); // suppress all but first error
443                             }
444                         }
445 
446                         if (checkDeprecated.check(dtdData, parts, fullName)) {
447                             break;
448                         }
449 
450                         String last = parts.getElement(-1);
451                         if (skipLast.contains(last)) {
452                             continue;
453                         }
454                         String dpath = CLDRFile.getDistinguishingXPath(path, normalizedPath);
455                         if (!dpath.equals(path)) {
456                             checkParts(dpath, dtdData);
457                         }
458                         if (!normalizedPath.equals(path) && !normalizedPath[0].equals(dpath)) {
459                             checkParts(normalizedPath[0], dtdData);
460                         }
461                         parts = parts.cloneAsThawed();
462                         counter = removeNonDistinguishing(parts, dtdData, counter, removed, nonFinalValues);
463                         String cleaned = parts.toString();
464                         Pair<String, String> pair = Pair.of(type == DtdType.ldml ? file : type.toString(), cleaned);
465                         if (seen.contains(pair)) {
466 //                        parts.set(path);
467 //                        removeNonDistinguishing(parts, dtdData, counter, removed, nonFinalValues);
468                             errln("Duplicate: " + file + ", " + path + ", " + cleaned + ", " + value);
469                         } else {
470                             seen.add(pair);
471                             if (!nonFinalValues.isEmpty()) {
472                                 String starredPath = starrer.set(path);
473                                 if (!seenStarred.contains(starredPath)) {
474                                     seenStarred.add(starredPath);
475                                     logln("Non-node values: " + nonFinalValues + "\t" + path);
476                                 }
477                             }
478                             if (isVerbose()) {
479                                 String starredPath = starrer.set(path);
480                                 if (!seenStarred.contains(starredPath)) {
481                                     seenStarred.add(starredPath);
482                                     logln("@" + "\t" + cleaned + "\t" + removed);
483                                 }
484                             }
485                         }
486                     }
487                 }
488             }
489         }
490         checkDeprecated.show(getInclusion());
491     }
492 
checkParts(String path, DtdData dtdData)493     private void checkParts(String path, DtdData dtdData) {
494         XPathParts parts = XPathParts.getFrozenInstance(path);
495         Element current = dtdData.ROOT;
496         for (int i = 0; i < parts.size(); ++i) {
497             String elementName = parts.getElement(i);
498             if (i == 0) {
499                 assertEquals("root", current.name, elementName);
500             } else {
501                 current = current.getChildNamed(elementName);
502                 if (!assertNotNull("element", current)) {
503                     return; // failed
504                 }
505             }
506             for (String attributeName : parts.getAttributeKeys(i)) {
507                 Attribute attribute = current.getAttributeNamed(attributeName);
508                 if (!assertNotNull("attribute", attribute)) {
509                     return; // failed
510                 }
511                 // later, check values
512             }
513         }
514     }
515 
516     static final Set<String> SKIP_NON_NODE = new HashSet<>(Arrays.asList("references", "visibility", "access"));
517 
518     /**
519      *
520      * @param parts the thawed XPathParts (can't be frozen, for putAttributeValue)
521      * @param data
522      * @param counter
523      * @param removed
524      * @param nonFinalValues
525      * @return
526      */
removeNonDistinguishing(XPathParts parts, DtdData data, int counter, StringBuilder removed, Set<String> nonFinalValues)527     private int removeNonDistinguishing(XPathParts parts, DtdData data, int counter, StringBuilder removed, Set<String> nonFinalValues) {
528         removed.setLength(0);
529         nonFinalValues.clear();
530         HashSet<String> toRemove = new HashSet<>();
531         nonFinalValues.clear();
532         int size = parts.size();
533         int last = size - 1;
534         for (int i = 0; i < size; ++i) {
535             removed.append("/");
536             String element = parts.getElement(i);
537             if (data.isOrdered(element)) {
538                 parts.putAttributeValue(i, "_q", String.valueOf(counter));
539                 counter++;
540             }
541             for (String attribute : parts.getAttributeKeys(i)) {
542                 if (!data.isDistinguishing(element, attribute)) {
543                     toRemove.add(attribute);
544                     if (i != last && !SKIP_NON_NODE.contains(attribute)) {
545                         if (attribute.equals("draft")
546                             && (parts.getElement(1).equals("transforms") || parts.getElement(1).equals("collations"))) {
547                             // do nothing
548                         } else {
549                             nonFinalValues.add(attribute);
550                         }
551                     }
552                 }
553             }
554             if (!toRemove.isEmpty()) {
555                 for (String attribute : toRemove) {
556                     removed.append("[@" + attribute + "=\"" + parts.getAttributeValue(i, attribute) + "\"]");
557                     parts.removeAttribute(i, attribute);
558                 }
559                 toRemove.clear();
560             }
561         }
562         return counter;
563     }
564 }
565