• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.unittest;
2 
3 import java.io.File;
4 import java.util.ArrayList;
5 import java.util.Arrays;
6 import java.util.Collection;
7 import java.util.HashMap;
8 import java.util.HashSet;
9 import java.util.Iterator;
10 import java.util.LinkedHashSet;
11 import java.util.Map;
12 import java.util.Map.Entry;
13 import java.util.Set;
14 import java.util.TreeSet;
15 
16 import org.unicode.cldr.util.CLDRConfig;
17 import org.unicode.cldr.util.CLDRFile;
18 import org.unicode.cldr.util.CLDRFile.Status;
19 import org.unicode.cldr.util.CLDRPaths;
20 import org.unicode.cldr.util.ChainedMap;
21 import org.unicode.cldr.util.ChainedMap.M3;
22 import org.unicode.cldr.util.ChainedMap.M4;
23 import org.unicode.cldr.util.ChainedMap.M5;
24 import org.unicode.cldr.util.DtdData;
25 import org.unicode.cldr.util.DtdData.Attribute;
26 import org.unicode.cldr.util.DtdData.Element;
27 import org.unicode.cldr.util.DtdData.ElementType;
28 import org.unicode.cldr.util.DtdType;
29 import org.unicode.cldr.util.Pair;
30 import org.unicode.cldr.util.PathHeader;
31 import org.unicode.cldr.util.PathHeader.Factory;
32 import org.unicode.cldr.util.PathHeader.PageId;
33 import org.unicode.cldr.util.PathHeader.SectionId;
34 import org.unicode.cldr.util.PathStarrer;
35 import org.unicode.cldr.util.XMLFileReader;
36 import org.unicode.cldr.util.XPathParts;
37 
38 import com.google.common.collect.ImmutableSet;
39 import com.ibm.icu.dev.util.CollectionUtilities;
40 
41 public class TestPaths extends TestFmwkPlus {
42     static CLDRConfig testInfo = CLDRConfig.getInstance();
43 
main(String[] args)44     public static void main(String[] args) {
45         new TestPaths().run(args);
46     }
47 
VerifyEnglishVsRoot()48     public void VerifyEnglishVsRoot() {
49         Set<String> rootPaths = CollectionUtilities.addAll(testInfo
50             .getRoot().iterator(),
51             new HashSet<String>());
52         Set<String> englishPaths = CollectionUtilities.addAll(testInfo
53             .getEnglish().iterator(), new HashSet<String>());
54         englishPaths.removeAll(rootPaths);
55         if (englishPaths.size() == 0) {
56             return;
57         }
58         Factory phf = PathHeader.getFactory(testInfo.getEnglish());
59         Status status = new Status();
60         Set<PathHeader> suspiciousPaths = new TreeSet<PathHeader>();
61         Set<PathHeader> errorPaths = new TreeSet<PathHeader>();
62         ImmutableSet<String> SKIP_VARIANT = ImmutableSet.of(
63             "ps-variant", "ug-variant", "ky-variant", "az-short",
64             "Arab-variant", "am-variant", "pm-variant");
65         for (String path : englishPaths) {
66             // skip aliases, other counts
67             if (!status.pathWhereFound.equals(path)
68                 || path.contains("[@count=\"one\"]")) {
69                 continue;
70             }
71             PathHeader ph = phf.fromPath(path);
72             if (ph.getSectionId() == SectionId.Special
73                 || ph.getCode().endsWith("-name-other")) {
74                 continue;
75             }
76             if (path.contains("@alt") && !SKIP_VARIANT.contains(ph.getCode())
77                 && ph.getPageId() != PageId.Alphabetic_Information) {
78                 errorPaths.add(ph);
79             } else {
80                 suspiciousPaths.add(ph);
81             }
82         }
83         if (errorPaths.size() != 0) {
84             errln("Error: paths in English but not root:"
85                 + getPaths(errorPaths));
86         }
87         logln("Suspicious: paths in English but not root:"
88             + getPaths(suspiciousPaths));
89     }
90 
getPaths(Set<PathHeader> altPaths)91     private String getPaths(Set<PathHeader> altPaths) {
92         StringBuilder b = new StringBuilder();
93         for (PathHeader path : altPaths) {
94             b.append("\n\t\t")
95                 .append(path)
96                 .append(":\t")
97                 .append(testInfo.getEnglish().getStringValue(
98                     path.getOriginalPath()));
99         }
100         return b.toString();
101     }
102 
103     /**
104      * For each locale to test, loop through all the paths, including "extra" paths,
105      * checking for each path: checkFullpathValue; checkPrettyPaths
106      */
TestPathHeadersAndValues()107     public void TestPathHeadersAndValues() {
108         /*
109          * Use the pathsSeen hash to keep track of which paths have
110          * already been seen. Since the test checkPrettyPaths isn't really
111          * locale-dependent, run it only once for each path, for the first
112          * locale in which the path occurs.
113          */
114         Set<String> pathsSeen = new HashSet<String>();
115         CLDRFile englishFile = testInfo.getCldrFactory().make("en", true);
116         PathHeader.Factory phf = PathHeader.getFactory(englishFile);
117         Status status = new Status();
118         for (String locale : getLocalesToTest()) {
119             CLDRFile file = testInfo.getCLDRFile(locale, true);
120             logln("Testing path headers and values for locale => " + locale);
121             for (Iterator<String> it = file.iterator(); it.hasNext();) {
122                 String path = it.next();
123                 checkFullpathValue(path, file, locale, status, false /* not extra path */);
124                 if (!pathsSeen.contains(path)) {
125                     pathsSeen.add(path);
126                     checkPrettyPaths(path, phf);
127                 }
128             }
129             for (String path : file.getExtraPaths()) {
130                 checkFullpathValue(path, file, locale, status, true /* extra path */);
131                 if (!pathsSeen.contains(path)) {
132                     pathsSeen.add(path);
133                     checkPrettyPaths(path, phf);
134                 }
135             }
136         }
137     }
138 
139     /**
140      * For the given path and CLDRFile, check that fullPath, value, and source are all non-null.
141      *
142      * Allow null value for some exceptional extra paths.
143      *
144      * @param path the path, such as '//ldml/dates/fields/field[@type="tue"]/relative[@type="1"]'
145      * @param file the CLDRFile
146      * @param locale the locale string
147      * @param status the Status to be used/set by getSourceLocaleID
148      * @param isExtraPath true if the path is an "extra" path, else false
149      */
checkFullpathValue(String path, CLDRFile file, String locale, Status status, boolean isExtraPath)150     private void checkFullpathValue(String path, CLDRFile file, String locale, Status status, boolean isExtraPath) {
151         String fullPath = file.getFullXPath(path);
152         String value = file.getStringValue(path);
153         String source = file.getSourceLocaleID(path, status);
154         if (fullPath == null) {
155             errln("Locale: " + locale + ",\t FullPath: " + path);
156         }
157         if (value == null) {
158             /*
159              * Allow null value for some exceptional extra paths.
160              */
161             if (!isExtraPath || !extraPathAllowsNullValue(path)) {
162                 errln("Locale: " + locale + ",\t Value: " + path);
163             }
164         }
165         if (source == null) {
166             errln("Locale: " + locale + ",\t Source: " + path);
167         }
168         if (status.pathWhereFound == null) {
169             errln("Locale: " + locale + ",\t Found Path: " + path);
170         }
171     }
172 
173     /**
174      * Is the given extra path exceptional in the sense that null value is allowed?
175      *
176      * @param path the extra path
177      * @return true if null value is allowed for path, else false
178      *
179      * As of 2019-08-09, null values are found for many "metazone" paths like:
180      * //ldml/dates/timeZoneNames/metazone[@type="Galapagos"]/long/standard
181      * for many locales. Also for some "zone" paths like:
182      * //ldml/dates/timeZoneNames/zone[@type="Pacific/Honolulu"]/short/generic
183      * for locales including root, ja, and ar. Also for some "dayPeriods" paths like
184      * //ldml/dates/calendars/calendar[@type="gregorian"]/dayPeriods/dayPeriodContext[@type="stand-alone"]/dayPeriodWidth[@type="wide"]/dayPeriod[@type="midnight"]
185      * only for these six locales: bs_Cyrl, bs_Cyrl_BA, pa_Arab, pa_Arab_PK, uz_Arab, uz_Arab_AF.
186      *
187      * This function is nearly identical to the JavaScript function with the same name.
188      * Keep the two functions consistent with each other. It would be more ideal if this
189      * knowledge were encapsulated on the server and the client didn't need to know about it.
190      * The server could send the client special fallback values instead of null.
191      *
192      * Extra paths are generated by CLDRFile.getRawExtraPathsPrivate; this function may need
193      * updating (to allow null for other paths) if that function changes.
194      *
195      * Reference: https://unicode-org.atlassian.net/browse/CLDR-11238
196      */
extraPathAllowsNullValue(String path)197     private boolean extraPathAllowsNullValue(String path) {
198         if (path.contains("timeZoneNames/metazone") ||
199             path.contains("timeZoneNames/zone") ||
200             path.contains("dayPeriods/dayPeriodContext")) {
201             return true;
202         }
203         return false;
204     }
205 
206     /**
207      * Check that the given path and PathHeader.Factory undergo correct
208      * roundtrip conversion between original and pretty paths.
209      *
210      * @param path the path string
211      * @param phf the PathHeader.Factory
212      */
checkPrettyPaths(String path, PathHeader.Factory phf)213     private void checkPrettyPaths(String path, PathHeader.Factory phf) {
214         if (path.endsWith("/alias")) {
215             return;
216         }
217         logln("Testing ==> " + path);
218         String prettied = phf.fromPath(path).toString();
219         String unprettied = phf.fromPath(path).getOriginalPath();
220         if (!path.equals(unprettied)) {
221             errln("Path Header doesn't roundtrip:\t" + path + "\t" + prettied
222                 + "\t" + unprettied);
223         } else {
224             logln(prettied + "\t" + path);
225         }
226     }
227 
getLocalesToTest()228     private Collection<String> getLocalesToTest() {
229         return params.inclusion <= 5 ? Arrays.asList("root", "en", "ja", "ar")
230             : params.inclusion < 10 ? testInfo.getCldrFactory()
231                 .getAvailableLanguages() : testInfo.getCldrFactory()
232                     .getAvailable();
233     }
234 
235     /**
236      * find all the items that are deprecated, but appear in paths
237      * and the items that aren't deprecated, but don't appear in paths
238      */
239 
240     static final class CheckDeprecated {
241         M5<DtdType, String, String, String, Boolean> data = ChainedMap.of(
242             new HashMap<DtdType, Object>(),
243             new HashMap<String, Object>(),
244             new HashMap<String, Object>(),
245             new HashMap<String, Object>(),
246             Boolean.class);
247         private TestPaths testPaths;
248 
CheckDeprecated(TestPaths testPaths)249         public CheckDeprecated(TestPaths testPaths) {
250             this.testPaths = testPaths;
251         }
252 
253         static final Set<String> ALLOWED = new HashSet<>(Arrays.asList("postalCodeData", "postCodeRegex"));
254         static final Set<String> OK_IF_MISSING = new HashSet<>(Arrays.asList("alt", "draft", "references"));
255 
check(DtdData dtdData, XPathParts parts, String fullName)256         public boolean check(DtdData dtdData, XPathParts parts, String fullName) {
257             for (int i = 0; i < parts.size(); ++i) {
258                 String elementName = parts.getElement(i);
259                 if (dtdData.isDeprecated(elementName, "*", "*")) {
260                     if (ALLOWED.contains(elementName)) {
261                         return false;
262                     }
263                     testPaths.errln("Deprecated item in data: "
264                         + dtdData.dtdType
265                         + ":" + elementName
266                         + " \t;" + fullName);
267                     return true;
268                 }
269                 data.put(dtdData.dtdType, elementName, "*", "*", true);
270                 for (Entry<String, String> attributeNValue : parts.getAttributes(i).entrySet()) {
271                     String attributeName = attributeNValue.getKey();
272                     if (dtdData.isDeprecated(elementName, attributeName, "*")) {
273                         testPaths.errln("Deprecated item in data: "
274                             + dtdData.dtdType
275                             + ":" + elementName
276                             + ":" + attributeName
277                             + " \t;" + fullName);
278                         return true;
279                     }
280                     String attributeValue = attributeNValue.getValue();
281                     if (dtdData.isDeprecated(elementName, attributeName, attributeValue)) {
282                         testPaths.errln("Deprecated item in data: "
283                             + dtdData.dtdType
284                             + ":" + elementName
285                             + ":" + attributeName
286                             + ":" + attributeValue
287                             + " \t;" + fullName);
288                         return true;
289                     }
290                     data.put(dtdData.dtdType, elementName, attributeName, "*", true);
291                     data.put(dtdData.dtdType, elementName, attributeName, attributeValue, true);
292                 }
293             }
294             return false;
295         }
296 
show(int inclusion)297         public void show(int inclusion) {
298             for (DtdType dtdType : DtdType.values()) {
299                 if (dtdType == DtdType.ldmlICU ||
300                     (inclusion <= 5 && dtdType == DtdType.platform)) { // keyboards/*/_platform.xml won't be in the list for non-exhaustive runs
301                     continue;
302                 }
303                 M4<String, String, String, Boolean> infoEAV = data.get(dtdType);
304                 if (infoEAV == null) {
305                     testPaths.warnln("Data doesn't contain: "
306                         + dtdType);
307                     continue;
308                 }
309                 DtdData dtdData = DtdData.getInstance(dtdType);
310                 for (Element element : dtdData.getElements()) {
311                     if (element.isDeprecated() || element == dtdData.ANY || element == dtdData.PCDATA) {
312                         continue;
313                     }
314                     M3<String, String, Boolean> infoAV = infoEAV.get(element.name);
315                     if (infoAV == null) {
316                         testPaths.logln("Data doesn't contain: "
317                             + dtdType
318                             + ":" + element.name);
319                         continue;
320                     }
321 
322                     for (Attribute attribute : element.getAttributes().keySet()) {
323                         if (attribute.isDeprecated() || OK_IF_MISSING.contains(attribute.name)) {
324                             continue;
325                         }
326                         Map<String, Boolean> infoV = infoAV.get(attribute.name);
327                         if (infoV == null) {
328                             testPaths.logln("Data doesn't contain: "
329                                 + dtdType
330                                 + ":" + element.name
331                                 + ":" + attribute.name);
332                             continue;
333                         }
334                         for (String value : attribute.values.keySet()) {
335                             if (attribute.isDeprecatedValue(value)) {
336                                 continue;
337                             }
338                             if (!infoV.containsKey(value)) {
339                                 testPaths.logln("Data doesn't contain: "
340                                     + dtdType
341                                     + ":" + element.name
342                                     + ":" + attribute.name
343                                     + ":" + value);
344                             }
345                         }
346                     }
347                 }
348             }
349         }
350     }
351 
TestNonLdml()352     public void TestNonLdml() {
353         int maxPerDirectory = getInclusion() <= 5 ? 20 : Integer.MAX_VALUE;
354         CheckDeprecated checkDeprecated = new CheckDeprecated(this);
355         PathStarrer starrer = new PathStarrer();
356         StringBuilder removed = new StringBuilder();
357         Set<String> nonFinalValues = new LinkedHashSet<>();
358         Set<String> skipLast = new HashSet(Arrays.asList("version", "generation"));
359         String[] normalizedPath = { "" };
360 
361         int counter = 0;
362         for (String directory : Arrays.asList("keyboards/", "common/")) {
363             String dirPath = CLDRPaths.BASE_DIRECTORY + directory;
364             for (String fileName : new File(dirPath).list()) {
365                 File dir2 = new File(dirPath + fileName);
366                 if (!dir2.isDirectory()
367                     || fileName.equals("properties") // TODO as flat files
368 //                    || fileName.equals(".DS_Store")
369 //                    || ChartDelta.LDML_DIRECTORIES.contains(dir)
370 //                    || fileName.equals("dtd")  // TODO as flat files
371 //                    || fileName.equals(".project")  // TODO as flat files
372 //                    //|| dir.equals("uca") // TODO as flat files
373                 ) {
374                     continue;
375                 }
376 
377                 Set<Pair<String, String>> seen = new HashSet<>();
378                 Set<String> seenStarred = new HashSet<>();
379                 int count = 0;
380                 Set<Element> haveErrorsAlready = new HashSet<>();
381                 for (String file : dir2.list()) {
382                     if (!file.endsWith(".xml")) {
383                         continue;
384                     }
385                     if (++count > maxPerDirectory) {
386                         break;
387                     }
388                     DtdType type = null;
389                     DtdData dtdData = null;
390                     String fullName = dir2 + "/" + file;
391                     for (Pair<String, String> pathValue : XMLFileReader.loadPathValues(fullName, new ArrayList<Pair<String, String>>(), true)) {
392                         String path = pathValue.getFirst();
393                         final String value = pathValue.getSecond();
394                         XPathParts parts = XPathParts.getInstance(path); // not frozen, for removeNonDistinguishing
395                         if (dtdData == null) {
396                             type = DtdType.valueOf(parts.getElement(0));
397                             dtdData = DtdData.getInstance(type);
398                         }
399 
400                         XPathParts pathParts = XPathParts.getFrozenInstance(path);
401                         String finalElementString = pathParts.getElement(-1);
402                         Element finalElement = dtdData.getElementFromName().get(finalElementString);
403                         if (!haveErrorsAlready.contains(finalElement)) {
404                             ElementType elementType = finalElement.getType();
405                             // HACK!!
406                             if (pathParts.size() > 1 && "identity".equals(pathParts.getElement(1))) {
407                                 elementType = ElementType.EMPTY;
408                                 logKnownIssue("cldrbug:9784", "fix TODO's in Attribute validity tests");
409                             } else if (pathParts.size() > 2
410                                 && "validity".equals(pathParts.getElement(2))
411                                 && value.isEmpty()) {
412                                 String typeValue = pathParts.getAttributeValue(-1, "type");
413                                 if ("TODO".equals(typeValue)
414                                     || "locale".equals(typeValue)) {
415                                     elementType = ElementType.EMPTY;
416                                     logKnownIssue("cldrbug:9784", "fix TODO's in Attribute validity tests");
417                                 }
418                             }
419                             if ((elementType == ElementType.PCDATA) == (value.isEmpty())) {
420                                 errln("Inconsistency:"
421                                     + "\tfile=" + fileName + "/" + file
422                                     + "\telementType=" + elementType
423                                     + "\tvalue=«" + value + "»"
424                                     + "\tpath=" + path);
425                                 haveErrorsAlready.add(finalElement); // suppress all but first error
426                             }
427                         }
428 
429                         if (checkDeprecated.check(dtdData, parts, fullName)) {
430                             break;
431                         }
432 
433                         String last = parts.getElement(-1);
434                         if (skipLast.contains(last)) {
435                             continue;
436                         }
437                         String dpath = CLDRFile.getDistinguishingXPath(path, normalizedPath);
438                         if (!dpath.equals(path)) {
439                             checkParts(dpath, dtdData);
440                         }
441                         if (!normalizedPath.equals(path) && !normalizedPath[0].equals(dpath)) {
442                             checkParts(normalizedPath[0], dtdData);
443                         }
444                         counter = removeNonDistinguishing(parts, dtdData, counter, removed, nonFinalValues);
445                         String cleaned = parts.toString();
446                         Pair<String, String> pair = Pair.of(type == DtdType.ldml ? file : type.toString(), cleaned);
447                         if (seen.contains(pair)) {
448 //                        parts.set(path);
449 //                        removeNonDistinguishing(parts, dtdData, counter, removed, nonFinalValues);
450                             errln("Duplicate: " + file + ", " + path + ", " + cleaned + ", " + value);
451                         } else {
452                             seen.add(pair);
453                             if (!nonFinalValues.isEmpty()) {
454                                 String starredPath = starrer.set(path);
455                                 if (!seenStarred.contains(starredPath)) {
456                                     seenStarred.add(starredPath);
457                                     logln("Non-node values: " + nonFinalValues + "\t" + path);
458                                 }
459                             }
460                             if (isVerbose()) {
461                                 String starredPath = starrer.set(path);
462                                 if (!seenStarred.contains(starredPath)) {
463                                     seenStarred.add(starredPath);
464                                     logln("@" + "\t" + cleaned + "\t" + removed);
465                                 }
466                             }
467                         }
468                     }
469                 }
470             }
471         }
472         checkDeprecated.show(getInclusion());
473     }
474 
checkParts(String path, DtdData dtdData)475     private void checkParts(String path, DtdData dtdData) {
476         XPathParts parts = XPathParts.getFrozenInstance(path);
477         Element current = dtdData.ROOT;
478         for (int i = 0; i < parts.size(); ++i) {
479             String elementName = parts.getElement(i);
480             if (i == 0) {
481                 assertEquals("root", current.name, elementName);
482             } else {
483                 current = current.getChildNamed(elementName);
484                 if (!assertNotNull("element", current)) {
485                     return; // failed
486                 }
487             }
488             for (String attributeName : parts.getAttributeKeys(i)) {
489                 Attribute attribute = current.getAttributeNamed(attributeName);
490                 if (!assertNotNull("attribute", attribute)) {
491                     return; // failed
492                 }
493                 // later, check values
494             }
495         }
496     }
497 
498     static final Set<String> SKIP_NON_NODE = new HashSet<>(Arrays.asList("references", "visibility", "access"));
499 
500     /**
501      *
502      * @param parts the thawed XPathParts (can't be frozen, for putAttributeValue)
503      * @param data
504      * @param counter
505      * @param removed
506      * @param nonFinalValues
507      * @return
508      */
removeNonDistinguishing(XPathParts parts, DtdData data, int counter, StringBuilder removed, Set<String> nonFinalValues)509     private int removeNonDistinguishing(XPathParts parts, DtdData data, int counter, StringBuilder removed, Set<String> nonFinalValues) {
510         removed.setLength(0);
511         nonFinalValues.clear();
512         HashSet<String> toRemove = new HashSet<>();
513         nonFinalValues.clear();
514         int size = parts.size();
515         int last = size - 1;
516         for (int i = 0; i < size; ++i) {
517             removed.append("/");
518             String element = parts.getElement(i);
519             if (data.isOrdered(element)) {
520                 parts.putAttributeValue(i, "_q", String.valueOf(counter));
521                 counter++;
522             }
523             for (String attribute : parts.getAttributeKeys(i)) {
524                 if (!data.isDistinguishing(element, attribute)) {
525                     toRemove.add(attribute);
526                     if (i != last && !SKIP_NON_NODE.contains(attribute)) {
527                         if (attribute.equals("draft")
528                             && (parts.getElement(1).equals("transforms") || parts.getElement(1).equals("collations"))) {
529                             // do nothing
530                         } else {
531                             nonFinalValues.add(attribute);
532                         }
533                     }
534                 }
535             }
536             if (!toRemove.isEmpty()) {
537                 for (String attribute : toRemove) {
538                     removed.append("[@" + attribute + "=\"" + parts.getAttributeValue(i, attribute) + "\"]");
539                     parts.removeAttribute(i, attribute);
540                 }
541                 toRemove.clear();
542             }
543         }
544         return counter;
545     }
546 }
547