1 package org.unicode.cldr.unittest; 2 3 import java.io.File; 4 import java.util.ArrayList; 5 import java.util.Arrays; 6 import java.util.Collection; 7 import java.util.HashMap; 8 import java.util.HashSet; 9 import java.util.Iterator; 10 import java.util.LinkedHashSet; 11 import java.util.Map; 12 import java.util.Map.Entry; 13 import java.util.Set; 14 import java.util.TreeSet; 15 16 import org.unicode.cldr.util.CLDRConfig; 17 import org.unicode.cldr.util.CLDRFile; 18 import org.unicode.cldr.util.CLDRFile.Status; 19 import org.unicode.cldr.util.CLDRPaths; 20 import org.unicode.cldr.util.ChainedMap; 21 import org.unicode.cldr.util.ChainedMap.M3; 22 import org.unicode.cldr.util.ChainedMap.M4; 23 import org.unicode.cldr.util.ChainedMap.M5; 24 import org.unicode.cldr.util.DtdData; 25 import org.unicode.cldr.util.DtdData.Attribute; 26 import org.unicode.cldr.util.DtdData.Element; 27 import org.unicode.cldr.util.DtdData.ElementType; 28 import org.unicode.cldr.util.DtdType; 29 import org.unicode.cldr.util.Pair; 30 import org.unicode.cldr.util.PathHeader; 31 import org.unicode.cldr.util.PathHeader.Factory; 32 import org.unicode.cldr.util.PathHeader.PageId; 33 import org.unicode.cldr.util.PathHeader.SectionId; 34 import org.unicode.cldr.util.PathStarrer; 35 import org.unicode.cldr.util.XMLFileReader; 36 import org.unicode.cldr.util.XPathParts; 37 38 import com.google.common.collect.ImmutableSet; 39 40 public class TestPaths extends TestFmwkPlus { 41 static CLDRConfig testInfo = CLDRConfig.getInstance(); 42 main(String[] args)43 public static void main(String[] args) { 44 new TestPaths().run(args); 45 } 46 VerifyEnglishVsRoot()47 public void VerifyEnglishVsRoot() { 48 HashSet<String> rootPaths = new HashSet<>(); 49 testInfo.getRoot().forEach(rootPaths::add); 50 HashSet<String> englishPaths = new HashSet<>(); 51 testInfo.getEnglish().forEach(englishPaths::add); 52 englishPaths.removeAll(rootPaths); 53 if (englishPaths.size() == 0) { 54 return; 55 } 56 Factory phf = PathHeader.getFactory(testInfo.getEnglish()); 57 Status status = new Status(); 58 Set<PathHeader> suspiciousPaths = new TreeSet<>(); 59 Set<PathHeader> errorPaths = new TreeSet<>(); 60 ImmutableSet<String> SKIP_VARIANT = ImmutableSet.of( 61 "ps-variant", "ug-variant", "ky-variant", "az-short", 62 "Arab-variant", "am-variant", "pm-variant"); 63 for (String path : englishPaths) { 64 // skip aliases, other counts 65 if (!status.pathWhereFound.equals(path) 66 || path.contains("[@count=\"one\"]")) { 67 continue; 68 } 69 PathHeader ph = phf.fromPath(path); 70 if (ph.getSectionId() == SectionId.Special 71 || ph.getCode().endsWith("-name-other")) { 72 continue; 73 } 74 if (path.contains("@alt") && !SKIP_VARIANT.contains(ph.getCode()) 75 && ph.getPageId() != PageId.Alphabetic_Information) { 76 errorPaths.add(ph); 77 } else { 78 suspiciousPaths.add(ph); 79 } 80 } 81 if (errorPaths.size() != 0) { 82 errln("Error: paths in English but not root:" 83 + getPaths(errorPaths)); 84 } 85 logln("Suspicious: paths in English but not root:" 86 + getPaths(suspiciousPaths)); 87 } 88 getPaths(Set<PathHeader> altPaths)89 private String getPaths(Set<PathHeader> altPaths) { 90 StringBuilder b = new StringBuilder(); 91 for (PathHeader path : altPaths) { 92 b.append("\n\t\t") 93 .append(path) 94 .append(":\t") 95 .append(testInfo.getEnglish().getStringValue( 96 path.getOriginalPath())); 97 } 98 return b.toString(); 99 } 100 101 /** 102 * For each locale to test, loop through all the paths, including "extra" paths, 103 * checking for each path: checkFullpathValue; checkPrettyPaths 104 */ TestPathHeadersAndValues()105 public void TestPathHeadersAndValues() { 106 /* 107 * Use the pathsSeen hash to keep track of which paths have 108 * already been seen. Since the test checkPrettyPaths isn't really 109 * locale-dependent, run it only once for each path, for the first 110 * locale in which the path occurs. 111 */ 112 Set<String> pathsSeen = new HashSet<>(); 113 CLDRFile englishFile = testInfo.getCldrFactory().make("en", true); 114 PathHeader.Factory phf = PathHeader.getFactory(englishFile); 115 Status status = new Status(); 116 for (String locale : getLocalesToTest()) { 117 CLDRFile file = testInfo.getCLDRFile(locale, true); 118 logln("Testing path headers and values for locale => " + locale); 119 final Collection<String> extraPaths = file.getExtraPaths(); 120 for (Iterator<String> it = file.iterator(); it.hasNext();) { 121 String path = it.next(); 122 if (extraPaths.contains(path)) { 123 continue; 124 } 125 checkFullpathValue(path, file, locale, status, false /* not extra path */); 126 if (!pathsSeen.contains(path)) { 127 pathsSeen.add(path); 128 checkPrettyPaths(path, phf); 129 } 130 } 131 for (String path : extraPaths) { 132 checkFullpathValue(path, file, locale, status, true /* extra path */); 133 if (!pathsSeen.contains(path)) { 134 pathsSeen.add(path); 135 checkPrettyPaths(path, phf); 136 } 137 } 138 } 139 } 140 141 /** 142 * For the given path and CLDRFile, check that fullPath, value, and source are all non-null. 143 * 144 * Allow null value for some exceptional extra paths. 145 * 146 * @param path the path, such as '//ldml/dates/fields/field[@type="tue"]/relative[@type="1"]' 147 * @param file the CLDRFile 148 * @param locale the locale string 149 * @param status the Status to be used/set by getSourceLocaleID 150 * @param isExtraPath true if the path is an "extra" path, else false 151 */ checkFullpathValue(String path, CLDRFile file, String locale, Status status, boolean isExtraPath)152 private void checkFullpathValue(String path, CLDRFile file, String locale, Status status, boolean isExtraPath) { 153 String fullPath = file.getFullXPath(path); 154 String value = file.getStringValue(path); 155 String source = file.getSourceLocaleID(path, status); 156 157 assertEquals("CanonicalOrder", XPathParts.getFrozenInstance(path).toString(), path); 158 159 if (fullPath == null) { 160 errln("Locale: " + locale + ",\t Null FullPath: " + path); 161 } else if (!path.equals(fullPath)) { 162 assertEquals("CanonicalOrder (FP)", XPathParts.getFrozenInstance(fullPath).toString(), fullPath); 163 } 164 165 if (value == null) { 166 if (allowsExtraPath(path, isExtraPath)) { 167 return; 168 } 169 errln("Locale: " + locale + ",\t Value=null, \tPath: " + path + ",\t IsExtraPath: " + isExtraPath); 170 } 171 172 if (source == null) { 173 errln("Locale: " + locale + ",\t Source=null, \tPath: " + path); 174 } 175 176 if (status.pathWhereFound == null) { 177 errln("Locale: " + locale + ",\t Path=null, \tPath: " + path); 178 } 179 } 180 181 final ImmutableSet<String> ALLOWED_NULL = ImmutableSet.of( 182 "//ldml/dates/timeZoneNames/zone[@type=\"Australia/Currie\"]/exemplarCity", 183 "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Enderbury\"]/exemplarCity" 184 ); 185 186 /** 187 * Is the path allowed to have a null value? 188 */ allowsExtraPath(String path, boolean isExtraPath)189 public boolean allowsExtraPath(String path, boolean isExtraPath) { 190 return (isExtraPath 191 && extraPathAllowsNullValue(path)) 192 || ALLOWED_NULL.contains(path); 193 } 194 195 /** 196 * Is the given extra path exceptional in the sense that null value is allowed? 197 * 198 * @param path the extra path 199 * @return true if null value is allowed for path, else false 200 * 201 * As of 2019-08-09, null values are found for many "metazone" paths like: 202 * //ldml/dates/timeZoneNames/metazone[@type="Galapagos"]/long/standard 203 * for many locales. Also for some "zone" paths like: 204 * //ldml/dates/timeZoneNames/zone[@type="Pacific/Honolulu"]/short/generic 205 * for locales including root, ja, and ar. Also for some "dayPeriods" paths like 206 * //ldml/dates/calendars/calendar[@type="gregorian"]/dayPeriods/dayPeriodContext[@type="stand-alone"]/dayPeriodWidth[@type="wide"]/dayPeriod[@type="midnight"] 207 * only for these six locales: bs_Cyrl, bs_Cyrl_BA, pa_Arab, pa_Arab_PK, uz_Arab, uz_Arab_AF. 208 * 209 * This function is nearly identical to the JavaScript function with the same name. 210 * Keep the two functions consistent with each other. It would be more ideal if this 211 * knowledge were encapsulated on the server and the client didn't need to know about it. 212 * The server could send the client special fallback values instead of null. 213 * 214 * Extra paths are generated by CLDRFile.getRawExtraPathsPrivate; this function may need 215 * updating (to allow null for other paths) if that function changes. 216 * 217 * Reference: https://unicode-org.atlassian.net/browse/CLDR-11238 218 */ extraPathAllowsNullValue(String path)219 private boolean extraPathAllowsNullValue(String path) { 220 if (path.contains("/timeZoneNames/metazone") 221 || path.contains("/timeZoneNames/zone") 222 || path.contains("/dayPeriods/dayPeriodContext") 223 || path.contains("/unitPattern") 224 || path.contains("/gender") 225 || path.contains("/caseMinimalPairs") 226 || path.contains("/genderMinimalPairs") 227 // || path.equals("//ldml/dates/timeZoneNames/zone[@type=\"Australia/Currie\"]/exemplarCity") 228 // || path.equals("//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Enderbury\"]/exemplarCity") 229 //+ 230 ) { 231 return true; 232 } 233 return false; 234 } 235 236 /** 237 * Check that the given path and PathHeader.Factory undergo correct 238 * roundtrip conversion between original and pretty paths. 239 * 240 * @param path the path string 241 * @param phf the PathHeader.Factory 242 */ checkPrettyPaths(String path, PathHeader.Factory phf)243 private void checkPrettyPaths(String path, PathHeader.Factory phf) { 244 if (path.endsWith("/alias")) { 245 return; 246 } 247 logln("Testing ==> " + path); 248 String prettied = phf.fromPath(path).toString(); 249 String unprettied = phf.fromPath(path).getOriginalPath(); 250 if (!path.equals(unprettied)) { 251 errln("Path Header doesn't roundtrip:\t" + path + "\t" + prettied 252 + "\t" + unprettied); 253 } else { 254 logln(prettied + "\t" + path); 255 } 256 } 257 getLocalesToTest()258 private Collection<String> getLocalesToTest() { 259 return params.inclusion <= 5 ? Arrays.asList("root", "en", "ja", "ar", "de", "ru") 260 : params.inclusion < 10 ? testInfo.getCldrFactory().getAvailableLanguages() 261 : testInfo.getCldrFactory().getAvailable(); 262 } 263 264 /** 265 * find all the items that are deprecated, but appear in paths 266 * and the items that aren't deprecated, but don't appear in paths 267 */ 268 269 static final class CheckDeprecated { 270 M5<DtdType, String, String, String, Boolean> data = ChainedMap.of( 271 new HashMap<DtdType, Object>(), 272 new HashMap<String, Object>(), 273 new HashMap<String, Object>(), 274 new HashMap<String, Object>(), 275 Boolean.class); 276 private TestPaths testPaths; 277 CheckDeprecated(TestPaths testPaths)278 public CheckDeprecated(TestPaths testPaths) { 279 this.testPaths = testPaths; 280 } 281 282 static final Set<String> ALLOWED = new HashSet<>(Arrays.asList("postalCodeData", "postCodeRegex")); 283 static final Set<String> OK_IF_MISSING = new HashSet<>(Arrays.asList("alt", "draft", "references")); 284 check(DtdData dtdData, XPathParts parts, String fullName)285 public boolean check(DtdData dtdData, XPathParts parts, String fullName) { 286 for (int i = 0; i < parts.size(); ++i) { 287 String elementName = parts.getElement(i); 288 if (dtdData.isDeprecated(elementName, "*", "*")) { 289 if (ALLOWED.contains(elementName)) { 290 return false; 291 } 292 testPaths.errln("Deprecated element in data: " 293 + dtdData.dtdType 294 + ":" + elementName 295 + " \t;" + fullName); 296 return true; 297 } 298 data.put(dtdData.dtdType, elementName, "*", "*", true); 299 for (Entry<String, String> attributeNValue : parts.getAttributes(i).entrySet()) { 300 String attributeName = attributeNValue.getKey(); 301 if (dtdData.isDeprecated(elementName, attributeName, "*")) { 302 if (attributeName.equals("draft")) { 303 testPaths.errln("Deprecated attribute in data: " 304 + dtdData.dtdType 305 + ":" + elementName 306 + ":" + attributeName 307 + " \t;" + fullName + 308 " - consider adding to DtdData.DRAFT_ON_NON_LEAF_ALLOWED if you are sure this is ok."); 309 } else { 310 testPaths.errln("Deprecated attribute in data: " 311 + dtdData.dtdType 312 + ":" + elementName 313 + ":" + attributeName 314 + " \t;" + fullName); 315 } 316 return true; 317 } 318 String attributeValue = attributeNValue.getValue(); 319 if (dtdData.isDeprecated(elementName, attributeName, attributeValue)) { 320 testPaths.errln("Deprecated attribute value in data: " 321 + dtdData.dtdType 322 + ":" + elementName 323 + ":" + attributeName 324 + ":" + attributeValue 325 + " \t;" + fullName); 326 return true; 327 } 328 data.put(dtdData.dtdType, elementName, attributeName, "*", true); 329 data.put(dtdData.dtdType, elementName, attributeName, attributeValue, true); 330 } 331 } 332 return false; 333 } 334 show(int inclusion)335 public void show(int inclusion) { 336 for (DtdType dtdType : DtdType.values()) { 337 if (dtdType == DtdType.ldmlICU || 338 (inclusion <= 5 && dtdType == DtdType.platform)) { // keyboards/*/_platform.xml won't be in the list for non-exhaustive runs 339 continue; 340 } 341 M4<String, String, String, Boolean> infoEAV = data.get(dtdType); 342 if (infoEAV == null) { 343 testPaths.warnln("Data doesn't contain: " 344 + dtdType); 345 continue; 346 } 347 DtdData dtdData = DtdData.getInstance(dtdType); 348 for (Element element : dtdData.getElements()) { 349 if (element.isDeprecated() || element == dtdData.ANY || element == dtdData.PCDATA) { 350 continue; 351 } 352 M3<String, String, Boolean> infoAV = infoEAV.get(element.name); 353 if (infoAV == null) { 354 testPaths.logln("Data doesn't contain: " 355 + dtdType 356 + ":" + element.name); 357 continue; 358 } 359 360 for (Attribute attribute : element.getAttributes().keySet()) { 361 if (attribute.isDeprecated() || OK_IF_MISSING.contains(attribute.name)) { 362 continue; 363 } 364 Map<String, Boolean> infoV = infoAV.get(attribute.name); 365 if (infoV == null) { 366 testPaths.logln("Data doesn't contain: " 367 + dtdType 368 + ":" + element.name 369 + ":" + attribute.name); 370 continue; 371 } 372 for (String value : attribute.values.keySet()) { 373 if (attribute.isDeprecatedValue(value)) { 374 continue; 375 } 376 if (!infoV.containsKey(value)) { 377 testPaths.logln("Data doesn't contain: " 378 + dtdType 379 + ":" + element.name 380 + ":" + attribute.name 381 + ":" + value); 382 } 383 } 384 } 385 } 386 } 387 } 388 } 389 TestNonLdml()390 public void TestNonLdml() { 391 int maxPerDirectory = getInclusion() <= 5 ? 20 : Integer.MAX_VALUE; 392 CheckDeprecated checkDeprecated = new CheckDeprecated(this); 393 PathStarrer starrer = new PathStarrer(); 394 StringBuilder removed = new StringBuilder(); 395 Set<String> nonFinalValues = new LinkedHashSet<>(); 396 Set<String> skipLast = new HashSet(Arrays.asList("version", "generation")); 397 String[] normalizedPath = { "" }; 398 399 int counter = 0; 400 for (String directory : Arrays.asList("keyboards/", "common/", "seed/", "exemplars/")) { 401 String dirPath = CLDRPaths.BASE_DIRECTORY + directory; 402 for (String fileName : new File(dirPath).list()) { 403 File dir2 = new File(dirPath + fileName); 404 if (!dir2.isDirectory() 405 || fileName.equals("properties") // TODO as flat files 406 // || fileName.equals(".DS_Store") 407 // || ChartDelta.LDML_DIRECTORIES.contains(dir) 408 // || fileName.equals("dtd") // TODO as flat files 409 // || fileName.equals(".project") // TODO as flat files 410 // //|| dir.equals("uca") // TODO as flat files 411 ) { 412 continue; 413 } 414 415 Set<Pair<String, String>> seen = new HashSet<>(); 416 Set<String> seenStarred = new HashSet<>(); 417 int count = 0; 418 Set<Element> haveErrorsAlready = new HashSet<>(); 419 for (String file : dir2.list()) { 420 if (!file.endsWith(".xml")) { 421 continue; 422 } 423 if (++count > maxPerDirectory) { 424 break; 425 } 426 DtdType type = null; 427 DtdData dtdData = null; 428 String fullName = dir2 + "/" + file; 429 for (Pair<String, String> pathValue : XMLFileReader.loadPathValues(fullName, new ArrayList<Pair<String, String>>(), true)) { 430 String path = pathValue.getFirst(); 431 final String value = pathValue.getSecond(); 432 XPathParts parts = XPathParts.getFrozenInstance(path); 433 if (dtdData == null) { 434 type = DtdType.valueOf(parts.getElement(0)); 435 dtdData = DtdData.getInstance(type); 436 } 437 438 XPathParts pathParts = XPathParts.getFrozenInstance(path); 439 String finalElementString = pathParts.getElement(-1); 440 Element finalElement = dtdData.getElementFromName().get(finalElementString); 441 if (!haveErrorsAlready.contains(finalElement)) { 442 ElementType elementType = finalElement.getType(); 443 // HACK!! 444 if (pathParts.size() > 1 && "identity".equals(pathParts.getElement(1))) { 445 elementType = ElementType.EMPTY; 446 logKnownIssue("cldrbug:9784", "fix TODO's in Attribute validity tests"); 447 } else if (pathParts.size() > 2 448 && "validity".equals(pathParts.getElement(2)) 449 && value.isEmpty()) { 450 String typeValue = pathParts.getAttributeValue(-1, "type"); 451 if ("TODO".equals(typeValue) 452 || "locale".equals(typeValue)) { 453 elementType = ElementType.EMPTY; 454 logKnownIssue("cldrbug:9784", "fix TODO's in Attribute validity tests"); 455 } 456 } 457 if ((elementType == ElementType.PCDATA) == (value.isEmpty()) 458 && !finalElement.name.equals("nameOrderLocales")) { 459 errln("PCDATA ≠ emptyValue inconsistency:" 460 + "\tfile=" + fileName + "/" + file 461 + "\telementType=" + elementType 462 + "\tvalue=«" + value + "»" 463 + "\tpath=" + path); 464 haveErrorsAlready.add(finalElement); // suppress all but first error 465 } 466 } 467 468 if (checkDeprecated.check(dtdData, parts, fullName)) { 469 break; 470 } 471 472 String last = parts.getElement(-1); 473 if (skipLast.contains(last)) { 474 continue; 475 } 476 String dpath = CLDRFile.getDistinguishingXPath(path, normalizedPath); 477 if (!dpath.equals(path)) { 478 checkParts(dpath, dtdData); 479 } 480 if (!normalizedPath.equals(path) && !normalizedPath[0].equals(dpath)) { 481 checkParts(normalizedPath[0], dtdData); 482 } 483 parts = parts.cloneAsThawed(); 484 counter = removeNonDistinguishing(parts, dtdData, counter, removed, nonFinalValues); 485 String cleaned = parts.toString(); 486 Pair<String, String> pair = Pair.of(type == DtdType.ldml ? file : type.toString(), cleaned); 487 if (seen.contains(pair)) { 488 // parts.set(path); 489 // removeNonDistinguishing(parts, dtdData, counter, removed, nonFinalValues); 490 errln("Duplicate: " + file + ", " + path + ", " + cleaned + ", " + value); 491 } else { 492 seen.add(pair); 493 if (!nonFinalValues.isEmpty()) { 494 String starredPath = starrer.set(path); 495 if (!seenStarred.contains(starredPath)) { 496 seenStarred.add(starredPath); 497 logln("Non-node values: " + nonFinalValues + "\t" + path); 498 } 499 } 500 if (isVerbose()) { 501 String starredPath = starrer.set(path); 502 if (!seenStarred.contains(starredPath)) { 503 seenStarred.add(starredPath); 504 logln("@" + "\t" + cleaned + "\t" + removed); 505 } 506 } 507 } 508 } 509 } 510 } 511 } 512 checkDeprecated.show(getInclusion()); 513 } 514 checkParts(String path, DtdData dtdData)515 private void checkParts(String path, DtdData dtdData) { 516 XPathParts parts = XPathParts.getFrozenInstance(path); 517 Element current = dtdData.ROOT; 518 for (int i = 0; i < parts.size(); ++i) { 519 String elementName = parts.getElement(i); 520 if (i == 0) { 521 assertEquals("root", current.name, elementName); 522 } else { 523 current = current.getChildNamed(elementName); 524 if (!assertNotNull("element", current)) { 525 return; // failed 526 } 527 } 528 for (String attributeName : parts.getAttributeKeys(i)) { 529 Attribute attribute = current.getAttributeNamed(attributeName); 530 if (!assertNotNull("attribute", attribute)) { 531 return; // failed 532 } 533 // later, check values 534 } 535 } 536 } 537 538 static final Set<String> SKIP_NON_NODE = new HashSet<>(Arrays.asList("references", "visibility", "access")); 539 540 /** 541 * 542 * @param parts the thawed XPathParts (can't be frozen, for putAttributeValue) 543 * @param data 544 * @param counter 545 * @param removed 546 * @param nonFinalValues 547 * @return 548 */ removeNonDistinguishing(XPathParts parts, DtdData data, int counter, StringBuilder removed, Set<String> nonFinalValues)549 private int removeNonDistinguishing(XPathParts parts, DtdData data, int counter, StringBuilder removed, Set<String> nonFinalValues) { 550 removed.setLength(0); 551 nonFinalValues.clear(); 552 HashSet<String> toRemove = new HashSet<>(); 553 nonFinalValues.clear(); 554 int size = parts.size(); 555 int last = size - 1; 556 for (int i = 0; i < size; ++i) { 557 removed.append("/"); 558 String element = parts.getElement(i); 559 if (data.isOrdered(element)) { 560 parts.putAttributeValue(i, "_q", String.valueOf(counter)); 561 counter++; 562 } 563 for (String attribute : parts.getAttributeKeys(i)) { 564 if (!data.isDistinguishing(element, attribute)) { 565 toRemove.add(attribute); 566 if (i != last && !SKIP_NON_NODE.contains(attribute)) { 567 if (attribute.equals("draft") 568 && (parts.getElement(1).equals("transforms") || parts.getElement(1).equals("collations"))) { 569 // do nothing 570 } else { 571 nonFinalValues.add(attribute); 572 } 573 } 574 } 575 } 576 if (!toRemove.isEmpty()) { 577 for (String attribute : toRemove) { 578 removed.append("[@" + attribute + "=\"" + parts.getAttributeValue(i, attribute) + "\"]"); 579 parts.removeAttribute(i, attribute); 580 } 581 toRemove.clear(); 582 } 583 } 584 return counter; 585 } 586 } 587