1 package org.unicode.cldr.unittest; 2 3 import java.io.File; 4 import java.util.ArrayList; 5 import java.util.Arrays; 6 import java.util.Collection; 7 import java.util.HashMap; 8 import java.util.HashSet; 9 import java.util.Iterator; 10 import java.util.LinkedHashSet; 11 import java.util.Map; 12 import java.util.Map.Entry; 13 import java.util.Set; 14 import java.util.TreeSet; 15 16 import org.unicode.cldr.util.CLDRConfig; 17 import org.unicode.cldr.util.CLDRFile; 18 import org.unicode.cldr.util.CLDRFile.Status; 19 import org.unicode.cldr.util.CLDRPaths; 20 import org.unicode.cldr.util.ChainedMap; 21 import org.unicode.cldr.util.ChainedMap.M3; 22 import org.unicode.cldr.util.ChainedMap.M4; 23 import org.unicode.cldr.util.ChainedMap.M5; 24 import org.unicode.cldr.util.DtdData; 25 import org.unicode.cldr.util.DtdData.Attribute; 26 import org.unicode.cldr.util.DtdData.Element; 27 import org.unicode.cldr.util.DtdData.ElementType; 28 import org.unicode.cldr.util.DtdType; 29 import org.unicode.cldr.util.Pair; 30 import org.unicode.cldr.util.PathHeader; 31 import org.unicode.cldr.util.PathHeader.Factory; 32 import org.unicode.cldr.util.PathHeader.PageId; 33 import org.unicode.cldr.util.PathHeader.SectionId; 34 import org.unicode.cldr.util.PathStarrer; 35 import org.unicode.cldr.util.XMLFileReader; 36 import org.unicode.cldr.util.XPathParts; 37 38 import com.google.common.collect.ImmutableSet; 39 40 public class TestPaths extends TestFmwkPlus { 41 static CLDRConfig testInfo = CLDRConfig.getInstance(); 42 main(String[] args)43 public static void main(String[] args) { 44 new TestPaths().run(args); 45 } 46 VerifyEnglishVsRoot()47 public void VerifyEnglishVsRoot() { 48 HashSet<String> rootPaths = new HashSet<String>(); 49 testInfo.getRoot().forEach(rootPaths::add); 50 HashSet<String> englishPaths = new HashSet<String>(); 51 testInfo.getEnglish().forEach(englishPaths::add); 52 englishPaths.removeAll(rootPaths); 53 if (englishPaths.size() == 0) { 54 return; 55 } 56 Factory phf = PathHeader.getFactory(testInfo.getEnglish()); 57 Status status = new Status(); 58 Set<PathHeader> suspiciousPaths = new TreeSet<PathHeader>(); 59 Set<PathHeader> errorPaths = new TreeSet<PathHeader>(); 60 ImmutableSet<String> SKIP_VARIANT = ImmutableSet.of( 61 "ps-variant", "ug-variant", "ky-variant", "az-short", 62 "Arab-variant", "am-variant", "pm-variant"); 63 for (String path : englishPaths) { 64 // skip aliases, other counts 65 if (!status.pathWhereFound.equals(path) 66 || path.contains("[@count=\"one\"]")) { 67 continue; 68 } 69 PathHeader ph = phf.fromPath(path); 70 if (ph.getSectionId() == SectionId.Special 71 || ph.getCode().endsWith("-name-other")) { 72 continue; 73 } 74 if (path.contains("@alt") && !SKIP_VARIANT.contains(ph.getCode()) 75 && ph.getPageId() != PageId.Alphabetic_Information) { 76 errorPaths.add(ph); 77 } else { 78 suspiciousPaths.add(ph); 79 } 80 } 81 if (errorPaths.size() != 0) { 82 errln("Error: paths in English but not root:" 83 + getPaths(errorPaths)); 84 } 85 logln("Suspicious: paths in English but not root:" 86 + getPaths(suspiciousPaths)); 87 } 88 getPaths(Set<PathHeader> altPaths)89 private String getPaths(Set<PathHeader> altPaths) { 90 StringBuilder b = new StringBuilder(); 91 for (PathHeader path : altPaths) { 92 b.append("\n\t\t") 93 .append(path) 94 .append(":\t") 95 .append(testInfo.getEnglish().getStringValue( 96 path.getOriginalPath())); 97 } 98 return b.toString(); 99 } 100 101 /** 102 * For each locale to test, loop through all the paths, including "extra" paths, 103 * checking for each path: checkFullpathValue; checkPrettyPaths 104 */ TestPathHeadersAndValues()105 public void TestPathHeadersAndValues() { 106 /* 107 * Use the pathsSeen hash to keep track of which paths have 108 * already been seen. Since the test checkPrettyPaths isn't really 109 * locale-dependent, run it only once for each path, for the first 110 * locale in which the path occurs. 111 */ 112 Set<String> pathsSeen = new HashSet<String>(); 113 CLDRFile englishFile = testInfo.getCldrFactory().make("en", true); 114 PathHeader.Factory phf = PathHeader.getFactory(englishFile); 115 Status status = new Status(); 116 for (String locale : getLocalesToTest()) { 117 CLDRFile file = testInfo.getCLDRFile(locale, true); 118 logln("Testing path headers and values for locale => " + locale); 119 for (Iterator<String> it = file.iterator(); it.hasNext();) { 120 String path = it.next(); 121 checkFullpathValue(path, file, locale, status, false /* not extra path */); 122 if (!pathsSeen.contains(path)) { 123 pathsSeen.add(path); 124 checkPrettyPaths(path, phf); 125 } 126 } 127 for (String path : file.getExtraPaths()) { 128 checkFullpathValue(path, file, locale, status, true /* extra path */); 129 if (!pathsSeen.contains(path)) { 130 pathsSeen.add(path); 131 checkPrettyPaths(path, phf); 132 } 133 } 134 } 135 } 136 137 /** 138 * For the given path and CLDRFile, check that fullPath, value, and source are all non-null. 139 * 140 * Allow null value for some exceptional extra paths. 141 * 142 * @param path the path, such as '//ldml/dates/fields/field[@type="tue"]/relative[@type="1"]' 143 * @param file the CLDRFile 144 * @param locale the locale string 145 * @param status the Status to be used/set by getSourceLocaleID 146 * @param isExtraPath true if the path is an "extra" path, else false 147 */ checkFullpathValue(String path, CLDRFile file, String locale, Status status, boolean isExtraPath)148 private void checkFullpathValue(String path, CLDRFile file, String locale, Status status, boolean isExtraPath) { 149 String fullPath = file.getFullXPath(path); 150 String value = file.getStringValue(path); 151 String source = file.getSourceLocaleID(path, status); 152 153 assertEquals("CanonicalOrder", XPathParts.getFrozenInstance(path).toString(), path); 154 155 if (fullPath == null) { 156 errln("Locale: " + locale + ",\t Null FullPath: " + path); 157 } else if (!path.equals(fullPath)) { 158 assertEquals("CanonicalOrder (FP)", XPathParts.getFrozenInstance(fullPath).toString(), fullPath); 159 } 160 161 if (value == null) { 162 if (isExtraPath && extraPathAllowsNullValue(path)) { 163 return; 164 } 165 errln("Locale: " + locale + ",\t Null Value: " + path); 166 } 167 168 if (source == null) { 169 errln("Locale: " + locale + ",\t Null Source: " + path); 170 } 171 172 if (status.pathWhereFound == null) { 173 errln("Locale: " + locale + ",\t Null Found Path: " + path); 174 } 175 } 176 177 /** 178 * Is the given extra path exceptional in the sense that null value is allowed? 179 * 180 * @param path the extra path 181 * @return true if null value is allowed for path, else false 182 * 183 * As of 2019-08-09, null values are found for many "metazone" paths like: 184 * //ldml/dates/timeZoneNames/metazone[@type="Galapagos"]/long/standard 185 * for many locales. Also for some "zone" paths like: 186 * //ldml/dates/timeZoneNames/zone[@type="Pacific/Honolulu"]/short/generic 187 * for locales including root, ja, and ar. Also for some "dayPeriods" paths like 188 * //ldml/dates/calendars/calendar[@type="gregorian"]/dayPeriods/dayPeriodContext[@type="stand-alone"]/dayPeriodWidth[@type="wide"]/dayPeriod[@type="midnight"] 189 * only for these six locales: bs_Cyrl, bs_Cyrl_BA, pa_Arab, pa_Arab_PK, uz_Arab, uz_Arab_AF. 190 * 191 * This function is nearly identical to the JavaScript function with the same name. 192 * Keep the two functions consistent with each other. It would be more ideal if this 193 * knowledge were encapsulated on the server and the client didn't need to know about it. 194 * The server could send the client special fallback values instead of null. 195 * 196 * Extra paths are generated by CLDRFile.getRawExtraPathsPrivate; this function may need 197 * updating (to allow null for other paths) if that function changes. 198 * 199 * Reference: https://unicode-org.atlassian.net/browse/CLDR-11238 200 */ extraPathAllowsNullValue(String path)201 private boolean extraPathAllowsNullValue(String path) { 202 if (path.contains("/timeZoneNames/metazone") 203 || path.contains("/timeZoneNames/zone") 204 || path.contains("/dayPeriods/dayPeriodContext") 205 || path.contains("/unitPattern") 206 || path.contains("/gender") 207 || path.contains("/caseMinimalPairs") 208 || path.contains("/genderMinimalPairs") 209 ) { 210 return true; 211 } 212 return false; 213 } 214 215 /** 216 * Check that the given path and PathHeader.Factory undergo correct 217 * roundtrip conversion between original and pretty paths. 218 * 219 * @param path the path string 220 * @param phf the PathHeader.Factory 221 */ checkPrettyPaths(String path, PathHeader.Factory phf)222 private void checkPrettyPaths(String path, PathHeader.Factory phf) { 223 if (path.endsWith("/alias")) { 224 return; 225 } 226 logln("Testing ==> " + path); 227 String prettied = phf.fromPath(path).toString(); 228 String unprettied = phf.fromPath(path).getOriginalPath(); 229 if (!path.equals(unprettied)) { 230 errln("Path Header doesn't roundtrip:\t" + path + "\t" + prettied 231 + "\t" + unprettied); 232 } else { 233 logln(prettied + "\t" + path); 234 } 235 } 236 getLocalesToTest()237 private Collection<String> getLocalesToTest() { 238 return params.inclusion <= 5 ? Arrays.asList("root", "en", "ja", "ar", "de", "ru") 239 : params.inclusion < 10 ? testInfo.getCldrFactory().getAvailableLanguages() 240 : testInfo.getCldrFactory().getAvailable(); 241 } 242 243 /** 244 * find all the items that are deprecated, but appear in paths 245 * and the items that aren't deprecated, but don't appear in paths 246 */ 247 248 static final class CheckDeprecated { 249 M5<DtdType, String, String, String, Boolean> data = ChainedMap.of( 250 new HashMap<DtdType, Object>(), 251 new HashMap<String, Object>(), 252 new HashMap<String, Object>(), 253 new HashMap<String, Object>(), 254 Boolean.class); 255 private TestPaths testPaths; 256 CheckDeprecated(TestPaths testPaths)257 public CheckDeprecated(TestPaths testPaths) { 258 this.testPaths = testPaths; 259 } 260 261 static final Set<String> ALLOWED = new HashSet<>(Arrays.asList("postalCodeData", "postCodeRegex")); 262 static final Set<String> OK_IF_MISSING = new HashSet<>(Arrays.asList("alt", "draft", "references")); 263 check(DtdData dtdData, XPathParts parts, String fullName)264 public boolean check(DtdData dtdData, XPathParts parts, String fullName) { 265 for (int i = 0; i < parts.size(); ++i) { 266 String elementName = parts.getElement(i); 267 if (dtdData.isDeprecated(elementName, "*", "*")) { 268 if (ALLOWED.contains(elementName)) { 269 return false; 270 } 271 testPaths.errln("Deprecated element in data: " 272 + dtdData.dtdType 273 + ":" + elementName 274 + " \t;" + fullName); 275 return true; 276 } 277 data.put(dtdData.dtdType, elementName, "*", "*", true); 278 for (Entry<String, String> attributeNValue : parts.getAttributes(i).entrySet()) { 279 String attributeName = attributeNValue.getKey(); 280 if (dtdData.isDeprecated(elementName, attributeName, "*")) { 281 if (attributeName.equals("draft")) { 282 testPaths.errln("Deprecated attribute in data: " 283 + dtdData.dtdType 284 + ":" + elementName 285 + ":" + attributeName 286 + " \t;" + fullName + 287 " - consider adding to DtdData.DRAFT_ON_NON_LEAF_ALLOWED if you are sure this is ok."); 288 } else { 289 testPaths.errln("Deprecated attribute in data: " 290 + dtdData.dtdType 291 + ":" + elementName 292 + ":" + attributeName 293 + " \t;" + fullName); 294 } 295 return true; 296 } 297 String attributeValue = attributeNValue.getValue(); 298 if (dtdData.isDeprecated(elementName, attributeName, attributeValue)) { 299 testPaths.errln("Deprecated attribute value in data: " 300 + dtdData.dtdType 301 + ":" + elementName 302 + ":" + attributeName 303 + ":" + attributeValue 304 + " \t;" + fullName); 305 return true; 306 } 307 data.put(dtdData.dtdType, elementName, attributeName, "*", true); 308 data.put(dtdData.dtdType, elementName, attributeName, attributeValue, true); 309 } 310 } 311 return false; 312 } 313 show(int inclusion)314 public void show(int inclusion) { 315 for (DtdType dtdType : DtdType.values()) { 316 if (dtdType == DtdType.ldmlICU || 317 (inclusion <= 5 && dtdType == DtdType.platform)) { // keyboards/*/_platform.xml won't be in the list for non-exhaustive runs 318 continue; 319 } 320 M4<String, String, String, Boolean> infoEAV = data.get(dtdType); 321 if (infoEAV == null) { 322 testPaths.warnln("Data doesn't contain: " 323 + dtdType); 324 continue; 325 } 326 DtdData dtdData = DtdData.getInstance(dtdType); 327 for (Element element : dtdData.getElements()) { 328 if (element.isDeprecated() || element == dtdData.ANY || element == dtdData.PCDATA) { 329 continue; 330 } 331 M3<String, String, Boolean> infoAV = infoEAV.get(element.name); 332 if (infoAV == null) { 333 testPaths.logln("Data doesn't contain: " 334 + dtdType 335 + ":" + element.name); 336 continue; 337 } 338 339 for (Attribute attribute : element.getAttributes().keySet()) { 340 if (attribute.isDeprecated() || OK_IF_MISSING.contains(attribute.name)) { 341 continue; 342 } 343 Map<String, Boolean> infoV = infoAV.get(attribute.name); 344 if (infoV == null) { 345 testPaths.logln("Data doesn't contain: " 346 + dtdType 347 + ":" + element.name 348 + ":" + attribute.name); 349 continue; 350 } 351 for (String value : attribute.values.keySet()) { 352 if (attribute.isDeprecatedValue(value)) { 353 continue; 354 } 355 if (!infoV.containsKey(value)) { 356 testPaths.logln("Data doesn't contain: " 357 + dtdType 358 + ":" + element.name 359 + ":" + attribute.name 360 + ":" + value); 361 } 362 } 363 } 364 } 365 } 366 } 367 } 368 TestNonLdml()369 public void TestNonLdml() { 370 int maxPerDirectory = getInclusion() <= 5 ? 20 : Integer.MAX_VALUE; 371 CheckDeprecated checkDeprecated = new CheckDeprecated(this); 372 PathStarrer starrer = new PathStarrer(); 373 StringBuilder removed = new StringBuilder(); 374 Set<String> nonFinalValues = new LinkedHashSet<>(); 375 Set<String> skipLast = new HashSet(Arrays.asList("version", "generation")); 376 String[] normalizedPath = { "" }; 377 378 int counter = 0; 379 for (String directory : Arrays.asList("keyboards/", "common/", "seed/", "exemplars/")) { 380 String dirPath = CLDRPaths.BASE_DIRECTORY + directory; 381 for (String fileName : new File(dirPath).list()) { 382 File dir2 = new File(dirPath + fileName); 383 if (!dir2.isDirectory() 384 || fileName.equals("properties") // TODO as flat files 385 // || fileName.equals(".DS_Store") 386 // || ChartDelta.LDML_DIRECTORIES.contains(dir) 387 // || fileName.equals("dtd") // TODO as flat files 388 // || fileName.equals(".project") // TODO as flat files 389 // //|| dir.equals("uca") // TODO as flat files 390 ) { 391 continue; 392 } 393 394 Set<Pair<String, String>> seen = new HashSet<>(); 395 Set<String> seenStarred = new HashSet<>(); 396 int count = 0; 397 Set<Element> haveErrorsAlready = new HashSet<>(); 398 for (String file : dir2.list()) { 399 if (!file.endsWith(".xml")) { 400 continue; 401 } 402 if (++count > maxPerDirectory) { 403 break; 404 } 405 DtdType type = null; 406 DtdData dtdData = null; 407 String fullName = dir2 + "/" + file; 408 for (Pair<String, String> pathValue : XMLFileReader.loadPathValues(fullName, new ArrayList<Pair<String, String>>(), true)) { 409 String path = pathValue.getFirst(); 410 final String value = pathValue.getSecond(); 411 XPathParts parts = XPathParts.getFrozenInstance(path); 412 if (dtdData == null) { 413 type = DtdType.valueOf(parts.getElement(0)); 414 dtdData = DtdData.getInstance(type); 415 } 416 417 XPathParts pathParts = XPathParts.getFrozenInstance(path); 418 String finalElementString = pathParts.getElement(-1); 419 Element finalElement = dtdData.getElementFromName().get(finalElementString); 420 if (!haveErrorsAlready.contains(finalElement)) { 421 ElementType elementType = finalElement.getType(); 422 // HACK!! 423 if (pathParts.size() > 1 && "identity".equals(pathParts.getElement(1))) { 424 elementType = ElementType.EMPTY; 425 logKnownIssue("cldrbug:9784", "fix TODO's in Attribute validity tests"); 426 } else if (pathParts.size() > 2 427 && "validity".equals(pathParts.getElement(2)) 428 && value.isEmpty()) { 429 String typeValue = pathParts.getAttributeValue(-1, "type"); 430 if ("TODO".equals(typeValue) 431 || "locale".equals(typeValue)) { 432 elementType = ElementType.EMPTY; 433 logKnownIssue("cldrbug:9784", "fix TODO's in Attribute validity tests"); 434 } 435 } 436 if ((elementType == ElementType.PCDATA) == (value.isEmpty())) { 437 errln("Inconsistency:" 438 + "\tfile=" + fileName + "/" + file 439 + "\telementType=" + elementType 440 + "\tvalue=«" + value + "»" 441 + "\tpath=" + path); 442 haveErrorsAlready.add(finalElement); // suppress all but first error 443 } 444 } 445 446 if (checkDeprecated.check(dtdData, parts, fullName)) { 447 break; 448 } 449 450 String last = parts.getElement(-1); 451 if (skipLast.contains(last)) { 452 continue; 453 } 454 String dpath = CLDRFile.getDistinguishingXPath(path, normalizedPath); 455 if (!dpath.equals(path)) { 456 checkParts(dpath, dtdData); 457 } 458 if (!normalizedPath.equals(path) && !normalizedPath[0].equals(dpath)) { 459 checkParts(normalizedPath[0], dtdData); 460 } 461 parts = parts.cloneAsThawed(); 462 counter = removeNonDistinguishing(parts, dtdData, counter, removed, nonFinalValues); 463 String cleaned = parts.toString(); 464 Pair<String, String> pair = Pair.of(type == DtdType.ldml ? file : type.toString(), cleaned); 465 if (seen.contains(pair)) { 466 // parts.set(path); 467 // removeNonDistinguishing(parts, dtdData, counter, removed, nonFinalValues); 468 errln("Duplicate: " + file + ", " + path + ", " + cleaned + ", " + value); 469 } else { 470 seen.add(pair); 471 if (!nonFinalValues.isEmpty()) { 472 String starredPath = starrer.set(path); 473 if (!seenStarred.contains(starredPath)) { 474 seenStarred.add(starredPath); 475 logln("Non-node values: " + nonFinalValues + "\t" + path); 476 } 477 } 478 if (isVerbose()) { 479 String starredPath = starrer.set(path); 480 if (!seenStarred.contains(starredPath)) { 481 seenStarred.add(starredPath); 482 logln("@" + "\t" + cleaned + "\t" + removed); 483 } 484 } 485 } 486 } 487 } 488 } 489 } 490 checkDeprecated.show(getInclusion()); 491 } 492 checkParts(String path, DtdData dtdData)493 private void checkParts(String path, DtdData dtdData) { 494 XPathParts parts = XPathParts.getFrozenInstance(path); 495 Element current = dtdData.ROOT; 496 for (int i = 0; i < parts.size(); ++i) { 497 String elementName = parts.getElement(i); 498 if (i == 0) { 499 assertEquals("root", current.name, elementName); 500 } else { 501 current = current.getChildNamed(elementName); 502 if (!assertNotNull("element", current)) { 503 return; // failed 504 } 505 } 506 for (String attributeName : parts.getAttributeKeys(i)) { 507 Attribute attribute = current.getAttributeNamed(attributeName); 508 if (!assertNotNull("attribute", attribute)) { 509 return; // failed 510 } 511 // later, check values 512 } 513 } 514 } 515 516 static final Set<String> SKIP_NON_NODE = new HashSet<>(Arrays.asList("references", "visibility", "access")); 517 518 /** 519 * 520 * @param parts the thawed XPathParts (can't be frozen, for putAttributeValue) 521 * @param data 522 * @param counter 523 * @param removed 524 * @param nonFinalValues 525 * @return 526 */ removeNonDistinguishing(XPathParts parts, DtdData data, int counter, StringBuilder removed, Set<String> nonFinalValues)527 private int removeNonDistinguishing(XPathParts parts, DtdData data, int counter, StringBuilder removed, Set<String> nonFinalValues) { 528 removed.setLength(0); 529 nonFinalValues.clear(); 530 HashSet<String> toRemove = new HashSet<>(); 531 nonFinalValues.clear(); 532 int size = parts.size(); 533 int last = size - 1; 534 for (int i = 0; i < size; ++i) { 535 removed.append("/"); 536 String element = parts.getElement(i); 537 if (data.isOrdered(element)) { 538 parts.putAttributeValue(i, "_q", String.valueOf(counter)); 539 counter++; 540 } 541 for (String attribute : parts.getAttributeKeys(i)) { 542 if (!data.isDistinguishing(element, attribute)) { 543 toRemove.add(attribute); 544 if (i != last && !SKIP_NON_NODE.contains(attribute)) { 545 if (attribute.equals("draft") 546 && (parts.getElement(1).equals("transforms") || parts.getElement(1).equals("collations"))) { 547 // do nothing 548 } else { 549 nonFinalValues.add(attribute); 550 } 551 } 552 } 553 } 554 if (!toRemove.isEmpty()) { 555 for (String attribute : toRemove) { 556 removed.append("[@" + attribute + "=\"" + parts.getAttributeValue(i, attribute) + "\"]"); 557 parts.removeAttribute(i, attribute); 558 } 559 toRemove.clear(); 560 } 561 } 562 return counter; 563 } 564 } 565