package org.unicode.cldr.unittest;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;

import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.Status;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.ChainedMap;
import org.unicode.cldr.util.ChainedMap.M3;
import org.unicode.cldr.util.ChainedMap.M4;
import org.unicode.cldr.util.ChainedMap.M5;
import org.unicode.cldr.util.DtdData;
import org.unicode.cldr.util.DtdData.Attribute;
import org.unicode.cldr.util.DtdData.Element;
import org.unicode.cldr.util.DtdData.ElementType;
import org.unicode.cldr.util.DtdType;
import org.unicode.cldr.util.Pair;
import org.unicode.cldr.util.PathHeader;
import org.unicode.cldr.util.PathHeader.Factory;
import org.unicode.cldr.util.PathHeader.PageId;
import org.unicode.cldr.util.PathHeader.SectionId;
import org.unicode.cldr.util.PathStarrer;
import org.unicode.cldr.util.XMLFileReader;
import org.unicode.cldr.util.XPathParts;

import com.google.common.collect.ImmutableSet;
import com.ibm.icu.dev.util.CollectionUtilities;

/**
 * Tests of CLDR paths: path headers and values per locale, round-tripping between
 * original and pretty paths, and consistency of the non-LDML XML files with their DTDs.
 */
public class TestPaths extends TestFmwkPlus {
    static CLDRConfig testInfo = CLDRConfig.getInstance();

    public static void main(String[] args) {
        new TestPaths().run(args);
    }

    /**
     * Report paths that are present in English but not in root.
     *
     * Paths with an "alt" attribute (other than a few known variants, and anything on the
     * Alphabetic_Information page) are reported as errors; the remaining ones are only logged
     * as suspicious.
     *
     * NOTE(review): presumably this is not run automatically because its name does not start
     * with "Test" -- confirm against the test framework.
     */
    public void VerifyEnglishVsRoot() {
        Set<String> rootPaths = CollectionUtilities.addAll(
            testInfo.getRoot().iterator(), new HashSet<String>());
        Set<String> englishPaths = CollectionUtilities.addAll(
            testInfo.getEnglish().iterator(), new HashSet<String>());
        englishPaths.removeAll(rootPaths);
        if (englishPaths.isEmpty()) {
            return;
        }
        Factory phf = PathHeader.getFactory(testInfo.getEnglish());
        Status status = new Status();
        Set<PathHeader> suspiciousPaths = new TreeSet<PathHeader>();
        Set<PathHeader> errorPaths = new TreeSet<PathHeader>();
        ImmutableSet<String> skipVariant = ImmutableSet.of(
            "ps-variant", "ug-variant", "ky-variant", "az-short",
            "Arab-variant", "am-variant", "pm-variant");
        for (String path : englishPaths) {
            // Fill in the status for this path. Previously the Status was read without ever
            // being passed to getSourceLocaleID, so pathWhereFound was never set here.
            testInfo.getEnglish().getSourceLocaleID(path, status);
            // skip aliases, other counts
            if (!path.equals(status.pathWhereFound)
                || path.contains("[@count=\"one\"]")) {
                continue;
            }
            PathHeader ph = phf.fromPath(path);
            if (ph.getSectionId() == SectionId.Special
                || ph.getCode().endsWith("-name-other")) {
                continue;
            }
            boolean isError = path.contains("@alt")
                && !skipVariant.contains(ph.getCode())
                && ph.getPageId() != PageId.Alphabetic_Information;
            if (isError) {
                errorPaths.add(ph);
            } else {
                suspiciousPaths.add(ph);
            }
        }
        if (!errorPaths.isEmpty()) {
            errln("Error: paths in English but not root:"
                + getPaths(errorPaths));
        }
        logln("Suspicious: paths in English but not root:"
            + getPaths(suspiciousPaths));
    }

    /**
     * Format the given path headers, one per line, each followed by its English value.
     *
     * @param altPaths the path headers to format
     * @return the formatted list; empty if altPaths is empty
     */
    private String getPaths(Set<PathHeader> altPaths) {
        StringBuilder b = new StringBuilder();
        for (PathHeader path : altPaths) {
            b.append("\n\t\t")
                .append(path)
                .append(":\t")
                .append(testInfo.getEnglish().getStringValue(path.getOriginalPath()));
        }
        return b.toString();
    }

    /**
     * For each locale to test, loop through all the paths, including "extra" paths,
     * checking for each path: checkFullpathValue; checkPrettyPaths
     */
    public void TestPathHeadersAndValues() {
        /*
         * Use the pathsSeen hash to keep track of which paths have
         * already been seen. Since the test checkPrettyPaths isn't really
         * locale-dependent, run it only once for each path, for the first
         * locale in which the path occurs.
         */
        Set<String> pathsSeen = new HashSet<String>();
        CLDRFile englishFile = testInfo.getCldrFactory().make("en", true);
        PathHeader.Factory phf = PathHeader.getFactory(englishFile);
        Status status = new Status();
        for (String locale : getLocalesToTest()) {
            CLDRFile file = testInfo.getCLDRFile(locale, true);
            logln("Testing path headers and values for locale => " + locale);
            for (Iterator<String> it = file.iterator(); it.hasNext();) {
                String path = it.next();
                checkFullpathValue(path, file, locale, status, false /* not extra path */);
                if (pathsSeen.add(path)) { // true only the first time the path is seen
                    checkPrettyPaths(path, phf);
                }
            }
            for (String path : file.getExtraPaths()) {
                checkFullpathValue(path, file, locale, status, true /* extra path */);
                if (pathsSeen.add(path)) {
                    checkPrettyPaths(path, phf);
                }
            }
        }
    }

    /**
     * For the given path and CLDRFile, check that fullPath, value, and source are all non-null,
     * and that the status reports a non-null path where the value was found.
     *
     * Allow null value for some exceptional extra paths.
     *
     * @param path the path, such as '//ldml/dates/fields/field[@type="tue"]/relative[@type="1"]'
     * @param file the CLDRFile
     * @param locale the locale string
     * @param status the Status to be used/set by getSourceLocaleID
     * @param isExtraPath true if the path is an "extra" path, else false
     */
    private void checkFullpathValue(String path, CLDRFile file, String locale, Status status, boolean isExtraPath) {
        String fullPath = file.getFullXPath(path);
        String value = file.getStringValue(path);
        String source = file.getSourceLocaleID(path, status);
        if (fullPath == null) {
            errln("Locale: " + locale + ",\t FullPath: " + path);
        }
        /*
         * Allow null value for some exceptional extra paths.
         */
        if (value == null && !(isExtraPath && extraPathAllowsNullValue(path))) {
            errln("Locale: " + locale + ",\t Value: " + path);
        }
        if (source == null) {
            errln("Locale: " + locale + ",\t Source: " + path);
        }
        if (status.pathWhereFound == null) {
            errln("Locale: " + locale + ",\t Found Path: " + path);
        }
    }

    /**
     * Is the given extra path exceptional in the sense that null value is allowed?
     *
     * As of 2019-08-09, null values are found for many "metazone" paths like:
     * //ldml/dates/timeZoneNames/metazone[@type="Galapagos"]/long/standard
     * for many locales. Also for some "zone" paths like:
     * //ldml/dates/timeZoneNames/zone[@type="Pacific/Honolulu"]/short/generic
     * for locales including root, ja, and ar. Also for some "dayPeriods" paths like
     * //ldml/dates/calendars/calendar[@type="gregorian"]/dayPeriods/dayPeriodContext[@type="stand-alone"]/dayPeriodWidth[@type="wide"]/dayPeriod[@type="midnight"]
     * only for these six locales: bs_Cyrl, bs_Cyrl_BA, pa_Arab, pa_Arab_PK, uz_Arab, uz_Arab_AF.
     *
     * This function is nearly identical to the JavaScript function with the same name.
     * Keep the two functions consistent with each other. It would be more ideal if this
     * knowledge were encapsulated on the server and the client didn't need to know about it.
     * The server could send the client special fallback values instead of null.
     *
     * Extra paths are generated by CLDRFile.getRawExtraPathsPrivate; this function may need
     * updating (to allow null for other paths) if that function changes.
     *
     * Reference: https://unicode-org.atlassian.net/browse/CLDR-11238
     *
     * @param path the extra path
     * @return true if null value is allowed for path, else false
     */
    private boolean extraPathAllowsNullValue(String path) {
        return path.contains("timeZoneNames/metazone")
            || path.contains("timeZoneNames/zone")
            || path.contains("dayPeriods/dayPeriodContext");
    }

    /**
     * Check that the given path and PathHeader.Factory undergo correct
     * roundtrip conversion between original and pretty paths.
     *
     * Paths ending in "/alias" are skipped.
     *
     * @param path the path string
     * @param phf the PathHeader.Factory
     */
    private void checkPrettyPaths(String path, PathHeader.Factory phf) {
        if (path.endsWith("/alias")) {
            return;
        }
        logln("Testing ==> " + path);
        // Look the header up once and derive both representations from it.
        PathHeader header = phf.fromPath(path);
        String prettied = header.toString();
        String unprettied = header.getOriginalPath();
        if (!path.equals(unprettied)) {
            errln("Path Header doesn't roundtrip:\t" + path + "\t" + prettied
                + "\t" + unprettied);
        } else {
            logln(prettied + "\t" + path);
        }
    }

    /**
     * Choose the locales to test based on the inclusion level: a small fixed list at
     * inclusion 5 or lower, the available languages below 10, otherwise everything available.
     *
     * @return the locale IDs to test
     */
    private Collection<String> getLocalesToTest() {
        if (params.inclusion <= 5) {
            return Arrays.asList("root", "en", "ja", "ar");
        }
        return params.inclusion < 10
            ? testInfo.getCldrFactory().getAvailableLanguages()
            : testInfo.getCldrFactory().getAvailable();
    }

    /**
     * find all the items that are deprecated, but appear in paths
     * and the items that aren't deprecated, but don't appear in paths
     */
    static final class CheckDeprecated {
        /** Records dtdType / element / attribute / value combinations seen in data ("*" = any). */
        M5<DtdType, String, String, String, Boolean> data = ChainedMap.of(
            new HashMap<DtdType, Object>(),
            new HashMap<String, Object>(),
            new HashMap<String, Object>(),
            new HashMap<String, Object>(),
            Boolean.class);
        /** The test instance used for reporting (errln / warnln / logln). */
        private final TestPaths testPaths;

        public CheckDeprecated(TestPaths testPaths) {
            this.testPaths = testPaths;
        }

        /** Deprecated elements that are tolerated in data. */
        static final Set<String> ALLOWED = new HashSet<>(Arrays.asList("postalCodeData", "postCodeRegex"));
        /** Attributes whose absence from the data is not reported by show(). */
        static final Set<String> OK_IF_MISSING = new HashSet<>(Arrays.asList("alt", "draft", "references"));

        /**
         * Check the elements, attributes and attribute values of a path for deprecated items,
         * recording the non-deprecated ones as seen.
         *
         * Stops at the first deprecated item. A deprecated element listed in ALLOWED ends
         * the check without an error.
         *
         * @param dtdData the DTD data for the file the path came from
         * @param parts the parsed path
         * @param fullName the file name, used only in the error message
         * @return true if a deprecated item was found and reported, else false
         */
        public boolean check(DtdData dtdData, XPathParts parts, String fullName) {
            for (int i = 0; i < parts.size(); ++i) {
                String elementName = parts.getElement(i);
                if (dtdData.isDeprecated(elementName, "*", "*")) {
                    if (ALLOWED.contains(elementName)) {
                        return false;
                    }
                    testPaths.errln("Deprecated item in data: "
                        + dtdData.dtdType
                        + ":" + elementName
                        + " \t;" + fullName);
                    return true;
                }
                data.put(dtdData.dtdType, elementName, "*", "*", true);
                for (Entry<String, String> attributeNValue : parts.getAttributes(i).entrySet()) {
                    String attributeName = attributeNValue.getKey();
                    if (dtdData.isDeprecated(elementName, attributeName, "*")) {
                        testPaths.errln("Deprecated item in data: "
                            + dtdData.dtdType
                            + ":" + elementName
                            + ":" + attributeName
                            + " \t;" + fullName);
                        return true;
                    }
                    String attributeValue = attributeNValue.getValue();
                    if (dtdData.isDeprecated(elementName, attributeName, attributeValue)) {
                        testPaths.errln("Deprecated item in data: "
                            + dtdData.dtdType
                            + ":" + elementName
                            + ":" + attributeName
                            + ":" + attributeValue
                            + " \t;" + fullName);
                        return true;
                    }
                    data.put(dtdData.dtdType, elementName, attributeName, "*", true);
                    data.put(dtdData.dtdType, elementName, attributeName, attributeValue, true);
                }
            }
            return false;
        }

        /**
         * Log the non-deprecated DTD items (types, elements, attributes, values) that never
         * appeared in the data passed to check().
         *
         * @param inclusion the inclusion level; at 5 or lower the platform DTD is skipped
         */
        public void show(int inclusion) {
            for (DtdType dtdType : DtdType.values()) {
                if (dtdType == DtdType.ldmlICU
                    || (inclusion <= 5 && dtdType == DtdType.platform)) {
                    // keyboards/*/_platform.xml won't be in the list for non-exhaustive runs
                    continue;
                }
                M4<String, String, String, Boolean> infoEAV = data.get(dtdType);
                if (infoEAV == null) {
                    testPaths.warnln("Data doesn't contain: "
                        + dtdType);
                    continue;
                }
                DtdData dtdData = DtdData.getInstance(dtdType);
                for (Element element : dtdData.getElements()) {
                    if (element.isDeprecated() || element == dtdData.ANY || element == dtdData.PCDATA) {
                        continue;
                    }
                    M3<String, String, Boolean> infoAV = infoEAV.get(element.name);
                    if (infoAV == null) {
                        testPaths.logln("Data doesn't contain: "
                            + dtdType
                            + ":" + element.name);
                        continue;
                    }

                    for (Attribute attribute : element.getAttributes().keySet()) {
                        if (attribute.isDeprecated() || OK_IF_MISSING.contains(attribute.name)) {
                            continue;
                        }
                        Map<String, Boolean> infoV = infoAV.get(attribute.name);
                        if (infoV == null) {
                            testPaths.logln("Data doesn't contain: "
                                + dtdType
                                + ":" + element.name
                                + ":" + attribute.name);
                            continue;
                        }
                        for (String value : attribute.values.keySet()) {
                            if (attribute.isDeprecatedValue(value)) {
                                continue;
                            }
                            if (!infoV.containsKey(value)) {
                                testPaths.logln("Data doesn't contain: "
                                    + dtdType
                                    + ":" + element.name
                                    + ":" + attribute.name
                                    + ":" + value);
                            }
                        }
                    }
                }
            }
        }
    }

    /**
     * List the names in a directory, reporting an error instead of failing with a
     * NullPointerException when the listing is unavailable.
     *
     * @param directory the directory to list
     * @return the names in the directory, or an empty array if File.list() returned null
     */
    private String[] listNames(File directory) {
        String[] names = directory.list();
        if (names == null) {
            // File.list() returns null if the path is not a directory or an I/O error occurs.
            errln("Can't list directory: " + directory);
            return new String[0];
        }
        return names;
    }

    /**
     * Walk the XML files in each subdirectory of keyboards/ and common/ and check, per path:
     * that the element type (PCDATA or not) is consistent with having a value, that no
     * deprecated items are used, that distinguishing/normalized paths match the DTD, and that
     * no two paths in a file collapse to the same path once non-distinguishing attributes
     * are removed.
     *
     * At inclusion 5 or lower, only the first 20 XML files of each subdirectory are read.
     */
    public void TestNonLdml() {
        int maxPerDirectory = getInclusion() <= 5 ? 20 : Integer.MAX_VALUE;
        CheckDeprecated checkDeprecated = new CheckDeprecated(this);
        PathStarrer starrer = new PathStarrer();
        StringBuilder removed = new StringBuilder();
        Set<String> nonFinalValues = new LinkedHashSet<>();
        Set<String> skipLast = new HashSet<>(Arrays.asList("version", "generation"));
        String[] normalizedPath = { "" };

        int counter = 0;
        for (String directory : Arrays.asList("keyboards/", "common/")) {
            String dirPath = CLDRPaths.BASE_DIRECTORY + directory;
            for (String fileName : listNames(new File(dirPath))) {
                File dir2 = new File(dirPath + fileName);
                // Only subdirectories are processed; plain files are skipped.
                if (!dir2.isDirectory()
                    || fileName.equals("properties") // TODO as flat files
                ) {
                    continue;
                }

                Set<Pair<String, String>> seen = new HashSet<>();
                Set<String> seenStarred = new HashSet<>();
                int count = 0;
                Set<Element> haveErrorsAlready = new HashSet<>();
                for (String file : listNames(dir2)) {
                    if (!file.endsWith(".xml")) {
                        continue;
                    }
                    if (++count > maxPerDirectory) {
                        break;
                    }
                    DtdType type = null;
                    DtdData dtdData = null;
                    String fullName = dir2 + "/" + file;
                    for (Pair<String, String> pathValue : XMLFileReader.loadPathValues(fullName, new ArrayList<Pair<String, String>>(), true)) {
                        String path = pathValue.getFirst();
                        final String value = pathValue.getSecond();
                        XPathParts parts = XPathParts.getInstance(path); // not frozen, for removeNonDistinguishing
                        if (dtdData == null) {
                            // The root element of the first path determines the DTD for the file.
                            type = DtdType.valueOf(parts.getElement(0));
                            dtdData = DtdData.getInstance(type);
                        }

                        XPathParts pathParts = XPathParts.getFrozenInstance(path);
                        String finalElementString = pathParts.getElement(-1);
                        Element finalElement = dtdData.getElementFromName().get(finalElementString);
                        if (!haveErrorsAlready.contains(finalElement)) {
                            ElementType elementType = finalElement.getType();
                            // HACK!!
                            if (pathParts.size() > 1 && "identity".equals(pathParts.getElement(1))) {
                                elementType = ElementType.EMPTY;
                                logKnownIssue("cldrbug:9784", "fix TODO's in Attribute validity tests");
                            } else if (pathParts.size() > 2
                                && "validity".equals(pathParts.getElement(2))
                                && value.isEmpty()) {
                                String typeValue = pathParts.getAttributeValue(-1, "type");
                                if ("TODO".equals(typeValue)
                                    || "locale".equals(typeValue)) {
                                    elementType = ElementType.EMPTY;
                                    logKnownIssue("cldrbug:9784", "fix TODO's in Attribute validity tests");
                                }
                            }
                            // A PCDATA element should have a value; any other type should not.
                            if ((elementType == ElementType.PCDATA) == (value.isEmpty())) {
                                errln("Inconsistency:"
                                    + "\tfile=" + fileName + "/" + file
                                    + "\telementType=" + elementType
                                    + "\tvalue=«" + value + "»"
                                    + "\tpath=" + path);
                                haveErrorsAlready.add(finalElement); // suppress all but first error
                            }
                        }

                        if (checkDeprecated.check(dtdData, parts, fullName)) {
                            break;
                        }

                        String last = parts.getElement(-1);
                        if (skipLast.contains(last)) {
                            continue;
                        }
                        String dpath = CLDRFile.getDistinguishingXPath(path, normalizedPath);
                        if (!dpath.equals(path)) {
                            checkParts(dpath, dtdData);
                        }
                        // Compare the normalized path string (element 0), not the array itself:
                        // String[].equals(String) is always false, which made the first test vacuous.
                        if (!normalizedPath[0].equals(path) && !normalizedPath[0].equals(dpath)) {
                            checkParts(normalizedPath[0], dtdData);
                        }
                        counter = removeNonDistinguishing(parts, dtdData, counter, removed, nonFinalValues);
                        String cleaned = parts.toString();
                        Pair<String, String> pair = Pair.of(type == DtdType.ldml ? file : type.toString(), cleaned);
                        if (!seen.add(pair)) {
                            errln("Duplicate: " + file + ", " + path + ", " + cleaned + ", " + value);
                        } else {
                            if (!nonFinalValues.isEmpty()) {
                                String starredPath = starrer.set(path);
                                if (seenStarred.add(starredPath)) {
                                    logln("Non-node values: " + nonFinalValues + "\t" + path);
                                }
                            }
                            if (isVerbose()) {
                                String starredPath = starrer.set(path);
                                if (seenStarred.add(starredPath)) {
                                    logln("@" + "\t" + cleaned + "\t" + removed);
                                }
                            }
                        }
                    }
                }
            }
        }
        checkDeprecated.show(getInclusion());
    }

    /**
     * Assert that every element of the path is a child of the preceding element in the DTD
     * (the first must be the DTD root), and that every attribute is declared on its element.
     * Stops at the first failed assertion about a missing element or attribute.
     *
     * @param path the path to check
     * @param dtdData the DTD data to check against
     */
    private void checkParts(String path, DtdData dtdData) {
        XPathParts parts = XPathParts.getFrozenInstance(path);
        Element current = dtdData.ROOT;
        for (int i = 0; i < parts.size(); ++i) {
            String elementName = parts.getElement(i);
            if (i == 0) {
                assertEquals("root", current.name, elementName);
            } else {
                current = current.getChildNamed(elementName);
                if (!assertNotNull("element", current)) {
                    return; // failed
                }
            }
            for (String attributeName : parts.getAttributeKeys(i)) {
                Attribute attribute = current.getAttributeNamed(attributeName);
                if (!assertNotNull("attribute", attribute)) {
                    return; // failed
                }
                // later, check values
            }
        }
    }

    /** Non-distinguishing attributes that are not reported when found on a non-final element. */
    static final Set<String> SKIP_NON_NODE = new HashSet<>(Arrays.asList("references", "visibility", "access"));

    /**
     * Remove the non-distinguishing attributes from parts, and add a "_q" attribute carrying
     * a running counter to each ordered element.
     *
     * @param parts the thawed XPathParts (can't be frozen, for putAttributeValue); modified in place
     * @param data the DTD data used to decide which elements are ordered and which attributes are distinguishing
     * @param counter the next value to use for "_q"
     * @param removed cleared, then filled with "/" per element plus the removed attributes
     * @param nonFinalValues cleared, then filled with the removed attributes that were on a
     *            non-final element (except those in SKIP_NON_NODE, and "draft" under
     *            transforms or collations)
     * @return the updated counter
     */
    private int removeNonDistinguishing(XPathParts parts, DtdData data, int counter, StringBuilder removed, Set<String> nonFinalValues) {
        removed.setLength(0);
        nonFinalValues.clear();
        HashSet<String> toRemove = new HashSet<>();
        int size = parts.size();
        int last = size - 1;
        for (int i = 0; i < size; ++i) {
            removed.append("/");
            String element = parts.getElement(i);
            if (data.isOrdered(element)) {
                parts.putAttributeValue(i, "_q", String.valueOf(counter));
                counter++;
            }
            for (String attribute : parts.getAttributeKeys(i)) {
                if (data.isDistinguishing(element, attribute)) {
                    continue;
                }
                toRemove.add(attribute);
                if (i == last || SKIP_NON_NODE.contains(attribute)) {
                    continue;
                }
                boolean draftUnderTransformsOrCollations = attribute.equals("draft")
                    && (parts.getElement(1).equals("transforms") || parts.getElement(1).equals("collations"));
                if (!draftUnderTransformsOrCollations) {
                    nonFinalValues.add(attribute);
                }
            }
            // Remove after iterating, so the attribute keys aren't modified during the loop above.
            for (String attribute : toRemove) {
                removed.append("[@").append(attribute).append("=\"")
                    .append(parts.getAttributeValue(i, attribute)).append("\"]");
                parts.removeAttribute(i, attribute);
            }
            toRemove.clear();
        }
        return counter;
    }
}