1 package org.unicode.cldr.test; 2 3 import java.util.Arrays; 4 import java.util.Collection; 5 import java.util.Collections; 6 import java.util.EnumMap; 7 import java.util.HashMap; 8 import java.util.HashSet; 9 import java.util.LinkedHashSet; 10 import java.util.List; 11 import java.util.Map; 12 import java.util.Map.Entry; 13 import java.util.Set; 14 import java.util.TreeMap; 15 import java.util.TreeSet; 16 import java.util.function.Predicate; 17 import java.util.regex.Pattern; 18 19 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype; 20 import org.unicode.cldr.util.CLDRConfig; 21 import org.unicode.cldr.util.CLDRFile; 22 import org.unicode.cldr.util.CldrUtility; 23 import org.unicode.cldr.util.DtdData; 24 import org.unicode.cldr.util.DtdData.Attribute; 25 import org.unicode.cldr.util.DtdData.Element; 26 import org.unicode.cldr.util.DtdType; 27 import org.unicode.cldr.util.Factory; 28 import org.unicode.cldr.util.LocaleIDParser; 29 import org.unicode.cldr.util.PatternCache; 30 import org.unicode.cldr.util.SupplementalDataInfo; 31 import org.unicode.cldr.util.SupplementalDataInfo.AttributeValidityInfo; 32 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 33 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 34 import org.unicode.cldr.util.SupplementalDataInfo.PluralType; 35 import org.unicode.cldr.util.XPathParts; 36 37 import com.ibm.icu.impl.Relation; 38 import com.ibm.icu.impl.Row; 39 import com.ibm.icu.impl.Row.R2; 40 import com.ibm.icu.text.UnicodeSet; 41 42 public class CheckAttributeValues extends FactoryCheckCLDR { 43 44 private static final Predicate<String> NOT_DONE_YET = new RegexMatcher().set(".*", Pattern.COMMENTS); 45 private static final boolean FIND_MISSING = CldrUtility.getProperty("FIND_MISSING_ATTRIBUTE_TESTS", false); // turn on to show <attributeValues> that are missing. 46 private static final boolean SHOW_UNNECESSARY = false; // turn on to show <attributeValues> we should delete. 47 48 static LinkedHashSet<String> elementOrder = new LinkedHashSet<>(); 49 static LinkedHashSet<String> attributeOrder = new LinkedHashSet<>(); 50 static LinkedHashSet<String> serialElements = new LinkedHashSet<>(); 51 static Map<String, Map<String, MatcherPattern>> element_attribute_validity = new HashMap<>(); 52 static Map<String, MatcherPattern> common_attribute_validity = new HashMap<>(); 53 static Map<String, MatcherPattern> variables = new HashMap<>(); 54 // static VariableReplacer variableReplacer = new VariableReplacer(); // note: this can be coalesced with the above 55 // -- to do later. 56 static boolean initialized = false; 57 static LocaleMatcher localeMatcher; 58 static Map<String, Map<String, String>> code_type_replacement = new TreeMap<>(); 59 static final SupplementalDataInfo supplementalData = CLDRConfig.getInstance().getSupplementalDataInfo(); 60 static DtdData ldmlDtdData = DtdData.getInstance(DtdType.ldml); 61 62 boolean isEnglish; 63 PluralInfo pluralInfo; 64 Relation<String, String> missingTests = Relation.of(new TreeMap(), TreeSet.class); 65 66 static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze(); 67 CheckAttributeValues(Factory factory)68 public CheckAttributeValues(Factory factory) { 69 super(factory); 70 } 71 72 @Override handleFinish()73 public void handleFinish() { 74 for (Entry<String, Set<String>> entry : missingTests.keyValuesSet()) { 75 System.out.println("Missing element: " + entry.getKey() + ", attributes: " + entry.getValue()); 76 } 77 } 78 79 @Override handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result)80 public CheckCLDR handleCheck(String path, String fullPath, String value, Options options, 81 List<CheckStatus> result) { 82 if (fullPath == null) return this; // skip paths that we don't have 83 if (fullPath.indexOf('[') < 0) return this; // skip paths with no attributes 84 String locale = getCldrFileToCheck().getSourceLocaleID(path, null); 85 86 // skip paths that are not in the immediate locale 87 if (!getCldrFileToCheck().getLocaleID().equals(locale)) { 88 return this; 89 } 90 XPathParts parts = XPathParts.getFrozenInstance(fullPath); 91 for (int i = 0; i < parts.size(); ++i) { 92 if (parts.getAttributeCount(i) == 0) { 93 continue; 94 } 95 Map<String, String> attributes = parts.getAttributes(i); 96 String element = parts.getElement(i); 97 Element elementInfo = ldmlDtdData.getElementFromName().get(element); 98 99 Map<String, MatcherPattern> attribute_validity = element_attribute_validity.get(element); 100 for (String attribute : attributes.keySet()) { 101 Attribute attributeInfo = elementInfo.getAttributeNamed(attribute); 102 if (!attributeInfo.values.isEmpty()) { 103 // we don't need to check, since the DTD will enforce values 104 continue; 105 } 106 String attributeValue = attributes.get(attribute); 107 108 // special hack for // <type key="calendar" type="chinese">Chinese Calendar</type> 109 if (element.equals("type") && attribute.equals("type")) { 110 Set<String> typeValues = BCP47_KEY_VALUES.get(attributes.get("key")); 111 if (!typeValues.contains(attributeValue)) { 112 result.add(new CheckStatus() 113 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.unexpectedAttributeValue) 114 .setMessage("Unexpected Attribute Value {0}={1}: expected: {2}", 115 new Object[] { attribute, attributeValue, typeValues })); 116 } 117 continue; 118 } 119 // check the common attributes first 120 boolean haveTest = check(common_attribute_validity, attribute, attributeValue, result); 121 // then for the specific element 122 haveTest = haveTest || check(attribute_validity, attribute, attributeValue, result); 123 if (!haveTest && FIND_MISSING) { 124 missingTests.put(element, attribute); 125 } 126 127 // now for plurals 128 129 if (attribute.equals("count")) { 130 if (DIGITS.containsAll(attributeValue)) { 131 // ok, keep going 132 } else { 133 final Count countValue = PluralInfo.Count.valueOf(attributeValue); 134 if (!pluralInfo.getCounts().contains(countValue) 135 && !isPluralException(countValue, locale)) { 136 result.add(new CheckStatus() 137 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.illegalPlural) 138 .setMessage("Illegal plural value {0}; must be one of: {1}", 139 new Object[] { countValue, pluralInfo.getCounts() })); 140 } 141 } 142 } 143 144 // TODO check other variable elements, like dayPeriods 145 } 146 } 147 return this; 148 } 149 150 static final Relation<PluralInfo.Count, String> PLURAL_EXCEPTIONS = Relation.of( 151 new EnumMap<PluralInfo.Count, Set<String>>(PluralInfo.Count.class), HashSet.class); 152 153 static { PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "hr")154 PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "hr"); PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sr")155 PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sr"); PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sh")156 PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sh"); PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "bs")157 PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "bs"); PLURAL_EXCEPTIONS.put(PluralInfo.Count.few, "ru")158 PLURAL_EXCEPTIONS.put(PluralInfo.Count.few, "ru"); 159 } 160 isPluralException(Count countValue, String locale)161 static boolean isPluralException(Count countValue, String locale) { 162 Set<String> exceptions = PLURAL_EXCEPTIONS.get(countValue); 163 if (exceptions == null) { 164 return false; 165 } 166 if (exceptions.contains(locale)) { 167 return true; 168 } 169 int bar = locale.indexOf('_'); // catch bs_Cyrl, etc. 170 if (bar > 0) { 171 String base = locale.substring(0, bar); 172 if (exceptions.contains(base)) { 173 return true; 174 } 175 } 176 return false; 177 } 178 179 /** 180 * return true if we performed a test 181 * @param attribute_validity 182 * @param attribute 183 * @param attributeValue 184 * @param result 185 * @return 186 */ check(Map<String, MatcherPattern> attribute_validity, String attribute, String attributeValue, List<CheckStatus> result)187 private boolean check(Map<String, MatcherPattern> attribute_validity, String attribute, String attributeValue, 188 List<CheckStatus> result) { 189 if (attribute_validity == null) { 190 return false; // no test 191 } 192 MatcherPattern matcherPattern = attribute_validity.get(attribute); 193 if (matcherPattern == null) { 194 return false; // no test 195 } 196 if (matcherPattern.matcher.test(attributeValue)) { 197 return true; 198 } 199 // special check for deprecated codes 200 String replacement = getReplacement(matcherPattern.value, attributeValue); 201 if (replacement != null) { 202 if (isEnglish) { 203 return true; // don't flag English 204 } 205 if (replacement.length() == 0) { 206 result.add(new CheckStatus() 207 .setCause(this).setMainType(CheckStatus.warningType).setSubtype(Subtype.deprecatedAttribute) 208 .setMessage("Deprecated Attribute Value {0}={1}. Consider removing.", 209 new Object[] { attribute, attributeValue })); 210 } else { 211 result 212 .add(new CheckStatus() 213 .setCause(this) 214 .setMainType(CheckStatus.warningType) 215 .setSubtype(Subtype.deprecatedAttributeWithReplacement) 216 .setMessage( 217 "Deprecated Attribute Value {0}={1}. Consider removing, and possibly modifying the related value for {2}.", 218 new Object[] { attribute, attributeValue, replacement })); 219 } 220 } else { 221 result.add(new CheckStatus() 222 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.unexpectedAttributeValue) 223 .setMessage("Unexpected Attribute Value {0}={1}: expected: {2}", 224 new Object[] { attribute, attributeValue, matcherPattern.pattern })); 225 } 226 return true; 227 } 228 229 /** 230 * Returns replacement, or null if there is none. "" if the code is deprecated, but without a replacement. 231 * Input is of the form $language 232 * 233 * @return 234 */ getReplacement(String value, String attributeValue)235 String getReplacement(String value, String attributeValue) { 236 Map<String, String> type_replacement = code_type_replacement.get(value); 237 if (type_replacement == null) { 238 return null; 239 } 240 return type_replacement.get(attributeValue); 241 } 242 243 LocaleIDParser localeIDParser = new LocaleIDParser(); 244 245 @Override setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors)246 public CheckCLDR setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, 247 List<CheckStatus> possibleErrors) { 248 if (cldrFileToCheck == null) return this; 249 if (Phase.FINAL_TESTING == getPhase() || Phase.BUILD == getPhase()) { 250 setSkipTest(false); // ok 251 } else { 252 setSkipTest(true); 253 return this; 254 } 255 256 pluralInfo = supplementalData.getPlurals(PluralType.cardinal, cldrFileToCheck.getLocaleID()); 257 super.setCldrFileToCheck(cldrFileToCheck, options, possibleErrors); 258 isEnglish = "en".equals(localeIDParser.set(cldrFileToCheck.getLocaleID()).getLanguage()); 259 synchronized (elementOrder) { 260 if (!initialized) { 261 getMetadata(); 262 initialized = true; 263 localeMatcher = LocaleMatcher.make(); 264 } 265 } 266 if (!localeMatcher.test(cldrFileToCheck.getLocaleID())) { 267 possibleErrors.add(new CheckStatus() 268 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.invalidLocale) 269 .setMessage("Invalid Locale {0}", 270 new Object[] { cldrFileToCheck.getLocaleID() })); 271 272 } 273 return this; 274 } 275 getMetadata()276 private void getMetadata() { 277 278 // sorting is expensive, but we need it here. 279 280 Map<String, R2<String, String>> rawVariables = supplementalData.getValidityInfo(); 281 for (Entry<String, R2<String, String>> item : rawVariables.entrySet()) { 282 String id = item.getKey(); 283 String type = item.getValue().get0(); 284 String value = item.getValue().get1(); 285 MatcherPattern mp = getMatcherPattern2(type, value); 286 if (mp != null) { 287 variables.put(id, mp); 288 // variableReplacer.add(id, value); 289 } 290 } 291 //System.out.println("Variables: " + variables.keySet()); 292 293 Map<AttributeValidityInfo, String> rawAttributeValueInfo = supplementalData.getAttributeValidity(); 294 295 for (Entry<AttributeValidityInfo, String> entry : rawAttributeValueInfo.entrySet()) { 296 AttributeValidityInfo item = entry.getKey(); 297 String value = entry.getValue(); 298 MatcherPattern mp = getMatcherPattern2(item.getType(), value); 299 if (mp == null) { 300 System.out.println("Failed to make matcher for: " + item); 301 continue; 302 } 303 if (FIND_MISSING && mp.matcher == NOT_DONE_YET) { 304 missingTests.put(item.getElements().toString(), item.getAttributes().toString()); 305 } 306 307 Set<DtdType> dtds = item.getDtds(); 308 // TODO handle other DTDs 309 if (!dtds.contains(DtdType.ldml)) { 310 continue; 311 } 312 Set<String> attributeList = item.getAttributes(); 313 Set<String> elementList = item.getElements(); 314 if (elementList.size() == 0) { 315 addAttributes(attributeList, common_attribute_validity, mp); 316 } else { 317 for (String element : elementList) { 318 // check if unnecessary 319 Element elementInfo = ldmlDtdData.getElementFromName().get(element); 320 if (elementInfo == null) { 321 System.out.println("Illegal <attributeValues>, element not valid: element: " + element); 322 } else { 323 for (String attribute : attributeList) { 324 Attribute attributeInfo = elementInfo.getAttributeNamed(attribute); 325 if (attributeInfo == null) { 326 System.out.println("Illegal <attributeValues>, attribute not valid: element: " + element + ", attribute: " + attribute); 327 } else if (!attributeInfo.values.isEmpty()) { 328 if (SHOW_UNNECESSARY) { 329 System.out.println("Unnecessary <attributeValues …>, the DTD has specific list: element: " + element + ", attribute: " 330 + attribute + ", " + attributeInfo.values); 331 } 332 } 333 } 334 } 335 // System.out.println("\t" + element); 336 Map<String, MatcherPattern> attribute_validity = element_attribute_validity.get(element); 337 if (attribute_validity == null) { 338 element_attribute_validity.put(element, attribute_validity = new TreeMap<>()); 339 } 340 addAttributes(attributeList, attribute_validity, mp); 341 } 342 } 343 } 344 } 345 346 final static Map<String, Set<String>> BCP47_KEY_VALUES; 347 static { 348 Map<String, Set<String>> temp = new HashMap<>(); 349 Relation<R2<String, String>, String> bcp47Aliases = supplementalData.getBcp47Aliases(); 350 for (Entry<String, Set<String>> keyValues : supplementalData.getBcp47Keys().keyValuesSet()) { 351 Set<String> fullValues = new TreeSet<>(); 352 String key = keyValues.getKey(); 353 Set<String> rawValues = keyValues.getValue(); 354 for (String value : rawValues) { 355 if (key.equals("cu")) { // Currency codes are in upper case. value.toUpperCase()356 fullValues.add(value.toUpperCase()); 357 } else { 358 fullValues.add(value); 359 } 360 R2<String, String> keyValue = R2.of(key, value); 361 Set<String> aliases = bcp47Aliases.getAll(keyValue); 362 if (aliases != null) { 363 fullValues.addAll(aliases); 364 } 365 } 366 // Special case exception for generic calendar, since we don't want to expose it in bcp47 367 if (key.equals("ca")) { 368 fullValues.add("generic"); 369 } 370 fullValues = Collections.unmodifiableSet(fullValues); temp.put(key, fullValues)371 temp.put(key, fullValues); 372 // add aliased keys 373 Set<String> aliases = supplementalData.getBcp47Aliases().getAll(Row.of(key, "")); 374 if (aliases != null) { 375 for (String aliasKey : aliases) { temp.put(aliasKey, fullValues)376 temp.put(aliasKey, fullValues); 377 } 378 } 379 temp.put("x", Collections.EMPTY_SET); // Hack for 'x', private use. 380 } 381 BCP47_KEY_VALUES = Collections.unmodifiableMap(temp); 382 } 383 getBcp47MatcherPattern(String key)384 private MatcherPattern getBcp47MatcherPattern(String key) { 385 // <key type="calendar">Calendar</key> 386 // <type key="calendar" type="chinese">Chinese Calendar</type> 387 388 //<attributeValues elements="key" attributes="type" type="bcp47">key</attributeValues> 389 //<attributeValues elements="type" attributes="key" type="bcp47">key</attributeValues> 390 //<attributeValues elements="type" attributes="type" type="bcp47">use-key</attributeValues> 391 392 MatcherPattern m = new MatcherPattern(); 393 Set<String> values; 394 if (key.equals("key")) { 395 values = BCP47_KEY_VALUES.keySet(); 396 } else { 397 values = BCP47_KEY_VALUES.get(key); 398 } 399 m.value = key; 400 m.pattern = values.toString(); 401 m.matcher = new CollectionMatcher().set(values); 402 return m; 403 } 404 getMatcherPattern2(String type, String value)405 private MatcherPattern getMatcherPattern2(String type, String value) { 406 String typeAttribute = type; 407 MatcherPattern result = variables.get(value); 408 if (result != null) { 409 MatcherPattern temp = new MatcherPattern(); 410 temp.pattern = result.pattern; 411 temp.matcher = result.matcher; 412 temp.value = value; 413 result = temp; 414 if ("list".equals(typeAttribute)) { 415 temp.matcher = new ListMatcher().set(result.matcher); 416 } 417 return result; 418 } 419 420 result = new MatcherPattern(); 421 result.pattern = value; 422 result.value = value; 423 if ("choice".equals(typeAttribute)) { 424 result.matcher = new CollectionMatcher() 425 .set(new HashSet<>(Arrays.asList(value.trim().split("\\s+")))); 426 } else if ("bcp47".equals(typeAttribute)) { 427 result = getBcp47MatcherPattern(value); 428 } else if ("regex".equals(typeAttribute)) { 429 result.matcher = new RegexMatcher().set(value, Pattern.COMMENTS); // Pattern.COMMENTS to get whitespace 430 } else if ("locale".equals(typeAttribute)) { 431 result.matcher = LocaleMatcher.make(); 432 } else if ("notDoneYet".equals(typeAttribute) || "notDoneYet".equals(value)) { 433 result.matcher = NOT_DONE_YET; 434 } else { 435 System.out.println("unknown type; value: <" + value + ">,\t" + typeAttribute); 436 return null; 437 } 438 return result; 439 } 440 addAttributes(Set<String> attributes, Map<String, MatcherPattern> attribute_validity, MatcherPattern mp)441 private void addAttributes(Set<String> attributes, Map<String, MatcherPattern> attribute_validity, MatcherPattern mp) { 442 for (String attribute : attributes) { 443 MatcherPattern old = attribute_validity.get(attribute); 444 if (old != null) { 445 mp.matcher = new OrMatcher().set(old.matcher, mp.matcher); 446 mp.pattern = old.pattern + " OR " + mp.pattern; 447 } 448 attribute_validity.put(attribute, mp); 449 } 450 } 451 452 private static class MatcherPattern { 453 public String value; 454 Predicate<String> matcher; 455 String pattern; 456 457 @Override toString()458 public String toString() { 459 return matcher.getClass().getName() + "\t" + pattern; 460 } 461 } 462 463 public static class RegexMatcher implements Predicate<String> { 464 private java.util.regex.Matcher matcher; 465 set(String pattern)466 public Predicate<String> set(String pattern) { 467 matcher = PatternCache.get(pattern).matcher(""); 468 return this; 469 } 470 set(String pattern, int flags)471 public Predicate<String> set(String pattern, int flags) { 472 matcher = Pattern.compile(pattern, flags).matcher(""); 473 return this; 474 } 475 476 @Override test(String value)477 public boolean test(String value) { 478 matcher.reset(value.toString()); 479 return matcher.matches(); 480 } 481 } 482 483 public static class CollectionMatcher implements Predicate<String> { 484 private Collection<String> collection; 485 set(Collection<String> collection)486 public Predicate<String> set(Collection<String> collection) { 487 this.collection = collection; 488 return this; 489 } 490 491 @Override test(String value)492 public boolean test(String value) { 493 return collection.contains(value); 494 } 495 } 496 497 public static class OrMatcher implements Predicate<String> { 498 private Predicate<String> a; 499 private Predicate<String> b; 500 set(Predicate<String> a, Predicate<String> b)501 public Predicate<String> set(Predicate<String> a, Predicate<String> b) { 502 this.a = a; 503 this.b = b; 504 return this; 505 } 506 507 @Override test(String value)508 public boolean test(String value) { 509 return a.test(value) || b.test(value); 510 } 511 } 512 513 public static class ListMatcher implements Predicate<String> { 514 private Predicate<String> other; 515 set(Predicate<String> other)516 public Predicate<String> set(Predicate<String> other) { 517 this.other = other; 518 return this; 519 } 520 521 @Override test(String value)522 public boolean test(String value) { 523 String[] values = value.trim().split("\\s+"); 524 if (values.length == 1 && values[0].length() == 0) return true; 525 for (int i = 0; i < values.length; ++i) { 526 if (!other.test(values[i])) { 527 return false; 528 } 529 } 530 return true; 531 } 532 } 533 534 public static class LocaleMatcher implements Predicate<String> { 535 Predicate<String> legacy = variables.get("$grandfathered").matcher; 536 Predicate<String> language = variables.get("$language").matcher; 537 Predicate<String> script = variables.get("$script").matcher; 538 Predicate<String> territory = variables.get("$territory").matcher; 539 Predicate<String> variant = variables.get("$variant").matcher; 540 LocaleIDParser lip = new LocaleIDParser(); 541 LocaleMatcher()542 private LocaleMatcher() { 543 } 544 545 private static final class LocaleMatcherHelper { 546 static LocaleMatcher SINGLETON = new LocaleMatcher(); 547 } 548 make()549 public static LocaleMatcher make() { 550 return LocaleMatcherHelper.SINGLETON; 551 } 552 553 @Override test(String value)554 public boolean test(String value) { 555 if (legacy.test(value)) return true; 556 lip.set((String) value); 557 String field = lip.getLanguage(); 558 if (!language.test(field)) return false; 559 field = lip.getScript(); 560 if (field.length() != 0 && !script.test(field)) return false; 561 field = lip.getRegion(); 562 if (field.length() != 0 && !territory.test(field)) return false; 563 String[] fields = lip.getVariants(); 564 for (int i = 0; i < fields.length; ++i) { 565 if (!variant.test(fields[i])) return false; 566 } 567 return true; 568 } 569 } 570 571 } 572