1 package org.unicode.cldr.test; 2 3 import java.util.Arrays; 4 import java.util.Collection; 5 import java.util.Collections; 6 import java.util.EnumMap; 7 import java.util.HashMap; 8 import java.util.HashSet; 9 import java.util.LinkedHashSet; 10 import java.util.List; 11 import java.util.Map; 12 import java.util.Map.Entry; 13 import java.util.Set; 14 import java.util.TreeMap; 15 import java.util.TreeSet; 16 import java.util.regex.Pattern; 17 18 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype; 19 import org.unicode.cldr.util.CLDRConfig; 20 import org.unicode.cldr.util.CLDRFile; 21 import org.unicode.cldr.util.CldrUtility; 22 import org.unicode.cldr.util.DtdData; 23 import org.unicode.cldr.util.DtdData.Attribute; 24 import org.unicode.cldr.util.DtdData.Element; 25 import org.unicode.cldr.util.DtdType; 26 import org.unicode.cldr.util.Factory; 27 import org.unicode.cldr.util.LocaleIDParser; 28 import org.unicode.cldr.util.PatternCache; 29 import org.unicode.cldr.util.SupplementalDataInfo; 30 import org.unicode.cldr.util.SupplementalDataInfo.AttributeValidityInfo; 31 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 32 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 33 import org.unicode.cldr.util.SupplementalDataInfo.PluralType; 34 import org.unicode.cldr.util.XPathParts; 35 36 import com.ibm.icu.dev.util.CollectionUtilities.ObjectMatcher; 37 import com.ibm.icu.impl.Relation; 38 import com.ibm.icu.impl.Row; 39 import com.ibm.icu.impl.Row.R2; 40 import com.ibm.icu.text.UnicodeSet; 41 42 public class CheckAttributeValues extends FactoryCheckCLDR { 43 44 private static final ObjectMatcher<String> NOT_DONE_YET = new RegexMatcher().set(".*", Pattern.COMMENTS); 45 private static final boolean FIND_MISSING = CldrUtility.getProperty("FIND_MISSING_ATTRIBUTE_TESTS", false); // turn on to show <attributeValues> that are missing. 46 private static final boolean SHOW_UNNECESSARY = false; // turn on to show <attributeValues> we should delete. 47 48 static LinkedHashSet<String> elementOrder = new LinkedHashSet<String>(); 49 static LinkedHashSet<String> attributeOrder = new LinkedHashSet<String>(); 50 static LinkedHashSet<String> serialElements = new LinkedHashSet<String>(); 51 static Map<String, Map<String, MatcherPattern>> element_attribute_validity = new HashMap<String, Map<String, MatcherPattern>>(); 52 static Map<String, MatcherPattern> common_attribute_validity = new HashMap<String, MatcherPattern>(); 53 static Map<String, MatcherPattern> variables = new HashMap<String, MatcherPattern>(); 54 // static VariableReplacer variableReplacer = new VariableReplacer(); // note: this can be coalesced with the above 55 // -- to do later. 56 static boolean initialized = false; 57 static LocaleMatcher localeMatcher; 58 static Map<String, Map<String, String>> code_type_replacement = new TreeMap<String, Map<String, String>>(); 59 static final SupplementalDataInfo supplementalData = CLDRConfig.getInstance().getSupplementalDataInfo(); 60 static DtdData ldmlDtdData = DtdData.getInstance(DtdType.ldml); 61 62 boolean isEnglish; 63 PluralInfo pluralInfo; 64 Relation<String, String> missingTests = Relation.of(new TreeMap(), TreeSet.class); 65 66 static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze(); 67 CheckAttributeValues(Factory factory)68 public CheckAttributeValues(Factory factory) { 69 super(factory); 70 } 71 handleFinish()72 public void handleFinish() { 73 for (Entry<String, Set<String>> entry : missingTests.keyValuesSet()) { 74 System.out.println("Missing element: " + entry.getKey() + ", attributes: " + entry.getValue()); 75 } 76 } 77 handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result)78 public CheckCLDR handleCheck(String path, String fullPath, String value, Options options, 79 List<CheckStatus> result) { 80 if (fullPath == null) return this; // skip paths that we don't have 81 if (fullPath.indexOf('[') < 0) return this; // skip paths with no attributes 82 String locale = getCldrFileToCheck().getSourceLocaleID(path, null); 83 84 // skip paths that are not in the immediate locale 85 if (!getCldrFileToCheck().getLocaleID().equals(locale)) { 86 return this; 87 } 88 XPathParts parts = XPathParts.getFrozenInstance(fullPath); 89 for (int i = 0; i < parts.size(); ++i) { 90 if (parts.getAttributeCount(i) == 0) { 91 continue; 92 } 93 Map<String, String> attributes = parts.getAttributes(i); 94 String element = parts.getElement(i); 95 Element elementInfo = ldmlDtdData.getElementFromName().get(element); 96 97 Map<String, MatcherPattern> attribute_validity = element_attribute_validity.get(element); 98 for (String attribute : attributes.keySet()) { 99 Attribute attributeInfo = elementInfo.getAttributeNamed(attribute); 100 if (!attributeInfo.values.isEmpty()) { 101 // we don't need to check, since the DTD will enforce values 102 continue; 103 } 104 String attributeValue = attributes.get(attribute); 105 106 // special hack for // <type key="calendar" type="chinese">Chinese Calendar</type> 107 if (element.equals("type") && attribute.equals("type")) { 108 Set<String> typeValues = BCP47_KEY_VALUES.get(attributes.get("key")); 109 if (!typeValues.contains(attributeValue)) { 110 result.add(new CheckStatus() 111 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.unexpectedAttributeValue) 112 .setMessage("Unexpected Attribute Value {0}={1}: expected: {2}", 113 new Object[] { attribute, attributeValue, typeValues })); 114 } 115 continue; 116 } 117 // check the common attributes first 118 boolean haveTest = check(common_attribute_validity, attribute, attributeValue, result); 119 // then for the specific element 120 haveTest = haveTest || check(attribute_validity, attribute, attributeValue, result); 121 if (!haveTest && FIND_MISSING) { 122 missingTests.put(element, attribute); 123 } 124 125 // now for plurals 126 127 if (attribute.equals("count")) { 128 if (DIGITS.containsAll(attributeValue)) { 129 // ok, keep going 130 } else { 131 final Count countValue = PluralInfo.Count.valueOf(attributeValue); 132 if (!pluralInfo.getCounts().contains(countValue) 133 && !isPluralException(countValue, locale)) { 134 result.add(new CheckStatus() 135 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.illegalPlural) 136 .setMessage("Illegal plural value {0}; must be one of: {1}", 137 new Object[] { countValue, pluralInfo.getCounts() })); 138 } 139 } 140 } 141 142 // TODO check other variable elements, like dayPeriods 143 } 144 } 145 return this; 146 } 147 148 static final Relation<PluralInfo.Count, String> PLURAL_EXCEPTIONS = Relation.of( 149 new EnumMap<PluralInfo.Count, Set<String>>(PluralInfo.Count.class), HashSet.class); 150 151 static { PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "hr")152 PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "hr"); PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sr")153 PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sr"); PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sh")154 PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sh"); PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "bs")155 PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "bs"); PLURAL_EXCEPTIONS.put(PluralInfo.Count.few, "ru")156 PLURAL_EXCEPTIONS.put(PluralInfo.Count.few, "ru"); 157 } 158 isPluralException(Count countValue, String locale)159 static boolean isPluralException(Count countValue, String locale) { 160 Set<String> exceptions = PLURAL_EXCEPTIONS.get(countValue); 161 if (exceptions == null) { 162 return false; 163 } 164 if (exceptions.contains(locale)) { 165 return true; 166 } 167 int bar = locale.indexOf('_'); // catch bs_Cyrl, etc. 168 if (bar > 0) { 169 String base = locale.substring(0, bar); 170 if (exceptions.contains(base)) { 171 return true; 172 } 173 } 174 return false; 175 } 176 177 /** 178 * return true if we performed a test 179 * @param attribute_validity 180 * @param attribute 181 * @param attributeValue 182 * @param result 183 * @return 184 */ check(Map<String, MatcherPattern> attribute_validity, String attribute, String attributeValue, List<CheckStatus> result)185 private boolean check(Map<String, MatcherPattern> attribute_validity, String attribute, String attributeValue, 186 List<CheckStatus> result) { 187 if (attribute_validity == null) { 188 return false; // no test 189 } 190 MatcherPattern matcherPattern = attribute_validity.get(attribute); 191 if (matcherPattern == null) { 192 return false; // no test 193 } 194 if (matcherPattern.matcher.matches(attributeValue)) { 195 return true; 196 } 197 // special check for deprecated codes 198 String replacement = getReplacement(matcherPattern.value, attributeValue); 199 if (replacement != null) { 200 if (isEnglish) { 201 return true; // don't flag English 202 } 203 if (replacement.length() == 0) { 204 result.add(new CheckStatus() 205 .setCause(this).setMainType(CheckStatus.warningType).setSubtype(Subtype.deprecatedAttribute) 206 .setMessage("Deprecated Attribute Value {0}={1}. Consider removing.", 207 new Object[] { attribute, attributeValue })); 208 } else { 209 result 210 .add(new CheckStatus() 211 .setCause(this) 212 .setMainType(CheckStatus.warningType) 213 .setSubtype(Subtype.deprecatedAttributeWithReplacement) 214 .setMessage( 215 "Deprecated Attribute Value {0}={1}. Consider removing, and possibly modifying the related value for {2}.", 216 new Object[] { attribute, attributeValue, replacement })); 217 } 218 } else { 219 result.add(new CheckStatus() 220 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.unexpectedAttributeValue) 221 .setMessage("Unexpected Attribute Value {0}={1}: expected: {2}", 222 new Object[] { attribute, attributeValue, matcherPattern.pattern })); 223 } 224 return true; 225 } 226 227 /** 228 * Returns replacement, or null if there is none. "" if the code is deprecated, but without a replacement. 229 * Input is of the form $language 230 * 231 * @return 232 */ getReplacement(String value, String attributeValue)233 String getReplacement(String value, String attributeValue) { 234 Map<String, String> type_replacement = code_type_replacement.get(value); 235 if (type_replacement == null) { 236 return null; 237 } 238 return type_replacement.get(attributeValue); 239 } 240 241 LocaleIDParser localeIDParser = new LocaleIDParser(); 242 243 @Override setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors)244 public CheckCLDR setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, 245 List<CheckStatus> possibleErrors) { 246 if (cldrFileToCheck == null) return this; 247 if (Phase.FINAL_TESTING == getPhase() || Phase.BUILD == getPhase()) { 248 setSkipTest(false); // ok 249 } else { 250 setSkipTest(true); 251 return this; 252 } 253 254 pluralInfo = supplementalData.getPlurals(PluralType.cardinal, cldrFileToCheck.getLocaleID()); 255 super.setCldrFileToCheck(cldrFileToCheck, options, possibleErrors); 256 isEnglish = "en".equals(localeIDParser.set(cldrFileToCheck.getLocaleID()).getLanguage()); 257 synchronized (elementOrder) { 258 if (!initialized) { 259 getMetadata(); 260 initialized = true; 261 localeMatcher = LocaleMatcher.make(); 262 } 263 } 264 if (!localeMatcher.matches(cldrFileToCheck.getLocaleID())) { 265 possibleErrors.add(new CheckStatus() 266 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.invalidLocale) 267 .setMessage("Invalid Locale {0}", 268 new Object[] { cldrFileToCheck.getLocaleID() })); 269 270 } 271 return this; 272 } 273 getMetadata()274 private void getMetadata() { 275 276 // sorting is expensive, but we need it here. 277 278 Map<String, R2<String, String>> rawVariables = supplementalData.getValidityInfo(); 279 for (Entry<String, R2<String, String>> item : rawVariables.entrySet()) { 280 String id = item.getKey(); 281 String type = item.getValue().get0(); 282 String value = item.getValue().get1(); 283 MatcherPattern mp = getMatcherPattern2(type, value); 284 if (mp != null) { 285 variables.put(id, mp); 286 // variableReplacer.add(id, value); 287 } 288 } 289 //System.out.println("Variables: " + variables.keySet()); 290 291 Map<AttributeValidityInfo, String> rawAttributeValueInfo = supplementalData.getAttributeValidity(); 292 293 for (Entry<AttributeValidityInfo, String> entry : rawAttributeValueInfo.entrySet()) { 294 AttributeValidityInfo item = entry.getKey(); 295 String value = entry.getValue(); 296 MatcherPattern mp = getMatcherPattern2(item.getType(), value); 297 if (mp == null) { 298 System.out.println("Failed to make matcher for: " + item); 299 continue; 300 } 301 if (FIND_MISSING && mp.matcher == NOT_DONE_YET) { 302 missingTests.put(item.getElements().toString(), item.getAttributes().toString()); 303 } 304 305 Set<DtdType> dtds = item.getDtds(); 306 // TODO handle other DTDs 307 if (!dtds.contains(DtdType.ldml)) { 308 continue; 309 } 310 Set<String> attributeList = item.getAttributes(); 311 Set<String> elementList = item.getElements(); 312 if (elementList.size() == 0) { 313 addAttributes(attributeList, common_attribute_validity, mp); 314 } else { 315 for (String element : elementList) { 316 // check if unnecessary 317 Element elementInfo = ldmlDtdData.getElementFromName().get(element); 318 if (elementInfo == null) { 319 System.out.println("Illegal <attributeValues>, element not valid: element: " + element); 320 } else { 321 for (String attribute : attributeList) { 322 Attribute attributeInfo = elementInfo.getAttributeNamed(attribute); 323 if (attributeInfo == null) { 324 System.out.println("Illegal <attributeValues>, attribute not valid: element: " + element + ", attribute: " + attribute); 325 } else if (!attributeInfo.values.isEmpty()) { 326 if (SHOW_UNNECESSARY) { 327 System.out.println("Unnecessary <attributeValues …>, the DTD has specific list: element: " + element + ", attribute: " 328 + attribute + ", " + attributeInfo.values); 329 } 330 } 331 } 332 } 333 // System.out.println("\t" + element); 334 Map<String, MatcherPattern> attribute_validity = element_attribute_validity.get(element); 335 if (attribute_validity == null) { 336 element_attribute_validity.put(element, attribute_validity = new TreeMap<String, MatcherPattern>()); 337 } 338 addAttributes(attributeList, attribute_validity, mp); 339 } 340 } 341 } 342 } 343 344 final static Map<String, Set<String>> BCP47_KEY_VALUES; 345 static { 346 Map<String, Set<String>> temp = new HashMap<>(); 347 Relation<R2<String, String>, String> bcp47Aliases = supplementalData.getBcp47Aliases(); 348 for (Entry<String, Set<String>> keyValues : supplementalData.getBcp47Keys().keyValuesSet()) { 349 Set<String> fullValues = new TreeSet<>(); 350 String key = keyValues.getKey(); 351 Set<String> rawValues = keyValues.getValue(); 352 for (String value : rawValues) { 353 if (key.equals("cu")) { // Currency codes are in upper case. value.toUpperCase()354 fullValues.add(value.toUpperCase()); 355 } else { 356 fullValues.add(value); 357 } 358 R2<String, String> keyValue = R2.of(key, value); 359 Set<String> aliases = bcp47Aliases.getAll(keyValue); 360 if (aliases != null) { 361 fullValues.addAll(aliases); 362 } 363 } 364 // Special case exception for generic calendar, since we don't want to expose it in bcp47 365 if (key.equals("ca")) { 366 fullValues.add("generic"); 367 } 368 fullValues = Collections.unmodifiableSet(fullValues); temp.put(key, fullValues)369 temp.put(key, fullValues); 370 // add aliased keys 371 Set<String> aliases = supplementalData.getBcp47Aliases().getAll(Row.of(key, "")); 372 if (aliases != null) { 373 for (String aliasKey : aliases) { temp.put(aliasKey, fullValues)374 temp.put(aliasKey, fullValues); 375 } 376 } 377 temp.put("x", Collections.EMPTY_SET); // Hack for 'x', private use. 378 } 379 BCP47_KEY_VALUES = Collections.unmodifiableMap(temp); 380 } 381 getBcp47MatcherPattern(String key)382 private MatcherPattern getBcp47MatcherPattern(String key) { 383 // <key type="calendar">Calendar</key> 384 // <type key="calendar" type="chinese">Chinese Calendar</type> 385 386 //<attributeValues elements="key" attributes="type" type="bcp47">key</attributeValues> 387 //<attributeValues elements="type" attributes="key" type="bcp47">key</attributeValues> 388 //<attributeValues elements="type" attributes="type" type="bcp47">use-key</attributeValues> 389 390 MatcherPattern m = new MatcherPattern(); 391 Set<String> values; 392 if (key.equals("key")) { 393 values = BCP47_KEY_VALUES.keySet(); 394 } else { 395 values = BCP47_KEY_VALUES.get(key); 396 } 397 m.value = key; 398 m.pattern = values.toString(); 399 m.matcher = new CollectionMatcher().set(values); 400 return m; 401 } 402 getMatcherPattern2(String type, String value)403 private MatcherPattern getMatcherPattern2(String type, String value) { 404 String typeAttribute = type; 405 MatcherPattern result = variables.get(value); 406 if (result != null) { 407 MatcherPattern temp = new MatcherPattern(); 408 temp.pattern = result.pattern; 409 temp.matcher = result.matcher; 410 temp.value = value; 411 result = temp; 412 if ("list".equals(typeAttribute)) { 413 temp.matcher = new ListMatcher().set(result.matcher); 414 } 415 return result; 416 } 417 418 result = new MatcherPattern(); 419 result.pattern = value; 420 result.value = value; 421 if ("choice".equals(typeAttribute)) { 422 result.matcher = new CollectionMatcher() 423 .set(new HashSet<String>(Arrays.asList(value.trim().split("\\s+")))); 424 } else if ("bcp47".equals(typeAttribute)) { 425 result = getBcp47MatcherPattern(value); 426 } else if ("regex".equals(typeAttribute)) { 427 result.matcher = new RegexMatcher().set(value, Pattern.COMMENTS); // Pattern.COMMENTS to get whitespace 428 } else if ("locale".equals(typeAttribute)) { 429 result.matcher = LocaleMatcher.make(); 430 } else if ("notDoneYet".equals(typeAttribute) || "notDoneYet".equals(value)) { 431 result.matcher = NOT_DONE_YET; 432 } else { 433 System.out.println("unknown type; value: <" + value + ">,\t" + typeAttribute); 434 return null; 435 } 436 return result; 437 } 438 addAttributes(Set<String> attributes, Map<String, MatcherPattern> attribute_validity, MatcherPattern mp)439 private void addAttributes(Set<String> attributes, Map<String, MatcherPattern> attribute_validity, MatcherPattern mp) { 440 for (String attribute : attributes) { 441 MatcherPattern old = attribute_validity.get(attribute); 442 if (old != null) { 443 mp.matcher = new OrMatcher().set(old.matcher, mp.matcher); 444 mp.pattern = old.pattern + " OR " + mp.pattern; 445 } 446 attribute_validity.put(attribute, mp); 447 } 448 } 449 450 private static class MatcherPattern { 451 public String value; 452 ObjectMatcher<String> matcher; 453 String pattern; 454 toString()455 public String toString() { 456 return matcher.getClass().getName() + "\t" + pattern; 457 } 458 } 459 460 public static class RegexMatcher implements ObjectMatcher<String> { 461 private java.util.regex.Matcher matcher; 462 set(String pattern)463 public ObjectMatcher<String> set(String pattern) { 464 matcher = PatternCache.get(pattern).matcher(""); 465 return this; 466 } 467 set(String pattern, int flags)468 public ObjectMatcher<String> set(String pattern, int flags) { 469 matcher = Pattern.compile(pattern, flags).matcher(""); 470 return this; 471 } 472 matches(String value)473 public boolean matches(String value) { 474 matcher.reset(value.toString()); 475 return matcher.matches(); 476 } 477 } 478 479 public static class CollectionMatcher implements ObjectMatcher<String> { 480 private Collection<String> collection; 481 set(Collection<String> collection)482 public ObjectMatcher<String> set(Collection<String> collection) { 483 this.collection = collection; 484 return this; 485 } 486 matches(String value)487 public boolean matches(String value) { 488 return collection.contains(value); 489 } 490 } 491 492 public static class OrMatcher implements ObjectMatcher<String> { 493 private ObjectMatcher<String> a; 494 private ObjectMatcher<String> b; 495 set(ObjectMatcher<String> a, ObjectMatcher<String> b)496 public ObjectMatcher<String> set(ObjectMatcher<String> a, ObjectMatcher<String> b) { 497 this.a = a; 498 this.b = b; 499 return this; 500 } 501 matches(String value)502 public boolean matches(String value) { 503 return a.matches(value) || b.matches(value); 504 } 505 } 506 507 public static class ListMatcher implements ObjectMatcher<String> { 508 private ObjectMatcher<String> other; 509 set(ObjectMatcher<String> other)510 public ObjectMatcher<String> set(ObjectMatcher<String> other) { 511 this.other = other; 512 return this; 513 } 514 matches(String value)515 public boolean matches(String value) { 516 String[] values = value.trim().split("\\s+"); 517 if (values.length == 1 && values[0].length() == 0) return true; 518 for (int i = 0; i < values.length; ++i) { 519 if (!other.matches(values[i])) { 520 return false; 521 } 522 } 523 return true; 524 } 525 } 526 527 public static class LocaleMatcher implements ObjectMatcher<String> { 528 ObjectMatcher<String> grandfathered = variables.get("$grandfathered").matcher; 529 ObjectMatcher<String> language = variables.get("$language").matcher; 530 ObjectMatcher<String> script = variables.get("$script").matcher; 531 ObjectMatcher<String> territory = variables.get("$territory").matcher; 532 ObjectMatcher<String> variant = variables.get("$variant").matcher; 533 LocaleIDParser lip = new LocaleIDParser(); 534 static LocaleMatcher singleton = null; 535 static Object sync = new Object(); 536 LocaleMatcher(boolean b)537 private LocaleMatcher(boolean b) { 538 } 539 make()540 public static LocaleMatcher make() { 541 synchronized (sync) { 542 if (singleton == null) { 543 singleton = new LocaleMatcher(true); 544 } 545 } 546 return singleton; 547 } 548 matches(String value)549 public boolean matches(String value) { 550 if (grandfathered.matches(value)) return true; 551 lip.set((String) value); 552 String field = lip.getLanguage(); 553 if (!language.matches(field)) return false; 554 field = lip.getScript(); 555 if (field.length() != 0 && !script.matches(field)) return false; 556 field = lip.getRegion(); 557 if (field.length() != 0 && !territory.matches(field)) return false; 558 String[] fields = lip.getVariants(); 559 for (int i = 0; i < fields.length; ++i) { 560 if (!variant.matches(fields[i])) return false; 561 } 562 return true; 563 } 564 } 565 566 }