• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.test;
2 
3 import java.util.Arrays;
4 import java.util.Collection;
5 import java.util.Collections;
6 import java.util.EnumMap;
7 import java.util.HashMap;
8 import java.util.HashSet;
9 import java.util.LinkedHashSet;
10 import java.util.List;
11 import java.util.Map;
12 import java.util.Map.Entry;
13 import java.util.Set;
14 import java.util.TreeMap;
15 import java.util.TreeSet;
16 import java.util.regex.Pattern;
17 
18 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
19 import org.unicode.cldr.util.CLDRConfig;
20 import org.unicode.cldr.util.CLDRFile;
21 import org.unicode.cldr.util.CldrUtility;
22 import org.unicode.cldr.util.DtdData;
23 import org.unicode.cldr.util.DtdData.Attribute;
24 import org.unicode.cldr.util.DtdData.Element;
25 import org.unicode.cldr.util.DtdType;
26 import org.unicode.cldr.util.Factory;
27 import org.unicode.cldr.util.LocaleIDParser;
28 import org.unicode.cldr.util.PatternCache;
29 import org.unicode.cldr.util.SupplementalDataInfo;
30 import org.unicode.cldr.util.SupplementalDataInfo.AttributeValidityInfo;
31 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
32 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
33 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
34 import org.unicode.cldr.util.XPathParts;
35 
36 import com.ibm.icu.dev.util.CollectionUtilities.ObjectMatcher;
37 import com.ibm.icu.impl.Relation;
38 import com.ibm.icu.impl.Row;
39 import com.ibm.icu.impl.Row.R2;
40 import com.ibm.icu.text.UnicodeSet;
41 
42 public class CheckAttributeValues extends FactoryCheckCLDR {
43 
44     private static final ObjectMatcher<String> NOT_DONE_YET = new RegexMatcher().set(".*", Pattern.COMMENTS);
45     private static final boolean FIND_MISSING = CldrUtility.getProperty("FIND_MISSING_ATTRIBUTE_TESTS", false); // turn on to show <attributeValues> that are missing.
46     private static final boolean SHOW_UNNECESSARY = false; // turn on to show <attributeValues> we should delete.
47 
48     static LinkedHashSet<String> elementOrder = new LinkedHashSet<String>();
49     static LinkedHashSet<String> attributeOrder = new LinkedHashSet<String>();
50     static LinkedHashSet<String> serialElements = new LinkedHashSet<String>();
51     static Map<String, Map<String, MatcherPattern>> element_attribute_validity = new HashMap<String, Map<String, MatcherPattern>>();
52     static Map<String, MatcherPattern> common_attribute_validity = new HashMap<String, MatcherPattern>();
53     static Map<String, MatcherPattern> variables = new HashMap<String, MatcherPattern>();
54     // static VariableReplacer variableReplacer = new VariableReplacer(); // note: this can be coalesced with the above
55     // -- to do later.
56     static boolean initialized = false;
57     static LocaleMatcher localeMatcher;
58     static Map<String, Map<String, String>> code_type_replacement = new TreeMap<String, Map<String, String>>();
59     static final SupplementalDataInfo supplementalData = CLDRConfig.getInstance().getSupplementalDataInfo();
60     static DtdData ldmlDtdData = DtdData.getInstance(DtdType.ldml);
61 
62     boolean isEnglish;
63     PluralInfo pluralInfo;
64     Relation<String, String> missingTests = Relation.of(new TreeMap(), TreeSet.class);
65 
66     static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze();
67 
CheckAttributeValues(Factory factory)68     public CheckAttributeValues(Factory factory) {
69         super(factory);
70     }
71 
handleFinish()72     public void handleFinish() {
73         for (Entry<String, Set<String>> entry : missingTests.keyValuesSet()) {
74             System.out.println("Missing element: " + entry.getKey() + ", attributes: " + entry.getValue());
75         }
76     }
77 
handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result)78     public CheckCLDR handleCheck(String path, String fullPath, String value, Options options,
79         List<CheckStatus> result) {
80         if (fullPath == null) return this; // skip paths that we don't have
81         if (fullPath.indexOf('[') < 0) return this; // skip paths with no attributes
82         String locale = getCldrFileToCheck().getSourceLocaleID(path, null);
83 
84         // skip paths that are not in the immediate locale
85         if (!getCldrFileToCheck().getLocaleID().equals(locale)) {
86             return this;
87         }
88         XPathParts parts = XPathParts.getFrozenInstance(fullPath);
89         for (int i = 0; i < parts.size(); ++i) {
90             if (parts.getAttributeCount(i) == 0) {
91                 continue;
92             }
93             Map<String, String> attributes = parts.getAttributes(i);
94             String element = parts.getElement(i);
95             Element elementInfo = ldmlDtdData.getElementFromName().get(element);
96 
97             Map<String, MatcherPattern> attribute_validity = element_attribute_validity.get(element);
98             for (String attribute : attributes.keySet()) {
99                 Attribute attributeInfo = elementInfo.getAttributeNamed(attribute);
100                 if (!attributeInfo.values.isEmpty()) {
101                     // we don't need to check, since the DTD will enforce values
102                     continue;
103                 }
104                 String attributeValue = attributes.get(attribute);
105 
106                 // special hack for         // <type key="calendar" type="chinese">Chinese Calendar</type>
107                 if (element.equals("type") && attribute.equals("type")) {
108                     Set<String> typeValues = BCP47_KEY_VALUES.get(attributes.get("key"));
109                     if (!typeValues.contains(attributeValue)) {
110                         result.add(new CheckStatus()
111                             .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.unexpectedAttributeValue)
112                             .setMessage("Unexpected Attribute Value {0}={1}: expected: {2}",
113                                 new Object[] { attribute, attributeValue, typeValues }));
114                     }
115                     continue;
116                 }
117                 // check the common attributes first
118                 boolean haveTest = check(common_attribute_validity, attribute, attributeValue, result);
119                 // then for the specific element
120                 haveTest = haveTest || check(attribute_validity, attribute, attributeValue, result);
121                 if (!haveTest && FIND_MISSING) {
122                     missingTests.put(element, attribute);
123                 }
124 
125                 // now for plurals
126 
127                 if (attribute.equals("count")) {
128                     if (DIGITS.containsAll(attributeValue)) {
129                         // ok, keep going
130                     } else {
131                         final Count countValue = PluralInfo.Count.valueOf(attributeValue);
132                         if (!pluralInfo.getCounts().contains(countValue)
133                             && !isPluralException(countValue, locale)) {
134                             result.add(new CheckStatus()
135                                 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.illegalPlural)
136                                 .setMessage("Illegal plural value {0}; must be one of: {1}",
137                                     new Object[] { countValue, pluralInfo.getCounts() }));
138                         }
139                     }
140                 }
141 
142                 // TODO check other variable elements, like dayPeriods
143             }
144         }
145         return this;
146     }
147 
148     static final Relation<PluralInfo.Count, String> PLURAL_EXCEPTIONS = Relation.of(
149         new EnumMap<PluralInfo.Count, Set<String>>(PluralInfo.Count.class), HashSet.class);
150 
151     static {
PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "hr")152         PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "hr");
PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sr")153         PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sr");
PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sh")154         PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sh");
PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "bs")155         PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "bs");
PLURAL_EXCEPTIONS.put(PluralInfo.Count.few, "ru")156         PLURAL_EXCEPTIONS.put(PluralInfo.Count.few, "ru");
157     }
158 
isPluralException(Count countValue, String locale)159     static boolean isPluralException(Count countValue, String locale) {
160         Set<String> exceptions = PLURAL_EXCEPTIONS.get(countValue);
161         if (exceptions == null) {
162             return false;
163         }
164         if (exceptions.contains(locale)) {
165             return true;
166         }
167         int bar = locale.indexOf('_'); // catch bs_Cyrl, etc.
168         if (bar > 0) {
169             String base = locale.substring(0, bar);
170             if (exceptions.contains(base)) {
171                 return true;
172             }
173         }
174         return false;
175     }
176 
177     /**
178      * return true if we performed a test
179      * @param attribute_validity
180      * @param attribute
181      * @param attributeValue
182      * @param result
183      * @return
184      */
check(Map<String, MatcherPattern> attribute_validity, String attribute, String attributeValue, List<CheckStatus> result)185     private boolean check(Map<String, MatcherPattern> attribute_validity, String attribute, String attributeValue,
186         List<CheckStatus> result) {
187         if (attribute_validity == null) {
188             return false; // no test
189         }
190         MatcherPattern matcherPattern = attribute_validity.get(attribute);
191         if (matcherPattern == null) {
192             return false; // no test
193         }
194         if (matcherPattern.matcher.matches(attributeValue)) {
195             return true;
196         }
197         // special check for deprecated codes
198         String replacement = getReplacement(matcherPattern.value, attributeValue);
199         if (replacement != null) {
200             if (isEnglish) {
201                 return true; // don't flag English
202             }
203             if (replacement.length() == 0) {
204                 result.add(new CheckStatus()
205                     .setCause(this).setMainType(CheckStatus.warningType).setSubtype(Subtype.deprecatedAttribute)
206                     .setMessage("Deprecated Attribute Value {0}={1}. Consider removing.",
207                         new Object[] { attribute, attributeValue }));
208             } else {
209                 result
210                     .add(new CheckStatus()
211                         .setCause(this)
212                         .setMainType(CheckStatus.warningType)
213                         .setSubtype(Subtype.deprecatedAttributeWithReplacement)
214                         .setMessage(
215                             "Deprecated Attribute Value {0}={1}. Consider removing, and possibly modifying the related value for {2}.",
216                             new Object[] { attribute, attributeValue, replacement }));
217             }
218         } else {
219             result.add(new CheckStatus()
220                 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.unexpectedAttributeValue)
221                 .setMessage("Unexpected Attribute Value {0}={1}: expected: {2}",
222                     new Object[] { attribute, attributeValue, matcherPattern.pattern }));
223         }
224         return true;
225     }
226 
227     /**
228      * Returns replacement, or null if there is none. "" if the code is deprecated, but without a replacement.
229      * Input is of the form $language
230      *
231      * @return
232      */
getReplacement(String value, String attributeValue)233     String getReplacement(String value, String attributeValue) {
234         Map<String, String> type_replacement = code_type_replacement.get(value);
235         if (type_replacement == null) {
236             return null;
237         }
238         return type_replacement.get(attributeValue);
239     }
240 
241     LocaleIDParser localeIDParser = new LocaleIDParser();
242 
243     @Override
setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors)244     public CheckCLDR setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options,
245         List<CheckStatus> possibleErrors) {
246         if (cldrFileToCheck == null) return this;
247         if (Phase.FINAL_TESTING == getPhase() || Phase.BUILD == getPhase()) {
248             setSkipTest(false); // ok
249         } else {
250             setSkipTest(true);
251             return this;
252         }
253 
254         pluralInfo = supplementalData.getPlurals(PluralType.cardinal, cldrFileToCheck.getLocaleID());
255         super.setCldrFileToCheck(cldrFileToCheck, options, possibleErrors);
256         isEnglish = "en".equals(localeIDParser.set(cldrFileToCheck.getLocaleID()).getLanguage());
257         synchronized (elementOrder) {
258             if (!initialized) {
259                 getMetadata();
260                 initialized = true;
261                 localeMatcher = LocaleMatcher.make();
262             }
263         }
264         if (!localeMatcher.matches(cldrFileToCheck.getLocaleID())) {
265             possibleErrors.add(new CheckStatus()
266                 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.invalidLocale)
267                 .setMessage("Invalid Locale {0}",
268                     new Object[] { cldrFileToCheck.getLocaleID() }));
269 
270         }
271         return this;
272     }
273 
getMetadata()274     private void getMetadata() {
275 
276         // sorting is expensive, but we need it here.
277 
278         Map<String, R2<String, String>> rawVariables = supplementalData.getValidityInfo();
279         for (Entry<String, R2<String, String>> item : rawVariables.entrySet()) {
280             String id = item.getKey();
281             String type = item.getValue().get0();
282             String value = item.getValue().get1();
283             MatcherPattern mp = getMatcherPattern2(type, value);
284             if (mp != null) {
285                 variables.put(id, mp);
286                 // variableReplacer.add(id, value);
287             }
288         }
289         //System.out.println("Variables: " + variables.keySet());
290 
291         Map<AttributeValidityInfo, String> rawAttributeValueInfo = supplementalData.getAttributeValidity();
292 
293         for (Entry<AttributeValidityInfo, String> entry : rawAttributeValueInfo.entrySet()) {
294             AttributeValidityInfo item = entry.getKey();
295             String value = entry.getValue();
296             MatcherPattern mp = getMatcherPattern2(item.getType(), value);
297             if (mp == null) {
298                 System.out.println("Failed to make matcher for: " + item);
299                 continue;
300             }
301             if (FIND_MISSING && mp.matcher == NOT_DONE_YET) {
302                 missingTests.put(item.getElements().toString(), item.getAttributes().toString());
303             }
304 
305             Set<DtdType> dtds = item.getDtds();
306             // TODO handle other DTDs
307             if (!dtds.contains(DtdType.ldml)) {
308                 continue;
309             }
310             Set<String> attributeList = item.getAttributes();
311             Set<String> elementList = item.getElements();
312             if (elementList.size() == 0) {
313                 addAttributes(attributeList, common_attribute_validity, mp);
314             } else {
315                 for (String element : elementList) {
316                     // check if unnecessary
317                     Element elementInfo = ldmlDtdData.getElementFromName().get(element);
318                     if (elementInfo == null) {
319                         System.out.println("Illegal <attributeValues>, element not valid: element: " + element);
320                     } else {
321                         for (String attribute : attributeList) {
322                             Attribute attributeInfo = elementInfo.getAttributeNamed(attribute);
323                             if (attributeInfo == null) {
324                                 System.out.println("Illegal <attributeValues>, attribute not valid: element: " + element + ", attribute: " + attribute);
325                             } else if (!attributeInfo.values.isEmpty()) {
326                                 if (SHOW_UNNECESSARY) {
327                                     System.out.println("Unnecessary <attributeValues …>, the DTD has specific list: element: " + element + ", attribute: "
328                                         + attribute + ", " + attributeInfo.values);
329                                 }
330                             }
331                         }
332                     }
333                     // System.out.println("\t" + element);
334                     Map<String, MatcherPattern> attribute_validity = element_attribute_validity.get(element);
335                     if (attribute_validity == null) {
336                         element_attribute_validity.put(element, attribute_validity = new TreeMap<String, MatcherPattern>());
337                     }
338                     addAttributes(attributeList, attribute_validity, mp);
339                 }
340             }
341         }
342     }
343 
344     final static Map<String, Set<String>> BCP47_KEY_VALUES;
345     static {
346         Map<String, Set<String>> temp = new HashMap<>();
347         Relation<R2<String, String>, String> bcp47Aliases = supplementalData.getBcp47Aliases();
348         for (Entry<String, Set<String>> keyValues : supplementalData.getBcp47Keys().keyValuesSet()) {
349             Set<String> fullValues = new TreeSet<>();
350             String key = keyValues.getKey();
351             Set<String> rawValues = keyValues.getValue();
352             for (String value : rawValues) {
353                 if (key.equals("cu")) { // Currency codes are in upper case.
value.toUpperCase()354                     fullValues.add(value.toUpperCase());
355                 } else {
356                     fullValues.add(value);
357                 }
358                 R2<String, String> keyValue = R2.of(key, value);
359                 Set<String> aliases = bcp47Aliases.getAll(keyValue);
360                 if (aliases != null) {
361                     fullValues.addAll(aliases);
362                 }
363             }
364             // Special case exception for generic calendar, since we don't want to expose it in bcp47
365             if (key.equals("ca")) {
366                 fullValues.add("generic");
367             }
368             fullValues = Collections.unmodifiableSet(fullValues);
temp.put(key, fullValues)369             temp.put(key, fullValues);
370             // add aliased keys
371             Set<String> aliases = supplementalData.getBcp47Aliases().getAll(Row.of(key, ""));
372             if (aliases != null) {
373                 for (String aliasKey : aliases) {
temp.put(aliasKey, fullValues)374                     temp.put(aliasKey, fullValues);
375                 }
376             }
377             temp.put("x", Collections.EMPTY_SET); // Hack for 'x', private use.
378         }
379         BCP47_KEY_VALUES = Collections.unmodifiableMap(temp);
380     }
381 
getBcp47MatcherPattern(String key)382     private MatcherPattern getBcp47MatcherPattern(String key) {
383         // <key type="calendar">Calendar</key>
384         // <type key="calendar" type="chinese">Chinese Calendar</type>
385 
386         //<attributeValues elements="key" attributes="type" type="bcp47">key</attributeValues>
387         //<attributeValues elements="type" attributes="key" type="bcp47">key</attributeValues>
388         //<attributeValues elements="type" attributes="type" type="bcp47">use-key</attributeValues>
389 
390         MatcherPattern m = new MatcherPattern();
391         Set<String> values;
392         if (key.equals("key")) {
393             values = BCP47_KEY_VALUES.keySet();
394         } else {
395             values = BCP47_KEY_VALUES.get(key);
396         }
397         m.value = key;
398         m.pattern = values.toString();
399         m.matcher = new CollectionMatcher().set(values);
400         return m;
401     }
402 
getMatcherPattern2(String type, String value)403     private MatcherPattern getMatcherPattern2(String type, String value) {
404         String typeAttribute = type;
405         MatcherPattern result = variables.get(value);
406         if (result != null) {
407             MatcherPattern temp = new MatcherPattern();
408             temp.pattern = result.pattern;
409             temp.matcher = result.matcher;
410             temp.value = value;
411             result = temp;
412             if ("list".equals(typeAttribute)) {
413                 temp.matcher = new ListMatcher().set(result.matcher);
414             }
415             return result;
416         }
417 
418         result = new MatcherPattern();
419         result.pattern = value;
420         result.value = value;
421         if ("choice".equals(typeAttribute)) {
422             result.matcher = new CollectionMatcher()
423                 .set(new HashSet<String>(Arrays.asList(value.trim().split("\\s+"))));
424         } else if ("bcp47".equals(typeAttribute)) {
425             result = getBcp47MatcherPattern(value);
426         } else if ("regex".equals(typeAttribute)) {
427             result.matcher = new RegexMatcher().set(value, Pattern.COMMENTS); // Pattern.COMMENTS to get whitespace
428         } else if ("locale".equals(typeAttribute)) {
429             result.matcher = LocaleMatcher.make();
430         } else if ("notDoneYet".equals(typeAttribute) || "notDoneYet".equals(value)) {
431             result.matcher = NOT_DONE_YET;
432         } else {
433             System.out.println("unknown type; value: <" + value + ">,\t" + typeAttribute);
434             return null;
435         }
436         return result;
437     }
438 
addAttributes(Set<String> attributes, Map<String, MatcherPattern> attribute_validity, MatcherPattern mp)439     private void addAttributes(Set<String> attributes, Map<String, MatcherPattern> attribute_validity, MatcherPattern mp) {
440         for (String attribute : attributes) {
441             MatcherPattern old = attribute_validity.get(attribute);
442             if (old != null) {
443                 mp.matcher = new OrMatcher().set(old.matcher, mp.matcher);
444                 mp.pattern = old.pattern + " OR " + mp.pattern;
445             }
446             attribute_validity.put(attribute, mp);
447         }
448     }
449 
450     private static class MatcherPattern {
451         public String value;
452         ObjectMatcher<String> matcher;
453         String pattern;
454 
toString()455         public String toString() {
456             return matcher.getClass().getName() + "\t" + pattern;
457         }
458     }
459 
460     public static class RegexMatcher implements ObjectMatcher<String> {
461         private java.util.regex.Matcher matcher;
462 
set(String pattern)463         public ObjectMatcher<String> set(String pattern) {
464             matcher = PatternCache.get(pattern).matcher("");
465             return this;
466         }
467 
set(String pattern, int flags)468         public ObjectMatcher<String> set(String pattern, int flags) {
469             matcher = Pattern.compile(pattern, flags).matcher("");
470             return this;
471         }
472 
matches(String value)473         public boolean matches(String value) {
474             matcher.reset(value.toString());
475             return matcher.matches();
476         }
477     }
478 
479     public static class CollectionMatcher implements ObjectMatcher<String> {
480         private Collection<String> collection;
481 
set(Collection<String> collection)482         public ObjectMatcher<String> set(Collection<String> collection) {
483             this.collection = collection;
484             return this;
485         }
486 
matches(String value)487         public boolean matches(String value) {
488             return collection.contains(value);
489         }
490     }
491 
492     public static class OrMatcher implements ObjectMatcher<String> {
493         private ObjectMatcher<String> a;
494         private ObjectMatcher<String> b;
495 
set(ObjectMatcher<String> a, ObjectMatcher<String> b)496         public ObjectMatcher<String> set(ObjectMatcher<String> a, ObjectMatcher<String> b) {
497             this.a = a;
498             this.b = b;
499             return this;
500         }
501 
matches(String value)502         public boolean matches(String value) {
503             return a.matches(value) || b.matches(value);
504         }
505     }
506 
507     public static class ListMatcher implements ObjectMatcher<String> {
508         private ObjectMatcher<String> other;
509 
set(ObjectMatcher<String> other)510         public ObjectMatcher<String> set(ObjectMatcher<String> other) {
511             this.other = other;
512             return this;
513         }
514 
matches(String value)515         public boolean matches(String value) {
516             String[] values = value.trim().split("\\s+");
517             if (values.length == 1 && values[0].length() == 0) return true;
518             for (int i = 0; i < values.length; ++i) {
519                 if (!other.matches(values[i])) {
520                     return false;
521                 }
522             }
523             return true;
524         }
525     }
526 
527     public static class LocaleMatcher implements ObjectMatcher<String> {
528         ObjectMatcher<String> grandfathered = variables.get("$grandfathered").matcher;
529         ObjectMatcher<String> language = variables.get("$language").matcher;
530         ObjectMatcher<String> script = variables.get("$script").matcher;
531         ObjectMatcher<String> territory = variables.get("$territory").matcher;
532         ObjectMatcher<String> variant = variables.get("$variant").matcher;
533         LocaleIDParser lip = new LocaleIDParser();
534         static LocaleMatcher singleton = null;
535         static Object sync = new Object();
536 
LocaleMatcher(boolean b)537         private LocaleMatcher(boolean b) {
538         }
539 
make()540         public static LocaleMatcher make() {
541             synchronized (sync) {
542                 if (singleton == null) {
543                     singleton = new LocaleMatcher(true);
544                 }
545             }
546             return singleton;
547         }
548 
matches(String value)549         public boolean matches(String value) {
550             if (grandfathered.matches(value)) return true;
551             lip.set((String) value);
552             String field = lip.getLanguage();
553             if (!language.matches(field)) return false;
554             field = lip.getScript();
555             if (field.length() != 0 && !script.matches(field)) return false;
556             field = lip.getRegion();
557             if (field.length() != 0 && !territory.matches(field)) return false;
558             String[] fields = lip.getVariants();
559             for (int i = 0; i < fields.length; ++i) {
560                 if (!variant.matches(fields[i])) return false;
561             }
562             return true;
563         }
564     }
565 
566 }