• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.test;
2 
3 import java.util.Arrays;
4 import java.util.Collection;
5 import java.util.Collections;
6 import java.util.EnumMap;
7 import java.util.HashMap;
8 import java.util.HashSet;
9 import java.util.LinkedHashSet;
10 import java.util.List;
11 import java.util.Map;
12 import java.util.Map.Entry;
13 import java.util.Set;
14 import java.util.TreeMap;
15 import java.util.TreeSet;
16 import java.util.function.Predicate;
17 import java.util.regex.Pattern;
18 
19 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
20 import org.unicode.cldr.util.CLDRConfig;
21 import org.unicode.cldr.util.CLDRFile;
22 import org.unicode.cldr.util.CldrUtility;
23 import org.unicode.cldr.util.DtdData;
24 import org.unicode.cldr.util.DtdData.Attribute;
25 import org.unicode.cldr.util.DtdData.Element;
26 import org.unicode.cldr.util.DtdType;
27 import org.unicode.cldr.util.Factory;
28 import org.unicode.cldr.util.LocaleIDParser;
29 import org.unicode.cldr.util.PatternCache;
30 import org.unicode.cldr.util.SupplementalDataInfo;
31 import org.unicode.cldr.util.SupplementalDataInfo.AttributeValidityInfo;
32 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
33 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
34 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
35 import org.unicode.cldr.util.XPathParts;
36 
37 import com.ibm.icu.impl.Relation;
38 import com.ibm.icu.impl.Row;
39 import com.ibm.icu.impl.Row.R2;
40 import com.ibm.icu.text.UnicodeSet;
41 
42 public class CheckAttributeValues extends FactoryCheckCLDR {
43 
44     private static final Predicate<String> NOT_DONE_YET = new RegexMatcher().set(".*", Pattern.COMMENTS);
45     private static final boolean FIND_MISSING = CldrUtility.getProperty("FIND_MISSING_ATTRIBUTE_TESTS", false); // turn on to show <attributeValues> that are missing.
46     private static final boolean SHOW_UNNECESSARY = false; // turn on to show <attributeValues> we should delete.
47 
48     static LinkedHashSet<String> elementOrder = new LinkedHashSet<>();
49     static LinkedHashSet<String> attributeOrder = new LinkedHashSet<>();
50     static LinkedHashSet<String> serialElements = new LinkedHashSet<>();
51     static Map<String, Map<String, MatcherPattern>> element_attribute_validity = new HashMap<>();
52     static Map<String, MatcherPattern> common_attribute_validity = new HashMap<>();
53     static Map<String, MatcherPattern> variables = new HashMap<>();
54     // static VariableReplacer variableReplacer = new VariableReplacer(); // note: this can be coalesced with the above
55     // -- to do later.
56     static boolean initialized = false;
57     static LocaleMatcher localeMatcher;
58     static Map<String, Map<String, String>> code_type_replacement = new TreeMap<>();
59     static final SupplementalDataInfo supplementalData = CLDRConfig.getInstance().getSupplementalDataInfo();
60     static DtdData ldmlDtdData = DtdData.getInstance(DtdType.ldml);
61 
62     boolean isEnglish;
63     PluralInfo pluralInfo;
64     Relation<String, String> missingTests = Relation.of(new TreeMap(), TreeSet.class);
65 
66     static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze();
67 
CheckAttributeValues(Factory factory)68     public CheckAttributeValues(Factory factory) {
69         super(factory);
70     }
71 
72     @Override
handleFinish()73     public void handleFinish() {
74         for (Entry<String, Set<String>> entry : missingTests.keyValuesSet()) {
75             System.out.println("Missing element: " + entry.getKey() + ", attributes: " + entry.getValue());
76         }
77     }
78 
79     @Override
handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result)80     public CheckCLDR handleCheck(String path, String fullPath, String value, Options options,
81         List<CheckStatus> result) {
82         if (fullPath == null) return this; // skip paths that we don't have
83         if (fullPath.indexOf('[') < 0) return this; // skip paths with no attributes
84         String locale = getCldrFileToCheck().getSourceLocaleID(path, null);
85 
86         // skip paths that are not in the immediate locale
87         if (!getCldrFileToCheck().getLocaleID().equals(locale)) {
88             return this;
89         }
90         XPathParts parts = XPathParts.getFrozenInstance(fullPath);
91         for (int i = 0; i < parts.size(); ++i) {
92             if (parts.getAttributeCount(i) == 0) {
93                 continue;
94             }
95             Map<String, String> attributes = parts.getAttributes(i);
96             String element = parts.getElement(i);
97             Element elementInfo = ldmlDtdData.getElementFromName().get(element);
98 
99             Map<String, MatcherPattern> attribute_validity = element_attribute_validity.get(element);
100             for (String attribute : attributes.keySet()) {
101                 Attribute attributeInfo = elementInfo.getAttributeNamed(attribute);
102                 if (!attributeInfo.values.isEmpty()) {
103                     // we don't need to check, since the DTD will enforce values
104                     continue;
105                 }
106                 String attributeValue = attributes.get(attribute);
107 
108                 // special hack for         // <type key="calendar" type="chinese">Chinese Calendar</type>
109                 if (element.equals("type") && attribute.equals("type")) {
110                     Set<String> typeValues = BCP47_KEY_VALUES.get(attributes.get("key"));
111                     if (!typeValues.contains(attributeValue)) {
112                         result.add(new CheckStatus()
113                             .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.unexpectedAttributeValue)
114                             .setMessage("Unexpected Attribute Value {0}={1}: expected: {2}",
115                                 new Object[] { attribute, attributeValue, typeValues }));
116                     }
117                     continue;
118                 }
119                 // check the common attributes first
120                 boolean haveTest = check(common_attribute_validity, attribute, attributeValue, result);
121                 // then for the specific element
122                 haveTest = haveTest || check(attribute_validity, attribute, attributeValue, result);
123                 if (!haveTest && FIND_MISSING) {
124                     missingTests.put(element, attribute);
125                 }
126 
127                 // now for plurals
128 
129                 if (attribute.equals("count")) {
130                     if (DIGITS.containsAll(attributeValue)) {
131                         // ok, keep going
132                     } else {
133                         final Count countValue = PluralInfo.Count.valueOf(attributeValue);
134                         if (!pluralInfo.getCounts().contains(countValue)
135                             && !isPluralException(countValue, locale)) {
136                             result.add(new CheckStatus()
137                                 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.illegalPlural)
138                                 .setMessage("Illegal plural value {0}; must be one of: {1}",
139                                     new Object[] { countValue, pluralInfo.getCounts() }));
140                         }
141                     }
142                 }
143 
144                 // TODO check other variable elements, like dayPeriods
145             }
146         }
147         return this;
148     }
149 
150     static final Relation<PluralInfo.Count, String> PLURAL_EXCEPTIONS = Relation.of(
151         new EnumMap<PluralInfo.Count, Set<String>>(PluralInfo.Count.class), HashSet.class);
152 
153     static {
PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "hr")154         PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "hr");
PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sr")155         PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sr");
PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sh")156         PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sh");
PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "bs")157         PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "bs");
PLURAL_EXCEPTIONS.put(PluralInfo.Count.few, "ru")158         PLURAL_EXCEPTIONS.put(PluralInfo.Count.few, "ru");
159     }
160 
isPluralException(Count countValue, String locale)161     static boolean isPluralException(Count countValue, String locale) {
162         Set<String> exceptions = PLURAL_EXCEPTIONS.get(countValue);
163         if (exceptions == null) {
164             return false;
165         }
166         if (exceptions.contains(locale)) {
167             return true;
168         }
169         int bar = locale.indexOf('_'); // catch bs_Cyrl, etc.
170         if (bar > 0) {
171             String base = locale.substring(0, bar);
172             if (exceptions.contains(base)) {
173                 return true;
174             }
175         }
176         return false;
177     }
178 
179     /**
180      * return true if we performed a test
181      * @param attribute_validity
182      * @param attribute
183      * @param attributeValue
184      * @param result
185      * @return
186      */
check(Map<String, MatcherPattern> attribute_validity, String attribute, String attributeValue, List<CheckStatus> result)187     private boolean check(Map<String, MatcherPattern> attribute_validity, String attribute, String attributeValue,
188         List<CheckStatus> result) {
189         if (attribute_validity == null) {
190             return false; // no test
191         }
192         MatcherPattern matcherPattern = attribute_validity.get(attribute);
193         if (matcherPattern == null) {
194             return false; // no test
195         }
196         if (matcherPattern.matcher.test(attributeValue)) {
197             return true;
198         }
199         // special check for deprecated codes
200         String replacement = getReplacement(matcherPattern.value, attributeValue);
201         if (replacement != null) {
202             if (isEnglish) {
203                 return true; // don't flag English
204             }
205             if (replacement.length() == 0) {
206                 result.add(new CheckStatus()
207                     .setCause(this).setMainType(CheckStatus.warningType).setSubtype(Subtype.deprecatedAttribute)
208                     .setMessage("Deprecated Attribute Value {0}={1}. Consider removing.",
209                         new Object[] { attribute, attributeValue }));
210             } else {
211                 result
212                     .add(new CheckStatus()
213                         .setCause(this)
214                         .setMainType(CheckStatus.warningType)
215                         .setSubtype(Subtype.deprecatedAttributeWithReplacement)
216                         .setMessage(
217                             "Deprecated Attribute Value {0}={1}. Consider removing, and possibly modifying the related value for {2}.",
218                             new Object[] { attribute, attributeValue, replacement }));
219             }
220         } else {
221             result.add(new CheckStatus()
222                 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.unexpectedAttributeValue)
223                 .setMessage("Unexpected Attribute Value {0}={1}: expected: {2}",
224                     new Object[] { attribute, attributeValue, matcherPattern.pattern }));
225         }
226         return true;
227     }
228 
229     /**
230      * Returns replacement, or null if there is none. "" if the code is deprecated, but without a replacement.
231      * Input is of the form $language
232      *
233      * @return
234      */
getReplacement(String value, String attributeValue)235     String getReplacement(String value, String attributeValue) {
236         Map<String, String> type_replacement = code_type_replacement.get(value);
237         if (type_replacement == null) {
238             return null;
239         }
240         return type_replacement.get(attributeValue);
241     }
242 
243     LocaleIDParser localeIDParser = new LocaleIDParser();
244 
245     @Override
setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors)246     public CheckCLDR setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options,
247         List<CheckStatus> possibleErrors) {
248         if (cldrFileToCheck == null) return this;
249         if (Phase.FINAL_TESTING == getPhase() || Phase.BUILD == getPhase()) {
250             setSkipTest(false); // ok
251         } else {
252             setSkipTest(true);
253             return this;
254         }
255 
256         pluralInfo = supplementalData.getPlurals(PluralType.cardinal, cldrFileToCheck.getLocaleID());
257         super.setCldrFileToCheck(cldrFileToCheck, options, possibleErrors);
258         isEnglish = "en".equals(localeIDParser.set(cldrFileToCheck.getLocaleID()).getLanguage());
259         synchronized (elementOrder) {
260             if (!initialized) {
261                 getMetadata();
262                 initialized = true;
263                 localeMatcher = LocaleMatcher.make();
264             }
265         }
266         if (!localeMatcher.test(cldrFileToCheck.getLocaleID())) {
267             possibleErrors.add(new CheckStatus()
268                 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.invalidLocale)
269                 .setMessage("Invalid Locale {0}",
270                     new Object[] { cldrFileToCheck.getLocaleID() }));
271 
272         }
273         return this;
274     }
275 
getMetadata()276     private void getMetadata() {
277 
278         // sorting is expensive, but we need it here.
279 
280         Map<String, R2<String, String>> rawVariables = supplementalData.getValidityInfo();
281         for (Entry<String, R2<String, String>> item : rawVariables.entrySet()) {
282             String id = item.getKey();
283             String type = item.getValue().get0();
284             String value = item.getValue().get1();
285             MatcherPattern mp = getMatcherPattern2(type, value);
286             if (mp != null) {
287                 variables.put(id, mp);
288                 // variableReplacer.add(id, value);
289             }
290         }
291         //System.out.println("Variables: " + variables.keySet());
292 
293         Map<AttributeValidityInfo, String> rawAttributeValueInfo = supplementalData.getAttributeValidity();
294 
295         for (Entry<AttributeValidityInfo, String> entry : rawAttributeValueInfo.entrySet()) {
296             AttributeValidityInfo item = entry.getKey();
297             String value = entry.getValue();
298             MatcherPattern mp = getMatcherPattern2(item.getType(), value);
299             if (mp == null) {
300                 System.out.println("Failed to make matcher for: " + item);
301                 continue;
302             }
303             if (FIND_MISSING && mp.matcher == NOT_DONE_YET) {
304                 missingTests.put(item.getElements().toString(), item.getAttributes().toString());
305             }
306 
307             Set<DtdType> dtds = item.getDtds();
308             // TODO handle other DTDs
309             if (!dtds.contains(DtdType.ldml)) {
310                 continue;
311             }
312             Set<String> attributeList = item.getAttributes();
313             Set<String> elementList = item.getElements();
314             if (elementList.size() == 0) {
315                 addAttributes(attributeList, common_attribute_validity, mp);
316             } else {
317                 for (String element : elementList) {
318                     // check if unnecessary
319                     Element elementInfo = ldmlDtdData.getElementFromName().get(element);
320                     if (elementInfo == null) {
321                         System.out.println("Illegal <attributeValues>, element not valid: element: " + element);
322                     } else {
323                         for (String attribute : attributeList) {
324                             Attribute attributeInfo = elementInfo.getAttributeNamed(attribute);
325                             if (attributeInfo == null) {
326                                 System.out.println("Illegal <attributeValues>, attribute not valid: element: " + element + ", attribute: " + attribute);
327                             } else if (!attributeInfo.values.isEmpty()) {
328                                 if (SHOW_UNNECESSARY) {
329                                     System.out.println("Unnecessary <attributeValues …>, the DTD has specific list: element: " + element + ", attribute: "
330                                         + attribute + ", " + attributeInfo.values);
331                                 }
332                             }
333                         }
334                     }
335                     // System.out.println("\t" + element);
336                     Map<String, MatcherPattern> attribute_validity = element_attribute_validity.get(element);
337                     if (attribute_validity == null) {
338                         element_attribute_validity.put(element, attribute_validity = new TreeMap<>());
339                     }
340                     addAttributes(attributeList, attribute_validity, mp);
341                 }
342             }
343         }
344     }
345 
346     final static Map<String, Set<String>> BCP47_KEY_VALUES;
347     static {
348         Map<String, Set<String>> temp = new HashMap<>();
349         Relation<R2<String, String>, String> bcp47Aliases = supplementalData.getBcp47Aliases();
350         for (Entry<String, Set<String>> keyValues : supplementalData.getBcp47Keys().keyValuesSet()) {
351             Set<String> fullValues = new TreeSet<>();
352             String key = keyValues.getKey();
353             Set<String> rawValues = keyValues.getValue();
354             for (String value : rawValues) {
355                 if (key.equals("cu")) { // Currency codes are in upper case.
value.toUpperCase()356                     fullValues.add(value.toUpperCase());
357                 } else {
358                     fullValues.add(value);
359                 }
360                 R2<String, String> keyValue = R2.of(key, value);
361                 Set<String> aliases = bcp47Aliases.getAll(keyValue);
362                 if (aliases != null) {
363                     fullValues.addAll(aliases);
364                 }
365             }
366             // Special case exception for generic calendar, since we don't want to expose it in bcp47
367             if (key.equals("ca")) {
368                 fullValues.add("generic");
369             }
370             fullValues = Collections.unmodifiableSet(fullValues);
temp.put(key, fullValues)371             temp.put(key, fullValues);
372             // add aliased keys
373             Set<String> aliases = supplementalData.getBcp47Aliases().getAll(Row.of(key, ""));
374             if (aliases != null) {
375                 for (String aliasKey : aliases) {
temp.put(aliasKey, fullValues)376                     temp.put(aliasKey, fullValues);
377                 }
378             }
379             temp.put("x", Collections.EMPTY_SET); // Hack for 'x', private use.
380         }
381         BCP47_KEY_VALUES = Collections.unmodifiableMap(temp);
382     }
383 
getBcp47MatcherPattern(String key)384     private MatcherPattern getBcp47MatcherPattern(String key) {
385         // <key type="calendar">Calendar</key>
386         // <type key="calendar" type="chinese">Chinese Calendar</type>
387 
388         //<attributeValues elements="key" attributes="type" type="bcp47">key</attributeValues>
389         //<attributeValues elements="type" attributes="key" type="bcp47">key</attributeValues>
390         //<attributeValues elements="type" attributes="type" type="bcp47">use-key</attributeValues>
391 
392         MatcherPattern m = new MatcherPattern();
393         Set<String> values;
394         if (key.equals("key")) {
395             values = BCP47_KEY_VALUES.keySet();
396         } else {
397             values = BCP47_KEY_VALUES.get(key);
398         }
399         m.value = key;
400         m.pattern = values.toString();
401         m.matcher = new CollectionMatcher().set(values);
402         return m;
403     }
404 
getMatcherPattern2(String type, String value)405     private MatcherPattern getMatcherPattern2(String type, String value) {
406         String typeAttribute = type;
407         MatcherPattern result = variables.get(value);
408         if (result != null) {
409             MatcherPattern temp = new MatcherPattern();
410             temp.pattern = result.pattern;
411             temp.matcher = result.matcher;
412             temp.value = value;
413             result = temp;
414             if ("list".equals(typeAttribute)) {
415                 temp.matcher = new ListMatcher().set(result.matcher);
416             }
417             return result;
418         }
419 
420         result = new MatcherPattern();
421         result.pattern = value;
422         result.value = value;
423         if ("choice".equals(typeAttribute)) {
424             result.matcher = new CollectionMatcher()
425                 .set(new HashSet<>(Arrays.asList(value.trim().split("\\s+"))));
426         } else if ("bcp47".equals(typeAttribute)) {
427             result = getBcp47MatcherPattern(value);
428         } else if ("regex".equals(typeAttribute)) {
429             result.matcher = new RegexMatcher().set(value, Pattern.COMMENTS); // Pattern.COMMENTS to get whitespace
430         } else if ("locale".equals(typeAttribute)) {
431             result.matcher = LocaleMatcher.make();
432         } else if ("notDoneYet".equals(typeAttribute) || "notDoneYet".equals(value)) {
433             result.matcher = NOT_DONE_YET;
434         } else {
435             System.out.println("unknown type; value: <" + value + ">,\t" + typeAttribute);
436             return null;
437         }
438         return result;
439     }
440 
addAttributes(Set<String> attributes, Map<String, MatcherPattern> attribute_validity, MatcherPattern mp)441     private void addAttributes(Set<String> attributes, Map<String, MatcherPattern> attribute_validity, MatcherPattern mp) {
442         for (String attribute : attributes) {
443             MatcherPattern old = attribute_validity.get(attribute);
444             if (old != null) {
445                 mp.matcher = new OrMatcher().set(old.matcher, mp.matcher);
446                 mp.pattern = old.pattern + " OR " + mp.pattern;
447             }
448             attribute_validity.put(attribute, mp);
449         }
450     }
451 
452     private static class MatcherPattern {
453         public String value;
454         Predicate<String> matcher;
455         String pattern;
456 
457         @Override
toString()458         public String toString() {
459             return matcher.getClass().getName() + "\t" + pattern;
460         }
461     }
462 
463     public static class RegexMatcher implements Predicate<String> {
464         private java.util.regex.Matcher matcher;
465 
set(String pattern)466         public Predicate<String> set(String pattern) {
467             matcher = PatternCache.get(pattern).matcher("");
468             return this;
469         }
470 
set(String pattern, int flags)471         public Predicate<String> set(String pattern, int flags) {
472             matcher = Pattern.compile(pattern, flags).matcher("");
473             return this;
474         }
475 
476         @Override
test(String value)477         public boolean test(String value) {
478             matcher.reset(value.toString());
479             return matcher.matches();
480         }
481     }
482 
483     public static class CollectionMatcher implements Predicate<String> {
484         private Collection<String> collection;
485 
set(Collection<String> collection)486         public Predicate<String> set(Collection<String> collection) {
487             this.collection = collection;
488             return this;
489         }
490 
491         @Override
test(String value)492         public boolean test(String value) {
493             return collection.contains(value);
494         }
495     }
496 
497     public static class OrMatcher implements Predicate<String> {
498         private Predicate<String> a;
499         private Predicate<String> b;
500 
set(Predicate<String> a, Predicate<String> b)501         public Predicate<String> set(Predicate<String> a, Predicate<String> b) {
502             this.a = a;
503             this.b = b;
504             return this;
505         }
506 
507         @Override
test(String value)508         public boolean test(String value) {
509             return a.test(value) || b.test(value);
510         }
511     }
512 
513     public static class ListMatcher implements Predicate<String> {
514         private Predicate<String> other;
515 
set(Predicate<String> other)516         public Predicate<String> set(Predicate<String> other) {
517             this.other = other;
518             return this;
519         }
520 
521         @Override
test(String value)522         public boolean test(String value) {
523             String[] values = value.trim().split("\\s+");
524             if (values.length == 1 && values[0].length() == 0) return true;
525             for (int i = 0; i < values.length; ++i) {
526                 if (!other.test(values[i])) {
527                     return false;
528                 }
529             }
530             return true;
531         }
532     }
533 
534     public static class LocaleMatcher implements Predicate<String> {
535         Predicate<String> legacy = variables.get("$grandfathered").matcher;
536         Predicate<String> language = variables.get("$language").matcher;
537         Predicate<String> script = variables.get("$script").matcher;
538         Predicate<String> territory = variables.get("$territory").matcher;
539         Predicate<String> variant = variables.get("$variant").matcher;
540         LocaleIDParser lip = new LocaleIDParser();
541 
LocaleMatcher()542         private LocaleMatcher() {
543         }
544 
545         private static final class LocaleMatcherHelper {
546             static LocaleMatcher SINGLETON = new LocaleMatcher();
547         }
548 
make()549         public static LocaleMatcher make() {
550             return LocaleMatcherHelper.SINGLETON;
551         }
552 
553         @Override
test(String value)554         public boolean test(String value) {
555             if (legacy.test(value)) return true;
556             lip.set((String) value);
557             String field = lip.getLanguage();
558             if (!language.test(field)) return false;
559             field = lip.getScript();
560             if (field.length() != 0 && !script.test(field)) return false;
561             field = lip.getRegion();
562             if (field.length() != 0 && !territory.test(field)) return false;
563             String[] fields = lip.getVariants();
564             for (int i = 0; i < fields.length; ++i) {
565                 if (!variant.test(fields[i])) return false;
566             }
567             return true;
568         }
569     }
570 
571 }
572