• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.collect.HashMultimap;
5 import com.google.common.collect.ImmutableMap;
6 import com.google.common.collect.ImmutableMultimap;
7 import com.google.common.collect.Multimap;
8 import com.ibm.icu.impl.Row;
9 import com.ibm.icu.impl.Row.R2;
10 import com.ibm.icu.lang.UCharacter;
11 import java.util.Collection;
12 import java.util.List;
13 import java.util.Map;
14 import java.util.Map.Entry;
15 import java.util.Set;
16 import java.util.function.Function;
17 import java.util.regex.Pattern;
18 import java.util.stream.Collectors;
19 import org.unicode.cldr.util.StandardCodes.LstrField;
20 import org.unicode.cldr.util.StandardCodes.LstrType;
21 
22 public class LocaleValidator {
23     static final SupplementalDataInfo SDI = SupplementalDataInfo.getInstance();
24 
25     /** For backwards compatibility, certain non-regular codes are allowed in LikelySubtags. */
26     public static final LocaleValidator.AllowedValid ALLOW_IN_LIKELY =
27             new LocaleValidator.AllowedValid(
28                     null,
29                     LstrType.region,
30                     new LocaleValidator.AllowedMatch("001|419"),
31                     LstrType.language,
32                     new LocaleValidator.AllowedMatch("und|in|iw|ji|jw|mo|tl"));
33 
34     static final Validity VALIDITY = Validity.getInstance();
35     static final Set<LstrType> FIELD_ALLOWS_EMPTY = Set.of(LstrType.script, LstrType.region);
36     // Map<LstrType, Map<String, Map<LstrField, String>>>
37     static final Map<String, Validity.Status> VALID_VARIANTS =
38             ImmutableMap.copyOf(
39                     StandardCodes.getEnumLstreg().get(LstrType.variant).entrySet().stream()
40                             .collect(
41                                     Collectors.toMap(
42                                             x -> x.getKey(),
43                                             y ->
44                                                     y.getValue().get(LstrField.Deprecated) == null
45                                                             ? Validity.Status.regular
46                                                             : Validity.Status.deprecated)));
47 
48     private static final Map<String, Validity.Status> KR_REORDER =
49             SupplementalDataInfo.getInstance().getBcp47Keys().get("kr").stream()
50                     .filter(x -> !x.equals("REORDER_CODE"))
51                     .collect(
52                             Collectors.toMap(
53                                     Function.identity(),
54                                     y -> {
55                                         String temp =
56                                                 SupplementalDataInfo.getInstance()
57                                                         .getBcp47Deprecated()
58                                                         .get(Row.of("kr", y));
59                                         return "false".equals(temp)
60                                                 ? Validity.Status.regular
61                                                 : Validity.Status.deprecated;
62                                     }));
63     private static final Map<String, Validity.Status> LOWERCASE_SCRIPT =
64             VALIDITY.getCodeToStatus(LstrType.script).entrySet().stream()
65                     .collect(
66                             Collectors.toMap(
67                                     x -> UCharacter.toLowerCase(x.getKey()), x -> x.getValue()));
68 
69     private static final Map<String, Validity.Status> LOWERCASE_REGION =
70             VALIDITY.getCodeToStatus(LstrType.script).entrySet().stream()
71                     .collect(
72                             Collectors.toMap(
73                                     x -> UCharacter.toLowerCase(x.getKey()), x -> x.getValue()));
74 
75     public static class AllowedMatch {
76         final Pattern key;
77         final Pattern value;
78         final Validity.Status status;
79 
AllowedMatch(String code)80         public AllowedMatch(String code) {
81             this(code, null, null);
82         }
83 
AllowedMatch(String code, String value)84         public AllowedMatch(String code, String value) {
85             this(code, value, null);
86         }
87 
AllowedMatch(String code, String value, Validity.Status status)88         public AllowedMatch(String code, String value, Validity.Status status) {
89             this.key = code == null ? null : Pattern.compile(code);
90             this.value = value == null ? null : Pattern.compile(value);
91             this.status = status;
92         }
93 
matches(String key0, String value0, Validity.Status status)94         public boolean matches(String key0, String value0, Validity.Status status) {
95             return (key == null || key.matcher(key0).matches())
96                     && (value == null
97                             || value.matcher(value0).matches()
98                                     && (status == null || status == status));
99         }
100 
101         @Override
toString()102         public String toString() {
103             return key + "→" + value;
104         }
105     }
106 
107     public static class AllowedValid {
108 
109         private final Set<Validity.Status> allowedStatus; // allowed without exception
110         private final Multimap<LstrType, AllowedMatch> allowedExceptions;
111 
isAllowed(Validity.Status status)112         public boolean isAllowed(Validity.Status status) {
113             return allowedStatus.contains(status);
114         }
115 
116         /** Only called if isAllowed is not true */
isAllowed( LstrType lstrType, String key, String value, Validity.Status status)117         public boolean isAllowed(
118                 LstrType lstrType, String key, String value, Validity.Status status) {
119             Collection<AllowedMatch> allowedMatches = allowedExceptions.get(lstrType);
120             if (allowedMatches == null) {
121                 return false;
122             }
123             for (AllowedMatch allowedMatch : allowedMatches) {
124                 if (allowedMatch.matches(key, value, status)) {
125                     return true;
126                 }
127             }
128             return false;
129         }
130 
AllowedValid(Set<Validity.Status> allowedStatus, Object... allowedExceptions)131         public AllowedValid(Set<Validity.Status> allowedStatus, Object... allowedExceptions) {
132             this.allowedStatus =
133                     allowedStatus == null
134                             ? Set.of(Validity.Status.regular)
135                             : Set.copyOf(allowedStatus);
136             Multimap<LstrType, AllowedMatch> allowed = HashMultimap.create();
137             if (allowedExceptions != null) {
138                 for (int i = 0; i < allowedExceptions.length; i += 2) {
139                     allowed.put(
140                             (LstrType) allowedExceptions[i],
141                             (AllowedMatch) allowedExceptions[i + 1]);
142                 }
143             }
144             this.allowedExceptions = ImmutableMultimap.copyOf(allowed);
145         }
146 
147         @Override
toString()148         public String toString() {
149             return allowedStatus + " " + allowedExceptions;
150         }
151     }
152 
153     /**
154      * @return true iff the component validates
155      */
isValid( LanguageTagParser ltp, LocaleValidator.AllowedValid allowed, Set<String> errors)156     public static boolean isValid(
157             LanguageTagParser ltp, LocaleValidator.AllowedValid allowed, Set<String> errors) {
158         if (errors != null) {
159             errors.clear();
160         }
161         if (allowed == null) {
162             allowed = new AllowedValid(null, null);
163         }
164         if (ltp.isLegacy() && allowed.isAllowed(Validity.Status.deprecated)) {
165             return true; // don't need further checking, since we already did so when parsing
166         }
167         if (Validation.abort
168                 == validates(LstrType.language, ltp.getLanguage(), null, allowed, errors)) {
169             return false;
170         }
171         if (Validation.abort
172                 == validates(LstrType.script, ltp.getScript(), null, allowed, errors)) {
173             return false;
174         }
175         if (Validation.abort
176                 == validates(LstrType.region, ltp.getRegion(), null, allowed, errors)) {
177             return false;
178         }
179         for (String variant : ltp.getVariants()) {
180             if (Validation.abort == validates(LstrType.variant, variant, null, allowed, errors)) {
181                 return false;
182             }
183         }
184         for (Entry<String, List<String>> entry : ltp.getLocaleExtensionsDetailed().entrySet()) {
185             if (Validation.abort
186                     == validates(
187                             LstrType.extension,
188                             entry.getKey(),
189                             entry.getValue(),
190                             allowed,
191                             errors)) {
192                 return false;
193             }
194         }
195         for (Entry<String, List<String>> entry : ltp.getExtensionsDetailed().entrySet()) {
196             if (Validation.abort
197                     == validates(
198                             LstrType.extension,
199                             entry.getKey(),
200                             entry.getValue(),
201                             allowed,
202                             errors)) {
203                 return false;
204             }
205         }
206         return errors.isEmpty(); // if we didn't abort, then we recorded errors in the set
207     }
208 
209     private enum Validation {
210         abort,
211         keepOn
212     }
213     /**
214      * Returns true if it doesn't validate and errors == null (allows for fast rejection)
215      *
216      * @param type
217      * @param values TODO
218      * @param subtag
219      * @return true if the subtag is empty, or it is an allowed status
220      */
validates( LstrType type, String field, List<String> values, LocaleValidator.AllowedValid allowed, Set<String> errors)221     private static LocaleValidator.Validation validates(
222             LstrType type,
223             String field,
224             List<String> values,
225             LocaleValidator.AllowedValid allowed,
226             Set<String> errors) {
227         Validity.Status status;
228         switch (type) {
229             case language:
230             case script:
231             case region:
232                 status = VALIDITY.getCodeToStatus(type).get(field);
233                 if (status == null) {
234                     status = Validity.Status.invalid;
235                 }
236                 if (allowed.isAllowed(status)
237                         || allowed.isAllowed(type, field, null, null)
238                         || field.length() == 0) {
239                     return Validation.keepOn;
240                 }
241                 break;
242             case variant:
243                 status = VALID_VARIANTS.get(field);
244                 if (status == null) {
245                     status = Validity.Status.invalid;
246                 }
247                 if (allowed.isAllowed(status)) {
248                     return Validation.keepOn;
249                 }
250                 break;
251             case extension:
252                 switch (field.length()) {
253                     case 1:
254                         switch (field) {
255                             case "t": // value is an LSRV
256                                 String lsvr = Joiner.on("-").join(values);
257                                 status = Validity.Status.invalid;
258                                 try {
259                                     LanguageTagParser ltp2 = new LanguageTagParser().set(lsvr);
260                                     if (isValid(ltp2, allowed, errors)) {
261                                         return Validation.keepOn;
262                                     }
263                                 } catch (Exception e) {
264                                     if (errors != null) {
265                                         errors.add(
266                                                 String.format(
267                                                         "Disallowed %s=%s, tlang=%s, status=%s",
268                                                         type, lsvr, field, status));
269                                         return Validation.keepOn;
270                                     }
271                                 }
272                                 return Validation.abort;
273                             case "x": // private use, everything is valid
274                                 status = Validity.Status.private_use;
275                                 break;
276                             case "u": // value is an attribute, none currently valid
277                                 status = Validity.Status.invalid;
278                                 break;
279                             default:
280                                 status = Validity.Status.invalid;
281                                 break;
282                         }
283                         break;
284                     case 2:
285                         // field is a tkey or a ukey, based on last char
286                         String tOrU = field.charAt(1) < 'A' ? "t" : "u";
287                         Set<String> subtypes = SDI.getBcp47Keys().get(field);
288                         if (subtypes == null) {
289                             status = Validity.Status.invalid;
290                         } else {
291                             String subtype = Joiner.on("-").join(values);
292                             final Map<R2<String, String>, String> bcp47Deprecated =
293                                     SDI.getBcp47Deprecated();
294                             if ("true".equals(bcp47Deprecated.get(Row.of(field, subtype)))) {
295                                 status = Validity.Status.deprecated;
296                             } else {
297                                 if (subtypes.contains(subtype)) {
298                                     status = Validity.Status.regular;
299                                 } else {
300                                     boolean mapUnknownToRegular = false;
301                                     fieldSwitch:
302                                     switch (field) {
303                                         case "x0":
304                                             status = Validity.Status.deprecated;
305                                             break;
306                                         case "dx":
307                                             status =
308                                                     checkSpecials(
309                                                             type,
310                                                             field,
311                                                             values,
312                                                             allowed,
313                                                             LOWERCASE_SCRIPT);
314                                             break;
315                                         case "kr":
316                                             status =
317                                                     checkSpecials(
318                                                             type,
319                                                             field,
320                                                             values,
321                                                             allowed,
322                                                             LOWERCASE_SCRIPT,
323                                                             KR_REORDER);
324                                             break;
325                                         case "rg":
326                                             mapUnknownToRegular = true;
327                                         case "sd":
328                                             status =
329                                                     checkSpecials(
330                                                             type,
331                                                             field,
332                                                             values,
333                                                             allowed,
334                                                             VALIDITY.getCodeToStatus(
335                                                                     LstrType.subdivision));
336                                             break;
337                                         case "vt":
338                                             status = Validity.Status.invalid;
339                                             if (values.isEmpty()) {
340                                                 break fieldSwitch;
341                                             }
342                                             for (String value : values) {
343                                                 try {
344                                                     int intValue = Integer.parseInt(value, 16);
345                                                     if (intValue < 0
346                                                             || intValue > 0x10FFFF
347                                                             || (Character.MIN_SURROGATE <= intValue
348                                                                     && intValue
349                                                                             <= Character
350                                                                                     .MAX_SURROGATE)) {
351                                                         break fieldSwitch;
352                                                     }
353                                                 } catch (NumberFormatException e) {
354                                                     break fieldSwitch;
355                                                 }
356                                             }
357                                             status = Validity.Status.regular;
358                                             break;
359                                         default:
360                                             status = Validity.Status.invalid;
361                                             break;
362                                     }
363                                     if (mapUnknownToRegular == true
364                                             && status == Validity.Status.unknown) {
365                                         status = Validity.Status.regular;
366                                     }
367                                 }
368                             }
369                             if (allowed.isAllowed(status)
370                                     || allowed.isAllowed(
371                                             LstrType.extension, field, subtype, status)) {
372                                 return Validation.keepOn;
373                             } else if (errors == null) {
374                                 return Validation.abort;
375                             }
376                             errors.add(
377                                     String.format(
378                                             "Disallowed %s=%s=%s, status=%s",
379                                             type, field, subtype, status));
380                             return Validation.keepOn;
381                         }
382                         break;
383                     default:
384                         status = Validity.Status.invalid;
385                         break;
386                 }
387                 break;
388             default:
389                 status = null;
390                 break;
391         }
392         if (errors == null) {
393             return Validation.abort;
394         }
395         errors.add(String.format("Disallowed %s=%s, status=%s", type, field, status));
396         return Validation.keepOn;
397     }
398 
399     public static Validity.Status checkSpecials(
400             LstrType type,
401             String field,
402             List<String> values,
403             LocaleValidator.AllowedValid allowed,
404             Map<String, Validity.Status>... validityMaps) {
405         if (values.size() > 1
406                 && (field.equals("sd") || field.equals("rg"))) { // TODO generalize this
407             return Validity.Status.invalid;
408         }
409         Validity.Status best = null;
410         for (String value : values) {
411             Validity.Status status = null;
412             for (Map<String, Validity.Status> validityMap : validityMaps) {
413                 status = validityMap.get(value);
414                 if (status != null) {
415                     break;
416                 }
417             }
418             if (status == null) {
419                 return Validity.Status.invalid;
420             }
421             if (allowed.isAllowed(status) || allowed.isAllowed(type, field, value, status)) {
422                 if (best == null) {
423                     best = status;
424                 }
425             } else {
426                 return status;
427             }
428         }
429         return best == null ? Validity.Status.invalid : best;
430     }
431 
432     public Validity.Status checkRegion(
433             LstrType type,
434             String field,
435             List<String> values,
436             LocaleValidator.AllowedValid allowed) {
437         Validity.Status best = null;
438         for (String value : values) {
439             String value2 = UCharacter.toTitleCase(value, null);
440             Validity.Status status = VALIDITY.getCodeToStatus(LstrType.script).get(value2);
441             if (status == null) {
442                 return Validity.Status.invalid;
443             }
444             if (allowed.isAllowed(status) || allowed.isAllowed(type, field, value, null)) {
445                 if (best == null) {
446                     best = status;
447                 }
448             } else {
449                 return status;
450             }
451         }
452         return best == null ? Validity.Status.invalid : best;
453     }
454 }
455