package org.unicode.cldr.util;

import com.google.common.base.Joiner;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.Multimap;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.lang.UCharacter;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.unicode.cldr.util.StandardCodes.LstrField;
import org.unicode.cldr.util.StandardCodes.LstrType;

/**
 * Validates the subtags of a parsed locale identifier (language, script, region, variants and
 * extensions) against the validity data, with a configurable set of allowed statuses and
 * per-field exceptions.
 */
public class LocaleValidator {
    static final SupplementalDataInfo SDI = SupplementalDataInfo.getInstance();

    /** For backwards compatibility, certain non-regular codes are allowed in LikelySubtags. */
    public static final LocaleValidator.AllowedValid ALLOW_IN_LIKELY =
            new LocaleValidator.AllowedValid(
                    null,
                    LstrType.region,
                    new LocaleValidator.AllowedMatch("001|419"),
                    LstrType.language,
                    new LocaleValidator.AllowedMatch("und|in|iw|ji|jw|mo|tl"));

    static final Validity VALIDITY = Validity.getInstance();
    static final Set<LstrType> FIELD_ALLOWS_EMPTY = Set.of(LstrType.script, LstrType.region);

    /**
     * Variant code to status: {@code deprecated} when the registry entry carries a Deprecated
     * field, otherwise {@code regular}.
     */
    // Source shape: Map<LstrType, Map<String, Map<LstrField, String>>>
    static final Map<String, Validity.Status> VALID_VARIANTS =
            ImmutableMap.copyOf(
                    StandardCodes.getEnumLstreg().get(LstrType.variant).entrySet().stream()
                            .collect(
                                    Collectors.toMap(
                                            x -> x.getKey(),
                                            y ->
                                                    y.getValue().get(LstrField.Deprecated) == null
                                                            ? Validity.Status.regular
                                                            : Validity.Status.deprecated)));

    /**
     * Status of each listed value of the {@code kr} key, excluding the {@code REORDER_CODE}
     * entry. A value is {@code regular} only when its deprecation entry is the string {@code
     * "false"}; anything else (including a missing entry) maps to {@code deprecated}.
     */
    private static final Map<String, Validity.Status> KR_REORDER =
            SupplementalDataInfo.getInstance().getBcp47Keys().get("kr").stream()
                    .filter(x -> !x.equals("REORDER_CODE"))
                    .collect(
                            Collectors.toMap(
                                    Function.identity(),
                                    y -> {
                                        String temp =
                                                SupplementalDataInfo.getInstance()
                                                        .getBcp47Deprecated()
                                                        .get(Row.of("kr", y));
                                        return "false".equals(temp)
                                                ? Validity.Status.regular
                                                : Validity.Status.deprecated;
                                    }));

    /** Script codes, lowercased, to their status (extension values are lowercase). */
    private static final Map<String, Validity.Status> LOWERCASE_SCRIPT =
            VALIDITY.getCodeToStatus(LstrType.script).entrySet().stream()
                    .collect(
                            Collectors.toMap(
                                    x -> UCharacter.toLowerCase(x.getKey()), x -> x.getValue()));

    /** Region codes, lowercased, to their status. Currently not referenced in this class. */
    private static final Map<String, Validity.Status> LOWERCASE_REGION =
            VALIDITY.getCodeToStatus(LstrType.region).entrySet().stream()
                    .collect(
                            Collectors.toMap(
                                    x -> UCharacter.toLowerCase(x.getKey()), x -> x.getValue()));

    /**
     * One allowed exception: a regex for the key (subtag or extension key), an optional regex for
     * the value, and an optional status restriction. A null component matches anything.
     */
    public static class AllowedMatch {
        final Pattern key;
        final Pattern value;
        final Validity.Status status;

        /** Matches any value and any status for keys matching {@code code}. */
        public AllowedMatch(String code) {
            this(code, null, null);
        }

        /** Matches any status for keys matching {@code code} with values matching {@code value}. */
        public AllowedMatch(String code, String value) {
            this(code, value, null);
        }

        /**
         * @param code regex for the key, or null to match any key
         * @param value regex for the value, or null to match any value
         * @param status required status, or null to match any status
         */
        public AllowedMatch(String code, String value, Validity.Status status) {
            this.key = code == null ? null : Pattern.compile(code);
            this.value = value == null ? null : Pattern.compile(value);
            this.status = status;
        }

        /**
         * Tests a key/value/status triple against this exception.
         *
         * @param key0 the key to test; a null key only matches when no key pattern is set
         * @param value0 the value to test; a null value only matches when no value pattern is set
         * @param status the status determined for the item
         * @return true if every non-null component of this exception matches
         */
        public boolean matches(String key0, String value0, Validity.Status status) {
            final boolean keyMatches =
                    key == null || (key0 != null && key.matcher(key0).matches());
            final boolean valueMatches =
                    value == null || (value0 != null && value.matcher(value0).matches());
            // The parameter shadows the field, so the field must be qualified with "this".
            final boolean statusMatches = this.status == null || this.status == status;
            return keyMatches && valueMatches && statusMatches;
        }

        @Override
        public String toString() {
            return key + "→" + value;
        }
    }

    /**
     * The acceptance policy: a set of statuses that are always allowed, plus per-field exceptions
     * that allow specific codes regardless of that set.
     */
    public static class AllowedValid {

        private final Set<Validity.Status> allowedStatus; // allowed without exception
        private final Multimap<LstrType, AllowedMatch> allowedExceptions;

        /**
         * @return true if {@code status} is in the set of statuses allowed without exception
         */
        public boolean isAllowed(Validity.Status status) {
            return allowedStatus.contains(status);
        }

        /**
         * Checks the per-field exceptions. Only called if {@link #isAllowed(Validity.Status)} is
         * not true.
         *
         * @return true if any exception registered for {@code lstrType} matches
         */
        public boolean isAllowed(
                LstrType lstrType, String key, String value, Validity.Status status) {
            // Multimap.get never returns null; an absent key yields an empty collection.
            final Collection<AllowedMatch> allowedMatches = allowedExceptions.get(lstrType);
            for (AllowedMatch allowedMatch : allowedMatches) {
                if (allowedMatch.matches(key, value, status)) {
                    return true;
                }
            }
            return false;
        }

        /**
         * @param allowedStatus statuses allowed without exception; null means only {@code
         *     regular}
         * @param allowedExceptions alternating {@link LstrType} and {@link AllowedMatch} items;
         *     may be null or empty
         * @throws IllegalArgumentException if {@code allowedExceptions} has an odd number of items
         * @throws ClassCastException if an item is not of the expected type for its position
         */
        public AllowedValid(Set<Validity.Status> allowedStatus, Object... allowedExceptions) {
            this.allowedStatus =
                    allowedStatus == null
                            ? Set.of(Validity.Status.regular)
                            : Set.copyOf(allowedStatus);
            Multimap<LstrType, AllowedMatch> allowed = HashMultimap.create();
            if (allowedExceptions != null) {
                if (allowedExceptions.length % 2 != 0) {
                    throw new IllegalArgumentException(
                            "allowedExceptions must be (LstrType, AllowedMatch) pairs, but got "
                                    + allowedExceptions.length
                                    + " items");
                }
                for (int i = 0; i < allowedExceptions.length; i += 2) {
                    allowed.put(
                            (LstrType) allowedExceptions[i],
                            (AllowedMatch) allowedExceptions[i + 1]);
                }
            }
            this.allowedExceptions = ImmutableMultimap.copyOf(allowed);
        }

        @Override
        public String toString() {
            return allowedStatus + " " + allowedExceptions;
        }
    }

    /**
     * Validates every component of a parsed locale identifier.
     *
     * @param ltp the parsed identifier
     * @param allowed the acceptance policy; null means only {@code regular} codes are allowed
     * @param errors if non-null, cleared on entry and then filled with a message per disallowed
     *     component; if null, validation stops at the first disallowed component
     * @return true iff all components validate
     */
    public static boolean isValid(
            LanguageTagParser ltp, LocaleValidator.AllowedValid allowed, Set<String> errors) {
        if (errors != null) {
            errors.clear();
        }
        if (allowed == null) {
            allowed = new AllowedValid(null, (Object[]) null);
        }
        if (ltp.isLegacy() && allowed.isAllowed(Validity.Status.deprecated)) {
            return true; // don't need further checking, since we already did so when parsing
        }
        if (Validation.abort
                == validates(LstrType.language, ltp.getLanguage(), null, allowed, errors)) {
            return false;
        }
        if (Validation.abort
                == validates(LstrType.script, ltp.getScript(), null, allowed, errors)) {
            return false;
        }
        if (Validation.abort
                == validates(LstrType.region, ltp.getRegion(), null, allowed, errors)) {
            return false;
        }
        for (String variant : ltp.getVariants()) {
            if (Validation.abort == validates(LstrType.variant, variant, null, allowed, errors)) {
                return false;
            }
        }
        for (Entry<String, List<String>> entry : ltp.getLocaleExtensionsDetailed().entrySet()) {
            if (Validation.abort
                    == validates(
                            LstrType.extension,
                            entry.getKey(),
                            entry.getValue(),
                            allowed,
                            errors)) {
                return false;
            }
        }
        for (Entry<String, List<String>> entry : ltp.getExtensionsDetailed().entrySet()) {
            if (Validation.abort
                    == validates(
                            LstrType.extension,
                            entry.getKey(),
                            entry.getValue(),
                            allowed,
                            errors)) {
                return false;
            }
        }
        // Without an error set, any failure would already have aborted above, so reaching this
        // point means everything validated. With one, failures were recorded in the set.
        return errors == null || errors.isEmpty();
    }

    /** Outcome of validating one component. */
    private enum Validation {
        /** The component is disallowed and validation should stop. */
        abort,
        /** The component is allowed, or its failure was recorded in the error set. */
        keepOn
    }

    /**
     * Validates a single component.
     *
     * @param type the kind of component
     * @param field the subtag, or for extensions the extension letter or two-letter key
     * @param values the extension values; only read when {@code type} is {@code extension}
     * @param allowed the acceptance policy
     * @param errors if non-null, receives a message when the component is disallowed
     * @return {@code keepOn} if the component is allowed or its failure was recorded in {@code
     *     errors}; {@code abort} if it is disallowed and {@code errors} is null, or if a nested
     *     {@code t} language tag is disallowed
     */
    private static LocaleValidator.Validation validates(
            LstrType type,
            String field,
            List<String> values,
            LocaleValidator.AllowedValid allowed,
            Set<String> errors) {
        Validity.Status status;
        switch (type) {
            case language:
            case script:
            case region:
                status = VALIDITY.getCodeToStatus(type).get(field);
                if (status == null) {
                    status = Validity.Status.invalid;
                }
                if (allowed.isAllowed(status)
                        || allowed.isAllowed(type, field, null, status)
                        || field.length() == 0) {
                    return Validation.keepOn;
                }
                break;
            case variant:
                status = VALID_VARIANTS.get(field);
                if (status == null) {
                    status = Validity.Status.invalid;
                }
                if (allowed.isAllowed(status)) {
                    return Validation.keepOn;
                }
                break;
            case extension:
                switch (field.length()) {
                    case 1:
                        switch (field) {
                            case "t": // value is an LSRV
                                String lsvr = Joiner.on("-").join(values);
                                status = Validity.Status.invalid;
                                // isValid clears its error set on entry, so the nested tag gets
                                // its own set; otherwise errors already recorded for the outer
                                // tag would be lost.
                                Set<String> nestedErrors =
                                        errors == null ? null : new LinkedHashSet<>();
                                try {
                                    LanguageTagParser ltp2 = new LanguageTagParser().set(lsvr);
                                    if (isValid(ltp2, allowed, nestedErrors)) {
                                        return Validation.keepOn;
                                    }
                                } catch (Exception e) {
                                    if (errors != null) {
                                        errors.addAll(nestedErrors);
                                        errors.add(
                                                String.format(
                                                        "Disallowed %s=%s, tlang=%s, status=%s",
                                                        type, lsvr, field, status));
                                        return Validation.keepOn;
                                    }
                                }
                                if (errors != null) {
                                    errors.addAll(nestedErrors);
                                }
                                return Validation.abort;
                            case "x": // private use, everything is valid
                                status = Validity.Status.private_use;
                                break;
                            case "u": // value is an attribute, none currently valid
                                status = Validity.Status.invalid;
                                break;
                            default:
                                status = Validity.Status.invalid;
                                break;
                        }
                        break;
                    case 2:
                        // field is a tkey or a ukey
                        Set<String> subtypes = SDI.getBcp47Keys().get(field);
                        if (subtypes == null) {
                            status = Validity.Status.invalid;
                        } else {
                            String subtype = Joiner.on("-").join(values);
                            final Map<R2<String, String>, String> bcp47Deprecated =
                                    SDI.getBcp47Deprecated();
                            if ("true".equals(bcp47Deprecated.get(Row.of(field, subtype)))) {
                                status = Validity.Status.deprecated;
                            } else {
                                if (subtypes.contains(subtype)) {
                                    status = Validity.Status.regular;
                                } else {
                                    // The value is not listed literally; some keys take values
                                    // that are checked against other data.
                                    boolean mapUnknownToRegular = false;
                                    fieldSwitch:
                                    switch (field) {
                                        case "x0":
                                            status = Validity.Status.deprecated;
                                            break;
                                        case "dx":
                                            status =
                                                    checkSpecials(
                                                            type,
                                                            field,
                                                            values,
                                                            allowed,
                                                            LOWERCASE_SCRIPT);
                                            break;
                                        case "kr":
                                            status =
                                                    checkSpecials(
                                                            type,
                                                            field,
                                                            values,
                                                            allowed,
                                                            LOWERCASE_SCRIPT,
                                                            KR_REORDER);
                                            break;
                                        case "rg":
                                            mapUnknownToRegular = true;
                                            // fall through: rg values are checked like sd values
                                        case "sd":
                                            status =
                                                    checkSpecials(
                                                            type,
                                                            field,
                                                            values,
                                                            allowed,
                                                            VALIDITY.getCodeToStatus(
                                                                    LstrType.subdivision));
                                            break;
                                        case "vt":
                                            // Each value must be a hex code point that is in
                                            // range and not a surrogate.
                                            status = Validity.Status.invalid;
                                            if (values.isEmpty()) {
                                                break fieldSwitch;
                                            }
                                            for (String value : values) {
                                                try {
                                                    int intValue = Integer.parseInt(value, 16);
                                                    if (intValue < 0
                                                            || intValue > 0x10FFFF
                                                            || (Character.MIN_SURROGATE <= intValue
                                                                    && intValue
                                                                            <= Character
                                                                                    .MAX_SURROGATE)) {
                                                        break fieldSwitch;
                                                    }
                                                } catch (NumberFormatException e) {
                                                    break fieldSwitch;
                                                }
                                            }
                                            status = Validity.Status.regular;
                                            break;
                                        default:
                                            status = Validity.Status.invalid;
                                            break;
                                    }
                                    if (mapUnknownToRegular
                                            && status == Validity.Status.unknown) {
                                        status = Validity.Status.regular;
                                    }
                                }
                            }
                            if (allowed.isAllowed(status)
                                    || allowed.isAllowed(
                                            LstrType.extension, field, subtype, status)) {
                                return Validation.keepOn;
                            } else if (errors == null) {
                                return Validation.abort;
                            }
                            errors.add(
                                    String.format(
                                            "Disallowed %s=%s=%s, status=%s",
                                            type, field, subtype, status));
                            return Validation.keepOn;
                        }
                        break;
                    default:
                        status = Validity.Status.invalid;
                        break;
                }
                break;
            default:
                status = null;
                break;
        }
        // Disallowed: fail fast without an error set, otherwise record and continue.
        if (errors == null) {
            return Validation.abort;
        }
        errors.add(String.format("Disallowed %s=%s, status=%s", type, field, status));
        return Validation.keepOn;
    }

    /**
     * Determines the status of an extension whose values are looked up in other validity data.
     *
     * <p>Each value is looked up in {@code validityMaps} in order, taking the first hit.
     *
     * @param type the component type, passed through to the exception check
     * @param field the extension key; {@code sd} and {@code rg} accept at most one value
     * @param values the extension values
     * @param allowed the acceptance policy
     * @param validityMaps code-to-status maps to consult, in order
     * @return {@code invalid} if there are no values, a value is in none of the maps, or {@code
     *     sd}/{@code rg} has more than one value; otherwise the status of the first disallowed
     *     value, or if all are allowed, the status of the first value
     */
    @SafeVarargs
    public static Validity.Status checkSpecials(
            LstrType type,
            String field,
            List<String> values,
            LocaleValidator.AllowedValid allowed,
            Map<String, Validity.Status>... validityMaps) {
        if (values.size() > 1
                && (field.equals("sd") || field.equals("rg"))) { // TODO generalize this
            return Validity.Status.invalid;
        }
        Validity.Status best = null;
        for (String value : values) {
            Validity.Status status = null;
            for (Map<String, Validity.Status> validityMap : validityMaps) {
                status = validityMap.get(value);
                if (status != null) {
                    break;
                }
            }
            if (status == null) {
                return Validity.Status.invalid;
            }
            if (allowed.isAllowed(status) || allowed.isAllowed(type, field, value, status)) {
                if (best == null) {
                    best = status;
                }
            } else {
                return status;
            }
        }
        return best == null ? Validity.Status.invalid : best;
    }

    /**
     * Determines a status for a list of values by title-casing each one and looking it up among
     * the script codes.
     *
     * <p>NOTE(review): despite the method name, the lookup uses {@code LstrType.script} with
     * title-cased values, not region codes — confirm whether this is intended before relying on
     * it.
     *
     * @return {@code invalid} if there are no values or a value is not found; otherwise the status
     *     of the first disallowed value, or if all are allowed, the status of the first value
     */
    public Validity.Status checkRegion(
            LstrType type,
            String field,
            List<String> values,
            LocaleValidator.AllowedValid allowed) {
        Validity.Status best = null;
        for (String value : values) {
            String value2 = UCharacter.toTitleCase(value, null);
            Validity.Status status = VALIDITY.getCodeToStatus(LstrType.script).get(value2);
            if (status == null) {
                return Validity.Status.invalid;
            }
            if (allowed.isAllowed(status) || allowed.isAllowed(type, field, value, status)) {
                if (best == null) {
                    best = status;
                }
            } else {
                return status;
            }
        }
        return best == null ? Validity.Status.invalid : best;
    }
}