package org.unicode.cldr.util;

import java.text.ParseException;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.function.IntFunction;
import java.util.regex.Pattern;

import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.Validity.Status;

import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.text.SimpleDateFormat;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSet.SpanCondition;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.VersionInfo;

/**
 * A named predicate over strings, created from a textual command by {@link #of(String)}.
 * The command has the form {@code type} or {@code type/subargument}; {@link #getName()}
 * returns the standard form of the command that produced the matcher.
 */
public abstract class MatchValue implements Predicate<String> {
    /** Sample returned by matchers that do not supply a more specific one. */
    public static final String DEFAULT_SAMPLE = "❓";

    /**
     * Tests a value against this matcher.
     *
     * @param item the value to test
     * @return true if the value is accepted
     */
    @Override
    public abstract boolean is(String item);

    /**
     * @return the standard textual form of this matcher, e.g. {@code "range/1~5"}
     */
    public abstract String getName();

    /**
     * @return a sample value for this matcher; {@link #DEFAULT_SAMPLE} unless overridden
     */
    public String getSample() {
        return DEFAULT_SAMPLE;
    }

    @Override
    public String toString() {
        return getName();
    }

    /**
     * Builds a matcher from a command of the form {@code type} or {@code type/subargument}.
     * Everything after the first slash is handed to the factory of the selected type.
     * If the resulting matcher's name differs from the command, a note is written to
     * {@code System.err}.
     *
     * @param command the command to parse
     * @return the matcher for the command
     * @throws IllegalArgumentException if the type is unknown or the subargument cannot be
     *         handled; the underlying failure is attached as the cause
     */
    public static MatchValue of(String command) {
        String originalArg = command;
        int slashPos = command.indexOf('/');
        String subargument = null;
        if (slashPos >= 0) {
            subargument = command.substring(slashPos + 1);
            command = command.substring(0, slashPos);
        }
        try {
            MatchValue result = null;
            switch (command) {
            case "any":
                result = AnyMatchValue.of(subargument);
                break;
            case "set":
                result = SetMatchValue.of(subargument);
                break;
            case "validity":
                result = ValidityMatchValue.of(subargument);
                break;
            case "bcp47":
                result = Bcp47MatchValue.of(subargument);
                break;
            case "range":
                result = RangeMatchValue.of(subargument);
                break;
            case "literal":
                result = LiteralMatchValue.of(subargument);
                break;
            case "regex":
                result = RegexMatchValue.of(subargument);
                break;
            case "metazone":
                result = MetazoneMatchValue.of(subargument);
                break;
            case "version":
                result = VersionMatchValue.of(subargument);
                break;
            case "time":
                result = TimeMatchValue.of(subargument);
                break;
            case "or":
                result = OrMatchValue.of(subargument);
                break;
            case "unicodeset":
                result = UnicodeSpanMatchValue.of(subargument);
                break;
            default:
                throw new IllegalArgumentException("Illegal/Unimplemented match type: " + originalArg);
            }
            if (!originalArg.equals(result.getName())) {
                System.err.println("Non-standard form or error: " + originalArg + " ==> " + result.getName());
            }
            return result;
        } catch (Exception e) {
            throw new IllegalArgumentException("Problem with: " + originalArg, e);
        }
    }

    /**
     * Matches locale identifiers: a bare language code, or an underscore-separated identifier
     * whose language, script, region and variants are each valid and which carries no
     * extensions.
     */
    static class LocaleMatchValue extends MatchValue {
        private final Predicate<String> lang = new ValidityMatchValue(LstrType.language);
        private final Predicate<String> script = new ValidityMatchValue(LstrType.script);
        private final Predicate<String> region = new ValidityMatchValue(LstrType.region);
        private final Predicate<String> variant = new ValidityMatchValue(LstrType.variant);

        @Override
        public String getName() {
            return "validity/locale";
        }

        @Override
        public boolean is(String item) {
            // No underscore: the whole item is treated as a language code.
            if (!item.contains("_")) {
                return lang.is(item);
            }
            LanguageTagParser ltp;
            try {
                ltp = new LanguageTagParser().set(item);
            } catch (Exception e) {
                // Anything the parser rejects is simply not a match.
                return false;
            }
            return lang.is(ltp.getLanguage())
                && (ltp.getScript().isEmpty()
                    || script.is(ltp.getScript()))
                && (ltp.getRegion().isEmpty()
                    || region.is(ltp.getRegion()))
                && (ltp.getVariants().isEmpty()
                    || and(variant, ltp.getVariants()))
                && ltp.getExtensions().isEmpty()
                && ltp.getLocaleExtensions().isEmpty();
        }

        @Override
        public String getSample() {
            return "de";
        }
    }

    // TODO remove these if possible — ticket/10120
    /** Script codes accepted by {@link ValidityMatchValue} without consulting the validity data. */
    static final Set<String> SCRIPT_HACK = ImmutableSet.of(
        "Afak", "Blis", "Cirt", "Cyrs", "Egyd", "Egyh", "Geok", "Inds", "Jurc", "Kpel", "Latf", "Latg",
        "Loma", "Maya", "Moon", "Nkgb", "Phlv", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Visp", "Wole");
    /** Variant codes accepted by {@link ValidityMatchValue} without consulting the validity data. */
    static final Set<String> VARIANT_HACK = ImmutableSet.of("POSIX", "REVISED", "SAAHO");

    /**
     * @return true if the predicate accepts every item (vacuously true for no items)
     */
    public static <T> boolean and(Predicate<T> predicate, Iterable<T> items) {
        for (T item : items) {
            if (!predicate.is(item)) {
                return false;
            }
        }
        return true;
    }

    /**
     * @return true if the predicate accepts at least one item (false for no items)
     */
    public static <T> boolean or(Predicate<T> predicate, Iterable<T> items) {
        for (T item : items) {
            if (predicate.is(item)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Parses and formats sets of constants of one enum class. The textual form is a list of
     * constant names separated by spaces or commas; a leading {@code !} means "every constant
     * except the listed ones".
     */
    public static class EnumParser<T extends Enum> {
        private final Class<T> aClass;
        private final Set<T> all;

        private EnumParser(Class<T> aClass) {
            this.aClass = aClass;
            all = ImmutableSet.copyOf(EnumSet.allOf(aClass));
        }

        /**
         * @param aClass the enum class whose constants are to be parsed
         * @return a parser for that class
         */
        public static <T> EnumParser of(Class<T> aClass) {
            return new EnumParser(aClass);
        }

        /**
         * Parses a list of constant names, optionally negated with a leading {@code !}.
         *
         * @param text the text to parse
         * @return an immutable set of the selected constants
         * @throws IllegalArgumentException if a name cannot be resolved to a constant
         */
        public Set<T> parse(String text) {
            Set<T> statuses = EnumSet.noneOf(aClass);
            boolean negative = text.startsWith("!");
            if (negative) {
                text = text.substring(1);
            }
            for (String item : SPLIT_SPACE_OR_COMMA.split(text)) {
                statuses.add(getItem(item));
            }
            if (negative) {
                // Complement: start from every constant and drop the listed ones.
                TreeSet<T> temp = new TreeSet<>(all);
                temp.removeAll(statuses);
                statuses = temp;
            }
            return ImmutableSet.copyOf(statuses);
        }

        /** Resolves one constant name by reflectively calling the enum's {@code valueOf}. */
        private T getItem(String text) {
            try {
                return (T) aClass.getMethod("valueOf", String.class).invoke(null, text);
            } catch (Exception e) {
                throw new IllegalArgumentException(e);
            }
        }

        /**
         * Formats a set as space-separated names. When the set holds more than half of all
         * constants, the shorter negated form ({@code !} followed by the missing constants)
         * is produced instead.
         *
         * @param set the set to format
         * @return the textual form
         */
        public String format(Set<?> set) {
            if (set.size() > all.size() / 2) {
                TreeSet<T> temp = new TreeSet<>(all);
                temp.removeAll(set);
                return "!" + Joiner.on(' ').join(temp);
            } else {
                return Joiner.on(' ').join(set);
            }
        }

        /**
         * @param statuses the set to compare
         * @return true if the set equals the set of all constants of the enum class
         */
        public boolean isAll(Set<?> statuses) {
            return statuses.equals(all);
        }
    }

    /**
     * Matches codes of one {@link LstrType} whose status in {@link Validity} is among an
     * accepted set. With the {@code short-} prefix (units only), codes are matched against the
     * part of each unit id that follows its first hyphen.
     */
    static public class ValidityMatchValue extends MatchValue {
        private final LstrType type;
        private final boolean shortId;
        private final Set<Status> statuses;
        /** Short unit id to status; built on first use by {@link #getShortCodeToStatus()}. */
        private static Map<String, Status> shortCodeToStatus;
        private static final EnumParser<Status> enumParser = EnumParser.of(Status.class);

        @Override
        public String getName() {
            return "validity/"
                + (shortId ? "short-" : "") + type.toString()
                + (enumParser.isAll(statuses) ? "" : "/" + enumParser.format(statuses));
        }

        private ValidityMatchValue(LstrType type) {
            this(type, null, false);
        }

        /**
         * @param type the kind of code to validate
         * @param statuses accepted statuses, or null to accept every status
         * @param shortId whether to match short unit ids
         * @throws IllegalArgumentException if {@code shortId} is set for a type other than unit
         */
        private ValidityMatchValue(LstrType type, Set<Status> statuses, boolean shortId) {
            this.type = type;
            if (type != LstrType.unit && shortId) {
                throw new IllegalArgumentException("short- not supported except for units");
            }
            this.shortId = shortId;
            this.statuses = statuses == null ? EnumSet.allOf(Status.class) : ImmutableSet.copyOf(statuses);
        }

        /**
         * Parses {@code locale}, {@code type}, {@code short-type} or {@code type/statuses}, where
         * {@code statuses} is in the form accepted by {@link EnumParser#parse(String)}.
         *
         * @param typeName the text after {@code validity/}
         * @return a {@link LocaleMatchValue} for {@code locale}, otherwise a ValidityMatchValue
         */
        public static MatchValue of(String typeName) {
            if (typeName.equals("locale")) {
                return new LocaleMatchValue();
            }
            int slashPos = typeName.indexOf('/');
            Set<Status> statuses = null;
            if (slashPos > 0) {
                statuses = enumParser.parse(typeName.substring(slashPos + 1));
                typeName = typeName.substring(0, slashPos);
            }
            boolean shortId = typeName.startsWith("short-");
            if (shortId) {
                typeName = typeName.substring(6); // strip "short-"
            }
            LstrType type = LstrType.fromString(typeName);
            return new ValidityMatchValue(type, statuses, shortId);
        }

        @Override
        public boolean is(String item) {
            // TODO handle deprecated
            switch (type) {
            case script:
                if (SCRIPT_HACK.contains(item)) {
                    return true;
                }
                break;
            case variant:
                if (VARIANT_HACK.contains(item)) {
                    return true;
                }
                item = item.toLowerCase(Locale.ROOT);
                break;
            case language:
                // "root" is looked up under the code "und".
                item = item.equals("root") ? "und" : item;
                break;
            case unit:
                if (shortId) {
                    final Status status = getShortCodeToStatus().get(item);
                    return status != null && statuses.contains(status);
                }
                break;
            default:
                break;
            }
            final Status status = Validity.getInstance().getCodeToStatus(type).get(item);
            return status != null && statuses.contains(status);
        }

        /**
         * Returns the map from short unit id (the text after the first hyphen of the full id)
         * to status, building it on first use; lazy evaluation avoids circular dependencies.
         * When several full ids share a short id, the first one encountered wins.
         * NOTE(review): not synchronized; concurrent first calls may each build the map —
         * confirm whether that matters for callers.
         */
        private static Map<String, Status> getShortCodeToStatus() {
            Map<String, Status> result = shortCodeToStatus;
            if (result == null) {
                Map<String, Status> building = new TreeMap<>();
                for (Entry<String, Status> entry : Validity.getInstance().getCodeToStatus(LstrType.unit).entrySet()) {
                    final String fullKey = entry.getKey();
                    final String shortKey = fullKey.substring(fullKey.indexOf('-') + 1);
                    building.putIfAbsent(shortKey, entry.getValue());
                }
                result = ImmutableMap.copyOf(building);
                shortCodeToStatus = result;
            }
            return result;
        }

        @Override
        public String getSample() {
            return Validity.getInstance().getCodeToStatus(type).keySet().iterator().next();
        }
    }

    /**
     * Matches BCP 47 keys or subtypes, including their aliases. The key {@code anykey} matches
     * any key, {@code anyvalue} matches any subtype of any key, and any other key matches the
     * subtypes of that key. The data is loaded on the first call to {@link #is(String)}.
     */
    static public class Bcp47MatchValue extends MatchValue {
        private final String key;
        private Set<String> valid;

        @Override
        public String getName() {
            return "bcp47/" + key;
        }

        private Bcp47MatchValue(String key) {
            this.key = key;
        }

        public static Bcp47MatchValue of(String key) {
            return new Bcp47MatchValue(key);
        }

        /**
         * @throws IllegalArgumentException if this matcher's key is neither a known key nor an
         *         alias of one
         */
        @Override
        public synchronized boolean is(String item) {
            if (valid == null) { // must lazy-eval
                // Assigned only after a complete, successful load, so a failure never
                // leaves a partial set behind for later calls.
                valid = loadValid();
            }
            return valid.contains(item);
        }

        /**
         * Builds the immutable set of accepted values for this matcher's key from the
         * supplemental data. Examples of the underlying data:
         */
        //<key name="tz" description="Time zone key" alias="timezone">
        //  <type name="adalv" description="Andorra" alias="Europe/Andorra"/>
        //<key name="nu" description="Numbering system type key" alias="numbers">
        //  <type name="adlm" description="Adlam digits" since="30"/>
        private Set<String> loadValid() {
            SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
            Relation<String, String> keyToSubtypes = sdi.getBcp47Keys();
            Relation<R2<String, String>, String> keySubtypeToAliases = sdi.getBcp47Aliases();
            Set<String> building;
            // TODO handle deprecated
            // fix data to remove aliases, then narrow this
            switch (key) {
            case "anykey":
                building = new TreeSet<>(keyToSubtypes.keySet());
                for (String keyItem : keyToSubtypes.keySet()) {
                    addAliases(building, keySubtypeToAliases, keyItem, "");
                }
                building.add("x"); // TODO: investigate adding to bcp47 data files
                break;
            case "anyvalue":
                building = new TreeSet<>(keyToSubtypes.values());
                for (String keyItem : keyToSubtypes.keySet()) {
                    for (String subtypeItem : keyToSubtypes.get(keyItem)) {
                        addAliases(building, keySubtypeToAliases, keyItem, subtypeItem);
                    }
                }
                building.add("generic"); // TODO: investigate adding to bcp47 data files
                break;
            default: {
                Set<String> subtypeList = keyToSubtypes.get(key);
                if (subtypeList == null) {
                    // Not a primary key name; see whether it is an alias of one.
                    String key2 = buildAliasesToKey(keyToSubtypes, keySubtypeToAliases).get(key);
                    if (key2 != null) {
                        subtypeList = keyToSubtypes.get(key2);
                    }
                }
                if (subtypeList == null) {
                    throw new IllegalArgumentException("Illegal keyValue: " + getName());
                }
                building = new TreeSet<>(subtypeList);
                // NOTE(review): aliases are looked up under the key as given, even when it was
                // resolved through a key alias above — confirm that this is intended.
                for (String subtypeItem : subtypeList) {
                    addAliases(building, keySubtypeToAliases, key, subtypeItem);
                }
                if ("ca".equals(key)) {
                    building.add("generic"); // TODO: investigate adding to bcp47 data files
                }
                break;
            }
            }
            return ImmutableSet.copyOf(building);
        }

        /** Maps each key alias (aliases stored under an empty subtype) to its key. */
        private static Map<String, String> buildAliasesToKey(
            Relation<String, String> keyToSubtypes,
            Relation<R2<String, String>, String> keySubtypeToAliases) {
            Map<String, String> aliasesToKey = new HashMap<>();
            for (String keyName : keyToSubtypes.keySet()) {
                Set<String> aliases = keySubtypeToAliases.get(Row.of(keyName, ""));
                if (aliases != null) {
                    for (String alias : aliases) {
                        aliasesToKey.put(alias, keyName);
                    }
                }
            }
            return aliasesToKey;
        }

        /** Adds to {@code target} the aliases recorded for the given key and subtype, if any. */
        private static void addAliases(Set<String> target,
            Relation<R2<String, String>, String> keySubtypeToAliases, String keyItem, String subtype) {
            Set<String> aliases = keySubtypeToAliases.get(Row.of(keyItem, subtype));
            if (aliases != null && !aliases.isEmpty()) {
                target.addAll(aliases);
            }
        }

        @Override
        public String getSample() {
            is("X"); // force load data
            return valid == null ? "XX"
                : valid.iterator().next();
        }
    }

    static final Splitter RANGE = Splitter.on('~').trimResults();

    // TODO: have Range that can be ints, doubles, or versions
    /**
     * Matches numbers within an inclusive range written as {@code start~end}. If the range
     * text contains no decimal point, values containing a decimal point are rejected.
     */
    static public class RangeMatchValue extends MatchValue {
        private final double start;
        private final double end;
        private final boolean isInt;

        @Override
        public String getName() {
            return "range/" + (isInt ? (long) start + "~" + (long) end : start + "~" + end);
        }

        /**
         * @param key the range text, {@code start~end}
         * @throws IllegalArgumentException if the text does not have exactly two parts or a
         *         part is not a number ({@link NumberFormatException} is a subclass)
         */
        private RangeMatchValue(String key) {
            // Check the shape first so a missing '~' gives a clear error rather than a
            // NoSuchElementException from an exhausted iterator.
            List<String> parts = RANGE.splitToList(key);
            if (parts.size() != 2) {
                throw new IllegalArgumentException("Range must be of form <int>~<int>: " + key);
            }
            start = Double.parseDouble(parts.get(0));
            end = Double.parseDouble(parts.get(1));
            isInt = !key.contains(".");
        }

        public static RangeMatchValue of(String key) {
            return new RangeMatchValue(key);
        }

        @Override
        public boolean is(String item) {
            if (isInt && item.contains(".")) {
                return false;
            }
            double value;
            try {
                value = Double.parseDouble(item);
            } catch (NumberFormatException e) {
                return false;
            }
            return start <= value && value <= end;
        }

        /**
         * @return the midpoint of the range; for integer ranges it is truncated toward zero,
         *         which keeps it between the two integer bounds
         */
        @Override
        public String getSample() {
            final double midpoint = start + (end - start) / 2;
            return isInt ? String.valueOf((long) midpoint) : String.valueOf(midpoint);
        }
    }

    static final Splitter LIST = Splitter.on(", ").trimResults();
    static final Splitter SPLIT_SPACE_OR_COMMA = Splitter.on(Pattern.compile("[, ]")).omitEmptyStrings().trimResults();

    /** Matches any one of a fixed list of strings, written separated by {@code ", "}. */
    static public class LiteralMatchValue extends MatchValue {
        private final Set<String> items;

        @Override
        public String getName() {
            return "literal/" + Joiner.on(", ").join(items);
        }

        private LiteralMatchValue(String key) {
            items = ImmutableSet.copyOf(LIST.splitToList(key));
        }

        public static LiteralMatchValue of(String key) {
            return new LiteralMatchValue(key);
        }

        @Override
        public boolean is(String item) {
            return items.contains(item);
        }

        @Override
        public String getSample() {
            return items.iterator().next();
        }
    }

    /** Matches strings that a regular expression matches in full. */
    static public class RegexMatchValue extends MatchValue {
        private final Pattern pattern;

        @Override
        public String getName() {
            return "regex/" + pattern;
        }

        private RegexMatchValue(String key) {
            pattern = Pattern.compile(key);
        }

        public static RegexMatchValue of(String key) {
            return new RegexMatchValue(key);
        }

        @Override
        public boolean is(String item) {
            return pattern.matcher(item).matches();
        }
    }

    /** Matches strings that {@link VersionInfo#getInstance(String)} accepts. */
    static public class VersionMatchValue extends MatchValue {

        @Override
        public String getName() {
            return "version";
        }

        private VersionMatchValue(String key) {
        }

        /**
         * @param key must be null; this matcher takes no parameter
         * @throws IllegalArgumentException if a parameter is supplied
         */
        public static VersionMatchValue of(String key) {
            if (key != null) {
                throw new IllegalArgumentException("No parameter allowed");
            }
            return new VersionMatchValue(key);
        }

        @Override
        public boolean is(String item) {
            try {
                VersionInfo.getInstance(item);
            } catch (Exception e) {
                return false;
            }
            return true;
        }
    }

    /** Matches the metazone ids reported by {@link SupplementalDataInfo#getAllMetazones()}. */
    static public class MetazoneMatchValue extends MatchValue {
        private Set<String> valid;

        @Override
        public String getName() {
            return "metazone";
        }

        /**
         * @param key must be null; this matcher takes no parameter
         * @throws IllegalArgumentException if a parameter is supplied
         */
        public static MetazoneMatchValue of(String key) {
            if (key != null) {
                throw new IllegalArgumentException("No parameter allowed");
            }
            return new MetazoneMatchValue();
        }

        @Override
        public synchronized boolean is(String item) {
            // must lazy-eval
            if (valid == null) {
                SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
                valid = sdi.getAllMetazones();
            }
            return valid.contains(item);
        }
    }

    /** Matches every string; the optional key only appears in the name. */
    static public class AnyMatchValue extends MatchValue {
        final String key;

        public AnyMatchValue(String key) {
            this.key = key;
        }

        @Override
        public String getName() {
            return "any" + (key == null ? "" : "/" + key);
        }

        public static AnyMatchValue of(String key) {
            return new AnyMatchValue(key);
        }

        @Override
        public boolean is(String item) {
            return true;
        }
    }

    static final Splitter SPACE_SPLITTER = Splitter.on(' ').omitEmptyStrings();

    /** Matches a space-separated list in which every element is accepted by a subtest. */
    static public class SetMatchValue extends MatchValue {
        final MatchValue subtest;

        public SetMatchValue(MatchValue subtest) {
            this.subtest = subtest;
        }

        @Override
        public String getName() {
            return "set/" + subtest.getName();
        }

        /**
         * @param key a full match command for the element subtest, as for
         *        {@link MatchValue#of(String)}
         */
        public static SetMatchValue of(String key) {
            return new SetMatchValue(MatchValue.of(key));
        }

        @Override
        public boolean is(String items) {
            return and(subtest, SPACE_SPLITTER.split(items));
        }

        @Override
        public String getSample() {
            return subtest.getSample();
        }
    }

    static final Splitter BARS_SPLITTER = Splitter.on("||").omitEmptyStrings();

    /** Matches a string accepted by at least one of several subtests, written separated by {@code ||}. */
    static public class OrMatchValue extends MatchValue {
        final List<MatchValue> subtests;

        private OrMatchValue(Iterator<MatchValue> iterator) {
            this.subtests = ImmutableList.copyOf(iterator);
        }

        @Override
        public String getName() {
            return "or/" + Joiner.on("||").join(subtests);
        }

        /**
         * @param key match commands separated by {@code ||}; each is parsed with
         *        {@link MatchValue#of(String)}
         */
        public static OrMatchValue of(String key) {
            return new OrMatchValue(BARS_SPLITTER.splitToList(key)
                .stream()
                .map(MatchValue::of)
                .iterator());
        }

        @Override
        public boolean is(String item) {
            for (MatchValue subtest : subtests) {
                if (subtest.is(item)) {
                    return true;
                }
            }
            return false;
        }

        /**
         * @return the first subtest sample that is not {@link #DEFAULT_SAMPLE}, or
         *         {@link #DEFAULT_SAMPLE} if there is none
         */
        @Override
        public String getSample() {
            for (MatchValue subtest : subtests) {
                String result = subtest.getSample();
                if (!result.equals(DEFAULT_SAMPLE)) {
                    return result;
                }
            }
            return DEFAULT_SAMPLE;
        }
    }

    /** Matches strings that a date/time pattern (in the root locale) can parse. */
    static public class TimeMatchValue extends MatchValue {
        final String sample;
        final SimpleDateFormat formatter;

        /**
         * @param key the date/time pattern; the sample is the construction time formatted
         *        with it
         */
        public TimeMatchValue(String key) {
            formatter = new SimpleDateFormat(key, ULocale.ROOT);
            sample = formatter.format(new Date());
        }

        @Override
        public String getName() {
            return "time/" + formatter.toPattern();
        }

        public static TimeMatchValue of(String key) {
            return new TimeMatchValue(key);
        }

        @Override
        public boolean is(String item) {
            try {
                // Date formats are not safe for concurrent use, so serialize parsing on the
                // shared formatter.
                synchronized (formatter) {
                    formatter.parse(item);
                }
                return true;
            } catch (ParseException e) {
                return false;
            }
        }

        @Override
        public String getSample() {
            return sample;
        }
    }

    /** Matches strings that consist entirely of elements of a {@link UnicodeSet}. */
    static public class UnicodeSpanMatchValue extends MatchValue {
        final String sample;
        final UnicodeSet uset;

        /**
         * @param key the UnicodeSet pattern; the sample is the first code point of the set's
         *        first range
         */
        public UnicodeSpanMatchValue(String key) {
            // Frozen: the set is never modified after construction.
            uset = new UnicodeSet(key).freeze();
            sample = new StringBuilder().appendCodePoint(uset.getRangeStart(0)).toString();
        }

        @Override
        public String getName() {
            return "unicodeset/" + uset;
        }

        public static UnicodeSpanMatchValue of(String key) {
            return new UnicodeSpanMatchValue(key);
        }

        @Override
        public boolean is(String item) {
            // The item matches when the contained span covers its whole length.
            return uset.span(item, SpanCondition.CONTAINED) == item.length();
        }

        @Override
        public String getSample() {
            return sample;
        }
    }

}