1 package org.unicode.cldr.util; 2 3 import java.text.ParseException; 4 import java.util.Date; 5 import java.util.EnumSet; 6 import java.util.HashMap; 7 import java.util.HashSet; 8 import java.util.Iterator; 9 import java.util.List; 10 import java.util.Locale; 11 import java.util.Map; 12 import java.util.Map.Entry; 13 import java.util.Set; 14 import java.util.TreeMap; 15 import java.util.TreeSet; 16 import java.util.regex.Pattern; 17 18 import org.unicode.cldr.util.StandardCodes.LstrType; 19 import org.unicode.cldr.util.Validity.Status; 20 21 import com.google.common.base.Joiner; 22 import com.google.common.base.Splitter; 23 import com.google.common.collect.ImmutableList; 24 import com.google.common.collect.ImmutableMap; 25 import com.google.common.collect.ImmutableSet; 26 import com.ibm.icu.impl.Relation; 27 import com.ibm.icu.impl.Row; 28 import com.ibm.icu.impl.Row.R2; 29 import com.ibm.icu.text.SimpleDateFormat; 30 import com.ibm.icu.text.UnicodeSet; 31 import com.ibm.icu.text.UnicodeSet.SpanCondition; 32 import com.ibm.icu.util.ULocale; 33 import com.ibm.icu.util.VersionInfo; 34 35 public abstract class MatchValue implements Predicate<String> { 36 public static final String DEFAULT_SAMPLE = "❓"; 37 38 @Override is(String item)39 public abstract boolean is(String item); getName()40 public abstract String getName(); 41 getSample()42 public String getSample() { 43 return DEFAULT_SAMPLE; 44 } 45 46 @Override toString()47 public String toString() { 48 return getName(); 49 } 50 of(String command)51 public static MatchValue of(String command) { 52 String originalArg = command; 53 int colonPos = command.indexOf('/'); 54 String subargument = null; 55 if (colonPos >= 0) { 56 subargument = command.substring(colonPos + 1); 57 command = command.substring(0, colonPos); 58 } 59 try { 60 MatchValue result = null; 61 switch (command) { 62 case "any": 63 result = AnyMatchValue.of(subargument); 64 break; 65 case "set": 66 result = SetMatchValue.of(subargument); 67 break; 68 case "validity": 69 result = ValidityMatchValue.of(subargument); 70 break; 71 case "bcp47": 72 result = Bcp47MatchValue.of(subargument); 73 break; 74 case "range": 75 result = RangeMatchValue.of(subargument); 76 break; 77 case "literal": 78 result = LiteralMatchValue.of(subargument); 79 break; 80 case "regex": 81 result = RegexMatchValue.of(subargument); 82 break; 83 case "metazone": 84 result = MetazoneMatchValue.of(subargument); 85 break; 86 case "version": 87 result = VersionMatchValue.of(subargument); 88 break; 89 case "time": 90 result = TimeMatchValue.of(subargument); 91 break; 92 case "or": 93 result = OrMatchValue.of(subargument); 94 break; 95 case "unicodeset": 96 result = UnicodeSpanMatchValue.of(subargument); 97 break; 98 default: 99 throw new IllegalArgumentException("Illegal/Unimplemented match type: " + originalArg); 100 } 101 if (!originalArg.equals(result.getName())) { 102 System.err.println("Non-standard form or error: " + originalArg + " ==> " + result.getName()); 103 } 104 return result; 105 } catch (Exception e) { 106 throw new IllegalArgumentException("Problem with: " + originalArg, e); 107 } 108 } 109 110 public static class LocaleMatchValue extends MatchValue { 111 private final Predicate<String> lang; 112 private final Predicate<String> script; 113 private final Predicate<String> region; 114 private final Predicate<String> variant; 115 LocaleMatchValue()116 public LocaleMatchValue() { 117 this(null); 118 } 119 LocaleMatchValue(Set<Status> statuses)120 public LocaleMatchValue(Set<Status> statuses) { 121 lang = new ValidityMatchValue(LstrType.language, statuses, false); 122 script = new ValidityMatchValue(LstrType.script, statuses, false); 123 region = new ValidityMatchValue(LstrType.region, statuses, false); 124 variant = new ValidityMatchValue(LstrType.variant, statuses, false); 125 } 126 127 @Override getName()128 public String getName() { 129 return "validity/locale"; 130 } 131 132 @Override is(String item)133 public boolean is(String item) { 134 if (!item.contains("_")) { 135 return lang.is(item); 136 } 137 LanguageTagParser ltp; 138 try { 139 ltp = new LanguageTagParser().set(item); 140 } catch (Exception e) { 141 return false; 142 } 143 return lang.is(ltp.getLanguage()) 144 && (ltp.getScript().isEmpty() 145 || script.is(ltp.getScript())) 146 && (ltp.getRegion().isEmpty() 147 || region.is(ltp.getRegion())) 148 && (ltp.getVariants().isEmpty() 149 || and(variant,ltp.getVariants())) 150 && ltp.getExtensions().isEmpty() 151 && ltp.getLocaleExtensions().isEmpty() 152 ; 153 } 154 155 @Override getSample()156 public String getSample() { 157 return "de"; 158 } 159 } 160 161 // TODO remove these if possible — ticket/10120 162 static final Set<String> SCRIPT_HACK = ImmutableSet.of( 163 "Afak", "Blis", "Cirt", "Cyrs", "Egyd", "Egyh", "Geok", "Inds", "Jurc", "Kpel", "Latf", "Latg", 164 "Loma", "Maya", "Moon", "Nkgb", "Phlv", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Visp", "Wole"); 165 static final Set<String> VARIANT_HACK = ImmutableSet.of("POSIX", "REVISED", "SAAHO"); 166 167 /** 168 * Returns true if ALL items match the predicate 169 * @param <T> 170 * @param predicate predicate to check 171 * @param items items to be tested with the predicate 172 * @return 173 */ and(Predicate<T> predicate, Iterable<T> items)174 public static <T> boolean and(Predicate<T> predicate, Iterable<T> items) { 175 for (T item : items) { 176 if (!predicate.is(item)) { 177 return false; 178 } 179 } 180 return true; 181 } 182 183 /** 184 * Returns true if ANY items match the predicate 185 * @param <T> 186 * @param predicate predicate to check 187 * @param items items to be tested with the predicate 188 * @return 189 */ or(Predicate<T> predicate, Iterable<T> items)190 public static <T> boolean or(Predicate<T> predicate, Iterable<T> items) { 191 for (T item : items) { 192 if (predicate.is(item)) { 193 return true; 194 } 195 } 196 return false; 197 } 198 199 public static class EnumParser<T extends Enum> { 200 private final Class<T> aClass; 201 private final Set<T> all; 202 EnumParser(Class<T> aClass)203 private EnumParser(Class<T> aClass) { 204 this.aClass = aClass; 205 all = ImmutableSet.copyOf(EnumSet.allOf(aClass)); 206 } 207 of(Class<T> aClass)208 public static <T> EnumParser of(Class<T> aClass) { 209 return new EnumParser(aClass); 210 } 211 parse(String text)212 public Set<T> parse(String text) { 213 Set<T> statuses = EnumSet.noneOf(aClass); 214 boolean negative = text.startsWith("!"); 215 if (negative) { 216 text = text.substring(1); 217 } 218 for (String item : SPLIT_SPACE_OR_COMMA.split(text)) { 219 statuses.add(getItem(item)); 220 } 221 if (negative) { 222 TreeSet<T> temp = new TreeSet<>(all); 223 temp.removeAll(statuses); 224 statuses = temp; 225 } 226 return ImmutableSet.copyOf(statuses); 227 } getItem(String text)228 private T getItem(String text) { 229 try { 230 return (T) aClass.getMethod("valueOf", String.class).invoke(null, text); 231 } catch (Exception e) { 232 throw new IllegalArgumentException(e); 233 } 234 } 235 format(Set<?> set)236 public String format(Set<?> set) { 237 if (set.size() > all.size()/2) { 238 TreeSet<T> temp = new TreeSet<>(all); 239 temp.removeAll(set); 240 return "!" + Joiner.on(' ').join(temp); 241 } else { 242 return Joiner.on(' ').join(set); 243 } 244 } 245 isAll(Set<Status> statuses)246 public boolean isAll(Set<Status> statuses) { 247 return statuses.equals(all); 248 } 249 } 250 251 static public class ValidityMatchValue extends MatchValue { 252 private final LstrType type; 253 private final boolean shortId; 254 private final Set<Status> statuses; 255 private static Map<String, Status> shortCodeToStatus; 256 private static final EnumParser<Status> enumParser = EnumParser.of(Status.class); 257 258 @Override getName()259 public String getName() { 260 return "validity/" 261 + (shortId ? "short-" : "") + type.toString() 262 + (enumParser.isAll(statuses) ? "" : "/" + enumParser.format(statuses)); 263 } 264 ValidityMatchValue(LstrType type)265 private ValidityMatchValue(LstrType type) { 266 this(type, null, false); 267 } 268 ValidityMatchValue(LstrType type, Set<Status> statuses, boolean shortId)269 private ValidityMatchValue(LstrType type, Set<Status> statuses, boolean shortId) { 270 this.type = type; 271 if (type != LstrType.unit && shortId) { 272 throw new IllegalArgumentException("short- not supported except for units"); 273 } 274 this.shortId = shortId; 275 this.statuses = statuses == null ? EnumSet.allOf(Status.class) : ImmutableSet.copyOf(statuses); 276 } 277 of(String typeName)278 public static MatchValue of(String typeName) { 279 if (typeName.equals("locale")) { 280 return new LocaleMatchValue(); 281 } 282 int slashPos = typeName.indexOf('/'); 283 Set<Status> statuses = null; 284 if (slashPos > 0) { 285 statuses = enumParser.parse(typeName.substring(slashPos+1)); 286 typeName = typeName.substring(0, slashPos); 287 } 288 boolean shortId = typeName.startsWith("short-"); 289 if (shortId) { 290 typeName = typeName.substring(6); 291 } 292 LstrType type = LstrType.fromString(typeName); 293 return new ValidityMatchValue(type, statuses, shortId); 294 } 295 296 @Override is(String item)297 public boolean is(String item) { 298 // TODO handle deprecated 299 switch(type) { 300 case script: 301 if (SCRIPT_HACK.contains(item)) { 302 return true; 303 } 304 break; 305 case variant: 306 if (VARIANT_HACK.contains(item)) { 307 return true; 308 } 309 item = item.toLowerCase(Locale.ROOT); 310 break; 311 case language: 312 item = item.equals("root") ? "und" : item; 313 break; 314 case unit: 315 if (shortId) { 316 if (shortCodeToStatus == null) { // lazy evaluation to avoid circular dependencies 317 Map<String, Status> _shortCodeToStatus = new TreeMap<>(); 318 for (Entry<String, Status> entry : Validity.getInstance().getCodeToStatus(LstrType.unit).entrySet()) { 319 String key = entry.getKey(); 320 Status status = entry.getValue(); 321 final String shortKey = key.substring(key.indexOf('-')+1); 322 Status old = _shortCodeToStatus.get(shortKey); 323 if (old == null) { 324 _shortCodeToStatus.put(shortKey, status); 325 // } else { 326 // System.out.println("Skipping duplicate status: " + key + " old: " + old + " new: " + status); 327 } 328 } 329 shortCodeToStatus = ImmutableMap.copyOf(_shortCodeToStatus); 330 } 331 final Status status = shortCodeToStatus.get(item); 332 return status != null && statuses.contains(status); 333 } 334 default: break; 335 } 336 final Status status = Validity.getInstance().getCodeToStatus(type).get(item); 337 return status != null && statuses.contains(status); 338 } 339 340 @Override getSample()341 public String getSample() { 342 return Validity.getInstance().getCodeToStatus(type).keySet().iterator().next(); 343 } 344 } 345 346 static public class Bcp47MatchValue extends MatchValue { 347 private final String key; 348 private Set<String> valid; 349 350 @Override getName()351 public String getName() { 352 return "bcp47/" + key; 353 } 354 Bcp47MatchValue(String key)355 private Bcp47MatchValue(String key) { 356 this.key = key; 357 } 358 of(String key)359 public static Bcp47MatchValue of(String key) { 360 return new Bcp47MatchValue(key); 361 } 362 363 @Override is(String item)364 public synchronized boolean is(String item) { 365 if (valid == null) { // must lazy-eval 366 SupplementalDataInfo sdi = SupplementalDataInfo.getInstance(); 367 Relation<String, String> keyToSubtypes = sdi.getBcp47Keys(); 368 Relation<R2<String, String>, String> keySubtypeToAliases = sdi.getBcp47Aliases(); 369 Map<String, String> aliasesToKey = new HashMap<>(); 370 for (String key : keyToSubtypes.keySet()) { 371 Set<String> aliases = keySubtypeToAliases.get(Row.of(key, "")); 372 if (aliases != null) { 373 for (String alias : aliases) { 374 aliasesToKey.put(alias, key); 375 } 376 } 377 } 378 Set<String> keyList; 379 Set<String> subtypeList; 380 // TODO handle deprecated 381 // fix data to remove aliases, then narrow this 382 switch(key) { 383 case "anykey": 384 keyList = keyToSubtypes.keySet(); 385 valid = new TreeSet<>(keyList); 386 for (String keyItem : keyList) { 387 addAliases(keySubtypeToAliases, keyItem, ""); 388 } 389 valid.add("x"); // TODO: investigate adding to bcp47 data files 390 break; 391 case "anyvalue": 392 valid = new TreeSet<>(keyToSubtypes.values()); 393 for (String keyItem : keyToSubtypes.keySet()) { 394 subtypeList = keyToSubtypes.get(keyItem); 395 // if (subtypeList == null) { 396 // continue; 397 // } 398 for (String subtypeItem : subtypeList) { 399 addAliases(keySubtypeToAliases, keyItem, subtypeItem); 400 } 401 } 402 valid.add("generic"); // TODO: investigate adding to bcp47 data files 403 break; 404 default: 405 subtypeList = keyToSubtypes.get(key); 406 if (subtypeList == null) { 407 String key2 = aliasesToKey.get(key); 408 if (key2 != null) { 409 subtypeList = keyToSubtypes.get(key2); 410 } 411 } 412 try { 413 valid = new TreeSet<>(subtypeList); 414 } catch (Exception e) { 415 throw new IllegalArgumentException("Illegal keyValue: " + getName()); 416 } 417 for (String subtypeItem : subtypeList) { 418 addAliases(keySubtypeToAliases, key, subtypeItem); 419 } 420 switch(key) { 421 case "ca": 422 valid.add("generic"); // TODO: investigate adding to bcp47 data files 423 break; 424 } 425 break; 426 } 427 valid = ImmutableSet.copyOf(valid); 428 } 429 //<key name="tz" description="Time zone key" alias="timezone"> 430 // <type name="adalv" description="Andorra" alias="Europe/Andorra"/> 431 // <key name="nu" description="Numbering system type key" alias="numbers"> 432 // <type name="adlm" description="Adlam digits" since="30"/> 433 return valid.contains(item); 434 } 435 addAliases(Relation<R2<String, String>, String> keySubtypeToAliases, String keyItem, String subtype)436 private void addAliases(Relation<R2<String, String>, String> keySubtypeToAliases, String keyItem, String subtype) { 437 Set<String> aliases = keySubtypeToAliases.get(Row.of(keyItem, subtype)); 438 if (aliases != null && !aliases.isEmpty()) { 439 valid.addAll(aliases); 440 } 441 } 442 @Override getSample()443 public String getSample() { 444 is("X"); // force load data 445 return valid == null ? "XX" 446 : valid.iterator().next(); 447 } 448 } 449 450 static final Splitter RANGE = Splitter.on('~').trimResults(); 451 452 // TODO: have Range that can be ints, doubles, or versions 453 static public class RangeMatchValue extends MatchValue { 454 private final double start; 455 private final double end; 456 private final boolean isInt; 457 458 @Override getName()459 public String getName() { 460 return "range/" + (isInt ? (long)start + "~" + (long)end : start + "~" + end); 461 } 462 RangeMatchValue(String key)463 private RangeMatchValue(String key) { 464 Iterator<String> parts = RANGE.split(key).iterator(); 465 start = Double.parseDouble(parts.next()); 466 end = Double.parseDouble(parts.next()); 467 isInt = !key.contains("."); 468 if (parts.hasNext()) { 469 throw new IllegalArgumentException("Range must be of form <int>~<int>"); 470 } 471 } 472 of(String key)473 public static RangeMatchValue of(String key) { 474 return new RangeMatchValue(key); 475 } 476 477 @Override is(String item)478 public boolean is(String item) { 479 if (isInt && item.contains(".")) { 480 return false; 481 } 482 double value; 483 try { 484 value = Double.parseDouble(item); 485 } catch (NumberFormatException e) { 486 return false; 487 } 488 return start <= value && value <= end; 489 } 490 @Override getSample()491 public String getSample() { 492 return String.valueOf((int)(start + end)/2); 493 } 494 } 495 496 static final Splitter LIST = Splitter.on(", ").trimResults(); 497 static final Splitter SPLIT_SPACE_OR_COMMA = Splitter.on(Pattern.compile("[, ]")).omitEmptyStrings().trimResults(); 498 499 static public class LiteralMatchValue extends MatchValue { 500 private final Set<String> items; 501 502 @Override getName()503 public String getName() { 504 return "literal/" + Joiner.on(", ").join(items); 505 } 506 LiteralMatchValue(String key)507 private LiteralMatchValue(String key) { 508 items = ImmutableSet.copyOf(LIST.splitToList(key)); 509 } 510 of(String key)511 public static LiteralMatchValue of(String key) { 512 return new LiteralMatchValue(key); 513 } 514 515 @Override is(String item)516 public boolean is(String item) { 517 return items.contains(item); 518 } 519 520 @Override getSample()521 public String getSample() { 522 return items.iterator().next(); 523 } 524 } 525 526 static public class RegexMatchValue extends MatchValue { 527 private final Pattern pattern; 528 529 @Override getName()530 public String getName() { 531 return "regex/" + pattern; 532 } 533 RegexMatchValue(String key)534 private RegexMatchValue(String key) { 535 pattern = Pattern.compile(key); 536 } 537 of(String key)538 public static RegexMatchValue of(String key) { 539 return new RegexMatchValue(key); 540 } 541 542 @Override is(String item)543 public boolean is(String item) { 544 return pattern.matcher(item).matches(); 545 } 546 } 547 548 static public class VersionMatchValue extends MatchValue { 549 550 @Override getName()551 public String getName() { 552 return "version"; 553 } 554 VersionMatchValue(String key)555 private VersionMatchValue(String key) { 556 } 557 of(String key)558 public static VersionMatchValue of(String key) { 559 if (key != null) { 560 throw new IllegalArgumentException("No parameter allowed"); 561 } 562 return new VersionMatchValue(key); 563 } 564 565 @Override is(String item)566 public boolean is(String item) { 567 try { 568 VersionInfo.getInstance(item); 569 } catch (Exception e) { 570 return false; 571 } 572 return true; 573 } 574 } 575 576 static public class MetazoneMatchValue extends MatchValue { 577 private Set<String> valid; 578 579 @Override getName()580 public String getName() { 581 return "metazone"; 582 } 583 of(String key)584 public static MetazoneMatchValue of(String key) { 585 if (key != null) { 586 throw new IllegalArgumentException("No parameter allowed"); 587 } 588 return new MetazoneMatchValue(); 589 } 590 591 @Override is(String item)592 public synchronized boolean is(String item) { 593 // must lazy-eval 594 if (valid == null) { 595 SupplementalDataInfo sdi = SupplementalDataInfo.getInstance(); 596 valid = sdi.getAllMetazones(); 597 } 598 return valid.contains(item); 599 } 600 } 601 602 static public class AnyMatchValue extends MatchValue { 603 final String key; 604 AnyMatchValue(String key)605 public AnyMatchValue(String key) { 606 this.key = key; 607 } 608 609 @Override getName()610 public String getName() { 611 return "any" + (key == null ? "" : "/" + key); 612 } 613 of(String key)614 public static AnyMatchValue of(String key) { 615 return new AnyMatchValue(key); 616 } 617 618 @Override is(String item)619 public boolean is(String item) { 620 return true; 621 } 622 } 623 624 static final Splitter SPACE_SPLITTER = Splitter.on(' ').omitEmptyStrings(); 625 626 static public class SetMatchValue extends MatchValue { 627 final MatchValue subtest; 628 SetMatchValue(MatchValue subtest)629 public SetMatchValue(MatchValue subtest) { 630 this.subtest = subtest; 631 } 632 633 @Override getName()634 public String getName() { 635 return "set/"+subtest.getName(); 636 } 637 of(String key)638 public static SetMatchValue of(String key) { 639 return new SetMatchValue(MatchValue.of(key)); 640 } 641 642 @Override is(String items)643 public boolean is(String items) { 644 List<String> splitItems = SPACE_SPLITTER.splitToList(items); 645 if( (new HashSet<>(splitItems)).size() != splitItems.size() ) { 646 throw new IllegalArgumentException("Set contains duplicates: " + items); 647 } 648 return and(subtest, splitItems); 649 } 650 651 @Override getSample()652 public String getSample() { 653 return subtest.getSample(); 654 } 655 } 656 657 static final Splitter BARS_SPLITTER = Splitter.on("||").omitEmptyStrings(); 658 659 static public class OrMatchValue extends MatchValue { 660 final List<MatchValue> subtests; 661 OrMatchValue(Iterator<MatchValue> iterator)662 private OrMatchValue(Iterator<MatchValue> iterator) { 663 this.subtests = ImmutableList.copyOf(iterator); 664 } 665 666 @Override getName()667 public String getName() { 668 return "or/"+ Joiner.on("||").join(subtests); 669 } 670 of(String key)671 public static OrMatchValue of(String key) { 672 return new OrMatchValue(BARS_SPLITTER.splitToList(key) 673 .stream() 674 .map(k -> MatchValue.of(k)) 675 .iterator()); 676 } 677 678 @Override is(String item)679 public boolean is(String item) { 680 for (MatchValue subtest : subtests) { 681 if (subtest.is(item)) { 682 return true; 683 } 684 } 685 return false; 686 } 687 @Override getSample()688 public String getSample() { 689 for (MatchValue subtest : subtests) { 690 String result = subtest.getSample(); 691 if (!result.equals(DEFAULT_SAMPLE)) { 692 return result; 693 } 694 } 695 return DEFAULT_SAMPLE; 696 } 697 } 698 699 static public class TimeMatchValue extends MatchValue { 700 final String sample; 701 final SimpleDateFormat formatter; 702 TimeMatchValue(String key)703 public TimeMatchValue(String key) { 704 formatter = new SimpleDateFormat(key,ULocale.ROOT); 705 sample = formatter.format(new Date()); 706 } 707 708 @Override getName()709 public String getName() { 710 return "time/" + formatter.toPattern(); 711 } 712 of(String key)713 public static TimeMatchValue of(String key) { 714 return new TimeMatchValue(key); 715 } 716 717 @Override is(String item)718 public boolean is(String item) { 719 try { 720 formatter.parse(item); 721 return true; 722 } catch (ParseException e) { 723 return false; 724 } 725 } 726 @Override getSample()727 public String getSample() { 728 return sample; 729 } 730 } 731 732 static public class UnicodeSpanMatchValue extends MatchValue { 733 final String sample; 734 final UnicodeSet uset; 735 UnicodeSpanMatchValue(String key)736 public UnicodeSpanMatchValue(String key) { 737 uset = new UnicodeSet(key); 738 sample = new StringBuilder().appendCodePoint(uset.getRangeStart(0)).toString(); 739 } 740 741 @Override getName()742 public String getName() { 743 return "unicodeset/" + uset; 744 } 745 of(String key)746 public static UnicodeSpanMatchValue of(String key) { 747 return new UnicodeSpanMatchValue(key); 748 } 749 750 @Override is(String item)751 public boolean is(String item) { 752 return uset.span(item, SpanCondition.CONTAINED) == item.length(); 753 } 754 755 @Override getSample()756 public String getSample() { 757 return sample; 758 } 759 } 760 761 } 762