• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
import java.text.ParseException;
import java.text.ParsePosition;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Pattern;

import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.Validity.Status;

import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.text.SimpleDateFormat;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSet.SpanCondition;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.VersionInfo;
34 
35 public abstract class MatchValue implements Predicate<String> {
    /** Sample returned by {@link #getSample()} when a subclass does not supply its own. */
    public static final String DEFAULT_SAMPLE = "❓";

    /**
     * Tests whether the given string is accepted by this matcher.
     *
     * @param item the value to test
     * @return true if the value matches
     */
    @Override
    public abstract boolean is(String item);

    /**
     * Returns the name of this matcher. {@link #of(String)} compares this name with the
     * command it was given and reports any difference on standard error.
     *
     * @return the matcher name
     */
    public abstract String getName();

    /**
     * Returns a sample value for this matcher.
     *
     * @return {@link #DEFAULT_SAMPLE} in this base implementation
     */
    public String getSample() {
        return DEFAULT_SAMPLE;
    }

    /** Returns the same text as {@link #getName()}. */
    @Override
    public String toString() {
        return getName();
    }
50 
of(String command)51     public static MatchValue of(String command) {
52         String originalArg = command;
53         int colonPos = command.indexOf('/');
54         String subargument = null;
55         if (colonPos >= 0) {
56             subargument = command.substring(colonPos + 1);
57             command = command.substring(0, colonPos);
58         }
59         try {
60             MatchValue result = null;
61             switch (command) {
62             case "any":
63                 result = AnyMatchValue.of(subargument);
64                 break;
65             case "set":
66                 result =  SetMatchValue.of(subargument);
67                 break;
68             case "validity":
69                 result =  ValidityMatchValue.of(subargument);
70                 break;
71             case "bcp47":
72                 result =  Bcp47MatchValue.of(subargument);
73                 break;
74             case "range":
75                 result =  RangeMatchValue.of(subargument);
76                 break;
77             case "literal":
78                 result =  LiteralMatchValue.of(subargument);
79                 break;
80             case "regex":
81                 result =  RegexMatchValue.of(subargument);
82                 break;
83             case "metazone":
84                 result =  MetazoneMatchValue.of(subargument);
85                 break;
86             case "version":
87                 result =  VersionMatchValue.of(subargument);
88                 break;
89             case "time":
90                 result =  TimeMatchValue.of(subargument);
91                 break;
92             case "or":
93                 result =  OrMatchValue.of(subargument);
94                 break;
95             case "unicodeset":
96                 result =  UnicodeSpanMatchValue.of(subargument);
97                 break;
98             default:
99                 throw new IllegalArgumentException("Illegal/Unimplemented match type: " + originalArg);
100             }
101             if (!originalArg.equals(result.getName())) {
102                 System.err.println("Non-standard form or error: " + originalArg + " ==> " + result.getName());
103             }
104             return result;
105         } catch (Exception e) {
106             throw new IllegalArgumentException("Problem with: " + originalArg, e);
107         }
108     }
109 
110     public static class LocaleMatchValue extends MatchValue {
111         private final Predicate<String> lang;
112         private final Predicate<String> script;
113         private final Predicate<String> region;
114         private final Predicate<String> variant;
115 
LocaleMatchValue()116         public LocaleMatchValue() {
117             this(null);
118         }
119 
LocaleMatchValue(Set<Status> statuses)120         public LocaleMatchValue(Set<Status> statuses) {
121             lang = new ValidityMatchValue(LstrType.language, statuses, false);
122             script = new ValidityMatchValue(LstrType.script, statuses, false);
123             region = new ValidityMatchValue(LstrType.region, statuses, false);
124             variant = new ValidityMatchValue(LstrType.variant, statuses, false);
125         }
126 
127         @Override
getName()128         public String getName() {
129             return "validity/locale";
130         }
131 
132         @Override
is(String item)133         public boolean is(String item) {
134             if (!item.contains("_")) {
135                 return lang.is(item);
136             }
137             LanguageTagParser ltp;
138             try {
139                 ltp = new LanguageTagParser().set(item);
140             } catch (Exception e) {
141                 return false;
142             }
143             return lang.is(ltp.getLanguage())
144                 && (ltp.getScript().isEmpty()
145                     || script.is(ltp.getScript()))
146                 && (ltp.getRegion().isEmpty()
147                     || region.is(ltp.getRegion()))
148                 && (ltp.getVariants().isEmpty()
149                     || and(variant,ltp.getVariants()))
150                 && ltp.getExtensions().isEmpty()
151                 && ltp.getLocaleExtensions().isEmpty()
152                 ;
153         }
154 
155         @Override
getSample()156         public String getSample() {
157             return "de";
158         }
159     }
160 
    // TODO remove these if possible — ticket/10120
    // Codes that ValidityMatchValue.is accepts outright, before it consults the Validity data:
    // SCRIPT_HACK when checking scripts, VARIANT_HACK when checking variants.
    static final Set<String> SCRIPT_HACK = ImmutableSet.of(
        "Afak", "Blis", "Cirt", "Cyrs", "Egyd", "Egyh", "Geok", "Inds", "Jurc", "Kpel", "Latf", "Latg",
        "Loma", "Maya", "Moon", "Nkgb", "Phlv", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Visp", "Wole");
    static final Set<String> VARIANT_HACK = ImmutableSet.of("POSIX", "REVISED", "SAAHO");
166 
167     /**
168      * Returns true if ALL items match the predicate
169      * @param <T>
170      * @param predicate predicate to check
171      * @param items items to be tested with the predicate
172      * @return
173      */
and(Predicate<T> predicate, Iterable<T> items)174     public static <T> boolean and(Predicate<T> predicate, Iterable<T> items) {
175         for (T item : items) {
176             if (!predicate.is(item)) {
177                 return false;
178             }
179         }
180         return true;
181     }
182 
183     /**
184      * Returns true if ANY items match the predicate
185      * @param <T>
186      * @param predicate predicate to check
187      * @param items items to be tested with the predicate
188      * @return
189      */
or(Predicate<T> predicate, Iterable<T> items)190     public static <T> boolean or(Predicate<T> predicate, Iterable<T> items) {
191         for (T item : items) {
192             if (predicate.is(item)) {
193                 return true;
194             }
195         }
196         return false;
197     }
198 
199     public static class EnumParser<T extends Enum> {
200         private final Class<T> aClass;
201         private final Set<T> all;
202 
EnumParser(Class<T> aClass)203         private EnumParser(Class<T> aClass) {
204             this.aClass = aClass;
205             all = ImmutableSet.copyOf(EnumSet.allOf(aClass));
206         }
207 
of(Class<T> aClass)208         public static <T> EnumParser of(Class<T> aClass) {
209             return new EnumParser(aClass);
210         }
211 
parse(String text)212         public Set<T> parse(String text) {
213             Set<T> statuses = EnumSet.noneOf(aClass);
214             boolean negative = text.startsWith("!");
215             if (negative) {
216                 text = text.substring(1);
217             }
218             for (String item : SPLIT_SPACE_OR_COMMA.split(text)) {
219                 statuses.add(getItem(item));
220             }
221             if (negative) {
222                 TreeSet<T> temp = new TreeSet<>(all);
223                 temp.removeAll(statuses);
224                 statuses = temp;
225             }
226             return ImmutableSet.copyOf(statuses);
227         }
getItem(String text)228         private T getItem(String text) {
229             try {
230                 return (T) aClass.getMethod("valueOf", String.class).invoke(null, text);
231             } catch (Exception e) {
232                 throw new IllegalArgumentException(e);
233             }
234         }
235 
format(Set<?> set)236         public String format(Set<?> set) {
237             if (set.size() > all.size()/2) {
238                 TreeSet<T> temp = new TreeSet<>(all);
239                 temp.removeAll(set);
240                 return "!" + Joiner.on(' ').join(temp);
241             } else {
242                 return Joiner.on(' ').join(set);
243             }
244         }
245 
isAll(Set<Status> statuses)246         public boolean isAll(Set<Status> statuses) {
247             return statuses.equals(all);
248         }
249     }
250 
251     static public class ValidityMatchValue extends MatchValue {
252         private final LstrType type;
253         private final boolean shortId;
254         private final Set<Status> statuses;
255         private static Map<String, Status> shortCodeToStatus;
256         private static final EnumParser<Status> enumParser = EnumParser.of(Status.class);
257 
258         @Override
getName()259         public String getName() {
260             return "validity/"
261                 + (shortId ? "short-" : "") + type.toString()
262                 + (enumParser.isAll(statuses) ? "" : "/" + enumParser.format(statuses));
263         }
264 
ValidityMatchValue(LstrType type)265         private ValidityMatchValue(LstrType type) {
266             this(type, null, false);
267         }
268 
ValidityMatchValue(LstrType type, Set<Status> statuses, boolean shortId)269         private ValidityMatchValue(LstrType type, Set<Status> statuses, boolean shortId) {
270             this.type = type;
271             if (type != LstrType.unit && shortId) {
272                 throw new IllegalArgumentException("short- not supported except for units");
273             }
274             this.shortId = shortId;
275             this.statuses = statuses == null ? EnumSet.allOf(Status.class) : ImmutableSet.copyOf(statuses);
276         }
277 
of(String typeName)278         public static MatchValue of(String typeName) {
279             if (typeName.equals("locale")) {
280                 return new LocaleMatchValue();
281             }
282             int slashPos = typeName.indexOf('/');
283             Set<Status> statuses = null;
284             if (slashPos > 0) {
285                 statuses = enumParser.parse(typeName.substring(slashPos+1));
286                 typeName = typeName.substring(0, slashPos);
287             }
288             boolean shortId = typeName.startsWith("short-");
289             if (shortId) {
290                 typeName = typeName.substring(6);
291             }
292             LstrType type = LstrType.fromString(typeName);
293             return new ValidityMatchValue(type, statuses, shortId);
294         }
295 
296         @Override
is(String item)297         public boolean is(String item) {
298             // TODO handle deprecated
299             switch(type) {
300             case script:
301                 if (SCRIPT_HACK.contains(item)) {
302                     return true;
303                 }
304                 break;
305             case variant:
306                 if (VARIANT_HACK.contains(item)) {
307                     return true;
308                 }
309                 item = item.toLowerCase(Locale.ROOT);
310                 break;
311             case language:
312                 item = item.equals("root") ? "und" : item;
313                 break;
314             case unit:
315                 if (shortId) {
316                     if (shortCodeToStatus == null) { // lazy evaluation to avoid circular dependencies
317                         Map<String, Status> _shortCodeToStatus = new TreeMap<>();
318                         for (Entry<String, Status> entry : Validity.getInstance().getCodeToStatus(LstrType.unit).entrySet()) {
319                             String key = entry.getKey();
320                             Status status = entry.getValue();
321                             final String shortKey = key.substring(key.indexOf('-')+1);
322                             Status old = _shortCodeToStatus.get(shortKey);
323                             if (old == null) {
324                                 _shortCodeToStatus.put(shortKey, status);
325 //                            } else {
326 //                                System.out.println("Skipping duplicate status: " + key + " old: " + old + " new: " + status);
327                             }
328                         }
329                         shortCodeToStatus = ImmutableMap.copyOf(_shortCodeToStatus);
330                     }
331                     final Status status = shortCodeToStatus.get(item);
332                     return status != null && statuses.contains(status);
333                 }
334             default: break;
335             }
336             final Status status = Validity.getInstance().getCodeToStatus(type).get(item);
337             return status != null && statuses.contains(status);
338         }
339 
340         @Override
getSample()341         public String getSample() {
342             return Validity.getInstance().getCodeToStatus(type).keySet().iterator().next();
343         }
344     }
345 
346     static public class Bcp47MatchValue extends MatchValue {
347         private final String key;
348         private Set<String> valid;
349 
350         @Override
getName()351         public String getName() {
352             return "bcp47/" + key;
353         }
354 
Bcp47MatchValue(String key)355         private Bcp47MatchValue(String key) {
356             this.key = key;
357         }
358 
of(String key)359         public static Bcp47MatchValue of(String key) {
360             return new Bcp47MatchValue(key);
361         }
362 
363         @Override
is(String item)364         public synchronized boolean is(String item) {
365             if (valid == null) { // must lazy-eval
366                 SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
367                 Relation<String, String> keyToSubtypes = sdi.getBcp47Keys();
368                 Relation<R2<String, String>, String> keySubtypeToAliases = sdi.getBcp47Aliases();
369                 Map<String, String> aliasesToKey = new HashMap<>();
370                 for (String key : keyToSubtypes.keySet()) {
371                     Set<String> aliases = keySubtypeToAliases.get(Row.of(key, ""));
372                     if (aliases != null) {
373                         for (String alias : aliases) {
374                             aliasesToKey.put(alias, key);
375                         }
376                     }
377                 }
378                 Set<String> keyList;
379                 Set<String> subtypeList;
380                 // TODO handle deprecated
381                 // fix data to remove aliases, then narrow this
382                 switch(key) {
383                 case "anykey":
384                     keyList = keyToSubtypes.keySet();
385                     valid = new TreeSet<>(keyList);
386                     for (String keyItem : keyList) {
387                         addAliases(keySubtypeToAliases, keyItem, "");
388                     }
389                     valid.add("x"); // TODO: investigate adding to bcp47 data files
390                     break;
391                 case "anyvalue":
392                     valid = new TreeSet<>(keyToSubtypes.values());
393                     for (String keyItem : keyToSubtypes.keySet()) {
394                         subtypeList = keyToSubtypes.get(keyItem);
395 //                        if (subtypeList == null) {
396 //                            continue;
397 //                        }
398                         for (String subtypeItem : subtypeList) {
399                             addAliases(keySubtypeToAliases, keyItem, subtypeItem);
400                         }
401                     }
402                     valid.add("generic"); // TODO: investigate adding to bcp47 data files
403                     break;
404                 default:
405                     subtypeList = keyToSubtypes.get(key);
406                     if (subtypeList == null) {
407                         String key2 = aliasesToKey.get(key);
408                         if (key2 != null) {
409                             subtypeList = keyToSubtypes.get(key2);
410                         }
411                     }
412                     try {
413                         valid = new TreeSet<>(subtypeList);
414                     } catch (Exception e) {
415                         throw new IllegalArgumentException("Illegal keyValue: " + getName());
416                     }
417                     for (String subtypeItem : subtypeList) {
418                         addAliases(keySubtypeToAliases, key, subtypeItem);
419                     }
420                     switch(key) {
421                     case "ca":
422                         valid.add("generic"); // TODO: investigate adding to bcp47 data files
423                         break;
424                     }
425                     break;
426                 }
427                 valid = ImmutableSet.copyOf(valid);
428             }
429             //<key name="tz" description="Time zone key" alias="timezone">
430             //  <type name="adalv" description="Andorra" alias="Europe/Andorra"/>
431             // <key name="nu" description="Numbering system type key" alias="numbers">
432             //  <type name="adlm" description="Adlam digits" since="30"/>
433             return valid.contains(item);
434         }
435 
addAliases(Relation<R2<String, String>, String> keySubtypeToAliases, String keyItem, String subtype)436         private void addAliases(Relation<R2<String, String>, String> keySubtypeToAliases, String keyItem, String subtype) {
437             Set<String> aliases = keySubtypeToAliases.get(Row.of(keyItem, subtype));
438             if (aliases != null && !aliases.isEmpty()) {
439                 valid.addAll(aliases);
440             }
441         }
442         @Override
getSample()443         public String getSample() {
444             is("X"); // force load data
445             return valid == null ? "XX"
446                 : valid.iterator().next();
447         }
448     }
449 
    // Splits a range description on '~' and trims each part; used by RangeMatchValue.
    static final Splitter RANGE = Splitter.on('~').trimResults();

    // TODO: have Range that can be ints, doubles, or versions
453     static public class RangeMatchValue extends MatchValue {
454         private final double start;
455         private final double end;
456         private final boolean isInt;
457 
458         @Override
getName()459         public String getName() {
460             return "range/" + (isInt ? (long)start + "~" + (long)end : start + "~" + end);
461         }
462 
RangeMatchValue(String key)463         private RangeMatchValue(String key) {
464             Iterator<String> parts = RANGE.split(key).iterator();
465             start = Double.parseDouble(parts.next());
466             end = Double.parseDouble(parts.next());
467             isInt = !key.contains(".");
468             if (parts.hasNext()) {
469                 throw new IllegalArgumentException("Range must be of form <int>~<int>");
470             }
471         }
472 
of(String key)473         public static RangeMatchValue of(String key) {
474             return new RangeMatchValue(key);
475         }
476 
477         @Override
is(String item)478         public boolean is(String item) {
479             if (isInt && item.contains(".")) {
480                 return false;
481             }
482             double value;
483             try {
484                 value = Double.parseDouble(item);
485             } catch (NumberFormatException e) {
486                 return false;
487             }
488             return start <= value && value <= end;
489         }
490         @Override
getSample()491         public String getSample() {
492             return String.valueOf((int)(start + end)/2);
493         }
494     }
495 
    // Splits on the two-character separator ", " and trims each piece; used by LiteralMatchValue.
    static final Splitter LIST = Splitter.on(", ").trimResults();
    // Splits on single commas or spaces, trimming pieces and dropping empty ones; used by EnumParser.parse.
    static final Splitter SPLIT_SPACE_OR_COMMA = Splitter.on(Pattern.compile("[, ]")).omitEmptyStrings().trimResults();
498 
499     static public class LiteralMatchValue extends MatchValue {
500         private final Set<String> items;
501 
502         @Override
getName()503         public String getName() {
504             return "literal/" + Joiner.on(", ").join(items);
505         }
506 
LiteralMatchValue(String key)507         private LiteralMatchValue(String key) {
508             items = ImmutableSet.copyOf(LIST.splitToList(key));
509         }
510 
of(String key)511         public static LiteralMatchValue of(String key) {
512             return new LiteralMatchValue(key);
513         }
514 
515         @Override
is(String item)516         public boolean is(String item) {
517             return items.contains(item);
518         }
519 
520         @Override
getSample()521         public String getSample() {
522             return items.iterator().next();
523         }
524     }
525 
526     static public class RegexMatchValue extends MatchValue {
527         private final Pattern pattern;
528 
529         @Override
getName()530         public String getName() {
531             return "regex/" + pattern;
532         }
533 
RegexMatchValue(String key)534         private RegexMatchValue(String key) {
535             pattern = Pattern.compile(key);
536         }
537 
of(String key)538         public static RegexMatchValue of(String key) {
539             return new RegexMatchValue(key);
540         }
541 
542         @Override
is(String item)543         public boolean is(String item) {
544             return pattern.matcher(item).matches();
545         }
546     }
547 
548     static public class VersionMatchValue extends MatchValue {
549 
550         @Override
getName()551         public String getName() {
552             return "version";
553         }
554 
VersionMatchValue(String key)555         private VersionMatchValue(String key) {
556         }
557 
of(String key)558         public static VersionMatchValue of(String key) {
559             if (key != null) {
560                 throw new IllegalArgumentException("No parameter allowed");
561             }
562             return new VersionMatchValue(key);
563         }
564 
565         @Override
is(String item)566         public boolean is(String item) {
567             try {
568                 VersionInfo.getInstance(item);
569             } catch (Exception e) {
570                 return false;
571             }
572             return true;
573         }
574     }
575 
576     static public class MetazoneMatchValue extends MatchValue {
577         private Set<String> valid;
578 
579         @Override
getName()580         public String getName() {
581             return "metazone";
582         }
583 
of(String key)584         public static MetazoneMatchValue of(String key) {
585             if (key != null) {
586                 throw new IllegalArgumentException("No parameter allowed");
587             }
588             return new MetazoneMatchValue();
589         }
590 
591         @Override
is(String item)592         public synchronized boolean is(String item) {
593             // must lazy-eval
594             if (valid == null) {
595                 SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
596                 valid = sdi.getAllMetazones();
597             }
598             return valid.contains(item);
599         }
600     }
601 
602     static public class AnyMatchValue extends MatchValue {
603         final String key;
604 
AnyMatchValue(String key)605         public AnyMatchValue(String key) {
606             this.key = key;
607         }
608 
609         @Override
getName()610         public String getName() {
611             return "any" + (key == null ? "" : "/" + key);
612         }
613 
of(String key)614         public static AnyMatchValue of(String key) {
615             return new AnyMatchValue(key);
616         }
617 
618         @Override
is(String item)619         public boolean is(String item) {
620             return true;
621         }
622     }
623 
    // Splits on single spaces, dropping empty pieces; used by SetMatchValue.is.
    static final Splitter SPACE_SPLITTER = Splitter.on(' ').omitEmptyStrings();
625 
626     static public class SetMatchValue extends MatchValue {
627         final MatchValue subtest;
628 
SetMatchValue(MatchValue subtest)629         public SetMatchValue(MatchValue subtest) {
630             this.subtest = subtest;
631         }
632 
633         @Override
getName()634         public String getName() {
635             return "set/"+subtest.getName();
636         }
637 
of(String key)638         public static SetMatchValue of(String key) {
639             return new SetMatchValue(MatchValue.of(key));
640         }
641 
642         @Override
is(String items)643         public  boolean is(String items) {
644             List<String> splitItems = SPACE_SPLITTER.splitToList(items);
645             if( (new HashSet<>(splitItems)).size() != splitItems.size() ) {
646                 throw new IllegalArgumentException("Set contains duplicates: " + items);
647             }
648             return and(subtest, splitItems);
649         }
650 
651         @Override
getSample()652         public String getSample() {
653             return subtest.getSample();
654         }
655     }
656 
    // Splits alternatives on "||", dropping empty pieces; used by OrMatchValue.of.
    static final Splitter BARS_SPLITTER = Splitter.on("||").omitEmptyStrings();
658 
659     static public class OrMatchValue extends MatchValue {
660         final List<MatchValue> subtests;
661 
OrMatchValue(Iterator<MatchValue> iterator)662         private OrMatchValue(Iterator<MatchValue> iterator) {
663             this.subtests = ImmutableList.copyOf(iterator);
664         }
665 
666         @Override
getName()667         public String getName() {
668             return "or/"+ Joiner.on("||").join(subtests);
669         }
670 
of(String key)671         public static OrMatchValue of(String key) {
672             return new OrMatchValue(BARS_SPLITTER.splitToList(key)
673                 .stream()
674                 .map(k -> MatchValue.of(k))
675                 .iterator());
676         }
677 
678         @Override
is(String item)679         public  boolean is(String item) {
680             for (MatchValue subtest : subtests) {
681                 if (subtest.is(item)) {
682                     return true;
683                 }
684             }
685             return false;
686         }
687         @Override
getSample()688         public String getSample() {
689             for (MatchValue subtest : subtests) {
690                 String result = subtest.getSample();
691                 if (!result.equals(DEFAULT_SAMPLE)) {
692                     return result;
693                 }
694             }
695             return DEFAULT_SAMPLE;
696         }
697     }
698 
699     static public class TimeMatchValue extends MatchValue {
700         final String sample;
701         final SimpleDateFormat formatter;
702 
TimeMatchValue(String key)703         public TimeMatchValue(String key) {
704             formatter = new SimpleDateFormat(key,ULocale.ROOT);
705             sample = formatter.format(new Date());
706         }
707 
708         @Override
getName()709         public String getName() {
710             return "time/" + formatter.toPattern();
711         }
712 
of(String key)713         public static TimeMatchValue of(String key) {
714             return new TimeMatchValue(key);
715         }
716 
717         @Override
is(String item)718         public  boolean is(String item) {
719             try {
720                 formatter.parse(item);
721                 return true;
722             } catch (ParseException e) {
723                 return false;
724             }
725         }
726         @Override
getSample()727         public String getSample() {
728             return sample;
729         }
730     }
731 
732     static public class UnicodeSpanMatchValue extends MatchValue {
733         final String sample;
734         final UnicodeSet uset;
735 
UnicodeSpanMatchValue(String key)736         public UnicodeSpanMatchValue(String key) {
737             uset = new UnicodeSet(key);
738             sample = new StringBuilder().appendCodePoint(uset.getRangeStart(0)).toString();
739         }
740 
741         @Override
getName()742         public String getName() {
743             return "unicodeset/" + uset;
744         }
745 
of(String key)746         public static UnicodeSpanMatchValue of(String key) {
747             return new UnicodeSpanMatchValue(key);
748         }
749 
750         @Override
is(String item)751         public  boolean is(String item) {
752             return uset.span(item, SpanCondition.CONTAINED) == item.length();
753         }
754 
755         @Override
getSample()756         public String getSample() {
757             return sample;
758         }
759     }
760 
761 }
762