• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 package org.unicode.cldr.api;
4 
5 import static com.google.common.base.Preconditions.checkArgument;
6 import static com.google.common.base.Preconditions.checkNotNull;
7 import static com.google.common.base.Preconditions.checkPositionIndex;
8 import static com.google.common.base.Preconditions.checkState;
9 import static com.google.common.collect.ImmutableMap.toImmutableMap;
10 import static org.unicode.cldr.api.AttributeKey.keyOf;
11 
12 import java.util.ArrayList;
13 import java.util.LinkedHashMap;
14 import java.util.List;
15 import java.util.Map;
16 import java.util.Optional;
17 import java.util.function.Predicate;
18 import java.util.regex.Matcher;
19 import java.util.regex.Pattern;
20 
21 import com.google.common.collect.ImmutableList;
22 import com.google.common.collect.ImmutableMap;
23 
24 /**
25  * An immutable matcher for {@link CldrPath} instances.
26  *
27  * <p>A path matcher pattern looks like:
28  * <ul>
29  *     <li>{@code //ldml/path/prefix/element[@attribute="value"]}
30  *     <li>{@code //ldml/path/prefix/element[@attribute=*]}
31  *     <li>{@code //ldml/path/prefix/*[@attribute="value"]}
32  *     <li>{@code //ldml/path/prefix/element[@foo=*]/*[@bar="value"]}
33  * </ul>
34  * where element names and attribute values can be wildcards. No capturing of wildcard values is
35  * done, and currently wildcards are limited to the entire field (element name, or attribute value).
36  *
37  * <h2>Matching an exact path</h2>
38  * <pre>{@code
39  * private static final PathMatcher ALIAS =
40  *     PathMatcher.of("//supplementalData/likelySubtags/likelySubtag[@from=*][@to=*]");
41  *
42  * ...
43  *
44  * if (ALIAS.matches(path)) {
45  *     // Path represents a subtag alias.
46  * }
47  * }</pre>
48  *
49  * <h2>Matching path prefixes and hierarchical matching</h2>
50  * <pre>{@code
51  * private static final PathMatcher KEY = PathMatcher.of("//ldmlBCP47/keyword/key[@name=\"ca\"]");
52  * private static final PathMatcher TYPE = key.withSuffix("type[@name=*]");
53  *
54  * ...
55  *
56  * if (KEY.matchesPrefixOf(path)) {
57  *     // Once the KEY prefix is matched, we can just "locally" match the suffix.
58  *     if (TYPE.locallyMatches(path)) {
59  *         // Path is a key-type mapping.
60  *     }
61  * }
62  * }</pre>
63  */
64 public final class PathMatcher {
65     private static final Pattern ROOT_PATH_SPEC = Pattern.compile("//(\\w+)(/|$)");
66 
67     /**
68      * Parses the full or partial (prefix) path pattern into a matcher. Path patterns given to this
69      * method must start with {@code "//xxx/..."} where {@code xxx} is a known CLDR XML path prefix
70      * (such as {@code "ldml"}).
71      */
of(String pattern)72     public static PathMatcher of(String pattern) {
73         Matcher m = ROOT_PATH_SPEC.matcher(pattern);
74         checkArgument(m.lookingAt(), "invalid path pattern: %s", pattern);
75         // This throws IllegalArgumentException if the type isn't valid.
76         CldrDataType.forXmlName(m.group(1));
77         return new PathMatcher(null, parse(pattern.substring(2)));
78     }
79 
80     private final Optional<PathMatcher> parent;
81     private final ImmutableList<Predicate<CldrPath>> elementMatchers;
82     private final int totalElementCount;
83 
PathMatcher(PathMatcher parent, List<Predicate<CldrPath>> elementMatchers)84     private PathMatcher(PathMatcher parent, List<Predicate<CldrPath>> elementMatchers) {
85         this.parent = Optional.ofNullable(parent);
86         this.elementMatchers = ImmutableList.copyOf(elementMatchers);
87         this.totalElementCount =
88             elementMatchers.size() + (parent != null ? parent.totalElementCount : 0);
89     }
90 
91     /**
92      * Extends this matcher by the given path pattern suffix.
93      *
94      * <p>For example if we define:
95      * <pre>{@code
96      * private static final PathMatcher COLLATIONS = PathMatcher.of("//ldml/collations");
97      * private static final PathMatcher RULE = COLLATIONS.extendBy("collation/cr");
98      * }</pre>
99      * Then {@code RULE} would match paths like {@code "//ldml/collations/collation/cr"}.
100      */
withSuffix(String pattern)101     public PathMatcher withSuffix(String pattern) {
102         return new PathMatcher(this, parse(pattern));
103     }
104 
105     /**
106      * Returns whether this matcher fully matches the given path, including taking into account
107      * any parent matchers from which this matcher might have been extended.
108      */
matches(CldrPath path)109     public boolean matches(CldrPath path) {
110         if (!locallyMatches(path)) {
111             return false;
112         }
113         if (!parent.isPresent()) {
114             // No parent means this is a "root" matcher, so we've finished matching everything.
115             return true;
116         }
117         while (path.getLength() > totalElementCount - elementMatchers.size()) {
118             path = path.getParent();
119         }
120         return parent.get().locallyMatches(path);
121     }
122 
123     /**
124      * Returns whether this matcher matches a proper prefix of the given path, including taking
125      * into account any parent matchers from which this matcher might have been extended. Note that
126      * the matcher for {@code "//ldml/foo/bar"} is not considered a prefix match for the path
127      * {@code //ldml/foo/barbaz}, since partial matching of an element name is not allowed.
128      */
matchesPrefixOf(CldrPath path)129     public boolean matchesPrefixOf(CldrPath path) {
130         if (path.getLength() < totalElementCount) {
131             return false;
132         }
133         while (path.getLength() > totalElementCount) {
134             path = path.getParent();
135         }
136         return matches(path);
137     }
138 
139     /**
140      * Returns whether this matcher matches the given path only in relation to the pattern
141      * used to generate this matcher. This method is useful for nested visitation via {@link
142      * org.unicode.cldr.api.CldrData.PrefixVisitor PrefixVisitor} when path prefixes are visited
143      * before values.
144      *
145      * <p>This method assumes that any parent matchers from which this matcher was extended already
146      * match the path. The possible length of the parent matchers is taken into account however to
147      * ensure that the sub-sequence match for this matcher starts at the correct point in the path.
148      */
locallyMatches(CldrPath path)149     public boolean locallyMatches(CldrPath path) {
150         return (path.getLength() == totalElementCount)
151             && matchRegion(path, totalElementCount - elementMatchers.size());
152     }
153 
matchRegion(CldrPath path, int offset)154     private boolean matchRegion(CldrPath path, int offset) {
155         // offset is the path element corresponding the the "top most" element matcher, it
156         // must be in the range 0 ... (path.length() - elementMatchers.size()).
157         checkPositionIndex(offset, path.getLength() - elementMatchers.size());
158         // First jump over the path parents until we find the last matcher.
159         int matchPathLength = offset + elementMatchers.size();
160         while (path.getLength() > matchPathLength) {
161             path = path.getParent();
162         }
163         return matchForward(path, elementMatchers.size() - 1);
164     }
165 
matchForward(CldrPath path, int matcherIndex)166     private boolean matchForward(CldrPath path, int matcherIndex) {
167         if (matcherIndex < 0) {
168             return true;
169         }
170         return matchForward(path.getParent(), matcherIndex - 1)
171             && elementMatchers.get(matcherIndex).test(path);
172     }
173 
174     // --- Parsing of matcher path patterns ----
175 
176     // Make a new, non-interned, unique instance here which we can test by reference to
177     // determine if the argument is to be captured (needed as ImmutableMap prohibits null).
178     // DO NOT change this code to assign "*" as the value directly, it MUST be a new instance.
179     @SuppressWarnings("StringOperationCanBeSimplified")
180     private static final String WILDCARD = new String("*");
181 
182     private static final Pattern ELEMENT_START_REGEX =
183         Pattern.compile("(\\*|[-:\\w]+)(?:/|\\[|$)");
184     private static final Pattern ATTRIBUTE_REGEX =
185         Pattern.compile("\\[@([-:\\w]+)=(?:\\*|\"([^\"]*)\")]");
186 
187     // element := foo, foo[@bar="baz"], foo[@bar=*]
188     // pathspec := element{/element}*
parse(String pattern)189     private static List<Predicate<CldrPath>> parse(String pattern) {
190         List<Predicate<CldrPath>> specs = new ArrayList<>();
191         int pos = 0;
192         do {
193             pos = parse(pattern, pos, specs);
194         } while (pos >= 0);
195         return specs;
196     }
197 
198     // Return next start index or -1.
parse(String pattern, int pos, List<Predicate<CldrPath>> specs)199     private static int parse(String pattern, int pos, List<Predicate<CldrPath>> specs) {
200         Matcher m = ELEMENT_START_REGEX.matcher(pattern).region(pos, pattern.length());
201         checkArgument(m.lookingAt(), "invalid path pattern (index=%s): %s", pos, pattern);
202         String name = m.group(1);
203         Map<String, String> attributes = ImmutableMap.of();
204         pos = m.end(1);
205         if (pos < pattern.length() && pattern.charAt(pos) == '[') {
206             // We have attributes to add.
207             attributes = new LinkedHashMap<>();
208             do {
209                 m = ATTRIBUTE_REGEX.matcher(pattern).region(pos, pattern.length());
210                 checkArgument(m.lookingAt(),
211                     "invalid path pattern (index=%s): %s", pos, pattern);
212                 // Null if we matched the '*' wildcard.
213                 String value = m.group(2);
214                 attributes.put(m.group(1), value != null ? value : WILDCARD);
215                 pos = m.end();
216             } while (pos < pattern.length() && pattern.charAt(pos) == '[');
217         }
218         // Wildcard matching is less efficient because attribute keys cannot be made in advance, so
219         // since it's also very rare, we special case it.
220         Predicate<CldrPath> matcher = name.equals(WILDCARD)
221             ? new WildcardElementMatcher(attributes)::match
222             : new ElementMatcher(name, attributes)::match;
223         specs.add(matcher);
224         if (pos == pattern.length()) {
225             return -1;
226         }
227         checkState(pattern.charAt(pos) == '/',
228             "invalid path pattern (index=%s): %s", pos, pattern);
229         return pos + 1;
230     }
231 
232     // Matcher for path elements like "foo[@bar=*]" where the name is known in advance.
233     private static final class ElementMatcher {
234         private final String name;
235         private final ImmutableMap<AttributeKey, String> attributes;
236 
ElementMatcher(String name, Map<String, String> attributes)237         private ElementMatcher(String name, Map<String, String> attributes) {
238             this.name = checkNotNull(name);
239             this.attributes = attributes.entrySet().stream()
240                 .collect(toImmutableMap(e -> keyOf(name, e.getKey()), Map.Entry::getValue));
241         }
242 
243         @SuppressWarnings("StringEquality")
match(CldrPath path)244         boolean match(CldrPath path) {
245             if (!path.getName().equals(name)) {
246                 return false;
247             }
248             for (Map.Entry<AttributeKey, String> e : attributes.entrySet()) {
249                 String actual = path.get(e.getKey());
250                 if (actual == null) {
251                     return false;
252                 }
253                 String expected = e.getValue();
254                 // DO NOT change this to use expected.equals(WILDCARD).
255                 if (expected != WILDCARD && !expected.equals(actual)) {
256                     return false;
257                 }
258             }
259             return true;
260         }
261     }
262 
263     // Matcher for path elements like "*[@bar=*]", where the name isn't known until match time.
264     private static final class WildcardElementMatcher {
265         private final ImmutableMap<String, String> attributes;
266 
WildcardElementMatcher(Map<String, String> attributes)267         private WildcardElementMatcher(Map<String, String> attributes) {
268             this.attributes = ImmutableMap.copyOf(attributes);
269         }
270 
271         @SuppressWarnings("StringEquality")
match(CldrPath path)272         private boolean match(CldrPath path) {
273             // The wildcard matcher never fails due to the element name but must create new key
274             // instances every time matching occurs (because the key name is dynamic). Since this
275             // is rare, it's worth making into a separate case.
276             for (Map.Entry<String, String> attribute : attributes.entrySet()) {
277                 String actual = path.get(keyOf(path.getName(), attribute.getKey()));
278                 if (actual == null) {
279                     return false;
280                 }
281                 String expected = attribute.getValue();
282                 // DO NOT change this to use expected.equals(WILDCARD).
283                 if (expected != WILDCARD && !expected.equals(actual)) {
284                     return false;
285                 }
286             }
287             return true;
288         }
289     }
290 }
291