• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.os;
18 
19 import android.annotation.IntDef;
20 import android.util.Log;
21 import android.util.proto.ProtoOutputStream;
22 
23 import java.lang.annotation.Retention;
24 import java.lang.annotation.RetentionPolicy;
25 import java.util.Arrays;
26 
27 /**
28  * A simple pattern matcher, which is safe to use on untrusted data: it does
29  * not provide full reg-exp support, only simple globbing that can not be
30  * used maliciously.
31  */
32 @android.ravenwood.annotation.RavenwoodKeepWholeClass
33 public class PatternMatcher implements Parcelable {
34     /**
35      * Pattern type: the given pattern must exactly match the string it is
36      * tested against.
37      */
38     public static final int PATTERN_LITERAL = 0;
39 
40     /**
41      * Pattern type: the given pattern must match the
42      * beginning of the string it is tested against.
43      */
44     public static final int PATTERN_PREFIX = 1;
45 
46     /**
47      * Pattern type: the given pattern is interpreted with a
48      * simple glob syntax for matching against the string it is tested against.
49      * In this syntax, you can use the '*' character to match against zero or
50      * more occurrences of the character immediately before.  If the
51      * character before it is '.' it will match any character.  The character
52      * '\' can be used as an escape.  This essentially provides only the '*'
53      * wildcard part of a normal regexp.
54      */
55     public static final int PATTERN_SIMPLE_GLOB = 2;
56 
57     /**
58      * Pattern type: the given pattern is interpreted with a regular
59      * expression-like syntax for matching against the string it is tested
60      * against. Supported tokens include dot ({@code .}) and sets ({@code [...]})
61      * with full support for character ranges and the not ({@code ^}) modifier.
62      * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +})
63      * for one-or-more and full range ({@code {...}}) support. This is a simple
64      * evaluation implementation in which matching is done against the pattern in
65      * real time with no backtracking support.
66      */
67     public static final int PATTERN_ADVANCED_GLOB = 3;
68 
69     /**
70      * Pattern type: the given pattern must match the
71      * end of the string it is tested against.
72      */
73     public static final int PATTERN_SUFFIX = 4;
74 
75     /** @hide */
76     @IntDef(value = {
77             PATTERN_LITERAL,
78             PATTERN_PREFIX,
79             PATTERN_SIMPLE_GLOB,
80             PATTERN_ADVANCED_GLOB,
81             PATTERN_SUFFIX,
82     })
83     @Retention(RetentionPolicy.SOURCE)
84     public @interface PatternType {}
85 
86     // token types for advanced matching
87     private static final int TOKEN_TYPE_LITERAL = 0;
88     private static final int TOKEN_TYPE_ANY = 1;
89     private static final int TOKEN_TYPE_SET = 2;
90     private static final int TOKEN_TYPE_INVERSE_SET = 3;
91 
92     // Return for no match
93     private static final int NO_MATCH = -1;
94 
95     private static final String TAG = "PatternMatcher";
96 
97     // Parsed placeholders for advanced patterns
98     private static final int PARSED_TOKEN_CHAR_SET_START = -1;
99     private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2;
100     private static final int PARSED_TOKEN_CHAR_SET_STOP = -3;
101     private static final int PARSED_TOKEN_CHAR_ANY = -4;
102     private static final int PARSED_MODIFIER_RANGE_START = -5;
103     private static final int PARSED_MODIFIER_RANGE_STOP = -6;
104     private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7;
105     private static final int PARSED_MODIFIER_ONE_OR_MORE = -8;
106 
107     private final String mPattern;
108     private final int mType;
109     private final int[] mParsedPattern;
110 
111 
112     private static final int MAX_PATTERN_STORAGE = 2048;
113     // workspace to use for building a parsed advanced pattern;
114     private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE];
115 
PatternMatcher(String pattern, int type)116     public PatternMatcher(String pattern, int type) {
117         mPattern = pattern;
118         mType = type;
119         if (mType == PATTERN_ADVANCED_GLOB) {
120             mParsedPattern = parseAndVerifyAdvancedPattern(pattern);
121         } else {
122             mParsedPattern = null;
123         }
124     }
125 
getPath()126     public final String getPath() {
127         return mPattern;
128     }
129 
getType()130     public final int getType() {
131         return mType;
132     }
133 
match(String str)134     public boolean match(String str) {
135         return matchPattern(str, mPattern, mParsedPattern, mType);
136     }
137 
toString()138     public String toString() {
139         String type = "? ";
140         switch (mType) {
141             case PATTERN_LITERAL:
142                 type = "LITERAL: ";
143                 break;
144             case PATTERN_PREFIX:
145                 type = "PREFIX: ";
146                 break;
147             case PATTERN_SIMPLE_GLOB:
148                 type = "GLOB: ";
149                 break;
150             case PATTERN_ADVANCED_GLOB:
151                 type = "ADVANCED: ";
152                 break;
153             case PATTERN_SUFFIX:
154                 type = "SUFFIX: ";
155                 break;
156         }
157         return "PatternMatcher{" + type + mPattern + "}";
158     }
159 
160     /** @hide */
dumpDebug(ProtoOutputStream proto, long fieldId)161     public void dumpDebug(ProtoOutputStream proto, long fieldId) {
162         long token = proto.start(fieldId);
163         proto.write(PatternMatcherProto.PATTERN, mPattern);
164         proto.write(PatternMatcherProto.TYPE, mType);
165         // PatternMatcherProto.PARSED_PATTERN is too much to dump, but the field is reserved to
166         // match the current data structure.
167         proto.end(token);
168     }
169 
170     /**
171      * Perform a check on the matcher for the pattern type of {@link #PATTERN_ADVANCED_GLOB}.
172      * Return true if it passed.
173      * @hide
174      */
check()175     public boolean check() {
176         try {
177             if (mType == PATTERN_ADVANCED_GLOB) {
178                 return Arrays.equals(mParsedPattern, parseAndVerifyAdvancedPattern(mPattern));
179             }
180         } catch (IllegalArgumentException e) {
181             Log.w(TAG, "Failed to verify advanced pattern: " + e.getMessage());
182             return false;
183         }
184         return true;
185     }
186 
describeContents()187     public int describeContents() {
188         return 0;
189     }
190 
writeToParcel(Parcel dest, int flags)191     public void writeToParcel(Parcel dest, int flags) {
192         dest.writeString(mPattern);
193         dest.writeInt(mType);
194         dest.writeIntArray(mParsedPattern);
195     }
196 
PatternMatcher(Parcel src)197     public PatternMatcher(Parcel src) {
198         mPattern = src.readString();
199         mType = src.readInt();
200         mParsedPattern = src.createIntArray();
201     }
202 
203     public static final @android.annotation.NonNull Parcelable.Creator<PatternMatcher> CREATOR
204             = new Parcelable.Creator<PatternMatcher>() {
205         public PatternMatcher createFromParcel(Parcel source) {
206             return new PatternMatcher(source);
207         }
208 
209         public PatternMatcher[] newArray(int size) {
210             return new PatternMatcher[size];
211         }
212     };
213 
matchPattern(String match, String pattern, int[] parsedPattern, int type)214     static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) {
215         if (match == null) return false;
216         if (type == PATTERN_LITERAL) {
217             return pattern.equals(match);
218         } if (type == PATTERN_PREFIX) {
219             return match.startsWith(pattern);
220         } else if (type == PATTERN_SIMPLE_GLOB) {
221             return matchGlobPattern(pattern, match);
222         } else if (type == PATTERN_ADVANCED_GLOB) {
223             return matchAdvancedPattern(parsedPattern, match);
224         } else if (type == PATTERN_SUFFIX) {
225             return match.endsWith(pattern);
226         }
227         return false;
228     }
229 
matchGlobPattern(String pattern, String match)230     static boolean matchGlobPattern(String pattern, String match) {
231         final int NP = pattern.length();
232         if (NP <= 0) {
233             return match.length() <= 0;
234         }
235         final int NM = match.length();
236         int ip = 0, im = 0;
237         char nextChar = pattern.charAt(0);
238         while ((ip<NP) && (im<NM)) {
239             char c = nextChar;
240             ip++;
241             nextChar = ip < NP ? pattern.charAt(ip) : 0;
242             final boolean escaped = (c == '\\');
243             if (escaped) {
244                 c = nextChar;
245                 ip++;
246                 nextChar = ip < NP ? pattern.charAt(ip) : 0;
247             }
248             if (nextChar == '*') {
249                 if (!escaped && c == '.') {
250                     if (ip >= (NP-1)) {
251                         // at the end with a pattern match, so
252                         // all is good without checking!
253                         return true;
254                     }
255                     ip++;
256                     nextChar = pattern.charAt(ip);
257                     // Consume everything until the next character in the
258                     // pattern is found.
259                     if (nextChar == '\\') {
260                         ip++;
261                         nextChar = ip < NP ? pattern.charAt(ip) : 0;
262                     }
263                     do {
264                         if (match.charAt(im) == nextChar) {
265                             break;
266                         }
267                         im++;
268                     } while (im < NM);
269                     if (im == NM) {
270                         // Whoops, the next character in the pattern didn't
271                         // exist in the match.
272                         return false;
273                     }
274                     ip++;
275                     nextChar = ip < NP ? pattern.charAt(ip) : 0;
276                     im++;
277                 } else {
278                     // Consume only characters matching the one before '*'.
279                     do {
280                         if (match.charAt(im) != c) {
281                             break;
282                         }
283                         im++;
284                     } while (im < NM);
285                     ip++;
286                     nextChar = ip < NP ? pattern.charAt(ip) : 0;
287                 }
288             } else {
289                 if (c != '.' && match.charAt(im) != c) return false;
290                 im++;
291             }
292         }
293 
294         if (ip >= NP && im >= NM) {
295             // Reached the end of both strings, all is good!
296             return true;
297         }
298 
299         // One last check: we may have finished the match string, but still
300         // have a '.*' at the end of the pattern, which should still count
301         // as a match.
302         if (ip == NP-2 && pattern.charAt(ip) == '.'
303             && pattern.charAt(ip+1) == '*') {
304             return true;
305         }
306 
307         return false;
308     }
309 
310     /**
311      * Parses the advanced pattern and returns an integer array representation of it. The integer
312      * array treats each field as a character if positive and a unique token placeholder if
313      * negative. This method will throw on any pattern structure violations.
314      */
315     synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) {
316         int ip = 0;
317         final int LP = pattern.length();
318 
319         int it = 0;
320 
321         boolean inSet = false;
322         boolean inRange = false;
323         boolean inCharClass = false;
324 
325         boolean addToParsedPattern;
326 
327         while (ip < LP) {
328             if (it > MAX_PATTERN_STORAGE - 3) {
329                 throw new IllegalArgumentException("Pattern is too large!");
330             }
331 
332             char c = pattern.charAt(ip);
333             addToParsedPattern = false;
334 
335             switch (c) {
336                 case '[':
337                     if (inSet) {
338                         addToParsedPattern = true; // treat as literal or char class in set
339                     } else {
340                         if (pattern.charAt(ip + 1) == '^') {
341                             sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START;
342                             ip++; // skip over the '^'
343                         } else {
344                             sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START;
345                         }
346                         ip++; // move to the next pattern char
347                         inSet = true;
348                         continue;
349                     }
350                     break;
351                 case ']':
352                     if (!inSet) {
353                         addToParsedPattern = true; // treat as literal outside of set
354                     } else {
355                         int parsedToken = sParsedPatternScratch[it - 1];
356                         if (parsedToken == PARSED_TOKEN_CHAR_SET_START ||
357                             parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) {
358                             throw new IllegalArgumentException(
359                                     "You must define characters in a set.");
360                         }
361                         sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP;
362                         inSet = false;
363                         inCharClass = false;
364                     }
365                     break;
366                 case '{':
367                     if (!inSet) {
368                         if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
369                             throw new IllegalArgumentException("Modifier must follow a token.");
370                         }
371                         sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START;
372                         ip++;
373                         inRange = true;
374                     }
375                     break;
376                 case '}':
377                     if (inRange) { // only terminate the range if we're currently in one
378                         sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP;
379                         inRange = false;
380                     }
381                     break;
382                 case '*':
383                     if (!inSet) {
384                         if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
385                             throw new IllegalArgumentException("Modifier must follow a token.");
386                         }
387                         sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE;
388                     }
389                     break;
390                 case '+':
391                     if (!inSet) {
392                         if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
393                             throw new IllegalArgumentException("Modifier must follow a token.");
394                         }
395                         sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE;
396                     }
397                     break;
398                 case '.':
399                     if (!inSet) {
400                         sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY;
401                     }
402                     break;
403                 case '\\': // escape
404                     if (ip + 1 >= LP) {
405                         throw new IllegalArgumentException("Escape found at end of pattern!");
406                     }
407                     c = pattern.charAt(++ip);
408                     addToParsedPattern = true;
409                     break;
410                 default:
411                     addToParsedPattern = true;
412                     break;
413             }
414             if (inSet) {
415                 if (inCharClass) {
416                     sParsedPatternScratch[it++] = c;
417                     inCharClass = false;
418                 } else {
419                     // look forward for character class
420                     if (ip + 2 < LP
421                             && pattern.charAt(ip + 1) == '-'
422                             && pattern.charAt(ip + 2) != ']') {
423                         inCharClass = true;
424                         sParsedPatternScratch[it++] = c; // set first token as lower end of range
425                         ip++; // advance past dash
426                     } else { // literal
427                         sParsedPatternScratch[it++] = c; // set first token as literal
428                         sParsedPatternScratch[it++] = c; // set second set as literal
429                     }
430                 }
431             } else if (inRange) {
432                 int endOfSet = pattern.indexOf('}', ip);
433                 if (endOfSet < 0) {
434                     throw new IllegalArgumentException("Range not ended with '}'");
435                 }
436                 String rangeString = pattern.substring(ip, endOfSet);
437                 int commaIndex = rangeString.indexOf(',');
438                 try {
439                     final int rangeMin;
440                     final int rangeMax;
441                     if (commaIndex < 0) {
442                         int parsedRange = Integer.parseInt(rangeString);
443                         rangeMin = rangeMax = parsedRange;
444                     } else {
445                         rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex));
446                         if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more)
447                             rangeMax = Integer.MAX_VALUE;
448                         } else {
449                             rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1));
450                         }
451                     }
452                     if (rangeMin > rangeMax) {
453                         throw new IllegalArgumentException(
454                             "Range quantifier minimum is greater than maximum");
455                     }
456                     sParsedPatternScratch[it++] = rangeMin;
457                     sParsedPatternScratch[it++] = rangeMax;
458                 } catch (NumberFormatException e) {
459                     throw new IllegalArgumentException("Range number format incorrect", e);
460                 }
461                 ip = endOfSet;
462                 continue; // don't increment ip
463             } else if (addToParsedPattern) {
464                 sParsedPatternScratch[it++] = c;
465             }
466             ip++;
467         }
468         if (inSet) {
469             throw new IllegalArgumentException("Set was not terminated!");
470         }
471         return Arrays.copyOf(sParsedPatternScratch, it);
472     }
473 
474     private static boolean isParsedModifier(int parsedChar) {
475         return parsedChar == PARSED_MODIFIER_ONE_OR_MORE ||
476                 parsedChar == PARSED_MODIFIER_ZERO_OR_MORE ||
477                 parsedChar == PARSED_MODIFIER_RANGE_STOP ||
478                 parsedChar == PARSED_MODIFIER_RANGE_START;
479     }
480 
481     static boolean matchAdvancedPattern(int[] parsedPattern, String match) {
482 
483         // create indexes
484         int ip = 0, im = 0;
485 
486         // one-time length check
487         final int LP = parsedPattern.length, LM = match.length();
488 
489         // The current character being analyzed in the pattern
490         int patternChar;
491 
492         int tokenType;
493 
494         int charSetStart = 0, charSetEnd = 0;
495 
496         while (ip < LP) { // we still have content in the pattern
497 
498             patternChar = parsedPattern[ip];
499             // get the match type of the next verb
500 
501             switch (patternChar) {
502                 case PARSED_TOKEN_CHAR_ANY:
503                     tokenType = TOKEN_TYPE_ANY;
504                     ip++;
505                     break;
506                 case PARSED_TOKEN_CHAR_SET_START:
507                 case PARSED_TOKEN_CHAR_SET_INVERSE_START:
508                     tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START
509                             ? TOKEN_TYPE_SET
510                             : TOKEN_TYPE_INVERSE_SET;
511                     charSetStart = ip + 1; // start from the char after the set start
512                     while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP);
513                     charSetEnd = ip - 1; // we're on the set stop, end is the previous
514                     ip++; // move the pointer to the next pattern entry
515                     break;
516                 default:
517                     charSetStart = ip;
518                     tokenType = TOKEN_TYPE_LITERAL;
519                     ip++;
520                     break;
521             }
522 
523             final int minRepetition;
524             final int maxRepetition;
525 
526             // look for a match length modifier
527             if (ip >= LP) {
528                 minRepetition = maxRepetition = 1;
529             } else {
530                 patternChar = parsedPattern[ip];
531                 switch (patternChar) {
532                     case PARSED_MODIFIER_ZERO_OR_MORE:
533                         minRepetition = 0;
534                         maxRepetition = Integer.MAX_VALUE;
535                         ip++;
536                         break;
537                     case PARSED_MODIFIER_ONE_OR_MORE:
538                         minRepetition = 1;
539                         maxRepetition = Integer.MAX_VALUE;
540                         ip++;
541                         break;
542                     case PARSED_MODIFIER_RANGE_START:
543                         minRepetition = parsedPattern[++ip];
544                         maxRepetition = parsedPattern[++ip];
545                         ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token
546                         break;
547                     default:
548                         minRepetition = maxRepetition = 1; // implied literal
549                         break;
550                 }
551             }
552             if (minRepetition > maxRepetition) {
553                 return false;
554             }
555 
556             // attempt to match as many characters as possible
557             int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition,
558                     parsedPattern, charSetStart, charSetEnd);
559 
560             // if we found a conflict, return false immediately
561             if (matched == NO_MATCH) {
562                 return false;
563             }
564 
565             // move the match pointer the number of characters matched
566             im += matched;
567         }
568         return ip >= LP && im >= LM; // have parsed entire string and regex
569     }
570 
571     private static int matchChars(String match, int im, final int lm, int tokenType,
572             int minRepetition, int maxRepetition, int[] parsedPattern,
573             int tokenStart, int tokenEnd) {
574         int matched = 0;
575 
576         while(matched < maxRepetition
577                 && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart,
578                     tokenEnd)) {
579             matched++;
580         }
581 
582         return matched < minRepetition ? NO_MATCH : matched;
583     }
584 
585     private static boolean matchChar(String match, int im, final int lm, int tokenType,
586             int[] parsedPattern, int tokenStart, int tokenEnd) {
587         if (im >= lm) { // we've overrun the string, no match
588             return false;
589         }
590         switch (tokenType) {
591             case TOKEN_TYPE_ANY:
592                 return true;
593             case TOKEN_TYPE_SET:
594                 for (int i = tokenStart; i < tokenEnd; i += 2) {
595                     char matchChar = match.charAt(im);
596                     if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
597                         return true;
598                     }
599                 }
600                 return false;
601             case TOKEN_TYPE_INVERSE_SET:
602                 for (int i = tokenStart; i < tokenEnd; i += 2) {
603                     char matchChar = match.charAt(im);
604                     if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
605                         return false;
606                     }
607                 }
608                 return true;
609             case TOKEN_TYPE_LITERAL:
610                 return match.charAt(im) == parsedPattern[tokenStart];
611             default:
612                 return false;
613         }
614     }
615 }