• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.os;
18 
19 import android.util.proto.ProtoOutputStream;
20 
21 import java.util.Arrays;
22 
23 /**
24  * A simple pattern matcher, which is safe to use on untrusted data: it does
25  * not provide full reg-exp support, only simple globbing that can not be
26  * used maliciously.
27  */
28 public class PatternMatcher implements Parcelable {
29     /**
30      * Pattern type: the given pattern must exactly match the string it is
31      * tested against.
32      */
33     public static final int PATTERN_LITERAL = 0;
34 
35     /**
36      * Pattern type: the given pattern must match the
37      * beginning of the string it is tested against.
38      */
39     public static final int PATTERN_PREFIX = 1;
40 
41     /**
42      * Pattern type: the given pattern is interpreted with a
43      * simple glob syntax for matching against the string it is tested against.
44      * In this syntax, you can use the '*' character to match against zero or
45      * more occurrences of the character immediately before.  If the
46      * character before it is '.' it will match any character.  The character
47      * '\' can be used as an escape.  This essentially provides only the '*'
48      * wildcard part of a normal regexp.
49      */
50     public static final int PATTERN_SIMPLE_GLOB = 2;
51 
52     /**
53      * Pattern type: the given pattern is interpreted with a regular
54      * expression-like syntax for matching against the string it is tested
55      * against. Supported tokens include dot ({@code .}) and sets ({@code [...]})
56      * with full support for character ranges and the not ({@code ^}) modifier.
57      * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +})
58      * for one-or-more and full range ({@code {...}}) support. This is a simple
59      * evaluation implementation in which matching is done against the pattern in
60      * real time with no backtracking support.
61      */
62     public static final int PATTERN_ADVANCED_GLOB = 3;
63 
64     /**
65      * Pattern type: the given pattern must match the
66      * end of the string it is tested against.
67      */
68     public static final int PATTERN_SUFFIX = 4;
69 
70     // token types for advanced matching
71     private static final int TOKEN_TYPE_LITERAL = 0;
72     private static final int TOKEN_TYPE_ANY = 1;
73     private static final int TOKEN_TYPE_SET = 2;
74     private static final int TOKEN_TYPE_INVERSE_SET = 3;
75 
76     // Return for no match
77     private static final int NO_MATCH = -1;
78 
79     private static final String TAG = "PatternMatcher";
80 
81     // Parsed placeholders for advanced patterns
82     private static final int PARSED_TOKEN_CHAR_SET_START = -1;
83     private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2;
84     private static final int PARSED_TOKEN_CHAR_SET_STOP = -3;
85     private static final int PARSED_TOKEN_CHAR_ANY = -4;
86     private static final int PARSED_MODIFIER_RANGE_START = -5;
87     private static final int PARSED_MODIFIER_RANGE_STOP = -6;
88     private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7;
89     private static final int PARSED_MODIFIER_ONE_OR_MORE = -8;
90 
91     private final String mPattern;
92     private final int mType;
93     private final int[] mParsedPattern;
94 
95 
96     private static final int MAX_PATTERN_STORAGE = 2048;
97     // workspace to use for building a parsed advanced pattern;
98     private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE];
99 
PatternMatcher(String pattern, int type)100     public PatternMatcher(String pattern, int type) {
101         mPattern = pattern;
102         mType = type;
103         if (mType == PATTERN_ADVANCED_GLOB) {
104             mParsedPattern = parseAndVerifyAdvancedPattern(pattern);
105         } else {
106             mParsedPattern = null;
107         }
108     }
109 
getPath()110     public final String getPath() {
111         return mPattern;
112     }
113 
getType()114     public final int getType() {
115         return mType;
116     }
117 
match(String str)118     public boolean match(String str) {
119         return matchPattern(str, mPattern, mParsedPattern, mType);
120     }
121 
toString()122     public String toString() {
123         String type = "? ";
124         switch (mType) {
125             case PATTERN_LITERAL:
126                 type = "LITERAL: ";
127                 break;
128             case PATTERN_PREFIX:
129                 type = "PREFIX: ";
130                 break;
131             case PATTERN_SIMPLE_GLOB:
132                 type = "GLOB: ";
133                 break;
134             case PATTERN_ADVANCED_GLOB:
135                 type = "ADVANCED: ";
136                 break;
137             case PATTERN_SUFFIX:
138                 type = "SUFFIX: ";
139                 break;
140         }
141         return "PatternMatcher{" + type + mPattern + "}";
142     }
143 
144     /** @hide */
dumpDebug(ProtoOutputStream proto, long fieldId)145     public void dumpDebug(ProtoOutputStream proto, long fieldId) {
146         long token = proto.start(fieldId);
147         proto.write(PatternMatcherProto.PATTERN, mPattern);
148         proto.write(PatternMatcherProto.TYPE, mType);
149         // PatternMatcherProto.PARSED_PATTERN is too much to dump, but the field is reserved to
150         // match the current data structure.
151         proto.end(token);
152     }
153 
describeContents()154     public int describeContents() {
155         return 0;
156     }
157 
writeToParcel(Parcel dest, int flags)158     public void writeToParcel(Parcel dest, int flags) {
159         dest.writeString(mPattern);
160         dest.writeInt(mType);
161         dest.writeIntArray(mParsedPattern);
162     }
163 
PatternMatcher(Parcel src)164     public PatternMatcher(Parcel src) {
165         mPattern = src.readString();
166         mType = src.readInt();
167         mParsedPattern = src.createIntArray();
168     }
169 
170     public static final @android.annotation.NonNull Parcelable.Creator<PatternMatcher> CREATOR
171             = new Parcelable.Creator<PatternMatcher>() {
172         public PatternMatcher createFromParcel(Parcel source) {
173             return new PatternMatcher(source);
174         }
175 
176         public PatternMatcher[] newArray(int size) {
177             return new PatternMatcher[size];
178         }
179     };
180 
matchPattern(String match, String pattern, int[] parsedPattern, int type)181     static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) {
182         if (match == null) return false;
183         if (type == PATTERN_LITERAL) {
184             return pattern.equals(match);
185         } if (type == PATTERN_PREFIX) {
186             return match.startsWith(pattern);
187         } else if (type == PATTERN_SIMPLE_GLOB) {
188             return matchGlobPattern(pattern, match);
189         } else if (type == PATTERN_ADVANCED_GLOB) {
190             return matchAdvancedPattern(parsedPattern, match);
191         } else if (type == PATTERN_SUFFIX) {
192             return match.endsWith(pattern);
193         }
194         return false;
195     }
196 
matchGlobPattern(String pattern, String match)197     static boolean matchGlobPattern(String pattern, String match) {
198         final int NP = pattern.length();
199         if (NP <= 0) {
200             return match.length() <= 0;
201         }
202         final int NM = match.length();
203         int ip = 0, im = 0;
204         char nextChar = pattern.charAt(0);
205         while ((ip<NP) && (im<NM)) {
206             char c = nextChar;
207             ip++;
208             nextChar = ip < NP ? pattern.charAt(ip) : 0;
209             final boolean escaped = (c == '\\');
210             if (escaped) {
211                 c = nextChar;
212                 ip++;
213                 nextChar = ip < NP ? pattern.charAt(ip) : 0;
214             }
215             if (nextChar == '*') {
216                 if (!escaped && c == '.') {
217                     if (ip >= (NP-1)) {
218                         // at the end with a pattern match, so
219                         // all is good without checking!
220                         return true;
221                     }
222                     ip++;
223                     nextChar = pattern.charAt(ip);
224                     // Consume everything until the next character in the
225                     // pattern is found.
226                     if (nextChar == '\\') {
227                         ip++;
228                         nextChar = ip < NP ? pattern.charAt(ip) : 0;
229                     }
230                     do {
231                         if (match.charAt(im) == nextChar) {
232                             break;
233                         }
234                         im++;
235                     } while (im < NM);
236                     if (im == NM) {
237                         // Whoops, the next character in the pattern didn't
238                         // exist in the match.
239                         return false;
240                     }
241                     ip++;
242                     nextChar = ip < NP ? pattern.charAt(ip) : 0;
243                     im++;
244                 } else {
245                     // Consume only characters matching the one before '*'.
246                     do {
247                         if (match.charAt(im) != c) {
248                             break;
249                         }
250                         im++;
251                     } while (im < NM);
252                     ip++;
253                     nextChar = ip < NP ? pattern.charAt(ip) : 0;
254                 }
255             } else {
256                 if (c != '.' && match.charAt(im) != c) return false;
257                 im++;
258             }
259         }
260 
261         if (ip >= NP && im >= NM) {
262             // Reached the end of both strings, all is good!
263             return true;
264         }
265 
266         // One last check: we may have finished the match string, but still
267         // have a '.*' at the end of the pattern, which should still count
268         // as a match.
269         if (ip == NP-2 && pattern.charAt(ip) == '.'
270             && pattern.charAt(ip+1) == '*') {
271             return true;
272         }
273 
274         return false;
275     }
276 
277     /**
278      * Parses the advanced pattern and returns an integer array representation of it. The integer
279      * array treats each field as a character if positive and a unique token placeholder if
280      * negative. This method will throw on any pattern structure violations.
281      */
282     synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) {
283         int ip = 0;
284         final int LP = pattern.length();
285 
286         int it = 0;
287 
288         boolean inSet = false;
289         boolean inRange = false;
290         boolean inCharClass = false;
291 
292         boolean addToParsedPattern;
293 
294         while (ip < LP) {
295             if (it > MAX_PATTERN_STORAGE - 3) {
296                 throw new IllegalArgumentException("Pattern is too large!");
297             }
298 
299             char c = pattern.charAt(ip);
300             addToParsedPattern = false;
301 
302             switch (c) {
303                 case '[':
304                     if (inSet) {
305                         addToParsedPattern = true; // treat as literal or char class in set
306                     } else {
307                         if (pattern.charAt(ip + 1) == '^') {
308                             sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START;
309                             ip++; // skip over the '^'
310                         } else {
311                             sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START;
312                         }
313                         ip++; // move to the next pattern char
314                         inSet = true;
315                         continue;
316                     }
317                     break;
318                 case ']':
319                     if (!inSet) {
320                         addToParsedPattern = true; // treat as literal outside of set
321                     } else {
322                         int parsedToken = sParsedPatternScratch[it - 1];
323                         if (parsedToken == PARSED_TOKEN_CHAR_SET_START ||
324                             parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) {
325                             throw new IllegalArgumentException(
326                                     "You must define characters in a set.");
327                         }
328                         sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP;
329                         inSet = false;
330                         inCharClass = false;
331                     }
332                     break;
333                 case '{':
334                     if (!inSet) {
335                         if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
336                             throw new IllegalArgumentException("Modifier must follow a token.");
337                         }
338                         sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START;
339                         ip++;
340                         inRange = true;
341                     }
342                     break;
343                 case '}':
344                     if (inRange) { // only terminate the range if we're currently in one
345                         sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP;
346                         inRange = false;
347                     }
348                     break;
349                 case '*':
350                     if (!inSet) {
351                         if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
352                             throw new IllegalArgumentException("Modifier must follow a token.");
353                         }
354                         sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE;
355                     }
356                     break;
357                 case '+':
358                     if (!inSet) {
359                         if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
360                             throw new IllegalArgumentException("Modifier must follow a token.");
361                         }
362                         sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE;
363                     }
364                     break;
365                 case '.':
366                     if (!inSet) {
367                         sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY;
368                     }
369                     break;
370                 case '\\': // escape
371                     if (ip + 1 >= LP) {
372                         throw new IllegalArgumentException("Escape found at end of pattern!");
373                     }
374                     c = pattern.charAt(++ip);
375                     addToParsedPattern = true;
376                     break;
377                 default:
378                     addToParsedPattern = true;
379                     break;
380             }
381             if (inSet) {
382                 if (inCharClass) {
383                     sParsedPatternScratch[it++] = c;
384                     inCharClass = false;
385                 } else {
386                     // look forward for character class
387                     if (ip + 2 < LP
388                             && pattern.charAt(ip + 1) == '-'
389                             && pattern.charAt(ip + 2) != ']') {
390                         inCharClass = true;
391                         sParsedPatternScratch[it++] = c; // set first token as lower end of range
392                         ip++; // advance past dash
393                     } else { // literal
394                         sParsedPatternScratch[it++] = c; // set first token as literal
395                         sParsedPatternScratch[it++] = c; // set second set as literal
396                     }
397                 }
398             } else if (inRange) {
399                 int endOfSet = pattern.indexOf('}', ip);
400                 if (endOfSet < 0) {
401                     throw new IllegalArgumentException("Range not ended with '}'");
402                 }
403                 String rangeString = pattern.substring(ip, endOfSet);
404                 int commaIndex = rangeString.indexOf(',');
405                 try {
406                     final int rangeMin;
407                     final int rangeMax;
408                     if (commaIndex < 0) {
409                         int parsedRange = Integer.parseInt(rangeString);
410                         rangeMin = rangeMax = parsedRange;
411                     } else {
412                         rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex));
413                         if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more)
414                             rangeMax = Integer.MAX_VALUE;
415                         } else {
416                             rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1));
417                         }
418                     }
419                     if (rangeMin > rangeMax) {
420                         throw new IllegalArgumentException(
421                             "Range quantifier minimum is greater than maximum");
422                     }
423                     sParsedPatternScratch[it++] = rangeMin;
424                     sParsedPatternScratch[it++] = rangeMax;
425                 } catch (NumberFormatException e) {
426                     throw new IllegalArgumentException("Range number format incorrect", e);
427                 }
428                 ip = endOfSet;
429                 continue; // don't increment ip
430             } else if (addToParsedPattern) {
431                 sParsedPatternScratch[it++] = c;
432             }
433             ip++;
434         }
435         if (inSet) {
436             throw new IllegalArgumentException("Set was not terminated!");
437         }
438         return Arrays.copyOf(sParsedPatternScratch, it);
439     }
440 
441     private static boolean isParsedModifier(int parsedChar) {
442         return parsedChar == PARSED_MODIFIER_ONE_OR_MORE ||
443                 parsedChar == PARSED_MODIFIER_ZERO_OR_MORE ||
444                 parsedChar == PARSED_MODIFIER_RANGE_STOP ||
445                 parsedChar == PARSED_MODIFIER_RANGE_START;
446     }
447 
448     static boolean matchAdvancedPattern(int[] parsedPattern, String match) {
449 
450         // create indexes
451         int ip = 0, im = 0;
452 
453         // one-time length check
454         final int LP = parsedPattern.length, LM = match.length();
455 
456         // The current character being analyzed in the pattern
457         int patternChar;
458 
459         int tokenType;
460 
461         int charSetStart = 0, charSetEnd = 0;
462 
463         while (ip < LP) { // we still have content in the pattern
464 
465             patternChar = parsedPattern[ip];
466             // get the match type of the next verb
467 
468             switch (patternChar) {
469                 case PARSED_TOKEN_CHAR_ANY:
470                     tokenType = TOKEN_TYPE_ANY;
471                     ip++;
472                     break;
473                 case PARSED_TOKEN_CHAR_SET_START:
474                 case PARSED_TOKEN_CHAR_SET_INVERSE_START:
475                     tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START
476                             ? TOKEN_TYPE_SET
477                             : TOKEN_TYPE_INVERSE_SET;
478                     charSetStart = ip + 1; // start from the char after the set start
479                     while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP);
480                     charSetEnd = ip - 1; // we're on the set stop, end is the previous
481                     ip++; // move the pointer to the next pattern entry
482                     break;
483                 default:
484                     charSetStart = ip;
485                     tokenType = TOKEN_TYPE_LITERAL;
486                     ip++;
487                     break;
488             }
489 
490             final int minRepetition;
491             final int maxRepetition;
492 
493             // look for a match length modifier
494             if (ip >= LP) {
495                 minRepetition = maxRepetition = 1;
496             } else {
497                 patternChar = parsedPattern[ip];
498                 switch (patternChar) {
499                     case PARSED_MODIFIER_ZERO_OR_MORE:
500                         minRepetition = 0;
501                         maxRepetition = Integer.MAX_VALUE;
502                         ip++;
503                         break;
504                     case PARSED_MODIFIER_ONE_OR_MORE:
505                         minRepetition = 1;
506                         maxRepetition = Integer.MAX_VALUE;
507                         ip++;
508                         break;
509                     case PARSED_MODIFIER_RANGE_START:
510                         minRepetition = parsedPattern[++ip];
511                         maxRepetition = parsedPattern[++ip];
512                         ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token
513                         break;
514                     default:
515                         minRepetition = maxRepetition = 1; // implied literal
516                         break;
517                 }
518             }
519             if (minRepetition > maxRepetition) {
520                 return false;
521             }
522 
523             // attempt to match as many characters as possible
524             int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition,
525                     parsedPattern, charSetStart, charSetEnd);
526 
527             // if we found a conflict, return false immediately
528             if (matched == NO_MATCH) {
529                 return false;
530             }
531 
532             // move the match pointer the number of characters matched
533             im += matched;
534         }
535         return ip >= LP && im >= LM; // have parsed entire string and regex
536     }
537 
538     private static int matchChars(String match, int im, final int lm, int tokenType,
539             int minRepetition, int maxRepetition, int[] parsedPattern,
540             int tokenStart, int tokenEnd) {
541         int matched = 0;
542 
543         while(matched < maxRepetition
544                 && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart,
545                     tokenEnd)) {
546             matched++;
547         }
548 
549         return matched < minRepetition ? NO_MATCH : matched;
550     }
551 
552     private static boolean matchChar(String match, int im, final int lm, int tokenType,
553             int[] parsedPattern, int tokenStart, int tokenEnd) {
554         if (im >= lm) { // we've overrun the string, no match
555             return false;
556         }
557         switch (tokenType) {
558             case TOKEN_TYPE_ANY:
559                 return true;
560             case TOKEN_TYPE_SET:
561                 for (int i = tokenStart; i < tokenEnd; i += 2) {
562                     char matchChar = match.charAt(im);
563                     if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
564                         return true;
565                     }
566                 }
567                 return false;
568             case TOKEN_TYPE_INVERSE_SET:
569                 for (int i = tokenStart; i < tokenEnd; i += 2) {
570                     char matchChar = match.charAt(im);
571                     if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
572                         return false;
573                     }
574                 }
575                 return true;
576             case TOKEN_TYPE_LITERAL:
577                 return match.charAt(im) == parsedPattern[tokenStart];
578             default:
579                 return false;
580         }
581     }
582 }