1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.os; 18 19 import android.annotation.IntDef; 20 import android.util.Log; 21 import android.util.proto.ProtoOutputStream; 22 23 import java.lang.annotation.Retention; 24 import java.lang.annotation.RetentionPolicy; 25 import java.util.Arrays; 26 27 /** 28 * A simple pattern matcher, which is safe to use on untrusted data: it does 29 * not provide full reg-exp support, only simple globbing that can not be 30 * used maliciously. 31 */ 32 @android.ravenwood.annotation.RavenwoodKeepWholeClass 33 public class PatternMatcher implements Parcelable { 34 /** 35 * Pattern type: the given pattern must exactly match the string it is 36 * tested against. 37 */ 38 public static final int PATTERN_LITERAL = 0; 39 40 /** 41 * Pattern type: the given pattern must match the 42 * beginning of the string it is tested against. 43 */ 44 public static final int PATTERN_PREFIX = 1; 45 46 /** 47 * Pattern type: the given pattern is interpreted with a 48 * simple glob syntax for matching against the string it is tested against. 49 * In this syntax, you can use the '*' character to match against zero or 50 * more occurrences of the character immediately before. If the 51 * character before it is '.' it will match any character. The character 52 * '\' can be used as an escape. This essentially provides only the '*' 53 * wildcard part of a normal regexp. 54 */ 55 public static final int PATTERN_SIMPLE_GLOB = 2; 56 57 /** 58 * Pattern type: the given pattern is interpreted with a regular 59 * expression-like syntax for matching against the string it is tested 60 * against. Supported tokens include dot ({@code .}) and sets ({@code [...]}) 61 * with full support for character ranges and the not ({@code ^}) modifier. 62 * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +}) 63 * for one-or-more and full range ({@code {...}}) support. This is a simple 64 * evaluation implementation in which matching is done against the pattern in 65 * real time with no backtracking support. 66 */ 67 public static final int PATTERN_ADVANCED_GLOB = 3; 68 69 /** 70 * Pattern type: the given pattern must match the 71 * end of the string it is tested against. 72 */ 73 public static final int PATTERN_SUFFIX = 4; 74 75 /** @hide */ 76 @IntDef(value = { 77 PATTERN_LITERAL, 78 PATTERN_PREFIX, 79 PATTERN_SIMPLE_GLOB, 80 PATTERN_ADVANCED_GLOB, 81 PATTERN_SUFFIX, 82 }) 83 @Retention(RetentionPolicy.SOURCE) 84 public @interface PatternType {} 85 86 // token types for advanced matching 87 private static final int TOKEN_TYPE_LITERAL = 0; 88 private static final int TOKEN_TYPE_ANY = 1; 89 private static final int TOKEN_TYPE_SET = 2; 90 private static final int TOKEN_TYPE_INVERSE_SET = 3; 91 92 // Return for no match 93 private static final int NO_MATCH = -1; 94 95 private static final String TAG = "PatternMatcher"; 96 97 // Parsed placeholders for advanced patterns 98 private static final int PARSED_TOKEN_CHAR_SET_START = -1; 99 private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2; 100 private static final int PARSED_TOKEN_CHAR_SET_STOP = -3; 101 private static final int PARSED_TOKEN_CHAR_ANY = -4; 102 private static final int PARSED_MODIFIER_RANGE_START = -5; 103 private static final int PARSED_MODIFIER_RANGE_STOP = -6; 104 private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7; 105 private static final int PARSED_MODIFIER_ONE_OR_MORE = -8; 106 107 private final String mPattern; 108 private final int mType; 109 private final int[] mParsedPattern; 110 111 112 private static final int MAX_PATTERN_STORAGE = 2048; 113 // workspace to use for building a parsed advanced pattern; 114 private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE]; 115 PatternMatcher(String pattern, int type)116 public PatternMatcher(String pattern, int type) { 117 mPattern = pattern; 118 mType = type; 119 if (mType == PATTERN_ADVANCED_GLOB) { 120 mParsedPattern = parseAndVerifyAdvancedPattern(pattern); 121 } else { 122 mParsedPattern = null; 123 } 124 } 125 getPath()126 public final String getPath() { 127 return mPattern; 128 } 129 getType()130 public final int getType() { 131 return mType; 132 } 133 match(String str)134 public boolean match(String str) { 135 return matchPattern(str, mPattern, mParsedPattern, mType); 136 } 137 toString()138 public String toString() { 139 String type = "? "; 140 switch (mType) { 141 case PATTERN_LITERAL: 142 type = "LITERAL: "; 143 break; 144 case PATTERN_PREFIX: 145 type = "PREFIX: "; 146 break; 147 case PATTERN_SIMPLE_GLOB: 148 type = "GLOB: "; 149 break; 150 case PATTERN_ADVANCED_GLOB: 151 type = "ADVANCED: "; 152 break; 153 case PATTERN_SUFFIX: 154 type = "SUFFIX: "; 155 break; 156 } 157 return "PatternMatcher{" + type + mPattern + "}"; 158 } 159 160 /** @hide */ dumpDebug(ProtoOutputStream proto, long fieldId)161 public void dumpDebug(ProtoOutputStream proto, long fieldId) { 162 long token = proto.start(fieldId); 163 proto.write(PatternMatcherProto.PATTERN, mPattern); 164 proto.write(PatternMatcherProto.TYPE, mType); 165 // PatternMatcherProto.PARSED_PATTERN is too much to dump, but the field is reserved to 166 // match the current data structure. 167 proto.end(token); 168 } 169 170 /** 171 * Perform a check on the matcher for the pattern type of {@link #PATTERN_ADVANCED_GLOB}. 172 * Return true if it passed. 173 * @hide 174 */ check()175 public boolean check() { 176 try { 177 if (mType == PATTERN_ADVANCED_GLOB) { 178 return Arrays.equals(mParsedPattern, parseAndVerifyAdvancedPattern(mPattern)); 179 } 180 } catch (IllegalArgumentException e) { 181 Log.w(TAG, "Failed to verify advanced pattern: " + e.getMessage()); 182 return false; 183 } 184 return true; 185 } 186 describeContents()187 public int describeContents() { 188 return 0; 189 } 190 writeToParcel(Parcel dest, int flags)191 public void writeToParcel(Parcel dest, int flags) { 192 dest.writeString(mPattern); 193 dest.writeInt(mType); 194 dest.writeIntArray(mParsedPattern); 195 } 196 PatternMatcher(Parcel src)197 public PatternMatcher(Parcel src) { 198 mPattern = src.readString(); 199 mType = src.readInt(); 200 mParsedPattern = src.createIntArray(); 201 } 202 203 public static final @android.annotation.NonNull Parcelable.Creator<PatternMatcher> CREATOR 204 = new Parcelable.Creator<PatternMatcher>() { 205 public PatternMatcher createFromParcel(Parcel source) { 206 return new PatternMatcher(source); 207 } 208 209 public PatternMatcher[] newArray(int size) { 210 return new PatternMatcher[size]; 211 } 212 }; 213 matchPattern(String match, String pattern, int[] parsedPattern, int type)214 static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) { 215 if (match == null) return false; 216 if (type == PATTERN_LITERAL) { 217 return pattern.equals(match); 218 } if (type == PATTERN_PREFIX) { 219 return match.startsWith(pattern); 220 } else if (type == PATTERN_SIMPLE_GLOB) { 221 return matchGlobPattern(pattern, match); 222 } else if (type == PATTERN_ADVANCED_GLOB) { 223 return matchAdvancedPattern(parsedPattern, match); 224 } else if (type == PATTERN_SUFFIX) { 225 return match.endsWith(pattern); 226 } 227 return false; 228 } 229 matchGlobPattern(String pattern, String match)230 static boolean matchGlobPattern(String pattern, String match) { 231 final int NP = pattern.length(); 232 if (NP <= 0) { 233 return match.length() <= 0; 234 } 235 final int NM = match.length(); 236 int ip = 0, im = 0; 237 char nextChar = pattern.charAt(0); 238 while ((ip<NP) && (im<NM)) { 239 char c = nextChar; 240 ip++; 241 nextChar = ip < NP ? pattern.charAt(ip) : 0; 242 final boolean escaped = (c == '\\'); 243 if (escaped) { 244 c = nextChar; 245 ip++; 246 nextChar = ip < NP ? pattern.charAt(ip) : 0; 247 } 248 if (nextChar == '*') { 249 if (!escaped && c == '.') { 250 if (ip >= (NP-1)) { 251 // at the end with a pattern match, so 252 // all is good without checking! 253 return true; 254 } 255 ip++; 256 nextChar = pattern.charAt(ip); 257 // Consume everything until the next character in the 258 // pattern is found. 259 if (nextChar == '\\') { 260 ip++; 261 nextChar = ip < NP ? pattern.charAt(ip) : 0; 262 } 263 do { 264 if (match.charAt(im) == nextChar) { 265 break; 266 } 267 im++; 268 } while (im < NM); 269 if (im == NM) { 270 // Whoops, the next character in the pattern didn't 271 // exist in the match. 272 return false; 273 } 274 ip++; 275 nextChar = ip < NP ? pattern.charAt(ip) : 0; 276 im++; 277 } else { 278 // Consume only characters matching the one before '*'. 279 do { 280 if (match.charAt(im) != c) { 281 break; 282 } 283 im++; 284 } while (im < NM); 285 ip++; 286 nextChar = ip < NP ? pattern.charAt(ip) : 0; 287 } 288 } else { 289 if (c != '.' && match.charAt(im) != c) return false; 290 im++; 291 } 292 } 293 294 if (ip >= NP && im >= NM) { 295 // Reached the end of both strings, all is good! 296 return true; 297 } 298 299 // One last check: we may have finished the match string, but still 300 // have a '.*' at the end of the pattern, which should still count 301 // as a match. 302 if (ip == NP-2 && pattern.charAt(ip) == '.' 303 && pattern.charAt(ip+1) == '*') { 304 return true; 305 } 306 307 return false; 308 } 309 310 /** 311 * Parses the advanced pattern and returns an integer array representation of it. The integer 312 * array treats each field as a character if positive and a unique token placeholder if 313 * negative. This method will throw on any pattern structure violations. 314 */ 315 synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) { 316 int ip = 0; 317 final int LP = pattern.length(); 318 319 int it = 0; 320 321 boolean inSet = false; 322 boolean inRange = false; 323 boolean inCharClass = false; 324 325 boolean addToParsedPattern; 326 327 while (ip < LP) { 328 if (it > MAX_PATTERN_STORAGE - 3) { 329 throw new IllegalArgumentException("Pattern is too large!"); 330 } 331 332 char c = pattern.charAt(ip); 333 addToParsedPattern = false; 334 335 switch (c) { 336 case '[': 337 if (inSet) { 338 addToParsedPattern = true; // treat as literal or char class in set 339 } else { 340 if (pattern.charAt(ip + 1) == '^') { 341 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START; 342 ip++; // skip over the '^' 343 } else { 344 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START; 345 } 346 ip++; // move to the next pattern char 347 inSet = true; 348 continue; 349 } 350 break; 351 case ']': 352 if (!inSet) { 353 addToParsedPattern = true; // treat as literal outside of set 354 } else { 355 int parsedToken = sParsedPatternScratch[it - 1]; 356 if (parsedToken == PARSED_TOKEN_CHAR_SET_START || 357 parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) { 358 throw new IllegalArgumentException( 359 "You must define characters in a set."); 360 } 361 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP; 362 inSet = false; 363 inCharClass = false; 364 } 365 break; 366 case '{': 367 if (!inSet) { 368 if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { 369 throw new IllegalArgumentException("Modifier must follow a token."); 370 } 371 sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START; 372 ip++; 373 inRange = true; 374 } 375 break; 376 case '}': 377 if (inRange) { // only terminate the range if we're currently in one 378 sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP; 379 inRange = false; 380 } 381 break; 382 case '*': 383 if (!inSet) { 384 if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { 385 throw new IllegalArgumentException("Modifier must follow a token."); 386 } 387 sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE; 388 } 389 break; 390 case '+': 391 if (!inSet) { 392 if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { 393 throw new IllegalArgumentException("Modifier must follow a token."); 394 } 395 sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE; 396 } 397 break; 398 case '.': 399 if (!inSet) { 400 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY; 401 } 402 break; 403 case '\\': // escape 404 if (ip + 1 >= LP) { 405 throw new IllegalArgumentException("Escape found at end of pattern!"); 406 } 407 c = pattern.charAt(++ip); 408 addToParsedPattern = true; 409 break; 410 default: 411 addToParsedPattern = true; 412 break; 413 } 414 if (inSet) { 415 if (inCharClass) { 416 sParsedPatternScratch[it++] = c; 417 inCharClass = false; 418 } else { 419 // look forward for character class 420 if (ip + 2 < LP 421 && pattern.charAt(ip + 1) == '-' 422 && pattern.charAt(ip + 2) != ']') { 423 inCharClass = true; 424 sParsedPatternScratch[it++] = c; // set first token as lower end of range 425 ip++; // advance past dash 426 } else { // literal 427 sParsedPatternScratch[it++] = c; // set first token as literal 428 sParsedPatternScratch[it++] = c; // set second set as literal 429 } 430 } 431 } else if (inRange) { 432 int endOfSet = pattern.indexOf('}', ip); 433 if (endOfSet < 0) { 434 throw new IllegalArgumentException("Range not ended with '}'"); 435 } 436 String rangeString = pattern.substring(ip, endOfSet); 437 int commaIndex = rangeString.indexOf(','); 438 try { 439 final int rangeMin; 440 final int rangeMax; 441 if (commaIndex < 0) { 442 int parsedRange = Integer.parseInt(rangeString); 443 rangeMin = rangeMax = parsedRange; 444 } else { 445 rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex)); 446 if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more) 447 rangeMax = Integer.MAX_VALUE; 448 } else { 449 rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1)); 450 } 451 } 452 if (rangeMin > rangeMax) { 453 throw new IllegalArgumentException( 454 "Range quantifier minimum is greater than maximum"); 455 } 456 sParsedPatternScratch[it++] = rangeMin; 457 sParsedPatternScratch[it++] = rangeMax; 458 } catch (NumberFormatException e) { 459 throw new IllegalArgumentException("Range number format incorrect", e); 460 } 461 ip = endOfSet; 462 continue; // don't increment ip 463 } else if (addToParsedPattern) { 464 sParsedPatternScratch[it++] = c; 465 } 466 ip++; 467 } 468 if (inSet) { 469 throw new IllegalArgumentException("Set was not terminated!"); 470 } 471 return Arrays.copyOf(sParsedPatternScratch, it); 472 } 473 474 private static boolean isParsedModifier(int parsedChar) { 475 return parsedChar == PARSED_MODIFIER_ONE_OR_MORE || 476 parsedChar == PARSED_MODIFIER_ZERO_OR_MORE || 477 parsedChar == PARSED_MODIFIER_RANGE_STOP || 478 parsedChar == PARSED_MODIFIER_RANGE_START; 479 } 480 481 static boolean matchAdvancedPattern(int[] parsedPattern, String match) { 482 483 // create indexes 484 int ip = 0, im = 0; 485 486 // one-time length check 487 final int LP = parsedPattern.length, LM = match.length(); 488 489 // The current character being analyzed in the pattern 490 int patternChar; 491 492 int tokenType; 493 494 int charSetStart = 0, charSetEnd = 0; 495 496 while (ip < LP) { // we still have content in the pattern 497 498 patternChar = parsedPattern[ip]; 499 // get the match type of the next verb 500 501 switch (patternChar) { 502 case PARSED_TOKEN_CHAR_ANY: 503 tokenType = TOKEN_TYPE_ANY; 504 ip++; 505 break; 506 case PARSED_TOKEN_CHAR_SET_START: 507 case PARSED_TOKEN_CHAR_SET_INVERSE_START: 508 tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START 509 ? TOKEN_TYPE_SET 510 : TOKEN_TYPE_INVERSE_SET; 511 charSetStart = ip + 1; // start from the char after the set start 512 while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP); 513 charSetEnd = ip - 1; // we're on the set stop, end is the previous 514 ip++; // move the pointer to the next pattern entry 515 break; 516 default: 517 charSetStart = ip; 518 tokenType = TOKEN_TYPE_LITERAL; 519 ip++; 520 break; 521 } 522 523 final int minRepetition; 524 final int maxRepetition; 525 526 // look for a match length modifier 527 if (ip >= LP) { 528 minRepetition = maxRepetition = 1; 529 } else { 530 patternChar = parsedPattern[ip]; 531 switch (patternChar) { 532 case PARSED_MODIFIER_ZERO_OR_MORE: 533 minRepetition = 0; 534 maxRepetition = Integer.MAX_VALUE; 535 ip++; 536 break; 537 case PARSED_MODIFIER_ONE_OR_MORE: 538 minRepetition = 1; 539 maxRepetition = Integer.MAX_VALUE; 540 ip++; 541 break; 542 case PARSED_MODIFIER_RANGE_START: 543 minRepetition = parsedPattern[++ip]; 544 maxRepetition = parsedPattern[++ip]; 545 ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token 546 break; 547 default: 548 minRepetition = maxRepetition = 1; // implied literal 549 break; 550 } 551 } 552 if (minRepetition > maxRepetition) { 553 return false; 554 } 555 556 // attempt to match as many characters as possible 557 int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition, 558 parsedPattern, charSetStart, charSetEnd); 559 560 // if we found a conflict, return false immediately 561 if (matched == NO_MATCH) { 562 return false; 563 } 564 565 // move the match pointer the number of characters matched 566 im += matched; 567 } 568 return ip >= LP && im >= LM; // have parsed entire string and regex 569 } 570 571 private static int matchChars(String match, int im, final int lm, int tokenType, 572 int minRepetition, int maxRepetition, int[] parsedPattern, 573 int tokenStart, int tokenEnd) { 574 int matched = 0; 575 576 while(matched < maxRepetition 577 && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart, 578 tokenEnd)) { 579 matched++; 580 } 581 582 return matched < minRepetition ? NO_MATCH : matched; 583 } 584 585 private static boolean matchChar(String match, int im, final int lm, int tokenType, 586 int[] parsedPattern, int tokenStart, int tokenEnd) { 587 if (im >= lm) { // we've overrun the string, no match 588 return false; 589 } 590 switch (tokenType) { 591 case TOKEN_TYPE_ANY: 592 return true; 593 case TOKEN_TYPE_SET: 594 for (int i = tokenStart; i < tokenEnd; i += 2) { 595 char matchChar = match.charAt(im); 596 if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) { 597 return true; 598 } 599 } 600 return false; 601 case TOKEN_TYPE_INVERSE_SET: 602 for (int i = tokenStart; i < tokenEnd; i += 2) { 603 char matchChar = match.charAt(im); 604 if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) { 605 return false; 606 } 607 } 608 return true; 609 case TOKEN_TYPE_LITERAL: 610 return match.charAt(im) == parsedPattern[tokenStart]; 611 default: 612 return false; 613 } 614 } 615 }