1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.os; 18 19 import android.util.proto.ProtoOutputStream; 20 21 import java.util.Arrays; 22 23 /** 24 * A simple pattern matcher, which is safe to use on untrusted data: it does 25 * not provide full reg-exp support, only simple globbing that can not be 26 * used maliciously. 27 */ 28 public class PatternMatcher implements Parcelable { 29 /** 30 * Pattern type: the given pattern must exactly match the string it is 31 * tested against. 32 */ 33 public static final int PATTERN_LITERAL = 0; 34 35 /** 36 * Pattern type: the given pattern must match the 37 * beginning of the string it is tested against. 38 */ 39 public static final int PATTERN_PREFIX = 1; 40 41 /** 42 * Pattern type: the given pattern is interpreted with a 43 * simple glob syntax for matching against the string it is tested against. 44 * In this syntax, you can use the '*' character to match against zero or 45 * more occurrences of the character immediately before. If the 46 * character before it is '.' it will match any character. The character 47 * '\' can be used as an escape. This essentially provides only the '*' 48 * wildcard part of a normal regexp. 49 */ 50 public static final int PATTERN_SIMPLE_GLOB = 2; 51 52 /** 53 * Pattern type: the given pattern is interpreted with a regular 54 * expression-like syntax for matching against the string it is tested 55 * against. Supported tokens include dot ({@code .}) and sets ({@code [...]}) 56 * with full support for character ranges and the not ({@code ^}) modifier. 57 * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +}) 58 * for one-or-more and full range ({@code {...}}) support. This is a simple 59 * evaluation implementation in which matching is done against the pattern in 60 * real time with no backtracking support. 61 */ 62 public static final int PATTERN_ADVANCED_GLOB = 3; 63 64 /** 65 * Pattern type: the given pattern must match the 66 * end of the string it is tested against. 67 */ 68 public static final int PATTERN_SUFFIX = 4; 69 70 // token types for advanced matching 71 private static final int TOKEN_TYPE_LITERAL = 0; 72 private static final int TOKEN_TYPE_ANY = 1; 73 private static final int TOKEN_TYPE_SET = 2; 74 private static final int TOKEN_TYPE_INVERSE_SET = 3; 75 76 // Return for no match 77 private static final int NO_MATCH = -1; 78 79 private static final String TAG = "PatternMatcher"; 80 81 // Parsed placeholders for advanced patterns 82 private static final int PARSED_TOKEN_CHAR_SET_START = -1; 83 private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2; 84 private static final int PARSED_TOKEN_CHAR_SET_STOP = -3; 85 private static final int PARSED_TOKEN_CHAR_ANY = -4; 86 private static final int PARSED_MODIFIER_RANGE_START = -5; 87 private static final int PARSED_MODIFIER_RANGE_STOP = -6; 88 private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7; 89 private static final int PARSED_MODIFIER_ONE_OR_MORE = -8; 90 91 private final String mPattern; 92 private final int mType; 93 private final int[] mParsedPattern; 94 95 96 private static final int MAX_PATTERN_STORAGE = 2048; 97 // workspace to use for building a parsed advanced pattern; 98 private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE]; 99 PatternMatcher(String pattern, int type)100 public PatternMatcher(String pattern, int type) { 101 mPattern = pattern; 102 mType = type; 103 if (mType == PATTERN_ADVANCED_GLOB) { 104 mParsedPattern = parseAndVerifyAdvancedPattern(pattern); 105 } else { 106 mParsedPattern = null; 107 } 108 } 109 getPath()110 public final String getPath() { 111 return mPattern; 112 } 113 getType()114 public final int getType() { 115 return mType; 116 } 117 match(String str)118 public boolean match(String str) { 119 return matchPattern(str, mPattern, mParsedPattern, mType); 120 } 121 toString()122 public String toString() { 123 String type = "? "; 124 switch (mType) { 125 case PATTERN_LITERAL: 126 type = "LITERAL: "; 127 break; 128 case PATTERN_PREFIX: 129 type = "PREFIX: "; 130 break; 131 case PATTERN_SIMPLE_GLOB: 132 type = "GLOB: "; 133 break; 134 case PATTERN_ADVANCED_GLOB: 135 type = "ADVANCED: "; 136 break; 137 case PATTERN_SUFFIX: 138 type = "SUFFIX: "; 139 break; 140 } 141 return "PatternMatcher{" + type + mPattern + "}"; 142 } 143 144 /** @hide */ dumpDebug(ProtoOutputStream proto, long fieldId)145 public void dumpDebug(ProtoOutputStream proto, long fieldId) { 146 long token = proto.start(fieldId); 147 proto.write(PatternMatcherProto.PATTERN, mPattern); 148 proto.write(PatternMatcherProto.TYPE, mType); 149 // PatternMatcherProto.PARSED_PATTERN is too much to dump, but the field is reserved to 150 // match the current data structure. 151 proto.end(token); 152 } 153 describeContents()154 public int describeContents() { 155 return 0; 156 } 157 writeToParcel(Parcel dest, int flags)158 public void writeToParcel(Parcel dest, int flags) { 159 dest.writeString(mPattern); 160 dest.writeInt(mType); 161 dest.writeIntArray(mParsedPattern); 162 } 163 PatternMatcher(Parcel src)164 public PatternMatcher(Parcel src) { 165 mPattern = src.readString(); 166 mType = src.readInt(); 167 mParsedPattern = src.createIntArray(); 168 } 169 170 public static final @android.annotation.NonNull Parcelable.Creator<PatternMatcher> CREATOR 171 = new Parcelable.Creator<PatternMatcher>() { 172 public PatternMatcher createFromParcel(Parcel source) { 173 return new PatternMatcher(source); 174 } 175 176 public PatternMatcher[] newArray(int size) { 177 return new PatternMatcher[size]; 178 } 179 }; 180 matchPattern(String match, String pattern, int[] parsedPattern, int type)181 static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) { 182 if (match == null) return false; 183 if (type == PATTERN_LITERAL) { 184 return pattern.equals(match); 185 } if (type == PATTERN_PREFIX) { 186 return match.startsWith(pattern); 187 } else if (type == PATTERN_SIMPLE_GLOB) { 188 return matchGlobPattern(pattern, match); 189 } else if (type == PATTERN_ADVANCED_GLOB) { 190 return matchAdvancedPattern(parsedPattern, match); 191 } else if (type == PATTERN_SUFFIX) { 192 return match.endsWith(pattern); 193 } 194 return false; 195 } 196 matchGlobPattern(String pattern, String match)197 static boolean matchGlobPattern(String pattern, String match) { 198 final int NP = pattern.length(); 199 if (NP <= 0) { 200 return match.length() <= 0; 201 } 202 final int NM = match.length(); 203 int ip = 0, im = 0; 204 char nextChar = pattern.charAt(0); 205 while ((ip<NP) && (im<NM)) { 206 char c = nextChar; 207 ip++; 208 nextChar = ip < NP ? pattern.charAt(ip) : 0; 209 final boolean escaped = (c == '\\'); 210 if (escaped) { 211 c = nextChar; 212 ip++; 213 nextChar = ip < NP ? pattern.charAt(ip) : 0; 214 } 215 if (nextChar == '*') { 216 if (!escaped && c == '.') { 217 if (ip >= (NP-1)) { 218 // at the end with a pattern match, so 219 // all is good without checking! 220 return true; 221 } 222 ip++; 223 nextChar = pattern.charAt(ip); 224 // Consume everything until the next character in the 225 // pattern is found. 226 if (nextChar == '\\') { 227 ip++; 228 nextChar = ip < NP ? pattern.charAt(ip) : 0; 229 } 230 do { 231 if (match.charAt(im) == nextChar) { 232 break; 233 } 234 im++; 235 } while (im < NM); 236 if (im == NM) { 237 // Whoops, the next character in the pattern didn't 238 // exist in the match. 239 return false; 240 } 241 ip++; 242 nextChar = ip < NP ? pattern.charAt(ip) : 0; 243 im++; 244 } else { 245 // Consume only characters matching the one before '*'. 246 do { 247 if (match.charAt(im) != c) { 248 break; 249 } 250 im++; 251 } while (im < NM); 252 ip++; 253 nextChar = ip < NP ? pattern.charAt(ip) : 0; 254 } 255 } else { 256 if (c != '.' && match.charAt(im) != c) return false; 257 im++; 258 } 259 } 260 261 if (ip >= NP && im >= NM) { 262 // Reached the end of both strings, all is good! 263 return true; 264 } 265 266 // One last check: we may have finished the match string, but still 267 // have a '.*' at the end of the pattern, which should still count 268 // as a match. 269 if (ip == NP-2 && pattern.charAt(ip) == '.' 270 && pattern.charAt(ip+1) == '*') { 271 return true; 272 } 273 274 return false; 275 } 276 277 /** 278 * Parses the advanced pattern and returns an integer array representation of it. The integer 279 * array treats each field as a character if positive and a unique token placeholder if 280 * negative. This method will throw on any pattern structure violations. 281 */ 282 synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) { 283 int ip = 0; 284 final int LP = pattern.length(); 285 286 int it = 0; 287 288 boolean inSet = false; 289 boolean inRange = false; 290 boolean inCharClass = false; 291 292 boolean addToParsedPattern; 293 294 while (ip < LP) { 295 if (it > MAX_PATTERN_STORAGE - 3) { 296 throw new IllegalArgumentException("Pattern is too large!"); 297 } 298 299 char c = pattern.charAt(ip); 300 addToParsedPattern = false; 301 302 switch (c) { 303 case '[': 304 if (inSet) { 305 addToParsedPattern = true; // treat as literal or char class in set 306 } else { 307 if (pattern.charAt(ip + 1) == '^') { 308 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START; 309 ip++; // skip over the '^' 310 } else { 311 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START; 312 } 313 ip++; // move to the next pattern char 314 inSet = true; 315 continue; 316 } 317 break; 318 case ']': 319 if (!inSet) { 320 addToParsedPattern = true; // treat as literal outside of set 321 } else { 322 int parsedToken = sParsedPatternScratch[it - 1]; 323 if (parsedToken == PARSED_TOKEN_CHAR_SET_START || 324 parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) { 325 throw new IllegalArgumentException( 326 "You must define characters in a set."); 327 } 328 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP; 329 inSet = false; 330 inCharClass = false; 331 } 332 break; 333 case '{': 334 if (!inSet) { 335 if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { 336 throw new IllegalArgumentException("Modifier must follow a token."); 337 } 338 sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START; 339 ip++; 340 inRange = true; 341 } 342 break; 343 case '}': 344 if (inRange) { // only terminate the range if we're currently in one 345 sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP; 346 inRange = false; 347 } 348 break; 349 case '*': 350 if (!inSet) { 351 if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { 352 throw new IllegalArgumentException("Modifier must follow a token."); 353 } 354 sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE; 355 } 356 break; 357 case '+': 358 if (!inSet) { 359 if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { 360 throw new IllegalArgumentException("Modifier must follow a token."); 361 } 362 sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE; 363 } 364 break; 365 case '.': 366 if (!inSet) { 367 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY; 368 } 369 break; 370 case '\\': // escape 371 if (ip + 1 >= LP) { 372 throw new IllegalArgumentException("Escape found at end of pattern!"); 373 } 374 c = pattern.charAt(++ip); 375 addToParsedPattern = true; 376 break; 377 default: 378 addToParsedPattern = true; 379 break; 380 } 381 if (inSet) { 382 if (inCharClass) { 383 sParsedPatternScratch[it++] = c; 384 inCharClass = false; 385 } else { 386 // look forward for character class 387 if (ip + 2 < LP 388 && pattern.charAt(ip + 1) == '-' 389 && pattern.charAt(ip + 2) != ']') { 390 inCharClass = true; 391 sParsedPatternScratch[it++] = c; // set first token as lower end of range 392 ip++; // advance past dash 393 } else { // literal 394 sParsedPatternScratch[it++] = c; // set first token as literal 395 sParsedPatternScratch[it++] = c; // set second set as literal 396 } 397 } 398 } else if (inRange) { 399 int endOfSet = pattern.indexOf('}', ip); 400 if (endOfSet < 0) { 401 throw new IllegalArgumentException("Range not ended with '}'"); 402 } 403 String rangeString = pattern.substring(ip, endOfSet); 404 int commaIndex = rangeString.indexOf(','); 405 try { 406 final int rangeMin; 407 final int rangeMax; 408 if (commaIndex < 0) { 409 int parsedRange = Integer.parseInt(rangeString); 410 rangeMin = rangeMax = parsedRange; 411 } else { 412 rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex)); 413 if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more) 414 rangeMax = Integer.MAX_VALUE; 415 } else { 416 rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1)); 417 } 418 } 419 if (rangeMin > rangeMax) { 420 throw new IllegalArgumentException( 421 "Range quantifier minimum is greater than maximum"); 422 } 423 sParsedPatternScratch[it++] = rangeMin; 424 sParsedPatternScratch[it++] = rangeMax; 425 } catch (NumberFormatException e) { 426 throw new IllegalArgumentException("Range number format incorrect", e); 427 } 428 ip = endOfSet; 429 continue; // don't increment ip 430 } else if (addToParsedPattern) { 431 sParsedPatternScratch[it++] = c; 432 } 433 ip++; 434 } 435 if (inSet) { 436 throw new IllegalArgumentException("Set was not terminated!"); 437 } 438 return Arrays.copyOf(sParsedPatternScratch, it); 439 } 440 441 private static boolean isParsedModifier(int parsedChar) { 442 return parsedChar == PARSED_MODIFIER_ONE_OR_MORE || 443 parsedChar == PARSED_MODIFIER_ZERO_OR_MORE || 444 parsedChar == PARSED_MODIFIER_RANGE_STOP || 445 parsedChar == PARSED_MODIFIER_RANGE_START; 446 } 447 448 static boolean matchAdvancedPattern(int[] parsedPattern, String match) { 449 450 // create indexes 451 int ip = 0, im = 0; 452 453 // one-time length check 454 final int LP = parsedPattern.length, LM = match.length(); 455 456 // The current character being analyzed in the pattern 457 int patternChar; 458 459 int tokenType; 460 461 int charSetStart = 0, charSetEnd = 0; 462 463 while (ip < LP) { // we still have content in the pattern 464 465 patternChar = parsedPattern[ip]; 466 // get the match type of the next verb 467 468 switch (patternChar) { 469 case PARSED_TOKEN_CHAR_ANY: 470 tokenType = TOKEN_TYPE_ANY; 471 ip++; 472 break; 473 case PARSED_TOKEN_CHAR_SET_START: 474 case PARSED_TOKEN_CHAR_SET_INVERSE_START: 475 tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START 476 ? TOKEN_TYPE_SET 477 : TOKEN_TYPE_INVERSE_SET; 478 charSetStart = ip + 1; // start from the char after the set start 479 while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP); 480 charSetEnd = ip - 1; // we're on the set stop, end is the previous 481 ip++; // move the pointer to the next pattern entry 482 break; 483 default: 484 charSetStart = ip; 485 tokenType = TOKEN_TYPE_LITERAL; 486 ip++; 487 break; 488 } 489 490 final int minRepetition; 491 final int maxRepetition; 492 493 // look for a match length modifier 494 if (ip >= LP) { 495 minRepetition = maxRepetition = 1; 496 } else { 497 patternChar = parsedPattern[ip]; 498 switch (patternChar) { 499 case PARSED_MODIFIER_ZERO_OR_MORE: 500 minRepetition = 0; 501 maxRepetition = Integer.MAX_VALUE; 502 ip++; 503 break; 504 case PARSED_MODIFIER_ONE_OR_MORE: 505 minRepetition = 1; 506 maxRepetition = Integer.MAX_VALUE; 507 ip++; 508 break; 509 case PARSED_MODIFIER_RANGE_START: 510 minRepetition = parsedPattern[++ip]; 511 maxRepetition = parsedPattern[++ip]; 512 ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token 513 break; 514 default: 515 minRepetition = maxRepetition = 1; // implied literal 516 break; 517 } 518 } 519 if (minRepetition > maxRepetition) { 520 return false; 521 } 522 523 // attempt to match as many characters as possible 524 int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition, 525 parsedPattern, charSetStart, charSetEnd); 526 527 // if we found a conflict, return false immediately 528 if (matched == NO_MATCH) { 529 return false; 530 } 531 532 // move the match pointer the number of characters matched 533 im += matched; 534 } 535 return ip >= LP && im >= LM; // have parsed entire string and regex 536 } 537 538 private static int matchChars(String match, int im, final int lm, int tokenType, 539 int minRepetition, int maxRepetition, int[] parsedPattern, 540 int tokenStart, int tokenEnd) { 541 int matched = 0; 542 543 while(matched < maxRepetition 544 && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart, 545 tokenEnd)) { 546 matched++; 547 } 548 549 return matched < minRepetition ? NO_MATCH : matched; 550 } 551 552 private static boolean matchChar(String match, int im, final int lm, int tokenType, 553 int[] parsedPattern, int tokenStart, int tokenEnd) { 554 if (im >= lm) { // we've overrun the string, no match 555 return false; 556 } 557 switch (tokenType) { 558 case TOKEN_TYPE_ANY: 559 return true; 560 case TOKEN_TYPE_SET: 561 for (int i = tokenStart; i < tokenEnd; i += 2) { 562 char matchChar = match.charAt(im); 563 if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) { 564 return true; 565 } 566 } 567 return false; 568 case TOKEN_TYPE_INVERSE_SET: 569 for (int i = tokenStart; i < tokenEnd; i += 2) { 570 char matchChar = match.charAt(im); 571 if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) { 572 return false; 573 } 574 } 575 return true; 576 case TOKEN_TYPE_LITERAL: 577 return match.charAt(im) == parsedPattern[tokenStart]; 578 default: 579 return false; 580 } 581 } 582 }