1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.os; 18 19 import android.util.Log; 20 21 import java.util.Arrays; 22 23 /** 24 * A simple pattern matcher, which is safe to use on untrusted data: it does 25 * not provide full reg-exp support, only simple globbing that can not be 26 * used maliciously. 27 */ 28 public class PatternMatcher implements Parcelable { 29 /** 30 * Pattern type: the given pattern must exactly match the string it is 31 * tested against. 32 */ 33 public static final int PATTERN_LITERAL = 0; 34 35 /** 36 * Pattern type: the given pattern must match the 37 * beginning of the string it is tested against. 38 */ 39 public static final int PATTERN_PREFIX = 1; 40 41 /** 42 * Pattern type: the given pattern is interpreted with a 43 * simple glob syntax for matching against the string it is tested against. 44 * In this syntax, you can use the '*' character to match against zero or 45 * more occurrences of the character immediately before. If the 46 * character before it is '.' it will match any character. The character 47 * '\' can be used as an escape. This essentially provides only the '*' 48 * wildcard part of a normal regexp. 49 */ 50 public static final int PATTERN_SIMPLE_GLOB = 2; 51 52 /** 53 * Pattern type: the given pattern is interpreted with a regular 54 * expression-like syntax for matching against the string it is tested 55 * against. Supported tokens include dot ({@code .}) and sets ({@code [...]}) 56 * with full support for character ranges and the not ({@code ^}) modifier. 57 * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +}) 58 * for one-or-more and full range ({@code {...}}) support. This is a simple 59 * evaluation implementation in which matching is done against the pattern in 60 * real time with no backtracking support. 61 */ 62 public static final int PATTERN_ADVANCED_GLOB = 3; 63 64 // token types for advanced matching 65 private static final int TOKEN_TYPE_LITERAL = 0; 66 private static final int TOKEN_TYPE_ANY = 1; 67 private static final int TOKEN_TYPE_SET = 2; 68 private static final int TOKEN_TYPE_INVERSE_SET = 3; 69 70 // Return for no match 71 private static final int NO_MATCH = -1; 72 73 private static final String TAG = "PatternMatcher"; 74 75 // Parsed placeholders for advanced patterns 76 private static final int PARSED_TOKEN_CHAR_SET_START = -1; 77 private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2; 78 private static final int PARSED_TOKEN_CHAR_SET_STOP = -3; 79 private static final int PARSED_TOKEN_CHAR_ANY = -4; 80 private static final int PARSED_MODIFIER_RANGE_START = -5; 81 private static final int PARSED_MODIFIER_RANGE_STOP = -6; 82 private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7; 83 private static final int PARSED_MODIFIER_ONE_OR_MORE = -8; 84 85 private final String mPattern; 86 private final int mType; 87 private final int[] mParsedPattern; 88 89 90 private static final int MAX_PATTERN_STORAGE = 2048; 91 // workspace to use for building a parsed advanced pattern; 92 private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE]; 93 PatternMatcher(String pattern, int type)94 public PatternMatcher(String pattern, int type) { 95 mPattern = pattern; 96 mType = type; 97 if (mType == PATTERN_ADVANCED_GLOB) { 98 mParsedPattern = parseAndVerifyAdvancedPattern(pattern); 99 } else { 100 mParsedPattern = null; 101 } 102 } 103 getPath()104 public final String getPath() { 105 return mPattern; 106 } 107 getType()108 public final int getType() { 109 return mType; 110 } 111 match(String str)112 public boolean match(String str) { 113 return matchPattern(str, mPattern, mParsedPattern, mType); 114 } 115 toString()116 public String toString() { 117 String type = "? "; 118 switch (mType) { 119 case PATTERN_LITERAL: 120 type = "LITERAL: "; 121 break; 122 case PATTERN_PREFIX: 123 type = "PREFIX: "; 124 break; 125 case PATTERN_SIMPLE_GLOB: 126 type = "GLOB: "; 127 break; 128 case PATTERN_ADVANCED_GLOB: 129 type = "ADVANCED: "; 130 break; 131 } 132 return "PatternMatcher{" + type + mPattern + "}"; 133 } 134 describeContents()135 public int describeContents() { 136 return 0; 137 } 138 writeToParcel(Parcel dest, int flags)139 public void writeToParcel(Parcel dest, int flags) { 140 dest.writeString(mPattern); 141 dest.writeInt(mType); 142 dest.writeIntArray(mParsedPattern); 143 } 144 PatternMatcher(Parcel src)145 public PatternMatcher(Parcel src) { 146 mPattern = src.readString(); 147 mType = src.readInt(); 148 mParsedPattern = src.createIntArray(); 149 } 150 151 public static final Parcelable.Creator<PatternMatcher> CREATOR 152 = new Parcelable.Creator<PatternMatcher>() { 153 public PatternMatcher createFromParcel(Parcel source) { 154 return new PatternMatcher(source); 155 } 156 157 public PatternMatcher[] newArray(int size) { 158 return new PatternMatcher[size]; 159 } 160 }; 161 matchPattern(String match, String pattern, int[] parsedPattern, int type)162 static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) { 163 if (match == null) return false; 164 if (type == PATTERN_LITERAL) { 165 return pattern.equals(match); 166 } if (type == PATTERN_PREFIX) { 167 return match.startsWith(pattern); 168 } else if (type == PATTERN_SIMPLE_GLOB) { 169 return matchGlobPattern(pattern, match); 170 } else if (type == PATTERN_ADVANCED_GLOB) { 171 return matchAdvancedPattern(parsedPattern, match); 172 } 173 return false; 174 } 175 matchGlobPattern(String pattern, String match)176 static boolean matchGlobPattern(String pattern, String match) { 177 final int NP = pattern.length(); 178 if (NP <= 0) { 179 return match.length() <= 0; 180 } 181 final int NM = match.length(); 182 int ip = 0, im = 0; 183 char nextChar = pattern.charAt(0); 184 while ((ip<NP) && (im<NM)) { 185 char c = nextChar; 186 ip++; 187 nextChar = ip < NP ? pattern.charAt(ip) : 0; 188 final boolean escaped = (c == '\\'); 189 if (escaped) { 190 c = nextChar; 191 ip++; 192 nextChar = ip < NP ? pattern.charAt(ip) : 0; 193 } 194 if (nextChar == '*') { 195 if (!escaped && c == '.') { 196 if (ip >= (NP-1)) { 197 // at the end with a pattern match, so 198 // all is good without checking! 199 return true; 200 } 201 ip++; 202 nextChar = pattern.charAt(ip); 203 // Consume everything until the next character in the 204 // pattern is found. 205 if (nextChar == '\\') { 206 ip++; 207 nextChar = ip < NP ? pattern.charAt(ip) : 0; 208 } 209 do { 210 if (match.charAt(im) == nextChar) { 211 break; 212 } 213 im++; 214 } while (im < NM); 215 if (im == NM) { 216 // Whoops, the next character in the pattern didn't 217 // exist in the match. 218 return false; 219 } 220 ip++; 221 nextChar = ip < NP ? pattern.charAt(ip) : 0; 222 im++; 223 } else { 224 // Consume only characters matching the one before '*'. 225 do { 226 if (match.charAt(im) != c) { 227 break; 228 } 229 im++; 230 } while (im < NM); 231 ip++; 232 nextChar = ip < NP ? pattern.charAt(ip) : 0; 233 } 234 } else { 235 if (c != '.' && match.charAt(im) != c) return false; 236 im++; 237 } 238 } 239 240 if (ip >= NP && im >= NM) { 241 // Reached the end of both strings, all is good! 242 return true; 243 } 244 245 // One last check: we may have finished the match string, but still 246 // have a '.*' at the end of the pattern, which should still count 247 // as a match. 248 if (ip == NP-2 && pattern.charAt(ip) == '.' 249 && pattern.charAt(ip+1) == '*') { 250 return true; 251 } 252 253 return false; 254 } 255 256 /** 257 * Parses the advanced pattern and returns an integer array representation of it. The integer 258 * array treats each field as a character if positive and a unique token placeholder if 259 * negative. This method will throw on any pattern structure violations. 260 */ 261 synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) { 262 int ip = 0; 263 final int LP = pattern.length(); 264 265 int it = 0; 266 267 boolean inSet = false; 268 boolean inRange = false; 269 boolean inCharClass = false; 270 271 boolean addToParsedPattern; 272 273 while (ip < LP) { 274 if (it > MAX_PATTERN_STORAGE - 3) { 275 throw new IllegalArgumentException("Pattern is too large!"); 276 } 277 278 char c = pattern.charAt(ip); 279 addToParsedPattern = false; 280 281 switch (c) { 282 case '[': 283 if (inSet) { 284 addToParsedPattern = true; // treat as literal or char class in set 285 } else { 286 if (pattern.charAt(ip + 1) == '^') { 287 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START; 288 ip++; // skip over the '^' 289 } else { 290 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START; 291 } 292 ip++; // move to the next pattern char 293 inSet = true; 294 continue; 295 } 296 break; 297 case ']': 298 if (!inSet) { 299 addToParsedPattern = true; // treat as literal outside of set 300 } else { 301 int parsedToken = sParsedPatternScratch[it - 1]; 302 if (parsedToken == PARSED_TOKEN_CHAR_SET_START || 303 parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) { 304 throw new IllegalArgumentException( 305 "You must define characters in a set."); 306 } 307 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP; 308 inSet = false; 309 inCharClass = false; 310 } 311 break; 312 case '{': 313 if (!inSet) { 314 if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { 315 throw new IllegalArgumentException("Modifier must follow a token."); 316 } 317 sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START; 318 ip++; 319 inRange = true; 320 } 321 break; 322 case '}': 323 if (inRange) { // only terminate the range if we're currently in one 324 sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP; 325 inRange = false; 326 } 327 break; 328 case '*': 329 if (!inSet) { 330 if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { 331 throw new IllegalArgumentException("Modifier must follow a token."); 332 } 333 sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE; 334 } 335 break; 336 case '+': 337 if (!inSet) { 338 if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { 339 throw new IllegalArgumentException("Modifier must follow a token."); 340 } 341 sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE; 342 } 343 break; 344 case '.': 345 if (!inSet) { 346 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY; 347 } 348 break; 349 case '\\': // escape 350 if (ip + 1 >= LP) { 351 throw new IllegalArgumentException("Escape found at end of pattern!"); 352 } 353 c = pattern.charAt(++ip); 354 addToParsedPattern = true; 355 break; 356 default: 357 addToParsedPattern = true; 358 break; 359 } 360 if (inSet) { 361 if (inCharClass) { 362 sParsedPatternScratch[it++] = c; 363 inCharClass = false; 364 } else { 365 // look forward for character class 366 if (ip + 2 < LP 367 && pattern.charAt(ip + 1) == '-' 368 && pattern.charAt(ip + 2) != ']') { 369 inCharClass = true; 370 sParsedPatternScratch[it++] = c; // set first token as lower end of range 371 ip++; // advance past dash 372 } else { // literal 373 sParsedPatternScratch[it++] = c; // set first token as literal 374 sParsedPatternScratch[it++] = c; // set second set as literal 375 } 376 } 377 } else if (inRange) { 378 int endOfSet = pattern.indexOf('}', ip); 379 if (endOfSet < 0) { 380 throw new IllegalArgumentException("Range not ended with '}'"); 381 } 382 String rangeString = pattern.substring(ip, endOfSet); 383 int commaIndex = rangeString.indexOf(','); 384 try { 385 final int rangeMin; 386 final int rangeMax; 387 if (commaIndex < 0) { 388 int parsedRange = Integer.parseInt(rangeString); 389 rangeMin = rangeMax = parsedRange; 390 } else { 391 rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex)); 392 if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more) 393 rangeMax = Integer.MAX_VALUE; 394 } else { 395 rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1)); 396 } 397 } 398 if (rangeMin > rangeMax) { 399 throw new IllegalArgumentException( 400 "Range quantifier minimum is greater than maximum"); 401 } 402 sParsedPatternScratch[it++] = rangeMin; 403 sParsedPatternScratch[it++] = rangeMax; 404 } catch (NumberFormatException e) { 405 throw new IllegalArgumentException("Range number format incorrect", e); 406 } 407 ip = endOfSet; 408 continue; // don't increment ip 409 } else if (addToParsedPattern) { 410 sParsedPatternScratch[it++] = c; 411 } 412 ip++; 413 } 414 if (inSet) { 415 throw new IllegalArgumentException("Set was not terminated!"); 416 } 417 return Arrays.copyOf(sParsedPatternScratch, it); 418 } 419 420 private static boolean isParsedModifier(int parsedChar) { 421 return parsedChar == PARSED_MODIFIER_ONE_OR_MORE || 422 parsedChar == PARSED_MODIFIER_ZERO_OR_MORE || 423 parsedChar == PARSED_MODIFIER_RANGE_STOP || 424 parsedChar == PARSED_MODIFIER_RANGE_START; 425 } 426 427 static boolean matchAdvancedPattern(int[] parsedPattern, String match) { 428 429 // create indexes 430 int ip = 0, im = 0; 431 432 // one-time length check 433 final int LP = parsedPattern.length, LM = match.length(); 434 435 // The current character being analyzed in the pattern 436 int patternChar; 437 438 int tokenType; 439 440 int charSetStart = 0, charSetEnd = 0; 441 442 while (ip < LP) { // we still have content in the pattern 443 444 patternChar = parsedPattern[ip]; 445 // get the match type of the next verb 446 447 switch (patternChar) { 448 case PARSED_TOKEN_CHAR_ANY: 449 tokenType = TOKEN_TYPE_ANY; 450 ip++; 451 break; 452 case PARSED_TOKEN_CHAR_SET_START: 453 case PARSED_TOKEN_CHAR_SET_INVERSE_START: 454 tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START 455 ? TOKEN_TYPE_SET 456 : TOKEN_TYPE_INVERSE_SET; 457 charSetStart = ip + 1; // start from the char after the set start 458 while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP); 459 charSetEnd = ip - 1; // we're on the set stop, end is the previous 460 ip++; // move the pointer to the next pattern entry 461 break; 462 default: 463 charSetStart = ip; 464 tokenType = TOKEN_TYPE_LITERAL; 465 ip++; 466 break; 467 } 468 469 final int minRepetition; 470 final int maxRepetition; 471 472 // look for a match length modifier 473 if (ip >= LP) { 474 minRepetition = maxRepetition = 1; 475 } else { 476 patternChar = parsedPattern[ip]; 477 switch (patternChar) { 478 case PARSED_MODIFIER_ZERO_OR_MORE: 479 minRepetition = 0; 480 maxRepetition = Integer.MAX_VALUE; 481 ip++; 482 break; 483 case PARSED_MODIFIER_ONE_OR_MORE: 484 minRepetition = 1; 485 maxRepetition = Integer.MAX_VALUE; 486 ip++; 487 break; 488 case PARSED_MODIFIER_RANGE_START: 489 minRepetition = parsedPattern[++ip]; 490 maxRepetition = parsedPattern[++ip]; 491 ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token 492 break; 493 default: 494 minRepetition = maxRepetition = 1; // implied literal 495 break; 496 } 497 } 498 if (minRepetition > maxRepetition) { 499 return false; 500 } 501 502 // attempt to match as many characters as possible 503 int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition, 504 parsedPattern, charSetStart, charSetEnd); 505 506 // if we found a conflict, return false immediately 507 if (matched == NO_MATCH) { 508 return false; 509 } 510 511 // move the match pointer the number of characters matched 512 im += matched; 513 } 514 return ip >= LP && im >= LM; // have parsed entire string and regex 515 } 516 517 private static int matchChars(String match, int im, final int lm, int tokenType, 518 int minRepetition, int maxRepetition, int[] parsedPattern, 519 int tokenStart, int tokenEnd) { 520 int matched = 0; 521 522 while(matched < maxRepetition 523 && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart, 524 tokenEnd)) { 525 matched++; 526 } 527 528 return matched < minRepetition ? NO_MATCH : matched; 529 } 530 531 private static boolean matchChar(String match, int im, final int lm, int tokenType, 532 int[] parsedPattern, int tokenStart, int tokenEnd) { 533 if (im >= lm) { // we've overrun the string, no match 534 return false; 535 } 536 switch (tokenType) { 537 case TOKEN_TYPE_ANY: 538 return true; 539 case TOKEN_TYPE_SET: 540 for (int i = tokenStart; i < tokenEnd; i += 2) { 541 char matchChar = match.charAt(im); 542 if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) { 543 return true; 544 } 545 } 546 return false; 547 case TOKEN_TYPE_INVERSE_SET: 548 for (int i = tokenStart; i < tokenEnd; i += 2) { 549 char matchChar = match.charAt(im); 550 if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) { 551 return false; 552 } 553 } 554 return true; 555 case TOKEN_TYPE_LITERAL: 556 return match.charAt(im) == parsedPattern[tokenStart]; 557 default: 558 return false; 559 } 560 } 561 }