• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/strings/pattern.h"
6 
7 #include "base/third_party/icu/icu_utf.h"
8 
9 namespace base {
10 
11 namespace {
12 
IsWildcard(base_icu::UChar32 character)13 static bool IsWildcard(base_icu::UChar32 character) {
14   return character == '*' || character == '?';
15 }
16 
17 // Move the strings pointers to the point where they start to differ.
18 template <typename CHAR, typename NEXT>
EatSameChars(const CHAR ** pattern,const CHAR * pattern_end,const CHAR ** string,const CHAR * string_end,NEXT next)19 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
20                          const CHAR** string, const CHAR* string_end,
21                          NEXT next) {
22   const CHAR* escape = NULL;
23   while (*pattern != pattern_end && *string != string_end) {
24     if (!escape && IsWildcard(**pattern)) {
25       // We don't want to match wildcard here, except if it's escaped.
26       return;
27     }
28 
29     // Check if the escapement char is found. If so, skip it and move to the
30     // next character.
31     if (!escape && **pattern == '\\') {
32       escape = *pattern;
33       next(pattern, pattern_end);
34       continue;
35     }
36 
37     // Check if the chars match, if so, increment the ptrs.
38     const CHAR* pattern_next = *pattern;
39     const CHAR* string_next = *string;
40     base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
41     if (pattern_char == next(&string_next, string_end) &&
42         pattern_char != CBU_SENTINEL) {
43       *pattern = pattern_next;
44       *string = string_next;
45     } else {
46       // Uh oh, it did not match, we are done. If the last char was an
47       // escapement, that means that it was an error to advance the ptr here,
48       // let's put it back where it was. This also mean that the MatchPattern
49       // function will return false because if we can't match an escape char
50       // here, then no one will.
51       if (escape) {
52         *pattern = escape;
53       }
54       return;
55     }
56 
57     escape = NULL;
58   }
59 }
60 
// Moves |*pattern| forward past a run of consecutive wildcard characters
// ('*' or '?'), stopping at the first non-wildcard or at |end|.
template <typename CHAR, typename NEXT>
static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
  while (*pattern != end && IsWildcard(**pattern))
    next(pattern, end);
}
69 
// Returns true if the string [eval, eval_end) matches the wildcard pattern
// [pattern, pattern_end).
//
//   - '?' matches zero or one character.
//   - '*' matches zero or more characters.
//   - '\' escapes the following pattern character (handled by EatSameChars).
//
// |next| reads one character from a range and advances the pointer it is
// given; it is what makes this routine independent of the string encoding.
// |depth| is the current recursion depth; callers start at 0. Once it exceeds
// kMaxDepth the function gives up and reports "no match", which bounds the
// cost of patterns containing many wildcards.
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
                          const CHAR* pattern, const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Eat all the matching chars.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // If the string is empty, then the pattern must be empty too, or contain
  // only wildcards.
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // Pattern is empty but not string, this is not a match.
  if (pattern == pattern_end)
    return false;

  // |next_pattern| is the pattern with its first character removed.
  const CHAR* next_pattern = pattern;
  next(&next_pattern, pattern_end);

  // A question mark matches either nothing or exactly one character, so
  // compare the rest of the pattern with the current string and with the
  // string with one character eaten.
  if (pattern[0] == '?') {
    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next))
      return true;
    const CHAR* next_eval = eval;
    next(&next_eval, eval_end);
    return MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
                         depth + 1, next);
  }

  // A star: try to match the remainder of the pattern against every suffix of
  // the string.
  if (pattern[0] == '*') {
    // Collapse duplicate wild cards (********** into *) so that the
    // method does not recurse unnecessarily. http://crbug.com/52839
    EatWildcard(&next_pattern, pattern_end, next);

    // Nothing but wildcards was left in the pattern: the star absorbs the
    // whole remaining string.
    if (next_pattern == pattern_end)
      return true;

    while (eval != eval_end) {
      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                        depth + 1, next))
        return true;
      // Step over one whole character rather than one code unit, exactly as
      // the '?' branch does. This keeps |eval| on a character boundary, so
      // the recursion is never restarted in the middle of a multi-unit
      // character.
      next(&eval, eval_end);
    }

    // The string ran out while a non-wildcard part of the pattern was still
    // unmatched.
    return false;
  }

  return false;
}
134 
135 struct NextCharUTF8 {
operator ()base::__anon695dab160111::NextCharUTF8136   base_icu::UChar32 operator()(const char** p, const char* end) {
137     base_icu::UChar32 c;
138     int offset = 0;
139     CBU8_NEXT(*p, offset, end - *p, c);
140     *p += offset;
141     return c;
142   }
143 };
144 
145 struct NextCharUTF16 {
operator ()base::__anon695dab160111::NextCharUTF16146   base_icu::UChar32 operator()(const char16** p, const char16* end) {
147     base_icu::UChar32 c;
148     int offset = 0;
149     CBU16_NEXT(*p, offset, end - *p, c);
150     *p += offset;
151     return c;
152   }
153 };
154 
155 }  // namespace
156 
MatchPattern(const StringPiece & eval,const StringPiece & pattern)157 bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) {
158   return MatchPatternT(eval.data(), eval.data() + eval.size(),
159                        pattern.data(), pattern.data() + pattern.size(),
160                        0, NextCharUTF8());
161 }
162 
MatchPattern(const StringPiece16 & eval,const StringPiece16 & pattern)163 bool MatchPattern(const StringPiece16& eval, const StringPiece16& pattern) {
164   return MatchPatternT(eval.data(), eval.data() + eval.size(),
165                        pattern.data(), pattern.data() + pattern.size(),
166                        0, NextCharUTF16());
167 }
168 
169 }  // namespace base
170