1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/strings/pattern.h"
6
7 #include "base/third_party/icu/icu_utf.h"
8
9 namespace base {
10
11 namespace {
12
IsWildcard(base_icu::UChar32 character)13 static bool IsWildcard(base_icu::UChar32 character) {
14 return character == '*' || character == '?';
15 }
16
17 // Move the strings pointers to the point where they start to differ.
18 template <typename CHAR, typename NEXT>
EatSameChars(const CHAR ** pattern,const CHAR * pattern_end,const CHAR ** string,const CHAR * string_end,NEXT next)19 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
20 const CHAR** string, const CHAR* string_end,
21 NEXT next) {
22 const CHAR* escape = NULL;
23 while (*pattern != pattern_end && *string != string_end) {
24 if (!escape && IsWildcard(**pattern)) {
25 // We don't want to match wildcard here, except if it's escaped.
26 return;
27 }
28
29 // Check if the escapement char is found. If so, skip it and move to the
30 // next character.
31 if (!escape && **pattern == '\\') {
32 escape = *pattern;
33 next(pattern, pattern_end);
34 continue;
35 }
36
37 // Check if the chars match, if so, increment the ptrs.
38 const CHAR* pattern_next = *pattern;
39 const CHAR* string_next = *string;
40 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
41 if (pattern_char == next(&string_next, string_end) &&
42 pattern_char != CBU_SENTINEL) {
43 *pattern = pattern_next;
44 *string = string_next;
45 } else {
46 // Uh oh, it did not match, we are done. If the last char was an
47 // escapement, that means that it was an error to advance the ptr here,
48 // let's put it back where it was. This also mean that the MatchPattern
49 // function will return false because if we can't match an escape char
50 // here, then no one will.
51 if (escape) {
52 *pattern = escape;
53 }
54 return;
55 }
56
57 escape = NULL;
58 }
59 }
60
61 template <typename CHAR, typename NEXT>
EatWildcard(const CHAR ** pattern,const CHAR * end,NEXT next)62 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
63 while (*pattern != end) {
64 if (!IsWildcard(**pattern))
65 return;
66 next(pattern, end);
67 }
68 }
69
70 template <typename CHAR, typename NEXT>
MatchPatternT(const CHAR * eval,const CHAR * eval_end,const CHAR * pattern,const CHAR * pattern_end,int depth,NEXT next)71 static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
72 const CHAR* pattern, const CHAR* pattern_end,
73 int depth,
74 NEXT next) {
75 const int kMaxDepth = 16;
76 if (depth > kMaxDepth)
77 return false;
78
79 // Eat all the matching chars.
80 EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
81
82 // If the string is empty, then the pattern must be empty too, or contains
83 // only wildcards.
84 if (eval == eval_end) {
85 EatWildcard(&pattern, pattern_end, next);
86 return pattern == pattern_end;
87 }
88
89 // Pattern is empty but not string, this is not a match.
90 if (pattern == pattern_end)
91 return false;
92
93 // If this is a question mark, then we need to compare the rest with
94 // the current string or the string with one character eaten.
95 const CHAR* next_pattern = pattern;
96 next(&next_pattern, pattern_end);
97 if (pattern[0] == '?') {
98 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
99 depth + 1, next))
100 return true;
101 const CHAR* next_eval = eval;
102 next(&next_eval, eval_end);
103 if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
104 depth + 1, next))
105 return true;
106 }
107
108 // This is a *, try to match all the possible substrings with the remainder
109 // of the pattern.
110 if (pattern[0] == '*') {
111 // Collapse duplicate wild cards (********** into *) so that the
112 // method does not recurse unnecessarily. http://crbug.com/52839
113 EatWildcard(&next_pattern, pattern_end, next);
114
115 while (eval != eval_end) {
116 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
117 depth + 1, next))
118 return true;
119 eval++;
120 }
121
122 // We reached the end of the string, let see if the pattern contains only
123 // wildcards.
124 if (eval == eval_end) {
125 EatWildcard(&pattern, pattern_end, next);
126 if (pattern != pattern_end)
127 return false;
128 return true;
129 }
130 }
131
132 return false;
133 }
134
135 struct NextCharUTF8 {
operator ()base::__anon695dab160111::NextCharUTF8136 base_icu::UChar32 operator()(const char** p, const char* end) {
137 base_icu::UChar32 c;
138 int offset = 0;
139 CBU8_NEXT(*p, offset, end - *p, c);
140 *p += offset;
141 return c;
142 }
143 };
144
145 struct NextCharUTF16 {
operator ()base::__anon695dab160111::NextCharUTF16146 base_icu::UChar32 operator()(const char16** p, const char16* end) {
147 base_icu::UChar32 c;
148 int offset = 0;
149 CBU16_NEXT(*p, offset, end - *p, c);
150 *p += offset;
151 return c;
152 }
153 };
154
155 } // namespace
156
MatchPattern(const StringPiece & eval,const StringPiece & pattern)157 bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) {
158 return MatchPatternT(eval.data(), eval.data() + eval.size(),
159 pattern.data(), pattern.data() + pattern.size(),
160 0, NextCharUTF8());
161 }
162
MatchPattern(const StringPiece16 & eval,const StringPiece16 & pattern)163 bool MatchPattern(const StringPiece16& eval, const StringPiece16& pattern) {
164 return MatchPatternT(eval.data(), eval.data() + eval.size(),
165 pattern.data(), pattern.data() + pattern.size(),
166 0, NextCharUTF16());
167 }
168
169 } // namespace base
170