1 // Copyright 2015 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/40284755): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9
10 #include "base/strings/pattern.h"
11
12 #include <string_view>
13
14 #include "base/third_party/icu/icu_utf.h"
15
16 namespace base {
17
18 namespace {
19
IsWildcard(base_icu::UChar32 character)20 constexpr bool IsWildcard(base_icu::UChar32 character) {
21 return character == '*' || character == '?';
22 }
23
24 // Searches for the next subpattern of |pattern| in |string|, up to the given
25 // |maximum_distance|. The subpattern extends from the start of |pattern| up to
26 // the first wildcard character (or the end of the string). If the value of
27 // |maximum_distance| is negative, the maximum distance is considered infinite.
28 template <typename CHAR, typename NEXT>
SearchForChars(const CHAR ** pattern,const CHAR * pattern_end,const CHAR ** string,const CHAR * string_end,int maximum_distance,NEXT next)29 constexpr bool SearchForChars(const CHAR** pattern,
30 const CHAR* pattern_end,
31 const CHAR** string,
32 const CHAR* string_end,
33 int maximum_distance,
34 NEXT next) {
35 const CHAR* pattern_start = *pattern;
36 const CHAR* string_start = *string;
37 bool escape = false;
38 while (true) {
39 if (*pattern == pattern_end) {
40 // If this is the end of the pattern, only accept the end of the string;
41 // anything else falls through to the mismatch case.
42 if (*string == string_end)
43 return true;
44 } else {
45 // If we have found a wildcard, we're done.
46 if (!escape && IsWildcard(**pattern))
47 return true;
48
49 // Check if the escape character is found. If so, skip it and move to the
50 // next character.
51 if (!escape && **pattern == '\\') {
52 escape = true;
53 next(pattern, pattern_end);
54 continue;
55 }
56
57 escape = false;
58
59 if (*string == string_end)
60 return false;
61
62 // Check if the chars match, if so, increment the ptrs.
63 const CHAR* pattern_next = *pattern;
64 const CHAR* string_next = *string;
65 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
66 if (pattern_char == next(&string_next, string_end) &&
67 pattern_char != CBU_SENTINEL) {
68 *pattern = pattern_next;
69 *string = string_next;
70 continue;
71 }
72 }
73
74 // Mismatch. If we have reached the maximum distance, return false,
75 // otherwise restart at the beginning of the pattern with the next character
76 // in the string.
77 // TODO(bauerb): This is a naive implementation of substring search, which
78 // could be implemented with a more efficient algorithm, e.g.
79 // Knuth-Morris-Pratt (at the expense of requiring preprocessing).
80 if (maximum_distance == 0)
81 return false;
82
83 // Because unlimited distance is represented as -1, this will never reach 0
84 // and therefore fail the match above.
85 maximum_distance--;
86 *pattern = pattern_start;
87 next(&string_start, string_end);
88 *string = string_start;
89 }
90 }
91
92 // Consumes consecutive wildcard characters (? or *). Returns the maximum number
93 // of characters matched by the sequence of wildcards, or -1 if the wildcards
94 // match an arbitrary number of characters (which is the case if it contains at
95 // least one *).
96 template <typename CHAR, typename NEXT>
EatWildcards(const CHAR ** pattern,const CHAR * end,NEXT next)97 constexpr int EatWildcards(const CHAR** pattern, const CHAR* end, NEXT next) {
98 int num_question_marks = 0;
99 bool has_asterisk = false;
100 while (*pattern != end) {
101 if (**pattern == '?') {
102 num_question_marks++;
103 } else if (**pattern == '*') {
104 has_asterisk = true;
105 } else {
106 break;
107 }
108
109 next(pattern, end);
110 }
111 return has_asterisk ? -1 : num_question_marks;
112 }
113
114 template <typename CHAR, typename NEXT>
MatchPatternT(const CHAR * eval,const CHAR * eval_end,const CHAR * pattern,const CHAR * pattern_end,NEXT next)115 constexpr bool MatchPatternT(const CHAR* eval,
116 const CHAR* eval_end,
117 const CHAR* pattern,
118 const CHAR* pattern_end,
119 NEXT next) {
120 do {
121 int maximum_wildcard_length = EatWildcards(&pattern, pattern_end, next);
122 if (!SearchForChars(&pattern, pattern_end, &eval, eval_end,
123 maximum_wildcard_length, next)) {
124 return false;
125 }
126 } while (pattern != pattern_end);
127 return true;
128 }
129
130 struct NextCharUTF8 {
operator ()base::__anon281840fc0111::NextCharUTF8131 base_icu::UChar32 operator()(const char** p, const char* end) {
132 base_icu::UChar32 c;
133 int offset = 0;
134 CBU8_NEXT(reinterpret_cast<const uint8_t*>(*p), offset, end - *p, c);
135 *p += offset;
136 return c;
137 }
138 };
139
140 struct NextCharUTF16 {
operator ()base::__anon281840fc0111::NextCharUTF16141 base_icu::UChar32 operator()(const char16_t** p, const char16_t* end) {
142 base_icu::UChar32 c;
143 int offset = 0;
144 CBU16_NEXT(*p, offset, end - *p, c);
145 *p += offset;
146 return c;
147 }
148 };
149
150 } // namespace
151
MatchPattern(std::string_view eval,std::string_view pattern)152 bool MatchPattern(std::string_view eval, std::string_view pattern) {
153 return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(),
154 pattern.data() + pattern.size(), NextCharUTF8());
155 }
156
MatchPattern(std::u16string_view eval,std::u16string_view pattern)157 bool MatchPattern(std::u16string_view eval, std::u16string_view pattern) {
158 return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(),
159 pattern.data() + pattern.size(), NextCharUTF16());
160 }
161
162 } // namespace base
163