• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/40284755): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9 
10 #include "base/strings/pattern.h"
11 
12 #include <string_view>
13 
14 #include "base/third_party/icu/icu_utf.h"
15 
16 namespace base {
17 
18 namespace {
19 
IsWildcard(base_icu::UChar32 character)20 constexpr bool IsWildcard(base_icu::UChar32 character) {
21   return character == '*' || character == '?';
22 }
23 
24 // Searches for the next subpattern of |pattern| in |string|, up to the given
25 // |maximum_distance|. The subpattern extends from the start of |pattern| up to
26 // the first wildcard character (or the end of the string). If the value of
27 // |maximum_distance| is negative, the maximum distance is considered infinite.
28 template <typename CHAR, typename NEXT>
SearchForChars(const CHAR ** pattern,const CHAR * pattern_end,const CHAR ** string,const CHAR * string_end,int maximum_distance,NEXT next)29 constexpr bool SearchForChars(const CHAR** pattern,
30                               const CHAR* pattern_end,
31                               const CHAR** string,
32                               const CHAR* string_end,
33                               int maximum_distance,
34                               NEXT next) {
35   const CHAR* pattern_start = *pattern;
36   const CHAR* string_start = *string;
37   bool escape = false;
38   while (true) {
39     if (*pattern == pattern_end) {
40       // If this is the end of the pattern, only accept the end of the string;
41       // anything else falls through to the mismatch case.
42       if (*string == string_end)
43         return true;
44     } else {
45       // If we have found a wildcard, we're done.
46       if (!escape && IsWildcard(**pattern))
47         return true;
48 
49       // Check if the escape character is found. If so, skip it and move to the
50       // next character.
51       if (!escape && **pattern == '\\') {
52         escape = true;
53         next(pattern, pattern_end);
54         continue;
55       }
56 
57       escape = false;
58 
59       if (*string == string_end)
60         return false;
61 
62       // Check if the chars match, if so, increment the ptrs.
63       const CHAR* pattern_next = *pattern;
64       const CHAR* string_next = *string;
65       base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
66       if (pattern_char == next(&string_next, string_end) &&
67           pattern_char != CBU_SENTINEL) {
68         *pattern = pattern_next;
69         *string = string_next;
70         continue;
71       }
72     }
73 
74     // Mismatch. If we have reached the maximum distance, return false,
75     // otherwise restart at the beginning of the pattern with the next character
76     // in the string.
77     // TODO(bauerb): This is a naive implementation of substring search, which
78     // could be implemented with a more efficient algorithm, e.g.
79     // Knuth-Morris-Pratt (at the expense of requiring preprocessing).
80     if (maximum_distance == 0)
81       return false;
82 
83     // Because unlimited distance is represented as -1, this will never reach 0
84     // and therefore fail the match above.
85     maximum_distance--;
86     *pattern = pattern_start;
87     next(&string_start, string_end);
88     *string = string_start;
89   }
90 }
91 
92 // Consumes consecutive wildcard characters (? or *). Returns the maximum number
93 // of characters matched by the sequence of wildcards, or -1 if the wildcards
94 // match an arbitrary number of characters (which is the case if it contains at
95 // least one *).
96 template <typename CHAR, typename NEXT>
EatWildcards(const CHAR ** pattern,const CHAR * end,NEXT next)97 constexpr int EatWildcards(const CHAR** pattern, const CHAR* end, NEXT next) {
98   int num_question_marks = 0;
99   bool has_asterisk = false;
100   while (*pattern != end) {
101     if (**pattern == '?') {
102       num_question_marks++;
103     } else if (**pattern == '*') {
104       has_asterisk = true;
105     } else {
106       break;
107     }
108 
109     next(pattern, end);
110   }
111   return has_asterisk ? -1 : num_question_marks;
112 }
113 
114 template <typename CHAR, typename NEXT>
MatchPatternT(const CHAR * eval,const CHAR * eval_end,const CHAR * pattern,const CHAR * pattern_end,NEXT next)115 constexpr bool MatchPatternT(const CHAR* eval,
116                              const CHAR* eval_end,
117                              const CHAR* pattern,
118                              const CHAR* pattern_end,
119                              NEXT next) {
120   do {
121     int maximum_wildcard_length = EatWildcards(&pattern, pattern_end, next);
122     if (!SearchForChars(&pattern, pattern_end, &eval, eval_end,
123                         maximum_wildcard_length, next)) {
124       return false;
125     }
126   } while (pattern != pattern_end);
127   return true;
128 }
129 
130 struct NextCharUTF8 {
operator ()base::__anon0b36e7e00111::NextCharUTF8131   base_icu::UChar32 operator()(const char** p, const char* end) {
132     base_icu::UChar32 c;
133     int offset = 0;
134     CBU8_NEXT(reinterpret_cast<const uint8_t*>(*p), offset, end - *p, c);
135     *p += offset;
136     return c;
137   }
138 };
139 
140 struct NextCharUTF16 {
operator ()base::__anon0b36e7e00111::NextCharUTF16141   base_icu::UChar32 operator()(const char16_t** p, const char16_t* end) {
142     base_icu::UChar32 c;
143     int offset = 0;
144     CBU16_NEXT(*p, offset, end - *p, c);
145     *p += offset;
146     return c;
147   }
148 };
149 
150 }  // namespace
151 
MatchPattern(std::string_view eval,std::string_view pattern)152 bool MatchPattern(std::string_view eval, std::string_view pattern) {
153   return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(),
154                        pattern.data() + pattern.size(), NextCharUTF8());
155 }
156 
MatchPattern(std::u16string_view eval,std::u16string_view pattern)157 bool MatchPattern(std::u16string_view eval, std::u16string_view pattern) {
158   return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(),
159                        pattern.data() + pattern.size(), NextCharUTF16());
160 }
161 
162 }  // namespace base
163