1 /*
2 * Copyright (C) 2022 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/util/glob.h"
18
19 #include <cstddef>
20 #include <cstdint>
21
22 #include "perfetto/base/logging.h"
23 #include "perfetto/ext/base/string_utils.h"
24 #include "perfetto/ext/base/string_view.h"
25
26 namespace perfetto::trace_processor::util {
27
GlobMatcher(base::StringView pattern_str)28 GlobMatcher::GlobMatcher(base::StringView pattern_str)
29 : pattern_(pattern_str.size() + 1) {
30 base::StringCopy(pattern_.data(), pattern_str.data(), pattern_.size());
31
32 base::StringView pattern(pattern_.data());
33
34 // Note: see the class header for how this algorithm works.
35 uint32_t segment_start = 0;
36 uint32_t segment_potential_matched_chars = 0;
37 auto create_segment = [this, &segment_start, &segment_potential_matched_chars,
38 &pattern](size_t i) {
39 base::StringView segment = pattern.substr(segment_start, i - segment_start);
40 PERFETTO_DCHECK(segment_potential_matched_chars <= segment.size());
41 if (!segment.empty()) {
42 PERFETTO_DCHECK(segment_potential_matched_chars > 0);
43 segments_.emplace_back(Segment{segment, segment_potential_matched_chars});
44 }
45 return segment.empty();
46 };
47
48 for (uint32_t i = 0; i < pattern.size(); ++i) {
49 char c = pattern.at(i);
50
51 // If we don't have a star, we are only matching a single character (but
52 // potentially with a character class which contains >1 character).
53 if (c != '*') {
54 if (c == '[') {
55 base::StringView cclass = ExtractCharacterClass(pattern.substr(i + 1));
56 contains_char_class_or_question_ |= !cclass.empty();
57
58 // Skip the current '[' character.
59 ++i;
60
61 // Skip the whole character class. This will leave i pointing at the
62 // terminating character (i.e. ']'). With the ++i in the loop, this will
63 // correctly lead us going past the whole class.
64 i += cclass.size();
65 }
66
67 contains_char_class_or_question_ |= c == '?';
68 ++segment_potential_matched_chars;
69 continue;
70 }
71
72 // Add the characters collected so far as a segment.
73 create_segment(i);
74 segment_start = i + 1;
75 segment_potential_matched_chars = 0;
76 }
77
78 // Ensure we add any remaining characters as a segment.
79 bool empty_segment = create_segment(pattern.size());
80 leading_star_ = !pattern.empty() && pattern.at(0) == '*';
81 trailing_star_ = !pattern.empty() && empty_segment;
82 }
83
Matches(base::StringView in) const84 bool GlobMatcher::Matches(base::StringView in) const {
85 // If there are no segments, that means the pattern is either '' or '*'
86 // (or '**', '***' etc which is really the same as '*'). This means
87 // we are match if either a) there is a leading star (== trailing star) or
88 // b) the input string is empty.
89 if (segments_.empty()) {
90 PERFETTO_DCHECK(leading_star_ == trailing_star_);
91 return leading_star_ || in.empty();
92 }
93
94 // If there is only one segment and no stars we need an equality match.
95 // As we still need to handle '[..]' and '?', we cannot just use string
96 // equality. We *can* however use StartsWith and check the matched
97 // characters is equal to the length of the input: this is basically the
98 // same as checking equality.
99 if (segments_.size() == 1 && !leading_star_ && !trailing_star_) {
100 return segments_.front().matched_chars == in.size() &&
101 StartsWith(in, segments_.front());
102 }
103
104 // If there's no leading star, the first segment needs to be handled
105 // separately because it *needs* to be anchored to the left of the
106 // string rather than appearing at some point later.
107 if (!leading_star_ && !StartsWith(in, segments_.front())) {
108 return false;
109 }
110
111 // Similarily, if there's no trailing star, the last segment needs to be
112 // "anchored" to the right of the string.
113 if (!trailing_star_ && !EndsWith(in, segments_.back())) {
114 return false;
115 }
116
117 // For any segment we haven't checked, they needs to appear in the string
118 // sequentially with possibly some characters separating them. To handle
119 // this, we just need to iteratively find each segment, starting from the
120 // previous segment.
121 const Segment* segment_start = segments_.begin() + (leading_star_ ? 0 : 1);
122 const Segment* segment_end = segments_.end() - (trailing_star_ ? 0 : 1);
123 size_t find_idx = leading_star_ ? 0 : segments_.front().matched_chars;
124 for (const auto* segment = segment_start; segment < segment_end; ++segment) {
125 size_t pos = Find(in, *segment, find_idx);
126 if (pos == base::StringView::npos) {
127 return false;
128 }
129 find_idx = pos + segment->matched_chars;
130 }
131
132 // Every segment has been found to match so far including the leading and
133 // trailing one so the entire string matches!
134 return true;
135 }
136
StartsWithSlow(base::StringView in,const Segment & segment)137 bool GlobMatcher::StartsWithSlow(base::StringView in, const Segment& segment) {
138 base::StringView pattern = segment.pattern;
139 for (uint32_t i = 0, p = 0; p < pattern.size(); ++i, ++p) {
140 // We've run out of characters to consume in the input but still have more
141 // to consume in the pattern: |in| cannot possibly start with |pattern|.
142 if (i >= in.size()) {
143 return false;
144 }
145
146 char in_c = in.at(i);
147 char pattern_c = pattern.at(p);
148
149 // '?' matches any character.
150 if (pattern_c == '?') {
151 continue;
152 }
153
154 // '[' signifies the start of a character class.
155 if (pattern_c == '[') {
156 base::StringView cclass = ExtractCharacterClass(pattern.substr(p + 1));
157 if (!MatchesCharacterClass(in_c, cclass)) {
158 return false;
159 }
160
161 // Skip the current '[' character.
162 p++;
163
164 // Skip the whole character class. This will leave i pointing at the
165 // terminating character (i.e. ']'). With the ++i in the loop, this will
166 // correctly lead us going past the whole class.
167 p += cclass.size();
168 continue;
169 }
170
171 // Anything else is just an ordinary character.
172 if (in_c != pattern_c) {
173 return false;
174 }
175 }
176 return true;
177 }
178
ExtractCharacterClass(base::StringView in)179 base::StringView GlobMatcher::ExtractCharacterClass(base::StringView in) {
180 if (in.empty())
181 return {};
182
183 // We should always skip the first real character: it could be ']' but if
184 // so, it is treated as a normal character because empty classes are not
185 // valid.
186 bool invert = in.at(0) == '^';
187 size_t end_idx = in.find(']', invert ? 2 : 1);
188 return end_idx == base::StringView::npos ? base::StringView()
189 : in.substr(0, end_idx);
190 }
191
MatchesCharacterClass(char in,base::StringView char_class)192 bool GlobMatcher::MatchesCharacterClass(char in, base::StringView char_class) {
193 PERFETTO_DCHECK(!char_class.empty());
194
195 const char* start = char_class.data();
196 const char* end = start + char_class.size();
197
198 bool invert = *start == '^';
199 start += invert;
200
201 PERFETTO_DCHECK(start != end);
202
203 for (const char* ptr = start; ptr != end; ++ptr) {
204 char cur = *ptr;
205
206 // If we see a '-' at any point except at the start or end of the string,
207 // it represents a matching range (e.g. a-z represents matching any
208 // character between a and z).
209 if (cur == '-' && ptr != start && ptr != end - 1) {
210 // Look at the previous and next characters in the class and check if the
211 // character falls in the range.
212 char range_start = ptr[-1];
213 char range_end = ptr[1];
214 if (range_start <= in && in <= range_end) {
215 return !invert;
216 }
217 continue;
218 }
219
220 // We match a character in the class.
221 if (in == cur) {
222 return !invert;
223 }
224 }
225
226 // If we're here, nothing in the class matched: return whether the class was
227 // inverted as this would actually be a match.
228 return invert;
229 }
230
231 } // namespace perfetto::trace_processor::util
232