• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/util/glob.h"
18 
19 #include <cstddef>
20 #include <cstdint>
21 
22 #include "perfetto/base/logging.h"
23 #include "perfetto/ext/base/string_utils.h"
24 #include "perfetto/ext/base/string_view.h"
25 
26 namespace perfetto::trace_processor::util {
27 
GlobMatcher(base::StringView pattern_str)28 GlobMatcher::GlobMatcher(base::StringView pattern_str)
29     : pattern_(pattern_str.size() + 1) {
30   base::StringCopy(pattern_.data(), pattern_str.data(), pattern_.size());
31 
32   base::StringView pattern(pattern_.data());
33 
34   // Note: see the class header for how this algorithm works.
35   uint32_t segment_start = 0;
36   uint32_t segment_potential_matched_chars = 0;
37   auto create_segment = [this, &segment_start, &segment_potential_matched_chars,
38                          &pattern](size_t i) {
39     base::StringView segment = pattern.substr(segment_start, i - segment_start);
40     PERFETTO_DCHECK(segment_potential_matched_chars <= segment.size());
41     if (!segment.empty()) {
42       PERFETTO_DCHECK(segment_potential_matched_chars > 0);
43       segments_.emplace_back(Segment{segment, segment_potential_matched_chars});
44     }
45     return segment.empty();
46   };
47 
48   for (uint32_t i = 0; i < pattern.size(); ++i) {
49     char c = pattern.at(i);
50 
51     // If we don't have a star, we are only matching a single character (but
52     // potentially with a character class which contains >1 character).
53     if (c != '*') {
54       if (c == '[') {
55         base::StringView cclass = ExtractCharacterClass(pattern.substr(i + 1));
56         contains_char_class_or_question_ |= !cclass.empty();
57 
58         // Skip the current '[' character.
59         ++i;
60 
61         // Skip the whole character class. This will leave i pointing at the
62         // terminating character (i.e. ']'). With the ++i in the loop, this will
63         // correctly lead us going past the whole class.
64         i += cclass.size();
65       }
66 
67       contains_char_class_or_question_ |= c == '?';
68       ++segment_potential_matched_chars;
69       continue;
70     }
71 
72     // Add the characters collected so far as a segment.
73     create_segment(i);
74     segment_start = i + 1;
75     segment_potential_matched_chars = 0;
76   }
77 
78   // Ensure we add any remaining characters as a segment.
79   bool empty_segment = create_segment(pattern.size());
80   leading_star_ = !pattern.empty() && pattern.at(0) == '*';
81   trailing_star_ = !pattern.empty() && empty_segment;
82 }
83 
Matches(base::StringView in) const84 bool GlobMatcher::Matches(base::StringView in) const {
85   // If there are no segments, that means the pattern is either '' or '*'
86   // (or '**', '***' etc which is really the same as '*'). This means
87   // we are match if either a) there is a leading star (== trailing star) or
88   // b) the input string is empty.
89   if (segments_.empty()) {
90     PERFETTO_DCHECK(leading_star_ == trailing_star_);
91     return leading_star_ || in.empty();
92   }
93 
94   // If there is only one segment and no stars we need an equality match.
95   // As we still need to handle '[..]' and '?', we cannot just use string
96   // equality. We *can* however use StartsWith and check the matched
97   // characters is equal to the length of the input: this is basically the
98   // same as checking equality.
99   if (segments_.size() == 1 && !leading_star_ && !trailing_star_) {
100     return segments_.front().matched_chars == in.size() &&
101            StartsWith(in, segments_.front());
102   }
103 
104   // If there's no leading star, the first segment needs to be handled
105   // separately because it *needs* to be anchored to the left of the
106   // string rather than appearing at some point later.
107   if (!leading_star_ && !StartsWith(in, segments_.front())) {
108     return false;
109   }
110 
111   // Similarily, if there's no trailing star, the last segment needs to be
112   // "anchored" to the right of the string.
113   if (!trailing_star_ && !EndsWith(in, segments_.back())) {
114     return false;
115   }
116 
117   // For any segment we haven't checked, they needs to appear in the string
118   // sequentially with possibly some characters separating them. To handle
119   // this, we just need to iteratively find each segment, starting from the
120   // previous segment.
121   const Segment* segment_start = segments_.begin() + (leading_star_ ? 0 : 1);
122   const Segment* segment_end = segments_.end() - (trailing_star_ ? 0 : 1);
123   size_t find_idx = leading_star_ ? 0 : segments_.front().matched_chars;
124   for (const auto* segment = segment_start; segment < segment_end; ++segment) {
125     size_t pos = Find(in, *segment, find_idx);
126     if (pos == base::StringView::npos) {
127       return false;
128     }
129     find_idx = pos + segment->matched_chars;
130   }
131 
132   // Every segment has been found to match so far including the leading and
133   // trailing one so the entire string matches!
134   return true;
135 }
136 
StartsWithSlow(base::StringView in,const Segment & segment)137 bool GlobMatcher::StartsWithSlow(base::StringView in, const Segment& segment) {
138   base::StringView pattern = segment.pattern;
139   for (uint32_t i = 0, p = 0; p < pattern.size(); ++i, ++p) {
140     // We've run out of characters to consume in the input but still have more
141     // to consume in the pattern: |in| cannot possibly start with |pattern|.
142     if (i >= in.size()) {
143       return false;
144     }
145 
146     char in_c = in.at(i);
147     char pattern_c = pattern.at(p);
148 
149     // '?' matches any character.
150     if (pattern_c == '?') {
151       continue;
152     }
153 
154     // '[' signifies the start of a character class.
155     if (pattern_c == '[') {
156       base::StringView cclass = ExtractCharacterClass(pattern.substr(p + 1));
157       if (!MatchesCharacterClass(in_c, cclass)) {
158         return false;
159       }
160 
161       // Skip the current '[' character.
162       p++;
163 
164       // Skip the whole character class. This will leave i pointing at the
165       // terminating character (i.e. ']'). With the ++i in the loop, this will
166       // correctly lead us going past the whole class.
167       p += cclass.size();
168       continue;
169     }
170 
171     // Anything else is just an ordinary character.
172     if (in_c != pattern_c) {
173       return false;
174     }
175   }
176   return true;
177 }
178 
ExtractCharacterClass(base::StringView in)179 base::StringView GlobMatcher::ExtractCharacterClass(base::StringView in) {
180   if (in.empty())
181     return {};
182 
183   // We should always skip the first real character: it could be ']' but if
184   // so, it is treated as a normal character because empty classes are not
185   // valid.
186   bool invert = in.at(0) == '^';
187   size_t end_idx = in.find(']', invert ? 2 : 1);
188   return end_idx == base::StringView::npos ? base::StringView()
189                                            : in.substr(0, end_idx);
190 }
191 
MatchesCharacterClass(char in,base::StringView char_class)192 bool GlobMatcher::MatchesCharacterClass(char in, base::StringView char_class) {
193   PERFETTO_DCHECK(!char_class.empty());
194 
195   const char* start = char_class.data();
196   const char* end = start + char_class.size();
197 
198   bool invert = *start == '^';
199   start += invert;
200 
201   PERFETTO_DCHECK(start != end);
202 
203   for (const char* ptr = start; ptr != end; ++ptr) {
204     char cur = *ptr;
205 
206     // If we see a '-' at any point except at the start or end of the string,
207     // it represents a matching range (e.g. a-z represents matching any
208     // character between a and z).
209     if (cur == '-' && ptr != start && ptr != end - 1) {
210       // Look at the previous and next characters in the class and check if the
211       // character falls in the range.
212       char range_start = ptr[-1];
213       char range_end = ptr[1];
214       if (range_start <= in && in <= range_end) {
215         return !invert;
216       }
217       continue;
218     }
219 
220     // We match a character in the class.
221     if (in == cur) {
222       return !invert;
223     }
224   }
225 
226   // If we're here, nothing in the class matched: return whether the class was
227   // inverted as this would actually be a match.
228   return invert;
229 }
230 
231 }  // namespace perfetto::trace_processor::util
232