• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/util/glob.h"
18 
19 #include "perfetto/ext/base/string_utils.h"
20 
21 namespace perfetto {
22 namespace trace_processor {
23 namespace util {
24 
GlobMatcher(base::StringView pattern_str)25 GlobMatcher::GlobMatcher(base::StringView pattern_str)
26     : pattern_(pattern_str.size() + 1) {
27   base::StringCopy(pattern_.data(), pattern_str.data(), pattern_.size());
28 
29   base::StringView pattern(pattern_.data());
30 
31   // Note: see the class header for how this algorithm works.
32   uint32_t segment_start = 0;
33   uint32_t segment_potential_matched_chars = 0;
34   auto create_segment = [this, &segment_start, &segment_potential_matched_chars,
35                          &pattern](size_t i) {
36     base::StringView segment = pattern.substr(segment_start, i - segment_start);
37     PERFETTO_DCHECK(segment_potential_matched_chars <= segment.size());
38     if (!segment.empty()) {
39       PERFETTO_DCHECK(segment_potential_matched_chars > 0);
40       segments_.emplace_back(Segment{segment, segment_potential_matched_chars});
41     }
42     return segment.empty();
43   };
44 
45   for (uint32_t i = 0; i < pattern.size(); ++i) {
46     char c = pattern.at(i);
47 
48     // If we don't have a star, we are only matching a single character (but
49     // potentially with a character class which contains >1 character).
50     if (c != '*') {
51       if (c == '[') {
52         base::StringView cclass = ExtractCharacterClass(pattern.substr(i + 1));
53         contains_char_class_or_question_ |= !cclass.empty();
54 
55         // Skip the current '[' character.
56         ++i;
57 
58         // Skip the whole character class. This will leave i pointing at the
59         // terminating character (i.e. ']'). With the ++i in the loop, this will
60         // correctly lead us going past the whole class.
61         i += cclass.size();
62       }
63 
64       contains_char_class_or_question_ |= c == '?';
65       ++segment_potential_matched_chars;
66       continue;
67     }
68 
69     // Add the characters collected so far as a segment.
70     create_segment(i);
71     segment_start = i + 1;
72     segment_potential_matched_chars = 0;
73   }
74 
75   // Ensure we add any remaining characters as a segment.
76   bool empty_segment = create_segment(pattern.size());
77   leading_star_ = !pattern.empty() && pattern.at(0) == '*';
78   trailing_star_ = !pattern.empty() && empty_segment;
79 }
80 
Matches(base::StringView in)81 bool GlobMatcher::Matches(base::StringView in) {
82   // If there are no segments, that means the pattern is either '' or '*'
83   // (or '**', '***' etc which is really the same as '*'). This means
84   // we are match if either a) there is a leading star (== trailing star) or
85   // b) the input string is empty.
86   if (segments_.empty()) {
87     PERFETTO_DCHECK(leading_star_ == trailing_star_);
88     return leading_star_ || in.empty();
89   }
90 
91   // If there is only one segment and no stars we need an equality match.
92   // As we still need to handle '[..]' and '?', we cannot just use string
93   // equality. We *can* however use StartsWith and check the matched
94   // characters is equal to the length of the input: this is basically the
95   // same as checking equality.
96   if (segments_.size() == 1 && !leading_star_ && !trailing_star_) {
97     return segments_.front().matched_chars == in.size() &&
98            StartsWith(in, segments_.front());
99   }
100 
101   // If there's no leading star, the first segment needs to be handled
102   // separately because it *needs* to be anchored to the left of the
103   // string rather than appearing at some point later.
104   if (!leading_star_ && !StartsWith(in, segments_.front())) {
105     return false;
106   }
107 
108   // Similarily, if there's no trailing star, the last segment needs to be
109   // "anchored" to the right of the string.
110   if (!trailing_star_ && !EndsWith(in, segments_.back())) {
111     return false;
112   }
113 
114   // For any segment we haven't checked, they needs to appear in the string
115   // sequentially with possibly some characters separating them. To handle
116   // this, we just need to iteratively find each segment, starting from the
117   // previous segment.
118   Segment* segment_start = segments_.begin() + (leading_star_ ? 0 : 1);
119   Segment* segment_end = segments_.end() - (trailing_star_ ? 0 : 1);
120   size_t find_idx = leading_star_ ? 0 : segments_.front().matched_chars;
121   for (Segment* segment = segment_start; segment < segment_end; ++segment) {
122     size_t pos = Find(in, *segment, find_idx);
123     if (pos == base::StringView::npos) {
124       return false;
125     }
126     find_idx = pos + segment->matched_chars;
127   }
128 
129   // Every segment has been found to match so far including the leading and
130   // trailing one so the entire string matches!
131   return true;
132 }
133 
StartsWithSlow(base::StringView in,const Segment & segment)134 bool GlobMatcher::StartsWithSlow(base::StringView in, const Segment& segment) {
135   base::StringView pattern = segment.pattern;
136   for (uint32_t i = 0, p = 0; p < pattern.size(); ++i, ++p) {
137     // We've run out of characters to consume in the input but still have more
138     // to consume in the pattern: |in| cannot possibly start with |pattern|.
139     if (i >= in.size()) {
140       return false;
141     }
142 
143     char in_c = in.at(i);
144     char pattern_c = pattern.at(p);
145 
146     // '?' matches any character.
147     if (pattern_c == '?') {
148       continue;
149     }
150 
151     // '[' signifies the start of a character class.
152     if (pattern_c == '[') {
153       base::StringView cclass = ExtractCharacterClass(pattern.substr(p + 1));
154       if (!MatchesCharacterClass(in_c, cclass)) {
155         return false;
156       }
157 
158       // Skip the current '[' character.
159       p++;
160 
161       // Skip the whole character class. This will leave i pointing at the
162       // terminating character (i.e. ']'). With the ++i in the loop, this will
163       // correctly lead us going past the whole class.
164       p += cclass.size();
165       continue;
166     }
167 
168     // Anything else is just an ordinary character.
169     if (in_c != pattern_c) {
170       return false;
171     }
172   }
173   return true;
174 }
175 
ExtractCharacterClass(base::StringView in)176 base::StringView GlobMatcher::ExtractCharacterClass(base::StringView in) {
177   if (in.empty())
178     return base::StringView();
179 
180   // We should always skip the first real character: it could be ']' but if
181   // so, it is treated as a normal character because empty classes are not
182   // valid.
183   bool invert = in.at(0) == '^';
184   size_t end_idx = in.find(']', invert ? 2 : 1);
185   return end_idx == base::StringView::npos ? base::StringView()
186                                            : in.substr(0, end_idx);
187 }
188 
MatchesCharacterClass(char in,base::StringView char_class)189 bool GlobMatcher::MatchesCharacterClass(char in, base::StringView char_class) {
190   PERFETTO_DCHECK(!char_class.empty());
191 
192   const char* start = char_class.data();
193   const char* end = start + char_class.size();
194 
195   bool invert = *start == '^';
196   start += invert;
197 
198   PERFETTO_DCHECK(start != end);
199 
200   for (auto* ptr = start; ptr != end; ++ptr) {
201     char cur = *ptr;
202 
203     // If we see a '-' at any point except at the start or end of the string,
204     // it represents a matching range (e.g. a-z represents matching any
205     // character between a and z).
206     if (cur == '-' && ptr != start && ptr != end - 1) {
207       // Look at the previous and next characters in the class and check if the
208       // character falls in the range.
209       char range_start = ptr[-1];
210       char range_end = ptr[1];
211       if (range_start <= in && in <= range_end) {
212         return !invert;
213       }
214       continue;
215     }
216 
217     // We match a character in the class.
218     if (in == cur) {
219       return !invert;
220     }
221   }
222 
223   // If we're here, nothing in the class matched: return whether the class was
224   // inverted as this would actually be a match.
225   return invert;
226 }
227 
228 }  // namespace util
229 }  // namespace trace_processor
230 }  // namespace perfetto
231