• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_UTIL_GLOB_H_
18 #define SRC_TRACE_PROCESSOR_UTIL_GLOB_H_
19 
20 #include <cstddef>
21 #include <cstdint>
22 #include <vector>
23 
24 #include "perfetto/ext/base/small_vector.h"
25 #include "perfetto/ext/base/string_view.h"
26 
27 namespace perfetto::trace_processor::util {
28 
29 // Lightweight implementation of matching on UNIX glob patterns, maintaining
30 // compatibility of syntax and semantics used by SQLite.
31 //
32 // Usage:
33 //  GlobMatcher matcher = GlobMatcher::FromPattern("*foo*");
34 //  for (auto string : strings) {
35 //    if (matcher.Matches(string)) {
36 //      <do something>
37 //    }
38 //  }
39 //
40 // This is a class instead of a free function to allow preprocessing the
41 // pattern (e.g. to compute Kleene star offsets). This can create big savings
42 // because trace processsor needs to match the same pattern on many strings
43 // when filtering tables.
44 //
45 // Implementation:
46 // The algorithm used in this class is similar to the "alternative"
47 // algorithm proposed in [1].
48 //
49 // We preprocess the pattern (in the constructor) to split the pattern on *,
50 // accounting for character classes. This breaks the pattern in "segments": our
51 // name for the parts of the pattern between the stars.
52 //
53 // Then at match time, we go through each segment and check if it matches part
54 // of the string. The number of character matched defines the search start-point
55 // for the next segment. As described in [1], we don't need to do any
56 // backtracking which removes the exponential component of the algorithm and
57 // consequently simplifies the code.
58 //
59 // The subtle parts are:
60 // 1) the first and last segments - they need to be "anchored" to the
61 //    beginning and end of the string respectively. If not, they fail the match
62 //    straight away.
63 // 2) leading/trailing stars: they counteract the above point and "unanchor"
64 //    the first and last segments respectively by allowing them to happen
65 //    somewhere after/before the beginning/end.
66 //
67 // [1] https://research.swtch.com/glob
68 class GlobMatcher {
69  public:
70   GlobMatcher(GlobMatcher&&) = default;
71   GlobMatcher& operator=(GlobMatcher&&) = default;
72 
73   // Creates a glob matcher from a pattern.
FromPattern(base::StringView pattern_str)74   static GlobMatcher FromPattern(base::StringView pattern_str) {
75     return GlobMatcher(pattern_str);
76   }
77 
78   // Checks the provided string against the pattern and returns whether it
79   // matches.
80   bool Matches(base::StringView input) const;
81 
82   // Returns whether the comparison should really be an equality comparison.
IsEquality()83   bool IsEquality() const {
84     return !leading_star_ && !trailing_star_ && segments_.size() <= 1;
85   }
86 
87  private:
88   // Represents a portion of the pattern in between two * characters.
89   struct Segment {
90     // The portion of the pattern in the segment. Note that this will not
91     // contain a free '*' (i.e. outside a character class).
92     base::StringView pattern;
93 
94     // The number of consumed characters in an input string if this segment
95     // matches.
96     uint32_t matched_chars;
97   };
98 
99   // It would be very rare for a glob pattern to have more than 4 stars so
100   // reserve stack space for that many segments.
101   static constexpr uint32_t kMaxSegmentsOnStack = 4;
102 
103   explicit GlobMatcher(base::StringView pattern);
104 
105   GlobMatcher(const GlobMatcher&) = delete;
106   GlobMatcher& operator=(const GlobMatcher&) = delete;
107 
108   // Returns whether |input| starts with the pattern in |segment| following
109   // glob matching rules.
StartsWith(base::StringView input,const Segment & segment)110   bool StartsWith(base::StringView input, const Segment& segment) const {
111     if (!contains_char_class_or_question_) {
112       return input.StartsWith(segment.pattern);
113     }
114     return StartsWithSlow(input, segment);
115   }
116 
117   // Returns whether |input| ends with the pattern in |segment| following
118   // glob matching rules.
EndsWith(base::StringView input,const Segment & segment)119   bool EndsWith(base::StringView input, const Segment& segment) const {
120     if (!contains_char_class_or_question_) {
121       return input.EndsWith(segment.pattern);
122     }
123     // Ending with |segment| is the same as taking the substring of |in|
124     size_t start = input.size() - segment.matched_chars;
125     return StartsWithSlow(input.substr(start), segment);
126   }
127 
128   // Returns the index where |input| matches the pattern in |segment|
129   // following glob matching rules or base::StringView::npos, if no such index
130   // exists.
Find(base::StringView input,const Segment & segment,size_t start)131   size_t Find(base::StringView input,
132               const Segment& segment,
133               size_t start) const {
134     if (!contains_char_class_or_question_) {
135       return input.find(segment.pattern, start);
136     }
137     for (uint32_t i = 0; i < input.size(); ++i) {
138       if (StartsWithSlow(input.substr(i), segment)) {
139         return i;
140       }
141     }
142     return base::StringView::npos;
143   }
144 
145   // Given a StringView starting at the boundary of a character class, returns
146   // a StringView containing only the parts inside the [] or base::StringView()
147   // if no character class exists.
148   static base::StringView ExtractCharacterClass(base::StringView input);
149 
150   // Matches |in| against the given character class.
151   static bool MatchesCharacterClass(char input, base::StringView char_class);
152 
153   static bool StartsWithSlow(base::StringView input, const Segment& segment);
154 
155   // IMPORTANT: this should *not* be modified after the constructor as we store
156   // pointers to the data inside here.
157   // Note: this vector also allocates space for the null-terminator so is +1
158   // the "traditional" size of the string.
159   std::vector<char> pattern_;
160 
161   // Chunks of the |pattern_| tokenized on '*'. See the class comment for more
162   // info.
163   base::SmallVector<Segment, kMaxSegmentsOnStack> segments_;
164 
165   bool leading_star_ = false;
166   bool trailing_star_ = false;
167   bool contains_char_class_or_question_ = false;
168 };
169 
170 }  // namespace perfetto::trace_processor::util
171 
172 #endif  // SRC_TRACE_PROCESSOR_UTIL_GLOB_H_
173