1 /* 2 * Copyright (C) 2022 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACE_PROCESSOR_UTIL_GLOB_H_ 18 #define SRC_TRACE_PROCESSOR_UTIL_GLOB_H_ 19 20 #include <cstddef> 21 #include <cstdint> 22 #include <vector> 23 24 #include "perfetto/ext/base/small_vector.h" 25 #include "perfetto/ext/base/string_view.h" 26 27 namespace perfetto::trace_processor::util { 28 29 // Lightweight implementation of matching on UNIX glob patterns, maintaining 30 // compatibility of syntax and semantics used by SQLite. 31 // 32 // Usage: 33 // GlobMatcher matcher = GlobMatcher::FromPattern("*foo*"); 34 // for (auto string : strings) { 35 // if (matcher.Matches(string)) { 36 // <do something> 37 // } 38 // } 39 // 40 // This is a class instead of a free function to allow preprocessing the 41 // pattern (e.g. to compute Kleene star offsets). This can create big savings 42 // because trace processsor needs to match the same pattern on many strings 43 // when filtering tables. 44 // 45 // Implementation: 46 // The algorithm used in this class is similar to the "alternative" 47 // algorithm proposed in [1]. 48 // 49 // We preprocess the pattern (in the constructor) to split the pattern on *, 50 // accounting for character classes. This breaks the pattern in "segments": our 51 // name for the parts of the pattern between the stars. 52 // 53 // Then at match time, we go through each segment and check if it matches part 54 // of the string. The number of character matched defines the search start-point 55 // for the next segment. As described in [1], we don't need to do any 56 // backtracking which removes the exponential component of the algorithm and 57 // consequently simplifies the code. 58 // 59 // The subtle parts are: 60 // 1) the first and last segments - they need to be "anchored" to the 61 // beginning and end of the string respectively. If not, they fail the match 62 // straight away. 63 // 2) leading/trailing stars: they counteract the above point and "unanchor" 64 // the first and last segments respectively by allowing them to happen 65 // somewhere after/before the beginning/end. 66 // 67 // [1] https://research.swtch.com/glob 68 class GlobMatcher { 69 public: 70 GlobMatcher(GlobMatcher&&) = default; 71 GlobMatcher& operator=(GlobMatcher&&) = default; 72 73 // Creates a glob matcher from a pattern. FromPattern(base::StringView pattern_str)74 static GlobMatcher FromPattern(base::StringView pattern_str) { 75 return GlobMatcher(pattern_str); 76 } 77 78 // Checks the provided string against the pattern and returns whether it 79 // matches. 80 bool Matches(base::StringView input) const; 81 82 // Returns whether the comparison should really be an equality comparison. IsEquality()83 bool IsEquality() const { 84 return !leading_star_ && !trailing_star_ && segments_.size() <= 1; 85 } 86 87 private: 88 // Represents a portion of the pattern in between two * characters. 89 struct Segment { 90 // The portion of the pattern in the segment. Note that this will not 91 // contain a free '*' (i.e. outside a character class). 92 base::StringView pattern; 93 94 // The number of consumed characters in an input string if this segment 95 // matches. 96 uint32_t matched_chars; 97 }; 98 99 // It would be very rare for a glob pattern to have more than 4 stars so 100 // reserve stack space for that many segments. 101 static constexpr uint32_t kMaxSegmentsOnStack = 4; 102 103 explicit GlobMatcher(base::StringView pattern); 104 105 GlobMatcher(const GlobMatcher&) = delete; 106 GlobMatcher& operator=(const GlobMatcher&) = delete; 107 108 // Returns whether |input| starts with the pattern in |segment| following 109 // glob matching rules. StartsWith(base::StringView input,const Segment & segment)110 bool StartsWith(base::StringView input, const Segment& segment) const { 111 if (!contains_char_class_or_question_) { 112 return input.StartsWith(segment.pattern); 113 } 114 return StartsWithSlow(input, segment); 115 } 116 117 // Returns whether |input| ends with the pattern in |segment| following 118 // glob matching rules. EndsWith(base::StringView input,const Segment & segment)119 bool EndsWith(base::StringView input, const Segment& segment) const { 120 if (!contains_char_class_or_question_) { 121 return input.EndsWith(segment.pattern); 122 } 123 // Ending with |segment| is the same as taking the substring of |in| 124 size_t start = input.size() - segment.matched_chars; 125 return StartsWithSlow(input.substr(start), segment); 126 } 127 128 // Returns the index where |input| matches the pattern in |segment| 129 // following glob matching rules or base::StringView::npos, if no such index 130 // exists. Find(base::StringView input,const Segment & segment,size_t start)131 size_t Find(base::StringView input, 132 const Segment& segment, 133 size_t start) const { 134 if (!contains_char_class_or_question_) { 135 return input.find(segment.pattern, start); 136 } 137 for (uint32_t i = 0; i < input.size(); ++i) { 138 if (StartsWithSlow(input.substr(i), segment)) { 139 return i; 140 } 141 } 142 return base::StringView::npos; 143 } 144 145 // Given a StringView starting at the boundary of a character class, returns 146 // a StringView containing only the parts inside the [] or base::StringView() 147 // if no character class exists. 148 static base::StringView ExtractCharacterClass(base::StringView input); 149 150 // Matches |in| against the given character class. 151 static bool MatchesCharacterClass(char input, base::StringView char_class); 152 153 static bool StartsWithSlow(base::StringView input, const Segment& segment); 154 155 // IMPORTANT: this should *not* be modified after the constructor as we store 156 // pointers to the data inside here. 157 // Note: this vector also allocates space for the null-terminator so is +1 158 // the "traditional" size of the string. 159 std::vector<char> pattern_; 160 161 // Chunks of the |pattern_| tokenized on '*'. See the class comment for more 162 // info. 163 base::SmallVector<Segment, kMaxSegmentsOnStack> segments_; 164 165 bool leading_star_ = false; 166 bool trailing_star_ = false; 167 bool contains_char_class_or_question_ = false; 168 }; 169 170 } // namespace perfetto::trace_processor::util 171 172 #endif // SRC_TRACE_PROCESSOR_UTIL_GLOB_H_ 173