1 /* 2 * Copyright (C) 2022 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACE_PROCESSOR_UTIL_GLOB_H_ 18 #define SRC_TRACE_PROCESSOR_UTIL_GLOB_H_ 19 20 #include <optional> 21 #include <vector> 22 23 #include "perfetto/ext/base/small_vector.h" 24 #include "perfetto/ext/base/string_splitter.h" 25 #include "perfetto/ext/base/string_view.h" 26 27 namespace perfetto { 28 namespace trace_processor { 29 namespace util { 30 31 // Lightweight implementation of matching on UNIX glob patterns, maintaining 32 // compatibility of syntax and semantics used by SQLite. 33 // 34 // Usage: 35 // GlobMatcher matcher = GlobMatcher::FromPattern("*foo*"); 36 // for (auto string : strings) { 37 // if (matcher.Matches(string)) { 38 // <do something> 39 // } 40 // } 41 // 42 // This is a class instead of a free function to allow preprocessing the 43 // pattern (e.g. to compute Kleene star offsets). This can create big savings 44 // because trace processsor needs to match the same pattern on many strings 45 // when filtering tables. 46 // 47 // Implementation: 48 // The algorithm used in this class is similar to the "alternative" 49 // algorithm proposed in [1]. 50 // 51 // We preprocess the pattern (in the constructor) to split the pattern on *, 52 // accounting for character classes. This breaks the pattern in "segments": our 53 // name for the parts of the pattern between the stars. 54 // 55 // Then at match time, we go through each segment and check if it matches part 56 // of the string. The number of character matched defines the search start-point 57 // for the next segment. As described in [1], we don't need to do any 58 // backtracking which removes the exponential component of the algorithm and 59 // consequently simplifies the code. 60 // 61 // The subtle parts are: 62 // 1) the first and last segments - they need to be "anchored" to the 63 // beginning and end of the string respectively. If not, they fail the match 64 // straight away. 65 // 2) leading/trailing stars: they counteract the above point and "unanchor" 66 // the first and last segments respectively by allowing them to happen 67 // somewhere after/before the beginning/end. 68 // 69 // [1] https://research.swtch.com/glob 70 class GlobMatcher { 71 public: 72 // Creates a glob matcher from a pattern. FromPattern(base::StringView pattern_str)73 static GlobMatcher FromPattern(base::StringView pattern_str) { 74 return GlobMatcher(std::move(pattern_str)); 75 } 76 77 // Checks the provided string against the pattern and returns whether it 78 // matches. 79 bool Matches(base::StringView input); 80 81 private: 82 // Represents a portion of the pattern in between two * characters. 83 struct Segment { 84 // The portion of the pattern in the segment. Note that this will not 85 // contain a free '*' (i.e. outside a character class). 86 base::StringView pattern; 87 88 // The number of consumed characters in an input string if this segment 89 // matches. 90 uint32_t matched_chars; 91 }; 92 93 // It would be very rare for a glob pattern to have more than 4 stars so 94 // reserve stack space for that many segments. 95 static constexpr uint32_t kMaxSegmentsOnStack = 4; 96 97 explicit GlobMatcher(base::StringView pattern); 98 99 // Returns whether |input| starts with the pattern in |segment| following 100 // glob matching rules. StartsWith(base::StringView input,const Segment & segment)101 bool StartsWith(base::StringView input, const Segment& segment) { 102 if (!contains_char_class_or_question_) { 103 return input.StartsWith(segment.pattern); 104 } 105 return StartsWithSlow(input, segment); 106 } 107 108 // Returns whether |input| ends with the pattern in |segment| following 109 // glob matching rules. EndsWith(base::StringView input,const Segment & segment)110 bool EndsWith(base::StringView input, const Segment& segment) { 111 if (!contains_char_class_or_question_) { 112 return input.EndsWith(segment.pattern); 113 } 114 // Ending with |segment| is the same as taking the substring of |in| 115 size_t start = input.size() - segment.matched_chars; 116 return StartsWithSlow(input.substr(start), segment); 117 } 118 119 // Returns the index where |input| matches the pattern in |segment| 120 // following glob matching rules or base::StringView::npos, if no such index 121 // exists. Find(base::StringView input,const Segment & segment,size_t start)122 size_t Find(base::StringView input, const Segment& segment, size_t start) { 123 if (!contains_char_class_or_question_) { 124 return input.find(segment.pattern, start); 125 } 126 for (uint32_t i = 0; i < input.size(); ++i) { 127 if (StartsWithSlow(input.substr(i), segment)) { 128 return i; 129 } 130 } 131 return base::StringView::npos; 132 } 133 134 // Given a StringView starting at the boundary of a character class, returns 135 // a StringView containing only the parts inside the [] or base::StringView() 136 // if no character class exists. 137 static base::StringView ExtractCharacterClass(base::StringView input); 138 139 // Matches |in| against the given character class. 140 static bool MatchesCharacterClass(char input, base::StringView char_class); 141 142 bool StartsWithSlow(base::StringView input, const Segment& segment); 143 144 // IMPORTANT: this should *not* be modified after the constructor as we store 145 // pointers to the data inside here. 146 // Note: this vector also allocates space for the null-terminator so is +1 147 // the "traditional" size of the string. 148 std::vector<char> pattern_; 149 150 // Chunks of the |pattern_| tokenized on '*'. See the class comment for more 151 // info. 152 base::SmallVector<Segment, kMaxSegmentsOnStack> segments_; 153 154 bool leading_star_ = false; 155 bool trailing_star_ = false; 156 bool contains_char_class_or_question_ = false; 157 }; 158 159 } // namespace util 160 } // namespace trace_processor 161 } // namespace perfetto 162 163 #endif // SRC_TRACE_PROCESSOR_UTIL_GLOB_H_ 164