• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_UTIL_GLOB_H_
18 #define SRC_TRACE_PROCESSOR_UTIL_GLOB_H_
19 
20 #include <optional>
21 #include <vector>
22 
23 #include "perfetto/ext/base/small_vector.h"
24 #include "perfetto/ext/base/string_splitter.h"
25 #include "perfetto/ext/base/string_view.h"
26 
27 namespace perfetto {
28 namespace trace_processor {
29 namespace util {
30 
31 // Lightweight implementation of matching on UNIX glob patterns, maintaining
32 // compatibility of syntax and semantics used by SQLite.
33 //
34 // Usage:
35 //  GlobMatcher matcher = GlobMatcher::FromPattern("*foo*");
36 //  for (auto string : strings) {
37 //    if (matcher.Matches(string)) {
38 //      <do something>
39 //    }
40 //  }
41 //
42 // This is a class instead of a free function to allow preprocessing the
43 // pattern (e.g. to compute Kleene star offsets). This can create big savings
44 // because trace processsor needs to match the same pattern on many strings
45 // when filtering tables.
46 //
47 // Implementation:
48 // The algorithm used in this class is similar to the "alternative"
49 // algorithm proposed in [1].
50 //
51 // We preprocess the pattern (in the constructor) to split the pattern on *,
52 // accounting for character classes. This breaks the pattern in "segments": our
53 // name for the parts of the pattern between the stars.
54 //
55 // Then at match time, we go through each segment and check if it matches part
56 // of the string. The number of character matched defines the search start-point
57 // for the next segment. As described in [1], we don't need to do any
58 // backtracking which removes the exponential component of the algorithm and
59 // consequently simplifies the code.
60 //
61 // The subtle parts are:
62 // 1) the first and last segments - they need to be "anchored" to the
63 //    beginning and end of the string respectively. If not, they fail the match
64 //    straight away.
65 // 2) leading/trailing stars: they counteract the above point and "unanchor"
66 //    the first and last segments respectively by allowing them to happen
67 //    somewhere after/before the beginning/end.
68 //
69 // [1] https://research.swtch.com/glob
70 class GlobMatcher {
71  public:
72   // Creates a glob matcher from a pattern.
FromPattern(base::StringView pattern_str)73   static GlobMatcher FromPattern(base::StringView pattern_str) {
74     return GlobMatcher(std::move(pattern_str));
75   }
76 
77   // Checks the provided string against the pattern and returns whether it
78   // matches.
79   bool Matches(base::StringView input);
80 
81  private:
82   // Represents a portion of the pattern in between two * characters.
83   struct Segment {
84     // The portion of the pattern in the segment. Note that this will not
85     // contain a free '*' (i.e. outside a character class).
86     base::StringView pattern;
87 
88     // The number of consumed characters in an input string if this segment
89     // matches.
90     uint32_t matched_chars;
91   };
92 
93   // It would be very rare for a glob pattern to have more than 4 stars so
94   // reserve stack space for that many segments.
95   static constexpr uint32_t kMaxSegmentsOnStack = 4;
96 
97   explicit GlobMatcher(base::StringView pattern);
98 
99   // Returns whether |input| starts with the pattern in |segment| following
100   // glob matching rules.
StartsWith(base::StringView input,const Segment & segment)101   bool StartsWith(base::StringView input, const Segment& segment) {
102     if (!contains_char_class_or_question_) {
103       return input.StartsWith(segment.pattern);
104     }
105     return StartsWithSlow(input, segment);
106   }
107 
108   // Returns whether |input| ends with the pattern in |segment| following
109   // glob matching rules.
EndsWith(base::StringView input,const Segment & segment)110   bool EndsWith(base::StringView input, const Segment& segment) {
111     if (!contains_char_class_or_question_) {
112       return input.EndsWith(segment.pattern);
113     }
114     // Ending with |segment| is the same as taking the substring of |in|
115     size_t start = input.size() - segment.matched_chars;
116     return StartsWithSlow(input.substr(start), segment);
117   }
118 
119   // Returns the index where |input| matches the pattern in |segment|
120   // following glob matching rules or base::StringView::npos, if no such index
121   // exists.
Find(base::StringView input,const Segment & segment,size_t start)122   size_t Find(base::StringView input, const Segment& segment, size_t start) {
123     if (!contains_char_class_or_question_) {
124       return input.find(segment.pattern, start);
125     }
126     for (uint32_t i = 0; i < input.size(); ++i) {
127       if (StartsWithSlow(input.substr(i), segment)) {
128         return i;
129       }
130     }
131     return base::StringView::npos;
132   }
133 
134   // Given a StringView starting at the boundary of a character class, returns
135   // a StringView containing only the parts inside the [] or base::StringView()
136   // if no character class exists.
137   static base::StringView ExtractCharacterClass(base::StringView input);
138 
139   // Matches |in| against the given character class.
140   static bool MatchesCharacterClass(char input, base::StringView char_class);
141 
142   bool StartsWithSlow(base::StringView input, const Segment& segment);
143 
144   // IMPORTANT: this should *not* be modified after the constructor as we store
145   // pointers to the data inside here.
146   // Note: this vector also allocates space for the null-terminator so is +1
147   // the "traditional" size of the string.
148   std::vector<char> pattern_;
149 
150   // Chunks of the |pattern_| tokenized on '*'. See the class comment for more
151   // info.
152   base::SmallVector<Segment, kMaxSegmentsOnStack> segments_;
153 
154   bool leading_star_ = false;
155   bool trailing_star_ = false;
156   bool contains_char_class_or_question_ = false;
157 };
158 
159 }  // namespace util
160 }  // namespace trace_processor
161 }  // namespace perfetto
162 
163 #endif  // SRC_TRACE_PROCESSOR_UTIL_GLOB_H_
164