1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2022-2023 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License. You may obtain a copy of the License at
9 //
10 // https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Giuliano Procida
19
20 #include "filter.h"
21
22 #include <fnmatch.h>
23
24 #include <array>
25 #include <cctype>
26 #include <cstddef>
27 #include <cstring>
28 #include <fstream>
29 #include <memory>
30 #include <ostream>
31 #include <queue>
32 #include <sstream>
33 #include <string>
34 #include <string_view>
35 #include <unordered_set>
36 #include <utility>
37
38 #include "error.h"
39
40 namespace stg {
41 namespace {
42
43 using Items = std::unordered_set<std::string>;
44
ReadAbigail(const std::string & filename)45 Items ReadAbigail(const std::string& filename) {
46 static constexpr std::string_view kSectionSuffix = "list";
47 Items items;
48 std::ifstream file(filename);
49 Check(file.good()) << "error opening filter file '" << filename << ": "
50 << Error(errno);
51 bool in_filter_section = false;
52 std::string line;
53 while (std::getline(file, line)) {
54 size_t start = 0;
55 size_t limit = line.size();
56 // Strip leading whitespace.
57 while (start < limit && std::isspace(line[start])) {
58 ++start;
59 }
60 // Skip comment lines.
61 if (start < limit && line[start] == '#') {
62 continue;
63 }
64 // Strip trailing whitespace.
65 while (start < limit && std::isspace(line[limit - 1])) {
66 --limit;
67 }
68 // Skip empty lines.
69 if (start == limit) {
70 continue;
71 }
72 // See if we are entering a filter list section.
73 if (line[start] == '[' && line[limit - 1] == ']') {
74 std::string_view section(&line[start + 1], limit - start - 2);
75 // TODO: use std::string_view::ends_with
76 const auto section_size = section.size();
77 const auto suffix_size = kSectionSuffix.size();
78 in_filter_section = section_size >= suffix_size &&
79 section.substr(section_size - suffix_size) == kSectionSuffix;
80 continue;
81 }
82 // Add item.
83 if (in_filter_section) {
84 items.insert(std::string(&line[start], limit - start));
85 }
86 }
87 Check(file.eof()) << "error reading filter file '" << filename << ": "
88 << Error(errno);
89 return items;
90 }
91
92 // Inverted filter.
93 class NotFilter : public Filter {
94 public:
NotFilter(std::unique_ptr<Filter> filter)95 explicit NotFilter(std::unique_ptr<Filter> filter)
96 : filter_(std::move(filter)) {}
operator ()(const std::string & item) const97 bool operator()(const std::string& item) const final {
98 return !(*filter_)(item);
99 };
100
101 private:
102 const std::unique_ptr<Filter> filter_;
103 };
104
105 // Conjunction of filters.
106 class AndFilter : public Filter {
107 public:
AndFilter(std::unique_ptr<Filter> filter1,std::unique_ptr<Filter> filter2)108 AndFilter(std::unique_ptr<Filter> filter1,
109 std::unique_ptr<Filter> filter2)
110 : filter1_(std::move(filter1)), filter2_(std::move(filter2)) {}
operator ()(const std::string & item) const111 bool operator()(const std::string& item) const final {
112 return (*filter1_)(item) && (*filter2_)(item);
113 };
114
115 private:
116 const std::unique_ptr<Filter> filter1_;
117 const std::unique_ptr<Filter> filter2_;
118 };
119
120 // Disjunction of filters.
121 class OrFilter : public Filter {
122 public:
OrFilter(std::unique_ptr<Filter> filter1,std::unique_ptr<Filter> filter2)123 OrFilter(std::unique_ptr<Filter> filter1,
124 std::unique_ptr<Filter> filter2)
125 : filter1_(std::move(filter1)), filter2_(std::move(filter2)) {}
operator ()(const std::string & item) const126 bool operator()(const std::string& item) const final {
127 return (*filter1_)(item) || (*filter2_)(item);
128 };
129
130 private:
131 const std::unique_ptr<Filter> filter1_;
132 const std::unique_ptr<Filter> filter2_;
133 };
134
135 // Glob filter.
136 class GlobFilter : public Filter {
137 public:
GlobFilter(const std::string & pattern)138 explicit GlobFilter(const std::string& pattern) : pattern_(pattern) {}
operator ()(const std::string & item) const139 bool operator()(const std::string& item) const final {
140 return fnmatch(pattern_.c_str(), item.c_str(), 0) == 0;
141 }
142
143 private:
144 const std::string pattern_;
145 };
146
147 // Literal list filter.
148 class SetFilter : public Filter {
149 public:
SetFilter(Items && items)150 explicit SetFilter(Items&& items)
151 : items_(std::move(items)) {}
operator ()(const std::string & item) const152 bool operator()(const std::string& item) const final {
153 return items_.count(item) > 0;
154 };
155
156 private:
157 const Items items_;
158 };
159
160 const char* kTokenCharacters = ":!()&|";
161
162 // Split a filter expression into tokens.
163 //
164 // All tokens are just strings, but single characters from kTokenCharacters are
165 // recognised as special syntax. Whitespace can be used between tokens and will
166 // be ignored.
Tokenise(const std::string & filter)167 std::queue<std::string> Tokenise(const std::string& filter) {
168 std::queue<std::string> result;
169 auto it = filter.begin();
170 const auto end = filter.end();
171 while (it != end) {
172 if (std::isspace(*it)) {
173 ++it;
174 } else if (std::strchr(kTokenCharacters, *it)) {
175 result.emplace(&*it, 1);
176 ++it;
177 } else if (std::isgraph(*it)) {
178 auto name = it;
179 ++it;
180 while (it != end && std::isgraph(*it)
181 && !std::strchr(kTokenCharacters, *it)) {
182 ++it;
183 }
184 result.emplace(&*name, it - name);
185 } else {
186 Die() << "unexpected character in filter: '" << *it;
187 }
188 }
189
190 return result;
191 }
192
193 // The failing parser.
Fail(const std::string & message,std::queue<std::string> & tokens)194 std::unique_ptr<Filter> Fail(
195 const std::string& message, std::queue<std::string>& tokens) {
196 std::ostringstream os;
197 os << "syntax error in filter expression: '" << message << "'; context:";
198 for (size_t i = 0; i < 3; ++i) {
199 os << ' ';
200 if (tokens.empty()) {
201 os << "<end>";
202 break;
203 }
204 os << '"' << tokens.front() << '"';
205 tokens.pop();
206 }
207 Die() << os.str();
208 }
209
210 std::unique_ptr<Filter> Expression(std::queue<std::string>& tokens);
211
212 // Parse a filter atom.
Atom(std::queue<std::string> & tokens)213 std::unique_ptr<Filter> Atom(std::queue<std::string>& tokens) {
214 if (tokens.empty()) {
215 return Fail("expected a filter expression", tokens);
216 }
217 auto token = tokens.front();
218 tokens.pop();
219 if (token == "(") {
220 auto expression = Expression(tokens);
221 if (tokens.empty() || tokens.front() != ")") {
222 return Fail("expected a ')'", tokens);
223 }
224 tokens.pop();
225 return expression;
226 } else if (token == ":") {
227 if (tokens.empty()) {
228 return Fail("expected a file name", tokens);
229 }
230 token = tokens.front();
231 tokens.pop();
232 return std::make_unique<SetFilter>(ReadAbigail(token));
233 } else {
234 if (std::strchr(kTokenCharacters, token[0])) {
235 return Fail("expected a glob token", tokens);
236 }
237 return std::make_unique<GlobFilter>(token);
238 }
239 }
240
241 // Parse a filter factor.
Factor(std::queue<std::string> & tokens)242 std::unique_ptr<Filter> Factor(std::queue<std::string>& tokens) {
243 bool invert = false;
244 while (!tokens.empty() && tokens.front() == "!") {
245 tokens.pop();
246 invert = !invert;
247 }
248 auto atom = Atom(tokens);
249 if (invert) {
250 atom = std::make_unique<NotFilter>(std::move(atom));
251 }
252 return atom;
253 }
254
255 // Parse a filter term.
Term(std::queue<std::string> & tokens)256 std::unique_ptr<Filter> Term(std::queue<std::string>& tokens) {
257 auto factor = Factor(tokens);
258 while (!tokens.empty() && tokens.front() == "&") {
259 tokens.pop();
260 factor = std::make_unique<AndFilter>(std::move(factor), Factor(tokens));
261 }
262 return factor;
263 }
264
265 // Parse a filter expression.
Expression(std::queue<std::string> & tokens)266 std::unique_ptr<Filter> Expression(std::queue<std::string>& tokens) {
267 auto term = Term(tokens);
268 while (!tokens.empty() && tokens.front() == "|") {
269 tokens.pop();
270 term = std::make_unique<OrFilter>(std::move(term), Term(tokens));
271 }
272 return term;
273 }
274
275 } // namespace
276
277 // Tokenise and parse a filter expression.
MakeFilter(const std::string & filter)278 std::unique_ptr<Filter> MakeFilter(const std::string& filter)
279 {
280 auto tokens = Tokenise(filter);
281 auto result = Expression(tokens);
282 if (!tokens.empty()) {
283 return Fail("unexpected junk at end of filter", tokens);
284 }
285 return result;
286 }
287
FilterUsage(std::ostream & os)288 void FilterUsage(std::ostream& os) {
289 os << "filter syntax:\n"
290 << " <filter> ::= <term> | <expression> '|' <term>\n"
291 << " <term> ::= <factor> | <term> '&' <factor>\n"
292 << " <factor> ::= <atom> | '!' <factor>\n"
293 << " <atom> ::= ':' <filename> | <glob> | '(' <expression> ')'\n"
294 << " <filename> ::= <string>\n"
295 << " <glob> ::= <string>\n";
296 }
297
298 } // namespace stg
299