• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2022-2023 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License.  You may obtain a copy of the License at
9 //
10 //     https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Giuliano Procida
19 
20 #include "filter.h"
21 
22 #include <fnmatch.h>
23 
24 #include <array>
25 #include <cctype>
26 #include <cstddef>
27 #include <cstring>
28 #include <fstream>
29 #include <memory>
30 #include <ostream>
31 #include <queue>
32 #include <sstream>
33 #include <string>
34 #include <string_view>
35 #include <unordered_set>
36 #include <utility>
37 
38 #include "error.h"
39 
40 namespace stg {
41 namespace {
42 
43 using Items = std::unordered_set<std::string>;
44 
ReadAbigail(const std::string & filename)45 Items ReadAbigail(const std::string& filename) {
46   static constexpr std::string_view kSectionSuffix = "list";
47   Items items;
48   std::ifstream file(filename);
49   Check(file.good()) << "error opening filter file '" << filename << ": "
50                      << Error(errno);
51   bool in_filter_section = false;
52   std::string line;
53   while (std::getline(file, line)) {
54     size_t start = 0;
55     size_t limit = line.size();
56     // Strip leading whitespace.
57     while (start < limit && std::isspace(line[start])) {
58       ++start;
59     }
60     // Skip comment lines.
61     if (start < limit && line[start] == '#') {
62       continue;
63     }
64     // Strip trailing whitespace.
65     while (start < limit && std::isspace(line[limit - 1])) {
66       --limit;
67     }
68     // Skip empty lines.
69     if (start == limit) {
70       continue;
71     }
72     // See if we are entering a filter list section.
73     if (line[start] == '[' && line[limit - 1] == ']') {
74       std::string_view section(&line[start + 1], limit - start - 2);
75       // TODO: use std::string_view::ends_with
76       const auto section_size = section.size();
77       const auto suffix_size = kSectionSuffix.size();
78       in_filter_section = section_size >= suffix_size &&
79           section.substr(section_size - suffix_size) == kSectionSuffix;
80       continue;
81     }
82     // Add item.
83     if (in_filter_section) {
84       items.insert(std::string(&line[start], limit - start));
85     }
86   }
87   Check(file.eof()) << "error reading filter file '" << filename << ": "
88                     << Error(errno);
89   return items;
90 }
91 
92 // Inverted filter.
93 class NotFilter : public Filter {
94  public:
NotFilter(std::unique_ptr<Filter> filter)95   explicit NotFilter(std::unique_ptr<Filter> filter)
96       : filter_(std::move(filter)) {}
operator ()(const std::string & item) const97   bool operator()(const std::string& item) const final {
98     return !(*filter_)(item);
99   };
100 
101  private:
102   const std::unique_ptr<Filter> filter_;
103 };
104 
105 // Conjunction of filters.
106 class AndFilter : public Filter {
107  public:
AndFilter(std::unique_ptr<Filter> filter1,std::unique_ptr<Filter> filter2)108   AndFilter(std::unique_ptr<Filter> filter1,
109             std::unique_ptr<Filter> filter2)
110       : filter1_(std::move(filter1)), filter2_(std::move(filter2)) {}
operator ()(const std::string & item) const111   bool operator()(const std::string& item) const final {
112     return (*filter1_)(item) && (*filter2_)(item);
113   };
114 
115  private:
116   const std::unique_ptr<Filter> filter1_;
117   const std::unique_ptr<Filter> filter2_;
118 };
119 
120 // Disjunction of filters.
121 class OrFilter : public Filter {
122  public:
OrFilter(std::unique_ptr<Filter> filter1,std::unique_ptr<Filter> filter2)123   OrFilter(std::unique_ptr<Filter> filter1,
124            std::unique_ptr<Filter> filter2)
125       : filter1_(std::move(filter1)), filter2_(std::move(filter2)) {}
operator ()(const std::string & item) const126   bool operator()(const std::string& item) const final {
127     return (*filter1_)(item) || (*filter2_)(item);
128   };
129 
130  private:
131   const std::unique_ptr<Filter> filter1_;
132   const std::unique_ptr<Filter> filter2_;
133 };
134 
135 // Glob filter.
136 class GlobFilter : public Filter {
137  public:
GlobFilter(const std::string & pattern)138   explicit GlobFilter(const std::string& pattern) : pattern_(pattern) {}
operator ()(const std::string & item) const139   bool operator()(const std::string& item) const final {
140     return fnmatch(pattern_.c_str(), item.c_str(), 0) == 0;
141   }
142 
143  private:
144   const std::string pattern_;
145 };
146 
147 // Literal list filter.
148 class SetFilter : public Filter {
149  public:
SetFilter(Items && items)150   explicit SetFilter(Items&& items)
151       : items_(std::move(items)) {}
operator ()(const std::string & item) const152   bool operator()(const std::string& item) const final {
153     return items_.count(item) > 0;
154   };
155 
156  private:
157   const Items items_;
158 };
159 
160 const char* kTokenCharacters = ":!()&|";
161 
162 // Split a filter expression into tokens.
163 //
164 // All tokens are just strings, but single characters from kTokenCharacters are
165 // recognised as special syntax. Whitespace can be used between tokens and will
166 // be ignored.
Tokenise(const std::string & filter)167 std::queue<std::string> Tokenise(const std::string& filter) {
168   std::queue<std::string> result;
169   auto it = filter.begin();
170   const auto end = filter.end();
171   while (it != end) {
172     if (std::isspace(*it)) {
173       ++it;
174     } else if (std::strchr(kTokenCharacters, *it)) {
175       result.emplace(&*it, 1);
176       ++it;
177     } else if (std::isgraph(*it)) {
178       auto name = it;
179       ++it;
180       while (it != end && std::isgraph(*it)
181              && !std::strchr(kTokenCharacters, *it)) {
182         ++it;
183       }
184       result.emplace(&*name, it - name);
185     } else {
186       Die() << "unexpected character in filter: '" << *it;
187     }
188   }
189 
190   return result;
191 }
192 
193 // The failing parser.
Fail(const std::string & message,std::queue<std::string> & tokens)194 std::unique_ptr<Filter> Fail(
195     const std::string& message, std::queue<std::string>& tokens) {
196   std::ostringstream os;
197   os << "syntax error in filter expression: '" << message << "'; context:";
198   for (size_t i = 0; i < 3; ++i) {
199     os << ' ';
200     if (tokens.empty()) {
201       os << "<end>";
202       break;
203     }
204     os << '"' << tokens.front() << '"';
205     tokens.pop();
206   }
207   Die() << os.str();
208 }
209 
210 std::unique_ptr<Filter> Expression(std::queue<std::string>& tokens);
211 
212 // Parse a filter atom.
Atom(std::queue<std::string> & tokens)213 std::unique_ptr<Filter> Atom(std::queue<std::string>& tokens) {
214   if (tokens.empty()) {
215     return Fail("expected a filter expression", tokens);
216   }
217   auto token = tokens.front();
218   tokens.pop();
219   if (token == "(") {
220     auto expression = Expression(tokens);
221     if (tokens.empty() || tokens.front() != ")") {
222       return Fail("expected a ')'", tokens);
223     }
224     tokens.pop();
225     return expression;
226   } else if (token == ":") {
227     if (tokens.empty()) {
228       return Fail("expected a file name", tokens);
229     }
230     token = tokens.front();
231     tokens.pop();
232     return std::make_unique<SetFilter>(ReadAbigail(token));
233   } else {
234     if (std::strchr(kTokenCharacters, token[0])) {
235       return Fail("expected a glob token", tokens);
236     }
237     return std::make_unique<GlobFilter>(token);
238   }
239 }
240 
241 // Parse a filter factor.
Factor(std::queue<std::string> & tokens)242 std::unique_ptr<Filter> Factor(std::queue<std::string>& tokens) {
243   bool invert = false;
244   while (!tokens.empty() && tokens.front() == "!") {
245     tokens.pop();
246     invert = !invert;
247   }
248   auto atom = Atom(tokens);
249   if (invert) {
250     atom = std::make_unique<NotFilter>(std::move(atom));
251   }
252   return atom;
253 }
254 
255 // Parse a filter term.
Term(std::queue<std::string> & tokens)256 std::unique_ptr<Filter> Term(std::queue<std::string>& tokens) {
257   auto factor = Factor(tokens);
258   while (!tokens.empty() && tokens.front() == "&") {
259     tokens.pop();
260     factor = std::make_unique<AndFilter>(std::move(factor), Factor(tokens));
261   }
262   return factor;
263 }
264 
265 // Parse a filter expression.
Expression(std::queue<std::string> & tokens)266 std::unique_ptr<Filter> Expression(std::queue<std::string>& tokens) {
267   auto term = Term(tokens);
268   while (!tokens.empty() && tokens.front() == "|") {
269     tokens.pop();
270     term = std::make_unique<OrFilter>(std::move(term), Term(tokens));
271   }
272   return term;
273 }
274 
275 }  // namespace
276 
277 // Tokenise and parse a filter expression.
MakeFilter(const std::string & filter)278 std::unique_ptr<Filter> MakeFilter(const std::string& filter)
279 {
280   auto tokens = Tokenise(filter);
281   auto result = Expression(tokens);
282   if (!tokens.empty()) {
283     return Fail("unexpected junk at end of filter", tokens);
284   }
285   return result;
286 }
287 
FilterUsage(std::ostream & os)288 void FilterUsage(std::ostream& os) {
289   os << "filter syntax:\n"
290      << "  <filter>   ::= <term>          |  <expression> '|' <term>\n"
291      << "  <term>     ::= <factor>        |  <term> '&' <factor>\n"
292      << "  <factor>   ::= <atom>          |  '!' <factor>\n"
293      << "  <atom>     ::= ':' <filename>  |  <glob>  |  '(' <expression> ')'\n"
294      << "  <filename> ::= <string>\n"
295      << "  <glob>     ::= <string>\n";
296 }
297 
298 }  // namespace stg
299