• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "gn/pattern.h"
6 
7 #include "gn/value.h"
8 
9 const char kFilePattern_Help[] =
10     R"*(File patterns
11 
12   File patterns are VERY limited regular expressions. They must match the
13   entire input string to be counted as a match. In regular expression parlance,
14   there is an implicit "^...$" surrounding your input. If you want to match a
15   substring, you need to use wildcards at the beginning and end.
16 
17   There are only two special tokens understood by the pattern matcher.
18   Everything else is a literal.
19 
20    - "*" Matches zero or more of any character. It does not depend on the
21      preceding character (in regular expression parlance it is equivalent to
22      ".*").
23 
24    - "\b" Matches a path boundary. This will match the beginning or end of a
25      string, or a slash.
26 
27 Pattern examples
28 
29   "*asdf*"
30       Matches a string containing "asdf" anywhere.
31 
32   "asdf"
33       Matches only the exact string "asdf".
34 
35   "*.cc"
36       Matches strings ending in the literal ".cc".
37 
38   "\bwin/*"
39       Matches "win/foo" and "foo/win/bar.cc" but not "iwin/foo".
40 
41 )*";
42 
43 namespace {
44 
ParsePattern(const std::string & s,std::vector<Pattern::Subrange> * out)45 void ParsePattern(const std::string& s, std::vector<Pattern::Subrange>* out) {
46   // Set when the last subrange is a literal so we can just append when we
47   // find another literal.
48   Pattern::Subrange* last_literal = nullptr;
49 
50   for (size_t i = 0; i < s.size(); i++) {
51     if (s[i] == '*') {
52       // Don't allow two **.
53       if (out->size() == 0 ||
54           (*out)[out->size() - 1].type != Pattern::Subrange::ANYTHING)
55         out->push_back(Pattern::Subrange(Pattern::Subrange::ANYTHING));
56       last_literal = nullptr;
57     } else if (s[i] == '\\') {
58       if (i < s.size() - 1 && s[i + 1] == 'b') {
59         // "\b" means path boundary.
60         i++;
61         out->push_back(Pattern::Subrange(Pattern::Subrange::PATH_BOUNDARY));
62         last_literal = nullptr;
63       } else {
64         // Backslash + anything else means that literal char.
65         if (!last_literal) {
66           out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL));
67           last_literal = &(*out)[out->size() - 1];
68         }
69         if (i < s.size() - 1) {
70           i++;
71           last_literal->literal.push_back(s[i]);
72         } else {
73           // Single backslash at end, use literal backslash.
74           last_literal->literal.push_back('\\');
75         }
76       }
77     } else {
78       if (!last_literal) {
79         out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL));
80         last_literal = &(*out)[out->size() - 1];
81       }
82       last_literal->literal.push_back(s[i]);
83     }
84   }
85 }
86 
87 }  // namespace
88 
89 Pattern::Pattern(const std::string& s) {
90   ParsePattern(s, &subranges_);
91   is_suffix_ =
92       (subranges_.size() == 2 && subranges_[0].type == Subrange::ANYTHING &&
93        subranges_[1].type == Subrange::LITERAL);
94 }
95 
96 Pattern::Pattern(const Pattern& other) = default;
97 
98 Pattern::~Pattern() = default;
99 
100 bool Pattern::MatchesString(const std::string& s) const {
101   // Empty pattern matches only empty string.
102   if (subranges_.empty())
103     return s.empty();
104 
105   if (is_suffix_) {
106     const std::string& suffix = subranges_[1].literal;
107     if (suffix.size() > s.size())
108       return false;  // Too short.
109     return s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
110   }
111 
112   return RecursiveMatch(s, 0, 0, true);
113 }
114 
115 // We assume the number of ranges is small so recursive is always reasonable.
116 // Could be optimized to only be recursive for *.
117 bool Pattern::RecursiveMatch(const std::string& s,
118                              size_t begin_char,
119                              size_t subrange_index,
120                              bool allow_implicit_path_boundary) const {
121   if (subrange_index >= subranges_.size()) {
122     // Hit the end of our subranges, the text should also be at the end for a
123     // match.
124     return begin_char == s.size();
125   }
126 
127   const Subrange& sr = subranges_[subrange_index];
128   switch (sr.type) {
129     case Subrange::LITERAL: {
130       if (s.size() - begin_char < sr.literal.size())
131         return false;  // Not enough room.
132       if (s.compare(begin_char, sr.literal.size(), sr.literal) != 0)
133         return false;  // Literal doesn't match.
134 
135       // Recursively check the next one.
136       return RecursiveMatch(s, begin_char + sr.literal.size(),
137                             subrange_index + 1, true);
138     }
139 
140     case Subrange::PATH_BOUNDARY: {
141       // When we can accept an implicit path boundary, we have to check both
142       // a match of the literal and the implicit one.
143       if (allow_implicit_path_boundary &&
144           (begin_char == 0 || begin_char == s.size())) {
145         // At implicit path boundary, see if the rest of the pattern matches.
146         if (RecursiveMatch(s, begin_char, subrange_index + 1, false))
147           return true;
148       }
149 
150       // Check for a literal "/".
151       if (begin_char < s.size() && s[begin_char] == '/') {
152         // At explicit boundary, see if the rest of the pattern matches.
153         if (RecursiveMatch(s, begin_char + 1, subrange_index + 1, true))
154           return true;
155       }
156       return false;
157     }
158 
159     case Subrange::ANYTHING: {
160       if (subrange_index == subranges_.size() - 1)
161         return true;  // * at the end, consider it matching.
162 
163       size_t min_next_size = sr.MinSize();
164 
165       // We don't care about exactly what matched as long as there was a match,
166       // so we can do this front-to-back. If we needed the match, we would
167       // normally want "*" to be greedy so would work backwards.
168       for (size_t i = begin_char; i < s.size() - min_next_size; i++) {
169         // Note: this could probably be faster by detecting the type of the
170         // next match in advance and checking for a match in this loop rather
171         // than doing a full recursive call for each character.
172         if (RecursiveMatch(s, i, subrange_index + 1, true))
173           return true;
174       }
175       return false;
176     }
177 
178     default:
179       NOTREACHED();
180   }
181 
182   return false;
183 }
184 
185 PatternList::PatternList() = default;
186 
187 PatternList::PatternList(const PatternList& other) = default;
188 
189 PatternList::~PatternList() = default;
190 
191 void PatternList::Append(const Pattern& pattern) {
192   patterns_.push_back(pattern);
193 }
194 
195 void PatternList::SetFromValue(const Value& v, Err* err) {
196   patterns_.clear();
197 
198   if (v.type() != Value::LIST) {
199     *err = Err(v.origin(), "This value must be a list.");
200     return;
201   }
202 
203   const std::vector<Value>& list = v.list_value();
204   for (const auto& elem : list) {
205     if (!elem.VerifyTypeIs(Value::STRING, err))
206       return;
207     patterns_.push_back(Pattern(elem.string_value()));
208   }
209 }
210 
211 bool PatternList::MatchesString(const std::string& s) const {
212   for (const auto& pattern : patterns_) {
213     if (pattern.MatchesString(s))
214       return true;
215   }
216   return false;
217 }
218 
219 bool PatternList::MatchesValue(const Value& v) const {
220   if (v.type() == Value::STRING)
221     return MatchesString(v.string_value());
222   return false;
223 }
224