1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "tools/gn/pattern.h"
6
7 #include "tools/gn/value.h"
8
9 const char kPattern_Help[] =
10 "Patterns\n"
11 " Patterns are VERY limited regular expressions that are used in\n"
12 " several places.\n"
13 "\n"
14 " Patterns must match the entire input string to be counted as a match.\n"
15 " In regular expression parlance, there is an implicit \"^...$\"\n"
16 " surrounding your input. If you want to match a substring, you need to\n"
17 " use wildcards at the beginning and end.\n"
18 "\n"
19 " There are only two special tokens understood by the pattern matcher.\n"
20 " Everything else is a literal.\n"
21 "\n"
22 " * Matches zero or more of any character. It does not depend on the\n"
23 " preceeding character (in regular expression parlance it is\n"
24 " equivalent to \".*\").\n"
25 "\n"
26 " \\b Matches a path boundary. This will match the beginning or end of\n"
27 " a string, or a slash.\n"
28 "\n"
29 "Examples\n"
30 "\n"
31 " \"*asdf*\"\n"
32 " Matches a string containing \"asdf\" anywhere.\n"
33 "\n"
34 " \"asdf\"\n"
35 " Matches only the exact string \"asdf\".\n"
36 "\n"
37 " \"*.cc\"\n"
38 " Matches strings ending in the literal \".cc\".\n"
39 "\n"
40 " \"\\bwin/*\"\n"
41 " Matches \"win/foo\" and \"foo/win/bar.cc\" but not \"iwin/foo\".\n";
42
43 namespace {
44
ParsePattern(const std::string & s,std::vector<Pattern::Subrange> * out)45 void ParsePattern(const std::string& s, std::vector<Pattern::Subrange>* out) {
46 // Set when the last subrange is a literal so we can just append when we
47 // find another literal.
48 Pattern::Subrange* last_literal = NULL;
49
50 for (size_t i = 0; i < s.size(); i++) {
51 if (s[i] == '*') {
52 // Don't allow two **.
53 if (out->size() == 0 ||
54 (*out)[out->size() - 1].type != Pattern::Subrange::ANYTHING)
55 out->push_back(Pattern::Subrange(Pattern::Subrange::ANYTHING));
56 last_literal = NULL;
57 } else if (s[i] == '\\') {
58 if (i < s.size() - 1 && s[i + 1] == 'b') {
59 // "\b" means path boundary.
60 i++;
61 out->push_back(Pattern::Subrange(Pattern::Subrange::PATH_BOUNDARY));
62 last_literal = NULL;
63 } else {
64 // Backslash + anything else means that literal char.
65 if (!last_literal) {
66 out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL));
67 last_literal = &(*out)[out->size() - 1];
68 }
69 if (i < s.size() - 1) {
70 i++;
71 last_literal->literal.push_back(s[i]);
72 } else {
73 // Single backslash at end, use literal backslash.
74 last_literal->literal.push_back('\\');
75 }
76 }
77 } else {
78 if (!last_literal) {
79 out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL));
80 last_literal = &(*out)[out->size() - 1];
81 }
82 last_literal->literal.push_back(s[i]);
83 }
84 }
85 }
86
87 } // namespace
88
Pattern(const std::string & s)89 Pattern::Pattern(const std::string& s) {
90 ParsePattern(s, &subranges_);
91 is_suffix_ =
92 (subranges_.size() == 2 &&
93 subranges_[0].type == Subrange::ANYTHING &&
94 subranges_[1].type == Subrange::LITERAL);
95 }
96
~Pattern()97 Pattern::~Pattern() {
98 }
99
MatchesString(const std::string & s) const100 bool Pattern::MatchesString(const std::string& s) const {
101 // Empty pattern matches only empty string.
102 if (subranges_.empty())
103 return s.empty();
104
105 if (is_suffix_) {
106 const std::string& suffix = subranges_[1].literal;
107 if (suffix.size() > s.size())
108 return false; // Too short.
109 return s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
110 }
111
112 return RecursiveMatch(s, 0, 0, true);
113 }
114
115 // We assume the number of ranges is small so recursive is always reasonable.
116 // Could be optimized to only be recursive for *.
RecursiveMatch(const std::string & s,size_t begin_char,size_t subrange_index,bool allow_implicit_path_boundary) const117 bool Pattern::RecursiveMatch(const std::string& s,
118 size_t begin_char,
119 size_t subrange_index,
120 bool allow_implicit_path_boundary) const {
121 if (subrange_index >= subranges_.size()) {
122 // Hit the end of our subranges, the text should also be at the end for a
123 // match.
124 return begin_char == s.size();
125 }
126
127 const Subrange& sr = subranges_[subrange_index];
128 switch (sr.type) {
129 case Subrange::LITERAL: {
130 if (s.size() - begin_char < sr.literal.size())
131 return false; // Not enough room.
132 if (s.compare(begin_char, sr.literal.size(), sr.literal) != 0)
133 return false; // Literal doesn't match.
134
135 // Recursively check the next one.
136 return RecursiveMatch(s, begin_char + sr.literal.size(),
137 subrange_index + 1, true);
138 }
139
140 case Subrange::PATH_BOUNDARY: {
141 // When we can accept an implicit path boundary, we have to check both
142 // a match of the literal and the implicit one.
143 if (allow_implicit_path_boundary &&
144 (begin_char == 0 || begin_char == s.size())) {
145 // At implicit path boundary, see if the rest of the pattern matches.
146 if (RecursiveMatch(s, begin_char, subrange_index + 1, false))
147 return true;
148 }
149
150 // Check for a literal "/".
151 if (begin_char < s.size() && s[begin_char] == '/') {
152 // At explicit boundary, see if the rest of the pattern matches.
153 if (RecursiveMatch(s, begin_char + 1, subrange_index + 1, true))
154 return true;
155 }
156 return false;
157 }
158
159 case Subrange::ANYTHING: {
160 if (subrange_index == subranges_.size() - 1)
161 return true; // * at the end, consider it matching.
162
163 size_t min_next_size = sr.MinSize();
164
165 // We don't care about exactly what matched as long as there was a match,
166 // so we can do this front-to-back. If we needed the match, we would
167 // normally want "*" to be greedy so would work backwards.
168 for (size_t i = begin_char; i < s.size() - min_next_size; i++) {
169 // Note: this could probably be faster by detecting the type of the
170 // next match in advance and checking for a match in this loop rather
171 // than doing a full recursive call for each character.
172 if (RecursiveMatch(s, i, subrange_index + 1, true))
173 return true;
174 }
175 return false;
176 }
177
178 default:
179 NOTREACHED();
180 }
181
182 return false;
183 }
184
PatternList()185 PatternList::PatternList() {
186 }
187
~PatternList()188 PatternList::~PatternList() {
189 }
190
SetFromValue(const Value & v,Err * err)191 void PatternList::SetFromValue(const Value& v, Err* err) {
192 patterns_.clear();
193
194 if (v.type() != Value::LIST) {
195 *err = Err(v.origin(), "This value must be a list.");
196 return;
197 }
198
199 const std::vector<Value>& list = v.list_value();
200 for (size_t i = 0; i < list.size(); i++) {
201 if (!list[i].VerifyTypeIs(Value::STRING, err))
202 return;
203 patterns_.push_back(Pattern(list[i].string_value()));
204 }
205 }
206
MatchesString(const std::string & s) const207 bool PatternList::MatchesString(const std::string& s) const {
208 for (size_t i = 0; i < patterns_.size(); i++) {
209 if (patterns_[i].MatchesString(s))
210 return true;
211 }
212 return false;
213 }
214
MatchesValue(const Value & v) const215 bool PatternList::MatchesValue(const Value& v) const {
216 if (v.type() == Value::STRING)
217 return MatchesString(v.string_value());
218 return false;
219 }
220