1 /* Generated by re2c 1.3 */
2 // Copyright 2011 Google Inc. All Rights Reserved.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15
16 #include "depfile_parser.h"
17 #include "util.h"
18
19 #include <algorithm>
20
DepfileParser(DepfileParserOptions options)21 DepfileParser::DepfileParser(DepfileParserOptions options)
22 : options_(options)
23 {
24 }
25
26 // A note on backslashes in Makefiles, from reading the docs:
27 // Backslash-newline is the line continuation character.
28 // Backslash-# escapes a # (otherwise meaningful as a comment start).
29 // Backslash-% escapes a % (otherwise meaningful as a special).
30 // Finally, quoting the GNU manual, "Backslashes that are not in danger
31 // of quoting ‘%’ characters go unmolested."
32 // How do you end a line with a backslash? The netbsd Make docs suggest
33 // reading the result of a shell command echoing a backslash!
34 //
35 // Rather than implement all of the above, we follow what GCC/Clang produces:
36 // Backslashes escape a space or hash sign.
37 // When a space is preceded by 2N+1 backslashes, it represents N backslashes
38 // followed by space.
39 // When a space is preceded by 2N backslashes, it represents 2N backslashes at
40 // the end of a filename.
41 // A hash sign is escaped by a single backslash. All other backslashes remain
42 // unchanged.
43 //
44 // If anyone actually has depfiles that rely on the more complicated
45 // behavior we can adjust this.
Parse(string * content,string * err)46 bool DepfileParser::Parse(string* content, string* err) {
47 // in: current parser input point.
48 // end: end of input.
49 // parsing_targets: whether we are parsing targets or dependencies.
50 char* in = &(*content)[0];
51 char* end = in + content->size();
52 bool have_target = false;
53 bool parsing_targets = true;
54 bool poisoned_input = false;
55 while (in < end) {
56 bool have_newline = false;
57 // out: current output point (typically same as in, but can fall behind
58 // as we de-escape backslashes).
59 char* out = in;
60 // filename: start of the current parsed filename.
61 char* filename = out;
62 for (;;) {
63 // start: beginning of the current parsed span.
64 const char* start = in;
65 char* yymarker = NULL;
66
67 {
68 unsigned char yych;
69 static const unsigned char yybm[] = {
70 0, 0, 0, 0, 0, 0, 0, 0,
71 0, 0, 0, 0, 0, 0, 0, 0,
72 0, 0, 0, 0, 0, 0, 0, 0,
73 0, 0, 0, 0, 0, 0, 0, 0,
74 0, 128, 0, 0, 0, 128, 0, 0,
75 128, 128, 0, 128, 128, 128, 128, 128,
76 128, 128, 128, 128, 128, 128, 128, 128,
77 128, 128, 128, 0, 0, 128, 0, 0,
78 128, 128, 128, 128, 128, 128, 128, 128,
79 128, 128, 128, 128, 128, 128, 128, 128,
80 128, 128, 128, 128, 128, 128, 128, 128,
81 128, 128, 128, 128, 0, 128, 0, 128,
82 0, 128, 128, 128, 128, 128, 128, 128,
83 128, 128, 128, 128, 128, 128, 128, 128,
84 128, 128, 128, 128, 128, 128, 128, 128,
85 128, 128, 128, 128, 0, 128, 128, 0,
86 128, 128, 128, 128, 128, 128, 128, 128,
87 128, 128, 128, 128, 128, 128, 128, 128,
88 128, 128, 128, 128, 128, 128, 128, 128,
89 128, 128, 128, 128, 128, 128, 128, 128,
90 128, 128, 128, 128, 128, 128, 128, 128,
91 128, 128, 128, 128, 128, 128, 128, 128,
92 128, 128, 128, 128, 128, 128, 128, 128,
93 128, 128, 128, 128, 128, 128, 128, 128,
94 128, 128, 128, 128, 128, 128, 128, 128,
95 128, 128, 128, 128, 128, 128, 128, 128,
96 128, 128, 128, 128, 128, 128, 128, 128,
97 128, 128, 128, 128, 128, 128, 128, 128,
98 128, 128, 128, 128, 128, 128, 128, 128,
99 128, 128, 128, 128, 128, 128, 128, 128,
100 128, 128, 128, 128, 128, 128, 128, 128,
101 128, 128, 128, 128, 128, 128, 128, 128,
102 };
103 yych = *in;
104 if (yybm[0+yych] & 128) {
105 goto yy9;
106 }
107 if (yych <= '\r') {
108 if (yych <= '\t') {
109 if (yych >= 0x01) goto yy4;
110 } else {
111 if (yych <= '\n') goto yy6;
112 if (yych <= '\f') goto yy4;
113 goto yy8;
114 }
115 } else {
116 if (yych <= '$') {
117 if (yych <= '#') goto yy4;
118 goto yy12;
119 } else {
120 if (yych <= '?') goto yy4;
121 if (yych <= '\\') goto yy13;
122 goto yy4;
123 }
124 }
125 ++in;
126 {
127 break;
128 }
129 yy4:
130 ++in;
131 yy5:
132 {
133 // For any other character (e.g. whitespace), swallow it here,
134 // allowing the outer logic to loop around again.
135 break;
136 }
137 yy6:
138 ++in;
139 {
140 // A newline ends the current file name and the current rule.
141 have_newline = true;
142 break;
143 }
144 yy8:
145 yych = *++in;
146 if (yych == '\n') goto yy6;
147 goto yy5;
148 yy9:
149 yych = *++in;
150 if (yybm[0+yych] & 128) {
151 goto yy9;
152 }
153 yy11:
154 {
155 // Got a span of plain text.
156 int len = (int)(in - start);
157 // Need to shift it over if we're overwriting backslashes.
158 if (out < start)
159 memmove(out, start, len);
160 out += len;
161 continue;
162 }
163 yy12:
164 yych = *++in;
165 if (yych == '$') goto yy14;
166 goto yy5;
167 yy13:
168 yych = *(yymarker = ++in);
169 if (yych <= ' ') {
170 if (yych <= '\n') {
171 if (yych <= 0x00) goto yy5;
172 if (yych <= '\t') goto yy16;
173 goto yy17;
174 } else {
175 if (yych == '\r') goto yy19;
176 if (yych <= 0x1F) goto yy16;
177 goto yy21;
178 }
179 } else {
180 if (yych <= '9') {
181 if (yych == '#') goto yy23;
182 goto yy16;
183 } else {
184 if (yych <= ':') goto yy25;
185 if (yych == '\\') goto yy27;
186 goto yy16;
187 }
188 }
189 yy14:
190 ++in;
191 {
192 // De-escape dollar character.
193 *out++ = '$';
194 continue;
195 }
196 yy16:
197 ++in;
198 goto yy11;
199 yy17:
200 ++in;
201 {
202 // A line continuation ends the current file name.
203 break;
204 }
205 yy19:
206 yych = *++in;
207 if (yych == '\n') goto yy17;
208 in = yymarker;
209 goto yy5;
210 yy21:
211 ++in;
212 {
213 // 2N+1 backslashes plus space -> N backslashes plus space.
214 int len = (int)(in - start);
215 int n = len / 2 - 1;
216 if (out < start)
217 memset(out, '\\', n);
218 out += n;
219 *out++ = ' ';
220 continue;
221 }
222 yy23:
223 ++in;
224 {
225 // De-escape hash sign, but preserve other leading backslashes.
226 int len = (int)(in - start);
227 if (len > 2 && out < start)
228 memset(out, '\\', len - 2);
229 out += len - 2;
230 *out++ = '#';
231 continue;
232 }
233 yy25:
234 yych = *++in;
235 if (yych <= '\f') {
236 if (yych <= 0x00) goto yy28;
237 if (yych <= 0x08) goto yy26;
238 if (yych <= '\n') goto yy28;
239 } else {
240 if (yych <= '\r') goto yy28;
241 if (yych == ' ') goto yy28;
242 }
243 yy26:
244 {
245 // De-escape colon sign, but preserve other leading backslashes.
246 // Regular expression uses lookahead to make sure that no whitespace
247 // nor EOF follows. In that case it'd be the : at the end of a target
248 int len = (int)(in - start);
249 if (len > 2 && out < start)
250 memset(out, '\\', len - 2);
251 out += len - 2;
252 *out++ = ':';
253 continue;
254 }
255 yy27:
256 yych = *++in;
257 if (yych <= ' ') {
258 if (yych <= '\n') {
259 if (yych <= 0x00) goto yy11;
260 if (yych <= '\t') goto yy16;
261 goto yy11;
262 } else {
263 if (yych == '\r') goto yy11;
264 if (yych <= 0x1F) goto yy16;
265 goto yy30;
266 }
267 } else {
268 if (yych <= '9') {
269 if (yych == '#') goto yy23;
270 goto yy16;
271 } else {
272 if (yych <= ':') goto yy25;
273 if (yych == '\\') goto yy32;
274 goto yy16;
275 }
276 }
277 yy28:
278 ++in;
279 {
280 // Backslash followed by : and whitespace.
281 // It is therefore normal text and not an escaped colon
282 int len = (int)(in - start - 1);
283 // Need to shift it over if we're overwriting backslashes.
284 if (out < start)
285 memmove(out, start, len);
286 out += len;
287 if (*(in - 1) == '\n')
288 have_newline = true;
289 break;
290 }
291 yy30:
292 ++in;
293 {
294 // 2N backslashes plus space -> 2N backslashes, end of filename.
295 int len = (int)(in - start);
296 if (out < start)
297 memset(out, '\\', len - 1);
298 out += len - 1;
299 break;
300 }
301 yy32:
302 yych = *++in;
303 if (yych <= ' ') {
304 if (yych <= '\n') {
305 if (yych <= 0x00) goto yy11;
306 if (yych <= '\t') goto yy16;
307 goto yy11;
308 } else {
309 if (yych == '\r') goto yy11;
310 if (yych <= 0x1F) goto yy16;
311 goto yy21;
312 }
313 } else {
314 if (yych <= '9') {
315 if (yych == '#') goto yy23;
316 goto yy16;
317 } else {
318 if (yych <= ':') goto yy25;
319 if (yych == '\\') goto yy27;
320 goto yy16;
321 }
322 }
323 }
324
325 }
326
327 int len = (int)(out - filename);
328 const bool is_dependency = !parsing_targets;
329 if (len > 0 && filename[len - 1] == ':') {
330 len--; // Strip off trailing colon, if any.
331 parsing_targets = false;
332 have_target = true;
333 }
334
335 if (len > 0) {
336 StringPiece piece = StringPiece(filename, len);
337 // If we've seen this as an input before, skip it.
338 std::vector<StringPiece>::iterator pos = std::find(ins_.begin(), ins_.end(), piece);
339 if (pos == ins_.end()) {
340 if (is_dependency) {
341 if (poisoned_input) {
342 *err = "inputs may not also have inputs";
343 return false;
344 }
345 // New input.
346 ins_.push_back(piece);
347 } else {
348 // Check for a new output.
349 if (std::find(outs_.begin(), outs_.end(), piece) == outs_.end())
350 outs_.push_back(piece);
351 }
352 } else if (!is_dependency) {
353 // We've passed an input on the left side; reject new inputs.
354 poisoned_input = true;
355 }
356 }
357
358 if (have_newline) {
359 // A newline ends a rule so the next filename will be a new target.
360 parsing_targets = true;
361 poisoned_input = false;
362 }
363 }
364 if (!have_target) {
365 *err = "expected ':' in depfile";
366 return false;
367 }
368 return true;
369 }
370