• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Generated by re2c */
2 // Copyright 2011 Google Inc. All Rights Reserved.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //     http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 #include "depfile_parser.h"
17 #include "util.h"
18 
19 #include <algorithm>
20 
21 using namespace std;
22 
DepfileParser(DepfileParserOptions options)23 DepfileParser::DepfileParser(DepfileParserOptions options)
24   : options_(options)
25 {
26 }
27 
28 // A note on backslashes in Makefiles, from reading the docs:
29 // Backslash-newline is the line continuation character.
30 // Backslash-# escapes a # (otherwise meaningful as a comment start).
31 // Backslash-% escapes a % (otherwise meaningful as a special).
32 // Finally, quoting the GNU manual, "Backslashes that are not in danger
33 // of quoting ‘%’ characters go unmolested."
34 // How do you end a line with a backslash?  The netbsd Make docs suggest
35 // reading the result of a shell command echoing a backslash!
36 //
37 // Rather than implement all of above, we follow what GCC/Clang produces:
38 // Backslashes escape a space or hash sign.
39 // When a space is preceded by 2N+1 backslashes, it represents N backslashes
40 // followed by space.
41 // When a space is preceded by 2N backslashes, it represents 2N backslashes at
42 // the end of a filename.
43 // A hash sign is escaped by a single backslash. All other backslashes remain
44 // unchanged.
45 //
46 // If anyone actually has depfiles that rely on the more complicated
47 // behavior we can adjust this.
// Parses the Makefile-style dependency text held in |*content|.
//
// The buffer is scanned and de-escaped in place: filename bytes are written
// back into |*content| at or before the read position, and every filename is
// recorded as a StringPiece built from a pointer into that same buffer
// (outs_ for names seen before the ':' of a rule, ins_ for names after it).
// NOTE(review): presumably StringPiece does not copy its bytes, so |*content|
// has to outlive ins_/outs_ -- confirm against the StringPiece definition.
//
// The scanner dereferences |in| without comparing it to |end|; it stops on a
// 0x00 byte, so it relies on the NUL that std::string keeps after its last
// character.
//
// Returns true on success. Returns false and sets |*err| when
//   - a name not yet in ins_ appears as a dependency of a rule whose target
//     side contained a name that was already in ins_, or
//   - no filename ending in ':' (a target) was found at all.
//
// NOTE(review): the file header says "Generated by re2c"; the yy* labels and
// the yybm table are presumably regenerated from a separate re2c input, so
// these comments may have to live there instead -- confirm.
Parse(string * content,string * err)48 bool DepfileParser::Parse(string* content, string* err) {
49   // in: current parser input point.
50   // end: end of input.
51   // parsing_targets: whether we are parsing targets or dependencies.
     // have_target: whether any filename ending in ':' has been seen so far.
     // poisoned_input: set when a name already recorded in ins_ shows up on
     // the target side of the current rule; a new dependency is then an error.
52   char* in = &(*content)[0];
53   char* end = in + content->size();
54   bool have_target = false;
55   bool parsing_targets = true;
56   bool poisoned_input = false;
     // Each iteration of this outer loop scans exactly one filename (possibly
     // empty) and then classifies it as a target or a dependency.
57   while (in < end) {
58     bool have_newline = false;
59     // out: current output point (typically same as in, but can fall behind
60     // as we de-escape backslashes).
61     char* out = in;
62     // filename: start of the current parsed filename.
63     char* filename = out;
       // Each iteration of this inner loop matches one lexer rule. `continue`
       // keeps extending the current filename; `break` ends it.
64     for (;;) {
65       // start: beginning of the current parsed span.
66       const char* start = in;
         // yymarker: backup position, set in yy13 and restored in yy19.
67       char* yymarker = NULL;
68 
69     {
70       unsigned char yych;
         // yybm: per-byte class table (256 entries). Bit 128 is set for bytes
         // that belong to a plain-text span (see yy9). NUL, '\n', '\r', ' ',
         // '#', '$' and '\\' are among the bytes that do not have the bit.
71       static const unsigned char yybm[] = {
72           0,   0,   0,   0,   0,   0,   0,   0,
73           0,   0,   0,   0,   0,   0,   0,   0,
74           0,   0,   0,   0,   0,   0,   0,   0,
75           0,   0,   0,   0,   0,   0,   0,   0,
76           0, 128,   0,   0,   0, 128,   0,   0,
77         128, 128,   0, 128, 128, 128, 128, 128,
78         128, 128, 128, 128, 128, 128, 128, 128,
79         128, 128, 128,   0,   0, 128,   0,   0,
80         128, 128, 128, 128, 128, 128, 128, 128,
81         128, 128, 128, 128, 128, 128, 128, 128,
82         128, 128, 128, 128, 128, 128, 128, 128,
83         128, 128, 128, 128,   0, 128,   0, 128,
84           0, 128, 128, 128, 128, 128, 128, 128,
85         128, 128, 128, 128, 128, 128, 128, 128,
86         128, 128, 128, 128, 128, 128, 128, 128,
87         128, 128, 128, 128,   0, 128, 128,   0,
88         128, 128, 128, 128, 128, 128, 128, 128,
89         128, 128, 128, 128, 128, 128, 128, 128,
90         128, 128, 128, 128, 128, 128, 128, 128,
91         128, 128, 128, 128, 128, 128, 128, 128,
92         128, 128, 128, 128, 128, 128, 128, 128,
93         128, 128, 128, 128, 128, 128, 128, 128,
94         128, 128, 128, 128, 128, 128, 128, 128,
95         128, 128, 128, 128, 128, 128, 128, 128,
96         128, 128, 128, 128, 128, 128, 128, 128,
97         128, 128, 128, 128, 128, 128, 128, 128,
98         128, 128, 128, 128, 128, 128, 128, 128,
99         128, 128, 128, 128, 128, 128, 128, 128,
100         128, 128, 128, 128, 128, 128, 128, 128,
101         128, 128, 128, 128, 128, 128, 128, 128,
102         128, 128, 128, 128, 128, 128, 128, 128,
103         128, 128, 128, 128, 128, 128, 128, 128,
104       };
105       yych = *in;
          // Fast path: a plain-text byte starts (or continues) a text span.
106       if (yybm[0+yych] & 128) {
107         goto yy9;
108       }
          // Otherwise classify the first byte: '\n' -> yy6, '\r' -> yy8,
          // '$' -> yy12, '\\' -> yy13, 0x00 falls through below, and every
          // other non-plain byte is swallowed at yy4.
109       if (yych <= '\r') {
110         if (yych <= '\t') {
111           if (yych >= 0x01) goto yy4;
112         } else {
113           if (yych <= '\n') goto yy6;
114           if (yych <= '\f') goto yy4;
115           goto yy8;
116         }
117       } else {
118         if (yych <= '$') {
119           if (yych <= '#') goto yy4;
120           goto yy12;
121         } else {
122           if (yych <= '?') goto yy4;
123           if (yych <= '\\') goto yy13;
124           goto yy4;
125         }
126       }
          // Only a 0x00 byte reaches this point: consume it and end the
          // current filename.
127       ++in;
128       {
129         break;
130       }
131 yy4:
132       ++in;
133 yy5:
134       {
135         // For any other character (e.g. whitespace), swallow it here,
136         // allowing the outer logic to loop around again.
137         break;
138       }
139 yy6:
140       ++in;
141       {
142         // A newline ends the current file name and the current rule.
143         have_newline = true;
144         break;
145       }
          // '\r': "\r\n" is handled as a newline (yy6); a lone '\r' is
          // swallowed like any other separator (yy5).
146 yy8:
147       yych = *++in;
148       if (yych == '\n') goto yy6;
149       goto yy5;
          // Extend the plain-text span for as long as bytes have bit 128 set.
150 yy9:
151       yych = *++in;
152       if (yybm[0+yych] & 128) {
153         goto yy9;
154       }
155 yy11:
156       {
157         // Got a span of plain text.
158         int len = (int)(in - start);
159         // Need to shift it over if we're overwriting backslashes.
160         if (out < start)
161           memmove(out, start, len);
162         out += len;
163         continue;
164       }
          // '$': "$$" de-escapes to a single '$' (yy14); a lone '$' is
          // swallowed and ends the filename (yy5).
165 yy12:
166       yych = *++in;
167       if (yych == '$') goto yy14;
168       goto yy5;
          // One backslash seen. yymarker remembers the position just after it
          // so that yy19 can back up. The next byte selects the rule:
          // 0x00 -> swallow the backslash (yy5), '\n' -> continuation (yy17),
          // '\r' -> yy19, ' ' -> yy21, '#' -> yy23, ':' -> yy25,
          // '\\' -> yy27, anything else -> plain text (yy16).
169 yy13:
170       yych = *(yymarker = ++in);
171       if (yych <= ' ') {
172         if (yych <= '\n') {
173           if (yych <= 0x00) goto yy5;
174           if (yych <= '\t') goto yy16;
175           goto yy17;
176         } else {
177           if (yych == '\r') goto yy19;
178           if (yych <= 0x1F) goto yy16;
179           goto yy21;
180         }
181       } else {
182         if (yych <= '9') {
183           if (yych == '#') goto yy23;
184           goto yy16;
185         } else {
186           if (yych <= ':') goto yy25;
187           if (yych == '\\') goto yy27;
188           goto yy16;
189         }
190       }
191 yy14:
192       ++in;
193       {
194         // De-escape dollar character.
195         *out++ = '$';
196         continue;
197       }
          // Backslash(es) followed by an ordinary byte: consume that byte and
          // keep the whole span unchanged as plain text.
198 yy16:
199       ++in;
200       goto yy11;
201 yy17:
202       ++in;
203       {
204         // A line continuation ends the current file name.
205         break;
206       }
          // Backslash-'\r': a following '\n' makes it a line continuation
          // (yy17); otherwise rewind to just after the backslash and swallow
          // only the backslash (yy5).
207 yy19:
208       yych = *++in;
209       if (yych == '\n') goto yy17;
210       in = yymarker;
211       goto yy5;
212 yy21:
213       ++in;
214       {
215         // 2N+1 backslashes plus space -> N backslashes plus space.
            // len counts the backslashes and the space (2N+2), so n == N.
            // When out == start the backslashes are already in place in the
            // buffer and only the space has to be written.
216         int len = (int)(in - start);
217         int n = len / 2 - 1;
218         if (out < start)
219           memset(out, '\\', n);
220         out += n;
221         *out++ = ' ';
222         continue;
223       }
224 yy23:
225       ++in;
226       {
227         // De-escape hash sign, but preserve other leading backslashes.
            // len counts the backslashes and the '#'; len - 2 drops the '#'
            // and the one backslash that escaped it.
228         int len = (int)(in - start);
229         if (len > 2 && out < start)
230           memset(out, '\\', len - 2);
231         out += len - 2;
232         *out++ = '#';
233         continue;
234       }
          // Backslash(es) then ':': look at the byte after the colon.
          // 0x00, tab, '\n', '\r' or ' ' -> yy28 (the colon ends a target);
          // anything else falls through to yy26 (an escaped colon).
235 yy25:
236       yych = *++in;
237       if (yych <= '\f') {
238         if (yych <= 0x00) goto yy28;
239         if (yych <= 0x08) goto yy26;
240         if (yych <= '\n') goto yy28;
241       } else {
242         if (yych <= '\r') goto yy28;
243         if (yych == ' ') goto yy28;
244       }
245 yy26:
246       {
247         // De-escape colon sign, but preserve other leading backslashes.
248         // Regular expression uses lookahead to make sure that no whitespace
249         // nor EOF follows. In that case it'd be the : at the end of a target
            // in is not advanced here, so the byte after the colon is scanned
            // again by the next rule.
250         int len = (int)(in - start);
251         if (len > 2 && out < start)
252           memset(out, '\\', len - 2);
253         out += len - 2;
254         *out++ = ':';
255         continue;
256       }
          // An even number of backslashes has been seen so far. 0x00, '\n' and
          // '\r' end the span as plain text without being consumed (yy11); a
          // space takes the 2N rule (yy30); another backslash goes to yy32.
257 yy27:
258       yych = *++in;
259       if (yych <= ' ') {
260         if (yych <= '\n') {
261           if (yych <= 0x00) goto yy11;
262           if (yych <= '\t') goto yy16;
263           goto yy11;
264         } else {
265           if (yych == '\r') goto yy11;
266           if (yych <= 0x1F) goto yy16;
267           goto yy30;
268         }
269       } else {
270         if (yych <= '9') {
271           if (yych == '#') goto yy23;
272           goto yy16;
273         } else {
274           if (yych <= ':') goto yy25;
275           if (yych == '\\') goto yy32;
276           goto yy16;
277         }
278       }
279 yy28:
280       ++in;
281       {
282         // Backslash followed by : and whitespace.
283         // It is therefore normal text and not an escaped colon
            // The byte after the colon was consumed by ++in; len excludes it,
            // so the copied text still ends in ':' and the code after the
            // scanner loop treats it as the end of a target.
284         int len = (int)(in - start - 1);
285         // Need to shift it over if we're overwriting backslashes.
286         if (out < start)
287           memmove(out, start, len);
288         out += len;
289         if (*(in - 1) == '\n')
290           have_newline = true;
291         break;
292       }
293 yy30:
294       ++in;
295       {
296         // 2N backslashes plus space -> 2N backslashes, end of filename.
            // len counts the backslashes and the space; the space itself is
            // not written to the output.
297         int len = (int)(in - start);
298         if (out < start)
299           memset(out, '\\', len - 1);
300         out += len - 1;
301         break;
302       }
          // An odd number (three or more) of backslashes has been seen so far.
          // Same dispatch as yy27 except that a space takes the 2N+1 rule
          // (yy21) and another backslash returns to yy27.
303 yy32:
304       yych = *++in;
305       if (yych <= ' ') {
306         if (yych <= '\n') {
307           if (yych <= 0x00) goto yy11;
308           if (yych <= '\t') goto yy16;
309           goto yy11;
310         } else {
311           if (yych == '\r') goto yy11;
312           if (yych <= 0x1F) goto yy16;
313           goto yy21;
314         }
315       } else {
316         if (yych <= '9') {
317           if (yych == '#') goto yy23;
318           goto yy16;
319         } else {
320           if (yych <= ':') goto yy25;
321           if (yych == '\\') goto yy27;
322           goto yy16;
323         }
324       }
325     }
326 
327     }
328 
        // The de-escaped filename now occupies [filename, out).
        // is_dependency is sampled before the colon check below, so the token
        // that carries the trailing ':' is itself still handled as a target.
329     int len = (int)(out - filename);
330     const bool is_dependency = !parsing_targets;
331     if (len > 0 && filename[len - 1] == ':') {
332       len--;  // Strip off trailing colon, if any.
333       parsing_targets = false;
334       have_target = true;
335     }
336 
        // Empty names (e.g. from consecutive separators) are ignored.
337     if (len > 0) {
338       StringPiece piece = StringPiece(filename, len);
339       // If we've seen this as an input before, skip it.
340       std::vector<StringPiece>::iterator pos = std::find(ins_.begin(), ins_.end(), piece);
341       if (pos == ins_.end()) {
342         if (is_dependency) {
343           if (poisoned_input) {
344             *err = "inputs may not also have inputs";
345             return false;
346           }
347           // New input.
348           ins_.push_back(piece);
349         } else {
350           // Check for a new output.
351           if (std::find(outs_.begin(), outs_.end(), piece) == outs_.end())
352             outs_.push_back(piece);
353         }
354       } else if (!is_dependency) {
355         // We've passed an input on the left side; reject new inputs.
356         poisoned_input = true;
357       }
358     }
359 
360     if (have_newline) {
361       // A newline ends a rule so the next filename will be a new target.
362       parsing_targets = true;
363       poisoned_input = false;
364     }
365   }
      // A depfile in which no filename ended in ':' is rejected.
366   if (!have_target) {
367     *err = "expected ':' in depfile";
368     return false;
369   }
370   return true;
371 }
372