• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "depfile_parser.h"
16 #include "util.h"
17 
18 #include <algorithm>
19 
DepfileParser(DepfileParserOptions options)20 DepfileParser::DepfileParser(DepfileParserOptions options)
21   : options_(options)
22 {
23 }
24 
25 // A note on backslashes in Makefiles, from reading the docs:
26 // Backslash-newline is the line continuation character.
27 // Backslash-# escapes a # (otherwise meaningful as a comment start).
28 // Backslash-% escapes a % (otherwise meaningful as a special).
29 // Finally, quoting the GNU manual, "Backslashes that are not in danger
30 // of quoting ‘%’ characters go unmolested."
31 // How do you end a line with a backslash?  The netbsd Make docs suggest
32 // reading the result of a shell command echoing a backslash!
33 //
34 // Rather than implement all of above, we follow what GCC/Clang produces:
35 // Backslashes escape a space or hash sign.
36 // When a space is preceded by 2N+1 backslashes, it is represents N backslashes
37 // followed by space.
38 // When a space is preceded by 2N backslashes, it represents 2N backslashes at
39 // the end of a filename.
40 // A hash sign is escaped by a single backslash. All other backslashes remain
41 // unchanged.
42 //
43 // If anyone actually has depfiles that rely on the more complicated
44 // behavior we can adjust this.
Parse(string * content,string * err)45 bool DepfileParser::Parse(string* content, string* err) {
46   // in: current parser input point.
47   // end: end of input.
48   // parsing_targets: whether we are parsing targets or dependencies.
49   char* in = &(*content)[0];
50   char* end = in + content->size();
51   bool have_target = false;
52   bool parsing_targets = true;
53   bool poisoned_input = false;
54   while (in < end) {
55     bool have_newline = false;
56     // out: current output point (typically same as in, but can fall behind
57     // as we de-escape backslashes).
58     char* out = in;
59     // filename: start of the current parsed filename.
60     char* filename = out;
61     for (;;) {
62       // start: beginning of the current parsed span.
63       const char* start = in;
64       char* yymarker = NULL;
65       /*!re2c
66       re2c:define:YYCTYPE = "unsigned char";
67       re2c:define:YYCURSOR = in;
68       re2c:define:YYLIMIT = end;
69       re2c:define:YYMARKER = yymarker;
70 
71       re2c:yyfill:enable = 0;
72 
73       re2c:indent:top = 2;
74       re2c:indent:string = "  ";
75 
76       nul = "\000";
77       newline = '\r'?'\n';
78 
79       '\\\\'* '\\ ' {
80         // 2N+1 backslashes plus space -> N backslashes plus space.
81         int len = (int)(in - start);
82         int n = len / 2 - 1;
83         if (out < start)
84           memset(out, '\\', n);
85         out += n;
86         *out++ = ' ';
87         continue;
88       }
89       '\\\\'+ ' ' {
90         // 2N backslashes plus space -> 2N backslashes, end of filename.
91         int len = (int)(in - start);
92         if (out < start)
93           memset(out, '\\', len - 1);
94         out += len - 1;
95         break;
96       }
97       '\\'+ '#' {
98         // De-escape hash sign, but preserve other leading backslashes.
99         int len = (int)(in - start);
100         if (len > 2 && out < start)
101           memset(out, '\\', len - 2);
102         out += len - 2;
103         *out++ = '#';
104         continue;
105       }
106       '\\'+ ':' [\x00\x20\r\n\t] {
107         // Backslash followed by : and whitespace.
108         // It is therefore normal text and not an escaped colon
109         int len = (int)(in - start - 1);
110         // Need to shift it over if we're overwriting backslashes.
111         if (out < start)
112           memmove(out, start, len);
113         out += len;
114         if (*(in - 1) == '\n')
115           have_newline = true;
116         break;
117       }
118       '\\'+ ':' {
119         // De-escape colon sign, but preserve other leading backslashes.
120         // Regular expression uses lookahead to make sure that no whitespace
121         // nor EOF follows. In that case it'd be the : at the end of a target
122         int len = (int)(in - start);
123         if (len > 2 && out < start)
124           memset(out, '\\', len - 2);
125         out += len - 2;
126         *out++ = ':';
127         continue;
128       }
129       '$$' {
130         // De-escape dollar character.
131         *out++ = '$';
132         continue;
133       }
134       '\\'+ [^\000\r\n] | [a-zA-Z0-9+,/_:.~()}{%=@\x5B\x5D!\x80-\xFF-]+ {
135         // Got a span of plain text.
136         int len = (int)(in - start);
137         // Need to shift it over if we're overwriting backslashes.
138         if (out < start)
139           memmove(out, start, len);
140         out += len;
141         continue;
142       }
143       nul {
144         break;
145       }
146       '\\' newline {
147         // A line continuation ends the current file name.
148         break;
149       }
150       newline {
151         // A newline ends the current file name and the current rule.
152         have_newline = true;
153         break;
154       }
155       [^] {
156         // For any other character (e.g. whitespace), swallow it here,
157         // allowing the outer logic to loop around again.
158         break;
159       }
160       */
161     }
162 
163     int len = (int)(out - filename);
164     const bool is_dependency = !parsing_targets;
165     if (len > 0 && filename[len - 1] == ':') {
166       len--;  // Strip off trailing colon, if any.
167       parsing_targets = false;
168       have_target = true;
169     }
170 
171     if (len > 0) {
172       StringPiece piece = StringPiece(filename, len);
173       // If we've seen this as an input before, skip it.
174       std::vector<StringPiece>::iterator pos = std::find(ins_.begin(), ins_.end(), piece);
175       if (pos == ins_.end()) {
176         if (is_dependency) {
177           if (poisoned_input) {
178             *err = "inputs may not also have inputs";
179             return false;
180           }
181           // New input.
182           ins_.push_back(piece);
183         } else {
184           // Check for a new output.
185           if (std::find(outs_.begin(), outs_.end(), piece) == outs_.end())
186             outs_.push_back(piece);
187         }
188       } else if (!is_dependency) {
189         // We've passed an input on the left side; reject new inputs.
190         poisoned_input = true;
191       }
192     }
193 
194     if (have_newline) {
195       // A newline ends a rule so the next filename will be a new target.
196       parsing_targets = true;
197       poisoned_input = false;
198     }
199   }
200   if (!have_target) {
201     *err = "expected ':' in depfile";
202     return false;
203   }
204   return true;
205 }
206