1 // Copyright 2012 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "deps_log.h"
16
17 #include <assert.h>
18 #include <stdio.h>
19 #include <errno.h>
20 #include <string.h>
21 #ifndef _WIN32
22 #include <unistd.h>
23 #elif defined(_MSC_VER) && (_MSC_VER < 1900)
24 typedef __int32 int32_t;
25 typedef unsigned __int32 uint32_t;
26 #endif
27
28 #include "graph.h"
29 #include "metrics.h"
30 #include "state.h"
31 #include "util.h"
32
33 using namespace std;
34
// On-disk header layout: the textual signature below, immediately followed by
// the format version as a raw 4-byte integer. Because the version is written
// in host byte order it doubles as a byte order mark. Signature plus version
// occupy 16 bytes in total.
const char kFileSignature[] = "# ninjadeps\n";
const int kCurrentVersion = 4;

// Upper bound on the payload size of a single record. It is kept well below
// the full 32-bit range because internal buffers are sized to hold one whole
// record.
const unsigned kMaxRecordSize = (1 << 19) - 1;
43
~DepsLog()44 DepsLog::~DepsLog() {
45 Close();
46 }
47
OpenForWrite(const string & path,string * err)48 bool DepsLog::OpenForWrite(const string& path, string* err) {
49 if (needs_recompaction_) {
50 if (!Recompact(path, err))
51 return false;
52 }
53
54 assert(!file_);
55 file_path_ = path; // we don't actually open the file right now, but will do
56 // so on the first write attempt
57 return true;
58 }
59
RecordDeps(Node * node,TimeStamp mtime,const vector<Node * > & nodes)60 bool DepsLog::RecordDeps(Node* node, TimeStamp mtime,
61 const vector<Node*>& nodes) {
62 return RecordDeps(node, mtime, nodes.size(),
63 nodes.empty() ? NULL : (Node**)&nodes.front());
64 }
65
RecordDeps(Node * node,TimeStamp mtime,int node_count,Node ** nodes)66 bool DepsLog::RecordDeps(Node* node, TimeStamp mtime,
67 int node_count, Node** nodes) {
68 // Track whether there's any new data to be recorded.
69 bool made_change = false;
70
71 // Assign ids to all nodes that are missing one.
72 if (node->id() < 0) {
73 if (!RecordId(node))
74 return false;
75 made_change = true;
76 }
77 for (int i = 0; i < node_count; ++i) {
78 if (nodes[i]->id() < 0) {
79 if (!RecordId(nodes[i]))
80 return false;
81 made_change = true;
82 }
83 }
84
85 // See if the new data is different than the existing data, if any.
86 if (!made_change) {
87 Deps* deps = GetDeps(node);
88 if (!deps ||
89 deps->mtime != mtime ||
90 deps->node_count != node_count) {
91 made_change = true;
92 } else {
93 for (int i = 0; i < node_count; ++i) {
94 if (deps->nodes[i] != nodes[i]) {
95 made_change = true;
96 break;
97 }
98 }
99 }
100 }
101
102 // Don't write anything if there's no new info.
103 if (!made_change)
104 return true;
105
106 // Update on-disk representation.
107 unsigned size = 4 * (1 + 2 + node_count);
108 if (size > kMaxRecordSize) {
109 errno = ERANGE;
110 return false;
111 }
112
113 if (!OpenForWriteIfNeeded()) {
114 return false;
115 }
116 size |= 0x80000000; // Deps record: set high bit.
117 if (fwrite(&size, 4, 1, file_) < 1)
118 return false;
119 int id = node->id();
120 if (fwrite(&id, 4, 1, file_) < 1)
121 return false;
122 uint32_t mtime_part = static_cast<uint32_t>(mtime & 0xffffffff);
123 if (fwrite(&mtime_part, 4, 1, file_) < 1)
124 return false;
125 mtime_part = static_cast<uint32_t>((mtime >> 32) & 0xffffffff);
126 if (fwrite(&mtime_part, 4, 1, file_) < 1)
127 return false;
128 for (int i = 0; i < node_count; ++i) {
129 id = nodes[i]->id();
130 if (fwrite(&id, 4, 1, file_) < 1)
131 return false;
132 }
133 if (fflush(file_) != 0)
134 return false;
135
136 // Update in-memory representation.
137 Deps* deps = new Deps(mtime, node_count);
138 for (int i = 0; i < node_count; ++i)
139 deps->nodes[i] = nodes[i];
140 UpdateDeps(node->id(), deps);
141
142 return true;
143 }
144
Close()145 void DepsLog::Close() {
146 OpenForWriteIfNeeded(); // create the file even if nothing has been recorded
147 if (file_)
148 fclose(file_);
149 file_ = NULL;
150 }
151
Load(const string & path,State * state,string * err)152 LoadStatus DepsLog::Load(const string& path, State* state, string* err) {
153 METRIC_RECORD(".ninja_deps load");
154 char buf[kMaxRecordSize + 1];
155 FILE* f = fopen(path.c_str(), "rb");
156 if (!f) {
157 if (errno == ENOENT)
158 return LOAD_NOT_FOUND;
159 *err = strerror(errno);
160 return LOAD_ERROR;
161 }
162
163 bool valid_header = true;
164 int version = 0;
165 if (!fgets(buf, sizeof(buf), f) || fread(&version, 4, 1, f) < 1)
166 valid_header = false;
167 // Note: For version differences, this should migrate to the new format.
168 // But the v1 format could sometimes (rarely) end up with invalid data, so
169 // don't migrate v1 to v3 to force a rebuild. (v2 only existed for a few days,
170 // and there was no release with it, so pretend that it never happened.)
171 if (!valid_header || strcmp(buf, kFileSignature) != 0 ||
172 version != kCurrentVersion) {
173 if (version == 1)
174 *err = "deps log version change; rebuilding";
175 else
176 *err = "bad deps log signature or version; starting over";
177 fclose(f);
178 unlink(path.c_str());
179 // Don't report this as a failure. An empty deps log will cause
180 // us to rebuild the outputs anyway.
181 return LOAD_SUCCESS;
182 }
183
184 long offset;
185 bool read_failed = false;
186 int unique_dep_record_count = 0;
187 int total_dep_record_count = 0;
188 for (;;) {
189 offset = ftell(f);
190
191 unsigned size;
192 if (fread(&size, 4, 1, f) < 1) {
193 if (!feof(f))
194 read_failed = true;
195 break;
196 }
197 bool is_deps = (size >> 31) != 0;
198 size = size & 0x7FFFFFFF;
199
200 if (size > kMaxRecordSize || fread(buf, size, 1, f) < 1) {
201 read_failed = true;
202 break;
203 }
204
205 if (is_deps) {
206 assert(size % 4 == 0);
207 int* deps_data = reinterpret_cast<int*>(buf);
208 int out_id = deps_data[0];
209 TimeStamp mtime;
210 mtime = (TimeStamp)(((uint64_t)(unsigned int)deps_data[2] << 32) |
211 (uint64_t)(unsigned int)deps_data[1]);
212 deps_data += 3;
213 int deps_count = (size / 4) - 3;
214
215 Deps* deps = new Deps(mtime, deps_count);
216 for (int i = 0; i < deps_count; ++i) {
217 assert(deps_data[i] < (int)nodes_.size());
218 assert(nodes_[deps_data[i]]);
219 deps->nodes[i] = nodes_[deps_data[i]];
220 }
221
222 total_dep_record_count++;
223 if (!UpdateDeps(out_id, deps))
224 ++unique_dep_record_count;
225 } else {
226 int path_size = size - 4;
227 assert(path_size > 0); // CanonicalizePath() rejects empty paths.
228 // There can be up to 3 bytes of padding.
229 if (buf[path_size - 1] == '\0') --path_size;
230 if (buf[path_size - 1] == '\0') --path_size;
231 if (buf[path_size - 1] == '\0') --path_size;
232 StringPiece subpath(buf, path_size);
233 // It is not necessary to pass in a correct slash_bits here. It will
234 // either be a Node that's in the manifest (in which case it will already
235 // have a correct slash_bits that GetNode will look up), or it is an
236 // implicit dependency from a .d which does not affect the build command
237 // (and so need not have its slashes maintained).
238 Node* node = state->GetNode(subpath, 0);
239
240 // Check that the expected index matches the actual index. This can only
241 // happen if two ninja processes write to the same deps log concurrently.
242 // (This uses unary complement to make the checksum look less like a
243 // dependency record entry.)
244 unsigned checksum = *reinterpret_cast<unsigned*>(buf + size - 4);
245 int expected_id = ~checksum;
246 int id = nodes_.size();
247 if (id != expected_id) {
248 read_failed = true;
249 break;
250 }
251
252 assert(node->id() < 0);
253 node->set_id(id);
254 nodes_.push_back(node);
255 }
256 }
257
258 if (read_failed) {
259 // An error occurred while loading; try to recover by truncating the
260 // file to the last fully-read record.
261 if (ferror(f)) {
262 *err = strerror(ferror(f));
263 } else {
264 *err = "premature end of file";
265 }
266 fclose(f);
267
268 if (!Truncate(path, offset, err))
269 return LOAD_ERROR;
270
271 // The truncate succeeded; we'll just report the load error as a
272 // warning because the build can proceed.
273 *err += "; recovering";
274 return LOAD_SUCCESS;
275 }
276
277 fclose(f);
278
279 // Rebuild the log if there are too many dead records.
280 int kMinCompactionEntryCount = 1000;
281 int kCompactionRatio = 3;
282 if (total_dep_record_count > kMinCompactionEntryCount &&
283 total_dep_record_count > unique_dep_record_count * kCompactionRatio) {
284 needs_recompaction_ = true;
285 }
286
287 return LOAD_SUCCESS;
288 }
289
GetDeps(Node * node)290 DepsLog::Deps* DepsLog::GetDeps(Node* node) {
291 // Abort if the node has no id (never referenced in the deps) or if
292 // there's no deps recorded for the node.
293 if (node->id() < 0 || node->id() >= (int)deps_.size())
294 return NULL;
295 return deps_[node->id()];
296 }
297
GetFirstReverseDepsNode(Node * node)298 Node* DepsLog::GetFirstReverseDepsNode(Node* node) {
299 for (size_t id = 0; id < deps_.size(); ++id) {
300 Deps* deps = deps_[id];
301 if (!deps)
302 continue;
303 for (int i = 0; i < deps->node_count; ++i) {
304 if (deps->nodes[i] == node)
305 return nodes_[id];
306 }
307 }
308 return NULL;
309 }
310
Recompact(const string & path,string * err)311 bool DepsLog::Recompact(const string& path, string* err) {
312 METRIC_RECORD(".ninja_deps recompact");
313
314 Close();
315 string temp_path = path + ".recompact";
316
317 // OpenForWrite() opens for append. Make sure it's not appending to a
318 // left-over file from a previous recompaction attempt that crashed somehow.
319 unlink(temp_path.c_str());
320
321 DepsLog new_log;
322 if (!new_log.OpenForWrite(temp_path, err))
323 return false;
324
325 // Clear all known ids so that new ones can be reassigned. The new indices
326 // will refer to the ordering in new_log, not in the current log.
327 for (vector<Node*>::iterator i = nodes_.begin(); i != nodes_.end(); ++i)
328 (*i)->set_id(-1);
329
330 // Write out all deps again.
331 for (int old_id = 0; old_id < (int)deps_.size(); ++old_id) {
332 Deps* deps = deps_[old_id];
333 if (!deps) continue; // If nodes_[old_id] is a leaf, it has no deps.
334
335 if (!IsDepsEntryLiveFor(nodes_[old_id]))
336 continue;
337
338 if (!new_log.RecordDeps(nodes_[old_id], deps->mtime,
339 deps->node_count, deps->nodes)) {
340 new_log.Close();
341 return false;
342 }
343 }
344
345 new_log.Close();
346
347 // All nodes now have ids that refer to new_log, so steal its data.
348 deps_.swap(new_log.deps_);
349 nodes_.swap(new_log.nodes_);
350
351 if (unlink(path.c_str()) < 0) {
352 *err = strerror(errno);
353 return false;
354 }
355
356 if (rename(temp_path.c_str(), path.c_str()) < 0) {
357 *err = strerror(errno);
358 return false;
359 }
360
361 return true;
362 }
363
IsDepsEntryLiveFor(Node * node)364 bool DepsLog::IsDepsEntryLiveFor(Node* node) {
365 // Skip entries that don't have in-edges or whose edges don't have a
366 // "deps" attribute. They were in the deps log from previous builds, but
367 // the the files they were for were removed from the build and their deps
368 // entries are no longer needed.
369 // (Without the check for "deps", a chain of two or more nodes that each
370 // had deps wouldn't be collected in a single recompaction.)
371 return node->in_edge() && !node->in_edge()->GetBinding("deps").empty();
372 }
373
UpdateDeps(int out_id,Deps * deps)374 bool DepsLog::UpdateDeps(int out_id, Deps* deps) {
375 if (out_id >= (int)deps_.size())
376 deps_.resize(out_id + 1);
377
378 bool delete_old = deps_[out_id] != NULL;
379 if (delete_old)
380 delete deps_[out_id];
381 deps_[out_id] = deps;
382 return delete_old;
383 }
384
RecordId(Node * node)385 bool DepsLog::RecordId(Node* node) {
386 int path_size = node->path().size();
387 int padding = (4 - path_size % 4) % 4; // Pad path to 4 byte boundary.
388
389 unsigned size = path_size + padding + 4;
390 if (size > kMaxRecordSize) {
391 errno = ERANGE;
392 return false;
393 }
394
395 if (!OpenForWriteIfNeeded()) {
396 return false;
397 }
398 if (fwrite(&size, 4, 1, file_) < 1)
399 return false;
400 if (fwrite(node->path().data(), path_size, 1, file_) < 1) {
401 assert(!node->path().empty());
402 return false;
403 }
404 if (padding && fwrite("\0\0", padding, 1, file_) < 1)
405 return false;
406 int id = nodes_.size();
407 unsigned checksum = ~(unsigned)id;
408 if (fwrite(&checksum, 4, 1, file_) < 1)
409 return false;
410 if (fflush(file_) != 0)
411 return false;
412
413 node->set_id(id);
414 nodes_.push_back(node);
415
416 return true;
417 }
418
OpenForWriteIfNeeded()419 bool DepsLog::OpenForWriteIfNeeded() {
420 if (file_path_.empty()) {
421 return true;
422 }
423 file_ = fopen(file_path_.c_str(), "ab");
424 if (!file_) {
425 return false;
426 }
427 // Set the buffer size to this and flush the file buffer after every record
428 // to make sure records aren't written partially.
429 if (setvbuf(file_, NULL, _IOFBF, kMaxRecordSize + 1) != 0) {
430 return false;
431 }
432 SetCloseOnExec(fileno(file_));
433
434 // Opening a file in append mode doesn't set the file pointer to the file's
435 // end on Windows. Do that explicitly.
436 fseek(file_, 0, SEEK_END);
437
438 if (ftell(file_) == 0) {
439 if (fwrite(kFileSignature, sizeof(kFileSignature) - 1, 1, file_) < 1) {
440 return false;
441 }
442 if (fwrite(&kCurrentVersion, 4, 1, file_) < 1) {
443 return false;
444 }
445 }
446 if (fflush(file_) != 0) {
447 return false;
448 }
449 file_path_.clear();
450 return true;
451 }
452