• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2012 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "deps_log.h"
16 
17 #include <assert.h>
18 #include <stdio.h>
19 #include <errno.h>
20 #include <string.h>
21 #ifndef _WIN32
22 #include <unistd.h>
23 #elif defined(_MSC_VER) && (_MSC_VER < 1900)
24 typedef __int32 int32_t;
25 typedef unsigned __int32 uint32_t;
26 #endif
27 
28 #include "graph.h"
29 #include "metrics.h"
30 #include "state.h"
31 #include "util.h"
32 
// On-disk header: the signature text below is immediately followed by a
// 4-byte version number, which doubles as a byte order mark.  Signature and
// version together occupy 16 bytes.
const char kFileSignature[] = "# ninjadeps\n";
const int kCurrentVersion = 4;

// Largest payload a single record may carry.  This is deliberately smaller
// than what the size field could express, because internal buffers are
// dimensioned from this value.
const unsigned kMaxRecordSize = (1u << 19) - 1u;
41 
~DepsLog()42 DepsLog::~DepsLog() {
43   Close();
44 }
45 
OpenForWrite(const string & path,string * err)46 bool DepsLog::OpenForWrite(const string& path, string* err) {
47   if (needs_recompaction_) {
48     if (!Recompact(path, err))
49       return false;
50   }
51 
52   assert(!file_);
53   file_path_ = path;  // we don't actually open the file right now, but will do
54                       // so on the first write attempt
55   return true;
56 }
57 
RecordDeps(Node * node,TimeStamp mtime,const vector<Node * > & nodes)58 bool DepsLog::RecordDeps(Node* node, TimeStamp mtime,
59                          const vector<Node*>& nodes) {
60   return RecordDeps(node, mtime, nodes.size(),
61                     nodes.empty() ? NULL : (Node**)&nodes.front());
62 }
63 
RecordDeps(Node * node,TimeStamp mtime,int node_count,Node ** nodes)64 bool DepsLog::RecordDeps(Node* node, TimeStamp mtime,
65                          int node_count, Node** nodes) {
66   // Track whether there's any new data to be recorded.
67   bool made_change = false;
68 
69   // Assign ids to all nodes that are missing one.
70   if (node->id() < 0) {
71     if (!RecordId(node))
72       return false;
73     made_change = true;
74   }
75   for (int i = 0; i < node_count; ++i) {
76     if (nodes[i]->id() < 0) {
77       if (!RecordId(nodes[i]))
78         return false;
79       made_change = true;
80     }
81   }
82 
83   // See if the new data is different than the existing data, if any.
84   if (!made_change) {
85     Deps* deps = GetDeps(node);
86     if (!deps ||
87         deps->mtime != mtime ||
88         deps->node_count != node_count) {
89       made_change = true;
90     } else {
91       for (int i = 0; i < node_count; ++i) {
92         if (deps->nodes[i] != nodes[i]) {
93           made_change = true;
94           break;
95         }
96       }
97     }
98   }
99 
100   // Don't write anything if there's no new info.
101   if (!made_change)
102     return true;
103 
104   // Update on-disk representation.
105   unsigned size = 4 * (1 + 2 + node_count);
106   if (size > kMaxRecordSize) {
107     errno = ERANGE;
108     return false;
109   }
110 
111   if (!OpenForWriteIfNeeded()) {
112     return false;
113   }
114   size |= 0x80000000;  // Deps record: set high bit.
115   if (fwrite(&size, 4, 1, file_) < 1)
116     return false;
117   int id = node->id();
118   if (fwrite(&id, 4, 1, file_) < 1)
119     return false;
120   uint32_t mtime_part = static_cast<uint32_t>(mtime & 0xffffffff);
121   if (fwrite(&mtime_part, 4, 1, file_) < 1)
122     return false;
123   mtime_part = static_cast<uint32_t>((mtime >> 32) & 0xffffffff);
124   if (fwrite(&mtime_part, 4, 1, file_) < 1)
125     return false;
126   for (int i = 0; i < node_count; ++i) {
127     id = nodes[i]->id();
128     if (fwrite(&id, 4, 1, file_) < 1)
129       return false;
130   }
131   if (fflush(file_) != 0)
132     return false;
133 
134   // Update in-memory representation.
135   Deps* deps = new Deps(mtime, node_count);
136   for (int i = 0; i < node_count; ++i)
137     deps->nodes[i] = nodes[i];
138   UpdateDeps(node->id(), deps);
139 
140   return true;
141 }
142 
Close()143 void DepsLog::Close() {
144   OpenForWriteIfNeeded();  // create the file even if nothing has been recorded
145   if (file_)
146     fclose(file_);
147   file_ = NULL;
148 }
149 
Load(const string & path,State * state,string * err)150 LoadStatus DepsLog::Load(const string& path, State* state, string* err) {
151   METRIC_RECORD(".ninja_deps load");
152   char buf[kMaxRecordSize + 1];
153   FILE* f = fopen(path.c_str(), "rb");
154   if (!f) {
155     if (errno == ENOENT)
156       return LOAD_NOT_FOUND;
157     *err = strerror(errno);
158     return LOAD_ERROR;
159   }
160 
161   bool valid_header = true;
162   int version = 0;
163   if (!fgets(buf, sizeof(buf), f) || fread(&version, 4, 1, f) < 1)
164     valid_header = false;
165   // Note: For version differences, this should migrate to the new format.
166   // But the v1 format could sometimes (rarely) end up with invalid data, so
167   // don't migrate v1 to v3 to force a rebuild. (v2 only existed for a few days,
168   // and there was no release with it, so pretend that it never happened.)
169   if (!valid_header || strcmp(buf, kFileSignature) != 0 ||
170       version != kCurrentVersion) {
171     if (version == 1)
172       *err = "deps log version change; rebuilding";
173     else
174       *err = "bad deps log signature or version; starting over";
175     fclose(f);
176     unlink(path.c_str());
177     // Don't report this as a failure.  An empty deps log will cause
178     // us to rebuild the outputs anyway.
179     return LOAD_SUCCESS;
180   }
181 
182   long offset;
183   bool read_failed = false;
184   int unique_dep_record_count = 0;
185   int total_dep_record_count = 0;
186   for (;;) {
187     offset = ftell(f);
188 
189     unsigned size;
190     if (fread(&size, 4, 1, f) < 1) {
191       if (!feof(f))
192         read_failed = true;
193       break;
194     }
195     bool is_deps = (size >> 31) != 0;
196     size = size & 0x7FFFFFFF;
197 
198     if (size > kMaxRecordSize || fread(buf, size, 1, f) < 1) {
199       read_failed = true;
200       break;
201     }
202 
203     if (is_deps) {
204       assert(size % 4 == 0);
205       int* deps_data = reinterpret_cast<int*>(buf);
206       int out_id = deps_data[0];
207       TimeStamp mtime;
208       mtime = (TimeStamp)(((uint64_t)(unsigned int)deps_data[2] << 32) |
209                           (uint64_t)(unsigned int)deps_data[1]);
210       deps_data += 3;
211       int deps_count = (size / 4) - 3;
212 
213       Deps* deps = new Deps(mtime, deps_count);
214       for (int i = 0; i < deps_count; ++i) {
215         assert(deps_data[i] < (int)nodes_.size());
216         assert(nodes_[deps_data[i]]);
217         deps->nodes[i] = nodes_[deps_data[i]];
218       }
219 
220       total_dep_record_count++;
221       if (!UpdateDeps(out_id, deps))
222         ++unique_dep_record_count;
223     } else {
224       int path_size = size - 4;
225       assert(path_size > 0);  // CanonicalizePath() rejects empty paths.
226       // There can be up to 3 bytes of padding.
227       if (buf[path_size - 1] == '\0') --path_size;
228       if (buf[path_size - 1] == '\0') --path_size;
229       if (buf[path_size - 1] == '\0') --path_size;
230       StringPiece subpath(buf, path_size);
231       // It is not necessary to pass in a correct slash_bits here. It will
232       // either be a Node that's in the manifest (in which case it will already
233       // have a correct slash_bits that GetNode will look up), or it is an
234       // implicit dependency from a .d which does not affect the build command
235       // (and so need not have its slashes maintained).
236       Node* node = state->GetNode(subpath, 0);
237 
238       // Check that the expected index matches the actual index. This can only
239       // happen if two ninja processes write to the same deps log concurrently.
240       // (This uses unary complement to make the checksum look less like a
241       // dependency record entry.)
242       unsigned checksum = *reinterpret_cast<unsigned*>(buf + size - 4);
243       int expected_id = ~checksum;
244       int id = nodes_.size();
245       if (id != expected_id) {
246         read_failed = true;
247         break;
248       }
249 
250       assert(node->id() < 0);
251       node->set_id(id);
252       nodes_.push_back(node);
253     }
254   }
255 
256   if (read_failed) {
257     // An error occurred while loading; try to recover by truncating the
258     // file to the last fully-read record.
259     if (ferror(f)) {
260       *err = strerror(ferror(f));
261     } else {
262       *err = "premature end of file";
263     }
264     fclose(f);
265 
266     if (!Truncate(path, offset, err))
267       return LOAD_ERROR;
268 
269     // The truncate succeeded; we'll just report the load error as a
270     // warning because the build can proceed.
271     *err += "; recovering";
272     return LOAD_SUCCESS;
273   }
274 
275   fclose(f);
276 
277   // Rebuild the log if there are too many dead records.
278   int kMinCompactionEntryCount = 1000;
279   int kCompactionRatio = 3;
280   if (total_dep_record_count > kMinCompactionEntryCount &&
281       total_dep_record_count > unique_dep_record_count * kCompactionRatio) {
282     needs_recompaction_ = true;
283   }
284 
285   return LOAD_SUCCESS;
286 }
287 
GetDeps(Node * node)288 DepsLog::Deps* DepsLog::GetDeps(Node* node) {
289   // Abort if the node has no id (never referenced in the deps) or if
290   // there's no deps recorded for the node.
291   if (node->id() < 0 || node->id() >= (int)deps_.size())
292     return NULL;
293   return deps_[node->id()];
294 }
295 
Recompact(const string & path,string * err)296 bool DepsLog::Recompact(const string& path, string* err) {
297   METRIC_RECORD(".ninja_deps recompact");
298 
299   Close();
300   string temp_path = path + ".recompact";
301 
302   // OpenForWrite() opens for append.  Make sure it's not appending to a
303   // left-over file from a previous recompaction attempt that crashed somehow.
304   unlink(temp_path.c_str());
305 
306   DepsLog new_log;
307   if (!new_log.OpenForWrite(temp_path, err))
308     return false;
309 
310   // Clear all known ids so that new ones can be reassigned.  The new indices
311   // will refer to the ordering in new_log, not in the current log.
312   for (vector<Node*>::iterator i = nodes_.begin(); i != nodes_.end(); ++i)
313     (*i)->set_id(-1);
314 
315   // Write out all deps again.
316   for (int old_id = 0; old_id < (int)deps_.size(); ++old_id) {
317     Deps* deps = deps_[old_id];
318     if (!deps) continue;  // If nodes_[old_id] is a leaf, it has no deps.
319 
320     if (!IsDepsEntryLiveFor(nodes_[old_id]))
321       continue;
322 
323     if (!new_log.RecordDeps(nodes_[old_id], deps->mtime,
324                             deps->node_count, deps->nodes)) {
325       new_log.Close();
326       return false;
327     }
328   }
329 
330   new_log.Close();
331 
332   // All nodes now have ids that refer to new_log, so steal its data.
333   deps_.swap(new_log.deps_);
334   nodes_.swap(new_log.nodes_);
335 
336   if (unlink(path.c_str()) < 0) {
337     *err = strerror(errno);
338     return false;
339   }
340 
341   if (rename(temp_path.c_str(), path.c_str()) < 0) {
342     *err = strerror(errno);
343     return false;
344   }
345 
346   return true;
347 }
348 
IsDepsEntryLiveFor(Node * node)349 bool DepsLog::IsDepsEntryLiveFor(Node* node) {
350   // Skip entries that don't have in-edges or whose edges don't have a
351   // "deps" attribute. They were in the deps log from previous builds, but
352   // the the files they were for were removed from the build and their deps
353   // entries are no longer needed.
354   // (Without the check for "deps", a chain of two or more nodes that each
355   // had deps wouldn't be collected in a single recompaction.)
356   return node->in_edge() && !node->in_edge()->GetBinding("deps").empty();
357 }
358 
UpdateDeps(int out_id,Deps * deps)359 bool DepsLog::UpdateDeps(int out_id, Deps* deps) {
360   if (out_id >= (int)deps_.size())
361     deps_.resize(out_id + 1);
362 
363   bool delete_old = deps_[out_id] != NULL;
364   if (delete_old)
365     delete deps_[out_id];
366   deps_[out_id] = deps;
367   return delete_old;
368 }
369 
RecordId(Node * node)370 bool DepsLog::RecordId(Node* node) {
371   int path_size = node->path().size();
372   int padding = (4 - path_size % 4) % 4;  // Pad path to 4 byte boundary.
373 
374   unsigned size = path_size + padding + 4;
375   if (size > kMaxRecordSize) {
376     errno = ERANGE;
377     return false;
378   }
379 
380   if (!OpenForWriteIfNeeded()) {
381     return false;
382   }
383   if (fwrite(&size, 4, 1, file_) < 1)
384     return false;
385   if (fwrite(node->path().data(), path_size, 1, file_) < 1) {
386     assert(!node->path().empty());
387     return false;
388   }
389   if (padding && fwrite("\0\0", padding, 1, file_) < 1)
390     return false;
391   int id = nodes_.size();
392   unsigned checksum = ~(unsigned)id;
393   if (fwrite(&checksum, 4, 1, file_) < 1)
394     return false;
395   if (fflush(file_) != 0)
396     return false;
397 
398   node->set_id(id);
399   nodes_.push_back(node);
400 
401   return true;
402 }
403 
OpenForWriteIfNeeded()404 bool DepsLog::OpenForWriteIfNeeded() {
405   if (file_path_.empty()) {
406     return true;
407   }
408   file_ = fopen(file_path_.c_str(), "ab");
409   if (!file_) {
410     return false;
411   }
412   // Set the buffer size to this and flush the file buffer after every record
413   // to make sure records aren't written partially.
414   setvbuf(file_, NULL, _IOFBF, kMaxRecordSize + 1);
415   SetCloseOnExec(fileno(file_));
416 
417   // Opening a file in append mode doesn't set the file pointer to the file's
418   // end on Windows. Do that explicitly.
419   fseek(file_, 0, SEEK_END);
420 
421   if (ftell(file_) == 0) {
422     if (fwrite(kFileSignature, sizeof(kFileSignature) - 1, 1, file_) < 1) {
423       return false;
424     }
425     if (fwrite(&kCurrentVersion, 4, 1, file_) < 1) {
426       return false;
427     }
428   }
429   if (fflush(file_) != 0) {
430     return false;
431   }
432   file_path_.clear();
433   return true;
434 }
435