1 // Copyright 2012 Google Inc. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "deps_log.h" 16 17 #include <assert.h> 18 #include <stdio.h> 19 #include <errno.h> 20 #include <string.h> 21 #ifndef _WIN32 22 #include <unistd.h> 23 #elif defined(_MSC_VER) && (_MSC_VER < 1900) 24 typedef __int32 int32_t; 25 typedef unsigned __int32 uint32_t; 26 #endif 27 28 #include "graph.h" 29 #include "metrics.h" 30 #include "state.h" 31 #include "util.h" 32 33 using namespace std; 34 35 // The version is stored as 4 bytes after the signature and also serves as a 36 // byte order mark. Signature and version combined are 16 bytes long. 37 const char kFileSignature[] = "# ninjadeps\n"; 38 const int kCurrentVersion = 4; 39 40 // Record size is currently limited to less than the full 32 bit, due to 41 // internal buffers having to have this size. 42 const unsigned kMaxRecordSize = (1 << 19) - 1; 43 ~DepsLog()44 DepsLog::~DepsLog() { 45 Close(); 46 } 47 OpenForWrite(const string & path,string * err)48 bool DepsLog::OpenForWrite(const string& path, string* err) { 49 if (needs_recompaction_) { 50 if (!Recompact(path, err)) 51 return false; 52 } 53 54 assert(!file_); 55 file_path_ = path; // we don't actually open the file right now, but will do 56 // so on the first write attempt 57 return true; 58 } 59 RecordDeps(Node * node,TimeStamp mtime,const vector<Node * > & nodes)60 bool DepsLog::RecordDeps(Node* node, TimeStamp mtime, 61 const vector<Node*>& nodes) { 62 return RecordDeps(node, mtime, nodes.size(), 63 nodes.empty() ? NULL : (Node**)&nodes.front()); 64 } 65 RecordDeps(Node * node,TimeStamp mtime,int node_count,Node ** nodes)66 bool DepsLog::RecordDeps(Node* node, TimeStamp mtime, 67 int node_count, Node** nodes) { 68 // Track whether there's any new data to be recorded. 69 bool made_change = false; 70 71 // Assign ids to all nodes that are missing one. 72 if (node->id() < 0) { 73 if (!RecordId(node)) 74 return false; 75 made_change = true; 76 } 77 for (int i = 0; i < node_count; ++i) { 78 if (nodes[i]->id() < 0) { 79 if (!RecordId(nodes[i])) 80 return false; 81 made_change = true; 82 } 83 } 84 85 // See if the new data is different than the existing data, if any. 86 if (!made_change) { 87 Deps* deps = GetDeps(node); 88 if (!deps || 89 deps->mtime != mtime || 90 deps->node_count != node_count) { 91 made_change = true; 92 } else { 93 for (int i = 0; i < node_count; ++i) { 94 if (deps->nodes[i] != nodes[i]) { 95 made_change = true; 96 break; 97 } 98 } 99 } 100 } 101 102 // Don't write anything if there's no new info. 103 if (!made_change) 104 return true; 105 106 // Update on-disk representation. 107 unsigned size = 4 * (1 + 2 + node_count); 108 if (size > kMaxRecordSize) { 109 errno = ERANGE; 110 return false; 111 } 112 113 if (!OpenForWriteIfNeeded()) { 114 return false; 115 } 116 size |= 0x80000000; // Deps record: set high bit. 117 if (fwrite(&size, 4, 1, file_) < 1) 118 return false; 119 int id = node->id(); 120 if (fwrite(&id, 4, 1, file_) < 1) 121 return false; 122 uint32_t mtime_part = static_cast<uint32_t>(mtime & 0xffffffff); 123 if (fwrite(&mtime_part, 4, 1, file_) < 1) 124 return false; 125 mtime_part = static_cast<uint32_t>((mtime >> 32) & 0xffffffff); 126 if (fwrite(&mtime_part, 4, 1, file_) < 1) 127 return false; 128 for (int i = 0; i < node_count; ++i) { 129 id = nodes[i]->id(); 130 if (fwrite(&id, 4, 1, file_) < 1) 131 return false; 132 } 133 if (fflush(file_) != 0) 134 return false; 135 136 // Update in-memory representation. 137 Deps* deps = new Deps(mtime, node_count); 138 for (int i = 0; i < node_count; ++i) 139 deps->nodes[i] = nodes[i]; 140 UpdateDeps(node->id(), deps); 141 142 return true; 143 } 144 Close()145 void DepsLog::Close() { 146 OpenForWriteIfNeeded(); // create the file even if nothing has been recorded 147 if (file_) 148 fclose(file_); 149 file_ = NULL; 150 } 151 Load(const string & path,State * state,string * err)152 LoadStatus DepsLog::Load(const string& path, State* state, string* err) { 153 METRIC_RECORD(".ninja_deps load"); 154 char buf[kMaxRecordSize + 1]; 155 FILE* f = fopen(path.c_str(), "rb"); 156 if (!f) { 157 if (errno == ENOENT) 158 return LOAD_NOT_FOUND; 159 *err = strerror(errno); 160 return LOAD_ERROR; 161 } 162 163 bool valid_header = true; 164 int version = 0; 165 if (!fgets(buf, sizeof(buf), f) || fread(&version, 4, 1, f) < 1) 166 valid_header = false; 167 // Note: For version differences, this should migrate to the new format. 168 // But the v1 format could sometimes (rarely) end up with invalid data, so 169 // don't migrate v1 to v3 to force a rebuild. (v2 only existed for a few days, 170 // and there was no release with it, so pretend that it never happened.) 171 if (!valid_header || strcmp(buf, kFileSignature) != 0 || 172 version != kCurrentVersion) { 173 if (version == 1) 174 *err = "deps log version change; rebuilding"; 175 else 176 *err = "bad deps log signature or version; starting over"; 177 fclose(f); 178 unlink(path.c_str()); 179 // Don't report this as a failure. An empty deps log will cause 180 // us to rebuild the outputs anyway. 181 return LOAD_SUCCESS; 182 } 183 184 long offset; 185 bool read_failed = false; 186 int unique_dep_record_count = 0; 187 int total_dep_record_count = 0; 188 for (;;) { 189 offset = ftell(f); 190 191 unsigned size; 192 if (fread(&size, 4, 1, f) < 1) { 193 if (!feof(f)) 194 read_failed = true; 195 break; 196 } 197 bool is_deps = (size >> 31) != 0; 198 size = size & 0x7FFFFFFF; 199 200 if (size > kMaxRecordSize || fread(buf, size, 1, f) < 1) { 201 read_failed = true; 202 break; 203 } 204 205 if (is_deps) { 206 assert(size % 4 == 0); 207 int* deps_data = reinterpret_cast<int*>(buf); 208 int out_id = deps_data[0]; 209 TimeStamp mtime; 210 mtime = (TimeStamp)(((uint64_t)(unsigned int)deps_data[2] << 32) | 211 (uint64_t)(unsigned int)deps_data[1]); 212 deps_data += 3; 213 int deps_count = (size / 4) - 3; 214 215 Deps* deps = new Deps(mtime, deps_count); 216 for (int i = 0; i < deps_count; ++i) { 217 assert(deps_data[i] < (int)nodes_.size()); 218 assert(nodes_[deps_data[i]]); 219 deps->nodes[i] = nodes_[deps_data[i]]; 220 } 221 222 total_dep_record_count++; 223 if (!UpdateDeps(out_id, deps)) 224 ++unique_dep_record_count; 225 } else { 226 int path_size = size - 4; 227 assert(path_size > 0); // CanonicalizePath() rejects empty paths. 228 // There can be up to 3 bytes of padding. 229 if (buf[path_size - 1] == '\0') --path_size; 230 if (buf[path_size - 1] == '\0') --path_size; 231 if (buf[path_size - 1] == '\0') --path_size; 232 StringPiece subpath(buf, path_size); 233 // It is not necessary to pass in a correct slash_bits here. It will 234 // either be a Node that's in the manifest (in which case it will already 235 // have a correct slash_bits that GetNode will look up), or it is an 236 // implicit dependency from a .d which does not affect the build command 237 // (and so need not have its slashes maintained). 238 Node* node = state->GetNode(subpath, 0); 239 240 // Check that the expected index matches the actual index. This can only 241 // happen if two ninja processes write to the same deps log concurrently. 242 // (This uses unary complement to make the checksum look less like a 243 // dependency record entry.) 244 unsigned checksum = *reinterpret_cast<unsigned*>(buf + size - 4); 245 int expected_id = ~checksum; 246 int id = nodes_.size(); 247 if (id != expected_id) { 248 read_failed = true; 249 break; 250 } 251 252 assert(node->id() < 0); 253 node->set_id(id); 254 nodes_.push_back(node); 255 } 256 } 257 258 if (read_failed) { 259 // An error occurred while loading; try to recover by truncating the 260 // file to the last fully-read record. 261 if (ferror(f)) { 262 *err = strerror(ferror(f)); 263 } else { 264 *err = "premature end of file"; 265 } 266 fclose(f); 267 268 if (!Truncate(path, offset, err)) 269 return LOAD_ERROR; 270 271 // The truncate succeeded; we'll just report the load error as a 272 // warning because the build can proceed. 273 *err += "; recovering"; 274 return LOAD_SUCCESS; 275 } 276 277 fclose(f); 278 279 // Rebuild the log if there are too many dead records. 280 int kMinCompactionEntryCount = 1000; 281 int kCompactionRatio = 3; 282 if (total_dep_record_count > kMinCompactionEntryCount && 283 total_dep_record_count > unique_dep_record_count * kCompactionRatio) { 284 needs_recompaction_ = true; 285 } 286 287 return LOAD_SUCCESS; 288 } 289 GetDeps(Node * node)290 DepsLog::Deps* DepsLog::GetDeps(Node* node) { 291 // Abort if the node has no id (never referenced in the deps) or if 292 // there's no deps recorded for the node. 293 if (node->id() < 0 || node->id() >= (int)deps_.size()) 294 return NULL; 295 return deps_[node->id()]; 296 } 297 GetFirstReverseDepsNode(Node * node)298 Node* DepsLog::GetFirstReverseDepsNode(Node* node) { 299 for (size_t id = 0; id < deps_.size(); ++id) { 300 Deps* deps = deps_[id]; 301 if (!deps) 302 continue; 303 for (int i = 0; i < deps->node_count; ++i) { 304 if (deps->nodes[i] == node) 305 return nodes_[id]; 306 } 307 } 308 return NULL; 309 } 310 Recompact(const string & path,string * err)311 bool DepsLog::Recompact(const string& path, string* err) { 312 METRIC_RECORD(".ninja_deps recompact"); 313 314 Close(); 315 string temp_path = path + ".recompact"; 316 317 // OpenForWrite() opens for append. Make sure it's not appending to a 318 // left-over file from a previous recompaction attempt that crashed somehow. 319 unlink(temp_path.c_str()); 320 321 DepsLog new_log; 322 if (!new_log.OpenForWrite(temp_path, err)) 323 return false; 324 325 // Clear all known ids so that new ones can be reassigned. The new indices 326 // will refer to the ordering in new_log, not in the current log. 327 for (vector<Node*>::iterator i = nodes_.begin(); i != nodes_.end(); ++i) 328 (*i)->set_id(-1); 329 330 // Write out all deps again. 331 for (int old_id = 0; old_id < (int)deps_.size(); ++old_id) { 332 Deps* deps = deps_[old_id]; 333 if (!deps) continue; // If nodes_[old_id] is a leaf, it has no deps. 334 335 if (!IsDepsEntryLiveFor(nodes_[old_id])) 336 continue; 337 338 if (!new_log.RecordDeps(nodes_[old_id], deps->mtime, 339 deps->node_count, deps->nodes)) { 340 new_log.Close(); 341 return false; 342 } 343 } 344 345 new_log.Close(); 346 347 // All nodes now have ids that refer to new_log, so steal its data. 348 deps_.swap(new_log.deps_); 349 nodes_.swap(new_log.nodes_); 350 351 if (unlink(path.c_str()) < 0) { 352 *err = strerror(errno); 353 return false; 354 } 355 356 if (rename(temp_path.c_str(), path.c_str()) < 0) { 357 *err = strerror(errno); 358 return false; 359 } 360 361 return true; 362 } 363 IsDepsEntryLiveFor(Node * node)364 bool DepsLog::IsDepsEntryLiveFor(Node* node) { 365 // Skip entries that don't have in-edges or whose edges don't have a 366 // "deps" attribute. They were in the deps log from previous builds, but 367 // the the files they were for were removed from the build and their deps 368 // entries are no longer needed. 369 // (Without the check for "deps", a chain of two or more nodes that each 370 // had deps wouldn't be collected in a single recompaction.) 371 return node->in_edge() && !node->in_edge()->GetBinding("deps").empty(); 372 } 373 UpdateDeps(int out_id,Deps * deps)374 bool DepsLog::UpdateDeps(int out_id, Deps* deps) { 375 if (out_id >= (int)deps_.size()) 376 deps_.resize(out_id + 1); 377 378 bool delete_old = deps_[out_id] != NULL; 379 if (delete_old) 380 delete deps_[out_id]; 381 deps_[out_id] = deps; 382 return delete_old; 383 } 384 RecordId(Node * node)385 bool DepsLog::RecordId(Node* node) { 386 int path_size = node->path().size(); 387 int padding = (4 - path_size % 4) % 4; // Pad path to 4 byte boundary. 388 389 unsigned size = path_size + padding + 4; 390 if (size > kMaxRecordSize) { 391 errno = ERANGE; 392 return false; 393 } 394 395 if (!OpenForWriteIfNeeded()) { 396 return false; 397 } 398 if (fwrite(&size, 4, 1, file_) < 1) 399 return false; 400 if (fwrite(node->path().data(), path_size, 1, file_) < 1) { 401 assert(!node->path().empty()); 402 return false; 403 } 404 if (padding && fwrite("\0\0", padding, 1, file_) < 1) 405 return false; 406 int id = nodes_.size(); 407 unsigned checksum = ~(unsigned)id; 408 if (fwrite(&checksum, 4, 1, file_) < 1) 409 return false; 410 if (fflush(file_) != 0) 411 return false; 412 413 node->set_id(id); 414 nodes_.push_back(node); 415 416 return true; 417 } 418 OpenForWriteIfNeeded()419 bool DepsLog::OpenForWriteIfNeeded() { 420 if (file_path_.empty()) { 421 return true; 422 } 423 file_ = fopen(file_path_.c_str(), "ab"); 424 if (!file_) { 425 return false; 426 } 427 // Set the buffer size to this and flush the file buffer after every record 428 // to make sure records aren't written partially. 429 if (setvbuf(file_, NULL, _IOFBF, kMaxRecordSize + 1) != 0) { 430 return false; 431 } 432 SetCloseOnExec(fileno(file_)); 433 434 // Opening a file in append mode doesn't set the file pointer to the file's 435 // end on Windows. Do that explicitly. 436 fseek(file_, 0, SEEK_END); 437 438 if (ftell(file_) == 0) { 439 if (fwrite(kFileSignature, sizeof(kFileSignature) - 1, 1, file_) < 1) { 440 return false; 441 } 442 if (fwrite(&kCurrentVersion, 4, 1, file_) < 1) { 443 return false; 444 } 445 } 446 if (fflush(file_) != 0) { 447 return false; 448 } 449 file_path_.clear(); 450 return true; 451 } 452