• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include "src/trace_processor/importers/ninja/ninja_log_parser.h"

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <map>

#include "perfetto/ext/base/string_splitter.h"
#include "perfetto/ext/base/string_utils.h"
#include "src/trace_processor/importers/common/process_tracker.h"
#include "src/trace_processor/importers/common/slice_tracker.h"
#include "src/trace_processor/importers/common/track_tracker.h"
#include "src/trace_processor/storage/trace_storage.h"
#include "src/trace_processor/trace_sorter.h"
25 
26 namespace perfetto {
27 namespace trace_processor {
28 
29 using base::StringSplitter;
30 
NinjaLogParser(TraceProcessorContext * ctx)31 NinjaLogParser::NinjaLogParser(TraceProcessorContext* ctx) : ctx_(ctx) {}
32 NinjaLogParser::~NinjaLogParser() = default;
33 
Parse(std::unique_ptr<uint8_t[]> buf,size_t len)34 util::Status NinjaLogParser::Parse(std::unique_ptr<uint8_t[]> buf, size_t len) {
35   // A trace is read in chunks of arbitrary size (for http fetch() pipeliniing),
36   // not necessarily aligned on a line boundary.
37   // Here we push everything into a vector and, on each call, consume only
38   // the leading part until the last \n, keeping the rest for the next call.
39   const char* src = reinterpret_cast<const char*>(&buf[0]);
40   log_.insert(log_.end(), src, src + len);
41 
42   // Find the last \n.
43   size_t valid_size = log_.size();
44   for (; valid_size > 0 && log_[valid_size - 1] != '\n'; --valid_size) {
45   }
46 
47   for (StringSplitter line(log_.data(), valid_size, '\n'); line.Next();) {
48     static const char kHeader[] = "# ninja log v";
49     if (!header_parsed_) {
50       if (!base::StartsWith(line.cur_token(), kHeader))
51         return util::ErrStatus("Failed to parse ninja log header");
52       header_parsed_ = true;
53       auto version = base::CStringToUInt32(line.cur_token() + strlen(kHeader));
54       if (!version || *version != 5)
55         return util::ErrStatus("Unsupported ninja log version");
56       continue;
57     }
58 
59     // Each line in the ninja log looks like this:
60     // 4 12  1579224178  ui/assets/modal.scss  832a958a9e234dfa
61     // Where:
62     // - [4, 12] are the timestamps in ms of [start, end] of the job, measured
63     //     from the beginning of the build.
64     // - 1579224178 is the "restat" (ignored).
65     // - ui/assets/modal.scss is the name of the output file being built.
66     // - 832a958a9e234dfa is a hash of the compiler invocation.
67     // In most cases, each hash should be unique per ninja invocation (because
68     // two rules shouln't generate the same output). However, in rare
69     // circumstances the same hash can show up more than once. Examples:
70     // - A GN action generates > 1 output per invocation (e.g., protos). In this
71     //   case all items will have the same [start, end] timestamp. In this case
72     //   we want to merge all the output names into one build step, because from
73     //   the build system viewpoint, that was the same compiler/tool invocation.
74     // - A subtle script that generates different outputs without taking a
75     //   --output=filename argument (e.g. via env vars or similar). Note that
76     //   this happens in the perfetto codebase itself (goto.google.com/nigew).
77     //   In this case we want to treat the two entries as two distinct jobs.
78     //
79     // In summary the deduping logic here is: if both the hash and the
80     // timestamps match -> merge, if not, keep distinct.
81     StringSplitter tok(&line, '\t');
82     auto t_start = base::CStringToInt64(tok.Next() ? tok.cur_token() : "");
83     auto t_end = base::CStringToInt64(tok.Next() ? tok.cur_token() : "");
84     tok.Next();  // Ignore restat.
85     const char* name = tok.Next() ? tok.cur_token() : nullptr;
86     auto cmdhash = base::CStringToUInt64(tok.Next() ? tok.cur_token() : "", 16);
87 
88     if (!t_start || !t_end || !name || !cmdhash) {
89       ctx_->storage->IncrementStats(stats::ninja_parse_errors);
90       continue;
91     }
92 
93     // The same log file can contain timestamps for different builds. The only
94     // way we can tell when a new build starts is by detecting the end timestamp
95     // breaking monotonicity.
96     if (last_end_seen_ == 0 || *t_end < last_end_seen_) {
97       // Create a new "process" for each build. In the UI this causes each build
98       // to be nested under a track group. |cur_build_id_| is the fake pid
99       // of the synthesized process.
100       ++cur_build_id_;
101       StringId name_id = ctx_->storage->InternString("Build");
102       ctx_->process_tracker->SetProcessNameIfUnset(
103           ctx_->process_tracker->GetOrCreateProcess(cur_build_id_), name_id);
104     }
105     last_end_seen_ = *t_end;
106 
107     // If more hashes show up back-to-back with the same timestamps, merge them
108     // together as they identify multiple outputs for the same build rule.
109     if (!jobs_.empty() && *cmdhash == jobs_.back().hash &&
110         *t_start == jobs_.back().start_ms && *t_end == jobs_.back().end_ms) {
111       jobs_.back().names.append(" ");
112       jobs_.back().names.append(name);
113       continue;
114     }
115 
116     jobs_.emplace_back(cur_build_id_, *t_start, *t_end, *cmdhash, name);
117   }
118   log_.erase(log_.begin(), log_.begin() + static_cast<ssize_t>(valid_size));
119   return util::OkStatus();
120 }
121 
122 // This is called after the last Parase() call. At this point all |jobs_| have
123 // been populated.
NotifyEndOfFile()124 void NinjaLogParser::NotifyEndOfFile() {
125   std::sort(jobs_.begin(), jobs_.end(),
126             [](const Job& x, const Job& y) { return x.start_ms < y.start_ms; });
127 
128   // Now we need to work out the job parallelism. There's no direct indication
129   // of that in the ninja logs, so it must be inferred by observing overlapping
130   // of timestamps. In this context a "Worker" is an inferred sequence of jobs
131   // that happened concurrently with other sequences.
132   // Here we pack jobs according the following heuristic, for the sake of making
133   // the graph nicer to read to humans. Consider the initial situation:
134   // 1: [  job 1 ]
135   // 2:   [   job 2   ]
136   // 3: [   job 3   ]
137   //    T=0              | T=6
138   // Assume that a new job starts at T=6. It's very likely that job4 was started
139   // as a consequence of job2 completion (othewise it could have been started
140   // earlier, soon after job 1 or Job 3). It seems to make more sense to draw
141   // it next in the 2nd worker, i.e. next to job 2.
142   struct Worker {
143     int64_t busy_until;
144     TrackId track_id;
145   };
146   std::map<uint32_t /*build_id*/, std::vector<Worker>> workers_by_build;
147 
148   // Assign thread ids to worker without conflicting with builds' process ids
149   // (to avoid main-thread auto-mapping).s
150   uint32_t last_worker_id = cur_build_id_;
151 
152   for (const auto& job : jobs_) {
153     Worker* worker = nullptr;
154     auto& workers = workers_by_build[job.build_id];
155     for (Worker& cur : workers) {
156       // Pick the worker which has the greatest end time (busy_until) <= the
157       // job's start time.
158       if (cur.busy_until <= job.start_ms) {
159         if (!worker || cur.busy_until > worker->busy_until)
160           worker = &cur;
161       }
162     }
163     if (worker) {
164       // Update the worker's end time with the newly assigned job.
165       worker->busy_until = job.end_ms;
166     } else {
167       // All workers are busy, allocate a new one.
168       uint32_t worker_id = ++last_worker_id;
169       char name[32];
170       snprintf(name, sizeof(name), "Worker %zu", workers.size() + 1);
171       StringId name_id = ctx_->storage->InternString(name);
172       auto utid = ctx_->process_tracker->UpdateThread(worker_id, job.build_id);
173       ctx_->process_tracker->SetThreadNameIfUnset(utid, name_id);
174       TrackId track_id = ctx_->track_tracker->InternThreadTrack(utid);
175       workers.emplace_back(Worker{/*busy_until=*/job.end_ms, track_id});
176       worker = &workers.back();
177     }
178 
179     static constexpr int64_t kMsToNs = 1000 * 1000;
180     const int64_t start_ns = job.start_ms * kMsToNs;
181     const int64_t dur_ns = (job.end_ms - job.start_ms) * kMsToNs;
182     StringId name_id = ctx_->storage->InternString(base::StringView(job.names));
183     ctx_->slice_tracker->Scoped(start_ns, worker->track_id, StringId::Null(),
184                                 name_id, dur_ns);
185   }
186 }
187 
188 }  // namespace trace_processor
189 }  // namespace perfetto
190