1 /*
2 * Copyright (C) 2020 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "tools/trace_to_text/trace_to_hprof.h"
18
19 #include <algorithm>
20 #include <limits>
21 #include <string>
22 #include <unordered_map>
23 #include <unordered_set>
24 #include <vector>
25
26 #include "perfetto/base/logging.h"
27 #include "perfetto/ext/base/endian.h"
28 #include "perfetto/ext/base/optional.h"
29 #include "perfetto/ext/base/string_utils.h"
30 #include "tools/trace_to_text/utils.h"
31
32 // Spec
33 // http://hg.openjdk.java.net/jdk6/jdk6/jdk/raw-file/tip/src/share/demo/jvmti/hprof/manual.html#Basic_Type
34 // Parser
35 // https://cs.android.com/android/platform/superproject/+/master:art/tools/ahat/src/main/com/android/ahat/heapdump/Parser.java
36
37 namespace perfetto {
38 namespace trace_to_text {
39
40 namespace {
// Magic string emitted at the very start of the output file.
constexpr char kHeader[] = "PERFETTO_JAVA_HEAP";
// Size in bytes of an identifier, as declared in the file header.
constexpr uint32_t kIdSz{8};
// Serial number shared by the placeholder stack trace and the records that
// reference it.
constexpr uint32_t kStackTraceSerialNumber{1};
44
45 class BigEndianBuffer {
46 public:
WriteId(uint64_t val)47 void WriteId(uint64_t val) { WriteU8(val); }
48
WriteU8(uint64_t val)49 void WriteU8(uint64_t val) {
50 val = base::HostToBE64(val);
51 Write(reinterpret_cast<char*>(&val), sizeof(uint64_t));
52 }
53
WriteU4(uint32_t val)54 void WriteU4(uint32_t val) {
55 val = base::HostToBE32(val);
56 Write(reinterpret_cast<char*>(&val), sizeof(uint32_t));
57 }
58
SetU4(uint32_t val,size_t pos)59 void SetU4(uint32_t val, size_t pos) {
60 val = base::HostToBE32(val);
61 PERFETTO_CHECK(pos + 4 <= buf_.size());
62 memcpy(buf_.data() + pos, &val, sizeof(uint32_t));
63 }
64
65 // Uncomment when needed
66 // void WriteU2(uint16_t val) {
67 // val = base::HostToBE16(val);
68 // Write(reinterpret_cast<char*>(&val), sizeof(uint16_t));
69 // }
70
WriteByte(uint8_t val)71 void WriteByte(uint8_t val) { buf_.emplace_back(val); }
72
Write(const char * val,uint32_t sz)73 void Write(const char* val, uint32_t sz) {
74 const char* end = val + sz;
75 while (val < end) {
76 WriteByte(static_cast<uint8_t>(*val));
77 val++;
78 }
79 }
80
written() const81 size_t written() const { return buf_.size(); }
82
Flush(std::ostream * out) const83 void Flush(std::ostream* out) const {
84 out->write(buf_.data(), static_cast<std::streamsize>(buf_.size()));
85 }
86
87 private:
88 std::vector<char> buf_;
89 };
90
91 class HprofWriter {
92 public:
HprofWriter(std::ostream * output)93 HprofWriter(std::ostream* output) : output_(output) {}
94
WriteBuffer(const BigEndianBuffer & buf)95 void WriteBuffer(const BigEndianBuffer& buf) { buf.Flush(output_); }
96
WriteRecord(const uint8_t type,const std::function<void (BigEndianBuffer *)> && writer)97 void WriteRecord(const uint8_t type,
98 const std::function<void(BigEndianBuffer*)>&& writer) {
99 BigEndianBuffer buf;
100 buf.WriteByte(type);
101 // ts offset
102 buf.WriteU4(0);
103 // size placeholder
104 buf.WriteU4(0);
105 writer(&buf);
106 uint32_t record_sz = static_cast<uint32_t>(buf.written() - 9);
107 buf.SetU4(record_sz, 5);
108 WriteBuffer(buf);
109 }
110
111 private:
112 std::ostream* output_;
113 };
114
115 // A Class from the heap dump.
116 class ClassData {
117 public:
ClassData(uint64_t class_name_string_id)118 explicit ClassData(uint64_t class_name_string_id)
119 : class_name_string_id_(class_name_string_id) {}
120
121 // Writes a HPROF LOAD_CLASS record for this Class
WriteHprofLoadClass(HprofWriter * writer,uint64_t class_object_id,uint32_t class_serial_number) const122 void WriteHprofLoadClass(HprofWriter* writer,
123 uint64_t class_object_id,
124 uint32_t class_serial_number) const {
125 writer->WriteRecord(0x02, [class_object_id, class_serial_number,
126 this](BigEndianBuffer* buf) {
127 buf->WriteU4(class_serial_number);
128 buf->WriteId(class_object_id);
129 buf->WriteU4(kStackTraceSerialNumber);
130 buf->WriteId(class_name_string_id_);
131 });
132 }
133
134 private:
135 uint64_t class_name_string_id_;
136 };
137
138 // Ingested data from a Java Heap Profile for a name, location pair.
139 // We need to support multiple class datas per pair as name, location is
140 // not unique. Classloader should guarantee uniqueness but is not available
141 // until S.
142 class RawClassData {
143 public:
AddClass(uint64_t id,base::Optional<uint64_t> superclass_id)144 void AddClass(uint64_t id, base::Optional<uint64_t> superclass_id) {
145 ids_.push_back(std::make_pair(id, superclass_id));
146 }
147
AddTemplate(uint64_t template_id)148 void AddTemplate(uint64_t template_id) {
149 template_ids_.push_back(template_id);
150 }
151
152 // Transforms the raw data into one or more ClassData and adds them to the
153 // parameter map.
ToClassData(std::unordered_map<uint64_t,ClassData> * id_to_class,uint64_t class_name_string_id) const154 void ToClassData(std::unordered_map<uint64_t, ClassData>* id_to_class,
155 uint64_t class_name_string_id) const {
156 // TODO(dinoderek) assert the two vectors have same length, iterate on both
157 for (auto it_ids = ids_.begin(); it_ids != ids_.end(); ++it_ids) {
158 // TODO(dinoderek) more data will be needed to write CLASS_DUMP
159 id_to_class->emplace(it_ids->first, ClassData(class_name_string_id));
160 }
161 }
162
163 private:
164 // Pair contains class ID and super class ID.
165 std::vector<std::pair<uint64_t, base::Optional<uint64_t>>> ids_;
166 // Class id of the template
167 std::vector<uint64_t> template_ids_;
168 };
169
170 // The Heap Dump data
171 class HeapDump {
172 public:
HeapDump(trace_processor::TraceProcessor * tp)173 explicit HeapDump(trace_processor::TraceProcessor* tp) : tp_(tp) {}
174
Ingest()175 void Ingest() { IngestClasses(); }
176
Write(HprofWriter * writer)177 void Write(HprofWriter* writer) {
178 WriteStrings(writer);
179 WriteLoadClass(writer);
180 }
181
182 private:
183 trace_processor::TraceProcessor* tp_;
184
185 // String IDs start from 1 as 0 appears to be reserved.
186 uint64_t next_string_id_ = 1;
187 // Strings to corresponding String ID
188 std::unordered_map<std::string, uint64_t> string_to_id_;
189 // Type ID to corresponding Class
190 std::unordered_map<uint64_t, ClassData> id_to_class_;
191
192 // Ingests and processes the class data from the heap dump.
IngestClasses()193 void IngestClasses() {
194 // TODO(dinoderek): heap_graph_class does not support pid or ts filtering
195
196 std::map<std::pair<uint64_t, std::string>, RawClassData> raw_classes;
197
198 auto it = tp_->ExecuteQuery(R"(SELECT
199 id,
200 IFNULL(deobfuscated_name, name),
201 superclass_id,
202 location
203 FROM heap_graph_class )");
204
205 while (it.Next()) {
206 uint64_t id = static_cast<uint64_t>(it.Get(0).AsLong());
207
208 std::string raw_dname(it.Get(1).AsString());
209 std::string dname;
210 bool is_template_class =
211 base::StartsWith(raw_dname, std::string("java.lang.Class<"));
212 if (is_template_class) {
213 dname = raw_dname.substr(17, raw_dname.size() - 18);
214 } else {
215 dname = raw_dname;
216 }
217 uint64_t name_id = IngestString(dname);
218
219 auto raw_super_id = it.Get(2);
220 base::Optional<uint64_t> maybe_super_id =
221 raw_super_id.is_null()
222 ? base::nullopt
223 : base::Optional<uint64_t>(
224 static_cast<uint64_t>(raw_super_id.AsLong()));
225
226 std::string location(it.Get(3).AsString());
227
228 auto raw_classes_it =
229 raw_classes.emplace(std::make_pair(name_id, location), RawClassData())
230 .first;
231 if (is_template_class) {
232 raw_classes_it->second.AddTemplate(id);
233 } else {
234 raw_classes_it->second.AddClass(id, maybe_super_id);
235 }
236 }
237
238 for (const auto& raw : raw_classes) {
239 auto class_name_string_id = raw.first.first;
240 raw.second.ToClassData(&id_to_class_, class_name_string_id);
241 }
242 }
243
244 // Ingests the parameter string and returns the HPROF ID for the string.
IngestString(const std::string & s)245 uint64_t IngestString(const std::string& s) {
246 auto maybe_id = string_to_id_.find(s);
247 if (maybe_id != string_to_id_.end()) {
248 return maybe_id->second;
249 } else {
250 auto id = next_string_id_;
251 next_string_id_ += 1;
252 string_to_id_[s] = id;
253 return id;
254 }
255 }
256
257 // Writes STRING sections to the output
WriteStrings(HprofWriter * writer)258 void WriteStrings(HprofWriter* writer) {
259 for (const auto& it : string_to_id_) {
260 writer->WriteRecord(0x01, [it](BigEndianBuffer* buf) {
261 buf->WriteId(it.second);
262 // TODO(dinoderek): UTF-8 encoding
263 buf->Write(it.first.c_str(), static_cast<uint32_t>(it.first.length()));
264 });
265 }
266 }
267
268 // Writes LOAD CLASS sections to the output
WriteLoadClass(HprofWriter * writer)269 void WriteLoadClass(HprofWriter* writer) {
270 uint32_t class_serial_number = 1;
271 for (const auto& it : id_to_class_) {
272 it.second.WriteHprofLoadClass(writer, it.first, class_serial_number);
273 class_serial_number += 1;
274 }
275 }
276 };
277
WriteHeaderAndStack(HprofWriter * writer)278 void WriteHeaderAndStack(HprofWriter* writer) {
279 BigEndianBuffer header;
280 header.Write(kHeader, sizeof(kHeader));
281 // Identifier size
282 header.WriteU4(kIdSz);
283 // walltime high (unused)
284 header.WriteU4(0);
285 // walltime low (unused)
286 header.WriteU4(0);
287 writer->WriteBuffer(header);
288
289 // Add placeholder stack trace (required by the format).
290 writer->WriteRecord(0x05, [](BigEndianBuffer* buf) {
291 buf->WriteU4(kStackTraceSerialNumber);
292 buf->WriteU4(0);
293 buf->WriteU4(0);
294 });
295 }
296 } // namespace
297
TraceToHprof(trace_processor::TraceProcessor * tp,std::ostream * output,uint64_t pid,uint64_t ts)298 int TraceToHprof(trace_processor::TraceProcessor* tp,
299 std::ostream* output,
300 uint64_t pid,
301 uint64_t ts) {
302 PERFETTO_DCHECK(tp != nullptr && pid != 0 && ts != 0);
303
304 HprofWriter writer(output);
305 HeapDump dump(tp);
306
307 dump.Ingest();
308 WriteHeaderAndStack(&writer);
309 dump.Write(&writer);
310
311 return 0;
312 }
313
TraceToHprof(std::istream * input,std::ostream * output,uint64_t pid,std::vector<uint64_t> timestamps)314 int TraceToHprof(std::istream* input,
315 std::ostream* output,
316 uint64_t pid,
317 std::vector<uint64_t> timestamps) {
318 // TODO: Simplify this for cmdline users. For example, if there is a single
319 // heap graph, use this, and only fail when there is ambiguity.
320 if (pid == 0) {
321 PERFETTO_ELOG("Must specify pid");
322 return -1;
323 }
324 if (timestamps.size() != 1) {
325 PERFETTO_ELOG("Must specify single timestamp");
326 return -1;
327 }
328 trace_processor::Config config;
329 std::unique_ptr<trace_processor::TraceProcessor> tp =
330 trace_processor::TraceProcessor::CreateInstance(config);
331 if (!ReadTrace(tp.get(), input))
332 return false;
333 tp->NotifyEndOfFile();
334 return TraceToHprof(tp.get(), output, pid, timestamps[0]);
335 }
336
337 } // namespace trace_to_text
338 } // namespace perfetto
339