• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "ETMDecoder.h"
18 
19 #include <sstream>
20 
21 #include <android-base/expected.h>
22 #include <android-base/logging.h>
23 #include <android-base/strings.h>
24 #include <llvm/Support/MemoryBuffer.h>
25 #include <opencsd.h>
26 
27 namespace simpleperf {
28 namespace {
29 
30 class DecoderLogStr : public ocsdMsgLogStrOutI {
31  public:
printOutStr(const std::string & out_str)32   void printOutStr(const std::string& out_str) override { LOG(INFO) << out_str; }
33 };
34 
35 class DecodeErrorLogger : public ocsdDefaultErrorLogger {
36  public:
DecodeErrorLogger(const std::function<void (const ocsdError &)> & error_callback)37   DecodeErrorLogger(const std::function<void(const ocsdError&)>& error_callback)
38       : error_callback_(error_callback) {
39     initErrorLogger(OCSD_ERR_SEV_INFO, false);
40     msg_logger_.setLogOpts(ocsdMsgLogger::OUT_STR_CB);
41     msg_logger_.setStrOutFn(&log_str_);
42     setOutputLogger(&msg_logger_);
43   }
44 
LogError(const ocsd_hndl_err_log_t handle,const ocsdError * error)45   void LogError(const ocsd_hndl_err_log_t handle, const ocsdError* error) override {
46     ocsdDefaultErrorLogger::LogError(handle, error);
47     if (error != nullptr) {
48       error_callback_(*error);
49     }
50   }
51 
52  private:
53   std::function<void(const ocsdError&)> error_callback_;
54   DecoderLogStr log_str_;
55   ocsdMsgLogger msg_logger_;
56 };
57 
IsRespError(ocsd_datapath_resp_t resp)58 static bool IsRespError(ocsd_datapath_resp_t resp) {
59   return resp >= OCSD_RESP_ERR_CONT;
60 }
61 
62 // Used instead of DecodeTree in OpenCSD to avoid linking decoders not for ETMV4 instruction tracing
63 // in OpenCSD.
64 class ETMV4IDecodeTree {
65  public:
ETMV4IDecodeTree()66   ETMV4IDecodeTree()
67       : error_logger_(std::bind(&ETMV4IDecodeTree::ProcessError, this, std::placeholders::_1)) {
68     frame_decoder_.Configure(OCSD_DFRMTR_FRAME_MEM_ALIGN);
69     frame_decoder_.getErrLogAttachPt()->attach(&error_logger_);
70   }
71 
CreateDecoder(const EtmV4Config & config)72   bool CreateDecoder(const EtmV4Config& config) {
73     uint8_t trace_id = config.getTraceID();
74     auto packet_decoder = std::make_unique<TrcPktProcEtmV4I>(trace_id);
75     packet_decoder->setProtocolConfig(&config);
76     packet_decoder->getErrorLogAttachPt()->replace_first(&error_logger_);
77     frame_decoder_.getIDStreamAttachPt(trace_id)->attach(packet_decoder.get());
78     auto result = packet_decoders_.emplace(trace_id, packet_decoder.release());
79     if (!result.second) {
80       LOG(ERROR) << "trace id " << trace_id << " has been used";
81     }
82     return result.second;
83   }
84 
AttachPacketSink(uint8_t trace_id,IPktDataIn<EtmV4ITrcPacket> & packet_sink)85   void AttachPacketSink(uint8_t trace_id, IPktDataIn<EtmV4ITrcPacket>& packet_sink) {
86     auto& packet_decoder = packet_decoders_[trace_id];
87     CHECK(packet_decoder);
88     packet_decoder->getPacketOutAttachPt()->replace_first(&packet_sink);
89   }
90 
AttachPacketMonitor(uint8_t trace_id,IPktRawDataMon<EtmV4ITrcPacket> & packet_monitor)91   void AttachPacketMonitor(uint8_t trace_id, IPktRawDataMon<EtmV4ITrcPacket>& packet_monitor) {
92     auto& packet_decoder = packet_decoders_[trace_id];
93     CHECK(packet_decoder);
94     packet_decoder->getRawPacketMonAttachPt()->replace_first(&packet_monitor);
95   }
96 
AttachRawFramePrinter(RawFramePrinter & frame_printer)97   void AttachRawFramePrinter(RawFramePrinter& frame_printer) {
98     frame_decoder_.Configure(frame_decoder_.getConfigFlags() | OCSD_DFRMTR_PACKED_RAW_OUT);
99     frame_decoder_.getTrcRawFrameAttachPt()->replace_first(&frame_printer);
100   }
101 
GetDataIn()102   ITrcDataIn& GetDataIn() { return frame_decoder_; }
103 
ProcessError(const ocsdError & error)104   void ProcessError(const ocsdError& error) {
105     if (error.getErrorCode() == OCSD_ERR_INVALID_PCKT_HDR) {
106       // Found an invalid packet header, following packets for this trace id may also be invalid.
107       // So reset the decoder to find I_ASYNC packet in the data stream.
108       if (auto it = packet_decoders_.find(error.getErrorChanID()); it != packet_decoders_.end()) {
109         auto& packet_decoder = it->second;
110         CHECK(packet_decoder);
111         packet_decoder->TraceDataIn(OCSD_OP_RESET, error.getErrorIndex(), 0, nullptr, nullptr);
112       }
113     }
114   }
115 
ErrorLogger()116   DecodeErrorLogger& ErrorLogger() { return error_logger_; }
117 
118  private:
119   DecodeErrorLogger error_logger_;
120   TraceFormatterFrameDecoder frame_decoder_;
121   std::unordered_map<uint8_t, std::unique_ptr<TrcPktProcEtmV4I>> packet_decoders_;
122 };
123 
124 // Similar to IPktDataIn<EtmV4ITrcPacket>, but add trace id.
125 struct PacketCallback {
126   // packet callbacks are called in priority order.
127   enum Priority {
128     MAP_LOCATOR,
129     BRANCH_LIST_PARSER,
130     PACKET_TO_ELEMENT,
131   };
132 
PacketCallbacksimpleperf::__anon57c802930111::PacketCallback133   PacketCallback(Priority prio) : priority(prio) {}
~PacketCallbacksimpleperf::__anon57c802930111::PacketCallback134   virtual ~PacketCallback() {}
135   virtual ocsd_datapath_resp_t ProcessPacket(uint8_t trace_id, ocsd_datapath_op_t op,
136                                              ocsd_trc_index_t index_sop,
137                                              const EtmV4ITrcPacket* pkt) = 0;
138   const Priority priority;
139 };
140 
141 // Receives packets from a packet decoder in OpenCSD library.
142 class PacketSink : public IPktDataIn<EtmV4ITrcPacket> {
143  public:
PacketSink(uint8_t trace_id)144   PacketSink(uint8_t trace_id) : trace_id_(trace_id) {}
145 
AddCallback(PacketCallback * callback)146   void AddCallback(PacketCallback* callback) {
147     auto it = std::lower_bound(callbacks_.begin(), callbacks_.end(), callback,
148                                [](const PacketCallback* c1, const PacketCallback* c2) {
149                                  return c1->priority < c2->priority;
150                                });
151     callbacks_.insert(it, callback);
152   }
153 
PacketDataIn(ocsd_datapath_op_t op,ocsd_trc_index_t index_sop,const EtmV4ITrcPacket * pkt)154   ocsd_datapath_resp_t PacketDataIn(ocsd_datapath_op_t op, ocsd_trc_index_t index_sop,
155                                     const EtmV4ITrcPacket* pkt) override {
156     for (auto& callback : callbacks_) {
157       auto resp = callback->ProcessPacket(trace_id_, op, index_sop, pkt);
158       if (IsRespError(resp)) {
159         return resp;
160       }
161     }
162     return OCSD_RESP_CONT;
163   }
164 
165  private:
166   uint8_t trace_id_;
167   std::vector<PacketCallback*> callbacks_;
168 };
169 
170 // For each trace_id, when given an addr, find the thread and map it belongs to.
171 class MapLocator : public PacketCallback {
172  public:
MapLocator(ThreadTree & thread_tree)173   MapLocator(ThreadTree& thread_tree)
174       : PacketCallback(PacketCallback::MAP_LOCATOR), thread_tree_(thread_tree) {}
175 
GetThreadTree()176   ThreadTree& GetThreadTree() { return thread_tree_; }
177 
178   // Return current thread id of a trace_id. If not available, return -1.
GetTid(uint8_t trace_id) const179   pid_t GetTid(uint8_t trace_id) const { return trace_data_[trace_id].tid; }
180 
ProcessPacket(uint8_t trace_id,ocsd_datapath_op_t op,ocsd_trc_index_t index_sop,const EtmV4ITrcPacket * pkt)181   ocsd_datapath_resp_t ProcessPacket(uint8_t trace_id, ocsd_datapath_op_t op,
182                                      ocsd_trc_index_t index_sop,
183                                      const EtmV4ITrcPacket* pkt) override {
184     TraceData& data = trace_data_[trace_id];
185     if (op == OCSD_OP_DATA) {
186       if (pkt != nullptr && pkt->getContext().updated_c) {
187         int32_t new_tid = static_cast<int32_t>(pkt->getContext().ctxtID);
188         if (data.tid != new_tid) {
189           data.tid = new_tid;
190           data.thread = nullptr;
191           data.userspace_map = nullptr;
192         }
193       }
194     } else if (op == OCSD_OP_RESET) {
195       data.tid = -1;
196       data.thread = nullptr;
197       data.userspace_map = nullptr;
198     }
199     return OCSD_RESP_CONT;
200   }
201 
FindMap(uint8_t trace_id,uint64_t addr)202   const MapEntry* FindMap(uint8_t trace_id, uint64_t addr) {
203     TraceData& data = trace_data_[trace_id];
204     if (data.userspace_map != nullptr && data.userspace_map->Contains(addr)) {
205       return data.userspace_map;
206     }
207     if (data.tid == -1) {
208       return nullptr;
209     }
210     if (data.thread == nullptr) {
211       data.thread = thread_tree_.FindThread(data.tid);
212       if (data.thread == nullptr) {
213         return nullptr;
214       }
215     }
216     data.userspace_map = data.thread->maps->FindMapByAddr(addr);
217     if (data.userspace_map != nullptr) {
218       return data.userspace_map;
219     }
220     // We don't cache kernel map. Because kernel map can start from 0 and overlap all userspace
221     // maps.
222     return thread_tree_.GetKernelMaps().FindMapByAddr(addr);
223   }
224 
225  private:
226   struct TraceData {
227     int32_t tid = -1;  // thread id, -1 if invalid
228     const ThreadEntry* thread = nullptr;
229     const MapEntry* userspace_map = nullptr;
230   };
231 
232   ThreadTree& thread_tree_;
233   TraceData trace_data_[256];
234 };
235 
236 // Map (trace_id, ip address) to (binary_path, binary_offset), and read binary files.
237 class MemAccess : public ITargetMemAccess {
238  public:
MemAccess(MapLocator & map_locator)239   MemAccess(MapLocator& map_locator) : map_locator_(map_locator) {}
240 
ReadTargetMemory(const ocsd_vaddr_t address,uint8_t trace_id,ocsd_mem_space_acc_t,uint32_t * num_bytes,uint8_t * p_buffer)241   ocsd_err_t ReadTargetMemory(const ocsd_vaddr_t address, uint8_t trace_id, ocsd_mem_space_acc_t,
242                               uint32_t* num_bytes, uint8_t* p_buffer) override {
243     TraceData& data = trace_data_[trace_id];
244     const MapEntry* map = map_locator_.FindMap(trace_id, address);
245     // fast path
246     if (map != nullptr && map == data.buffer_map && address >= data.buffer_start &&
247         address + *num_bytes <= data.buffer_end) {
248       if (data.buffer == nullptr) {
249         *num_bytes = 0;
250       } else {
251         memcpy(p_buffer, data.buffer + (address - data.buffer_start), *num_bytes);
252       }
253       return OCSD_OK;
254     }
255 
256     // slow path
257     size_t copy_size = 0;
258     if (map != nullptr) {
259       llvm::MemoryBuffer* memory = GetMemoryBuffer(map->dso);
260       if (memory != nullptr) {
261         if (auto opt_offset = map->dso->IpToFileOffset(address, map->start_addr, map->pgoff);
262             opt_offset) {
263           uint64_t offset = opt_offset.value();
264           size_t file_size = memory->getBufferSize();
265           copy_size = file_size > offset ? std::min<size_t>(file_size - offset, *num_bytes) : 0;
266           if (copy_size > 0) {
267             memcpy(p_buffer, memory->getBufferStart() + offset, copy_size);
268           }
269         }
270       }
271       // Update the last buffer cache.
272       // Don't cache for the kernel map. Because simpleperf doesn't record an accurate kernel end
273       // addr.
274       if (!map->in_kernel) {
275         data.buffer_map = map;
276         data.buffer = memory == nullptr ? nullptr : (memory->getBufferStart() + map->pgoff);
277         data.buffer_start = map->start_addr;
278         data.buffer_end = map->get_end_addr();
279       }
280     }
281     *num_bytes = copy_size;
282     return OCSD_OK;
283   }
284 
285  private:
GetMemoryBuffer(Dso * dso)286   llvm::MemoryBuffer* GetMemoryBuffer(Dso* dso) {
287     auto it = elf_map_.find(dso);
288     if (it == elf_map_.end()) {
289       ElfStatus status;
290       auto res = elf_map_.emplace(dso, ElfFile::Open(dso->GetDebugFilePath(), &status));
291       it = res.first;
292     }
293     return it->second ? it->second->GetMemoryBuffer() : nullptr;
294   }
295 
296   struct TraceData {
297     const MapEntry* buffer_map = nullptr;
298     const char* buffer = nullptr;
299     uint64_t buffer_start = 0;
300     uint64_t buffer_end = 0;
301   };
302 
303   MapLocator& map_locator_;
304   std::unordered_map<Dso*, std::unique_ptr<ElfFile>> elf_map_;
305   TraceData trace_data_[256];
306 };
307 
308 class InstructionDecoder : public TrcIDecode {
309  public:
DecodeInstruction(ocsd_instr_info * instr_info)310   ocsd_err_t DecodeInstruction(ocsd_instr_info* instr_info) {
311     this->instr_info = instr_info;
312     return TrcIDecode::DecodeInstruction(instr_info);
313   }
314 
315   ocsd_instr_info* instr_info;
316 };
317 
318 // Similar to ITrcGenElemIn, but add next instruction info, which is needed to get branch to addr
319 // for an InstructionRange element.
320 struct ElementCallback {
321  public:
~ElementCallbacksimpleperf::__anon57c802930111::ElementCallback322   virtual ~ElementCallback(){};
323   virtual ocsd_datapath_resp_t ProcessElement(ocsd_trc_index_t index_sop, uint8_t trace_id,
324                                               const OcsdTraceElement& elem,
325                                               const ocsd_instr_info* next_instr) = 0;
326 };
327 
328 // Decode packets into elements.
329 class PacketToElement : public PacketCallback, public ITrcGenElemIn {
330  public:
PacketToElement(MapLocator & map_locator,const std::unordered_map<uint8_t,EtmV4Config> & configs,DecodeErrorLogger & error_logger)331   PacketToElement(MapLocator& map_locator, const std::unordered_map<uint8_t, EtmV4Config>& configs,
332                   DecodeErrorLogger& error_logger)
333       : PacketCallback(PacketCallback::PACKET_TO_ELEMENT), mem_access_(map_locator) {
334     for (auto& p : configs) {
335       uint8_t trace_id = p.first;
336       const EtmV4Config& config = p.second;
337       element_decoders_.emplace(trace_id, trace_id);
338       auto& decoder = element_decoders_[trace_id];
339       decoder.setProtocolConfig(&config);
340       decoder.getErrorLogAttachPt()->replace_first(&error_logger);
341       decoder.getInstrDecodeAttachPt()->replace_first(&instruction_decoder_);
342       decoder.getMemoryAccessAttachPt()->replace_first(&mem_access_);
343       decoder.getTraceElemOutAttachPt()->replace_first(this);
344     }
345   }
346 
AddCallback(ElementCallback * callback)347   void AddCallback(ElementCallback* callback) { callbacks_.push_back(callback); }
348 
ProcessPacket(uint8_t trace_id,ocsd_datapath_op_t op,ocsd_trc_index_t index_sop,const EtmV4ITrcPacket * pkt)349   ocsd_datapath_resp_t ProcessPacket(uint8_t trace_id, ocsd_datapath_op_t op,
350                                      ocsd_trc_index_t index_sop,
351                                      const EtmV4ITrcPacket* pkt) override {
352     return element_decoders_[trace_id].PacketDataIn(op, index_sop, pkt);
353   }
354 
TraceElemIn(const ocsd_trc_index_t index_sop,uint8_t trc_chan_id,const OcsdTraceElement & elem)355   ocsd_datapath_resp_t TraceElemIn(const ocsd_trc_index_t index_sop, uint8_t trc_chan_id,
356                                    const OcsdTraceElement& elem) override {
357     for (auto& callback : callbacks_) {
358       auto resp =
359           callback->ProcessElement(index_sop, trc_chan_id, elem, instruction_decoder_.instr_info);
360       if (IsRespError(resp)) {
361         return resp;
362       }
363     }
364     return OCSD_RESP_CONT;
365   }
366 
367  private:
368   // map from trace id of an etm device to its element decoder
369   std::unordered_map<uint8_t, TrcPktDecodeEtmV4I> element_decoders_;
370   MemAccess mem_access_;
371   InstructionDecoder instruction_decoder_;
372   std::vector<ElementCallback*> callbacks_;
373 };
374 
375 // Dump etm data generated at different stages.
376 class DataDumper : public ElementCallback {
377  public:
DataDumper(ETMV4IDecodeTree & decode_tree)378   DataDumper(ETMV4IDecodeTree& decode_tree) : decode_tree_(decode_tree) {}
379 
DumpRawData()380   void DumpRawData() {
381     decode_tree_.AttachRawFramePrinter(frame_printer_);
382     frame_printer_.setMessageLogger(&stdout_logger_);
383   }
384 
DumpPackets(const std::unordered_map<uint8_t,EtmV4Config> & configs)385   void DumpPackets(const std::unordered_map<uint8_t, EtmV4Config>& configs) {
386     for (auto& p : configs) {
387       uint8_t trace_id = p.first;
388       auto result = packet_printers_.emplace(trace_id, trace_id);
389       CHECK(result.second);
390       auto& packet_printer = result.first->second;
391       decode_tree_.AttachPacketMonitor(trace_id, packet_printer);
392       packet_printer.setMessageLogger(&stdout_logger_);
393     }
394   }
395 
DumpElements()396   void DumpElements() { element_printer_.setMessageLogger(&stdout_logger_); }
397 
ProcessElement(ocsd_trc_index_t index_sop,uint8_t trc_chan_id,const OcsdTraceElement & elem,const ocsd_instr_info *)398   ocsd_datapath_resp_t ProcessElement(ocsd_trc_index_t index_sop, uint8_t trc_chan_id,
399                                       const OcsdTraceElement& elem, const ocsd_instr_info*) {
400     return element_printer_.TraceElemIn(index_sop, trc_chan_id, elem);
401   }
402 
403  private:
404   ETMV4IDecodeTree& decode_tree_;
405   RawFramePrinter frame_printer_;
406   std::unordered_map<uint8_t, PacketPrinter<EtmV4ITrcPacket>> packet_printers_;
407   TrcGenericElementPrinter element_printer_;
408   ocsdMsgLogger stdout_logger_;
409 };
410 
// It decodes each ETMV4IPacket into TraceElements, and generates ETMInstrRanges from TraceElements.
// Decoding each packet is slow, but ensures correctness.
//
// An instr range isn't reported as soon as it is built. It is kept per trace id and reported when
// the next instr range, a Trace On element or FinishData() arrives, so that its branch to address
// can still be corrected by the next instr range.
class InstrRangeParser : public ElementCallback {
 private:
  struct TraceData {
    // The pending instr range. It holds valid data only when instr_range.dso isn't nullptr.
    ETMInstrRange instr_range;
    // True when instr_range.branch_to_addr was taken from next_instr, and should be replaced by
    // the start address of the next instr range.
    bool wait_for_branch_to_addr_fix = false;
  };

 public:
  // callback is invoked with every finished instr range.
  InstrRangeParser(MapLocator& map_locator, const ETMDecoder::InstrRangeCallbackFn& callback)
      : map_locator_(map_locator), callback_(callback) {}

  // Handles InstructionRange and Trace On elements, and ignores other elements. Always returns
  // OCSD_RESP_CONT.
  ocsd_datapath_resp_t ProcessElement(const ocsd_trc_index_t, uint8_t trace_id,
                                      const OcsdTraceElement& elem,
                                      const ocsd_instr_info* next_instr) override {
    if (elem.getType() == OCSD_GEN_TRC_ELEM_INSTR_RANGE) {
      TraceData& data = trace_data_[trace_id];
      const MapEntry* map = map_locator_.FindMap(trace_id, elem.st_addr);
      if (map == nullptr) {
        // The range can't be mapped to a binary. Report the pending range and drop this one.
        FlushData(data);
        return OCSD_RESP_CONT;
      }
      uint64_t start_addr = map->GetVaddrInFile(elem.st_addr);
      auto& instr_range = data.instr_range;

      if (data.wait_for_branch_to_addr_fix) {
        // OpenCSD may cache a list of InstrRange elements, making it inaccurate to get branch to
        // address from next_instr->branch_addr. So fix it by using the start address of the next
        // InstrRange element.
        instr_range.branch_to_addr = start_addr;
      }
      // Report the pending range before reusing instr_range for the current element.
      FlushData(data);
      instr_range.dso = map->dso;
      instr_range.start_addr = start_addr;
      // end_addr is the address of the last instruction in the range.
      instr_range.end_addr = map->GetVaddrInFile(elem.en_addr - elem.last_instr_sz);
      bool end_with_branch =
          elem.last_i_type == OCSD_INSTR_BR || elem.last_i_type == OCSD_INSTR_BR_INDIRECT;
      bool branch_taken = end_with_branch && elem.last_instr_exec;
      if (elem.last_i_type == OCSD_INSTR_BR && branch_taken) {
        // It is based on the assumption that we only do immediate branch inside a binary,
        // which may not be true for all cases. TODO: http://b/151665001.
        instr_range.branch_to_addr = map->GetVaddrInFile(next_instr->branch_addr);
        data.wait_for_branch_to_addr_fix = true;
      } else {
        instr_range.branch_to_addr = 0;
      }
      // A range not ending with a taken branch is counted as one not-taken branch.
      instr_range.branch_taken_count = branch_taken ? 1 : 0;
      instr_range.branch_not_taken_count = branch_taken ? 0 : 1;

    } else if (elem.getType() == OCSD_GEN_TRC_ELEM_TRACE_ON) {
      // According to the ETM Specification, the Trace On element indicates a discontinuity in the
      // instruction trace stream. So it cuts the connection between instr ranges.
      FlushData(trace_data_[trace_id]);
    }
    return OCSD_RESP_CONT;
  }

  // Reports the pending instr ranges of all trace ids.
  void FinishData() {
    for (auto& pair : trace_data_) {
      FlushData(pair.second);
    }
  }

 private:
  // Reports the pending instr range of data if there is one, and marks data as having no pending
  // range and no branch to address to fix.
  void FlushData(TraceData& data) {
    if (data.instr_range.dso != nullptr) {
      callback_(data.instr_range);
      data.instr_range.dso = nullptr;
    }
    data.wait_for_branch_to_addr_fix = false;
  }

  MapLocator& map_locator_;
  // map from trace id to its pending instr range
  std::unordered_map<uint8_t, TraceData> trace_data_;
  ETMDecoder::InstrRangeCallbackFn callback_;
};
488 
489 // It parses ETMBranchLists from ETMV4IPackets.
490 // It doesn't do element decoding and instruction decoding, thus is about 5 timers faster than
491 // InstrRangeParser. But some data will be lost when converting ETMBranchLists to InstrRanges:
492 //   1. InstrRanges described by Except packets (the last instructions executed before exeception,
493 //      about 2%?).
494 //   2. Branch to addresses of direct branch instructions across binaries.
495 class BranchListParser : public PacketCallback {
496  private:
497   struct TraceData {
498     uint64_t addr = 0;
499     uint8_t addr_valid_bits = 0;
500     uint8_t isa = 0;
501     bool invalid_branch = false;
502     ETMBranchList branch;
503   };
504 
505  public:
BranchListParser(MapLocator & map_locator,const ETMDecoder::BranchListCallbackFn & callback)506   BranchListParser(MapLocator& map_locator, const ETMDecoder::BranchListCallbackFn& callback)
507       : PacketCallback(BRANCH_LIST_PARSER), map_locator_(map_locator), callback_(callback) {}
508 
CheckConfigs(std::unordered_map<uint8_t,EtmV4Config> & configs)509   void CheckConfigs(std::unordered_map<uint8_t, EtmV4Config>& configs) {
510     // TODO: Current implementation doesn't support non-zero speculation length and return stack.
511     for (auto& p : configs) {
512       if (p.second.MaxSpecDepth() > 0) {
513         LOG(WARNING) << "branch list collection isn't accurate with non-zero speculation length";
514         break;
515       }
516     }
517     for (auto& p : configs) {
518       if (p.second.enabledRetStack()) {
519         LOG(WARNING) << "branch list collection will lose some data with return stack enabled";
520         break;
521       }
522     }
523   }
524 
IsAddrPacket(const EtmV4ITrcPacket * pkt)525   bool IsAddrPacket(const EtmV4ITrcPacket* pkt) {
526     return pkt->getType() >= ETM4_PKT_I_ADDR_CTXT_L_32IS0 &&
527            pkt->getType() <= ETM4_PKT_I_ADDR_L_64IS1;
528   }
529 
IsAtomPacket(const EtmV4ITrcPacket * pkt)530   bool IsAtomPacket(const EtmV4ITrcPacket* pkt) { return pkt->getAtom().num > 0; }
531 
ProcessPacket(uint8_t trace_id,ocsd_datapath_op_t op,ocsd_trc_index_t,const EtmV4ITrcPacket * pkt)532   ocsd_datapath_resp_t ProcessPacket(uint8_t trace_id, ocsd_datapath_op_t op,
533                                      ocsd_trc_index_t /*index_sop */,
534                                      const EtmV4ITrcPacket* pkt) override {
535     TraceData& data = trace_data_[trace_id];
536     if (op == OCSD_OP_DATA) {
537       if (IsAddrPacket(pkt)) {
538         // Flush branch when seeing an Addr packet. Because it isn't correct to concatenate
539         // branches before and after an Addr packet.
540         FlushBranch(data);
541         data.addr = pkt->getAddrVal();
542         data.addr_valid_bits = pkt->v_addr.valid_bits;
543         data.isa = pkt->getAddrIS();
544       }
545 
546       if (IsAtomPacket(pkt)) {
547         // An atom packet contains a branch list. We may receive one or more atom packets in a row,
548         // and need to concatenate them.
549         ProcessAtomPacket(trace_id, data, pkt);
550       }
551 
552     } else {
553       // Flush branch when seeing a flush or reset operation.
554       FlushBranch(data);
555       if (op == OCSD_OP_RESET) {
556         data.addr = 0;
557         data.addr_valid_bits = 0;
558         data.isa = 0;
559         data.invalid_branch = false;
560       }
561     }
562     return OCSD_RESP_CONT;
563   }
564 
FinishData()565   void FinishData() {
566     for (auto& pair : trace_data_) {
567       FlushBranch(pair.second);
568     }
569   }
570 
571  private:
ProcessAtomPacket(uint8_t trace_id,TraceData & data,const EtmV4ITrcPacket * pkt)572   void ProcessAtomPacket(uint8_t trace_id, TraceData& data, const EtmV4ITrcPacket* pkt) {
573     if (data.invalid_branch) {
574       return;  // Skip atom packets when we think a branch list is invalid.
575     }
576     if (data.branch.branch.empty()) {
577       // This is the first atom packet in a branch list. Check if we have tid and addr info to
578       // parse it and the following atom packets. If not, mark the branch list as invalid.
579       if (map_locator_.GetTid(trace_id) == -1 || data.addr_valid_bits == 0) {
580         data.invalid_branch = true;
581         return;
582       }
583       const MapEntry* map = map_locator_.FindMap(trace_id, data.addr);
584       if (map == nullptr) {
585         data.invalid_branch = true;
586         return;
587       }
588       data.branch.dso = map->dso;
589       data.branch.addr = map->GetVaddrInFile(data.addr);
590       if (data.isa == 1) {  // thumb instruction, mark it in bit 0.
591         data.branch.addr |= 1;
592       }
593     }
594     uint32_t bits = pkt->atom.En_bits;
595     for (size_t i = 0; i < pkt->atom.num; i++) {
596       data.branch.branch.push_back((bits & 1) == 1);
597       bits >>= 1;
598     }
599   }
600 
FlushBranch(TraceData & data)601   void FlushBranch(TraceData& data) {
602     if (!data.branch.branch.empty()) {
603       callback_(data.branch);
604       data.branch.branch.clear();
605     }
606     data.invalid_branch = false;
607   }
608 
609   MapLocator& map_locator_;
610   ETMDecoder::BranchListCallbackFn callback_;
611   std::unordered_map<uint8_t, TraceData> trace_data_;
612 };
613 
614 // Etm data decoding in OpenCSD library has two steps:
615 // 1. From byte stream to etm packets. Each packet shows an event happened. For example,
616 // an Address packet shows the cpu is running the instruction at that address, an Atom
617 // packet shows whether the cpu decides to branch or not.
618 // 2. From etm packets to trace elements. To generates elements, the decoder needs both etm
619 // packets and executed binaries. For example, an InstructionRange element needs the decoder
620 // to find the next branch instruction starting from an address.
621 //
622 // ETMDecoderImpl uses OpenCSD library to decode etm data. It has the following properties:
623 // 1. Supports flexible decoding strategy. It allows installing packet callbacks and element
624 // callbacks, and decodes to either packets or elements based on requirements.
625 // 2. Supports dumping data at different stages.
626 class ETMDecoderImpl : public ETMDecoder {
627  public:
ETMDecoderImpl(ThreadTree & thread_tree)628   ETMDecoderImpl(ThreadTree& thread_tree) : thread_tree_(thread_tree) {}
629 
CreateDecodeTree(const AuxTraceInfoRecord & auxtrace_info)630   void CreateDecodeTree(const AuxTraceInfoRecord& auxtrace_info) {
631     for (int i = 0; i < auxtrace_info.data->nr_cpu; i++) {
632       auto& etm4 = auxtrace_info.data->etm4_info[i];
633       ocsd_etmv4_cfg cfg;
634       memset(&cfg, 0, sizeof(cfg));
635       cfg.reg_idr0 = etm4.trcidr0;
636       cfg.reg_idr1 = etm4.trcidr1;
637       cfg.reg_idr2 = etm4.trcidr2;
638       cfg.reg_idr8 = etm4.trcidr8;
639       cfg.reg_configr = etm4.trcconfigr;
640       cfg.reg_traceidr = etm4.trctraceidr;
641       cfg.arch_ver = ARCH_V8;
642       cfg.core_prof = profile_CortexA;
643       uint8_t trace_id = cfg.reg_traceidr & 0x7f;
644       configs_.emplace(trace_id, &cfg);
645       decode_tree_.CreateDecoder(configs_[trace_id]);
646       auto result = packet_sinks_.emplace(trace_id, trace_id);
647       CHECK(result.second);
648       decode_tree_.AttachPacketSink(trace_id, result.first->second);
649     }
650   }
651 
EnableDump(const ETMDumpOption & option)652   void EnableDump(const ETMDumpOption& option) override {
653     dumper_.reset(new DataDumper(decode_tree_));
654     if (option.dump_raw_data) {
655       dumper_->DumpRawData();
656     }
657     if (option.dump_packets) {
658       dumper_->DumpPackets(configs_);
659     }
660     if (option.dump_elements) {
661       dumper_->DumpElements();
662       InstallElementCallback(dumper_.get());
663     }
664   }
665 
RegisterCallback(const InstrRangeCallbackFn & callback)666   void RegisterCallback(const InstrRangeCallbackFn& callback) {
667     InstallMapLocator();
668     instr_range_parser_.reset(new InstrRangeParser(*map_locator_, callback));
669     InstallElementCallback(instr_range_parser_.get());
670   }
671 
RegisterCallback(const BranchListCallbackFn & callback)672   void RegisterCallback(const BranchListCallbackFn& callback) {
673     InstallMapLocator();
674     branch_list_parser_.reset(new BranchListParser(*map_locator_, callback));
675     branch_list_parser_->CheckConfigs(configs_);
676     InstallPacketCallback(branch_list_parser_.get());
677   }
678 
ProcessData(const uint8_t * data,size_t size)679   bool ProcessData(const uint8_t* data, size_t size) override {
680     // Reset decoders before processing each data block. Because:
681     // 1. Data blocks are not continuous. So decoders shouldn't keep previous states when
682     //    processing a new block.
683     // 2. The beginning part of a data block may be truncated if kernel buffer is temporarily full.
684     //    So we may see garbage data, which can cause decoding errors if we don't reset decoders.
685     auto resp =
686         decode_tree_.GetDataIn().TraceDataIn(OCSD_OP_RESET, data_index_, 0, nullptr, nullptr);
687     if (IsRespError(resp)) {
688       LOG(ERROR) << "failed to reset decoder, resp " << resp;
689       return false;
690     }
691     size_t left_size = size;
692     while (left_size > 0) {
693       uint32_t processed;
694       auto resp = decode_tree_.GetDataIn().TraceDataIn(OCSD_OP_DATA, data_index_, left_size, data,
695                                                        &processed);
696       if (IsRespError(resp)) {
697         // A decoding error shouldn't ruin all data. Reset decoders to recover from it.
698         LOG(INFO) << "reset etm decoders for seeing a decode failure, resp " << resp;
699         decode_tree_.GetDataIn().TraceDataIn(OCSD_OP_RESET, data_index_ + processed, 0, nullptr,
700                                              nullptr);
701       }
702       data += processed;
703       left_size -= processed;
704       data_index_ += processed;
705     }
706     return true;
707   }
708 
FinishData()709   bool FinishData() override {
710     if (instr_range_parser_) {
711       instr_range_parser_->FinishData();
712     }
713     if (branch_list_parser_) {
714       branch_list_parser_->FinishData();
715     }
716     return true;
717   }
718 
719  private:
InstallMapLocator()720   void InstallMapLocator() {
721     if (!map_locator_) {
722       map_locator_.reset(new MapLocator(thread_tree_));
723       InstallPacketCallback(map_locator_.get());
724     }
725   }
726 
InstallPacketCallback(PacketCallback * callback)727   void InstallPacketCallback(PacketCallback* callback) {
728     for (auto& p : packet_sinks_) {
729       p.second.AddCallback(callback);
730     }
731   }
732 
InstallElementCallback(ElementCallback * callback)733   void InstallElementCallback(ElementCallback* callback) {
734     if (!packet_to_element_) {
735       InstallMapLocator();
736       packet_to_element_.reset(
737           new PacketToElement(*map_locator_, configs_, decode_tree_.ErrorLogger()));
738       InstallPacketCallback(packet_to_element_.get());
739     }
740     packet_to_element_->AddCallback(callback);
741   }
742 
743   // map ip address to binary path and binary offset
744   ThreadTree& thread_tree_;
745   // handle to build OpenCSD decoder
746   ETMV4IDecodeTree decode_tree_;
747   // map from the trace id of an etm device to its config
748   std::unordered_map<uint8_t, EtmV4Config> configs_;
749   // map from the trace id of an etm device to its PacketSink
750   std::unordered_map<uint8_t, PacketSink> packet_sinks_;
751   std::unique_ptr<PacketToElement> packet_to_element_;
752   std::unique_ptr<DataDumper> dumper_;
753   // an index keeping processed etm data size
754   size_t data_index_ = 0;
755   std::unique_ptr<InstrRangeParser> instr_range_parser_;
756   std::unique_ptr<MapLocator> map_locator_;
757   std::unique_ptr<BranchListParser> branch_list_parser_;
758 };
759 
760 }  // namespace
761 
ParseEtmDumpOption(const std::string & s,ETMDumpOption * option)762 bool ParseEtmDumpOption(const std::string& s, ETMDumpOption* option) {
763   for (auto& value : android::base::Split(s, ",")) {
764     if (value == "raw") {
765       option->dump_raw_data = true;
766     } else if (value == "packet") {
767       option->dump_packets = true;
768     } else if (value == "element") {
769       option->dump_elements = true;
770     } else {
771       LOG(ERROR) << "unknown etm dump option: " << value;
772       return false;
773     }
774   }
775   return true;
776 }
777 
Create(const AuxTraceInfoRecord & auxtrace_info,ThreadTree & thread_tree)778 std::unique_ptr<ETMDecoder> ETMDecoder::Create(const AuxTraceInfoRecord& auxtrace_info,
779                                                ThreadTree& thread_tree) {
780   auto decoder = std::make_unique<ETMDecoderImpl>(thread_tree);
781   decoder->CreateDecodeTree(auxtrace_info);
782   return std::unique_ptr<ETMDecoder>(decoder.release());
783 }
784 
785 // Use OpenCSD instruction decoder to convert branches to instruction addresses.
786 class BranchDecoder {
787  public:
Init(Dso * dso)788   android::base::expected<void, std::string> Init(Dso* dso) {
789     ElfStatus status;
790     elf_ = ElfFile::Open(dso->GetDebugFilePath(), &status);
791     if (!elf_) {
792       std::stringstream ss;
793       ss << status;
794       return android::base::unexpected(ss.str());
795     }
796     if (dso->type() == DSO_KERNEL_MODULE) {
797       // Kernel module doesn't have program header. So create a fake one mapping to .text section.
798       for (const auto& section : elf_->GetSectionHeader()) {
799         if (section.name == ".text") {
800           segments_.resize(1);
801           segments_[0].is_executable = true;
802           segments_[0].is_load = true;
803           segments_[0].file_offset = section.file_offset;
804           segments_[0].file_size = section.size;
805           segments_[0].vaddr = section.vaddr;
806           break;
807         }
808       }
809     } else {
810       segments_ = elf_->GetProgramHeader();
811       auto it = std::remove_if(segments_.begin(), segments_.end(),
812                                [](const ElfSegment& s) { return !s.is_executable; });
813       segments_.resize(it - segments_.begin());
814     }
815     if (segments_.empty()) {
816       return android::base::unexpected("no segments");
817     }
818     buffer_ = elf_->GetMemoryBuffer();
819     return {};
820   }
821 
SetAddr(uint64_t addr,bool is_thumb)822   void SetAddr(uint64_t addr, bool is_thumb) {
823     memset(&instr_info_, 0, sizeof(instr_info_));
824     instr_info_.pe_type.arch = ARCH_V8;
825     instr_info_.pe_type.profile = profile_CortexA;
826     instr_info_.isa =
827         elf_->Is64Bit() ? ocsd_isa_aarch64 : (is_thumb ? ocsd_isa_thumb2 : ocsd_isa_arm);
828     instr_info_.instr_addr = addr;
829   }
830 
FindNextBranch()831   bool FindNextBranch() {
832     // Loop until we find a branch instruction.
833     while (ReadMem(instr_info_.instr_addr, 4, &instr_info_.opcode)) {
834       ocsd_err_t err = instruction_decoder_.DecodeInstruction(&instr_info_);
835       if (err != OCSD_OK) {
836         break;
837       }
838       if (instr_info_.type != OCSD_INSTR_OTHER) {
839         return true;
840       }
841       instr_info_.instr_addr += instr_info_.instr_size;
842     }
843     return false;
844   };
845 
InstrInfo()846   ocsd_instr_info& InstrInfo() { return instr_info_; }
847 
848  private:
ReadMem(uint64_t vaddr,size_t size,void * data)849   bool ReadMem(uint64_t vaddr, size_t size, void* data) {
850     for (auto& segment : segments_) {
851       if (vaddr >= segment.vaddr && vaddr + size <= segment.vaddr + segment.file_size) {
852         uint64_t offset = vaddr - segment.vaddr + segment.file_offset;
853         memcpy(data, buffer_->getBufferStart() + offset, size);
854         return true;
855       }
856     }
857     return false;
858   }
859 
860   std::unique_ptr<ElfFile> elf_;
861   std::vector<ElfSegment> segments_;
862   llvm::MemoryBuffer* buffer_ = nullptr;
863   ocsd_instr_info instr_info_;
864   InstructionDecoder instruction_decoder_;
865 };
866 
ConvertBranchMapToInstrRanges(Dso * dso,const BranchMap & branch_map,const ETMDecoder::InstrRangeCallbackFn & callback)867 android::base::expected<void, std::string> ConvertBranchMapToInstrRanges(
868     Dso* dso, const BranchMap& branch_map, const ETMDecoder::InstrRangeCallbackFn& callback) {
869   ETMInstrRange instr_range;
870   instr_range.dso = dso;
871 
872   BranchDecoder decoder;
873   if (auto result = decoder.Init(dso); !result.ok()) {
874     return result;
875   }
876 
877   for (const auto& addr_p : branch_map) {
878     uint64_t start_addr = addr_p.first & ~1ULL;
879     bool is_thumb = addr_p.first & 1;
880     for (const auto& branch_p : addr_p.second) {
881       const std::vector<bool>& branch = branch_p.first;
882       uint64_t count = branch_p.second;
883       decoder.SetAddr(start_addr, is_thumb);
884 
885       for (bool b : branch) {
886         ocsd_instr_info& instr = decoder.InstrInfo();
887         uint64_t from_addr = instr.instr_addr;
888         if (!decoder.FindNextBranch()) {
889           break;
890         }
891         bool end_with_branch = instr.type == OCSD_INSTR_BR || instr.type == OCSD_INSTR_BR_INDIRECT;
892         bool branch_taken = end_with_branch && b;
893         instr_range.start_addr = from_addr;
894         instr_range.end_addr = instr.instr_addr;
895         if (instr.type == OCSD_INSTR_BR) {
896           instr_range.branch_to_addr = instr.branch_addr;
897         } else {
898           instr_range.branch_to_addr = 0;
899         }
900         instr_range.branch_taken_count = branch_taken ? count : 0;
901         instr_range.branch_not_taken_count = branch_taken ? 0 : count;
902 
903         callback(instr_range);
904 
905         if (b) {
906           instr.instr_addr = instr.branch_addr;
907         } else {
908           instr.instr_addr += instr.instr_size;
909         }
910       }
911     }
912   }
913   return {};
914 }
915 
916 }  // namespace simpleperf