1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/compiler/xla/service/cpu/disassembler.h"
17
18 #include <stdint.h>
19 #include <algorithm>
20 // IWYU pragma: no_include <system_error>
21 #include <type_traits>
22 #include <vector>
23
24 #include "absl/strings/str_format.h"
25 #include "llvm/MC/MCInst.h"
26 #include "llvm/Support/TargetRegistry.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include "tensorflow/compiler/xla/status_macros.h"
29 #include "tensorflow/compiler/xla/types.h"
30 #include "tensorflow/compiler/xla/util.h"
31 #include "tensorflow/core/platform/logging.h"
32 #include "tensorflow/core/platform/types.h"
33
34 namespace xla {
35 namespace cpu {
36
Disassembler(const llvm::TargetMachine & target_machine)37 Disassembler::Disassembler(const llvm::TargetMachine& target_machine)
38 : subtarget_info_(*target_machine.getMCSubtargetInfo()) {
39 objfile_info_.reset(new llvm::MCObjectFileInfo());
40 mc_context_.reset(new llvm::MCContext(target_machine.getMCAsmInfo(),
41 target_machine.getMCRegisterInfo(),
42 objfile_info_.get()));
43 disassembler_.reset(target_machine.getTarget().createMCDisassembler(
44 subtarget_info_, *mc_context_));
45 inst_printer_.reset(target_machine.getTarget().createMCInstPrinter(
46 target_machine.getTargetTriple(),
47 /*SyntaxVariant=*/1, // Use Intel syntax.
48 *target_machine.getMCAsmInfo(), *target_machine.getMCInstrInfo(),
49 *target_machine.getMCRegisterInfo()));
50 inst_analysis_.reset(target_machine.getTarget().createMCInstrAnalysis(
51 target_machine.getMCInstrInfo()));
52 }
53
54 // This code is based on llvm-objdump in llvm/tools.
DisassembleObjectFile(const llvm::object::ObjectFile & object_file) const55 StatusOr<DisassemblerResult> Disassembler::DisassembleObjectFile(
56 const llvm::object::ObjectFile& object_file) const {
57 if (disassembler_ == nullptr) {
58 return NotFound("could not find a disassembler for this platform");
59 }
60
61 std::string buffer_string;
62 llvm::raw_string_ostream ostream(buffer_string);
63 uint64_t code_size_bytes = 0;
64
65 // Iterate through sections. Disassemble symbols of the text section(s).
66 for (auto& section : object_file.sections()) {
67 if (!section.isText()) {
68 continue;
69 }
70
71 // Gather symbols from the section.
72 std::vector<llvm::object::SymbolRef> symbols;
73 for (auto& symbol : object_file.symbols()) {
74 if (section.containsSymbol(symbol)) {
75 symbols.push_back(symbol);
76 }
77 }
78
79 // Sort the symbols in increasing address order.
80 absl::c_sort(symbols, [](const llvm::object::SymbolRef& a,
81 const llvm::object::SymbolRef& b) {
82 // getAddress returns a Expected object. Assert there is no error
83 // before extracting the address.
84 llvm::Expected<uint64_t> a_address_or_error = a.getAddress();
85 CHECK(a_address_or_error);
86 llvm::Expected<uint64_t> b_address_or_error = b.getAddress();
87 CHECK(b_address_or_error);
88 return a_address_or_error.get() < b_address_or_error.get();
89 });
90
91 // Construct ArrayRef pointing to section contents.
92 llvm::StringRef section_content_string;
93 if (section.getContents(section_content_string)) {
94 continue;
95 }
96 llvm::ArrayRef<uint8_t> section_content_bytes(
97 reinterpret_cast<const uint8*>(section_content_string.data()),
98 section_content_string.size());
99
100 // Use int types from LLVM (eg, uint64_t) for values passed to and returned
101 // from the LLVM API. These values map to different types in LLVM and
102 // XLA (unsigned long vs unsigned long long).
103 uint64_t section_address = section.getAddress();
104 uint64_t section_size = section.getSize();
105
106 // Iterate through symbols in increasing address order and disassemble each
107 // one.
108 for (int i = 0; i < symbols.size(); ++i) {
109 auto symbol = symbols[i];
110 llvm::Expected<uint64_t> address = symbol.getAddress();
111 CHECK(address);
112 uint64_t start_index = address.get() - section_address;
113
114 // End of symbol is either the end of the section or the start of the next
115 // symbol.
116 uint64_t end_index;
117 if (i < symbols.size() - 1) {
118 llvm::Expected<uint64_t> next_address = symbols[i + 1].getAddress();
119 CHECK(next_address);
120 end_index = std::min(section_size, next_address.get());
121 } else {
122 end_index = section_size;
123 }
124
125 // Skip zero-length symbols.
126 if (start_index == end_index) {
127 continue;
128 }
129
130 llvm::Expected<llvm::StringRef> name_or_error = symbol.getName();
131 TF_RET_CHECK(name_or_error);
132 ostream << name_or_error.get().str() << ":\n";
133
134 // Update the code size statistic.
135 code_size_bytes += end_index - start_index;
136
137 // Disassemble symbol instruction-by-instruction.
138 uint64_t index = start_index;
139 while (index < end_index) {
140 llvm::MCInst instruction;
141 uint64_t size;
142 llvm::MCDisassembler::DecodeStatus decode_status =
143 disassembler_->getInstruction(instruction, size,
144 section_content_bytes.slice(index),
145 /*Address=*/section_address + index,
146 /*VStream=*/llvm::nulls(),
147 /*CStream=*/llvm::nulls());
148 // If we fail to disassemble, then we must skip past this address.
149 if (size == 0) {
150 size = 1;
151 }
152
153 ostream << absl::StrFormat("0x%08lx", index) << " ";
154
155 if (decode_status == llvm::MCDisassembler::Success) {
156 // For branches, try to determine the actual address and emit it as an
157 // annotation.
158 string annotation;
159 if (inst_analysis_ &&
160 (inst_analysis_->isUnconditionalBranch(instruction) ||
161 inst_analysis_->isConditionalBranch(instruction))) {
162 uint64_t target;
163 if (inst_analysis_->evaluateBranch(
164 instruction, section_address + index, size, target)) {
165 annotation = absl::StrFormat("[0x%08lx]", target);
166 }
167 }
168 inst_printer_->printInst(&instruction, ostream, annotation.c_str(),
169 subtarget_info_);
170 } else {
171 ostream << " <unknown>";
172 }
173
174 ostream << "\n";
175 index += size;
176 }
177 }
178 }
179
180 ostream.flush();
181 return DisassemblerResult{
182 string(buffer_string.data(), buffer_string.length()), code_size_bytes};
183 }
184
185 } // namespace cpu
186 } // namespace xla
187