1 //===-- DisassemblerLLVMC.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "DisassemblerLLVMC.h"
10
11 #include "llvm-c/Disassembler.h"
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/MC/MCAsmInfo.h"
14 #include "llvm/MC/MCContext.h"
15 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
16 #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h"
17 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
18 #include "llvm/MC/MCInst.h"
19 #include "llvm/MC/MCInstPrinter.h"
20 #include "llvm/MC/MCInstrInfo.h"
21 #include "llvm/MC/MCRegisterInfo.h"
22 #include "llvm/MC/MCSubtargetInfo.h"
23 #include "llvm/MC/MCTargetOptions.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/ScopedPrinter.h"
26 #include "llvm/Support/TargetRegistry.h"
27 #include "llvm/Support/TargetSelect.h"
28
29 #include "lldb/Core/Address.h"
30 #include "lldb/Core/Module.h"
31 #include "lldb/Symbol/SymbolContext.h"
32 #include "lldb/Target/ExecutionContext.h"
33 #include "lldb/Target/Process.h"
34 #include "lldb/Target/RegisterContext.h"
35 #include "lldb/Target/SectionLoadList.h"
36 #include "lldb/Target/StackFrame.h"
37 #include "lldb/Target/Target.h"
38 #include "lldb/Utility/DataExtractor.h"
39 #include "lldb/Utility/Log.h"
40 #include "lldb/Utility/RegularExpression.h"
41 #include "lldb/Utility/Stream.h"
42
43 using namespace lldb;
44 using namespace lldb_private;
45
46 LLDB_PLUGIN_DEFINE(DisassemblerLLVMC)
47
48 class DisassemblerLLVMC::MCDisasmInstance {
49 public:
50 static std::unique_ptr<MCDisasmInstance>
51 Create(const char *triple, const char *cpu, const char *features_str,
52 unsigned flavor, DisassemblerLLVMC &owner);
53
54 ~MCDisasmInstance() = default;
55
56 uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len,
57 lldb::addr_t pc, llvm::MCInst &mc_inst) const;
58 void PrintMCInst(llvm::MCInst &mc_inst, std::string &inst_string,
59 std::string &comments_string);
60 void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style);
61 bool CanBranch(llvm::MCInst &mc_inst) const;
62 bool HasDelaySlot(llvm::MCInst &mc_inst) const;
63 bool IsCall(llvm::MCInst &mc_inst) const;
64
65 private:
66 MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up,
67 std::unique_ptr<llvm::MCRegisterInfo> &®_info_up,
68 std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up,
69 std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up,
70 std::unique_ptr<llvm::MCContext> &&context_up,
71 std::unique_ptr<llvm::MCDisassembler> &&disasm_up,
72 std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up);
73
74 std::unique_ptr<llvm::MCInstrInfo> m_instr_info_up;
75 std::unique_ptr<llvm::MCRegisterInfo> m_reg_info_up;
76 std::unique_ptr<llvm::MCSubtargetInfo> m_subtarget_info_up;
77 std::unique_ptr<llvm::MCAsmInfo> m_asm_info_up;
78 std::unique_ptr<llvm::MCContext> m_context_up;
79 std::unique_ptr<llvm::MCDisassembler> m_disasm_up;
80 std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up;
81 };
82
83 class InstructionLLVMC : public lldb_private::Instruction {
84 public:
/// Creates an instruction at \p address that keeps only a weak reference to
/// the disassembler that produced it.
InstructionLLVMC(DisassemblerLLVMC &disasm,
                 const lldb_private::Address &address, AddressClass addr_class)
    : Instruction(address, addr_class),
      // shared_from_this() yields the base-class pointer; downcast it so the
      // LLVMC-specific members are reachable once the weak pointer is locked.
      m_disasm_wp(std::static_pointer_cast<DisassemblerLLVMC>(
          disasm.shared_from_this())),
      m_using_file_addr(false) {
}
92
// Nothing to release by hand; the members clean up after themselves.
~InstructionLLVMC() override = default;
94
DoesBranch()95 bool DoesBranch() override {
96 VisitInstruction();
97 return m_does_branch;
98 }
99
HasDelaySlot()100 bool HasDelaySlot() override {
101 VisitInstruction();
102 return m_has_delay_slot;
103 }
104
GetDisasmToUse(bool & is_alternate_isa)105 DisassemblerLLVMC::MCDisasmInstance *GetDisasmToUse(bool &is_alternate_isa) {
106 DisassemblerScope disasm(*this);
107 return GetDisasmToUse(is_alternate_isa, disasm);
108 }
109
Decode(const lldb_private::Disassembler & disassembler,const lldb_private::DataExtractor & data,lldb::offset_t data_offset)110 size_t Decode(const lldb_private::Disassembler &disassembler,
111 const lldb_private::DataExtractor &data,
112 lldb::offset_t data_offset) override {
113 // All we have to do is read the opcode which can be easy for some
114 // architectures
115 bool got_op = false;
116 DisassemblerScope disasm(*this);
117 if (disasm) {
118 const ArchSpec &arch = disasm->GetArchitecture();
119 const lldb::ByteOrder byte_order = data.GetByteOrder();
120
121 const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize();
122 const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize();
123 if (min_op_byte_size == max_op_byte_size) {
124 // Fixed size instructions, just read that amount of data.
125 if (!data.ValidOffsetForDataOfSize(data_offset, min_op_byte_size))
126 return false;
127
128 switch (min_op_byte_size) {
129 case 1:
130 m_opcode.SetOpcode8(data.GetU8(&data_offset), byte_order);
131 got_op = true;
132 break;
133
134 case 2:
135 m_opcode.SetOpcode16(data.GetU16(&data_offset), byte_order);
136 got_op = true;
137 break;
138
139 case 4:
140 m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order);
141 got_op = true;
142 break;
143
144 case 8:
145 m_opcode.SetOpcode64(data.GetU64(&data_offset), byte_order);
146 got_op = true;
147 break;
148
149 default:
150 m_opcode.SetOpcodeBytes(data.PeekData(data_offset, min_op_byte_size),
151 min_op_byte_size);
152 got_op = true;
153 break;
154 }
155 }
156 if (!got_op) {
157 bool is_alternate_isa = false;
158 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
159 GetDisasmToUse(is_alternate_isa, disasm);
160
161 const llvm::Triple::ArchType machine = arch.GetMachine();
162 if (machine == llvm::Triple::arm || machine == llvm::Triple::thumb) {
163 if (machine == llvm::Triple::thumb || is_alternate_isa) {
164 uint32_t thumb_opcode = data.GetU16(&data_offset);
165 if ((thumb_opcode & 0xe000) != 0xe000 ||
166 ((thumb_opcode & 0x1800u) == 0)) {
167 m_opcode.SetOpcode16(thumb_opcode, byte_order);
168 m_is_valid = true;
169 } else {
170 thumb_opcode <<= 16;
171 thumb_opcode |= data.GetU16(&data_offset);
172 m_opcode.SetOpcode16_2(thumb_opcode, byte_order);
173 m_is_valid = true;
174 }
175 } else {
176 m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order);
177 m_is_valid = true;
178 }
179 } else {
180 // The opcode isn't evenly sized, so we need to actually use the llvm
181 // disassembler to parse it and get the size.
182 uint8_t *opcode_data =
183 const_cast<uint8_t *>(data.PeekData(data_offset, 1));
184 const size_t opcode_data_len = data.BytesLeft(data_offset);
185 const addr_t pc = m_address.GetFileAddress();
186 llvm::MCInst inst;
187
188 const size_t inst_size =
189 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
190 if (inst_size == 0)
191 m_opcode.Clear();
192 else {
193 m_opcode.SetOpcodeBytes(opcode_data, inst_size);
194 m_is_valid = true;
195 }
196 }
197 }
198 return m_opcode.GetByteSize();
199 }
200 return 0;
201 }
202
AppendComment(std::string & description)203 void AppendComment(std::string &description) {
204 if (m_comment.empty())
205 m_comment.swap(description);
206 else {
207 m_comment.append(", ");
208 m_comment.append(description);
209 }
210 }
211
CalculateMnemonicOperandsAndComment(const lldb_private::ExecutionContext * exe_ctx)212 void CalculateMnemonicOperandsAndComment(
213 const lldb_private::ExecutionContext *exe_ctx) override {
214 DataExtractor data;
215 const AddressClass address_class = GetAddressClass();
216
217 if (m_opcode.GetData(data)) {
218 std::string out_string;
219 std::string comment_string;
220
221 DisassemblerScope disasm(*this, exe_ctx);
222 if (disasm) {
223 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr;
224
225 if (address_class == AddressClass::eCodeAlternateISA)
226 mc_disasm_ptr = disasm->m_alternate_disasm_up.get();
227 else
228 mc_disasm_ptr = disasm->m_disasm_up.get();
229
230 lldb::addr_t pc = m_address.GetFileAddress();
231 m_using_file_addr = true;
232
233 const bool data_from_file = disasm->m_data_from_file;
234 bool use_hex_immediates = true;
235 Disassembler::HexImmediateStyle hex_style = Disassembler::eHexStyleC;
236
237 if (exe_ctx) {
238 Target *target = exe_ctx->GetTargetPtr();
239 if (target) {
240 use_hex_immediates = target->GetUseHexImmediates();
241 hex_style = target->GetHexImmediateStyle();
242
243 if (!data_from_file) {
244 const lldb::addr_t load_addr = m_address.GetLoadAddress(target);
245 if (load_addr != LLDB_INVALID_ADDRESS) {
246 pc = load_addr;
247 m_using_file_addr = false;
248 }
249 }
250 }
251 }
252
253 const uint8_t *opcode_data = data.GetDataStart();
254 const size_t opcode_data_len = data.GetByteSize();
255 llvm::MCInst inst;
256 size_t inst_size =
257 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
258
259 if (inst_size > 0) {
260 mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style);
261 mc_disasm_ptr->PrintMCInst(inst, out_string, comment_string);
262
263 if (!comment_string.empty()) {
264 AppendComment(comment_string);
265 }
266 }
267
268 if (inst_size == 0) {
269 m_comment.assign("unknown opcode");
270 inst_size = m_opcode.GetByteSize();
271 StreamString mnemonic_strm;
272 lldb::offset_t offset = 0;
273 lldb::ByteOrder byte_order = data.GetByteOrder();
274 switch (inst_size) {
275 case 1: {
276 const uint8_t uval8 = data.GetU8(&offset);
277 m_opcode.SetOpcode8(uval8, byte_order);
278 m_opcode_name.assign(".byte");
279 mnemonic_strm.Printf("0x%2.2x", uval8);
280 } break;
281 case 2: {
282 const uint16_t uval16 = data.GetU16(&offset);
283 m_opcode.SetOpcode16(uval16, byte_order);
284 m_opcode_name.assign(".short");
285 mnemonic_strm.Printf("0x%4.4x", uval16);
286 } break;
287 case 4: {
288 const uint32_t uval32 = data.GetU32(&offset);
289 m_opcode.SetOpcode32(uval32, byte_order);
290 m_opcode_name.assign(".long");
291 mnemonic_strm.Printf("0x%8.8x", uval32);
292 } break;
293 case 8: {
294 const uint64_t uval64 = data.GetU64(&offset);
295 m_opcode.SetOpcode64(uval64, byte_order);
296 m_opcode_name.assign(".quad");
297 mnemonic_strm.Printf("0x%16.16" PRIx64, uval64);
298 } break;
299 default:
300 if (inst_size == 0)
301 return;
302 else {
303 const uint8_t *bytes = data.PeekData(offset, inst_size);
304 if (bytes == nullptr)
305 return;
306 m_opcode_name.assign(".byte");
307 m_opcode.SetOpcodeBytes(bytes, inst_size);
308 mnemonic_strm.Printf("0x%2.2x", bytes[0]);
309 for (uint32_t i = 1; i < inst_size; ++i)
310 mnemonic_strm.Printf(" 0x%2.2x", bytes[i]);
311 }
312 break;
313 }
314 m_mnemonics = std::string(mnemonic_strm.GetString());
315 return;
316 }
317
318 static RegularExpression s_regex(
319 llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?"));
320
321 llvm::SmallVector<llvm::StringRef, 4> matches;
322 if (s_regex.Execute(out_string, &matches)) {
323 m_opcode_name = matches[1].str();
324 m_mnemonics = matches[2].str();
325 }
326 }
327 }
328 }
329
IsValid() const330 bool IsValid() const { return m_is_valid; }
331
UsingFileAddress() const332 bool UsingFileAddress() const { return m_using_file_addr; }
GetByteSize() const333 size_t GetByteSize() const { return m_opcode.GetByteSize(); }
334
335 /// Grants exclusive access to the disassembler and initializes it with the
336 /// given InstructionLLVMC and an optional ExecutionContext.
337 class DisassemblerScope {
338 std::shared_ptr<DisassemblerLLVMC> m_disasm;
339
340 public:
DisassemblerScope(InstructionLLVMC & i,const lldb_private::ExecutionContext * exe_ctx=nullptr)341 explicit DisassemblerScope(
342 InstructionLLVMC &i,
343 const lldb_private::ExecutionContext *exe_ctx = nullptr)
344 : m_disasm(i.m_disasm_wp.lock()) {
345 m_disasm->m_mutex.lock();
346 m_disasm->m_inst = &i;
347 m_disasm->m_exe_ctx = exe_ctx;
348 }
~DisassemblerScope()349 ~DisassemblerScope() { m_disasm->m_mutex.unlock(); }
350
351 /// Evaluates to true if this scope contains a valid disassembler.
operator bool() const352 operator bool() const { return static_cast<bool>(m_disasm); }
353
operator ->()354 std::shared_ptr<DisassemblerLLVMC> operator->() { return m_disasm; }
355 };
356
357 static llvm::StringRef::const_iterator
ConsumeWhitespace(llvm::StringRef::const_iterator osi,llvm::StringRef::const_iterator ose)358 ConsumeWhitespace(llvm::StringRef::const_iterator osi,
359 llvm::StringRef::const_iterator ose) {
360 while (osi != ose) {
361 switch (*osi) {
362 default:
363 return osi;
364 case ' ':
365 case '\t':
366 break;
367 }
368 ++osi;
369 }
370
371 return osi;
372 }
373
374 static std::pair<bool, llvm::StringRef::const_iterator>
ConsumeChar(llvm::StringRef::const_iterator osi,const char c,llvm::StringRef::const_iterator ose)375 ConsumeChar(llvm::StringRef::const_iterator osi, const char c,
376 llvm::StringRef::const_iterator ose) {
377 bool found = false;
378
379 osi = ConsumeWhitespace(osi, ose);
380 if (osi != ose && *osi == c) {
381 found = true;
382 ++osi;
383 }
384
385 return std::make_pair(found, osi);
386 }
387
388 static std::pair<Operand, llvm::StringRef::const_iterator>
ParseRegisterName(llvm::StringRef::const_iterator osi,llvm::StringRef::const_iterator ose)389 ParseRegisterName(llvm::StringRef::const_iterator osi,
390 llvm::StringRef::const_iterator ose) {
391 Operand ret;
392 ret.m_type = Operand::Type::Register;
393 std::string str;
394
395 osi = ConsumeWhitespace(osi, ose);
396
397 while (osi != ose) {
398 if (*osi >= '0' && *osi <= '9') {
399 if (str.empty()) {
400 return std::make_pair(Operand(), osi);
401 } else {
402 str.push_back(*osi);
403 }
404 } else if (*osi >= 'a' && *osi <= 'z') {
405 str.push_back(*osi);
406 } else {
407 switch (*osi) {
408 default:
409 if (str.empty()) {
410 return std::make_pair(Operand(), osi);
411 } else {
412 ret.m_register = ConstString(str);
413 return std::make_pair(ret, osi);
414 }
415 case '%':
416 if (!str.empty()) {
417 return std::make_pair(Operand(), osi);
418 }
419 break;
420 }
421 }
422 ++osi;
423 }
424
425 ret.m_register = ConstString(str);
426 return std::make_pair(ret, osi);
427 }
428
429 static std::pair<Operand, llvm::StringRef::const_iterator>
ParseImmediate(llvm::StringRef::const_iterator osi,llvm::StringRef::const_iterator ose)430 ParseImmediate(llvm::StringRef::const_iterator osi,
431 llvm::StringRef::const_iterator ose) {
432 Operand ret;
433 ret.m_type = Operand::Type::Immediate;
434 std::string str;
435 bool is_hex = false;
436
437 osi = ConsumeWhitespace(osi, ose);
438
439 while (osi != ose) {
440 if (*osi >= '0' && *osi <= '9') {
441 str.push_back(*osi);
442 } else if (*osi >= 'a' && *osi <= 'f') {
443 if (is_hex) {
444 str.push_back(*osi);
445 } else {
446 return std::make_pair(Operand(), osi);
447 }
448 } else {
449 switch (*osi) {
450 default:
451 if (str.empty()) {
452 return std::make_pair(Operand(), osi);
453 } else {
454 ret.m_immediate = strtoull(str.c_str(), nullptr, 0);
455 return std::make_pair(ret, osi);
456 }
457 case 'x':
458 if (!str.compare("0")) {
459 is_hex = true;
460 str.push_back(*osi);
461 } else {
462 return std::make_pair(Operand(), osi);
463 }
464 break;
465 case '#':
466 case '$':
467 if (!str.empty()) {
468 return std::make_pair(Operand(), osi);
469 }
470 break;
471 case '-':
472 if (str.empty()) {
473 ret.m_negative = true;
474 } else {
475 return std::make_pair(Operand(), osi);
476 }
477 }
478 }
479 ++osi;
480 }
481
482 ret.m_immediate = strtoull(str.c_str(), nullptr, 0);
483 return std::make_pair(ret, osi);
484 }
485
486 // -0x5(%rax,%rax,2)
487 static std::pair<Operand, llvm::StringRef::const_iterator>
ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi,llvm::StringRef::const_iterator ose)488 ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi,
489 llvm::StringRef::const_iterator ose) {
490 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
491 ParseImmediate(osi, ose);
492 if (offset_and_iterator.first.IsValid()) {
493 osi = offset_and_iterator.second;
494 }
495
496 bool found = false;
497 std::tie(found, osi) = ConsumeChar(osi, '(', ose);
498 if (!found) {
499 return std::make_pair(Operand(), osi);
500 }
501
502 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
503 ParseRegisterName(osi, ose);
504 if (base_and_iterator.first.IsValid()) {
505 osi = base_and_iterator.second;
506 } else {
507 return std::make_pair(Operand(), osi);
508 }
509
510 std::tie(found, osi) = ConsumeChar(osi, ',', ose);
511 if (!found) {
512 return std::make_pair(Operand(), osi);
513 }
514
515 std::pair<Operand, llvm::StringRef::const_iterator> index_and_iterator =
516 ParseRegisterName(osi, ose);
517 if (index_and_iterator.first.IsValid()) {
518 osi = index_and_iterator.second;
519 } else {
520 return std::make_pair(Operand(), osi);
521 }
522
523 std::tie(found, osi) = ConsumeChar(osi, ',', ose);
524 if (!found) {
525 return std::make_pair(Operand(), osi);
526 }
527
528 std::pair<Operand, llvm::StringRef::const_iterator>
529 multiplier_and_iterator = ParseImmediate(osi, ose);
530 if (index_and_iterator.first.IsValid()) {
531 osi = index_and_iterator.second;
532 } else {
533 return std::make_pair(Operand(), osi);
534 }
535
536 std::tie(found, osi) = ConsumeChar(osi, ')', ose);
537 if (!found) {
538 return std::make_pair(Operand(), osi);
539 }
540
541 Operand product;
542 product.m_type = Operand::Type::Product;
543 product.m_children.push_back(index_and_iterator.first);
544 product.m_children.push_back(multiplier_and_iterator.first);
545
546 Operand index;
547 index.m_type = Operand::Type::Sum;
548 index.m_children.push_back(base_and_iterator.first);
549 index.m_children.push_back(product);
550
551 if (offset_and_iterator.first.IsValid()) {
552 Operand offset;
553 offset.m_type = Operand::Type::Sum;
554 offset.m_children.push_back(offset_and_iterator.first);
555 offset.m_children.push_back(index);
556
557 Operand deref;
558 deref.m_type = Operand::Type::Dereference;
559 deref.m_children.push_back(offset);
560 return std::make_pair(deref, osi);
561 } else {
562 Operand deref;
563 deref.m_type = Operand::Type::Dereference;
564 deref.m_children.push_back(index);
565 return std::make_pair(deref, osi);
566 }
567 }
568
569 // -0x10(%rbp)
570 static std::pair<Operand, llvm::StringRef::const_iterator>
ParseIntelDerefAccess(llvm::StringRef::const_iterator osi,llvm::StringRef::const_iterator ose)571 ParseIntelDerefAccess(llvm::StringRef::const_iterator osi,
572 llvm::StringRef::const_iterator ose) {
573 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
574 ParseImmediate(osi, ose);
575 if (offset_and_iterator.first.IsValid()) {
576 osi = offset_and_iterator.second;
577 }
578
579 bool found = false;
580 std::tie(found, osi) = ConsumeChar(osi, '(', ose);
581 if (!found) {
582 return std::make_pair(Operand(), osi);
583 }
584
585 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
586 ParseRegisterName(osi, ose);
587 if (base_and_iterator.first.IsValid()) {
588 osi = base_and_iterator.second;
589 } else {
590 return std::make_pair(Operand(), osi);
591 }
592
593 std::tie(found, osi) = ConsumeChar(osi, ')', ose);
594 if (!found) {
595 return std::make_pair(Operand(), osi);
596 }
597
598 if (offset_and_iterator.first.IsValid()) {
599 Operand offset;
600 offset.m_type = Operand::Type::Sum;
601 offset.m_children.push_back(offset_and_iterator.first);
602 offset.m_children.push_back(base_and_iterator.first);
603
604 Operand deref;
605 deref.m_type = Operand::Type::Dereference;
606 deref.m_children.push_back(offset);
607 return std::make_pair(deref, osi);
608 } else {
609 Operand deref;
610 deref.m_type = Operand::Type::Dereference;
611 deref.m_children.push_back(base_and_iterator.first);
612 return std::make_pair(deref, osi);
613 }
614 }
615
616 // [sp, #8]!
617 static std::pair<Operand, llvm::StringRef::const_iterator>
ParseARMOffsetAccess(llvm::StringRef::const_iterator osi,llvm::StringRef::const_iterator ose)618 ParseARMOffsetAccess(llvm::StringRef::const_iterator osi,
619 llvm::StringRef::const_iterator ose) {
620 bool found = false;
621 std::tie(found, osi) = ConsumeChar(osi, '[', ose);
622 if (!found) {
623 return std::make_pair(Operand(), osi);
624 }
625
626 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
627 ParseRegisterName(osi, ose);
628 if (base_and_iterator.first.IsValid()) {
629 osi = base_and_iterator.second;
630 } else {
631 return std::make_pair(Operand(), osi);
632 }
633
634 std::tie(found, osi) = ConsumeChar(osi, ',', ose);
635 if (!found) {
636 return std::make_pair(Operand(), osi);
637 }
638
639 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
640 ParseImmediate(osi, ose);
641 if (offset_and_iterator.first.IsValid()) {
642 osi = offset_and_iterator.second;
643 }
644
645 std::tie(found, osi) = ConsumeChar(osi, ']', ose);
646 if (!found) {
647 return std::make_pair(Operand(), osi);
648 }
649
650 Operand offset;
651 offset.m_type = Operand::Type::Sum;
652 offset.m_children.push_back(offset_and_iterator.first);
653 offset.m_children.push_back(base_and_iterator.first);
654
655 Operand deref;
656 deref.m_type = Operand::Type::Dereference;
657 deref.m_children.push_back(offset);
658 return std::make_pair(deref, osi);
659 }
660
661 // [sp]
662 static std::pair<Operand, llvm::StringRef::const_iterator>
ParseARMDerefAccess(llvm::StringRef::const_iterator osi,llvm::StringRef::const_iterator ose)663 ParseARMDerefAccess(llvm::StringRef::const_iterator osi,
664 llvm::StringRef::const_iterator ose) {
665 bool found = false;
666 std::tie(found, osi) = ConsumeChar(osi, '[', ose);
667 if (!found) {
668 return std::make_pair(Operand(), osi);
669 }
670
671 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
672 ParseRegisterName(osi, ose);
673 if (base_and_iterator.first.IsValid()) {
674 osi = base_and_iterator.second;
675 } else {
676 return std::make_pair(Operand(), osi);
677 }
678
679 std::tie(found, osi) = ConsumeChar(osi, ']', ose);
680 if (!found) {
681 return std::make_pair(Operand(), osi);
682 }
683
684 Operand deref;
685 deref.m_type = Operand::Type::Dereference;
686 deref.m_children.push_back(base_and_iterator.first);
687 return std::make_pair(deref, osi);
688 }
689
DumpOperand(const Operand & op,Stream & s)690 static void DumpOperand(const Operand &op, Stream &s) {
691 switch (op.m_type) {
692 case Operand::Type::Dereference:
693 s.PutCString("*");
694 DumpOperand(op.m_children[0], s);
695 break;
696 case Operand::Type::Immediate:
697 if (op.m_negative) {
698 s.PutCString("-");
699 }
700 s.PutCString(llvm::to_string(op.m_immediate));
701 break;
702 case Operand::Type::Invalid:
703 s.PutCString("Invalid");
704 break;
705 case Operand::Type::Product:
706 s.PutCString("(");
707 DumpOperand(op.m_children[0], s);
708 s.PutCString("*");
709 DumpOperand(op.m_children[1], s);
710 s.PutCString(")");
711 break;
712 case Operand::Type::Register:
713 s.PutCString(op.m_register.GetStringRef());
714 break;
715 case Operand::Type::Sum:
716 s.PutCString("(");
717 DumpOperand(op.m_children[0], s);
718 s.PutCString("+");
719 DumpOperand(op.m_children[1], s);
720 s.PutCString(")");
721 break;
722 }
723 }
724
ParseOperands(llvm::SmallVectorImpl<Instruction::Operand> & operands)725 bool ParseOperands(
726 llvm::SmallVectorImpl<Instruction::Operand> &operands) override {
727 const char *operands_string = GetOperands(nullptr);
728
729 if (!operands_string) {
730 return false;
731 }
732
733 llvm::StringRef operands_ref(operands_string);
734
735 llvm::StringRef::const_iterator osi = operands_ref.begin();
736 llvm::StringRef::const_iterator ose = operands_ref.end();
737
738 while (osi != ose) {
739 Operand operand;
740 llvm::StringRef::const_iterator iter;
741
742 if ((std::tie(operand, iter) = ParseIntelIndexedAccess(osi, ose),
743 operand.IsValid()) ||
744 (std::tie(operand, iter) = ParseIntelDerefAccess(osi, ose),
745 operand.IsValid()) ||
746 (std::tie(operand, iter) = ParseARMOffsetAccess(osi, ose),
747 operand.IsValid()) ||
748 (std::tie(operand, iter) = ParseARMDerefAccess(osi, ose),
749 operand.IsValid()) ||
750 (std::tie(operand, iter) = ParseRegisterName(osi, ose),
751 operand.IsValid()) ||
752 (std::tie(operand, iter) = ParseImmediate(osi, ose),
753 operand.IsValid())) {
754 osi = iter;
755 operands.push_back(operand);
756 } else {
757 return false;
758 }
759
760 std::pair<bool, llvm::StringRef::const_iterator> found_and_iter =
761 ConsumeChar(osi, ',', ose);
762 if (found_and_iter.first) {
763 osi = found_and_iter.second;
764 }
765
766 osi = ConsumeWhitespace(osi, ose);
767 }
768
769 DisassemblerSP disasm_sp = m_disasm_wp.lock();
770
771 if (disasm_sp && operands.size() > 1) {
772 // TODO tie this into the MC Disassembler's notion of clobbers.
773 switch (disasm_sp->GetArchitecture().GetMachine()) {
774 default:
775 break;
776 case llvm::Triple::x86:
777 case llvm::Triple::x86_64:
778 operands[operands.size() - 1].m_clobbered = true;
779 break;
780 case llvm::Triple::arm:
781 operands[0].m_clobbered = true;
782 break;
783 }
784 }
785
786 if (Log *log =
787 lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS)) {
788 StreamString ss;
789
790 ss.Printf("[%s] expands to %zu operands:\n", operands_string,
791 operands.size());
792 for (const Operand &operand : operands) {
793 ss.PutCString(" ");
794 DumpOperand(operand, ss);
795 ss.PutCString("\n");
796 }
797
798 log->PutString(ss.GetString());
799 }
800
801 return true;
802 }
803
IsCall()804 bool IsCall() override {
805 VisitInstruction();
806 return m_is_call;
807 }
808
protected:
// Weak reference to the disassembler this instruction was created from; it
// is locked whenever the disassembler is needed.
std::weak_ptr<DisassemblerLLVMC> m_disasm_wp;

// Set to true by Decode() on its ARM/Thumb and LLVM-sized paths.
bool m_is_valid = false;
// Whether CalculateMnemonicOperandsAndComment() used the file address as pc.
// Initialized to false in the constructor.
bool m_using_file_addr;
// Set once VisitInstruction() has decoded the instruction successfully, so
// the three flags below are only computed once.
bool m_has_visited_instruction = false;

// Be conservative. If we didn't understand the instruction, say it:
//   - Might branch
//   - Does not have a delay slot
//   - Is not a call
bool m_does_branch = true;
bool m_has_delay_slot = false;
bool m_is_call = false;
823
VisitInstruction()824 void VisitInstruction() {
825 if (m_has_visited_instruction)
826 return;
827
828 DisassemblerScope disasm(*this);
829 if (!disasm)
830 return;
831
832 DataExtractor data;
833 if (!m_opcode.GetData(data))
834 return;
835
836 bool is_alternate_isa;
837 lldb::addr_t pc = m_address.GetFileAddress();
838 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
839 GetDisasmToUse(is_alternate_isa, disasm);
840 const uint8_t *opcode_data = data.GetDataStart();
841 const size_t opcode_data_len = data.GetByteSize();
842 llvm::MCInst inst;
843 const size_t inst_size =
844 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
845 if (inst_size == 0)
846 return;
847
848 m_has_visited_instruction = true;
849 m_does_branch = mc_disasm_ptr->CanBranch(inst);
850 m_has_delay_slot = mc_disasm_ptr->HasDelaySlot(inst);
851 m_is_call = mc_disasm_ptr->IsCall(inst);
852 }
853
854 private:
855 DisassemblerLLVMC::MCDisasmInstance *
GetDisasmToUse(bool & is_alternate_isa,DisassemblerScope & disasm)856 GetDisasmToUse(bool &is_alternate_isa, DisassemblerScope &disasm) {
857 is_alternate_isa = false;
858 if (disasm) {
859 if (disasm->m_alternate_disasm_up) {
860 const AddressClass address_class = GetAddressClass();
861
862 if (address_class == AddressClass::eCodeAlternateISA) {
863 is_alternate_isa = true;
864 return disasm->m_alternate_disasm_up.get();
865 }
866 }
867 return disasm->m_disasm_up.get();
868 }
869 return nullptr;
870 }
871 };
872
873 std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>
Create(const char * triple,const char * cpu,const char * features_str,unsigned flavor,DisassemblerLLVMC & owner)874 DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple, const char *cpu,
875 const char *features_str,
876 unsigned flavor,
877 DisassemblerLLVMC &owner) {
878 using Instance = std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>;
879
880 std::string Status;
881 const llvm::Target *curr_target =
882 llvm::TargetRegistry::lookupTarget(triple, Status);
883 if (!curr_target)
884 return Instance();
885
886 std::unique_ptr<llvm::MCInstrInfo> instr_info_up(
887 curr_target->createMCInstrInfo());
888 if (!instr_info_up)
889 return Instance();
890
891 std::unique_ptr<llvm::MCRegisterInfo> reg_info_up(
892 curr_target->createMCRegInfo(triple));
893 if (!reg_info_up)
894 return Instance();
895
896 std::unique_ptr<llvm::MCSubtargetInfo> subtarget_info_up(
897 curr_target->createMCSubtargetInfo(triple, cpu, features_str));
898 if (!subtarget_info_up)
899 return Instance();
900
901 llvm::MCTargetOptions MCOptions;
902 std::unique_ptr<llvm::MCAsmInfo> asm_info_up(
903 curr_target->createMCAsmInfo(*reg_info_up, triple, MCOptions));
904 if (!asm_info_up)
905 return Instance();
906
907 std::unique_ptr<llvm::MCContext> context_up(
908 new llvm::MCContext(asm_info_up.get(), reg_info_up.get(), nullptr));
909 if (!context_up)
910 return Instance();
911
912 std::unique_ptr<llvm::MCDisassembler> disasm_up(
913 curr_target->createMCDisassembler(*subtarget_info_up, *context_up));
914 if (!disasm_up)
915 return Instance();
916
917 std::unique_ptr<llvm::MCRelocationInfo> rel_info_up(
918 curr_target->createMCRelocationInfo(triple, *context_up));
919 if (!rel_info_up)
920 return Instance();
921
922 std::unique_ptr<llvm::MCSymbolizer> symbolizer_up(
923 curr_target->createMCSymbolizer(
924 triple, nullptr, DisassemblerLLVMC::SymbolLookupCallback, &owner,
925 context_up.get(), std::move(rel_info_up)));
926 disasm_up->setSymbolizer(std::move(symbolizer_up));
927
928 unsigned asm_printer_variant =
929 flavor == ~0U ? asm_info_up->getAssemblerDialect() : flavor;
930
931 std::unique_ptr<llvm::MCInstPrinter> instr_printer_up(
932 curr_target->createMCInstPrinter(llvm::Triple{triple},
933 asm_printer_variant, *asm_info_up,
934 *instr_info_up, *reg_info_up));
935 if (!instr_printer_up)
936 return Instance();
937
938 return Instance(
939 new MCDisasmInstance(std::move(instr_info_up), std::move(reg_info_up),
940 std::move(subtarget_info_up), std::move(asm_info_up),
941 std::move(context_up), std::move(disasm_up),
942 std::move(instr_printer_up)));
943 }
944
MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> && instr_info_up,std::unique_ptr<llvm::MCRegisterInfo> && reg_info_up,std::unique_ptr<llvm::MCSubtargetInfo> && subtarget_info_up,std::unique_ptr<llvm::MCAsmInfo> && asm_info_up,std::unique_ptr<llvm::MCContext> && context_up,std::unique_ptr<llvm::MCDisassembler> && disasm_up,std::unique_ptr<llvm::MCInstPrinter> && instr_printer_up)945 DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance(
946 std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up,
947 std::unique_ptr<llvm::MCRegisterInfo> &®_info_up,
948 std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up,
949 std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up,
950 std::unique_ptr<llvm::MCContext> &&context_up,
951 std::unique_ptr<llvm::MCDisassembler> &&disasm_up,
952 std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up)
953 : m_instr_info_up(std::move(instr_info_up)),
954 m_reg_info_up(std::move(reg_info_up)),
955 m_subtarget_info_up(std::move(subtarget_info_up)),
956 m_asm_info_up(std::move(asm_info_up)),
957 m_context_up(std::move(context_up)), m_disasm_up(std::move(disasm_up)),
958 m_instr_printer_up(std::move(instr_printer_up)) {
959 assert(m_instr_info_up && m_reg_info_up && m_subtarget_info_up &&
960 m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up);
961 }
962
GetMCInst(const uint8_t * opcode_data,size_t opcode_data_len,lldb::addr_t pc,llvm::MCInst & mc_inst) const963 uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst(
964 const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc,
965 llvm::MCInst &mc_inst) const {
966 llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len);
967 llvm::MCDisassembler::DecodeStatus status;
968
969 uint64_t new_inst_size;
970 status = m_disasm_up->getInstruction(mc_inst, new_inst_size, data, pc,
971 llvm::nulls());
972 if (status == llvm::MCDisassembler::Success)
973 return new_inst_size;
974 else
975 return 0;
976 }
977
PrintMCInst(llvm::MCInst & mc_inst,std::string & inst_string,std::string & comments_string)978 void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst(
979 llvm::MCInst &mc_inst, std::string &inst_string,
980 std::string &comments_string) {
981 llvm::raw_string_ostream inst_stream(inst_string);
982 llvm::raw_string_ostream comments_stream(comments_string);
983
984 m_instr_printer_up->setCommentStream(comments_stream);
985 m_instr_printer_up->printInst(&mc_inst, 0, llvm::StringRef(),
986 *m_subtarget_info_up, inst_stream);
987 m_instr_printer_up->setCommentStream(llvm::nulls());
988 comments_stream.flush();
989
990 static std::string g_newlines("\r\n");
991
992 for (size_t newline_pos = 0;
993 (newline_pos = comments_string.find_first_of(g_newlines, newline_pos)) !=
994 comments_string.npos;
995 /**/) {
996 comments_string.replace(comments_string.begin() + newline_pos,
997 comments_string.begin() + newline_pos + 1, 1, ' ');
998 }
999 }
1000
SetStyle(bool use_hex_immed,HexImmediateStyle hex_style)1001 void DisassemblerLLVMC::MCDisasmInstance::SetStyle(
1002 bool use_hex_immed, HexImmediateStyle hex_style) {
1003 m_instr_printer_up->setPrintImmHex(use_hex_immed);
1004 switch (hex_style) {
1005 case eHexStyleC:
1006 m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::C);
1007 break;
1008 case eHexStyleAsm:
1009 m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::Asm);
1010 break;
1011 }
1012 }
1013
CanBranch(llvm::MCInst & mc_inst) const1014 bool DisassemblerLLVMC::MCDisasmInstance::CanBranch(
1015 llvm::MCInst &mc_inst) const {
1016 return m_instr_info_up->get(mc_inst.getOpcode())
1017 .mayAffectControlFlow(mc_inst, *m_reg_info_up);
1018 }
1019
HasDelaySlot(llvm::MCInst & mc_inst) const1020 bool DisassemblerLLVMC::MCDisasmInstance::HasDelaySlot(
1021 llvm::MCInst &mc_inst) const {
1022 return m_instr_info_up->get(mc_inst.getOpcode()).hasDelaySlot();
1023 }
1024
IsCall(llvm::MCInst & mc_inst) const1025 bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const {
1026 return m_instr_info_up->get(mc_inst.getOpcode()).isCall();
1027 }
1028
DisassemblerLLVMC(const ArchSpec & arch,const char * flavor_string)1029 DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch,
1030 const char *flavor_string)
1031 : Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr),
1032 m_data_from_file(false) {
1033 if (!FlavorValidForArchSpec(arch, m_flavor.c_str())) {
1034 m_flavor.assign("default");
1035 }
1036
1037 unsigned flavor = ~0U;
1038 llvm::Triple triple = arch.GetTriple();
1039
1040 // So far the only supported flavor is "intel" on x86. The base class will
1041 // set this correctly coming in.
1042 if (triple.getArch() == llvm::Triple::x86 ||
1043 triple.getArch() == llvm::Triple::x86_64) {
1044 if (m_flavor == "intel") {
1045 flavor = 1;
1046 } else if (m_flavor == "att") {
1047 flavor = 0;
1048 }
1049 }
1050
1051 ArchSpec thumb_arch(arch);
1052 if (triple.getArch() == llvm::Triple::arm) {
1053 std::string thumb_arch_name(thumb_arch.GetTriple().getArchName().str());
1054 // Replace "arm" with "thumb" so we get all thumb variants correct
1055 if (thumb_arch_name.size() > 3) {
1056 thumb_arch_name.erase(0, 3);
1057 thumb_arch_name.insert(0, "thumb");
1058 } else {
1059 thumb_arch_name = "thumbv8.2a";
1060 }
1061 thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name));
1062 }
1063
1064 // If no sub architecture specified then use the most recent arm architecture
1065 // so the disassembler will return all instruction. Without it we will see a
1066 // lot of unknow opcode in case the code uses instructions which are not
1067 // available in the oldest arm version (used when no sub architecture is
1068 // specified)
1069 if (triple.getArch() == llvm::Triple::arm &&
1070 triple.getSubArch() == llvm::Triple::NoSubArch)
1071 triple.setArchName("armv8.2a");
1072
1073 std::string features_str = "";
1074 const char *triple_str = triple.getTriple().c_str();
1075
1076 // ARM Cortex M0-M7 devices only execute thumb instructions
1077 if (arch.IsAlwaysThumbInstructions()) {
1078 triple_str = thumb_arch.GetTriple().getTriple().c_str();
1079 features_str += "+fp-armv8,";
1080 }
1081
1082 const char *cpu = "";
1083
1084 switch (arch.GetCore()) {
1085 case ArchSpec::eCore_mips32:
1086 case ArchSpec::eCore_mips32el:
1087 cpu = "mips32";
1088 break;
1089 case ArchSpec::eCore_mips32r2:
1090 case ArchSpec::eCore_mips32r2el:
1091 cpu = "mips32r2";
1092 break;
1093 case ArchSpec::eCore_mips32r3:
1094 case ArchSpec::eCore_mips32r3el:
1095 cpu = "mips32r3";
1096 break;
1097 case ArchSpec::eCore_mips32r5:
1098 case ArchSpec::eCore_mips32r5el:
1099 cpu = "mips32r5";
1100 break;
1101 case ArchSpec::eCore_mips32r6:
1102 case ArchSpec::eCore_mips32r6el:
1103 cpu = "mips32r6";
1104 break;
1105 case ArchSpec::eCore_mips64:
1106 case ArchSpec::eCore_mips64el:
1107 cpu = "mips64";
1108 break;
1109 case ArchSpec::eCore_mips64r2:
1110 case ArchSpec::eCore_mips64r2el:
1111 cpu = "mips64r2";
1112 break;
1113 case ArchSpec::eCore_mips64r3:
1114 case ArchSpec::eCore_mips64r3el:
1115 cpu = "mips64r3";
1116 break;
1117 case ArchSpec::eCore_mips64r5:
1118 case ArchSpec::eCore_mips64r5el:
1119 cpu = "mips64r5";
1120 break;
1121 case ArchSpec::eCore_mips64r6:
1122 case ArchSpec::eCore_mips64r6el:
1123 cpu = "mips64r6";
1124 break;
1125 default:
1126 cpu = "";
1127 break;
1128 }
1129
1130 if (arch.IsMIPS()) {
1131 uint32_t arch_flags = arch.GetFlags();
1132 if (arch_flags & ArchSpec::eMIPSAse_msa)
1133 features_str += "+msa,";
1134 if (arch_flags & ArchSpec::eMIPSAse_dsp)
1135 features_str += "+dsp,";
1136 if (arch_flags & ArchSpec::eMIPSAse_dspr2)
1137 features_str += "+dspr2,";
1138 }
1139
1140 // If any AArch64 variant, enable the ARMv8.5 ISA with SVE extensions so we
1141 // can disassemble newer instructions.
1142 if (triple.getArch() == llvm::Triple::aarch64 ||
1143 triple.getArch() == llvm::Triple::aarch64_32)
1144 features_str += "+v8.5a,+sve2";
1145
1146 if ((triple.getArch() == llvm::Triple::aarch64 ||
1147 triple.getArch() == llvm::Triple::aarch64_32)
1148 && triple.getVendor() == llvm::Triple::Apple) {
1149 cpu = "apple-latest";
1150 }
1151
1152 // We use m_disasm_up.get() to tell whether we are valid or not, so if this
1153 // isn't good for some reason, we won't be valid and FindPlugin will fail and
1154 // we won't get used.
1155 m_disasm_up = MCDisasmInstance::Create(triple_str, cpu, features_str.c_str(),
1156 flavor, *this);
1157
1158 llvm::Triple::ArchType llvm_arch = triple.getArch();
1159
1160 // For arm CPUs that can execute arm or thumb instructions, also create a
1161 // thumb instruction disassembler.
1162 if (llvm_arch == llvm::Triple::arm) {
1163 std::string thumb_triple(thumb_arch.GetTriple().getTriple());
1164 m_alternate_disasm_up =
1165 MCDisasmInstance::Create(thumb_triple.c_str(), "", features_str.c_str(),
1166 flavor, *this);
1167 if (!m_alternate_disasm_up)
1168 m_disasm_up.reset();
1169
1170 } else if (arch.IsMIPS()) {
1171 /* Create alternate disassembler for MIPS16 and microMIPS */
1172 uint32_t arch_flags = arch.GetFlags();
1173 if (arch_flags & ArchSpec::eMIPSAse_mips16)
1174 features_str += "+mips16,";
1175 else if (arch_flags & ArchSpec::eMIPSAse_micromips)
1176 features_str += "+micromips,";
1177
1178 m_alternate_disasm_up = MCDisasmInstance::Create(
1179 triple_str, cpu, features_str.c_str(), flavor, *this);
1180 if (!m_alternate_disasm_up)
1181 m_disasm_up.reset();
1182 }
1183 }
1184
// Defaulted here, in the file that defines MCDisasmInstance.
// NOTE(review): presumably out of line so the smart-pointer members holding
// MCDisasmInstance are destroyed where that type is complete -- confirm
// against the header.
DisassemblerLLVMC::~DisassemblerLLVMC() = default;
1186
CreateInstance(const ArchSpec & arch,const char * flavor)1187 Disassembler *DisassemblerLLVMC::CreateInstance(const ArchSpec &arch,
1188 const char *flavor) {
1189 if (arch.GetTriple().getArch() != llvm::Triple::UnknownArch) {
1190 std::unique_ptr<DisassemblerLLVMC> disasm_up(
1191 new DisassemblerLLVMC(arch, flavor));
1192
1193 if (disasm_up.get() && disasm_up->IsValid())
1194 return disasm_up.release();
1195 }
1196 return nullptr;
1197 }
1198
DecodeInstructions(const Address & base_addr,const DataExtractor & data,lldb::offset_t data_offset,size_t num_instructions,bool append,bool data_from_file)1199 size_t DisassemblerLLVMC::DecodeInstructions(const Address &base_addr,
1200 const DataExtractor &data,
1201 lldb::offset_t data_offset,
1202 size_t num_instructions,
1203 bool append, bool data_from_file) {
1204 if (!append)
1205 m_instruction_list.Clear();
1206
1207 if (!IsValid())
1208 return 0;
1209
1210 m_data_from_file = data_from_file;
1211 uint32_t data_cursor = data_offset;
1212 const size_t data_byte_size = data.GetByteSize();
1213 uint32_t instructions_parsed = 0;
1214 Address inst_addr(base_addr);
1215
1216 while (data_cursor < data_byte_size &&
1217 instructions_parsed < num_instructions) {
1218
1219 AddressClass address_class = AddressClass::eCode;
1220
1221 if (m_alternate_disasm_up)
1222 address_class = inst_addr.GetAddressClass();
1223
1224 InstructionSP inst_sp(
1225 new InstructionLLVMC(*this, inst_addr, address_class));
1226
1227 if (!inst_sp)
1228 break;
1229
1230 uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor);
1231
1232 if (inst_size == 0)
1233 break;
1234
1235 m_instruction_list.Append(inst_sp);
1236 data_cursor += inst_size;
1237 inst_addr.Slide(inst_size);
1238 instructions_parsed++;
1239 }
1240
1241 return data_cursor - data_offset;
1242 }
1243
Initialize()1244 void DisassemblerLLVMC::Initialize() {
1245 PluginManager::RegisterPlugin(GetPluginNameStatic(),
1246 "Disassembler that uses LLVM MC to disassemble "
1247 "i386, x86_64, ARM, and ARM64.",
1248 CreateInstance);
1249
1250 llvm::InitializeAllTargetInfos();
1251 llvm::InitializeAllTargetMCs();
1252 llvm::InitializeAllAsmParsers();
1253 llvm::InitializeAllDisassemblers();
1254 }
1255
Terminate()1256 void DisassemblerLLVMC::Terminate() {
1257 PluginManager::UnregisterPlugin(CreateInstance);
1258 }
1259
GetPluginNameStatic()1260 ConstString DisassemblerLLVMC::GetPluginNameStatic() {
1261 static ConstString g_name("llvm-mc");
1262 return g_name;
1263 }
1264
OpInfoCallback(void * disassembler,uint64_t pc,uint64_t offset,uint64_t size,int tag_type,void * tag_bug)1265 int DisassemblerLLVMC::OpInfoCallback(void *disassembler, uint64_t pc,
1266 uint64_t offset, uint64_t size,
1267 int tag_type, void *tag_bug) {
1268 return static_cast<DisassemblerLLVMC *>(disassembler)
1269 ->OpInfo(pc, offset, size, tag_type, tag_bug);
1270 }
1271
SymbolLookupCallback(void * disassembler,uint64_t value,uint64_t * type,uint64_t pc,const char ** name)1272 const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler,
1273 uint64_t value,
1274 uint64_t *type, uint64_t pc,
1275 const char **name) {
1276 return static_cast<DisassemblerLLVMC *>(disassembler)
1277 ->SymbolLookup(value, type, pc, name);
1278 }
1279
FlavorValidForArchSpec(const lldb_private::ArchSpec & arch,const char * flavor)1280 bool DisassemblerLLVMC::FlavorValidForArchSpec(
1281 const lldb_private::ArchSpec &arch, const char *flavor) {
1282 llvm::Triple triple = arch.GetTriple();
1283 if (flavor == nullptr || strcmp(flavor, "default") == 0)
1284 return true;
1285
1286 if (triple.getArch() == llvm::Triple::x86 ||
1287 triple.getArch() == llvm::Triple::x86_64) {
1288 return strcmp(flavor, "intel") == 0 || strcmp(flavor, "att") == 0;
1289 } else
1290 return false;
1291 }
1292
IsValid() const1293 bool DisassemblerLLVMC::IsValid() const { return m_disasm_up.operator bool(); }
1294
OpInfo(uint64_t PC,uint64_t Offset,uint64_t Size,int tag_type,void * tag_bug)1295 int DisassemblerLLVMC::OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size,
1296 int tag_type, void *tag_bug) {
1297 switch (tag_type) {
1298 default:
1299 break;
1300 case 1:
1301 memset(tag_bug, 0, sizeof(::LLVMOpInfo1));
1302 break;
1303 }
1304 return 0;
1305 }
1306
SymbolLookup(uint64_t value,uint64_t * type_ptr,uint64_t pc,const char ** name)1307 const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,
1308 uint64_t pc, const char **name) {
1309 if (*type_ptr) {
1310 if (m_exe_ctx && m_inst) {
1311 // std::string remove_this_prior_to_checkin;
1312 Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr;
1313 Address value_so_addr;
1314 Address pc_so_addr;
1315 if (m_inst->UsingFileAddress()) {
1316 ModuleSP module_sp(m_inst->GetAddress().GetModule());
1317 if (module_sp) {
1318 module_sp->ResolveFileAddress(value, value_so_addr);
1319 module_sp->ResolveFileAddress(pc, pc_so_addr);
1320 }
1321 } else if (target && !target->GetSectionLoadList().IsEmpty()) {
1322 target->GetSectionLoadList().ResolveLoadAddress(value, value_so_addr);
1323 target->GetSectionLoadList().ResolveLoadAddress(pc, pc_so_addr);
1324 }
1325
1326 SymbolContext sym_ctx;
1327 const SymbolContextItem resolve_scope =
1328 eSymbolContextFunction | eSymbolContextSymbol;
1329 if (pc_so_addr.IsValid() && pc_so_addr.GetModule()) {
1330 pc_so_addr.GetModule()->ResolveSymbolContextForAddress(
1331 pc_so_addr, resolve_scope, sym_ctx);
1332 }
1333
1334 if (value_so_addr.IsValid() && value_so_addr.GetSection()) {
1335 StreamString ss;
1336
1337 bool format_omitting_current_func_name = false;
1338 if (sym_ctx.symbol || sym_ctx.function) {
1339 AddressRange range;
1340 if (sym_ctx.GetAddressRange(resolve_scope, 0, false, range) &&
1341 range.GetBaseAddress().IsValid() &&
1342 range.ContainsLoadAddress(value_so_addr, target)) {
1343 format_omitting_current_func_name = true;
1344 }
1345 }
1346
1347 // If the "value" address (the target address we're symbolicating) is
1348 // inside the same SymbolContext as the current instruction pc
1349 // (pc_so_addr), don't print the full function name - just print it
1350 // with DumpStyleNoFunctionName style, e.g. "<+36>".
1351 if (format_omitting_current_func_name) {
1352 value_so_addr.Dump(&ss, target, Address::DumpStyleNoFunctionName,
1353 Address::DumpStyleSectionNameOffset);
1354 } else {
1355 value_so_addr.Dump(
1356 &ss, target,
1357 Address::DumpStyleResolvedDescriptionNoFunctionArguments,
1358 Address::DumpStyleSectionNameOffset);
1359 }
1360
1361 if (!ss.GetString().empty()) {
1362 // If Address::Dump returned a multi-line description, most commonly
1363 // seen when we have multiple levels of inlined functions at an
1364 // address, only show the first line.
1365 std::string str = std::string(ss.GetString());
1366 size_t first_eol_char = str.find_first_of("\r\n");
1367 if (first_eol_char != std::string::npos) {
1368 str.erase(first_eol_char);
1369 }
1370 m_inst->AppendComment(str);
1371 }
1372 }
1373 }
1374 }
1375
1376 *type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
1377 *name = nullptr;
1378 return nullptr;
1379 }
1380
1381 // PluginInterface protocol
GetPluginName()1382 ConstString DisassemblerLLVMC::GetPluginName() { return GetPluginNameStatic(); }
1383
GetPluginVersion()1384 uint32_t DisassemblerLLVMC::GetPluginVersion() { return 1; }
1385