1 //===-- Disassembler.h ------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLDB_CORE_DISASSEMBLER_H 10 #define LLDB_CORE_DISASSEMBLER_H 11 12 #include "lldb/Core/Address.h" 13 #include "lldb/Core/EmulateInstruction.h" 14 #include "lldb/Core/FormatEntity.h" 15 #include "lldb/Core/Opcode.h" 16 #include "lldb/Core/PluginInterface.h" 17 #include "lldb/Interpreter/OptionValue.h" 18 #include "lldb/Symbol/LineEntry.h" 19 #include "lldb/Target/ExecutionContext.h" 20 #include "lldb/Utility/ArchSpec.h" 21 #include "lldb/Utility/ConstString.h" 22 #include "lldb/Utility/FileSpec.h" 23 #include "lldb/lldb-defines.h" 24 #include "lldb/lldb-forward.h" 25 #include "lldb/lldb-private-enumerations.h" 26 #include "lldb/lldb-types.h" 27 28 #include "llvm/ADT/StringRef.h" 29 30 #include <functional> 31 #include <map> 32 #include <memory> 33 #include <set> 34 #include <string> 35 #include <vector> 36 37 #include <stddef.h> 38 #include <stdint.h> 39 #include <stdio.h> 40 41 namespace llvm { 42 template <typename T> class SmallVectorImpl; 43 } 44 45 namespace lldb_private { 46 class AddressRange; 47 class DataExtractor; 48 class Debugger; 49 class Disassembler; 50 class Module; 51 class StackFrame; 52 class Stream; 53 class SymbolContext; 54 class SymbolContextList; 55 class Target; 56 struct RegisterInfo; 57 58 class Instruction { 59 public: 60 Instruction(const Address &address, 61 AddressClass addr_class = AddressClass::eInvalid); 62 63 virtual ~Instruction(); 64 GetAddress()65 const Address &GetAddress() const { return m_address; } 66 GetMnemonic(const ExecutionContext * exe_ctx)67 const char *GetMnemonic(const ExecutionContext *exe_ctx) { 68 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); 69 return m_opcode_name.c_str(); 70 } 71 GetOperands(const ExecutionContext * exe_ctx)72 const char *GetOperands(const ExecutionContext *exe_ctx) { 73 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); 74 return m_mnemonics.c_str(); 75 } 76 GetComment(const ExecutionContext * exe_ctx)77 const char *GetComment(const ExecutionContext *exe_ctx) { 78 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); 79 return m_comment.c_str(); 80 } 81 82 virtual void 83 CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0; 84 85 AddressClass GetAddressClass(); 86 SetAddress(const Address & addr)87 void SetAddress(const Address &addr) { 88 // Invalidate the address class to lazily discover it if we need to. 89 m_address_class = AddressClass::eInvalid; 90 m_address = addr; 91 } 92 93 /// Dump the text representation of this Instruction to a Stream 94 /// 95 /// Print the (optional) address, (optional) bytes, opcode, 96 /// operands, and instruction comments to a stream. 97 /// 98 /// \param[in] s 99 /// The Stream to add the text to. 100 /// 101 /// \param[in] show_address 102 /// Whether the address (using disassembly_addr_format_spec formatting) 103 /// should be printed. 104 /// 105 /// \param[in] show_bytes 106 /// Whether the bytes of the assembly instruction should be printed. 107 /// 108 /// \param[in] max_opcode_byte_size 109 /// The size (in bytes) of the largest instruction in the list that 110 /// we are printing (for text justification/alignment purposes) 111 /// Only needed if show_bytes is true. 112 /// 113 /// \param[in] exe_ctx 114 /// The current execution context, if available. May be used in 115 /// the assembling of the operands+comments for this instruction. 116 /// Pass NULL if not applicable. 117 /// 118 /// \param[in] sym_ctx 119 /// The SymbolContext for this instruction. 120 /// Pass NULL if not available/computed. 121 /// Only needed if show_address is true. 122 /// 123 /// \param[in] prev_sym_ctx 124 /// The SymbolContext for the previous instruction. Depending on 125 /// the disassembly address format specification, a change in 126 /// Symbol / Function may mean that a line is printed with the new 127 /// symbol/function name. 128 /// Pass NULL if unavailable, or if this is the first instruction of 129 /// the InstructionList. 130 /// Only needed if show_address is true. 131 /// 132 /// \param[in] disassembly_addr_format 133 /// The format specification for how addresses are printed. 134 /// Only needed if show_address is true. 135 /// 136 /// \param[in] max_address_text_size 137 /// The length of the longest address string at the start of the 138 /// disassembly line that will be printed (the 139 /// Debugger::FormatDisassemblerAddress() string) 140 /// so this method can properly align the instruction opcodes. 141 /// May be 0 to indicate no indentation/alignment of the opcodes. 142 virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address, 143 bool show_bytes, const ExecutionContext *exe_ctx, 144 const SymbolContext *sym_ctx, 145 const SymbolContext *prev_sym_ctx, 146 const FormatEntity::Entry *disassembly_addr_format, 147 size_t max_address_text_size); 148 149 virtual bool DoesBranch() = 0; 150 151 virtual bool HasDelaySlot(); 152 153 bool CanSetBreakpoint (); 154 155 virtual size_t Decode(const Disassembler &disassembler, 156 const DataExtractor &data, 157 lldb::offset_t data_offset) = 0; 158 SetDescription(llvm::StringRef)159 virtual void SetDescription(llvm::StringRef) { 160 } // May be overridden in sub-classes that have descriptions. 161 162 lldb::OptionValueSP ReadArray(FILE *in_file, Stream *out_stream, 163 OptionValue::Type data_type); 164 165 lldb::OptionValueSP ReadDictionary(FILE *in_file, Stream *out_stream); 166 167 bool DumpEmulation(const ArchSpec &arch); 168 169 virtual bool TestEmulation(Stream *stream, const char *test_file_name); 170 171 bool Emulate(const ArchSpec &arch, uint32_t evaluate_options, void *baton, 172 EmulateInstruction::ReadMemoryCallback read_mem_callback, 173 EmulateInstruction::WriteMemoryCallback write_mem_calback, 174 EmulateInstruction::ReadRegisterCallback read_reg_callback, 175 EmulateInstruction::WriteRegisterCallback write_reg_callback); 176 GetOpcode()177 const Opcode &GetOpcode() const { return m_opcode; } 178 179 uint32_t GetData(DataExtractor &data); 180 181 struct Operand { 182 enum class Type { 183 Invalid = 0, 184 Register, 185 Immediate, 186 Dereference, 187 Sum, 188 Product 189 } m_type = Type::Invalid; 190 std::vector<Operand> m_children; 191 lldb::addr_t m_immediate = 0; 192 ConstString m_register; 193 bool m_negative = false; 194 bool m_clobbered = false; 195 IsValidOperand196 bool IsValid() { return m_type != Type::Invalid; } 197 198 static Operand BuildRegister(ConstString &r); 199 static Operand BuildImmediate(lldb::addr_t imm, bool neg); 200 static Operand BuildImmediate(int64_t imm); 201 static Operand BuildDereference(const Operand &ref); 202 static Operand BuildSum(const Operand &lhs, const Operand &rhs); 203 static Operand BuildProduct(const Operand &lhs, const Operand &rhs); 204 }; 205 ParseOperands(llvm::SmallVectorImpl<Operand> & operands)206 virtual bool ParseOperands(llvm::SmallVectorImpl<Operand> &operands) { 207 return false; 208 } 209 IsCall()210 virtual bool IsCall() { return false; } 211 212 protected: 213 Address m_address; // The section offset address of this instruction 214 // We include an address class in the Instruction class to 215 // allow the instruction specify the 216 // AddressClass::eCodeAlternateISA (currently used for 217 // thumb), and also to specify data (AddressClass::eData). 218 // The usual value will be AddressClass::eCode, but often 219 // when disassembling memory, you might run into data. 220 // This can help us to disassemble appropriately. 221 private: 222 AddressClass m_address_class; // Use GetAddressClass () accessor function! 223 224 protected: 225 Opcode m_opcode; // The opcode for this instruction 226 std::string m_opcode_name; 227 std::string m_mnemonics; 228 std::string m_comment; 229 bool m_calculated_strings; 230 231 void CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext * exe_ctx)232 CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext *exe_ctx) { 233 if (!m_calculated_strings) { 234 m_calculated_strings = true; 235 CalculateMnemonicOperandsAndComment(exe_ctx); 236 } 237 } 238 }; 239 240 namespace OperandMatchers { 241 std::function<bool(const Instruction::Operand &)> 242 MatchBinaryOp(std::function<bool(const Instruction::Operand &)> base, 243 std::function<bool(const Instruction::Operand &)> left, 244 std::function<bool(const Instruction::Operand &)> right); 245 246 std::function<bool(const Instruction::Operand &)> 247 MatchUnaryOp(std::function<bool(const Instruction::Operand &)> base, 248 std::function<bool(const Instruction::Operand &)> child); 249 250 std::function<bool(const Instruction::Operand &)> 251 MatchRegOp(const RegisterInfo &info); 252 253 std::function<bool(const Instruction::Operand &)> FetchRegOp(ConstString ®); 254 255 std::function<bool(const Instruction::Operand &)> MatchImmOp(int64_t imm); 256 257 std::function<bool(const Instruction::Operand &)> FetchImmOp(int64_t &imm); 258 259 std::function<bool(const Instruction::Operand &)> 260 MatchOpType(Instruction::Operand::Type type); 261 } 262 263 class InstructionList { 264 public: 265 InstructionList(); 266 ~InstructionList(); 267 268 size_t GetSize() const; 269 270 uint32_t GetMaxOpcocdeByteSize() const; 271 272 lldb::InstructionSP GetInstructionAtIndex(size_t idx) const; 273 274 /// Get the instruction at the given address. 275 /// 276 /// \return 277 /// A valid \a InstructionSP if the address could be found, or null 278 /// otherwise. 279 lldb::InstructionSP GetInstructionAtAddress(const Address &addr); 280 281 //------------------------------------------------------------------ 282 /// Get the index of the next branch instruction. 283 /// 284 /// Given a list of instructions, find the next branch instruction 285 /// in the list by returning an index. 286 /// 287 /// @param[in] start 288 /// The instruction index of the first instruction to check. 289 /// 290 /// @param[in] ignore_calls 291 /// It true, then fine the first branch instruction that isn't 292 /// a function call (a branch that calls and returns to the next 293 /// instruction). If false, find the instruction index of any 294 /// branch in the list. 295 /// 296 /// @param[out] found_calls 297 /// If non-null, this will be set to true if any calls were found in 298 /// extending the range. 299 /// 300 /// @return 301 /// The instruction index of the first branch that is at or past 302 /// \a start. Returns UINT32_MAX if no matching branches are 303 /// found. 304 //------------------------------------------------------------------ 305 uint32_t GetIndexOfNextBranchInstruction(uint32_t start, 306 bool ignore_calls, 307 bool *found_calls) const; 308 309 uint32_t GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr, 310 Target &target); 311 312 uint32_t GetIndexOfInstructionAtAddress(const Address &addr); 313 314 void Clear(); 315 316 void Append(lldb::InstructionSP &inst_sp); 317 318 void Dump(Stream *s, bool show_address, bool show_bytes, 319 const ExecutionContext *exe_ctx); 320 321 private: 322 typedef std::vector<lldb::InstructionSP> collection; 323 typedef collection::iterator iterator; 324 typedef collection::const_iterator const_iterator; 325 326 collection m_instructions; 327 }; 328 329 class PseudoInstruction : public Instruction { 330 public: 331 PseudoInstruction(); 332 333 ~PseudoInstruction() override; 334 335 bool DoesBranch() override; 336 337 bool HasDelaySlot() override; 338 CalculateMnemonicOperandsAndComment(const ExecutionContext * exe_ctx)339 void CalculateMnemonicOperandsAndComment( 340 const ExecutionContext *exe_ctx) override { 341 // TODO: fill this in and put opcode name into Instruction::m_opcode_name, 342 // mnemonic into Instruction::m_mnemonics, and any comment into 343 // Instruction::m_comment 344 } 345 346 size_t Decode(const Disassembler &disassembler, const DataExtractor &data, 347 lldb::offset_t data_offset) override; 348 349 void SetOpcode(size_t opcode_size, void *opcode_data); 350 351 void SetDescription(llvm::StringRef description) override; 352 353 protected: 354 std::string m_description; 355 356 PseudoInstruction(const PseudoInstruction &) = delete; 357 const PseudoInstruction &operator=(const PseudoInstruction &) = delete; 358 }; 359 360 class Disassembler : public std::enable_shared_from_this<Disassembler>, 361 public PluginInterface { 362 public: 363 enum { 364 eOptionNone = 0u, 365 eOptionShowBytes = (1u << 0), 366 eOptionRawOuput = (1u << 1), 367 eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains 368 // the current PC (mixed mode only) 369 eOptionMarkPCAddress = 370 (1u << 3) // Mark the disassembly line the contains the PC 371 }; 372 373 enum HexImmediateStyle { 374 eHexStyleC, 375 eHexStyleAsm, 376 }; 377 378 // FindPlugin should be lax about the flavor string (it is too annoying to 379 // have various internal uses of the disassembler fail because the global 380 // flavor string gets set wrong. Instead, if you get a flavor string you 381 // don't understand, use the default. Folks who care to check can use the 382 // FlavorValidForArchSpec method on the disassembler they got back. 383 static lldb::DisassemblerSP 384 FindPlugin(const ArchSpec &arch, const char *flavor, const char *plugin_name); 385 386 // This version will use the value in the Target settings if flavor is NULL; 387 static lldb::DisassemblerSP FindPluginForTarget(const Target &target, 388 const ArchSpec &arch, 389 const char *flavor, 390 const char *plugin_name); 391 392 struct Limit { 393 enum { Bytes, Instructions } kind; 394 lldb::addr_t value; 395 }; 396 397 static lldb::DisassemblerSP 398 DisassembleRange(const ArchSpec &arch, const char *plugin_name, 399 const char *flavor, Target &target, 400 const AddressRange &disasm_range, bool prefer_file_cache); 401 402 static lldb::DisassemblerSP 403 DisassembleBytes(const ArchSpec &arch, const char *plugin_name, 404 const char *flavor, const Address &start, const void *bytes, 405 size_t length, uint32_t max_num_instructions, 406 bool data_from_file); 407 408 static bool Disassemble(Debugger &debugger, const ArchSpec &arch, 409 const char *plugin_name, const char *flavor, 410 const ExecutionContext &exe_ctx, const Address &start, 411 Limit limit, bool mixed_source_and_assembly, 412 uint32_t num_mixed_context_lines, uint32_t options, 413 Stream &strm); 414 415 static bool Disassemble(Debugger &debugger, const ArchSpec &arch, 416 StackFrame &frame, Stream &strm); 417 418 // Constructors and Destructors 419 Disassembler(const ArchSpec &arch, const char *flavor); 420 ~Disassembler() override; 421 422 void PrintInstructions(Debugger &debugger, const ArchSpec &arch, 423 const ExecutionContext &exe_ctx, 424 bool mixed_source_and_assembly, 425 uint32_t num_mixed_context_lines, uint32_t options, 426 Stream &strm); 427 428 size_t ParseInstructions(Target &target, Address address, Limit limit, 429 Stream *error_strm_ptr, bool prefer_file_cache); 430 431 virtual size_t DecodeInstructions(const Address &base_addr, 432 const DataExtractor &data, 433 lldb::offset_t data_offset, 434 size_t num_instructions, bool append, 435 bool data_from_file) = 0; 436 437 InstructionList &GetInstructionList(); 438 439 const InstructionList &GetInstructionList() const; 440 GetArchitecture()441 const ArchSpec &GetArchitecture() const { return m_arch; } 442 GetFlavor()443 const char *GetFlavor() const { return m_flavor.c_str(); } 444 445 virtual bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch, 446 const char *flavor) = 0; 447 448 protected: 449 // SourceLine and SourceLinesToDisplay structures are only used in the mixed 450 // source and assembly display methods internal to this class. 451 452 struct SourceLine { 453 FileSpec file; 454 uint32_t line; 455 uint32_t column; 456 SourceLineSourceLine457 SourceLine() : file(), line(LLDB_INVALID_LINE_NUMBER), column(0) {} 458 459 bool operator==(const SourceLine &rhs) const { 460 return file == rhs.file && line == rhs.line && rhs.column == column; 461 } 462 463 bool operator!=(const SourceLine &rhs) const { 464 return file != rhs.file || line != rhs.line || column != rhs.column; 465 } 466 IsValidSourceLine467 bool IsValid() const { return line != LLDB_INVALID_LINE_NUMBER; } 468 }; 469 470 struct SourceLinesToDisplay { 471 std::vector<SourceLine> lines; 472 473 // index of the "current" source line, if we want to highlight that when 474 // displaying the source lines. (as opposed to the surrounding source 475 // lines provided to give context) 476 size_t current_source_line; 477 478 // Whether to print a blank line at the end of the source lines. 479 bool print_source_context_end_eol; 480 SourceLinesToDisplaySourceLinesToDisplay481 SourceLinesToDisplay() 482 : lines(), current_source_line(-1), print_source_context_end_eol(true) { 483 } 484 }; 485 486 // Get the function's declaration line number, hopefully a line number 487 // earlier than the opening curly brace at the start of the function body. 488 static SourceLine GetFunctionDeclLineEntry(const SymbolContext &sc); 489 490 // Add the provided SourceLine to the map of filenames-to-source-lines-seen. 491 static void AddLineToSourceLineTables( 492 SourceLine &line, 493 std::map<FileSpec, std::set<uint32_t>> &source_lines_seen); 494 495 // Given a source line, determine if we should print it when we're doing 496 // mixed source & assembly output. We're currently using the 497 // target.process.thread.step-avoid-regexp setting (which is used for 498 // stepping over inlined STL functions by default) to determine what source 499 // lines to avoid showing. 500 // 501 // Returns true if this source line should be elided (if the source line 502 // should not be displayed). 503 static bool 504 ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx, 505 const SymbolContext &sc, SourceLine &line); 506 507 static bool ElideMixedSourceAndDisassemblyLine(const ExecutionContext & exe_ctx,const SymbolContext & sc,LineEntry & line)508 ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx, 509 const SymbolContext &sc, LineEntry &line) { 510 SourceLine sl; 511 sl.file = line.file; 512 sl.line = line.line; 513 sl.column = line.column; 514 return ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, sl); 515 }; 516 517 // Classes that inherit from Disassembler can see and modify these 518 ArchSpec m_arch; 519 InstructionList m_instruction_list; 520 lldb::addr_t m_base_addr; 521 std::string m_flavor; 522 523 private: 524 // For Disassembler only 525 Disassembler(const Disassembler &) = delete; 526 const Disassembler &operator=(const Disassembler &) = delete; 527 }; 528 529 } // namespace lldb_private 530 531 #endif // LLDB_CORE_DISASSEMBLER_H 532