1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef COURGETTE_ASSEMBLY_PROGRAM_H_ 6 #define COURGETTE_ASSEMBLY_PROGRAM_H_ 7 8 #include <map> 9 #include <set> 10 #include <vector> 11 12 #include "base/basictypes.h" 13 #include "base/memory/scoped_ptr.h" 14 15 #include "courgette/disassembler.h" 16 #include "courgette/memory_allocator.h" 17 18 namespace courgette { 19 20 class EncodedProgram; 21 class Instruction; 22 23 typedef NoThrowBuffer<Instruction*> InstructionVector; 24 25 // A Label is a symbolic reference to an address. Unlike a conventional 26 // assembly language, we always know the address. The address will later be 27 // stored in a table and the Label will be replaced with the index into the 28 // table. 29 // 30 // TODO(sra): Make fields private and add setters and getters. 31 class Label { 32 public: 33 static const int kNoIndex = -1; Label()34 Label() : rva_(0), index_(kNoIndex), count_(0) {} Label(RVA rva)35 explicit Label(RVA rva) : rva_(rva), index_(kNoIndex), count_(0) {} 36 37 RVA rva_; // Address referred to by the label. 38 int index_; // Index of address in address table, kNoIndex until assigned. 39 int count_; 40 }; 41 42 typedef std::map<RVA, Label*> RVAToLabel; 43 44 // An AssemblyProgram is the result of disassembling an executable file. 45 // 46 // * The disassembler creates labels in the AssemblyProgram and emits 47 // 'Instructions'. 48 // * The disassembler then calls DefaultAssignIndexes to assign 49 // addresses to positions in the address tables. 50 // * [Optional step] 51 // * At this point the AssemblyProgram can be converted into an 52 // EncodedProgram and serialized to an output stream. 53 // * Later, the EncodedProgram can be deserialized and assembled into 54 // the original file. 55 // 56 // The optional step is to modify the AssemblyProgram. One form of modification 57 // is to assign indexes in such a way as to make the EncodedProgram for this 58 // AssemblyProgram look more like the EncodedProgram for some other 59 // AssemblyProgram. The modification process should call UnassignIndexes, do 60 // its own assignment, and then call AssignRemainingIndexes to ensure all 61 // indexes are assigned. 62 // 63 class AssemblyProgram { 64 public: 65 explicit AssemblyProgram(ExecutableType kind); 66 ~AssemblyProgram(); 67 kind()68 ExecutableType kind() const { return kind_; } 69 set_image_base(uint64 image_base)70 void set_image_base(uint64 image_base) { image_base_ = image_base; } 71 72 // Instructions will be assembled in the order they are emitted. 73 74 // Generates an entire base relocation table. 75 CheckBool EmitPeRelocsInstruction() WARN_UNUSED_RESULT; 76 77 // Generates an ELF style relocation table for X86. 78 CheckBool EmitElfRelocationInstruction() WARN_UNUSED_RESULT; 79 80 // Generates an ELF style relocation table for ARM. 81 CheckBool EmitElfARMRelocationInstruction() WARN_UNUSED_RESULT; 82 83 // Following instruction will be assembled at address 'rva'. 84 CheckBool EmitOriginInstruction(RVA rva) WARN_UNUSED_RESULT; 85 86 // Generates a single byte of data or machine instruction. 87 CheckBool EmitByteInstruction(uint8 byte) WARN_UNUSED_RESULT; 88 89 // Generates multiple bytes of data or machine instructions. 90 CheckBool EmitBytesInstruction(const uint8* value, uint32 len) 91 WARN_UNUSED_RESULT; 92 93 // Generates 4-byte relative reference to address of 'label'. 94 CheckBool EmitRel32(Label* label) WARN_UNUSED_RESULT; 95 96 // Generates 4-byte relative reference to address of 'label' for 97 // ARM. 98 CheckBool EmitRel32ARM(uint16 op, Label* label, const uint8* arm_op, 99 uint16 op_size) WARN_UNUSED_RESULT; 100 101 // Generates 4-byte absolute reference to address of 'label'. 102 CheckBool EmitAbs32(Label* label) WARN_UNUSED_RESULT; 103 104 // Looks up a label or creates a new one. Might return NULL. 105 Label* FindOrMakeAbs32Label(RVA rva); 106 107 // Looks up a label or creates a new one. Might return NULL. 108 Label* FindOrMakeRel32Label(RVA rva); 109 110 void DefaultAssignIndexes(); 111 void UnassignIndexes(); 112 void AssignRemainingIndexes(); 113 114 EncodedProgram* Encode() const; 115 116 // Accessor for instruction list. instructions()117 const InstructionVector& instructions() const { 118 return instructions_; 119 } 120 121 // Returns the label if the instruction contains and absolute address, 122 // otherwise returns NULL. 123 Label* InstructionAbs32Label(const Instruction* instruction) const; 124 125 // Returns the label if the instruction contains and rel32 offset, 126 // otherwise returns NULL. 127 Label* InstructionRel32Label(const Instruction* instruction) const; 128 129 // Trim underused labels 130 CheckBool TrimLabels(); 131 132 void PrintLabelCounts(RVAToLabel* labels); 133 void CountRel32ARM(); 134 135 private: 136 ExecutableType kind_; 137 138 CheckBool Emit(Instruction* instruction) WARN_UNUSED_RESULT; 139 140 static const int kLabelLowerLimit; 141 142 // Looks up a label or creates a new one. Might return NULL. 143 Label* FindLabel(RVA rva, RVAToLabel* labels); 144 145 // Helper methods for the public versions. 146 static void UnassignIndexes(RVAToLabel* labels); 147 static void DefaultAssignIndexes(RVAToLabel* labels); 148 static void AssignRemainingIndexes(RVAToLabel* labels); 149 150 // Sharing instructions that emit a single byte saves a lot of space. 151 Instruction* GetByteInstruction(uint8 byte); 152 scoped_ptr<Instruction*[]> byte_instruction_cache_; 153 154 uint64 image_base_; // Desired or mandated base address of image. 155 156 InstructionVector instructions_; // All the instructions in program. 157 158 // These are lookup maps to find the label associated with a given address. 159 // We have separate label spaces for addresses referenced by rel32 labels and 160 // abs32 labels. This is somewhat arbitrary. 161 RVAToLabel rel32_labels_; 162 RVAToLabel abs32_labels_; 163 164 DISALLOW_COPY_AND_ASSIGN(AssemblyProgram); 165 }; 166 167 } // namespace courgette 168 #endif // COURGETTE_ASSEMBLY_PROGRAM_H_ 169