• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef COURGETTE_ASSEMBLY_PROGRAM_H_
6 #define COURGETTE_ASSEMBLY_PROGRAM_H_
7 
8 #include <map>
9 #include <set>
10 #include <vector>
11 
12 #include "base/basictypes.h"
13 #include "base/memory/scoped_ptr.h"
14 
15 #include "courgette/disassembler.h"
16 #include "courgette/memory_allocator.h"
17 
18 namespace courgette {
19 
20 class EncodedProgram;
21 class Instruction;
22 
// The container type used for a program's list of Instruction pointers (see
// AssemblyProgram::instructions_).
typedef NoThrowBuffer<Instruction*> InstructionVector;
24 
25 // A Label is a symbolic reference to an address.  Unlike a conventional
26 // assembly language, we always know the address.  The address will later be
27 // stored in a table and the Label will be replaced with the index into the
28 // table.
29 //
30 // TODO(sra): Make fields private and add setters and getters.
31 class Label {
32  public:
33   static const int kNoIndex = -1;
Label()34   Label() : rva_(0), index_(kNoIndex), count_(0) {}
Label(RVA rva)35   explicit Label(RVA rva) : rva_(rva), index_(kNoIndex), count_(0) {}
36 
37   RVA rva_;    // Address referred to by the label.
38   int index_;  // Index of address in address table, kNoIndex until assigned.
39   int count_;
40 };
41 
// Lookup table keyed by RVA whose values are pointers to the Label for that
// address.
typedef std::map<RVA, Label*> RVAToLabel;
43 
44 // An AssemblyProgram is the result of disassembling an executable file.
45 //
46 // * The disassembler creates labels in the AssemblyProgram and emits
47 //   'Instructions'.
48 // * The disassembler then calls DefaultAssignIndexes to assign
49 //   addresses to positions in the address tables.
50 // * [Optional step]
51 // * At this point the AssemblyProgram can be converted into an
52 //   EncodedProgram and serialized to an output stream.
53 // * Later, the EncodedProgram can be deserialized and assembled into
54 //   the original file.
55 //
56 // The optional step is to modify the AssemblyProgram.  One form of modification
57 // is to assign indexes in such a way as to make the EncodedProgram for this
58 // AssemblyProgram look more like the EncodedProgram for some other
59 // AssemblyProgram.  The modification process should call UnassignIndexes, do
60 // its own assignment, and then call AssignRemainingIndexes to ensure all
61 // indexes are assigned.
62 //
63 class AssemblyProgram {
64  public:
65   explicit AssemblyProgram(ExecutableType kind);
66   ~AssemblyProgram();
67 
kind()68   ExecutableType kind() const { return kind_; }
69 
set_image_base(uint64 image_base)70   void set_image_base(uint64 image_base) { image_base_ = image_base; }
71 
72   // Instructions will be assembled in the order they are emitted.
73 
74   // Generates an entire base relocation table.
75   CheckBool EmitPeRelocsInstruction() WARN_UNUSED_RESULT;
76 
77   // Generates an ELF style relocation table for X86.
78   CheckBool EmitElfRelocationInstruction() WARN_UNUSED_RESULT;
79 
80   // Generates an ELF style relocation table for ARM.
81   CheckBool EmitElfARMRelocationInstruction() WARN_UNUSED_RESULT;
82 
83   // Following instruction will be assembled at address 'rva'.
84   CheckBool EmitOriginInstruction(RVA rva) WARN_UNUSED_RESULT;
85 
86   // Generates a single byte of data or machine instruction.
87   CheckBool EmitByteInstruction(uint8 byte) WARN_UNUSED_RESULT;
88 
89   // Generates multiple bytes of data or machine instructions.
90   CheckBool EmitBytesInstruction(const uint8* value, uint32 len)
91       WARN_UNUSED_RESULT;
92 
93   // Generates 4-byte relative reference to address of 'label'.
94   CheckBool EmitRel32(Label* label) WARN_UNUSED_RESULT;
95 
96   // Generates 4-byte relative reference to address of 'label' for
97   // ARM.
98   CheckBool EmitRel32ARM(uint16 op, Label* label, const uint8* arm_op,
99                          uint16 op_size) WARN_UNUSED_RESULT;
100 
101   // Generates 4-byte absolute reference to address of 'label'.
102   CheckBool EmitAbs32(Label* label) WARN_UNUSED_RESULT;
103 
104   // Looks up a label or creates a new one.  Might return NULL.
105   Label* FindOrMakeAbs32Label(RVA rva);
106 
107   // Looks up a label or creates a new one.  Might return NULL.
108   Label* FindOrMakeRel32Label(RVA rva);
109 
110   void DefaultAssignIndexes();
111   void UnassignIndexes();
112   void AssignRemainingIndexes();
113 
114   EncodedProgram* Encode() const;
115 
116   // Accessor for instruction list.
instructions()117   const InstructionVector& instructions() const {
118     return instructions_;
119   }
120 
121   // Returns the label if the instruction contains and absolute address,
122   // otherwise returns NULL.
123   Label* InstructionAbs32Label(const Instruction* instruction) const;
124 
125   // Returns the label if the instruction contains and rel32 offset,
126   // otherwise returns NULL.
127   Label* InstructionRel32Label(const Instruction* instruction) const;
128 
129   // Trim underused labels
130   CheckBool TrimLabels();
131 
132   void PrintLabelCounts(RVAToLabel* labels);
133   void CountRel32ARM();
134 
135  private:
136   ExecutableType kind_;
137 
138   CheckBool Emit(Instruction* instruction) WARN_UNUSED_RESULT;
139 
140   static const int kLabelLowerLimit;
141 
142   // Looks up a label or creates a new one.  Might return NULL.
143   Label* FindLabel(RVA rva, RVAToLabel* labels);
144 
145   // Helper methods for the public versions.
146   static void UnassignIndexes(RVAToLabel* labels);
147   static void DefaultAssignIndexes(RVAToLabel* labels);
148   static void AssignRemainingIndexes(RVAToLabel* labels);
149 
150   // Sharing instructions that emit a single byte saves a lot of space.
151   Instruction* GetByteInstruction(uint8 byte);
152   scoped_ptr<Instruction*[]> byte_instruction_cache_;
153 
154   uint64 image_base_;  // Desired or mandated base address of image.
155 
156   InstructionVector instructions_;  // All the instructions in program.
157 
158   // These are lookup maps to find the label associated with a given address.
159   // We have separate label spaces for addresses referenced by rel32 labels and
160   // abs32 labels.  This is somewhat arbitrary.
161   RVAToLabel rel32_labels_;
162   RVAToLabel abs32_labels_;
163 
164   DISALLOW_COPY_AND_ASSIGN(AssemblyProgram);
165 };
166 
167 }  // namespace courgette
168 #endif  // COURGETTE_ASSEMBLY_PROGRAM_H_
169