• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "courgette/assembly_program.h"
6 
7 #include <memory.h>
8 #include <algorithm>
9 #include <map>
10 #include <set>
11 #include <sstream>
12 #include <vector>
13 
14 #include "base/logging.h"
15 #include "base/memory/scoped_ptr.h"
16 
17 #include "courgette/courgette.h"
18 #include "courgette/encoded_program.h"
19 
20 namespace courgette {
21 
22 // Opcodes of simple assembly language
23 enum OP {
24   ORIGIN,         // ORIGIN <rva> - set current address for assembly.
25   MAKEPERELOCS,   // Generates a base relocation table.
26   MAKEELFRELOCS,  // Generates a base relocation table.
27   DEFBYTE,        // DEFBYTE <value> - emit a byte literal.
28   REL32,          // REL32 <label> - emit a rel32 encoded reference to 'label'.
29   ABS32,          // REL32 <label> - emit am abs32 encoded reference to 'label'.
30   REL32ARM,       // REL32ARM <c_op> <label> - arm-specific rel32 reference
31   MAKEELFARMRELOCS, // Generates a base relocation table.
32   DEFBYTES,       // Emits any number of byte literals
33   LAST_OP
34 };
35 
36 // Base class for instructions.  Because we have so many instructions we want to
37 // keep them as small as possible.  For this reason we avoid virtual functions.
38 //
39 class Instruction {
40  public:
op() const41   OP op() const { return static_cast<OP>(op_); }
42 
43  protected:
Instruction(OP op)44   explicit Instruction(OP op) : op_(op), info_(0) {}
Instruction(OP op,unsigned int info)45   Instruction(OP op, unsigned int info) : op_(op), info_(info) {}
46 
47   uint32 op_   : 4;    // A few bits to store the OP code.
48   uint32 info_ : 28;   // Remaining bits in first word available to subclass.
49 
50  private:
51   DISALLOW_COPY_AND_ASSIGN(Instruction);
52 };
53 
54 namespace {
55 
56 // Sets the current address for the emitting instructions.
57 class OriginInstruction : public Instruction {
58  public:
OriginInstruction(RVA rva)59   explicit OriginInstruction(RVA rva) : Instruction(ORIGIN, 0), rva_(rva) {}
origin_rva() const60   RVA origin_rva() const { return rva_; }
61  private:
62   RVA  rva_;
63 };
64 
65 // Emits an entire PE base relocation table.
66 class PeRelocsInstruction : public Instruction {
67  public:
PeRelocsInstruction()68   PeRelocsInstruction() : Instruction(MAKEPERELOCS) {}
69 };
70 
71 // Emits an ELF relocation table.
72 class ElfRelocsInstruction : public Instruction {
73  public:
ElfRelocsInstruction()74   ElfRelocsInstruction() : Instruction(MAKEELFRELOCS) {}
75 };
76 
77 // Emits an ELF ARM relocation table.
78 class ElfARMRelocsInstruction : public Instruction {
79  public:
ElfARMRelocsInstruction()80   ElfARMRelocsInstruction() : Instruction(MAKEELFARMRELOCS) {}
81 };
82 
83 // Emits a single byte.
84 class ByteInstruction : public Instruction {
85  public:
ByteInstruction(uint8 value)86   explicit ByteInstruction(uint8 value) : Instruction(DEFBYTE, value) {}
byte_value() const87   uint8 byte_value() const { return info_; }
88 };
89 
90 // Emits a single byte.
91 class BytesInstruction : public Instruction {
92  public:
BytesInstruction(const uint8 * values,uint32 len)93   BytesInstruction(const uint8* values, uint32 len)
94       : Instruction(DEFBYTES, 0),
95         values_(values),
96         len_(len) {}
byte_values() const97   const uint8* byte_values() const { return values_; }
len() const98   uint32 len() const { return len_; }
99 
100  private:
101   const uint8* values_;
102   uint32 len_;
103 };
104 
105 // A ABS32 to REL32 instruction emits a reference to a label's address.
106 class InstructionWithLabel : public Instruction {
107  public:
InstructionWithLabel(OP op,Label * label)108   InstructionWithLabel(OP op, Label* label)
109     : Instruction(op, 0), label_(label) {
110     if (label == NULL) NOTREACHED();
111   }
label() const112   Label* label() const { return label_; }
113  protected:
114   Label* label_;
115 };
116 
117 // An ARM REL32 instruction emits a reference to a label's address and
118 // a specially-compressed ARM op.
119 class InstructionWithLabelARM : public InstructionWithLabel {
120  public:
InstructionWithLabelARM(OP op,uint16 compressed_op,Label * label,const uint8 * arm_op,uint16 op_size)121   InstructionWithLabelARM(OP op, uint16 compressed_op, Label* label,
122                           const uint8* arm_op, uint16 op_size)
123     : InstructionWithLabel(op, label), compressed_op_(compressed_op),
124       arm_op_(arm_op), op_size_(op_size) {
125     if (label == NULL) NOTREACHED();
126   }
compressed_op() const127   uint16 compressed_op() const { return compressed_op_; }
arm_op() const128   const uint8* arm_op() const { return arm_op_; }
op_size() const129   uint16 op_size() const { return op_size_; }
130  private:
131   uint16 compressed_op_;
132   const uint8* arm_op_;
133   uint16 op_size_;
134 };
135 
136 }  // namespace
137 
AssemblyProgram(ExecutableType kind)138 AssemblyProgram::AssemblyProgram(ExecutableType kind)
139   : kind_(kind), image_base_(0) {
140 }
141 
DeleteContainedLabels(const RVAToLabel & labels)142 static void DeleteContainedLabels(const RVAToLabel& labels) {
143   for (RVAToLabel::const_iterator p = labels.begin();  p != labels.end();  ++p)
144     delete p->second;
145 }
146 
~AssemblyProgram()147 AssemblyProgram::~AssemblyProgram() {
148   for (size_t i = 0;  i < instructions_.size();  ++i) {
149     Instruction* instruction = instructions_[i];
150     if (instruction->op() != DEFBYTE)  // Will be in byte_instruction_cache_.
151       delete instruction;
152   }
153   if (byte_instruction_cache_.get()) {
154     for (size_t i = 0;  i < 256;  ++i)
155       delete byte_instruction_cache_[i];
156   }
157   DeleteContainedLabels(rel32_labels_);
158   DeleteContainedLabels(abs32_labels_);
159 }
160 
EmitPeRelocsInstruction()161 CheckBool AssemblyProgram::EmitPeRelocsInstruction() {
162   return Emit(new(std::nothrow) PeRelocsInstruction());
163 }
164 
EmitElfRelocationInstruction()165 CheckBool AssemblyProgram::EmitElfRelocationInstruction() {
166   return Emit(new(std::nothrow) ElfRelocsInstruction());
167 }
168 
EmitElfARMRelocationInstruction()169 CheckBool AssemblyProgram::EmitElfARMRelocationInstruction() {
170   return Emit(new(std::nothrow) ElfARMRelocsInstruction());
171 }
172 
EmitOriginInstruction(RVA rva)173 CheckBool AssemblyProgram::EmitOriginInstruction(RVA rva) {
174   return Emit(new(std::nothrow) OriginInstruction(rva));
175 }
176 
EmitByteInstruction(uint8 byte)177 CheckBool AssemblyProgram::EmitByteInstruction(uint8 byte) {
178   return Emit(GetByteInstruction(byte));
179 }
180 
EmitBytesInstruction(const uint8 * values,uint32 len)181 CheckBool AssemblyProgram::EmitBytesInstruction(const uint8* values,
182                                                 uint32 len) {
183   return Emit(new(std::nothrow) BytesInstruction(values, len));
184 }
185 
EmitRel32(Label * label)186 CheckBool AssemblyProgram::EmitRel32(Label* label) {
187   return Emit(new(std::nothrow) InstructionWithLabel(REL32, label));
188 }
189 
EmitRel32ARM(uint16 op,Label * label,const uint8 * arm_op,uint16 op_size)190 CheckBool AssemblyProgram::EmitRel32ARM(uint16 op, Label* label,
191                                         const uint8* arm_op, uint16 op_size) {
192   return Emit(new(std::nothrow) InstructionWithLabelARM(REL32ARM, op, label,
193                                                         arm_op, op_size));
194 }
195 
EmitAbs32(Label * label)196 CheckBool AssemblyProgram::EmitAbs32(Label* label) {
197   return Emit(new(std::nothrow) InstructionWithLabel(ABS32, label));
198 }
199 
FindOrMakeAbs32Label(RVA rva)200 Label* AssemblyProgram::FindOrMakeAbs32Label(RVA rva) {
201   return FindLabel(rva, &abs32_labels_);
202 }
203 
FindOrMakeRel32Label(RVA rva)204 Label* AssemblyProgram::FindOrMakeRel32Label(RVA rva) {
205   return FindLabel(rva, &rel32_labels_);
206 }
207 
DefaultAssignIndexes()208 void AssemblyProgram::DefaultAssignIndexes() {
209   DefaultAssignIndexes(&abs32_labels_);
210   DefaultAssignIndexes(&rel32_labels_);
211 }
212 
UnassignIndexes()213 void AssemblyProgram::UnassignIndexes() {
214   UnassignIndexes(&abs32_labels_);
215   UnassignIndexes(&rel32_labels_);
216 }
217 
AssignRemainingIndexes()218 void AssemblyProgram::AssignRemainingIndexes() {
219   AssignRemainingIndexes(&abs32_labels_);
220   AssignRemainingIndexes(&rel32_labels_);
221 }
222 
InstructionAbs32Label(const Instruction * instruction) const223 Label* AssemblyProgram::InstructionAbs32Label(
224     const Instruction* instruction) const {
225   if (instruction->op() == ABS32)
226     return static_cast<const InstructionWithLabel*>(instruction)->label();
227   return NULL;
228 }
229 
InstructionRel32Label(const Instruction * instruction) const230 Label* AssemblyProgram::InstructionRel32Label(
231     const Instruction* instruction) const {
232   if (instruction->op() == REL32 || instruction->op() == REL32ARM) {
233     Label* label =
234         static_cast<const InstructionWithLabel*>(instruction)->label();
235     return label;
236   }
237   return NULL;
238 }
239 
Emit(Instruction * instruction)240 CheckBool AssemblyProgram::Emit(Instruction* instruction) {
241   if (!instruction)
242     return false;
243   bool ok = instructions_.push_back(instruction);
244   if (!ok)
245     delete instruction;
246   return ok;
247 }
248 
FindLabel(RVA rva,RVAToLabel * labels)249 Label* AssemblyProgram::FindLabel(RVA rva, RVAToLabel* labels) {
250   Label*& slot = (*labels)[rva];
251   if (slot == NULL) {
252     slot = new(std::nothrow) Label(rva);
253   }
254   slot->count_++;
255   return slot;
256 }
257 
UnassignIndexes(RVAToLabel * labels)258 void AssemblyProgram::UnassignIndexes(RVAToLabel* labels) {
259   for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
260     Label* current = p->second;
261     current->index_ = Label::kNoIndex;
262   }
263 }
264 
265 // DefaultAssignIndexes takes a set of labels and assigns indexes in increasing
266 // address order.
267 //
DefaultAssignIndexes(RVAToLabel * labels)268 void AssemblyProgram::DefaultAssignIndexes(RVAToLabel* labels) {
269   int index = 0;
270   for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
271     Label* current = p->second;
272     if (current->index_ != Label::kNoIndex)
273       NOTREACHED();
274     current->index_ = index;
275     ++index;
276   }
277 }
278 
279 // AssignRemainingIndexes assigns indexes to any addresses (labels) that are not
280 // yet assigned an index.
281 //
AssignRemainingIndexes(RVAToLabel * labels)282 void AssemblyProgram::AssignRemainingIndexes(RVAToLabel* labels) {
283   // An address table compresses best when each index is associated with an
284   // address that is slight larger than the previous index.
285 
286   // First see which indexes have not been used.  The 'available' vector could
287   // grow even bigger, but the number of addresses is a better starting size
288   // than empty.
289   std::vector<bool> available(labels->size(), true);
290   int used = 0;
291 
292   for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
293     int index = p->second->index_;
294     if (index != Label::kNoIndex) {
295       while (static_cast<size_t>(index) >= available.size())
296         available.push_back(true);
297       available.at(index) = false;
298       ++used;
299     }
300   }
301 
302   VLOG(1) << used << " of " << labels->size() << " labels pre-assigned";
303 
304   // Are there any unused labels that happen to be adjacent following a used
305   // label?
306   //
307   int fill_forward_count = 0;
308   Label* prev = 0;
309   for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
310     Label* current = p->second;
311     if (current->index_ == Label::kNoIndex) {
312       int index = 0;
313       if (prev  &&  prev->index_ != Label::kNoIndex)
314         index = prev->index_ + 1;
315       if (index < static_cast<int>(available.size()) && available.at(index)) {
316         current->index_ = index;
317         available.at(index) = false;
318         ++fill_forward_count;
319       }
320     }
321     prev = current;
322   }
323 
324   // Are there any unused labels that happen to be adjacent preceeding a used
325   // label?
326   //
327   int fill_backward_count = 0;
328   prev = 0;
329   for (RVAToLabel::reverse_iterator p = labels->rbegin();
330        p != labels->rend();
331        ++p) {
332     Label* current = p->second;
333     if (current->index_ == Label::kNoIndex) {
334       int prev_index;
335       if (prev)
336         prev_index = prev->index_;
337       else
338         prev_index = static_cast<uint32>(available.size());
339       if (prev_index != 0  &&
340           prev_index != Label::kNoIndex  &&
341           available.at(prev_index - 1)) {
342         current->index_ = prev_index - 1;
343         available.at(current->index_) = false;
344         ++fill_backward_count;
345       }
346     }
347     prev = current;
348   }
349 
350   // Fill in any remaining indexes
351   int fill_infill_count = 0;
352   int index = 0;
353   for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
354     Label* current = p->second;
355     if (current->index_ == Label::kNoIndex) {
356       while (!available.at(index)) {
357         ++index;
358       }
359       current->index_ = index;
360       available.at(index) = false;
361       ++index;
362       ++fill_infill_count;
363     }
364   }
365 
366   VLOG(1) << "  fill forward " << fill_forward_count
367           << "  backward " << fill_backward_count
368           << "  infill " << fill_infill_count;
369 }
370 
371 typedef CheckBool (EncodedProgram::*DefineLabelMethod)(int index, RVA value);
372 
373 #if defined(OS_WIN)
374 __declspec(noinline)
375 #endif
DefineLabels(const RVAToLabel & labels,EncodedProgram * encoded_format,DefineLabelMethod define_label)376 static CheckBool DefineLabels(const RVAToLabel& labels,
377                               EncodedProgram* encoded_format,
378                               DefineLabelMethod define_label) {
379   bool ok = true;
380   for (RVAToLabel::const_iterator p = labels.begin();
381        ok && p != labels.end();
382        ++p) {
383     Label* label = p->second;
384     ok = (encoded_format->*define_label)(label->index_, label->rva_);
385   }
386   return ok;
387 }
388 
Encode() const389 EncodedProgram* AssemblyProgram::Encode() const {
390   scoped_ptr<EncodedProgram> encoded(new(std::nothrow) EncodedProgram());
391   if (!encoded.get())
392     return NULL;
393 
394   encoded->set_image_base(image_base_);
395 
396   if (!DefineLabels(abs32_labels_, encoded.get(),
397                     &EncodedProgram::DefineAbs32Label) ||
398       !DefineLabels(rel32_labels_, encoded.get(),
399                     &EncodedProgram::DefineRel32Label)) {
400     return NULL;
401   }
402 
403   encoded->EndLabels();
404 
405   for (size_t i = 0;  i < instructions_.size();  ++i) {
406     Instruction* instruction = instructions_[i];
407 
408     switch (instruction->op()) {
409       case ORIGIN: {
410         OriginInstruction* org = static_cast<OriginInstruction*>(instruction);
411         if (!encoded->AddOrigin(org->origin_rva()))
412           return NULL;
413         break;
414       }
415       case DEFBYTE: {
416         uint8 b = static_cast<ByteInstruction*>(instruction)->byte_value();
417         if (!encoded->AddCopy(1, &b))
418           return NULL;
419         break;
420       }
421       case DEFBYTES: {
422         const uint8* byte_values =
423           static_cast<BytesInstruction*>(instruction)->byte_values();
424         uint32 len = static_cast<BytesInstruction*>(instruction)->len();
425 
426         if (!encoded->AddCopy(len, byte_values))
427           return NULL;
428         break;
429       }
430       case REL32: {
431         Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
432         if (!encoded->AddRel32(label->index_))
433           return NULL;
434         break;
435       }
436       case REL32ARM: {
437         Label* label =
438             static_cast<InstructionWithLabelARM*>(instruction)->label();
439         uint16 compressed_op =
440           static_cast<InstructionWithLabelARM*>(instruction)->
441           compressed_op();
442         if (!encoded->AddRel32ARM(compressed_op, label->index_))
443           return NULL;
444         break;
445       }
446       case ABS32: {
447         Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
448         if (!encoded->AddAbs32(label->index_))
449           return NULL;
450         break;
451       }
452       case MAKEPERELOCS: {
453         if (!encoded->AddPeMakeRelocs(kind_))
454           return NULL;
455         break;
456       }
457       case MAKEELFRELOCS: {
458         if (!encoded->AddElfMakeRelocs())
459           return NULL;
460         break;
461       }
462       case MAKEELFARMRELOCS: {
463         if (!encoded->AddElfARMMakeRelocs())
464           return NULL;
465         break;
466       }
467       default: {
468         NOTREACHED() << "Unknown Insn OP kind";
469       }
470     }
471   }
472 
473   return encoded.release();
474 }
475 
GetByteInstruction(uint8 byte)476 Instruction* AssemblyProgram::GetByteInstruction(uint8 byte) {
477   if (!byte_instruction_cache_.get()) {
478     byte_instruction_cache_.reset(new(std::nothrow) Instruction*[256]);
479     if (!byte_instruction_cache_.get())
480       return NULL;
481 
482     for (int i = 0; i < 256; ++i) {
483       byte_instruction_cache_[i] =
484           new(std::nothrow) ByteInstruction(static_cast<uint8>(i));
485       if (!byte_instruction_cache_[i]) {
486         for (int j = 0; j < i; ++j)
487           delete byte_instruction_cache_[j];
488         byte_instruction_cache_.reset();
489         return NULL;
490       }
491     }
492   }
493 
494   return byte_instruction_cache_[byte];
495 }
496 
497 // Chosen empirically to give the best reduction in payload size for
498 // an update from daisy_3701.98.0 to daisy_4206.0.0.
499 const int AssemblyProgram::kLabelLowerLimit = 5;
500 
TrimLabels()501 CheckBool AssemblyProgram::TrimLabels() {
502   // For now only trim for ARM binaries
503   if (kind() != EXE_ELF_32_ARM)
504     return true;
505 
506   int lower_limit = kLabelLowerLimit;
507 
508   VLOG(1) << "TrimLabels: threshold " << lower_limit;
509 
510   // Remove underused labels from the list of labels
511   RVAToLabel::iterator it = rel32_labels_.begin();
512   while (it != rel32_labels_.end()) {
513     if (it->second->count_ <= lower_limit) {
514       rel32_labels_.erase(it++);
515     } else {
516       ++it;
517     }
518   }
519 
520   // Walk through the list of instructions, replacing trimmed labels
521   // with the original machine instruction
522   for (size_t i = 0; i < instructions_.size(); ++i) {
523     Instruction* instruction = instructions_[i];
524     switch (instruction->op()) {
525       case REL32ARM: {
526         Label* label =
527             static_cast<InstructionWithLabelARM*>(instruction)->label();
528         if (label->count_ <= lower_limit) {
529           const uint8* arm_op =
530               static_cast<InstructionWithLabelARM*>(instruction)->arm_op();
531           uint16 op_size =
532               static_cast<InstructionWithLabelARM*>(instruction)->op_size();
533 
534           if (op_size < 1)
535             return false;
536           BytesInstruction* new_instruction =
537             new(std::nothrow) BytesInstruction(arm_op, op_size);
538           instructions_[i] = new_instruction;
539         }
540         break;
541       }
542       default:
543         break;
544     }
545   }
546 
547   return true;
548 }
549 
PrintLabelCounts(RVAToLabel * labels)550 void AssemblyProgram::PrintLabelCounts(RVAToLabel* labels) {
551   for (RVAToLabel::const_iterator p = labels->begin(); p != labels->end();
552        ++p) {
553     Label* current = p->second;
554     if (current->index_ != Label::kNoIndex)
555       printf("%d\n", current->count_);
556   }
557 }
558 
CountRel32ARM()559 void AssemblyProgram::CountRel32ARM() {
560   PrintLabelCounts(&rel32_labels_);
561 }
562 
563 ////////////////////////////////////////////////////////////////////////////////
564 
TrimLabels(AssemblyProgram * program)565 Status TrimLabels(AssemblyProgram* program) {
566   if (program->TrimLabels())
567     return C_OK;
568   else
569     return C_TRIM_FAILED;
570 }
571 
Encode(AssemblyProgram * program,EncodedProgram ** output)572 Status Encode(AssemblyProgram* program, EncodedProgram** output) {
573   *output = NULL;
574   EncodedProgram *encoded = program->Encode();
575   if (encoded) {
576     *output = encoded;
577     return C_OK;
578   } else {
579     return C_GENERAL_ERROR;
580   }
581 }
582 
583 }  // namespace courgette
584