1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "courgette/assembly_program.h"
6
7 #include <memory.h>
8 #include <algorithm>
9 #include <map>
10 #include <set>
11 #include <sstream>
12 #include <vector>
13
14 #include "base/logging.h"
15 #include "base/memory/scoped_ptr.h"
16
17 #include "courgette/courgette.h"
18 #include "courgette/encoded_program.h"
19
20 namespace courgette {
21
22 // Opcodes of simple assembly language
23 enum OP {
24 ORIGIN, // ORIGIN <rva> - set current address for assembly.
25 MAKEPERELOCS, // Generates a base relocation table.
26 MAKEELFRELOCS, // Generates a base relocation table.
27 DEFBYTE, // DEFBYTE <value> - emit a byte literal.
28 REL32, // REL32 <label> - emit a rel32 encoded reference to 'label'.
29 ABS32, // REL32 <label> - emit am abs32 encoded reference to 'label'.
30 REL32ARM, // REL32ARM <c_op> <label> - arm-specific rel32 reference
31 MAKEELFARMRELOCS, // Generates a base relocation table.
32 DEFBYTES, // Emits any number of byte literals
33 LAST_OP
34 };
35
36 // Base class for instructions. Because we have so many instructions we want to
37 // keep them as small as possible. For this reason we avoid virtual functions.
38 //
39 class Instruction {
40 public:
op() const41 OP op() const { return static_cast<OP>(op_); }
42
43 protected:
Instruction(OP op)44 explicit Instruction(OP op) : op_(op), info_(0) {}
Instruction(OP op,unsigned int info)45 Instruction(OP op, unsigned int info) : op_(op), info_(info) {}
46
47 uint32 op_ : 4; // A few bits to store the OP code.
48 uint32 info_ : 28; // Remaining bits in first word available to subclass.
49
50 private:
51 DISALLOW_COPY_AND_ASSIGN(Instruction);
52 };
53
54 namespace {
55
56 // Sets the current address for the emitting instructions.
57 class OriginInstruction : public Instruction {
58 public:
OriginInstruction(RVA rva)59 explicit OriginInstruction(RVA rva) : Instruction(ORIGIN, 0), rva_(rva) {}
origin_rva() const60 RVA origin_rva() const { return rva_; }
61 private:
62 RVA rva_;
63 };
64
65 // Emits an entire PE base relocation table.
66 class PeRelocsInstruction : public Instruction {
67 public:
PeRelocsInstruction()68 PeRelocsInstruction() : Instruction(MAKEPERELOCS) {}
69 };
70
71 // Emits an ELF relocation table.
72 class ElfRelocsInstruction : public Instruction {
73 public:
ElfRelocsInstruction()74 ElfRelocsInstruction() : Instruction(MAKEELFRELOCS) {}
75 };
76
77 // Emits an ELF ARM relocation table.
78 class ElfARMRelocsInstruction : public Instruction {
79 public:
ElfARMRelocsInstruction()80 ElfARMRelocsInstruction() : Instruction(MAKEELFARMRELOCS) {}
81 };
82
83 // Emits a single byte.
84 class ByteInstruction : public Instruction {
85 public:
ByteInstruction(uint8 value)86 explicit ByteInstruction(uint8 value) : Instruction(DEFBYTE, value) {}
byte_value() const87 uint8 byte_value() const { return info_; }
88 };
89
90 // Emits a single byte.
91 class BytesInstruction : public Instruction {
92 public:
BytesInstruction(const uint8 * values,uint32 len)93 BytesInstruction(const uint8* values, uint32 len)
94 : Instruction(DEFBYTES, 0),
95 values_(values),
96 len_(len) {}
byte_values() const97 const uint8* byte_values() const { return values_; }
len() const98 uint32 len() const { return len_; }
99
100 private:
101 const uint8* values_;
102 uint32 len_;
103 };
104
105 // A ABS32 to REL32 instruction emits a reference to a label's address.
106 class InstructionWithLabel : public Instruction {
107 public:
InstructionWithLabel(OP op,Label * label)108 InstructionWithLabel(OP op, Label* label)
109 : Instruction(op, 0), label_(label) {
110 if (label == NULL) NOTREACHED();
111 }
label() const112 Label* label() const { return label_; }
113 protected:
114 Label* label_;
115 };
116
117 // An ARM REL32 instruction emits a reference to a label's address and
118 // a specially-compressed ARM op.
119 class InstructionWithLabelARM : public InstructionWithLabel {
120 public:
InstructionWithLabelARM(OP op,uint16 compressed_op,Label * label,const uint8 * arm_op,uint16 op_size)121 InstructionWithLabelARM(OP op, uint16 compressed_op, Label* label,
122 const uint8* arm_op, uint16 op_size)
123 : InstructionWithLabel(op, label), compressed_op_(compressed_op),
124 arm_op_(arm_op), op_size_(op_size) {
125 if (label == NULL) NOTREACHED();
126 }
compressed_op() const127 uint16 compressed_op() const { return compressed_op_; }
arm_op() const128 const uint8* arm_op() const { return arm_op_; }
op_size() const129 uint16 op_size() const { return op_size_; }
130 private:
131 uint16 compressed_op_;
132 const uint8* arm_op_;
133 uint16 op_size_;
134 };
135
136 } // namespace
137
AssemblyProgram(ExecutableType kind)138 AssemblyProgram::AssemblyProgram(ExecutableType kind)
139 : kind_(kind), image_base_(0) {
140 }
141
DeleteContainedLabels(const RVAToLabel & labels)142 static void DeleteContainedLabels(const RVAToLabel& labels) {
143 for (RVAToLabel::const_iterator p = labels.begin(); p != labels.end(); ++p)
144 delete p->second;
145 }
146
~AssemblyProgram()147 AssemblyProgram::~AssemblyProgram() {
148 for (size_t i = 0; i < instructions_.size(); ++i) {
149 Instruction* instruction = instructions_[i];
150 if (instruction->op() != DEFBYTE) // Will be in byte_instruction_cache_.
151 delete instruction;
152 }
153 if (byte_instruction_cache_.get()) {
154 for (size_t i = 0; i < 256; ++i)
155 delete byte_instruction_cache_[i];
156 }
157 DeleteContainedLabels(rel32_labels_);
158 DeleteContainedLabels(abs32_labels_);
159 }
160
EmitPeRelocsInstruction()161 CheckBool AssemblyProgram::EmitPeRelocsInstruction() {
162 return Emit(new(std::nothrow) PeRelocsInstruction());
163 }
164
EmitElfRelocationInstruction()165 CheckBool AssemblyProgram::EmitElfRelocationInstruction() {
166 return Emit(new(std::nothrow) ElfRelocsInstruction());
167 }
168
EmitElfARMRelocationInstruction()169 CheckBool AssemblyProgram::EmitElfARMRelocationInstruction() {
170 return Emit(new(std::nothrow) ElfARMRelocsInstruction());
171 }
172
EmitOriginInstruction(RVA rva)173 CheckBool AssemblyProgram::EmitOriginInstruction(RVA rva) {
174 return Emit(new(std::nothrow) OriginInstruction(rva));
175 }
176
EmitByteInstruction(uint8 byte)177 CheckBool AssemblyProgram::EmitByteInstruction(uint8 byte) {
178 return Emit(GetByteInstruction(byte));
179 }
180
EmitBytesInstruction(const uint8 * values,uint32 len)181 CheckBool AssemblyProgram::EmitBytesInstruction(const uint8* values,
182 uint32 len) {
183 return Emit(new(std::nothrow) BytesInstruction(values, len));
184 }
185
EmitRel32(Label * label)186 CheckBool AssemblyProgram::EmitRel32(Label* label) {
187 return Emit(new(std::nothrow) InstructionWithLabel(REL32, label));
188 }
189
EmitRel32ARM(uint16 op,Label * label,const uint8 * arm_op,uint16 op_size)190 CheckBool AssemblyProgram::EmitRel32ARM(uint16 op, Label* label,
191 const uint8* arm_op, uint16 op_size) {
192 return Emit(new(std::nothrow) InstructionWithLabelARM(REL32ARM, op, label,
193 arm_op, op_size));
194 }
195
EmitAbs32(Label * label)196 CheckBool AssemblyProgram::EmitAbs32(Label* label) {
197 return Emit(new(std::nothrow) InstructionWithLabel(ABS32, label));
198 }
199
FindOrMakeAbs32Label(RVA rva)200 Label* AssemblyProgram::FindOrMakeAbs32Label(RVA rva) {
201 return FindLabel(rva, &abs32_labels_);
202 }
203
FindOrMakeRel32Label(RVA rva)204 Label* AssemblyProgram::FindOrMakeRel32Label(RVA rva) {
205 return FindLabel(rva, &rel32_labels_);
206 }
207
DefaultAssignIndexes()208 void AssemblyProgram::DefaultAssignIndexes() {
209 DefaultAssignIndexes(&abs32_labels_);
210 DefaultAssignIndexes(&rel32_labels_);
211 }
212
UnassignIndexes()213 void AssemblyProgram::UnassignIndexes() {
214 UnassignIndexes(&abs32_labels_);
215 UnassignIndexes(&rel32_labels_);
216 }
217
AssignRemainingIndexes()218 void AssemblyProgram::AssignRemainingIndexes() {
219 AssignRemainingIndexes(&abs32_labels_);
220 AssignRemainingIndexes(&rel32_labels_);
221 }
222
InstructionAbs32Label(const Instruction * instruction) const223 Label* AssemblyProgram::InstructionAbs32Label(
224 const Instruction* instruction) const {
225 if (instruction->op() == ABS32)
226 return static_cast<const InstructionWithLabel*>(instruction)->label();
227 return NULL;
228 }
229
InstructionRel32Label(const Instruction * instruction) const230 Label* AssemblyProgram::InstructionRel32Label(
231 const Instruction* instruction) const {
232 if (instruction->op() == REL32 || instruction->op() == REL32ARM) {
233 Label* label =
234 static_cast<const InstructionWithLabel*>(instruction)->label();
235 return label;
236 }
237 return NULL;
238 }
239
Emit(Instruction * instruction)240 CheckBool AssemblyProgram::Emit(Instruction* instruction) {
241 if (!instruction)
242 return false;
243 bool ok = instructions_.push_back(instruction);
244 if (!ok)
245 delete instruction;
246 return ok;
247 }
248
FindLabel(RVA rva,RVAToLabel * labels)249 Label* AssemblyProgram::FindLabel(RVA rva, RVAToLabel* labels) {
250 Label*& slot = (*labels)[rva];
251 if (slot == NULL) {
252 slot = new(std::nothrow) Label(rva);
253 }
254 slot->count_++;
255 return slot;
256 }
257
UnassignIndexes(RVAToLabel * labels)258 void AssemblyProgram::UnassignIndexes(RVAToLabel* labels) {
259 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
260 Label* current = p->second;
261 current->index_ = Label::kNoIndex;
262 }
263 }
264
265 // DefaultAssignIndexes takes a set of labels and assigns indexes in increasing
266 // address order.
267 //
DefaultAssignIndexes(RVAToLabel * labels)268 void AssemblyProgram::DefaultAssignIndexes(RVAToLabel* labels) {
269 int index = 0;
270 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
271 Label* current = p->second;
272 if (current->index_ != Label::kNoIndex)
273 NOTREACHED();
274 current->index_ = index;
275 ++index;
276 }
277 }
278
279 // AssignRemainingIndexes assigns indexes to any addresses (labels) that are not
280 // yet assigned an index.
281 //
AssignRemainingIndexes(RVAToLabel * labels)282 void AssemblyProgram::AssignRemainingIndexes(RVAToLabel* labels) {
283 // An address table compresses best when each index is associated with an
284 // address that is slight larger than the previous index.
285
286 // First see which indexes have not been used. The 'available' vector could
287 // grow even bigger, but the number of addresses is a better starting size
288 // than empty.
289 std::vector<bool> available(labels->size(), true);
290 int used = 0;
291
292 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
293 int index = p->second->index_;
294 if (index != Label::kNoIndex) {
295 while (static_cast<size_t>(index) >= available.size())
296 available.push_back(true);
297 available.at(index) = false;
298 ++used;
299 }
300 }
301
302 VLOG(1) << used << " of " << labels->size() << " labels pre-assigned";
303
304 // Are there any unused labels that happen to be adjacent following a used
305 // label?
306 //
307 int fill_forward_count = 0;
308 Label* prev = 0;
309 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
310 Label* current = p->second;
311 if (current->index_ == Label::kNoIndex) {
312 int index = 0;
313 if (prev && prev->index_ != Label::kNoIndex)
314 index = prev->index_ + 1;
315 if (index < static_cast<int>(available.size()) && available.at(index)) {
316 current->index_ = index;
317 available.at(index) = false;
318 ++fill_forward_count;
319 }
320 }
321 prev = current;
322 }
323
324 // Are there any unused labels that happen to be adjacent preceeding a used
325 // label?
326 //
327 int fill_backward_count = 0;
328 prev = 0;
329 for (RVAToLabel::reverse_iterator p = labels->rbegin();
330 p != labels->rend();
331 ++p) {
332 Label* current = p->second;
333 if (current->index_ == Label::kNoIndex) {
334 int prev_index;
335 if (prev)
336 prev_index = prev->index_;
337 else
338 prev_index = static_cast<uint32>(available.size());
339 if (prev_index != 0 &&
340 prev_index != Label::kNoIndex &&
341 available.at(prev_index - 1)) {
342 current->index_ = prev_index - 1;
343 available.at(current->index_) = false;
344 ++fill_backward_count;
345 }
346 }
347 prev = current;
348 }
349
350 // Fill in any remaining indexes
351 int fill_infill_count = 0;
352 int index = 0;
353 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
354 Label* current = p->second;
355 if (current->index_ == Label::kNoIndex) {
356 while (!available.at(index)) {
357 ++index;
358 }
359 current->index_ = index;
360 available.at(index) = false;
361 ++index;
362 ++fill_infill_count;
363 }
364 }
365
366 VLOG(1) << " fill forward " << fill_forward_count
367 << " backward " << fill_backward_count
368 << " infill " << fill_infill_count;
369 }
370
371 typedef CheckBool (EncodedProgram::*DefineLabelMethod)(int index, RVA value);
372
373 #if defined(OS_WIN)
374 __declspec(noinline)
375 #endif
DefineLabels(const RVAToLabel & labels,EncodedProgram * encoded_format,DefineLabelMethod define_label)376 static CheckBool DefineLabels(const RVAToLabel& labels,
377 EncodedProgram* encoded_format,
378 DefineLabelMethod define_label) {
379 bool ok = true;
380 for (RVAToLabel::const_iterator p = labels.begin();
381 ok && p != labels.end();
382 ++p) {
383 Label* label = p->second;
384 ok = (encoded_format->*define_label)(label->index_, label->rva_);
385 }
386 return ok;
387 }
388
Encode() const389 EncodedProgram* AssemblyProgram::Encode() const {
390 scoped_ptr<EncodedProgram> encoded(new(std::nothrow) EncodedProgram());
391 if (!encoded.get())
392 return NULL;
393
394 encoded->set_image_base(image_base_);
395
396 if (!DefineLabels(abs32_labels_, encoded.get(),
397 &EncodedProgram::DefineAbs32Label) ||
398 !DefineLabels(rel32_labels_, encoded.get(),
399 &EncodedProgram::DefineRel32Label)) {
400 return NULL;
401 }
402
403 encoded->EndLabels();
404
405 for (size_t i = 0; i < instructions_.size(); ++i) {
406 Instruction* instruction = instructions_[i];
407
408 switch (instruction->op()) {
409 case ORIGIN: {
410 OriginInstruction* org = static_cast<OriginInstruction*>(instruction);
411 if (!encoded->AddOrigin(org->origin_rva()))
412 return NULL;
413 break;
414 }
415 case DEFBYTE: {
416 uint8 b = static_cast<ByteInstruction*>(instruction)->byte_value();
417 if (!encoded->AddCopy(1, &b))
418 return NULL;
419 break;
420 }
421 case DEFBYTES: {
422 const uint8* byte_values =
423 static_cast<BytesInstruction*>(instruction)->byte_values();
424 uint32 len = static_cast<BytesInstruction*>(instruction)->len();
425
426 if (!encoded->AddCopy(len, byte_values))
427 return NULL;
428 break;
429 }
430 case REL32: {
431 Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
432 if (!encoded->AddRel32(label->index_))
433 return NULL;
434 break;
435 }
436 case REL32ARM: {
437 Label* label =
438 static_cast<InstructionWithLabelARM*>(instruction)->label();
439 uint16 compressed_op =
440 static_cast<InstructionWithLabelARM*>(instruction)->
441 compressed_op();
442 if (!encoded->AddRel32ARM(compressed_op, label->index_))
443 return NULL;
444 break;
445 }
446 case ABS32: {
447 Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
448 if (!encoded->AddAbs32(label->index_))
449 return NULL;
450 break;
451 }
452 case MAKEPERELOCS: {
453 if (!encoded->AddPeMakeRelocs(kind_))
454 return NULL;
455 break;
456 }
457 case MAKEELFRELOCS: {
458 if (!encoded->AddElfMakeRelocs())
459 return NULL;
460 break;
461 }
462 case MAKEELFARMRELOCS: {
463 if (!encoded->AddElfARMMakeRelocs())
464 return NULL;
465 break;
466 }
467 default: {
468 NOTREACHED() << "Unknown Insn OP kind";
469 }
470 }
471 }
472
473 return encoded.release();
474 }
475
GetByteInstruction(uint8 byte)476 Instruction* AssemblyProgram::GetByteInstruction(uint8 byte) {
477 if (!byte_instruction_cache_.get()) {
478 byte_instruction_cache_.reset(new(std::nothrow) Instruction*[256]);
479 if (!byte_instruction_cache_.get())
480 return NULL;
481
482 for (int i = 0; i < 256; ++i) {
483 byte_instruction_cache_[i] =
484 new(std::nothrow) ByteInstruction(static_cast<uint8>(i));
485 if (!byte_instruction_cache_[i]) {
486 for (int j = 0; j < i; ++j)
487 delete byte_instruction_cache_[j];
488 byte_instruction_cache_.reset();
489 return NULL;
490 }
491 }
492 }
493
494 return byte_instruction_cache_[byte];
495 }
496
497 // Chosen empirically to give the best reduction in payload size for
498 // an update from daisy_3701.98.0 to daisy_4206.0.0.
499 const int AssemblyProgram::kLabelLowerLimit = 5;
500
TrimLabels()501 CheckBool AssemblyProgram::TrimLabels() {
502 // For now only trim for ARM binaries
503 if (kind() != EXE_ELF_32_ARM)
504 return true;
505
506 int lower_limit = kLabelLowerLimit;
507
508 VLOG(1) << "TrimLabels: threshold " << lower_limit;
509
510 // Remove underused labels from the list of labels
511 RVAToLabel::iterator it = rel32_labels_.begin();
512 while (it != rel32_labels_.end()) {
513 if (it->second->count_ <= lower_limit) {
514 rel32_labels_.erase(it++);
515 } else {
516 ++it;
517 }
518 }
519
520 // Walk through the list of instructions, replacing trimmed labels
521 // with the original machine instruction
522 for (size_t i = 0; i < instructions_.size(); ++i) {
523 Instruction* instruction = instructions_[i];
524 switch (instruction->op()) {
525 case REL32ARM: {
526 Label* label =
527 static_cast<InstructionWithLabelARM*>(instruction)->label();
528 if (label->count_ <= lower_limit) {
529 const uint8* arm_op =
530 static_cast<InstructionWithLabelARM*>(instruction)->arm_op();
531 uint16 op_size =
532 static_cast<InstructionWithLabelARM*>(instruction)->op_size();
533
534 if (op_size < 1)
535 return false;
536 BytesInstruction* new_instruction =
537 new(std::nothrow) BytesInstruction(arm_op, op_size);
538 instructions_[i] = new_instruction;
539 }
540 break;
541 }
542 default:
543 break;
544 }
545 }
546
547 return true;
548 }
549
PrintLabelCounts(RVAToLabel * labels)550 void AssemblyProgram::PrintLabelCounts(RVAToLabel* labels) {
551 for (RVAToLabel::const_iterator p = labels->begin(); p != labels->end();
552 ++p) {
553 Label* current = p->second;
554 if (current->index_ != Label::kNoIndex)
555 printf("%d\n", current->count_);
556 }
557 }
558
CountRel32ARM()559 void AssemblyProgram::CountRel32ARM() {
560 PrintLabelCounts(&rel32_labels_);
561 }
562
563 ////////////////////////////////////////////////////////////////////////////////
564
TrimLabels(AssemblyProgram * program)565 Status TrimLabels(AssemblyProgram* program) {
566 if (program->TrimLabels())
567 return C_OK;
568 else
569 return C_TRIM_FAILED;
570 }
571
Encode(AssemblyProgram * program,EncodedProgram ** output)572 Status Encode(AssemblyProgram* program, EncodedProgram** output) {
573 *output = NULL;
574 EncodedProgram *encoded = program->Encode();
575 if (encoded) {
576 *output = encoded;
577 return C_OK;
578 } else {
579 return C_GENERAL_ERROR;
580 }
581 }
582
583 } // namespace courgette
584