• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "courgette/disassembler_elf_32_arm.h"
6 
7 #include <algorithm>
8 #include <string>
9 #include <vector>
10 
11 #include "base/basictypes.h"
12 #include "base/logging.h"
13 
14 #include "courgette/assembly_program.h"
15 #include "courgette/courgette.h"
16 #include "courgette/encoded_program.h"
17 
18 namespace courgette {
19 
Compress(ARM_RVA type,uint32 arm_op,RVA rva,uint16 * c_op,uint32 * addr)20 CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type, uint32 arm_op, RVA rva,
21                                          uint16* c_op, uint32* addr) {
22   // This method takes an ARM or thumb opcode, extracts the relative
23   // target address from it (addr), and creates a corresponding
24   // Courgette opcode (c_op).
25   //
26   // Details on ARM the opcodes, and how the relative targets are
27   // computed were taken from the "ARM Architecture Reference Manual",
28   // section A4.1.5 and the "Thumb-2 supplement", section 4.6.12.
29   // ARM_OFF24 is for the ARM opcode.  The rest are for thumb opcodes.
30   switch (type) {
31     case ARM_OFF8: {
32       // The offset is given by lower 8 bits of the op.  It is a 9-bit
33       // offset, shifted right one bit and signed extended.
34       uint32 temp = (arm_op & 0x00FF) << 1;
35       if (temp & 0x0100)
36         temp |= 0xFFFFFE00;
37       temp += 4;  // Offset from _next_ PC.
38       fflush(stdout);
39 
40       (*addr) = temp;
41       (*c_op) = (arm_op >> 8) | 0x1000;
42       break;
43     }
44     case ARM_OFF11: {
45       // The offset is given by lower 11 bits of the op, and is a
46       // 12-bit offset, shifted right one bit and sign extended.
47       uint32 temp = (arm_op & 0x07FF) << 1;
48       if (temp & 0x00000800)
49         temp |= 0xFFFFF000;
50       temp += 4;  // Offset from _next_ PC.
51 
52       (*addr) = temp;
53       (*c_op) = (arm_op >> 11) | 0x2000;
54       break;
55     }
56     case ARM_OFF24: {
57       // The offset is given by the lower 24-bits of the op, shifted
58       // left 2 bits, and sign extended.
59       uint32 temp = (arm_op & 0x00FFFFFF) << 2;
60       if (temp & 0x02000000)
61         temp |= 0xFC000000;
62       temp += 8;
63 
64       (*addr) = temp;
65       (*c_op) = (arm_op >> 24) | 0x3000;
66       break;
67     }
68     case ARM_OFF25: {
69       uint32 temp = 0;
70       temp |= (arm_op & 0x000007FF) << 1;  // imm11
71       temp |= (arm_op & 0x03FF0000) >> 4;  // imm10
72 
73       uint32 S   = (arm_op & (1 << 26)) >> 26;
74       uint32 j2  = (arm_op & (1 << 11)) >> 11;
75       uint32 j1  = (arm_op & (1 << 13)) >> 13;
76       bool bit12 = ((arm_op & (1 << 12)) >> 12) != 0;
77       bool bit14 = ((arm_op & (1 << 14)) >> 14) != 0;
78 
79       uint32 i2  = ~(j2 ^ S) & 1;
80       uint32 i1  = ~(j1 ^ S) & 1;
81       bool toARM =  bit14 && !bit12;
82 
83       temp |= (S << 24) | (i1 << 23) | (i2 << 22);
84 
85       if (temp & 0x01000000) // sign extension
86         temp |= 0xFE000000;
87       uint32 prefetch;
88       if (toARM) {
89         // Align PC on 4-byte boundary
90         uint32 align4byte = (rva % 4) ? 2 : 4;
91         prefetch = align4byte;
92       } else {
93         prefetch = 4;
94       }
95       temp += prefetch;
96       (*addr) = temp;
97 
98       uint32 temp2 = 0x4000;
99       temp2 |= (arm_op & (1 << 12)) >> 12;
100       temp2 |= (arm_op & (1 << 14)) >> 13;
101       temp2 |= (arm_op & (1 << 15)) >> 13;
102       temp2 |= (arm_op & 0xF8000000) >> 24;
103       temp2 |= (prefetch & 0x0000000F) << 8;
104       (*c_op) = temp2;
105       break;
106     }
107     case ARM_OFF21: {
108       uint32 temp = 0;
109       temp |= (arm_op & 0x000007FF) << 1;  // imm11
110       temp |= (arm_op & 0x003F0000) >> 4;  // imm6
111 
112       uint32 S   = (arm_op & (1 << 26)) >> 26;
113       uint32 j2  = (arm_op & (1 << 11)) >> 11;
114       uint32 j1  = (arm_op & (1 << 13)) >> 13;
115 
116       temp |= (S << 20) | (j1 << 19) | (j2 << 18);
117 
118       if (temp & 0x00100000)  // sign extension
119         temp |= 0xFFE00000;
120       temp += 4;
121       (*addr) = temp;
122 
123       uint32 temp2 = 0x5000;
124       temp2 |= (arm_op & 0x03C00000) >> 22;  // just save the cond
125       (*c_op) = temp2;
126       break;
127     }
128     default:
129       return false;
130   }
131   return true;
132 }
133 
Decompress(ARM_RVA type,uint16 c_op,uint32 addr,uint32 * arm_op)134 CheckBool DisassemblerElf32ARM::Decompress(ARM_RVA type, uint16 c_op,
135                                            uint32 addr, uint32* arm_op) {
136   // Reverses the process in the compress() method.  Takes the
137   // Courgette op and relative address and reconstructs the original
138   // ARM or thumb op.
139   switch (type) {
140     case ARM_OFF8:
141       (*arm_op) = ((c_op & 0x0FFF) << 8) | (((addr - 4) >> 1) & 0x000000FF);
142       break;
143     case ARM_OFF11:
144       (*arm_op) = ((c_op & 0x0FFF) << 11) | (((addr - 4) >> 1) & 0x000007FF);
145       break;
146     case ARM_OFF24:
147       (*arm_op) = ((c_op & 0x0FFF) << 24) | (((addr - 8) >> 2) & 0x00FFFFFF);
148       break;
149     case ARM_OFF25: {
150       uint32 temp = 0;
151       temp |= (c_op & (1 << 0)) << 12;
152       temp |= (c_op & (1 << 1)) << 13;
153       temp |= (c_op & (1 << 2)) << 13;
154       temp |= (c_op & (0xF8000000 >> 24)) << 24;
155 
156       uint32 prefetch = (c_op & 0x0F00) >> 8;
157       addr -= prefetch;
158 
159       addr &= 0x01FFFFFF;
160 
161       uint32 S  = (addr & (1 << 24)) >> 24;
162       uint32 i1 = (addr & (1 << 23)) >> 23;
163       uint32 i2 = (addr & (1 << 22)) >> 22;
164 
165       uint32 j1 = ((~i1) ^ S) & 1;
166       uint32 j2 = ((~i2) ^ S) & 1;
167 
168       temp |= S << 26;
169       temp |= j2 << 11;
170       temp |= j1 << 13;
171 
172       temp |= (addr & (0x000007FF << 1)) >> 1;
173       temp |= (addr & (0x03FF0000 >> 4)) << 4;
174 
175       (*arm_op) = temp;
176       break;
177     }
178     case ARM_OFF21: {
179       uint32 temp = 0xF0008000;
180       temp |= (c_op & (0x03C00000 >> 22)) << 22;
181 
182       addr -= 4;
183       addr &= 0x001FFFFF;
184 
185       uint32 S  = (addr & (1 << 20)) >> 20;
186       uint32 j1 = (addr & (1 << 19)) >> 19;
187       uint32 j2 = (addr & (1 << 18)) >> 18;
188 
189       temp |= S << 26;
190       temp |= j2 << 11;
191       temp |= j1 << 13;
192 
193       temp |= (addr & (0x000007FF << 1)) >> 1;
194       temp |= (addr & (0x003F0000 >> 4)) << 4;
195 
196       (*arm_op) = temp;
197       break;
198     }
199     default:
200       return false;
201   }
202   return true;
203 }
204 
op_size() const205 uint16 DisassemblerElf32ARM::TypedRVAARM::op_size() const {
206   switch (type_) {
207     case ARM_OFF8:
208       return 2;
209     case ARM_OFF11:
210       return 2;
211     case ARM_OFF24:
212       return 4;
213     case ARM_OFF25:
214       return 4;
215     case ARM_OFF21:
216       return 4;
217     default:
218       return -1;
219   }
220 }
221 
ComputeRelativeTarget(const uint8 * op_pointer)222 CheckBool DisassemblerElf32ARM::TypedRVAARM::ComputeRelativeTarget(
223     const uint8* op_pointer) {
224   arm_op_ = op_pointer;
225   switch (type_) {
226     case ARM_OFF8:
227       // Fall through
228     case ARM_OFF11: {
229       RVA relative_target;
230       CheckBool ret = Compress(type_, Read16LittleEndian(op_pointer), rva(),
231                                &c_op_, &relative_target);
232       set_relative_target(relative_target);
233       return ret;
234     }
235     case ARM_OFF24: {
236       RVA relative_target;
237       CheckBool ret = Compress(type_, Read32LittleEndian(op_pointer), rva(),
238                                &c_op_, &relative_target);
239       set_relative_target(relative_target);
240       return ret;
241     }
242     case ARM_OFF25:
243       // Fall through
244     case ARM_OFF21: {
245       // A thumb-2 op is 32 bits stored as two 16-bit words
246       uint32 pval = (Read16LittleEndian(op_pointer) << 16)
247         | Read16LittleEndian(op_pointer + 2);
248       RVA relative_target;
249       CheckBool ret = Compress(type_, pval, rva(), &c_op_, &relative_target);
250       set_relative_target(relative_target);
251       return ret;
252     }
253    default:
254      return false;
255   }
256 }
257 
EmitInstruction(AssemblyProgram * program,RVA target_rva)258 CheckBool DisassemblerElf32ARM::TypedRVAARM::EmitInstruction(
259     AssemblyProgram* program,
260     RVA target_rva) {
261   return program->EmitRel32ARM(c_op(),
262                                program->FindOrMakeRel32Label(target_rva),
263                                arm_op_,
264                                op_size());
265 }
266 
DisassemblerElf32ARM(const void * start,size_t length)267 DisassemblerElf32ARM::DisassemblerElf32ARM(const void* start, size_t length)
268   : DisassemblerElf32(start, length) {
269 }
270 
271 // Convert an ELF relocation struction into an RVA
RelToRVA(Elf32_Rel rel,RVA * result) const272 CheckBool DisassemblerElf32ARM::RelToRVA(Elf32_Rel rel, RVA* result) const {
273 
274   // The rightmost byte of r_info is the type...
275   elf32_rel_arm_type_values type =
276       (elf32_rel_arm_type_values)(unsigned char)rel.r_info;
277 
278   // The other 3 bytes of r_info are the symbol
279   uint32 symbol =  rel.r_info >> 8;
280 
281   switch(type)
282   {
283     case R_ARM_RELATIVE:
284       if (symbol != 0)
285         return false;
286 
287       // This is a basic ABS32 relocation address
288       *result = rel.r_offset;
289       return true;
290 
291     default:
292       return false;
293   }
294 
295   return false;
296 }
297 
ParseRelocationSection(const Elf32_Shdr * section_header,AssemblyProgram * program)298 CheckBool DisassemblerElf32ARM::ParseRelocationSection(
299     const Elf32_Shdr *section_header,
300       AssemblyProgram* program) {
301   // This method compresses a contiguous stretch of R_ARM_RELATIVE
302   // entries in the relocation table with a Courgette relocation table
303   // instruction.  It skips any entries at the beginning that appear
304   // in a section that Courgette doesn't support, e.g. INIT.
305   // Specifically, the entries should be
306   //   (1) In the same relocation table
307   //   (2) Are consecutive
308   //   (3) Are sorted in memory address order
309   //
310   // Happily, this is normally the case, but it's not required by spec
311   // so we check, and just don't do it if we don't match up.
312   //
313   // The expectation is that one relocation section will contain
314   // all of our R_ARM_RELATIVE entries in the expected order followed
315   // by assorted other entries we can't use special handling for.
316 
317   bool match = true;
318 
319   // Walk all the bytes in the section, matching relocation table or not
320   size_t file_offset = section_header->sh_offset;
321   size_t section_end = section_header->sh_offset + section_header->sh_size;
322 
323   Elf32_Rel *section_relocs_iter =
324       (Elf32_Rel *)OffsetToPointer(section_header->sh_offset);
325 
326   uint32 section_relocs_count = section_header->sh_size /
327                                 section_header->sh_entsize;
328 
329   if (abs32_locations_.size() > section_relocs_count)
330     match = false;
331 
332   if (!abs32_locations_.empty()) {
333     std::vector<RVA>::iterator reloc_iter = abs32_locations_.begin();
334 
335     for (uint32 i = 0; i < section_relocs_count; i++) {
336       if (section_relocs_iter->r_offset == *reloc_iter)
337         break;
338 
339       if (!ParseSimpleRegion(file_offset, file_offset + sizeof(Elf32_Rel),
340                              program))
341         return false;
342 
343       file_offset += sizeof(Elf32_Rel);
344       ++section_relocs_iter;
345     }
346 
347     while (match && (reloc_iter != abs32_locations_.end())) {
348       if (section_relocs_iter->r_info != R_ARM_RELATIVE ||
349           section_relocs_iter->r_offset != *reloc_iter)
350         match = false;
351 
352       section_relocs_iter++;
353       reloc_iter++;
354       file_offset += sizeof(Elf32_Rel);
355     }
356 
357     if (match) {
358       // Skip over relocation tables
359       if (!program->EmitElfARMRelocationInstruction())
360         return false;
361     }
362   }
363 
364   return ParseSimpleRegion(file_offset, section_end, program);
365 }
366 
ParseRel32RelocsFromSection(const Elf32_Shdr * section_header)367 CheckBool DisassemblerElf32ARM::ParseRel32RelocsFromSection(
368     const Elf32_Shdr* section_header) {
369 
370   uint32 start_file_offset = section_header->sh_offset;
371   uint32 end_file_offset = start_file_offset + section_header->sh_size;
372 
373   const uint8* start_pointer = OffsetToPointer(start_file_offset);
374   const uint8* end_pointer = OffsetToPointer(end_file_offset);
375 
376   // Quick way to convert from Pointer to RVA within a single Section is to
377   // subtract 'pointer_to_rva'.
378   const uint8* const adjust_pointer_to_rva = start_pointer -
379                                              section_header->sh_addr;
380 
381   // Find the rel32 relocations.
382   const uint8* p = start_pointer;
383   bool on_32bit = 1; // 32-bit ARM ops appear on 32-bit boundaries, so track it
384   while (p < end_pointer) {
385     // Heuristic discovery of rel32 locations in instruction stream: are the
386     // next few bytes the start of an instruction containing a rel32
387     // addressing mode?
388 
389     TypedRVAARM* rel32_rva = NULL;
390     RVA target_rva;
391     bool found = false;
392 
393     // 16-bit thumb ops
394     if (!found && (p + 3) <= end_pointer) {
395       uint16 pval = Read16LittleEndian(p);
396       if ((pval & 0xF000) == 0xD000) {
397         RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
398 
399         rel32_rva = new TypedRVAARM(ARM_OFF8, rva);
400         if (!rel32_rva->ComputeRelativeTarget((uint8*) p)) {
401           return false;
402         }
403         target_rva = rel32_rva->rva() + rel32_rva->relative_target();
404         found = true;
405       } else if ((pval & 0xF800) == 0xE000) {
406         RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
407 
408         rel32_rva = new TypedRVAARM(ARM_OFF11, rva);
409         if (!rel32_rva->ComputeRelativeTarget((uint8*) p)) {
410           return false;
411         }
412         target_rva = rel32_rva->rva() + rel32_rva->relative_target();
413         found = true;
414       }
415     }
416 
417     // thumb-2 ops comprised of two 16-bit words
418     if (!found && (p + 5) <= end_pointer) {
419       // This is really two 16-bit words, not one 32-bit word.
420       uint32 pval = (Read16LittleEndian(p) << 16) | Read16LittleEndian(p + 2);
421       if ((pval & 0xF8008000) == 0xF0008000) {
422         // Covers thumb-2's 32-bit conditional/unconditional branches
423 
424         if ( (pval & (1 << 14)) || (pval & (1 << 12)) ) {
425           // A branch, with link, or with link and exchange.
426           RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
427 
428           rel32_rva = new TypedRVAARM(ARM_OFF25, rva);
429           if (!rel32_rva->ComputeRelativeTarget((uint8*) p)) {
430             return false;
431           }
432           target_rva = rel32_rva->rva() + rel32_rva->relative_target();
433           found = true;
434         } else {
435           // TODO(paulgazz) make sure cond is not 111
436           // A conditional branch instruction
437           RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
438 
439           rel32_rva = new TypedRVAARM(ARM_OFF21, rva);
440           if (!rel32_rva->ComputeRelativeTarget((uint8*) p)) {
441             return false;
442           }
443           target_rva = rel32_rva->rva() + rel32_rva->relative_target();
444           found = true;
445         }
446       }
447     }
448 
449     // 32-bit ARM ops
450     if (!found && on_32bit && (p + 5) <= end_pointer) {
451       uint32 pval = Read32LittleEndian(p);
452       if ((pval & 0x0E000000) == 0x0A000000) {
453         // Covers both 0x0A 0x0B ARM relative branches
454         RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
455 
456         rel32_rva = new TypedRVAARM(ARM_OFF24, rva);
457         if (!rel32_rva->ComputeRelativeTarget((uint8*) p)) {
458           return false;
459         }
460         target_rva = rel32_rva->rva() + rel32_rva->relative_target();
461         found = true;
462       }
463     }
464 
465     if (found && IsValidRVA(target_rva)) {
466       rel32_locations_.push_back(rel32_rva);
467 #if COURGETTE_HISTOGRAM_TARGETS
468       ++rel32_target_rvas_[target_rva];
469 #endif
470       p += rel32_rva->op_size();
471 
472       // A tricky way to update the on_32bit flag.  Here is the truth table:
473       // on_32bit | on_32bit   size is 4
474       // ---------+---------------------
475       // 1        | 0          0
476       // 0        | 0          1
477       // 0        | 1          0
478       // 1        | 1          1
479       on_32bit = (~(on_32bit ^ (rel32_rva->op_size() == 4))) != 0;
480     } else {
481       // Move 2 bytes at a time, but track 32-bit boundaries
482       p += 2;
483       on_32bit = ((on_32bit + 1) % 2) != 0;
484     }
485   }
486 
487   return true;
488 }
489 
490 }  // namespace courgette
491