• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "courgette/disassembler_elf_32.h"
6 
7 #include <algorithm>
8 #include <string>
9 #include <vector>
10 
11 #include "base/basictypes.h"
12 #include "base/logging.h"
13 #include "base/memory/scoped_vector.h"
14 
15 #include "courgette/assembly_program.h"
16 #include "courgette/courgette.h"
17 #include "courgette/encoded_program.h"
18 
19 namespace courgette {
20 
DisassemblerElf32(const void * start,size_t length)21 DisassemblerElf32::DisassemblerElf32(const void* start, size_t length)
22   : Disassembler(start, length),
23     header_(NULL),
24     section_header_table_(NULL),
25     section_header_table_size_(0),
26     program_header_table_(NULL),
27     program_header_table_size_(0),
28     default_string_section_(NULL) {
29 }
30 
ParseHeader()31 bool DisassemblerElf32::ParseHeader() {
32   if (length() < sizeof(Elf32_Ehdr))
33     return Bad("Too small");
34 
35   header_ = (Elf32_Ehdr *)start();
36 
37   // Have magic for elf header?
38   if (header_->e_ident[0] != 0x7f ||
39       header_->e_ident[1] != 'E' ||
40       header_->e_ident[2] != 'L' ||
41       header_->e_ident[3] != 'F')
42     return Bad("No Magic Number");
43 
44   if (header_->e_type != ET_EXEC &&
45       header_->e_type != ET_DYN)
46     return Bad("Not an executable file or shared library");
47 
48   if (header_->e_machine != ElfEM())
49     return Bad("Not a supported architecture");
50 
51   if (header_->e_version != 1)
52     return Bad("Unknown file version");
53 
54   if (header_->e_shentsize != sizeof(Elf32_Shdr))
55     return Bad("Unexpected section header size");
56 
57   if (header_->e_shoff >= length())
58     return Bad("Out of bounds section header table offset");
59 
60   section_header_table_ = (Elf32_Shdr *)OffsetToPointer(header_->e_shoff);
61   section_header_table_size_ = header_->e_shnum;
62 
63   if ((header_->e_shoff + header_->e_shnum ) >= length())
64     return Bad("Out of bounds section header table");
65 
66   if (header_->e_phoff >= length())
67     return Bad("Out of bounds program header table offset");
68 
69   program_header_table_ = (Elf32_Phdr *)OffsetToPointer(header_->e_phoff);
70   program_header_table_size_ = header_->e_phnum;
71 
72   if ((header_->e_phoff + header_->e_phnum) >= length())
73     return Bad("Out of bounds program header table");
74 
75   default_string_section_ = (const char *)SectionBody((int)header_->e_shstrndx);
76 
77   ReduceLength(DiscoverLength());
78 
79   return Good();
80 }
81 
Disassemble(AssemblyProgram * target)82 bool DisassemblerElf32::Disassemble(AssemblyProgram* target) {
83   if (!ok())
84     return false;
85 
86   // The Image Base is always 0 for ELF Executables
87   target->set_image_base(0);
88 
89   if (!ParseAbs32Relocs())
90     return false;
91 
92   if (!ParseRel32RelocsFromSections())
93     return false;
94 
95   if (!ParseFile(target))
96     return false;
97 
98   target->DefaultAssignIndexes();
99 
100   return true;
101 }
102 
DiscoverLength()103 uint32 DisassemblerElf32::DiscoverLength() {
104   uint32 result = 0;
105 
106   // Find the end of the last section
107   for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
108     const Elf32_Shdr *section_header = SectionHeader(section_id);
109 
110     if (section_header->sh_type == SHT_NOBITS)
111       continue;
112 
113     uint32 section_end = section_header->sh_offset + section_header->sh_size;
114 
115     if (section_end > result)
116       result = section_end;
117   }
118 
119   // Find the end of the last segment
120   for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
121     const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
122 
123     uint32 segment_end = segment_header->p_offset + segment_header->p_filesz;
124 
125     if (segment_end > result)
126       result = segment_end;
127   }
128 
129   uint32 section_table_end = header_->e_shoff +
130                              (header_->e_shnum * sizeof(Elf32_Shdr));
131   if (section_table_end > result)
132     result = section_table_end;
133 
134   uint32 segment_table_end = header_->e_phoff +
135                              (header_->e_phnum * sizeof(Elf32_Phdr));
136   if (segment_table_end > result)
137     result = segment_table_end;
138 
139   return result;
140 }
141 
IsValidRVA(RVA rva) const142 CheckBool DisassemblerElf32::IsValidRVA(RVA rva) const {
143 
144   // It's valid if it's contained in any program segment
145   for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
146     const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
147 
148     if (segment_header->p_type != PT_LOAD)
149       continue;
150 
151     Elf32_Addr begin = segment_header->p_vaddr;
152     Elf32_Addr end = segment_header->p_vaddr + segment_header->p_memsz;
153 
154     if (rva >= begin && rva < end)
155       return true;
156   }
157 
158   return false;
159 }
160 
161 // Returns RVA for an in memory address, or NULL.
RVAToFileOffset(Elf32_Addr addr,size_t * result) const162 CheckBool DisassemblerElf32::RVAToFileOffset(Elf32_Addr addr,
163                                                 size_t* result) const {
164 
165   for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
166     Elf32_Addr begin = ProgramSegmentMemoryBegin(i);
167     Elf32_Addr end = begin + ProgramSegmentMemorySize(i);
168 
169     if (addr >= begin  && addr < end) {
170       Elf32_Addr offset = addr - begin;
171 
172       if (offset < ProgramSegmentFileSize(i)) {
173         *result = ProgramSegmentFileOffset(i) + offset;
174         return true;
175       }
176     }
177   }
178 
179   return false;
180 }
181 
FileOffsetToRVA(size_t offset) const182 RVA DisassemblerElf32::FileOffsetToRVA(size_t offset) const {
183   // File offsets can be 64 bit values, but we are dealing with 32
184   // bit executables and so only need to support 32bit file sizes.
185   uint32 offset32 = (uint32)offset;
186 
187   for (int i = 0; i < SectionHeaderCount(); i++) {
188 
189     const Elf32_Shdr *section_header = SectionHeader(i);
190 
191     // These can appear to have a size in the file, but don't.
192     if (section_header->sh_type == SHT_NOBITS)
193       continue;
194 
195     Elf32_Off section_begin = section_header->sh_offset;
196     Elf32_Off section_end = section_begin + section_header->sh_size;
197 
198     if (offset32 >= section_begin && offset32 < section_end) {
199       return section_header->sh_addr + (offset32 - section_begin);
200     }
201   }
202 
203   return 0;
204 }
205 
RVAsToOffsets(std::vector<RVA> * rvas,std::vector<size_t> * offsets)206 CheckBool DisassemblerElf32::RVAsToOffsets(std::vector<RVA>* rvas,
207                                            std::vector<size_t>* offsets) {
208   offsets->clear();
209 
210   for (std::vector<RVA>::iterator rva = rvas->begin();
211        rva != rvas->end();
212        rva++) {
213 
214     size_t offset;
215 
216     if (!RVAToFileOffset(*rva, &offset))
217       return false;
218 
219     offsets->push_back(offset);
220   }
221 
222   return true;
223 }
224 
RVAsToOffsets(ScopedVector<TypedRVA> * rvas)225 CheckBool DisassemblerElf32::RVAsToOffsets(ScopedVector<TypedRVA>* rvas) {
226   for (ScopedVector<TypedRVA>::iterator rva = rvas->begin();
227        rva != rvas->end();
228        rva++) {
229 
230     size_t offset;
231 
232     if (!RVAToFileOffset((*rva)->rva(), &offset))
233       return false;
234 
235     (*rva)->set_offset(offset);
236   }
237 
238   return true;
239 }
240 
ParseFile(AssemblyProgram * program)241 CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) {
242   // Walk all the bytes in the file, whether or not in a section.
243   uint32 file_offset = 0;
244 
245   std::vector<size_t> abs_offsets;
246 
247   if (!RVAsToOffsets(&abs32_locations_, &abs_offsets))
248     return false;
249 
250   if (!RVAsToOffsets(&rel32_locations_))
251     return false;
252 
253   std::vector<size_t>::iterator current_abs_offset = abs_offsets.begin();
254   ScopedVector<TypedRVA>::iterator current_rel = rel32_locations_.begin();
255 
256   std::vector<size_t>::iterator end_abs_offset = abs_offsets.end();
257   ScopedVector<TypedRVA>::iterator end_rel = rel32_locations_.end();
258 
259   for (int section_id = 0;
260        section_id < SectionHeaderCount();
261        section_id++) {
262 
263     const Elf32_Shdr *section_header = SectionHeader(section_id);
264 
265     if (!ParseSimpleRegion(file_offset,
266                            section_header->sh_offset,
267                            program))
268       return false;
269     file_offset = section_header->sh_offset;
270 
271     switch (section_header->sh_type) {
272       case SHT_REL:
273         if (!ParseRelocationSection(section_header, program))
274           return false;
275         file_offset = section_header->sh_offset + section_header->sh_size;
276         break;
277       case SHT_PROGBITS:
278         if (!ParseProgbitsSection(section_header,
279                                   &current_abs_offset, end_abs_offset,
280                                   &current_rel, end_rel,
281                                   program))
282           return false;
283         file_offset = section_header->sh_offset + section_header->sh_size;
284         break;
285       case SHT_NOBITS:
286         // Fall through
287       case SHT_INIT_ARRAY:
288         // Fall through
289       case SHT_FINI_ARRAY:
290         while (current_abs_offset != end_abs_offset &&
291                *current_abs_offset >= section_header->sh_offset &&
292                *current_abs_offset <
293                (section_header->sh_offset + section_header->sh_size)) {
294           // Skip any abs_offsets appear in the unsupported INIT_ARRAY section
295           VLOG(1) << "Skipping relocation entry for unsupported section: " <<
296             section_header->sh_type;
297           current_abs_offset++;
298         }
299         break;
300       default:
301         if (current_abs_offset != end_abs_offset &&
302                *current_abs_offset >= section_header->sh_offset &&
303                *current_abs_offset <
304                (section_header->sh_offset + section_header->sh_size))
305           VLOG(1) << "Relocation address in unrecognized ELF section: " << \
306             section_header->sh_type;
307       break;
308     }
309   }
310 
311   // Rest of the file past the last section
312   if (!ParseSimpleRegion(file_offset,
313                          length(),
314                          program))
315     return false;
316 
317   // Make certain we consume all of the relocations as expected
318   return (current_abs_offset == end_abs_offset);
319 }
320 
ParseProgbitsSection(const Elf32_Shdr * section_header,std::vector<size_t>::iterator * current_abs_offset,std::vector<size_t>::iterator end_abs_offset,ScopedVector<TypedRVA>::iterator * current_rel,ScopedVector<TypedRVA>::iterator end_rel,AssemblyProgram * program)321 CheckBool DisassemblerElf32::ParseProgbitsSection(
322     const Elf32_Shdr *section_header,
323     std::vector<size_t>::iterator* current_abs_offset,
324     std::vector<size_t>::iterator end_abs_offset,
325     ScopedVector<TypedRVA>::iterator* current_rel,
326     ScopedVector<TypedRVA>::iterator end_rel,
327     AssemblyProgram* program) {
328 
329   // Walk all the bytes in the file, whether or not in a section.
330   size_t file_offset = section_header->sh_offset;
331   size_t section_end = section_header->sh_offset + section_header->sh_size;
332 
333   Elf32_Addr origin = section_header->sh_addr;
334   size_t origin_offset = section_header->sh_offset;
335   if (!program->EmitOriginInstruction(origin))
336     return false;
337 
338   while (file_offset < section_end) {
339 
340     if (*current_abs_offset != end_abs_offset &&
341         file_offset > **current_abs_offset)
342       return false;
343 
344     while (*current_rel != end_rel &&
345            file_offset > (**current_rel)->get_offset()) {
346       (*current_rel)++;
347     }
348 
349     size_t next_relocation = section_end;
350 
351     if (*current_abs_offset != end_abs_offset &&
352         next_relocation > **current_abs_offset)
353       next_relocation = **current_abs_offset;
354 
355     // Rel offsets are heuristically derived, and might (incorrectly) overlap
356     // an Abs value, or the end of the section, so +3 to make sure there is
357     // room for the full 4 byte value.
358     if (*current_rel != end_rel &&
359         next_relocation > ((**current_rel)->get_offset() + 3))
360       next_relocation = (**current_rel)->get_offset();
361 
362     if (next_relocation > file_offset) {
363       if (!ParseSimpleRegion(file_offset, next_relocation, program))
364         return false;
365 
366       file_offset = next_relocation;
367       continue;
368     }
369 
370     if (*current_abs_offset != end_abs_offset &&
371         file_offset == **current_abs_offset) {
372 
373       const uint8* p = OffsetToPointer(file_offset);
374       RVA target_rva = Read32LittleEndian(p);
375 
376       if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)))
377         return false;
378       file_offset += sizeof(RVA);
379       (*current_abs_offset)++;
380       continue;
381     }
382 
383     if (*current_rel != end_rel &&
384         file_offset == (**current_rel)->get_offset()) {
385 
386       uint32 relative_target = (**current_rel)->relative_target();
387       // This cast is for 64 bit systems, and is only safe because we
388       // are working on 32 bit executables.
389       RVA target_rva = (RVA)(origin + (file_offset - origin_offset) +
390                              relative_target);
391 
392       if (! (**current_rel)->EmitInstruction(program, target_rva))
393         return false;
394       file_offset += (**current_rel)->op_size();
395       (*current_rel)++;
396       continue;
397     }
398   }
399 
400   // Rest of the section (if any)
401   return ParseSimpleRegion(file_offset, section_end, program);
402 }
403 
ParseSimpleRegion(size_t start_file_offset,size_t end_file_offset,AssemblyProgram * program)404 CheckBool DisassemblerElf32::ParseSimpleRegion(
405     size_t start_file_offset,
406     size_t end_file_offset,
407     AssemblyProgram* program) {
408 
409   const uint8* start = OffsetToPointer(start_file_offset);
410   const uint8* end = OffsetToPointer(end_file_offset);
411 
412   // Callers don't guarantee start < end
413   if (start >= end) return true;
414 
415   const ptrdiff_t len = end - start;  // Works because vars are byte pointers
416 
417   if (!program->EmitBytesInstruction(start, len))
418     return false;
419 
420   return true;
421 }
422 
ParseAbs32Relocs()423 CheckBool DisassemblerElf32::ParseAbs32Relocs() {
424   abs32_locations_.clear();
425 
426   // Loop through sections for relocation sections
427   for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
428     const Elf32_Shdr *section_header = SectionHeader(section_id);
429 
430     if (section_header->sh_type == SHT_REL) {
431 
432       Elf32_Rel *relocs_table = (Elf32_Rel *)SectionBody(section_id);
433 
434       int relocs_table_count = section_header->sh_size /
435                                section_header->sh_entsize;
436 
437       // Elf32_Word relocation_section_id = section_header->sh_info;
438 
439       // Loop through relocation objects in the relocation section
440       for (int rel_id = 0; rel_id < relocs_table_count; rel_id++) {
441         RVA rva;
442 
443         // Quite a few of these conversions fail, and we simply skip
444         // them, that's okay.
445         if (RelToRVA(relocs_table[rel_id], &rva) && CheckSection(rva))
446           abs32_locations_.push_back(rva);
447       }
448     }
449   }
450 
451   std::sort(abs32_locations_.begin(), abs32_locations_.end());
452   return true;
453 }
454 
CheckSection(RVA rva)455 CheckBool DisassemblerElf32::CheckSection(RVA rva) {
456   size_t offset;
457 
458   if (!RVAToFileOffset(rva, &offset)) {
459     return false;
460   }
461 
462   for (int section_id = 0;
463        section_id < SectionHeaderCount();
464        section_id++) {
465 
466     const Elf32_Shdr *section_header = SectionHeader(section_id);
467 
468     if (offset >= section_header->sh_offset &&
469         offset < (section_header->sh_offset + section_header->sh_size)) {
470       switch (section_header->sh_type) {
471         case SHT_REL:
472           // Fall-through
473         case SHT_PROGBITS:
474           return true;
475       }
476     }
477   }
478 
479   return false;
480 }
481 
ParseRel32RelocsFromSections()482 CheckBool DisassemblerElf32::ParseRel32RelocsFromSections() {
483 
484   rel32_locations_.clear();
485 
486   // Loop through sections for relocation sections
487   for (int section_id = 0;
488        section_id < SectionHeaderCount();
489        section_id++) {
490 
491     const Elf32_Shdr *section_header = SectionHeader(section_id);
492 
493     if (section_header->sh_type != SHT_PROGBITS)
494       continue;
495 
496     if (!ParseRel32RelocsFromSection(section_header))
497       return false;
498   }
499 
500   std::sort(rel32_locations_.begin(),
501             rel32_locations_.end(),
502             TypedRVA::IsLessThan);
503   return true;
504 }
505 
506 }  // namespace courgette
507