• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- ELFReader.cpp ------------------------------------------------------===//
2 //
3 //                     The MCLinker Project
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include <mcld/LD/ELFReader.h>
10 
11 #include <mcld/IRBuilder.h>
12 #include <mcld/Fragment/FillFragment.h>
13 #include <mcld/LD/EhFrame.h>
14 #include <mcld/LD/SectionData.h>
15 #include <mcld/Target/GNULDBackend.h>
16 #include <mcld/Target/GNUInfo.h>
17 #include <mcld/Support/MsgHandling.h>
18 #include <mcld/Support/MemoryArea.h>
19 #include <mcld/Object/ObjectBuilder.h>
20 
21 #include <cstring>
22 
23 #include <llvm/ADT/StringRef.h>
24 #include <llvm/ADT/Twine.h>
25 #include <llvm/Support/ELF.h>
26 #include <llvm/Support/Host.h>
27 
28 #include <iostream>
29 
30 using namespace mcld;
31 
32 //===----------------------------------------------------------------------===//
33 // ELFReader<32, true>
34 //===----------------------------------------------------------------------===//
35 /// constructor
ELFReader(GNULDBackend & pBackend)36 ELFReader<32, true>::ELFReader(GNULDBackend& pBackend)
37   : ELFReaderIF(pBackend) {
38 }
39 
40 /// destructor
~ELFReader()41 ELFReader<32, true>::~ELFReader()
42 {
43 }
44 
45 /// isELF - is this a ELF file
isELF(const void * pELFHeader) const46 bool ELFReader<32, true>::isELF(const void* pELFHeader) const
47 {
48   const llvm::ELF::Elf32_Ehdr* hdr =
49       reinterpret_cast<const llvm::ELF::Elf32_Ehdr*>(pELFHeader);
50   if (0 == memcmp(llvm::ELF::ElfMagic, hdr, 4))
51     return true;
52   return false;
53 }
54 
55 /// readRegularSection - read a regular section and create fragments.
56 bool
readRegularSection(Input & pInput,SectionData & pSD) const57 ELFReader<32, true>::readRegularSection(Input& pInput, SectionData& pSD) const
58 {
59   uint32_t offset = pInput.fileOffset() + pSD.getSection().offset();
60   uint32_t size = pSD.getSection().size();
61 
62   Fragment* frag = IRBuilder::CreateRegion(pInput, offset, size);
63   ObjectBuilder::AppendFragment(*frag, pSD);
64   return true;
65 }
66 
67 /// readSymbols - read ELF symbols and create LDSymbol
readSymbols(Input & pInput,IRBuilder & pBuilder,llvm::StringRef pRegion,const char * pStrTab) const68 bool ELFReader<32, true>::readSymbols(Input& pInput,
69                                       IRBuilder& pBuilder,
70                                       llvm::StringRef pRegion,
71                                       const char* pStrTab) const
72 {
73   // get number of symbols
74   size_t entsize = pRegion.size()/sizeof(llvm::ELF::Elf32_Sym);
75   const llvm::ELF::Elf32_Sym* symtab =
76       reinterpret_cast<const llvm::ELF::Elf32_Sym*>(pRegion.begin());
77 
78   uint32_t st_name  = 0x0;
79   uint32_t st_value = 0x0;
80   uint32_t st_size  = 0x0;
81   uint8_t  st_info  = 0x0;
82   uint8_t  st_other = 0x0;
83   uint16_t st_shndx = 0x0;
84 
85   // skip the first NULL symbol
86   pInput.context()->addSymbol(LDSymbol::Null());
87 
88   /// recording symbols added from DynObj to analyze weak alias
89   std::vector<AliasInfo> potential_aliases;
90   bool is_dyn_obj = (pInput.type()==Input::DynObj);
91   for (size_t idx = 1; idx < entsize; ++idx) {
92     st_info  = symtab[idx].st_info;
93     st_other = symtab[idx].st_other;
94 
95     if (llvm::sys::IsLittleEndianHost) {
96       st_name  = symtab[idx].st_name;
97       st_value = symtab[idx].st_value;
98       st_size  = symtab[idx].st_size;
99       st_shndx = symtab[idx].st_shndx;
100     }
101     else {
102       st_name  = mcld::bswap32(symtab[idx].st_name);
103       st_value = mcld::bswap32(symtab[idx].st_value);
104       st_size  = mcld::bswap32(symtab[idx].st_size);
105       st_shndx = mcld::bswap16(symtab[idx].st_shndx);
106     }
107 
108     // If the section should not be included, set the st_shndx SHN_UNDEF
109     // - A section in interrelated groups are not included.
110     if (pInput.type() == Input::Object &&
111         st_shndx < llvm::ELF::SHN_LORESERVE &&
112         st_shndx != llvm::ELF::SHN_UNDEF) {
113       if (NULL == pInput.context()->getSection(st_shndx))
114         st_shndx = llvm::ELF::SHN_UNDEF;
115     }
116 
117     // get ld_type
118     ResolveInfo::Type ld_type = getSymType(st_info, st_shndx);
119 
120     // get ld_desc
121     ResolveInfo::Desc ld_desc = getSymDesc(st_shndx, pInput);
122 
123     // get ld_binding
124     ResolveInfo::Binding ld_binding = getSymBinding((st_info >> 4), st_shndx, st_other);
125 
126     // get ld_value - ld_value must be section relative.
127     uint64_t ld_value = getSymValue(st_value, st_shndx, pInput);
128 
129     // get ld_vis
130     ResolveInfo::Visibility ld_vis = getSymVisibility(st_other);
131 
132     // get section
133     LDSection* section = NULL;
134     if (st_shndx < llvm::ELF::SHN_LORESERVE) // including ABS and COMMON
135       section = pInput.context()->getSection(st_shndx);
136 
137     // get ld_name
138     std::string ld_name;
139     if (ResolveInfo::Section == ld_type) {
140       // Section symbol's st_name is the section index.
141       assert(NULL != section && "get a invalid section");
142       ld_name = section->name();
143     }
144     else {
145       ld_name = std::string(pStrTab + st_name);
146     }
147 
148     LDSymbol* psym = pBuilder.AddSymbol(pInput,
149                                         ld_name,
150                                         ld_type,
151                                         ld_desc,
152                                         ld_binding,
153                                         st_size,
154                                         ld_value,
155                                         section,
156                                         ld_vis);
157 
158     if (is_dyn_obj
159         && NULL != psym
160         && ResolveInfo::Undefined != ld_desc
161         && (ResolveInfo::Global == ld_binding ||
162             ResolveInfo::Weak == ld_binding)
163         && ResolveInfo::Object == ld_type) {
164       AliasInfo p;
165       p.pt_alias = psym;
166       p.ld_binding = ld_binding;
167       p.ld_value = ld_value;
168       potential_aliases.push_back(p);
169     }
170 
171   } // end of for loop
172 
173   // analyze weak alias
174   // FIXME: it is better to let IRBuilder handle alias anlysis.
175   //        1. eliminate code duplication
176   //        2. easy to know if a symbol is from .so
177   //           (so that it may be a potential alias)
178   if (is_dyn_obj) {
179     // sort symbols by symbol value and then weak before strong
180     std::sort(potential_aliases.begin(), potential_aliases.end(), less);
181 
182     // for each weak symbol, find out all its aliases, and
183     // then link them as a circular list in Module
184     std::vector<AliasInfo>::iterator sym_it, sym_e;
185     sym_e = potential_aliases.end();
186     for (sym_it = potential_aliases.begin(); sym_it!=sym_e; ++sym_it) {
187       if (ResolveInfo::Weak!=sym_it->ld_binding)
188         continue;
189 
190       Module& pModule = pBuilder.getModule();
191       std::vector<AliasInfo>::iterator alias_it = sym_it+1;
192       while(alias_it!=sym_e) {
193         if (sym_it->ld_value != alias_it->ld_value)
194           break;
195 
196         if (sym_it + 1 == alias_it)
197           pModule.CreateAliasList(*sym_it->pt_alias->resolveInfo());
198         pModule.addAlias(*alias_it->pt_alias->resolveInfo());
199         ++alias_it;
200       }
201 
202       sym_it = alias_it - 1;
203     }// end of for loop
204   }
205 
206   return true;
207 }
208 
209 //===----------------------------------------------------------------------===//
210 // ELFReader::read relocations - read ELF rela and rel, and create Relocation
211 //===----------------------------------------------------------------------===//
212 /// ELFReader::readRela - read ELF rela and create Relocation
readRela(Input & pInput,LDSection & pSection,llvm::StringRef pRegion) const213 bool ELFReader<32, true>::readRela(Input& pInput,
214                                    LDSection& pSection,
215                                    llvm::StringRef pRegion) const
216 {
217   // get the number of rela
218   size_t entsize = pRegion.size() / sizeof(llvm::ELF::Elf32_Rela);
219   const llvm::ELF::Elf32_Rela* relaTab =
220       reinterpret_cast<const llvm::ELF::Elf32_Rela*>(pRegion.begin());
221 
222   for (size_t idx=0; idx < entsize; ++idx) {
223     Relocation::Type r_type = 0x0;
224     uint32_t r_sym = 0x0;
225     uint32_t r_offset = 0x0;
226     int32_t  r_addend = 0;
227     if (!target().readRelocation(relaTab[idx], r_type, r_sym, r_offset, r_addend))
228       return false;
229 
230     LDSymbol* symbol = pInput.context()->getSymbol(r_sym);
231     if (NULL == symbol) {
232       fatal(diag::err_cannot_read_symbol) << r_sym << pInput.path();
233     }
234 
235     IRBuilder::AddRelocation(pSection, r_type, *symbol, r_offset, r_addend);
236   } // end of for
237   return true;
238 }
239 
240 /// readRel - read ELF rel and create Relocation
readRel(Input & pInput,LDSection & pSection,llvm::StringRef pRegion) const241 bool ELFReader<32, true>::readRel(Input& pInput,
242                                   LDSection& pSection,
243                                   llvm::StringRef pRegion) const
244 {
245   // get the number of rel
246   size_t entsize = pRegion.size() / sizeof(llvm::ELF::Elf32_Rel);
247   const llvm::ELF::Elf32_Rel* relTab =
248       reinterpret_cast<const llvm::ELF::Elf32_Rel*>(pRegion.begin());
249 
250   for (size_t idx=0; idx < entsize; ++idx) {
251     Relocation::Type r_type = 0x0;
252     uint32_t r_sym = 0x0;
253     uint32_t r_offset = 0x0;
254 
255     if (!target().readRelocation(relTab[idx], r_type, r_sym, r_offset))
256       return false;
257 
258     LDSymbol* symbol = pInput.context()->getSymbol(r_sym);
259     if (NULL == symbol) {
260       fatal(diag::err_cannot_read_symbol) << r_sym << pInput.path();
261     }
262 
263     IRBuilder::AddRelocation(pSection, r_type, *symbol, r_offset);
264   } // end of for
265   return true;
266 }
267 
268 /// isMyEndian - is this ELF file in the same endian to me?
isMyEndian(const void * pELFHeader) const269 bool ELFReader<32, true>::isMyEndian(const void* pELFHeader) const
270 {
271   const llvm::ELF::Elf32_Ehdr* hdr =
272       reinterpret_cast<const llvm::ELF::Elf32_Ehdr*>(pELFHeader);
273 
274   return (hdr->e_ident[llvm::ELF::EI_DATA] == llvm::ELF::ELFDATA2LSB);
275 }
276 
277 /// isMyMachine - is this ELF file generated for the same machine.
isMyMachine(const void * pELFHeader) const278 bool ELFReader<32, true>::isMyMachine(const void* pELFHeader) const
279 {
280   const llvm::ELF::Elf32_Ehdr* hdr =
281       reinterpret_cast<const llvm::ELF::Elf32_Ehdr*>(pELFHeader);
282 
283   if (llvm::sys::IsLittleEndianHost)
284     return (hdr->e_machine == target().getInfo().machine());
285   return (mcld::bswap16(hdr->e_machine) == target().getInfo().machine());
286 }
287 
288 /// fileType - return the file type
fileType(const void * pELFHeader) const289 Input::Type ELFReader<32, true>::fileType(const void* pELFHeader) const
290 {
291   const llvm::ELF::Elf32_Ehdr* hdr =
292       reinterpret_cast<const llvm::ELF::Elf32_Ehdr*>(pELFHeader);
293   uint32_t type = 0x0;
294   if (llvm::sys::IsLittleEndianHost)
295     type = hdr->e_type;
296   else
297     type = mcld::bswap16(hdr->e_type);
298 
299   switch(type) {
300   case llvm::ELF::ET_REL:
301     return Input::Object;
302   case llvm::ELF::ET_EXEC:
303     return Input::Exec;
304   case llvm::ELF::ET_DYN:
305     return Input::DynObj;
306   case llvm::ELF::ET_CORE:
307     return Input::CoreFile;
308   case llvm::ELF::ET_NONE:
309   default:
310     return Input::Unknown;
311   }
312 }
313 
314 /// readSectionHeaders - read ELF section header table and create LDSections
readSectionHeaders(Input & pInput,const void * pELFHeader) const315 bool ELFReader<32, true>::readSectionHeaders(Input& pInput,
316                                              const void* pELFHeader) const
317 {
318   const llvm::ELF::Elf32_Ehdr* ehdr =
319       reinterpret_cast<const llvm::ELF::Elf32_Ehdr*>(pELFHeader);
320 
321   uint32_t shoff     = 0x0;
322   uint16_t shentsize = 0x0;
323   uint32_t shnum     = 0x0;
324   uint32_t shstrtab  = 0x0;
325 
326   if (llvm::sys::IsLittleEndianHost) {
327     shoff     = ehdr->e_shoff;
328     shentsize = ehdr->e_shentsize;
329     shnum     = ehdr->e_shnum;
330     shstrtab  = ehdr->e_shstrndx;
331   }
332   else {
333     shoff     = mcld::bswap32(ehdr->e_shoff);
334     shentsize = mcld::bswap16(ehdr->e_shentsize);
335     shnum     = mcld::bswap16(ehdr->e_shnum);
336     shstrtab  = mcld::bswap16(ehdr->e_shstrndx);
337   }
338 
339   // If the file has no section header table, e_shoff holds zero.
340   if (0x0 == shoff)
341     return true;
342 
343   const llvm::ELF::Elf32_Shdr *shdr = NULL;
344   llvm::StringRef shdr_region;
345   uint32_t sh_name      = 0x0;
346   uint32_t sh_type      = 0x0;
347   uint32_t sh_flags     = 0x0;
348   uint32_t sh_offset    = 0x0;
349   uint32_t sh_size      = 0x0;
350   uint32_t sh_link      = 0x0;
351   uint32_t sh_info      = 0x0;
352   uint32_t sh_addralign = 0x0;
353 
354   // if shnum and shstrtab overflow, the actual values are in the 1st shdr
355   if (shnum == llvm::ELF::SHN_UNDEF || shstrtab == llvm::ELF::SHN_XINDEX) {
356     shdr_region = pInput.memArea()->request(pInput.fileOffset() + shoff,
357                                             shentsize);
358     shdr = reinterpret_cast<const llvm::ELF::Elf32_Shdr*>(shdr_region.begin());
359 
360     if (llvm::sys::IsLittleEndianHost) {
361       sh_size = shdr->sh_size;
362       sh_link = shdr->sh_link;
363     }
364     else {
365       sh_size = mcld::bswap32(shdr->sh_size);
366       sh_link = mcld::bswap32(shdr->sh_link);
367     }
368 
369     if (shnum == llvm::ELF::SHN_UNDEF)
370       shnum = sh_size;
371     if (shstrtab == llvm::ELF::SHN_XINDEX)
372       shstrtab = sh_link;
373 
374     shoff += shentsize;
375   }
376 
377   shdr_region = pInput.memArea()->request(pInput.fileOffset() + shoff,
378                                           shnum * shentsize);
379   const llvm::ELF::Elf32_Shdr* shdrTab =
380       reinterpret_cast<const llvm::ELF::Elf32_Shdr*>(shdr_region.begin());
381 
382   // get .shstrtab first
383   shdr = &shdrTab[shstrtab];
384   if (llvm::sys::IsLittleEndianHost) {
385     sh_offset = shdr->sh_offset;
386     sh_size   = shdr->sh_size;
387   }
388   else {
389     sh_offset = mcld::bswap32(shdr->sh_offset);
390     sh_size   = mcld::bswap32(shdr->sh_size);
391   }
392 
393   llvm::StringRef sect_name_region = pInput.memArea()->request(
394       pInput.fileOffset() + sh_offset, sh_size);
395   const char* sect_name = sect_name_region.begin();
396 
397   LinkInfoList link_info_list;
398 
399   // create all LDSections, including first NULL section.
400   for (size_t idx = 0; idx < shnum; ++idx) {
401     if (llvm::sys::IsLittleEndianHost) {
402       sh_name      = shdrTab[idx].sh_name;
403       sh_type      = shdrTab[idx].sh_type;
404       sh_flags     = shdrTab[idx].sh_flags;
405       sh_offset    = shdrTab[idx].sh_offset;
406       sh_size      = shdrTab[idx].sh_size;
407       sh_link      = shdrTab[idx].sh_link;
408       sh_info      = shdrTab[idx].sh_info;
409       sh_addralign = shdrTab[idx].sh_addralign;
410     }
411     else {
412       sh_name      = mcld::bswap32(shdrTab[idx].sh_name);
413       sh_type      = mcld::bswap32(shdrTab[idx].sh_type);
414       sh_flags     = mcld::bswap32(shdrTab[idx].sh_flags);
415       sh_offset    = mcld::bswap32(shdrTab[idx].sh_offset);
416       sh_size      = mcld::bswap32(shdrTab[idx].sh_size);
417       sh_link      = mcld::bswap32(shdrTab[idx].sh_link);
418       sh_info      = mcld::bswap32(shdrTab[idx].sh_info);
419       sh_addralign = mcld::bswap32(shdrTab[idx].sh_addralign);
420     }
421 
422     LDSection* section = IRBuilder::CreateELFHeader(pInput,
423                                                     sect_name+sh_name,
424                                                     sh_type,
425                                                     sh_flags,
426                                                     sh_addralign);
427     section->setSize(sh_size);
428     section->setOffset(sh_offset);
429     section->setInfo(sh_info);
430 
431     if (sh_link != 0x0 || sh_info != 0x0) {
432       LinkInfo link_info = { section, sh_link, sh_info };
433       link_info_list.push_back(link_info);
434     }
435   } // end of for
436 
437   // set up InfoLink
438   LinkInfoList::iterator info, infoEnd = link_info_list.end();
439   for (info = link_info_list.begin(); info != infoEnd; ++info) {
440     if (LDFileFormat::Relocation == info->section->kind())
441       info->section->setLink(pInput.context()->getSection(info->sh_info));
442     else
443       info->section->setLink(pInput.context()->getSection(info->sh_link));
444   }
445 
446   return true;
447 }
448 
449 /// readSignature - read a symbol from the given Input and index in symtab
450 /// This is used to get the signature of a group section.
readSignature(Input & pInput,LDSection & pSymTab,uint32_t pSymIdx) const451 ResolveInfo* ELFReader<32, true>::readSignature(Input& pInput,
452                                                 LDSection& pSymTab,
453                                                 uint32_t pSymIdx) const
454 {
455   LDSection* symtab = &pSymTab;
456   LDSection* strtab = symtab->getLink();
457   assert(NULL != symtab && NULL != strtab);
458 
459   uint32_t offset = pInput.fileOffset() + symtab->offset() +
460                       sizeof(llvm::ELF::Elf32_Sym) * pSymIdx;
461   llvm::StringRef symbol_region =
462       pInput.memArea()->request(offset, sizeof(llvm::ELF::Elf32_Sym));
463   const llvm::ELF::Elf32_Sym* entry =
464       reinterpret_cast<const llvm::ELF::Elf32_Sym*>(symbol_region.begin());
465 
466   uint32_t st_name  = 0x0;
467   uint8_t  st_info  = 0x0;
468   uint8_t  st_other = 0x0;
469   uint16_t st_shndx = 0x0;
470   st_info  = entry->st_info;
471   st_other = entry->st_other;
472   if (llvm::sys::IsLittleEndianHost) {
473     st_name  = entry->st_name;
474     st_shndx = entry->st_shndx;
475   }
476   else {
477     st_name  = mcld::bswap32(entry->st_name);
478     st_shndx = mcld::bswap16(entry->st_shndx);
479   }
480 
481   llvm::StringRef strtab_region = pInput.memArea()->request(
482       pInput.fileOffset() + strtab->offset(), strtab->size());
483 
484   // get ld_name
485   llvm::StringRef ld_name(strtab_region.begin() + st_name);
486 
487   ResolveInfo* result = ResolveInfo::Create(ld_name);
488   result->setSource(pInput.type() == Input::DynObj);
489   result->setType(static_cast<ResolveInfo::Type>(st_info & 0xF));
490   result->setDesc(getSymDesc(st_shndx, pInput));
491   result->setBinding(getSymBinding((st_info >> 4), st_shndx, st_other));
492   result->setVisibility(getSymVisibility(st_other));
493 
494   return result;
495 }
496 
497 /// readDynamic - read ELF .dynamic in input dynobj
readDynamic(Input & pInput) const498 bool ELFReader<32, true>::readDynamic(Input& pInput) const
499 {
500   assert(pInput.type() == Input::DynObj);
501   const LDSection* dynamic_sect = pInput.context()->getSection(".dynamic");
502   if (NULL == dynamic_sect) {
503     fatal(diag::err_cannot_read_section) << ".dynamic";
504   }
505   const LDSection* dynstr_sect = dynamic_sect->getLink();
506   if (NULL == dynstr_sect) {
507     fatal(diag::err_cannot_read_section) << ".dynstr";
508   }
509 
510   llvm::StringRef dynamic_region = pInput.memArea()->request(
511       pInput.fileOffset() + dynamic_sect->offset(), dynamic_sect->size());
512 
513   llvm::StringRef dynstr_region = pInput.memArea()->request(
514       pInput.fileOffset() + dynstr_sect->offset(), dynstr_sect->size());
515 
516   const llvm::ELF::Elf32_Dyn* dynamic =
517       reinterpret_cast<const llvm::ELF::Elf32_Dyn*>(dynamic_region.begin());
518   const char* dynstr = dynstr_region.begin();
519   bool hasSOName = false;
520   size_t numOfEntries = dynamic_sect->size() / sizeof(llvm::ELF::Elf32_Dyn);
521 
522   for (size_t idx = 0; idx < numOfEntries; ++idx) {
523 
524     llvm::ELF::Elf32_Sword d_tag = 0x0;
525     llvm::ELF::Elf32_Word d_val = 0x0;
526 
527     if (llvm::sys::IsLittleEndianHost) {
528       d_tag = dynamic[idx].d_tag;
529       d_val = dynamic[idx].d_un.d_val;
530     } else {
531       d_tag = mcld::bswap32(dynamic[idx].d_tag);
532       d_val = mcld::bswap32(dynamic[idx].d_un.d_val);
533     }
534 
535     switch (d_tag) {
536       case llvm::ELF::DT_SONAME:
537         assert(d_val < dynstr_sect->size());
538         pInput.setName(sys::fs::Path(dynstr + d_val).filename().native());
539         hasSOName = true;
540         break;
541       case llvm::ELF::DT_NEEDED:
542         // TODO:
543         break;
544       case llvm::ELF::DT_NULL:
545       default:
546         break;
547     }
548   }
549 
550   // if there is no SONAME in .dynamic, then set it from input path
551   if (!hasSOName)
552     pInput.setName(pInput.path().filename().native());
553 
554   return true;
555 }
556 
557 //===----------------------------------------------------------------------===//
558 // ELFReader<64, true>
559 //===----------------------------------------------------------------------===//
560 /// constructor
ELFReader(GNULDBackend & pBackend)561 ELFReader<64, true>::ELFReader(GNULDBackend& pBackend)
562   : ELFReaderIF(pBackend) {
563 }
564 
565 /// destructor
~ELFReader()566 ELFReader<64, true>::~ELFReader()
567 {
568 }
569 
570 /// isELF - is this a ELF file
isELF(const void * pELFHeader) const571 bool ELFReader<64, true>::isELF(const void* pELFHeader) const
572 {
573   const llvm::ELF::Elf64_Ehdr* hdr =
574       reinterpret_cast<const llvm::ELF::Elf64_Ehdr*>(pELFHeader);
575   if (0 == memcmp(llvm::ELF::ElfMagic, hdr, 4))
576     return true;
577   return false;
578 }
579 
580 /// readRegularSection - read a regular section and create fragments.
581 bool
readRegularSection(Input & pInput,SectionData & pSD) const582 ELFReader<64, true>::readRegularSection(Input& pInput, SectionData& pSD) const
583 {
584   uint64_t offset = pInput.fileOffset() + pSD.getSection().offset();
585   uint64_t size = pSD.getSection().size();
586 
587   Fragment* frag = IRBuilder::CreateRegion(pInput, offset, size);
588   ObjectBuilder::AppendFragment(*frag, pSD);
589   return true;
590 }
591 
592 /// readSymbols - read ELF symbols and create LDSymbol
readSymbols(Input & pInput,IRBuilder & pBuilder,llvm::StringRef pRegion,const char * pStrTab) const593 bool ELFReader<64, true>::readSymbols(Input& pInput,
594                                       IRBuilder& pBuilder,
595                                       llvm::StringRef pRegion,
596                                       const char* pStrTab) const
597 {
598   // get number of symbols
599   size_t entsize = pRegion.size() / sizeof(llvm::ELF::Elf64_Sym);
600   const llvm::ELF::Elf64_Sym* symtab =
601       reinterpret_cast<const llvm::ELF::Elf64_Sym*>(pRegion.begin());
602 
603   uint32_t st_name  = 0x0;
604   uint64_t st_value = 0x0;
605   uint64_t st_size  = 0x0;
606   uint8_t  st_info  = 0x0;
607   uint8_t  st_other = 0x0;
608   uint16_t st_shndx = 0x0;
609 
610   // skip the first NULL symbol
611   pInput.context()->addSymbol(LDSymbol::Null());
612 
613   /// recording symbols added from DynObj to analyze weak alias
614   std::vector<AliasInfo> potential_aliases;
615   bool is_dyn_obj = (pInput.type()==Input::DynObj);
616   for (size_t idx = 1; idx < entsize; ++idx) {
617     st_info  = symtab[idx].st_info;
618     st_other = symtab[idx].st_other;
619 
620     if (llvm::sys::IsLittleEndianHost) {
621       st_name  = symtab[idx].st_name;
622       st_value = symtab[idx].st_value;
623       st_size  = symtab[idx].st_size;
624       st_shndx = symtab[idx].st_shndx;
625     }
626     else {
627       st_name  = mcld::bswap32(symtab[idx].st_name);
628       st_value = mcld::bswap64(symtab[idx].st_value);
629       st_size  = mcld::bswap64(symtab[idx].st_size);
630       st_shndx = mcld::bswap16(symtab[idx].st_shndx);
631     }
632 
633     // If the section should not be included, set the st_shndx SHN_UNDEF
634     // - A section in interrelated groups are not included.
635     if (pInput.type() == Input::Object &&
636         st_shndx < llvm::ELF::SHN_LORESERVE &&
637         st_shndx != llvm::ELF::SHN_UNDEF) {
638       if (NULL == pInput.context()->getSection(st_shndx))
639         st_shndx = llvm::ELF::SHN_UNDEF;
640     }
641 
642     // get ld_type
643     ResolveInfo::Type ld_type = getSymType(st_info, st_shndx);
644 
645     // get ld_desc
646     ResolveInfo::Desc ld_desc = getSymDesc(st_shndx, pInput);
647 
648     // get ld_binding
649     ResolveInfo::Binding ld_binding = getSymBinding((st_info >> 4), st_shndx, st_other);
650 
651     // get ld_value - ld_value must be section relative.
652     uint64_t ld_value = getSymValue(st_value, st_shndx, pInput);
653 
654     // get ld_vis
655     ResolveInfo::Visibility ld_vis = getSymVisibility(st_other);
656 
657     // get section
658     LDSection* section = NULL;
659     if (st_shndx < llvm::ELF::SHN_LORESERVE) // including ABS and COMMON
660       section = pInput.context()->getSection(st_shndx);
661 
662     // get ld_name
663     std::string ld_name;
664     if (ResolveInfo::Section == ld_type) {
665       // Section symbol's st_name is the section index.
666       assert(NULL != section && "get a invalid section");
667       ld_name = section->name();
668     }
669     else {
670       ld_name = std::string(pStrTab + st_name);
671     }
672 
673     LDSymbol* psym = pBuilder.AddSymbol(pInput,
674                                         ld_name,
675                                         ld_type,
676                                         ld_desc,
677                                         ld_binding,
678                                         st_size,
679                                         ld_value,
680                                         section,
681                                         ld_vis);
682 
683     if (is_dyn_obj
684         && NULL != psym
685         && ResolveInfo::Undefined != ld_desc
686         && (ResolveInfo::Global == ld_binding ||
687             ResolveInfo::Weak == ld_binding)
688         && ResolveInfo::Object == ld_type ) {
689       AliasInfo p;
690       p.pt_alias = psym;
691       p.ld_binding = ld_binding;
692       p.ld_value = ld_value;
693       potential_aliases.push_back(p);
694     }
695 
696   } // end of for loop
697 
698   // analyze weak alias here
699   if (is_dyn_obj) {
700     // sort symbols by symbol value and then weak before strong
701     std::sort(potential_aliases.begin(), potential_aliases.end(), less);
702 
703     // for each weak symbol, find out all its aliases, and
704     // then link them as a circular list in Module
705     std::vector<AliasInfo>::iterator sym_it, sym_e;
706     sym_e = potential_aliases.end();
707     for (sym_it = potential_aliases.begin(); sym_it!=sym_e; ++sym_it) {
708       if (ResolveInfo::Weak!=sym_it->ld_binding)
709         continue;
710 
711       Module& pModule = pBuilder.getModule();
712       std::vector<AliasInfo>::iterator alias_it = sym_it+1;
713       while(alias_it!=sym_e) {
714         if (sym_it->ld_value != alias_it->ld_value)
715           break;
716 
717         if (sym_it + 1 == alias_it)
718           pModule.CreateAliasList(*sym_it->pt_alias->resolveInfo());
719         pModule.addAlias(*alias_it->pt_alias->resolveInfo());
720         ++alias_it;
721       }
722 
723       sym_it = alias_it - 1;
724     }// end of for loop
725   }
726   return true;
727 }
728 
729 //===----------------------------------------------------------------------===//
730 // ELFReader::read relocations - read ELF rela and rel, and create Relocation
731 //===----------------------------------------------------------------------===//
732 /// ELFReader::readRela - read ELF rela and create Relocation
readRela(Input & pInput,LDSection & pSection,llvm::StringRef pRegion) const733 bool ELFReader<64, true>::readRela(Input& pInput,
734                                    LDSection& pSection,
735                                    llvm::StringRef pRegion) const
736 {
737   // get the number of rela
738   size_t entsize = pRegion.size() / sizeof(llvm::ELF::Elf64_Rela);
739   const llvm::ELF::Elf64_Rela* relaTab =
740       reinterpret_cast<const llvm::ELF::Elf64_Rela*>(pRegion.begin());
741 
742   for (size_t idx=0; idx < entsize; ++idx) {
743     Relocation::Type r_type = 0x0;
744     uint32_t r_sym = 0x0;
745     uint64_t r_offset = 0x0;
746     int64_t  r_addend = 0;
747     if (!target().readRelocation(relaTab[idx],
748                                  r_type, r_sym, r_offset, r_addend)) {
749       return false;
750     }
751 
752     LDSymbol* symbol = pInput.context()->getSymbol(r_sym);
753     if (NULL == symbol) {
754       fatal(diag::err_cannot_read_symbol) << r_sym << pInput.path();
755     }
756 
757     IRBuilder::AddRelocation(pSection, r_type, *symbol, r_offset, r_addend);
758   } // end of for
759   return true;
760 }
761 
762 /// readRel - read ELF rel and create Relocation
readRel(Input & pInput,LDSection & pSection,llvm::StringRef pRegion) const763 bool ELFReader<64, true>::readRel(Input& pInput,
764                                   LDSection& pSection,
765                                   llvm::StringRef pRegion) const
766 {
767   // get the number of rel
768   size_t entsize = pRegion.size() / sizeof(llvm::ELF::Elf64_Rel);
769   const llvm::ELF::Elf64_Rel* relTab =
770       reinterpret_cast<const llvm::ELF::Elf64_Rel*>(pRegion.begin());
771 
772   for (size_t idx=0; idx < entsize; ++idx) {
773     Relocation::Type r_type = 0x0;
774     uint32_t r_sym = 0x0;
775     uint64_t r_offset = 0x0;
776     if (!target().readRelocation(relTab[idx], r_type, r_sym, r_offset))
777       return false;
778 
779     LDSymbol* symbol = pInput.context()->getSymbol(r_sym);
780     if (NULL == symbol) {
781       fatal(diag::err_cannot_read_symbol) << r_sym << pInput.path();
782     }
783 
784     IRBuilder::AddRelocation(pSection, r_type, *symbol, r_offset);
785   } // end of for
786   return true;
787 }
788 
789 /// isMyEndian - is this ELF file in the same endian to me?
isMyEndian(const void * pELFHeader) const790 bool ELFReader<64, true>::isMyEndian(const void* pELFHeader) const
791 {
792   const llvm::ELF::Elf64_Ehdr* hdr =
793       reinterpret_cast<const llvm::ELF::Elf64_Ehdr*>(pELFHeader);
794 
795   return (hdr->e_ident[llvm::ELF::EI_DATA] == llvm::ELF::ELFDATA2LSB);
796 }
797 
798 /// isMyMachine - is this ELF file generated for the same machine.
isMyMachine(const void * pELFHeader) const799 bool ELFReader<64, true>::isMyMachine(const void* pELFHeader) const
800 {
801   const llvm::ELF::Elf64_Ehdr* hdr =
802       reinterpret_cast<const llvm::ELF::Elf64_Ehdr*>(pELFHeader);
803 
804   if (llvm::sys::IsLittleEndianHost)
805     return (hdr->e_machine == target().getInfo().machine());
806   return (mcld::bswap16(hdr->e_machine) == target().getInfo().machine());
807 }
808 
809 /// fileType - return the file type
fileType(const void * pELFHeader) const810 Input::Type ELFReader<64, true>::fileType(const void* pELFHeader) const
811 {
812   const llvm::ELF::Elf64_Ehdr* hdr =
813       reinterpret_cast<const llvm::ELF::Elf64_Ehdr*>(pELFHeader);
814   uint32_t type = 0x0;
815   if (llvm::sys::IsLittleEndianHost)
816     type = hdr->e_type;
817   else
818     type = mcld::bswap16(hdr->e_type);
819 
820   switch(type) {
821   case llvm::ELF::ET_REL:
822     return Input::Object;
823   case llvm::ELF::ET_EXEC:
824     return Input::Exec;
825   case llvm::ELF::ET_DYN:
826     return Input::DynObj;
827   case llvm::ELF::ET_CORE:
828     return Input::CoreFile;
829   case llvm::ELF::ET_NONE:
830   default:
831     return Input::Unknown;
832   }
833 }
834 
835 /// readSectionHeaders - read ELF section header table and create LDSections
readSectionHeaders(Input & pInput,const void * pELFHeader) const836 bool ELFReader<64, true>::readSectionHeaders(Input& pInput,
837                                              const void* pELFHeader) const
838 {
839   const llvm::ELF::Elf64_Ehdr* ehdr =
840       reinterpret_cast<const llvm::ELF::Elf64_Ehdr*>(pELFHeader);
841 
842   uint64_t shoff     = 0x0;
843   uint16_t shentsize = 0x0;
844   uint32_t shnum     = 0x0;
845   uint32_t shstrtab  = 0x0;
846 
847   if (llvm::sys::IsLittleEndianHost) {
848     shoff     = ehdr->e_shoff;
849     shentsize = ehdr->e_shentsize;
850     shnum     = ehdr->e_shnum;
851     shstrtab  = ehdr->e_shstrndx;
852   }
853   else {
854     shoff     = mcld::bswap64(ehdr->e_shoff);
855     shentsize = mcld::bswap16(ehdr->e_shentsize);
856     shnum     = mcld::bswap16(ehdr->e_shnum);
857     shstrtab  = mcld::bswap16(ehdr->e_shstrndx);
858   }
859 
860   // If the file has no section header table, e_shoff holds zero.
861   if (0x0 == shoff)
862     return true;
863 
864   const llvm::ELF::Elf64_Shdr *shdr = NULL;
865   llvm::StringRef shdr_region;
866   uint32_t sh_name      = 0x0;
867   uint32_t sh_type      = 0x0;
868   uint64_t sh_flags     = 0x0;
869   uint64_t sh_offset    = 0x0;
870   uint64_t sh_size      = 0x0;
871   uint32_t sh_link      = 0x0;
872   uint32_t sh_info      = 0x0;
873   uint64_t sh_addralign = 0x0;
874 
875   // if shnum and shstrtab overflow, the actual values are in the 1st shdr
876   if (shnum == llvm::ELF::SHN_UNDEF || shstrtab == llvm::ELF::SHN_XINDEX) {
877     shdr_region = pInput.memArea()->request(pInput.fileOffset() + shoff,
878                                             shentsize);
879     shdr = reinterpret_cast<const llvm::ELF::Elf64_Shdr*>(shdr_region.begin());
880 
881     if (llvm::sys::IsLittleEndianHost) {
882       sh_size = shdr->sh_size;
883       sh_link = shdr->sh_link;
884     }
885     else {
886       sh_size = mcld::bswap64(shdr->sh_size);
887       sh_link = mcld::bswap32(shdr->sh_link);
888     }
889 
890     if (shnum == llvm::ELF::SHN_UNDEF)
891       shnum = sh_size;
892     if (shstrtab == llvm::ELF::SHN_XINDEX)
893       shstrtab = sh_link;
894 
895     shoff += shentsize;
896   }
897 
898   shdr_region = pInput.memArea()->request(pInput.fileOffset() + shoff,
899                                           shnum * shentsize);
900   const llvm::ELF::Elf64_Shdr* shdrTab =
901       reinterpret_cast<const llvm::ELF::Elf64_Shdr*>(shdr_region.begin());
902 
903   // get .shstrtab first
904   shdr = &shdrTab[shstrtab];
905   if (llvm::sys::IsLittleEndianHost) {
906     sh_offset = shdr->sh_offset;
907     sh_size   = shdr->sh_size;
908   }
909   else {
910     sh_offset = mcld::bswap64(shdr->sh_offset);
911     sh_size   = mcld::bswap64(shdr->sh_size);
912   }
913 
914   llvm::StringRef sect_name_region = pInput.memArea()->request(
915       pInput.fileOffset() + sh_offset, sh_size);
916   const char* sect_name = sect_name_region.begin();
917 
918   LinkInfoList link_info_list;
919 
920   // create all LDSections, including first NULL section.
921   for (size_t idx = 0; idx < shnum; ++idx) {
922     if (llvm::sys::IsLittleEndianHost) {
923       sh_name      = shdrTab[idx].sh_name;
924       sh_type      = shdrTab[idx].sh_type;
925       sh_flags     = shdrTab[idx].sh_flags;
926       sh_offset    = shdrTab[idx].sh_offset;
927       sh_size      = shdrTab[idx].sh_size;
928       sh_link      = shdrTab[idx].sh_link;
929       sh_info      = shdrTab[idx].sh_info;
930       sh_addralign = shdrTab[idx].sh_addralign;
931     }
932     else {
933       sh_name      = mcld::bswap32(shdrTab[idx].sh_name);
934       sh_type      = mcld::bswap32(shdrTab[idx].sh_type);
935       sh_flags     = mcld::bswap64(shdrTab[idx].sh_flags);
936       sh_offset    = mcld::bswap64(shdrTab[idx].sh_offset);
937       sh_size      = mcld::bswap64(shdrTab[idx].sh_size);
938       sh_link      = mcld::bswap32(shdrTab[idx].sh_link);
939       sh_info      = mcld::bswap32(shdrTab[idx].sh_info);
940       sh_addralign = mcld::bswap64(shdrTab[idx].sh_addralign);
941     }
942 
943     LDSection* section = IRBuilder::CreateELFHeader(pInput,
944                                                     sect_name+sh_name,
945                                                     sh_type,
946                                                     sh_flags,
947                                                     sh_addralign);
948     section->setSize(sh_size);
949     section->setOffset(sh_offset);
950     section->setInfo(sh_info);
951 
952     if (sh_link != 0x0 || sh_info != 0x0) {
953       LinkInfo link_info = { section, sh_link, sh_info };
954       link_info_list.push_back(link_info);
955     }
956   } // end of for
957 
958   // set up InfoLink
959   LinkInfoList::iterator info, infoEnd = link_info_list.end();
960   for (info = link_info_list.begin(); info != infoEnd; ++info) {
961     if (LDFileFormat::Relocation == info->section->kind())
962       info->section->setLink(pInput.context()->getSection(info->sh_info));
963     else
964       info->section->setLink(pInput.context()->getSection(info->sh_link));
965   }
966 
967   return true;
968 }
969 
970 /// readSignature - read a symbol from the given Input and index in symtab
971 /// This is used to get the signature of a group section.
readSignature(Input & pInput,LDSection & pSymTab,uint32_t pSymIdx) const972 ResolveInfo* ELFReader<64, true>::readSignature(Input& pInput,
973                                                 LDSection& pSymTab,
974                                                 uint32_t pSymIdx) const
975 {
976   LDSection* symtab = &pSymTab;
977   LDSection* strtab = symtab->getLink();
978   assert(NULL != symtab && NULL != strtab);
979 
980   uint64_t offset = pInput.fileOffset() + symtab->offset() +
981                       sizeof(llvm::ELF::Elf64_Sym) * pSymIdx;
982   llvm::StringRef symbol_region =
983       pInput.memArea()->request(offset, sizeof(llvm::ELF::Elf64_Sym));
984   const llvm::ELF::Elf64_Sym* entry =
985       reinterpret_cast<const llvm::ELF::Elf64_Sym*>(symbol_region.begin());
986 
987   uint32_t st_name  = 0x0;
988   uint8_t  st_info  = 0x0;
989   uint8_t  st_other = 0x0;
990   uint16_t st_shndx = 0x0;
991   st_info  = entry->st_info;
992   st_other = entry->st_other;
993   if (llvm::sys::IsLittleEndianHost) {
994     st_name  = entry->st_name;
995     st_shndx = entry->st_shndx;
996   }
997   else {
998     st_name  = mcld::bswap32(entry->st_name);
999     st_shndx = mcld::bswap16(entry->st_shndx);
1000   }
1001 
1002   llvm::StringRef strtab_region = pInput.memArea()->request(
1003       pInput.fileOffset() + strtab->offset(), strtab->size());
1004 
1005   // get ld_name
1006   llvm::StringRef ld_name(strtab_region.begin() + st_name);
1007 
1008   ResolveInfo* result = ResolveInfo::Create(ld_name);
1009   result->setSource(pInput.type() == Input::DynObj);
1010   result->setType(static_cast<ResolveInfo::Type>(st_info & 0xF));
1011   result->setDesc(getSymDesc(st_shndx, pInput));
1012   result->setBinding(getSymBinding((st_info >> 4), st_shndx, st_other));
1013   result->setVisibility(getSymVisibility(st_other));
1014 
1015   return result;
1016 }
1017 
1018 /// readDynamic - read ELF .dynamic in input dynobj
readDynamic(Input & pInput) const1019 bool ELFReader<64, true>::readDynamic(Input& pInput) const
1020 {
1021   assert(pInput.type() == Input::DynObj);
1022   const LDSection* dynamic_sect = pInput.context()->getSection(".dynamic");
1023   if (NULL == dynamic_sect) {
1024     fatal(diag::err_cannot_read_section) << ".dynamic";
1025   }
1026   const LDSection* dynstr_sect = dynamic_sect->getLink();
1027   if (NULL == dynstr_sect) {
1028     fatal(diag::err_cannot_read_section) << ".dynstr";
1029   }
1030 
1031   llvm::StringRef dynamic_region = pInput.memArea()->request(
1032       pInput.fileOffset() + dynamic_sect->offset(), dynamic_sect->size());
1033 
1034   llvm::StringRef dynstr_region = pInput.memArea()->request(
1035       pInput.fileOffset() + dynstr_sect->offset(), dynstr_sect->size());
1036 
1037   const llvm::ELF::Elf64_Dyn* dynamic =
1038       reinterpret_cast<const llvm::ELF::Elf64_Dyn*>(dynamic_region.begin());
1039   const char* dynstr = dynstr_region.begin();
1040   bool hasSOName = false;
1041   size_t numOfEntries = dynamic_sect->size() / sizeof(llvm::ELF::Elf64_Dyn);
1042 
1043   for (size_t idx = 0; idx < numOfEntries; ++idx) {
1044 
1045     llvm::ELF::Elf64_Sxword d_tag = 0x0;
1046     llvm::ELF::Elf64_Xword d_val = 0x0;
1047 
1048     if (llvm::sys::IsLittleEndianHost) {
1049       d_tag = dynamic[idx].d_tag;
1050       d_val = dynamic[idx].d_un.d_val;
1051     } else {
1052       d_tag = mcld::bswap64(dynamic[idx].d_tag);
1053       d_val = mcld::bswap64(dynamic[idx].d_un.d_val);
1054     }
1055 
1056     switch (d_tag) {
1057       case llvm::ELF::DT_SONAME:
1058         assert(d_val < dynstr_sect->size());
1059         pInput.setName(sys::fs::Path(dynstr + d_val).filename().native());
1060         hasSOName = true;
1061         break;
1062       case llvm::ELF::DT_NEEDED:
1063         // TODO:
1064         break;
1065       case llvm::ELF::DT_NULL:
1066       default:
1067         break;
1068     }
1069   }
1070 
1071   // if there is no SONAME in .dynamic, then set it from input path
1072   if (!hasSOName)
1073     pInput.setName(pInput.path().filename().native());
1074 
1075   return true;
1076 }
1077