1 //===- ELFReader.cpp ------------------------------------------------------===//
2 //
3 // The MCLinker Project
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include <mcld/LD/ELFReader.h>
10
11 #include <mcld/IRBuilder.h>
12 #include <mcld/Fragment/FillFragment.h>
13 #include <mcld/LD/EhFrame.h>
14 #include <mcld/LD/SectionData.h>
15 #include <mcld/Target/GNULDBackend.h>
16 #include <mcld/Target/GNUInfo.h>
17 #include <mcld/Support/MsgHandling.h>
18 #include <mcld/Support/MemoryArea.h>
19 #include <mcld/Object/ObjectBuilder.h>
20
21 #include <cstring>
22
23 #include <llvm/ADT/StringRef.h>
24 #include <llvm/ADT/Twine.h>
25 #include <llvm/Support/ELF.h>
26 #include <llvm/Support/Host.h>
27
28 #include <iostream>
29
30 using namespace mcld;
31
32 //===----------------------------------------------------------------------===//
33 // ELFReader<32, true>
34 //===----------------------------------------------------------------------===//
35 /// constructor
ELFReader(GNULDBackend & pBackend)36 ELFReader<32, true>::ELFReader(GNULDBackend& pBackend)
37 : ELFReaderIF(pBackend) {
38 }
39
40 /// destructor
~ELFReader()41 ELFReader<32, true>::~ELFReader()
42 {
43 }
44
45 /// isELF - is this a ELF file
isELF(const void * pELFHeader) const46 bool ELFReader<32, true>::isELF(const void* pELFHeader) const
47 {
48 const llvm::ELF::Elf32_Ehdr* hdr =
49 reinterpret_cast<const llvm::ELF::Elf32_Ehdr*>(pELFHeader);
50 if (0 == memcmp(llvm::ELF::ElfMagic, hdr, 4))
51 return true;
52 return false;
53 }
54
55 /// readRegularSection - read a regular section and create fragments.
56 bool
readRegularSection(Input & pInput,SectionData & pSD) const57 ELFReader<32, true>::readRegularSection(Input& pInput, SectionData& pSD) const
58 {
59 uint32_t offset = pInput.fileOffset() + pSD.getSection().offset();
60 uint32_t size = pSD.getSection().size();
61
62 Fragment* frag = IRBuilder::CreateRegion(pInput, offset, size);
63 ObjectBuilder::AppendFragment(*frag, pSD);
64 return true;
65 }
66
67 /// readSymbols - read ELF symbols and create LDSymbol
readSymbols(Input & pInput,IRBuilder & pBuilder,llvm::StringRef pRegion,const char * pStrTab) const68 bool ELFReader<32, true>::readSymbols(Input& pInput,
69 IRBuilder& pBuilder,
70 llvm::StringRef pRegion,
71 const char* pStrTab) const
72 {
73 // get number of symbols
74 size_t entsize = pRegion.size()/sizeof(llvm::ELF::Elf32_Sym);
75 const llvm::ELF::Elf32_Sym* symtab =
76 reinterpret_cast<const llvm::ELF::Elf32_Sym*>(pRegion.begin());
77
78 uint32_t st_name = 0x0;
79 uint32_t st_value = 0x0;
80 uint32_t st_size = 0x0;
81 uint8_t st_info = 0x0;
82 uint8_t st_other = 0x0;
83 uint16_t st_shndx = 0x0;
84
85 // skip the first NULL symbol
86 pInput.context()->addSymbol(LDSymbol::Null());
87
88 /// recording symbols added from DynObj to analyze weak alias
89 std::vector<AliasInfo> potential_aliases;
90 bool is_dyn_obj = (pInput.type()==Input::DynObj);
91 for (size_t idx = 1; idx < entsize; ++idx) {
92 st_info = symtab[idx].st_info;
93 st_other = symtab[idx].st_other;
94
95 if (llvm::sys::IsLittleEndianHost) {
96 st_name = symtab[idx].st_name;
97 st_value = symtab[idx].st_value;
98 st_size = symtab[idx].st_size;
99 st_shndx = symtab[idx].st_shndx;
100 }
101 else {
102 st_name = mcld::bswap32(symtab[idx].st_name);
103 st_value = mcld::bswap32(symtab[idx].st_value);
104 st_size = mcld::bswap32(symtab[idx].st_size);
105 st_shndx = mcld::bswap16(symtab[idx].st_shndx);
106 }
107
108 // If the section should not be included, set the st_shndx SHN_UNDEF
109 // - A section in interrelated groups are not included.
110 if (pInput.type() == Input::Object &&
111 st_shndx < llvm::ELF::SHN_LORESERVE &&
112 st_shndx != llvm::ELF::SHN_UNDEF) {
113 if (NULL == pInput.context()->getSection(st_shndx))
114 st_shndx = llvm::ELF::SHN_UNDEF;
115 }
116
117 // get ld_type
118 ResolveInfo::Type ld_type = getSymType(st_info, st_shndx);
119
120 // get ld_desc
121 ResolveInfo::Desc ld_desc = getSymDesc(st_shndx, pInput);
122
123 // get ld_binding
124 ResolveInfo::Binding ld_binding = getSymBinding((st_info >> 4), st_shndx, st_other);
125
126 // get ld_value - ld_value must be section relative.
127 uint64_t ld_value = getSymValue(st_value, st_shndx, pInput);
128
129 // get ld_vis
130 ResolveInfo::Visibility ld_vis = getSymVisibility(st_other);
131
132 // get section
133 LDSection* section = NULL;
134 if (st_shndx < llvm::ELF::SHN_LORESERVE) // including ABS and COMMON
135 section = pInput.context()->getSection(st_shndx);
136
137 // get ld_name
138 std::string ld_name;
139 if (ResolveInfo::Section == ld_type) {
140 // Section symbol's st_name is the section index.
141 assert(NULL != section && "get a invalid section");
142 ld_name = section->name();
143 }
144 else {
145 ld_name = std::string(pStrTab + st_name);
146 }
147
148 LDSymbol* psym = pBuilder.AddSymbol(pInput,
149 ld_name,
150 ld_type,
151 ld_desc,
152 ld_binding,
153 st_size,
154 ld_value,
155 section,
156 ld_vis);
157
158 if (is_dyn_obj
159 && NULL != psym
160 && ResolveInfo::Undefined != ld_desc
161 && (ResolveInfo::Global == ld_binding ||
162 ResolveInfo::Weak == ld_binding)
163 && ResolveInfo::Object == ld_type) {
164 AliasInfo p;
165 p.pt_alias = psym;
166 p.ld_binding = ld_binding;
167 p.ld_value = ld_value;
168 potential_aliases.push_back(p);
169 }
170
171 } // end of for loop
172
173 // analyze weak alias
174 // FIXME: it is better to let IRBuilder handle alias anlysis.
175 // 1. eliminate code duplication
176 // 2. easy to know if a symbol is from .so
177 // (so that it may be a potential alias)
178 if (is_dyn_obj) {
179 // sort symbols by symbol value and then weak before strong
180 std::sort(potential_aliases.begin(), potential_aliases.end(), less);
181
182 // for each weak symbol, find out all its aliases, and
183 // then link them as a circular list in Module
184 std::vector<AliasInfo>::iterator sym_it, sym_e;
185 sym_e = potential_aliases.end();
186 for (sym_it = potential_aliases.begin(); sym_it!=sym_e; ++sym_it) {
187 if (ResolveInfo::Weak!=sym_it->ld_binding)
188 continue;
189
190 Module& pModule = pBuilder.getModule();
191 std::vector<AliasInfo>::iterator alias_it = sym_it+1;
192 while(alias_it!=sym_e) {
193 if (sym_it->ld_value != alias_it->ld_value)
194 break;
195
196 if (sym_it + 1 == alias_it)
197 pModule.CreateAliasList(*sym_it->pt_alias->resolveInfo());
198 pModule.addAlias(*alias_it->pt_alias->resolveInfo());
199 ++alias_it;
200 }
201
202 sym_it = alias_it - 1;
203 }// end of for loop
204 }
205
206 return true;
207 }
208
209 //===----------------------------------------------------------------------===//
210 // ELFReader::read relocations - read ELF rela and rel, and create Relocation
211 //===----------------------------------------------------------------------===//
212 /// ELFReader::readRela - read ELF rela and create Relocation
readRela(Input & pInput,LDSection & pSection,llvm::StringRef pRegion) const213 bool ELFReader<32, true>::readRela(Input& pInput,
214 LDSection& pSection,
215 llvm::StringRef pRegion) const
216 {
217 // get the number of rela
218 size_t entsize = pRegion.size() / sizeof(llvm::ELF::Elf32_Rela);
219 const llvm::ELF::Elf32_Rela* relaTab =
220 reinterpret_cast<const llvm::ELF::Elf32_Rela*>(pRegion.begin());
221
222 for (size_t idx=0; idx < entsize; ++idx) {
223 Relocation::Type r_type = 0x0;
224 uint32_t r_sym = 0x0;
225 uint32_t r_offset = 0x0;
226 int32_t r_addend = 0;
227 if (!target().readRelocation(relaTab[idx], r_type, r_sym, r_offset, r_addend))
228 return false;
229
230 LDSymbol* symbol = pInput.context()->getSymbol(r_sym);
231 if (NULL == symbol) {
232 fatal(diag::err_cannot_read_symbol) << r_sym << pInput.path();
233 }
234
235 IRBuilder::AddRelocation(pSection, r_type, *symbol, r_offset, r_addend);
236 } // end of for
237 return true;
238 }
239
240 /// readRel - read ELF rel and create Relocation
readRel(Input & pInput,LDSection & pSection,llvm::StringRef pRegion) const241 bool ELFReader<32, true>::readRel(Input& pInput,
242 LDSection& pSection,
243 llvm::StringRef pRegion) const
244 {
245 // get the number of rel
246 size_t entsize = pRegion.size() / sizeof(llvm::ELF::Elf32_Rel);
247 const llvm::ELF::Elf32_Rel* relTab =
248 reinterpret_cast<const llvm::ELF::Elf32_Rel*>(pRegion.begin());
249
250 for (size_t idx=0; idx < entsize; ++idx) {
251 Relocation::Type r_type = 0x0;
252 uint32_t r_sym = 0x0;
253 uint32_t r_offset = 0x0;
254
255 if (!target().readRelocation(relTab[idx], r_type, r_sym, r_offset))
256 return false;
257
258 LDSymbol* symbol = pInput.context()->getSymbol(r_sym);
259 if (NULL == symbol) {
260 fatal(diag::err_cannot_read_symbol) << r_sym << pInput.path();
261 }
262
263 IRBuilder::AddRelocation(pSection, r_type, *symbol, r_offset);
264 } // end of for
265 return true;
266 }
267
268 /// isMyEndian - is this ELF file in the same endian to me?
isMyEndian(const void * pELFHeader) const269 bool ELFReader<32, true>::isMyEndian(const void* pELFHeader) const
270 {
271 const llvm::ELF::Elf32_Ehdr* hdr =
272 reinterpret_cast<const llvm::ELF::Elf32_Ehdr*>(pELFHeader);
273
274 return (hdr->e_ident[llvm::ELF::EI_DATA] == llvm::ELF::ELFDATA2LSB);
275 }
276
277 /// isMyMachine - is this ELF file generated for the same machine.
isMyMachine(const void * pELFHeader) const278 bool ELFReader<32, true>::isMyMachine(const void* pELFHeader) const
279 {
280 const llvm::ELF::Elf32_Ehdr* hdr =
281 reinterpret_cast<const llvm::ELF::Elf32_Ehdr*>(pELFHeader);
282
283 if (llvm::sys::IsLittleEndianHost)
284 return (hdr->e_machine == target().getInfo().machine());
285 return (mcld::bswap16(hdr->e_machine) == target().getInfo().machine());
286 }
287
288 /// fileType - return the file type
fileType(const void * pELFHeader) const289 Input::Type ELFReader<32, true>::fileType(const void* pELFHeader) const
290 {
291 const llvm::ELF::Elf32_Ehdr* hdr =
292 reinterpret_cast<const llvm::ELF::Elf32_Ehdr*>(pELFHeader);
293 uint32_t type = 0x0;
294 if (llvm::sys::IsLittleEndianHost)
295 type = hdr->e_type;
296 else
297 type = mcld::bswap16(hdr->e_type);
298
299 switch(type) {
300 case llvm::ELF::ET_REL:
301 return Input::Object;
302 case llvm::ELF::ET_EXEC:
303 return Input::Exec;
304 case llvm::ELF::ET_DYN:
305 return Input::DynObj;
306 case llvm::ELF::ET_CORE:
307 return Input::CoreFile;
308 case llvm::ELF::ET_NONE:
309 default:
310 return Input::Unknown;
311 }
312 }
313
314 /// readSectionHeaders - read ELF section header table and create LDSections
readSectionHeaders(Input & pInput,const void * pELFHeader) const315 bool ELFReader<32, true>::readSectionHeaders(Input& pInput,
316 const void* pELFHeader) const
317 {
318 const llvm::ELF::Elf32_Ehdr* ehdr =
319 reinterpret_cast<const llvm::ELF::Elf32_Ehdr*>(pELFHeader);
320
321 uint32_t shoff = 0x0;
322 uint16_t shentsize = 0x0;
323 uint32_t shnum = 0x0;
324 uint32_t shstrtab = 0x0;
325
326 if (llvm::sys::IsLittleEndianHost) {
327 shoff = ehdr->e_shoff;
328 shentsize = ehdr->e_shentsize;
329 shnum = ehdr->e_shnum;
330 shstrtab = ehdr->e_shstrndx;
331 }
332 else {
333 shoff = mcld::bswap32(ehdr->e_shoff);
334 shentsize = mcld::bswap16(ehdr->e_shentsize);
335 shnum = mcld::bswap16(ehdr->e_shnum);
336 shstrtab = mcld::bswap16(ehdr->e_shstrndx);
337 }
338
339 // If the file has no section header table, e_shoff holds zero.
340 if (0x0 == shoff)
341 return true;
342
343 const llvm::ELF::Elf32_Shdr *shdr = NULL;
344 llvm::StringRef shdr_region;
345 uint32_t sh_name = 0x0;
346 uint32_t sh_type = 0x0;
347 uint32_t sh_flags = 0x0;
348 uint32_t sh_offset = 0x0;
349 uint32_t sh_size = 0x0;
350 uint32_t sh_link = 0x0;
351 uint32_t sh_info = 0x0;
352 uint32_t sh_addralign = 0x0;
353
354 // if shnum and shstrtab overflow, the actual values are in the 1st shdr
355 if (shnum == llvm::ELF::SHN_UNDEF || shstrtab == llvm::ELF::SHN_XINDEX) {
356 shdr_region = pInput.memArea()->request(pInput.fileOffset() + shoff,
357 shentsize);
358 shdr = reinterpret_cast<const llvm::ELF::Elf32_Shdr*>(shdr_region.begin());
359
360 if (llvm::sys::IsLittleEndianHost) {
361 sh_size = shdr->sh_size;
362 sh_link = shdr->sh_link;
363 }
364 else {
365 sh_size = mcld::bswap32(shdr->sh_size);
366 sh_link = mcld::bswap32(shdr->sh_link);
367 }
368
369 if (shnum == llvm::ELF::SHN_UNDEF)
370 shnum = sh_size;
371 if (shstrtab == llvm::ELF::SHN_XINDEX)
372 shstrtab = sh_link;
373
374 shoff += shentsize;
375 }
376
377 shdr_region = pInput.memArea()->request(pInput.fileOffset() + shoff,
378 shnum * shentsize);
379 const llvm::ELF::Elf32_Shdr* shdrTab =
380 reinterpret_cast<const llvm::ELF::Elf32_Shdr*>(shdr_region.begin());
381
382 // get .shstrtab first
383 shdr = &shdrTab[shstrtab];
384 if (llvm::sys::IsLittleEndianHost) {
385 sh_offset = shdr->sh_offset;
386 sh_size = shdr->sh_size;
387 }
388 else {
389 sh_offset = mcld::bswap32(shdr->sh_offset);
390 sh_size = mcld::bswap32(shdr->sh_size);
391 }
392
393 llvm::StringRef sect_name_region = pInput.memArea()->request(
394 pInput.fileOffset() + sh_offset, sh_size);
395 const char* sect_name = sect_name_region.begin();
396
397 LinkInfoList link_info_list;
398
399 // create all LDSections, including first NULL section.
400 for (size_t idx = 0; idx < shnum; ++idx) {
401 if (llvm::sys::IsLittleEndianHost) {
402 sh_name = shdrTab[idx].sh_name;
403 sh_type = shdrTab[idx].sh_type;
404 sh_flags = shdrTab[idx].sh_flags;
405 sh_offset = shdrTab[idx].sh_offset;
406 sh_size = shdrTab[idx].sh_size;
407 sh_link = shdrTab[idx].sh_link;
408 sh_info = shdrTab[idx].sh_info;
409 sh_addralign = shdrTab[idx].sh_addralign;
410 }
411 else {
412 sh_name = mcld::bswap32(shdrTab[idx].sh_name);
413 sh_type = mcld::bswap32(shdrTab[idx].sh_type);
414 sh_flags = mcld::bswap32(shdrTab[idx].sh_flags);
415 sh_offset = mcld::bswap32(shdrTab[idx].sh_offset);
416 sh_size = mcld::bswap32(shdrTab[idx].sh_size);
417 sh_link = mcld::bswap32(shdrTab[idx].sh_link);
418 sh_info = mcld::bswap32(shdrTab[idx].sh_info);
419 sh_addralign = mcld::bswap32(shdrTab[idx].sh_addralign);
420 }
421
422 LDSection* section = IRBuilder::CreateELFHeader(pInput,
423 sect_name+sh_name,
424 sh_type,
425 sh_flags,
426 sh_addralign);
427 section->setSize(sh_size);
428 section->setOffset(sh_offset);
429 section->setInfo(sh_info);
430
431 if (sh_link != 0x0 || sh_info != 0x0) {
432 LinkInfo link_info = { section, sh_link, sh_info };
433 link_info_list.push_back(link_info);
434 }
435 } // end of for
436
437 // set up InfoLink
438 LinkInfoList::iterator info, infoEnd = link_info_list.end();
439 for (info = link_info_list.begin(); info != infoEnd; ++info) {
440 if (LDFileFormat::Relocation == info->section->kind())
441 info->section->setLink(pInput.context()->getSection(info->sh_info));
442 else
443 info->section->setLink(pInput.context()->getSection(info->sh_link));
444 }
445
446 return true;
447 }
448
449 /// readSignature - read a symbol from the given Input and index in symtab
450 /// This is used to get the signature of a group section.
readSignature(Input & pInput,LDSection & pSymTab,uint32_t pSymIdx) const451 ResolveInfo* ELFReader<32, true>::readSignature(Input& pInput,
452 LDSection& pSymTab,
453 uint32_t pSymIdx) const
454 {
455 LDSection* symtab = &pSymTab;
456 LDSection* strtab = symtab->getLink();
457 assert(NULL != symtab && NULL != strtab);
458
459 uint32_t offset = pInput.fileOffset() + symtab->offset() +
460 sizeof(llvm::ELF::Elf32_Sym) * pSymIdx;
461 llvm::StringRef symbol_region =
462 pInput.memArea()->request(offset, sizeof(llvm::ELF::Elf32_Sym));
463 const llvm::ELF::Elf32_Sym* entry =
464 reinterpret_cast<const llvm::ELF::Elf32_Sym*>(symbol_region.begin());
465
466 uint32_t st_name = 0x0;
467 uint8_t st_info = 0x0;
468 uint8_t st_other = 0x0;
469 uint16_t st_shndx = 0x0;
470 st_info = entry->st_info;
471 st_other = entry->st_other;
472 if (llvm::sys::IsLittleEndianHost) {
473 st_name = entry->st_name;
474 st_shndx = entry->st_shndx;
475 }
476 else {
477 st_name = mcld::bswap32(entry->st_name);
478 st_shndx = mcld::bswap16(entry->st_shndx);
479 }
480
481 llvm::StringRef strtab_region = pInput.memArea()->request(
482 pInput.fileOffset() + strtab->offset(), strtab->size());
483
484 // get ld_name
485 llvm::StringRef ld_name(strtab_region.begin() + st_name);
486
487 ResolveInfo* result = ResolveInfo::Create(ld_name);
488 result->setSource(pInput.type() == Input::DynObj);
489 result->setType(static_cast<ResolveInfo::Type>(st_info & 0xF));
490 result->setDesc(getSymDesc(st_shndx, pInput));
491 result->setBinding(getSymBinding((st_info >> 4), st_shndx, st_other));
492 result->setVisibility(getSymVisibility(st_other));
493
494 return result;
495 }
496
497 /// readDynamic - read ELF .dynamic in input dynobj
readDynamic(Input & pInput) const498 bool ELFReader<32, true>::readDynamic(Input& pInput) const
499 {
500 assert(pInput.type() == Input::DynObj);
501 const LDSection* dynamic_sect = pInput.context()->getSection(".dynamic");
502 if (NULL == dynamic_sect) {
503 fatal(diag::err_cannot_read_section) << ".dynamic";
504 }
505 const LDSection* dynstr_sect = dynamic_sect->getLink();
506 if (NULL == dynstr_sect) {
507 fatal(diag::err_cannot_read_section) << ".dynstr";
508 }
509
510 llvm::StringRef dynamic_region = pInput.memArea()->request(
511 pInput.fileOffset() + dynamic_sect->offset(), dynamic_sect->size());
512
513 llvm::StringRef dynstr_region = pInput.memArea()->request(
514 pInput.fileOffset() + dynstr_sect->offset(), dynstr_sect->size());
515
516 const llvm::ELF::Elf32_Dyn* dynamic =
517 reinterpret_cast<const llvm::ELF::Elf32_Dyn*>(dynamic_region.begin());
518 const char* dynstr = dynstr_region.begin();
519 bool hasSOName = false;
520 size_t numOfEntries = dynamic_sect->size() / sizeof(llvm::ELF::Elf32_Dyn);
521
522 for (size_t idx = 0; idx < numOfEntries; ++idx) {
523
524 llvm::ELF::Elf32_Sword d_tag = 0x0;
525 llvm::ELF::Elf32_Word d_val = 0x0;
526
527 if (llvm::sys::IsLittleEndianHost) {
528 d_tag = dynamic[idx].d_tag;
529 d_val = dynamic[idx].d_un.d_val;
530 } else {
531 d_tag = mcld::bswap32(dynamic[idx].d_tag);
532 d_val = mcld::bswap32(dynamic[idx].d_un.d_val);
533 }
534
535 switch (d_tag) {
536 case llvm::ELF::DT_SONAME:
537 assert(d_val < dynstr_sect->size());
538 pInput.setName(sys::fs::Path(dynstr + d_val).filename().native());
539 hasSOName = true;
540 break;
541 case llvm::ELF::DT_NEEDED:
542 // TODO:
543 break;
544 case llvm::ELF::DT_NULL:
545 default:
546 break;
547 }
548 }
549
550 // if there is no SONAME in .dynamic, then set it from input path
551 if (!hasSOName)
552 pInput.setName(pInput.path().filename().native());
553
554 return true;
555 }
556
557 //===----------------------------------------------------------------------===//
558 // ELFReader<64, true>
559 //===----------------------------------------------------------------------===//
560 /// constructor
ELFReader(GNULDBackend & pBackend)561 ELFReader<64, true>::ELFReader(GNULDBackend& pBackend)
562 : ELFReaderIF(pBackend) {
563 }
564
565 /// destructor
~ELFReader()566 ELFReader<64, true>::~ELFReader()
567 {
568 }
569
570 /// isELF - is this a ELF file
isELF(const void * pELFHeader) const571 bool ELFReader<64, true>::isELF(const void* pELFHeader) const
572 {
573 const llvm::ELF::Elf64_Ehdr* hdr =
574 reinterpret_cast<const llvm::ELF::Elf64_Ehdr*>(pELFHeader);
575 if (0 == memcmp(llvm::ELF::ElfMagic, hdr, 4))
576 return true;
577 return false;
578 }
579
580 /// readRegularSection - read a regular section and create fragments.
581 bool
readRegularSection(Input & pInput,SectionData & pSD) const582 ELFReader<64, true>::readRegularSection(Input& pInput, SectionData& pSD) const
583 {
584 uint64_t offset = pInput.fileOffset() + pSD.getSection().offset();
585 uint64_t size = pSD.getSection().size();
586
587 Fragment* frag = IRBuilder::CreateRegion(pInput, offset, size);
588 ObjectBuilder::AppendFragment(*frag, pSD);
589 return true;
590 }
591
592 /// readSymbols - read ELF symbols and create LDSymbol
readSymbols(Input & pInput,IRBuilder & pBuilder,llvm::StringRef pRegion,const char * pStrTab) const593 bool ELFReader<64, true>::readSymbols(Input& pInput,
594 IRBuilder& pBuilder,
595 llvm::StringRef pRegion,
596 const char* pStrTab) const
597 {
598 // get number of symbols
599 size_t entsize = pRegion.size() / sizeof(llvm::ELF::Elf64_Sym);
600 const llvm::ELF::Elf64_Sym* symtab =
601 reinterpret_cast<const llvm::ELF::Elf64_Sym*>(pRegion.begin());
602
603 uint32_t st_name = 0x0;
604 uint64_t st_value = 0x0;
605 uint64_t st_size = 0x0;
606 uint8_t st_info = 0x0;
607 uint8_t st_other = 0x0;
608 uint16_t st_shndx = 0x0;
609
610 // skip the first NULL symbol
611 pInput.context()->addSymbol(LDSymbol::Null());
612
613 /// recording symbols added from DynObj to analyze weak alias
614 std::vector<AliasInfo> potential_aliases;
615 bool is_dyn_obj = (pInput.type()==Input::DynObj);
616 for (size_t idx = 1; idx < entsize; ++idx) {
617 st_info = symtab[idx].st_info;
618 st_other = symtab[idx].st_other;
619
620 if (llvm::sys::IsLittleEndianHost) {
621 st_name = symtab[idx].st_name;
622 st_value = symtab[idx].st_value;
623 st_size = symtab[idx].st_size;
624 st_shndx = symtab[idx].st_shndx;
625 }
626 else {
627 st_name = mcld::bswap32(symtab[idx].st_name);
628 st_value = mcld::bswap64(symtab[idx].st_value);
629 st_size = mcld::bswap64(symtab[idx].st_size);
630 st_shndx = mcld::bswap16(symtab[idx].st_shndx);
631 }
632
633 // If the section should not be included, set the st_shndx SHN_UNDEF
634 // - A section in interrelated groups are not included.
635 if (pInput.type() == Input::Object &&
636 st_shndx < llvm::ELF::SHN_LORESERVE &&
637 st_shndx != llvm::ELF::SHN_UNDEF) {
638 if (NULL == pInput.context()->getSection(st_shndx))
639 st_shndx = llvm::ELF::SHN_UNDEF;
640 }
641
642 // get ld_type
643 ResolveInfo::Type ld_type = getSymType(st_info, st_shndx);
644
645 // get ld_desc
646 ResolveInfo::Desc ld_desc = getSymDesc(st_shndx, pInput);
647
648 // get ld_binding
649 ResolveInfo::Binding ld_binding = getSymBinding((st_info >> 4), st_shndx, st_other);
650
651 // get ld_value - ld_value must be section relative.
652 uint64_t ld_value = getSymValue(st_value, st_shndx, pInput);
653
654 // get ld_vis
655 ResolveInfo::Visibility ld_vis = getSymVisibility(st_other);
656
657 // get section
658 LDSection* section = NULL;
659 if (st_shndx < llvm::ELF::SHN_LORESERVE) // including ABS and COMMON
660 section = pInput.context()->getSection(st_shndx);
661
662 // get ld_name
663 std::string ld_name;
664 if (ResolveInfo::Section == ld_type) {
665 // Section symbol's st_name is the section index.
666 assert(NULL != section && "get a invalid section");
667 ld_name = section->name();
668 }
669 else {
670 ld_name = std::string(pStrTab + st_name);
671 }
672
673 LDSymbol* psym = pBuilder.AddSymbol(pInput,
674 ld_name,
675 ld_type,
676 ld_desc,
677 ld_binding,
678 st_size,
679 ld_value,
680 section,
681 ld_vis);
682
683 if (is_dyn_obj
684 && NULL != psym
685 && ResolveInfo::Undefined != ld_desc
686 && (ResolveInfo::Global == ld_binding ||
687 ResolveInfo::Weak == ld_binding)
688 && ResolveInfo::Object == ld_type ) {
689 AliasInfo p;
690 p.pt_alias = psym;
691 p.ld_binding = ld_binding;
692 p.ld_value = ld_value;
693 potential_aliases.push_back(p);
694 }
695
696 } // end of for loop
697
698 // analyze weak alias here
699 if (is_dyn_obj) {
700 // sort symbols by symbol value and then weak before strong
701 std::sort(potential_aliases.begin(), potential_aliases.end(), less);
702
703 // for each weak symbol, find out all its aliases, and
704 // then link them as a circular list in Module
705 std::vector<AliasInfo>::iterator sym_it, sym_e;
706 sym_e = potential_aliases.end();
707 for (sym_it = potential_aliases.begin(); sym_it!=sym_e; ++sym_it) {
708 if (ResolveInfo::Weak!=sym_it->ld_binding)
709 continue;
710
711 Module& pModule = pBuilder.getModule();
712 std::vector<AliasInfo>::iterator alias_it = sym_it+1;
713 while(alias_it!=sym_e) {
714 if (sym_it->ld_value != alias_it->ld_value)
715 break;
716
717 if (sym_it + 1 == alias_it)
718 pModule.CreateAliasList(*sym_it->pt_alias->resolveInfo());
719 pModule.addAlias(*alias_it->pt_alias->resolveInfo());
720 ++alias_it;
721 }
722
723 sym_it = alias_it - 1;
724 }// end of for loop
725 }
726 return true;
727 }
728
729 //===----------------------------------------------------------------------===//
730 // ELFReader::read relocations - read ELF rela and rel, and create Relocation
731 //===----------------------------------------------------------------------===//
732 /// ELFReader::readRela - read ELF rela and create Relocation
readRela(Input & pInput,LDSection & pSection,llvm::StringRef pRegion) const733 bool ELFReader<64, true>::readRela(Input& pInput,
734 LDSection& pSection,
735 llvm::StringRef pRegion) const
736 {
737 // get the number of rela
738 size_t entsize = pRegion.size() / sizeof(llvm::ELF::Elf64_Rela);
739 const llvm::ELF::Elf64_Rela* relaTab =
740 reinterpret_cast<const llvm::ELF::Elf64_Rela*>(pRegion.begin());
741
742 for (size_t idx=0; idx < entsize; ++idx) {
743 Relocation::Type r_type = 0x0;
744 uint32_t r_sym = 0x0;
745 uint64_t r_offset = 0x0;
746 int64_t r_addend = 0;
747 if (!target().readRelocation(relaTab[idx],
748 r_type, r_sym, r_offset, r_addend)) {
749 return false;
750 }
751
752 LDSymbol* symbol = pInput.context()->getSymbol(r_sym);
753 if (NULL == symbol) {
754 fatal(diag::err_cannot_read_symbol) << r_sym << pInput.path();
755 }
756
757 IRBuilder::AddRelocation(pSection, r_type, *symbol, r_offset, r_addend);
758 } // end of for
759 return true;
760 }
761
762 /// readRel - read ELF rel and create Relocation
readRel(Input & pInput,LDSection & pSection,llvm::StringRef pRegion) const763 bool ELFReader<64, true>::readRel(Input& pInput,
764 LDSection& pSection,
765 llvm::StringRef pRegion) const
766 {
767 // get the number of rel
768 size_t entsize = pRegion.size() / sizeof(llvm::ELF::Elf64_Rel);
769 const llvm::ELF::Elf64_Rel* relTab =
770 reinterpret_cast<const llvm::ELF::Elf64_Rel*>(pRegion.begin());
771
772 for (size_t idx=0; idx < entsize; ++idx) {
773 Relocation::Type r_type = 0x0;
774 uint32_t r_sym = 0x0;
775 uint64_t r_offset = 0x0;
776 if (!target().readRelocation(relTab[idx], r_type, r_sym, r_offset))
777 return false;
778
779 LDSymbol* symbol = pInput.context()->getSymbol(r_sym);
780 if (NULL == symbol) {
781 fatal(diag::err_cannot_read_symbol) << r_sym << pInput.path();
782 }
783
784 IRBuilder::AddRelocation(pSection, r_type, *symbol, r_offset);
785 } // end of for
786 return true;
787 }
788
789 /// isMyEndian - is this ELF file in the same endian to me?
isMyEndian(const void * pELFHeader) const790 bool ELFReader<64, true>::isMyEndian(const void* pELFHeader) const
791 {
792 const llvm::ELF::Elf64_Ehdr* hdr =
793 reinterpret_cast<const llvm::ELF::Elf64_Ehdr*>(pELFHeader);
794
795 return (hdr->e_ident[llvm::ELF::EI_DATA] == llvm::ELF::ELFDATA2LSB);
796 }
797
798 /// isMyMachine - is this ELF file generated for the same machine.
isMyMachine(const void * pELFHeader) const799 bool ELFReader<64, true>::isMyMachine(const void* pELFHeader) const
800 {
801 const llvm::ELF::Elf64_Ehdr* hdr =
802 reinterpret_cast<const llvm::ELF::Elf64_Ehdr*>(pELFHeader);
803
804 if (llvm::sys::IsLittleEndianHost)
805 return (hdr->e_machine == target().getInfo().machine());
806 return (mcld::bswap16(hdr->e_machine) == target().getInfo().machine());
807 }
808
809 /// fileType - return the file type
fileType(const void * pELFHeader) const810 Input::Type ELFReader<64, true>::fileType(const void* pELFHeader) const
811 {
812 const llvm::ELF::Elf64_Ehdr* hdr =
813 reinterpret_cast<const llvm::ELF::Elf64_Ehdr*>(pELFHeader);
814 uint32_t type = 0x0;
815 if (llvm::sys::IsLittleEndianHost)
816 type = hdr->e_type;
817 else
818 type = mcld::bswap16(hdr->e_type);
819
820 switch(type) {
821 case llvm::ELF::ET_REL:
822 return Input::Object;
823 case llvm::ELF::ET_EXEC:
824 return Input::Exec;
825 case llvm::ELF::ET_DYN:
826 return Input::DynObj;
827 case llvm::ELF::ET_CORE:
828 return Input::CoreFile;
829 case llvm::ELF::ET_NONE:
830 default:
831 return Input::Unknown;
832 }
833 }
834
835 /// readSectionHeaders - read ELF section header table and create LDSections
readSectionHeaders(Input & pInput,const void * pELFHeader) const836 bool ELFReader<64, true>::readSectionHeaders(Input& pInput,
837 const void* pELFHeader) const
838 {
839 const llvm::ELF::Elf64_Ehdr* ehdr =
840 reinterpret_cast<const llvm::ELF::Elf64_Ehdr*>(pELFHeader);
841
842 uint64_t shoff = 0x0;
843 uint16_t shentsize = 0x0;
844 uint32_t shnum = 0x0;
845 uint32_t shstrtab = 0x0;
846
847 if (llvm::sys::IsLittleEndianHost) {
848 shoff = ehdr->e_shoff;
849 shentsize = ehdr->e_shentsize;
850 shnum = ehdr->e_shnum;
851 shstrtab = ehdr->e_shstrndx;
852 }
853 else {
854 shoff = mcld::bswap64(ehdr->e_shoff);
855 shentsize = mcld::bswap16(ehdr->e_shentsize);
856 shnum = mcld::bswap16(ehdr->e_shnum);
857 shstrtab = mcld::bswap16(ehdr->e_shstrndx);
858 }
859
860 // If the file has no section header table, e_shoff holds zero.
861 if (0x0 == shoff)
862 return true;
863
864 const llvm::ELF::Elf64_Shdr *shdr = NULL;
865 llvm::StringRef shdr_region;
866 uint32_t sh_name = 0x0;
867 uint32_t sh_type = 0x0;
868 uint64_t sh_flags = 0x0;
869 uint64_t sh_offset = 0x0;
870 uint64_t sh_size = 0x0;
871 uint32_t sh_link = 0x0;
872 uint32_t sh_info = 0x0;
873 uint64_t sh_addralign = 0x0;
874
875 // if shnum and shstrtab overflow, the actual values are in the 1st shdr
876 if (shnum == llvm::ELF::SHN_UNDEF || shstrtab == llvm::ELF::SHN_XINDEX) {
877 shdr_region = pInput.memArea()->request(pInput.fileOffset() + shoff,
878 shentsize);
879 shdr = reinterpret_cast<const llvm::ELF::Elf64_Shdr*>(shdr_region.begin());
880
881 if (llvm::sys::IsLittleEndianHost) {
882 sh_size = shdr->sh_size;
883 sh_link = shdr->sh_link;
884 }
885 else {
886 sh_size = mcld::bswap64(shdr->sh_size);
887 sh_link = mcld::bswap32(shdr->sh_link);
888 }
889
890 if (shnum == llvm::ELF::SHN_UNDEF)
891 shnum = sh_size;
892 if (shstrtab == llvm::ELF::SHN_XINDEX)
893 shstrtab = sh_link;
894
895 shoff += shentsize;
896 }
897
898 shdr_region = pInput.memArea()->request(pInput.fileOffset() + shoff,
899 shnum * shentsize);
900 const llvm::ELF::Elf64_Shdr* shdrTab =
901 reinterpret_cast<const llvm::ELF::Elf64_Shdr*>(shdr_region.begin());
902
903 // get .shstrtab first
904 shdr = &shdrTab[shstrtab];
905 if (llvm::sys::IsLittleEndianHost) {
906 sh_offset = shdr->sh_offset;
907 sh_size = shdr->sh_size;
908 }
909 else {
910 sh_offset = mcld::bswap64(shdr->sh_offset);
911 sh_size = mcld::bswap64(shdr->sh_size);
912 }
913
914 llvm::StringRef sect_name_region = pInput.memArea()->request(
915 pInput.fileOffset() + sh_offset, sh_size);
916 const char* sect_name = sect_name_region.begin();
917
918 LinkInfoList link_info_list;
919
920 // create all LDSections, including first NULL section.
921 for (size_t idx = 0; idx < shnum; ++idx) {
922 if (llvm::sys::IsLittleEndianHost) {
923 sh_name = shdrTab[idx].sh_name;
924 sh_type = shdrTab[idx].sh_type;
925 sh_flags = shdrTab[idx].sh_flags;
926 sh_offset = shdrTab[idx].sh_offset;
927 sh_size = shdrTab[idx].sh_size;
928 sh_link = shdrTab[idx].sh_link;
929 sh_info = shdrTab[idx].sh_info;
930 sh_addralign = shdrTab[idx].sh_addralign;
931 }
932 else {
933 sh_name = mcld::bswap32(shdrTab[idx].sh_name);
934 sh_type = mcld::bswap32(shdrTab[idx].sh_type);
935 sh_flags = mcld::bswap64(shdrTab[idx].sh_flags);
936 sh_offset = mcld::bswap64(shdrTab[idx].sh_offset);
937 sh_size = mcld::bswap64(shdrTab[idx].sh_size);
938 sh_link = mcld::bswap32(shdrTab[idx].sh_link);
939 sh_info = mcld::bswap32(shdrTab[idx].sh_info);
940 sh_addralign = mcld::bswap64(shdrTab[idx].sh_addralign);
941 }
942
943 LDSection* section = IRBuilder::CreateELFHeader(pInput,
944 sect_name+sh_name,
945 sh_type,
946 sh_flags,
947 sh_addralign);
948 section->setSize(sh_size);
949 section->setOffset(sh_offset);
950 section->setInfo(sh_info);
951
952 if (sh_link != 0x0 || sh_info != 0x0) {
953 LinkInfo link_info = { section, sh_link, sh_info };
954 link_info_list.push_back(link_info);
955 }
956 } // end of for
957
958 // set up InfoLink
959 LinkInfoList::iterator info, infoEnd = link_info_list.end();
960 for (info = link_info_list.begin(); info != infoEnd; ++info) {
961 if (LDFileFormat::Relocation == info->section->kind())
962 info->section->setLink(pInput.context()->getSection(info->sh_info));
963 else
964 info->section->setLink(pInput.context()->getSection(info->sh_link));
965 }
966
967 return true;
968 }
969
970 /// readSignature - read a symbol from the given Input and index in symtab
971 /// This is used to get the signature of a group section.
readSignature(Input & pInput,LDSection & pSymTab,uint32_t pSymIdx) const972 ResolveInfo* ELFReader<64, true>::readSignature(Input& pInput,
973 LDSection& pSymTab,
974 uint32_t pSymIdx) const
975 {
976 LDSection* symtab = &pSymTab;
977 LDSection* strtab = symtab->getLink();
978 assert(NULL != symtab && NULL != strtab);
979
980 uint64_t offset = pInput.fileOffset() + symtab->offset() +
981 sizeof(llvm::ELF::Elf64_Sym) * pSymIdx;
982 llvm::StringRef symbol_region =
983 pInput.memArea()->request(offset, sizeof(llvm::ELF::Elf64_Sym));
984 const llvm::ELF::Elf64_Sym* entry =
985 reinterpret_cast<const llvm::ELF::Elf64_Sym*>(symbol_region.begin());
986
987 uint32_t st_name = 0x0;
988 uint8_t st_info = 0x0;
989 uint8_t st_other = 0x0;
990 uint16_t st_shndx = 0x0;
991 st_info = entry->st_info;
992 st_other = entry->st_other;
993 if (llvm::sys::IsLittleEndianHost) {
994 st_name = entry->st_name;
995 st_shndx = entry->st_shndx;
996 }
997 else {
998 st_name = mcld::bswap32(entry->st_name);
999 st_shndx = mcld::bswap16(entry->st_shndx);
1000 }
1001
1002 llvm::StringRef strtab_region = pInput.memArea()->request(
1003 pInput.fileOffset() + strtab->offset(), strtab->size());
1004
1005 // get ld_name
1006 llvm::StringRef ld_name(strtab_region.begin() + st_name);
1007
1008 ResolveInfo* result = ResolveInfo::Create(ld_name);
1009 result->setSource(pInput.type() == Input::DynObj);
1010 result->setType(static_cast<ResolveInfo::Type>(st_info & 0xF));
1011 result->setDesc(getSymDesc(st_shndx, pInput));
1012 result->setBinding(getSymBinding((st_info >> 4), st_shndx, st_other));
1013 result->setVisibility(getSymVisibility(st_other));
1014
1015 return result;
1016 }
1017
1018 /// readDynamic - read ELF .dynamic in input dynobj
readDynamic(Input & pInput) const1019 bool ELFReader<64, true>::readDynamic(Input& pInput) const
1020 {
1021 assert(pInput.type() == Input::DynObj);
1022 const LDSection* dynamic_sect = pInput.context()->getSection(".dynamic");
1023 if (NULL == dynamic_sect) {
1024 fatal(diag::err_cannot_read_section) << ".dynamic";
1025 }
1026 const LDSection* dynstr_sect = dynamic_sect->getLink();
1027 if (NULL == dynstr_sect) {
1028 fatal(diag::err_cannot_read_section) << ".dynstr";
1029 }
1030
1031 llvm::StringRef dynamic_region = pInput.memArea()->request(
1032 pInput.fileOffset() + dynamic_sect->offset(), dynamic_sect->size());
1033
1034 llvm::StringRef dynstr_region = pInput.memArea()->request(
1035 pInput.fileOffset() + dynstr_sect->offset(), dynstr_sect->size());
1036
1037 const llvm::ELF::Elf64_Dyn* dynamic =
1038 reinterpret_cast<const llvm::ELF::Elf64_Dyn*>(dynamic_region.begin());
1039 const char* dynstr = dynstr_region.begin();
1040 bool hasSOName = false;
1041 size_t numOfEntries = dynamic_sect->size() / sizeof(llvm::ELF::Elf64_Dyn);
1042
1043 for (size_t idx = 0; idx < numOfEntries; ++idx) {
1044
1045 llvm::ELF::Elf64_Sxword d_tag = 0x0;
1046 llvm::ELF::Elf64_Xword d_val = 0x0;
1047
1048 if (llvm::sys::IsLittleEndianHost) {
1049 d_tag = dynamic[idx].d_tag;
1050 d_val = dynamic[idx].d_un.d_val;
1051 } else {
1052 d_tag = mcld::bswap64(dynamic[idx].d_tag);
1053 d_val = mcld::bswap64(dynamic[idx].d_un.d_val);
1054 }
1055
1056 switch (d_tag) {
1057 case llvm::ELF::DT_SONAME:
1058 assert(d_val < dynstr_sect->size());
1059 pInput.setName(sys::fs::Path(dynstr + d_val).filename().native());
1060 hasSOName = true;
1061 break;
1062 case llvm::ELF::DT_NEEDED:
1063 // TODO:
1064 break;
1065 case llvm::ELF::DT_NULL:
1066 default:
1067 break;
1068 }
1069 }
1070
1071 // if there is no SONAME in .dynamic, then set it from input path
1072 if (!hasSOName)
1073 pInput.setName(pInput.path().filename().native());
1074
1075 return true;
1076 }
1077