• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- GNUArchiveReader.cpp -----------------------------------------------===//
2 //
3 //                     The MCLinker Project
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include <mcld/LD/GNUArchiveReader.h>
10 
11 #include <mcld/Module.h>
12 #include <mcld/InputTree.h>
13 #include <mcld/MC/Attribute.h>
14 #include <mcld/MC/MCLDInput.h>
15 #include <mcld/LD/ResolveInfo.h>
16 #include <mcld/LD/ELFObjectReader.h>
17 #include <mcld/Support/FileSystem.h>
18 #include <mcld/Support/FileHandle.h>
19 #include <mcld/Support/MemoryArea.h>
20 #include <mcld/Support/MemoryRegion.h>
21 #include <mcld/Support/MsgHandling.h>
22 #include <mcld/Support/Path.h>
23 #include <mcld/ADT/SizeTraits.h>
24 
25 #include <llvm/ADT/StringRef.h>
26 #include <llvm/Support/Host.h>
27 
28 #include <cstring>
29 #include <cstdlib>
30 
31 using namespace mcld;
32 
GNUArchiveReader(Module & pModule,ELFObjectReader & pELFObjectReader)33 GNUArchiveReader::GNUArchiveReader(Module& pModule,
34                                    ELFObjectReader& pELFObjectReader)
35  : m_Module(pModule),
36    m_ELFObjectReader(pELFObjectReader)
37 {
38 }
39 
~GNUArchiveReader()40 GNUArchiveReader::~GNUArchiveReader()
41 {
42 }
43 
44 /// isMyFormat
isMyFormat(Input & pInput) const45 bool GNUArchiveReader::isMyFormat(Input& pInput) const
46 {
47   assert(pInput.hasMemArea());
48   MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(),
49                                                    Archive::MAGIC_LEN);
50   const char* str = reinterpret_cast<const char*>(region->getBuffer());
51 
52   bool result = false;
53   assert(NULL != str);
54   if (isArchive(str) || isThinArchive(str))
55     result = true;
56 
57   pInput.memArea()->release(region);
58   return result;
59 }
60 
61 /// isArchive
isArchive(const char * pStr) const62 bool GNUArchiveReader::isArchive(const char* pStr) const
63 {
64   return (0 == memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN));
65 }
66 
67 /// isThinArchive
isThinArchive(const char * pStr) const68 bool GNUArchiveReader::isThinArchive(const char* pStr) const
69 {
70   return (0 == memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN));
71 }
72 
73 /// isThinArchive
isThinArchive(Input & pInput) const74 bool GNUArchiveReader::isThinArchive(Input& pInput) const
75 {
76   assert(pInput.hasMemArea());
77   MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(),
78                                                    Archive::MAGIC_LEN);
79   const char* str = reinterpret_cast<const char*>(region->getBuffer());
80 
81   bool result = false;
82   assert(NULL != str);
83   if (isThinArchive(str))
84     result = true;
85 
86   pInput.memArea()->release(region);
87   return result;
88 }
89 
readArchive(Archive & pArchive)90 bool GNUArchiveReader::readArchive(Archive& pArchive)
91 {
92   // bypass the empty archive
93   if (Archive::MAGIC_LEN == pArchive.getARFile().memArea()->handler()->size())
94     return true;
95 
96   if (pArchive.getARFile().attribute()->isWholeArchive())
97     return includeAllMembers(pArchive);
98 
99   // if this is the first time read this archive, setup symtab and strtab
100   if (pArchive.getSymbolTable().empty()) {
101   // read the symtab of the archive
102   readSymbolTable(pArchive);
103 
104   // read the strtab of the archive
105   readStringTable(pArchive);
106 
107   // add root archive to ArchiveMemberMap
108   pArchive.addArchiveMember(pArchive.getARFile().name(),
109                             pArchive.inputs().root(),
110                             &InputTree::Downward);
111   }
112 
113   // include the needed members in the archive and build up the input tree
114   bool willSymResolved;
115   do {
116     willSymResolved = false;
117     for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) {
118       // bypass if we already decided to include this symbol or not
119       if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx))
120         continue;
121 
122       // bypass if another symbol with the same object file offset is included
123       if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) {
124         pArchive.setSymbolStatus(idx, Archive::Symbol::Include);
125         continue;
126       }
127 
128       // check if we should include this defined symbol
129       Archive::Symbol::Status status =
130         shouldIncludeSymbol(pArchive.getSymbolName(idx));
131       if (Archive::Symbol::Unknown != status)
132         pArchive.setSymbolStatus(idx, status);
133 
134       if (Archive::Symbol::Include == status) {
135         // include the object member from the given offset
136         includeMember(pArchive, pArchive.getObjFileOffset(idx));
137         willSymResolved = true;
138       } // end of if
139     } // end of for
140   } while (willSymResolved);
141 
142   return true;
143 }
144 
145 /// readMemberHeader - read the header of a member in a archive file and then
146 /// return the corresponding archive member (it may be an input object or
147 /// another archive)
148 /// @param pArchiveRoot  - the archive root that holds the strtab (extended
149 ///                        name table)
150 /// @param pArchiveFile  - the archive that contains the needed object
151 /// @param pFileOffset   - file offset of the member header in the archive
152 /// @param pNestedOffset - used when we find a nested archive
153 /// @param pMemberSize   - the file size of this member
readMemberHeader(Archive & pArchiveRoot,Input & pArchiveFile,uint32_t pFileOffset,uint32_t & pNestedOffset,size_t & pMemberSize)154 Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot,
155                                           Input& pArchiveFile,
156                                           uint32_t pFileOffset,
157                                           uint32_t& pNestedOffset,
158                                           size_t& pMemberSize)
159 {
160   assert(pArchiveFile.hasMemArea());
161 
162   MemoryRegion* header_region =
163     pArchiveFile.memArea()->request((pArchiveFile.fileOffset() + pFileOffset),
164                                     sizeof(Archive::MemberHeader));
165   const Archive::MemberHeader* header =
166     reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
167 
168   assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
169 
170   pMemberSize = atoi(header->size);
171 
172   // parse the member name and nested offset if any
173   std::string member_name;
174   llvm::StringRef name_field(header->name, sizeof(header->name));
175   if ('/' != header->name[0]) {
176     // this is an object file in an archive
177     size_t pos = name_field.find_first_of('/');
178     member_name.assign(name_field.substr(0, pos).str());
179   }
180   else {
181     // this is an object/archive file in a thin archive
182     size_t begin = 1;
183     size_t end = name_field.find_first_of(" :");
184     uint32_t name_offset = 0;
185     // parse the name offset
186     name_field.substr(begin, end - begin).getAsInteger(10, name_offset);
187 
188     if (':' == name_field[end]) {
189       // there is a nested offset
190       begin = end + 1;
191       end = name_field.find_first_of(' ', begin);
192       name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset);
193     }
194 
195     // get the member name from the extended name table
196     assert(pArchiveRoot.hasStrTable());
197     begin = name_offset;
198     end = pArchiveRoot.getStrTable().find_first_of('\n', begin);
199     member_name.assign(pArchiveRoot.getStrTable().substr(begin, end - begin -1));
200   }
201 
202   Input* member = NULL;
203   bool isThinAR = isThinArchive(pArchiveFile);
204   if (!isThinAR) {
205     // this is an object file in an archive
206     member = pArchiveRoot.getMemberFile(pArchiveFile,
207                                         isThinAR,
208                                         member_name,
209                                         pArchiveFile.path(),
210                                         (pFileOffset +
211                                          sizeof(Archive::MemberHeader)));
212   }
213   else {
214     // this is a member in a thin archive
215     // try to find if this is a archive already in the map first
216     Archive::ArchiveMember* ar_member =
217       pArchiveRoot.getArchiveMember(member_name);
218     if (NULL != ar_member) {
219       return ar_member->file;
220     }
221 
222     // get nested file path, the nested file's member name is the relative
223     // path to the archive containing it.
224     sys::fs::Path input_path(pArchiveFile.path().parent_path());
225     if (!input_path.empty())
226       input_path.append(member_name);
227     else
228       input_path.assign(member_name);
229 
230     member = pArchiveRoot.getMemberFile(pArchiveFile,
231                                         isThinAR,
232                                         member_name,
233                                         input_path);
234   }
235 
236   pArchiveFile.memArea()->release(header_region);
237   return member;
238 }
239 
240 /// readSymbolTable - read the archive symbol map (armap)
readSymbolTable(Archive & pArchive)241 bool GNUArchiveReader::readSymbolTable(Archive& pArchive)
242 {
243   assert(pArchive.getARFile().hasMemArea());
244 
245   MemoryRegion* header_region =
246     pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
247                                              Archive::MAGIC_LEN),
248                                             sizeof(Archive::MemberHeader));
249   const Archive::MemberHeader* header =
250     reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
251   assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
252 
253   int symtab_size = atoi(header->size);
254   pArchive.setSymTabSize(symtab_size);
255 
256   if (!pArchive.getARFile().attribute()->isWholeArchive()) {
257     MemoryRegion* symtab_region =
258       pArchive.getARFile().memArea()->request(
259                                             (pArchive.getARFile().fileOffset() +
260                                              Archive::MAGIC_LEN +
261                                              sizeof(Archive::MemberHeader)),
262                                             symtab_size);
263     const uint32_t* data =
264       reinterpret_cast<const uint32_t*>(symtab_region->getBuffer());
265 
266     // read the number of symbols
267     uint32_t number = 0;
268     if (llvm::sys::isLittleEndianHost())
269       number = mcld::bswap32(*data);
270     else
271       number = *data;
272 
273     // set up the pointers for file offset and name offset
274     ++data;
275     const char* name = reinterpret_cast<const char*>(data + number);
276 
277     // add the archive symbols
278     for (uint32_t i = 0; i < number; ++i) {
279       if (llvm::sys::isLittleEndianHost())
280         pArchive.addSymbol(name, mcld::bswap32(*data));
281       else
282         pArchive.addSymbol(name, *data);
283       name += strlen(name) + 1;
284       ++data;
285     }
286     pArchive.getARFile().memArea()->release(symtab_region);
287   }
288   pArchive.getARFile().memArea()->release(header_region);
289   return true;
290 }
291 
292 /// readStringTable - read the strtab for long file name of the archive
readStringTable(Archive & pArchive)293 bool GNUArchiveReader::readStringTable(Archive& pArchive)
294 {
295   size_t offset = Archive::MAGIC_LEN +
296                   sizeof(Archive::MemberHeader) +
297                   pArchive.getSymTabSize();
298 
299   if (0x0 != (offset & 1))
300     ++offset;
301 
302   assert(pArchive.getARFile().hasMemArea());
303 
304   MemoryRegion* header_region =
305     pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
306                                              offset),
307                                             sizeof(Archive::MemberHeader));
308   const Archive::MemberHeader* header =
309     reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
310 
311   assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
312 
313   if (0 == memcmp(header->name, Archive::STRTAB_NAME, sizeof(header->name))) {
314     // read the extended name table
315     int strtab_size = atoi(header->size);
316     MemoryRegion* strtab_region =
317       pArchive.getARFile().memArea()->request(
318                                    (pArchive.getARFile().fileOffset() +
319                                     offset + sizeof(Archive::MemberHeader)),
320                                    strtab_size);
321     const char* strtab =
322       reinterpret_cast<const char*>(strtab_region->getBuffer());
323     pArchive.getStrTable().assign(strtab, strtab_size);
324     pArchive.getARFile().memArea()->release(strtab_region);
325   }
326   pArchive.getARFile().memArea()->release(header_region);
327   return true;
328 }
329 
330 /// shouldIncludeStatus - given a sym name from armap and check if including
331 /// the corresponding archive member, and then return the decision
332 enum Archive::Symbol::Status
shouldIncludeSymbol(const llvm::StringRef & pSymName) const333 GNUArchiveReader::shouldIncludeSymbol(const llvm::StringRef& pSymName) const
334 {
335   // TODO: handle symbol version issue and user defined symbols
336   const ResolveInfo* info = m_Module.getNamePool().findInfo(pSymName);
337   if (NULL != info) {
338     if (!info->isUndef())
339       return Archive::Symbol::Exclude;
340     if (info->isWeak())
341       return Archive::Symbol::Unknown;
342     return Archive::Symbol::Include;
343   }
344   return Archive::Symbol::Unknown;
345 }
346 
347 /// includeMember - include the object member in the given file offset, and
348 /// return the size of the object
349 /// @param pArchiveRoot - the archive root
350 /// @param pFileOffset  - file offset of the member header in the archive
includeMember(Archive & pArchive,uint32_t pFileOffset)351 size_t GNUArchiveReader::includeMember(Archive& pArchive, uint32_t pFileOffset)
352 {
353   Input* cur_archive = &(pArchive.getARFile());
354   Input* member = NULL;
355   uint32_t file_offset = pFileOffset;
356   size_t size = 0;
357   do {
358     uint32_t nested_offset = 0;
359     // use the file offset in current archive to find out the member we
360     // want to include
361     member = readMemberHeader(pArchive,
362                               *cur_archive,
363                               file_offset,
364                               nested_offset,
365                               size);
366     assert(member != NULL);
367     // bypass if we get an archive that is already in the map
368     if (Input::Archive == member->type()) {
369         cur_archive = member;
370         file_offset = nested_offset;
371         continue;
372     }
373 
374     // insert a node into the subtree of current archive.
375     Archive::ArchiveMember* parent =
376       pArchive.getArchiveMember(cur_archive->name());
377 
378     assert(NULL != parent);
379     pArchive.inputs().insert(parent->lastPos, *(parent->move), *member);
380 
381     // move the iterator to new created node, and also adjust the
382     // direction to Afterward for next insertion in this subtree
383     parent->move->move(parent->lastPos);
384     parent->move = &InputTree::Afterward;
385 
386     if (m_ELFObjectReader.isMyFormat(*member)) {
387       member->setType(Input::Object);
388       pArchive.addObjectMember(pFileOffset, parent->lastPos);
389       m_ELFObjectReader.readHeader(*member);
390       m_ELFObjectReader.readSections(*member);
391       m_ELFObjectReader.readSymbols(*member);
392       m_Module.getObjectList().push_back(member);
393     }
394     else if (isMyFormat(*member)) {
395       member->setType(Input::Archive);
396       // when adding a new archive node, set the iterator to archive
397       // itself, and set the direction to Downward
398       pArchive.addArchiveMember(member->name(),
399                                 parent->lastPos,
400                                 &InputTree::Downward);
401       cur_archive = member;
402       file_offset = nested_offset;
403     }
404   } while (Input::Object != member->type());
405   return size;
406 }
407 
408 /// includeAllMembers - include all object members. This is called if
409 /// --whole-archive is the attribute for this archive file.
includeAllMembers(Archive & pArchive)410 bool GNUArchiveReader::includeAllMembers(Archive& pArchive)
411 {
412   // read the symtab of the archive
413   readSymbolTable(pArchive);
414 
415   // read the strtab of the archive
416   readStringTable(pArchive);
417 
418   // add root archive to ArchiveMemberMap
419   pArchive.addArchiveMember(pArchive.getARFile().name(),
420                             pArchive.inputs().root(),
421                             &InputTree::Downward);
422 
423   bool isThinAR = isThinArchive(pArchive.getARFile());
424   uint32_t begin_offset = pArchive.getARFile().fileOffset() +
425                           Archive::MAGIC_LEN +
426                           sizeof(Archive::MemberHeader) +
427                           pArchive.getSymTabSize();
428   if (pArchive.hasStrTable()) {
429     if (0x0 != (begin_offset & 1))
430       ++begin_offset;
431     begin_offset += sizeof(Archive::MemberHeader) +
432                     pArchive.getStrTable().size();
433   }
434   uint32_t end_offset = pArchive.getARFile().memArea()->handler()->size();
435   for (uint32_t offset = begin_offset;
436        offset < end_offset;
437        offset += sizeof(Archive::MemberHeader)) {
438 
439     size_t size = includeMember(pArchive, offset);
440 
441     if (!isThinAR) {
442       offset += size;
443     }
444 
445     if (0x0 != (offset & 1))
446       ++offset;
447   }
448   return true;
449 }
450 
451