1 //===- GNUArchiveReader.cpp -----------------------------------------------===//
2 //
3 // The MCLinker Project
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include <mcld/LD/GNUArchiveReader.h>
10
11 #include <mcld/Module.h>
12 #include <mcld/InputTree.h>
13 #include <mcld/MC/Attribute.h>
14 #include <mcld/MC/MCLDInput.h>
15 #include <mcld/LD/ResolveInfo.h>
16 #include <mcld/LD/ELFObjectReader.h>
17 #include <mcld/Support/FileSystem.h>
18 #include <mcld/Support/FileHandle.h>
19 #include <mcld/Support/MemoryArea.h>
20 #include <mcld/Support/MemoryRegion.h>
21 #include <mcld/Support/MsgHandling.h>
22 #include <mcld/Support/Path.h>
23 #include <mcld/ADT/SizeTraits.h>
24
25 #include <llvm/ADT/StringRef.h>
26 #include <llvm/Support/Host.h>
27
28 #include <cstring>
29 #include <cstdlib>
30
31 using namespace mcld;
32
GNUArchiveReader(Module & pModule,ELFObjectReader & pELFObjectReader)33 GNUArchiveReader::GNUArchiveReader(Module& pModule,
34 ELFObjectReader& pELFObjectReader)
35 : m_Module(pModule),
36 m_ELFObjectReader(pELFObjectReader)
37 {
38 }
39
~GNUArchiveReader()40 GNUArchiveReader::~GNUArchiveReader()
41 {
42 }
43
44 /// isMyFormat
isMyFormat(Input & pInput) const45 bool GNUArchiveReader::isMyFormat(Input& pInput) const
46 {
47 assert(pInput.hasMemArea());
48 MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(),
49 Archive::MAGIC_LEN);
50 const char* str = reinterpret_cast<const char*>(region->getBuffer());
51
52 bool result = false;
53 assert(NULL != str);
54 if (isArchive(str) || isThinArchive(str))
55 result = true;
56
57 pInput.memArea()->release(region);
58 return result;
59 }
60
61 /// isArchive
isArchive(const char * pStr) const62 bool GNUArchiveReader::isArchive(const char* pStr) const
63 {
64 return (0 == memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN));
65 }
66
67 /// isThinArchive
isThinArchive(const char * pStr) const68 bool GNUArchiveReader::isThinArchive(const char* pStr) const
69 {
70 return (0 == memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN));
71 }
72
73 /// isThinArchive
isThinArchive(Input & pInput) const74 bool GNUArchiveReader::isThinArchive(Input& pInput) const
75 {
76 assert(pInput.hasMemArea());
77 MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(),
78 Archive::MAGIC_LEN);
79 const char* str = reinterpret_cast<const char*>(region->getBuffer());
80
81 bool result = false;
82 assert(NULL != str);
83 if (isThinArchive(str))
84 result = true;
85
86 pInput.memArea()->release(region);
87 return result;
88 }
89
readArchive(Archive & pArchive)90 bool GNUArchiveReader::readArchive(Archive& pArchive)
91 {
92 // bypass the empty archive
93 if (Archive::MAGIC_LEN == pArchive.getARFile().memArea()->handler()->size())
94 return true;
95
96 if (pArchive.getARFile().attribute()->isWholeArchive())
97 return includeAllMembers(pArchive);
98
99 // if this is the first time read this archive, setup symtab and strtab
100 if (pArchive.getSymbolTable().empty()) {
101 // read the symtab of the archive
102 readSymbolTable(pArchive);
103
104 // read the strtab of the archive
105 readStringTable(pArchive);
106
107 // add root archive to ArchiveMemberMap
108 pArchive.addArchiveMember(pArchive.getARFile().name(),
109 pArchive.inputs().root(),
110 &InputTree::Downward);
111 }
112
113 // include the needed members in the archive and build up the input tree
114 bool willSymResolved;
115 do {
116 willSymResolved = false;
117 for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) {
118 // bypass if we already decided to include this symbol or not
119 if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx))
120 continue;
121
122 // bypass if another symbol with the same object file offset is included
123 if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) {
124 pArchive.setSymbolStatus(idx, Archive::Symbol::Include);
125 continue;
126 }
127
128 // check if we should include this defined symbol
129 Archive::Symbol::Status status =
130 shouldIncludeSymbol(pArchive.getSymbolName(idx));
131 if (Archive::Symbol::Unknown != status)
132 pArchive.setSymbolStatus(idx, status);
133
134 if (Archive::Symbol::Include == status) {
135 // include the object member from the given offset
136 includeMember(pArchive, pArchive.getObjFileOffset(idx));
137 willSymResolved = true;
138 } // end of if
139 } // end of for
140 } while (willSymResolved);
141
142 return true;
143 }
144
145 /// readMemberHeader - read the header of a member in a archive file and then
146 /// return the corresponding archive member (it may be an input object or
147 /// another archive)
148 /// @param pArchiveRoot - the archive root that holds the strtab (extended
149 /// name table)
150 /// @param pArchiveFile - the archive that contains the needed object
151 /// @param pFileOffset - file offset of the member header in the archive
152 /// @param pNestedOffset - used when we find a nested archive
153 /// @param pMemberSize - the file size of this member
readMemberHeader(Archive & pArchiveRoot,Input & pArchiveFile,uint32_t pFileOffset,uint32_t & pNestedOffset,size_t & pMemberSize)154 Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot,
155 Input& pArchiveFile,
156 uint32_t pFileOffset,
157 uint32_t& pNestedOffset,
158 size_t& pMemberSize)
159 {
160 assert(pArchiveFile.hasMemArea());
161
162 MemoryRegion* header_region =
163 pArchiveFile.memArea()->request((pArchiveFile.fileOffset() + pFileOffset),
164 sizeof(Archive::MemberHeader));
165 const Archive::MemberHeader* header =
166 reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
167
168 assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
169
170 pMemberSize = atoi(header->size);
171
172 // parse the member name and nested offset if any
173 std::string member_name;
174 llvm::StringRef name_field(header->name, sizeof(header->name));
175 if ('/' != header->name[0]) {
176 // this is an object file in an archive
177 size_t pos = name_field.find_first_of('/');
178 member_name.assign(name_field.substr(0, pos).str());
179 }
180 else {
181 // this is an object/archive file in a thin archive
182 size_t begin = 1;
183 size_t end = name_field.find_first_of(" :");
184 uint32_t name_offset = 0;
185 // parse the name offset
186 name_field.substr(begin, end - begin).getAsInteger(10, name_offset);
187
188 if (':' == name_field[end]) {
189 // there is a nested offset
190 begin = end + 1;
191 end = name_field.find_first_of(' ', begin);
192 name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset);
193 }
194
195 // get the member name from the extended name table
196 assert(pArchiveRoot.hasStrTable());
197 begin = name_offset;
198 end = pArchiveRoot.getStrTable().find_first_of('\n', begin);
199 member_name.assign(pArchiveRoot.getStrTable().substr(begin, end - begin -1));
200 }
201
202 Input* member = NULL;
203 bool isThinAR = isThinArchive(pArchiveFile);
204 if (!isThinAR) {
205 // this is an object file in an archive
206 member = pArchiveRoot.getMemberFile(pArchiveFile,
207 isThinAR,
208 member_name,
209 pArchiveFile.path(),
210 (pFileOffset +
211 sizeof(Archive::MemberHeader)));
212 }
213 else {
214 // this is a member in a thin archive
215 // try to find if this is a archive already in the map first
216 Archive::ArchiveMember* ar_member =
217 pArchiveRoot.getArchiveMember(member_name);
218 if (NULL != ar_member) {
219 return ar_member->file;
220 }
221
222 // get nested file path, the nested file's member name is the relative
223 // path to the archive containing it.
224 sys::fs::Path input_path(pArchiveFile.path().parent_path());
225 if (!input_path.empty())
226 input_path.append(member_name);
227 else
228 input_path.assign(member_name);
229
230 member = pArchiveRoot.getMemberFile(pArchiveFile,
231 isThinAR,
232 member_name,
233 input_path);
234 }
235
236 pArchiveFile.memArea()->release(header_region);
237 return member;
238 }
239
240 /// readSymbolTable - read the archive symbol map (armap)
readSymbolTable(Archive & pArchive)241 bool GNUArchiveReader::readSymbolTable(Archive& pArchive)
242 {
243 assert(pArchive.getARFile().hasMemArea());
244
245 MemoryRegion* header_region =
246 pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
247 Archive::MAGIC_LEN),
248 sizeof(Archive::MemberHeader));
249 const Archive::MemberHeader* header =
250 reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
251 assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
252
253 int symtab_size = atoi(header->size);
254 pArchive.setSymTabSize(symtab_size);
255
256 if (!pArchive.getARFile().attribute()->isWholeArchive()) {
257 MemoryRegion* symtab_region =
258 pArchive.getARFile().memArea()->request(
259 (pArchive.getARFile().fileOffset() +
260 Archive::MAGIC_LEN +
261 sizeof(Archive::MemberHeader)),
262 symtab_size);
263 const uint32_t* data =
264 reinterpret_cast<const uint32_t*>(symtab_region->getBuffer());
265
266 // read the number of symbols
267 uint32_t number = 0;
268 if (llvm::sys::isLittleEndianHost())
269 number = mcld::bswap32(*data);
270 else
271 number = *data;
272
273 // set up the pointers for file offset and name offset
274 ++data;
275 const char* name = reinterpret_cast<const char*>(data + number);
276
277 // add the archive symbols
278 for (uint32_t i = 0; i < number; ++i) {
279 if (llvm::sys::isLittleEndianHost())
280 pArchive.addSymbol(name, mcld::bswap32(*data));
281 else
282 pArchive.addSymbol(name, *data);
283 name += strlen(name) + 1;
284 ++data;
285 }
286 pArchive.getARFile().memArea()->release(symtab_region);
287 }
288 pArchive.getARFile().memArea()->release(header_region);
289 return true;
290 }
291
292 /// readStringTable - read the strtab for long file name of the archive
readStringTable(Archive & pArchive)293 bool GNUArchiveReader::readStringTable(Archive& pArchive)
294 {
295 size_t offset = Archive::MAGIC_LEN +
296 sizeof(Archive::MemberHeader) +
297 pArchive.getSymTabSize();
298
299 if (0x0 != (offset & 1))
300 ++offset;
301
302 assert(pArchive.getARFile().hasMemArea());
303
304 MemoryRegion* header_region =
305 pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
306 offset),
307 sizeof(Archive::MemberHeader));
308 const Archive::MemberHeader* header =
309 reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
310
311 assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
312
313 if (0 == memcmp(header->name, Archive::STRTAB_NAME, sizeof(header->name))) {
314 // read the extended name table
315 int strtab_size = atoi(header->size);
316 MemoryRegion* strtab_region =
317 pArchive.getARFile().memArea()->request(
318 (pArchive.getARFile().fileOffset() +
319 offset + sizeof(Archive::MemberHeader)),
320 strtab_size);
321 const char* strtab =
322 reinterpret_cast<const char*>(strtab_region->getBuffer());
323 pArchive.getStrTable().assign(strtab, strtab_size);
324 pArchive.getARFile().memArea()->release(strtab_region);
325 }
326 pArchive.getARFile().memArea()->release(header_region);
327 return true;
328 }
329
330 /// shouldIncludeStatus - given a sym name from armap and check if including
331 /// the corresponding archive member, and then return the decision
332 enum Archive::Symbol::Status
shouldIncludeSymbol(const llvm::StringRef & pSymName) const333 GNUArchiveReader::shouldIncludeSymbol(const llvm::StringRef& pSymName) const
334 {
335 // TODO: handle symbol version issue and user defined symbols
336 const ResolveInfo* info = m_Module.getNamePool().findInfo(pSymName);
337 if (NULL != info) {
338 if (!info->isUndef())
339 return Archive::Symbol::Exclude;
340 if (info->isWeak())
341 return Archive::Symbol::Unknown;
342 return Archive::Symbol::Include;
343 }
344 return Archive::Symbol::Unknown;
345 }
346
347 /// includeMember - include the object member in the given file offset, and
348 /// return the size of the object
349 /// @param pArchiveRoot - the archive root
350 /// @param pFileOffset - file offset of the member header in the archive
includeMember(Archive & pArchive,uint32_t pFileOffset)351 size_t GNUArchiveReader::includeMember(Archive& pArchive, uint32_t pFileOffset)
352 {
353 Input* cur_archive = &(pArchive.getARFile());
354 Input* member = NULL;
355 uint32_t file_offset = pFileOffset;
356 size_t size = 0;
357 do {
358 uint32_t nested_offset = 0;
359 // use the file offset in current archive to find out the member we
360 // want to include
361 member = readMemberHeader(pArchive,
362 *cur_archive,
363 file_offset,
364 nested_offset,
365 size);
366 assert(member != NULL);
367 // bypass if we get an archive that is already in the map
368 if (Input::Archive == member->type()) {
369 cur_archive = member;
370 file_offset = nested_offset;
371 continue;
372 }
373
374 // insert a node into the subtree of current archive.
375 Archive::ArchiveMember* parent =
376 pArchive.getArchiveMember(cur_archive->name());
377
378 assert(NULL != parent);
379 pArchive.inputs().insert(parent->lastPos, *(parent->move), *member);
380
381 // move the iterator to new created node, and also adjust the
382 // direction to Afterward for next insertion in this subtree
383 parent->move->move(parent->lastPos);
384 parent->move = &InputTree::Afterward;
385
386 if (m_ELFObjectReader.isMyFormat(*member)) {
387 member->setType(Input::Object);
388 pArchive.addObjectMember(pFileOffset, parent->lastPos);
389 m_ELFObjectReader.readHeader(*member);
390 m_ELFObjectReader.readSections(*member);
391 m_ELFObjectReader.readSymbols(*member);
392 m_Module.getObjectList().push_back(member);
393 }
394 else if (isMyFormat(*member)) {
395 member->setType(Input::Archive);
396 // when adding a new archive node, set the iterator to archive
397 // itself, and set the direction to Downward
398 pArchive.addArchiveMember(member->name(),
399 parent->lastPos,
400 &InputTree::Downward);
401 cur_archive = member;
402 file_offset = nested_offset;
403 }
404 } while (Input::Object != member->type());
405 return size;
406 }
407
408 /// includeAllMembers - include all object members. This is called if
409 /// --whole-archive is the attribute for this archive file.
includeAllMembers(Archive & pArchive)410 bool GNUArchiveReader::includeAllMembers(Archive& pArchive)
411 {
412 // read the symtab of the archive
413 readSymbolTable(pArchive);
414
415 // read the strtab of the archive
416 readStringTable(pArchive);
417
418 // add root archive to ArchiveMemberMap
419 pArchive.addArchiveMember(pArchive.getARFile().name(),
420 pArchive.inputs().root(),
421 &InputTree::Downward);
422
423 bool isThinAR = isThinArchive(pArchive.getARFile());
424 uint32_t begin_offset = pArchive.getARFile().fileOffset() +
425 Archive::MAGIC_LEN +
426 sizeof(Archive::MemberHeader) +
427 pArchive.getSymTabSize();
428 if (pArchive.hasStrTable()) {
429 if (0x0 != (begin_offset & 1))
430 ++begin_offset;
431 begin_offset += sizeof(Archive::MemberHeader) +
432 pArchive.getStrTable().size();
433 }
434 uint32_t end_offset = pArchive.getARFile().memArea()->handler()->size();
435 for (uint32_t offset = begin_offset;
436 offset < end_offset;
437 offset += sizeof(Archive::MemberHeader)) {
438
439 size_t size = includeMember(pArchive, offset);
440
441 if (!isThinAR) {
442 offset += size;
443 }
444
445 if (0x0 != (offset & 1))
446 ++offset;
447 }
448 return true;
449 }
450
451