• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- Archive.cpp - ar File Format implementation ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file implements the Archive class and its member-header, child and
// symbol helpers.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Object/Archive.h"
14 #include "llvm/ADT/Optional.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Object/Binary.h"
19 #include "llvm/Object/Error.h"
20 #include "llvm/Support/Chrono.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/ErrorOr.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/MemoryBuffer.h"
26 #include "llvm/Support/Path.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <cstddef>
31 #include <cstdint>
32 #include <cstring>
33 #include <memory>
34 #include <string>
35 #include <system_error>
36 
37 using namespace llvm;
38 using namespace object;
39 using namespace llvm::support::endian;
40 
// Leading bytes of a regular archive.
static const char *const Magic = "!<arch>\n";
// Leading bytes of a thin archive; same length as Magic.
static const char *const ThinMagic = "!<thin>\n";
43 
anchor()44 void Archive::anchor() {}
45 
46 static Error
malformedError(Twine Msg)47 malformedError(Twine Msg) {
48   std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")";
49   return make_error<GenericBinaryError>(std::move(StringMsg),
50                                         object_error::parse_failed);
51 }
52 
ArchiveMemberHeader(const Archive * Parent,const char * RawHeaderPtr,uint64_t Size,Error * Err)53 ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent,
54                                          const char *RawHeaderPtr,
55                                          uint64_t Size, Error *Err)
56     : Parent(Parent),
57       ArMemHdr(reinterpret_cast<const ArMemHdrType *>(RawHeaderPtr)) {
58   if (RawHeaderPtr == nullptr)
59     return;
60   ErrorAsOutParameter ErrAsOutParam(Err);
61 
62   if (Size < sizeof(ArMemHdrType)) {
63     if (Err) {
64       std::string Msg("remaining size of archive too small for next archive "
65                       "member header ");
66       Expected<StringRef> NameOrErr = getName(Size);
67       if (!NameOrErr) {
68         consumeError(NameOrErr.takeError());
69         uint64_t Offset = RawHeaderPtr - Parent->getData().data();
70         *Err = malformedError(Msg + "at offset " + Twine(Offset));
71       } else
72         *Err = malformedError(Msg + "for " + NameOrErr.get());
73     }
74     return;
75   }
76   if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') {
77     if (Err) {
78       std::string Buf;
79       raw_string_ostream OS(Buf);
80       OS.write_escaped(StringRef(ArMemHdr->Terminator,
81                                  sizeof(ArMemHdr->Terminator)));
82       OS.flush();
83       std::string Msg("terminator characters in archive member \"" + Buf +
84                       "\" not the correct \"`\\n\" values for the archive "
85                       "member header ");
86       Expected<StringRef> NameOrErr = getName(Size);
87       if (!NameOrErr) {
88         consumeError(NameOrErr.takeError());
89         uint64_t Offset = RawHeaderPtr - Parent->getData().data();
90         *Err = malformedError(Msg + "at offset " + Twine(Offset));
91       } else
92         *Err = malformedError(Msg + "for " + NameOrErr.get());
93     }
94     return;
95   }
96 }
97 
98 // This gets the raw name from the ArMemHdr->Name field and checks that it is
99 // valid for the kind of archive.  If it is not valid it returns an Error.
getRawName() const100 Expected<StringRef> ArchiveMemberHeader::getRawName() const {
101   char EndCond;
102   auto Kind = Parent->kind();
103   if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) {
104     if (ArMemHdr->Name[0] == ' ') {
105       uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
106                         Parent->getData().data();
107       return malformedError("name contains a leading space for archive member "
108                             "header at offset " + Twine(Offset));
109     }
110     EndCond = ' ';
111   }
112   else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#')
113     EndCond = ' ';
114   else
115     EndCond = '/';
116   StringRef::size_type end =
117       StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond);
118   if (end == StringRef::npos)
119     end = sizeof(ArMemHdr->Name);
120   assert(end <= sizeof(ArMemHdr->Name) && end > 0);
121   // Don't include the EndCond if there is one.
122   return StringRef(ArMemHdr->Name, end);
123 }
124 
125 // This gets the name looking up long names. Size is the size of the archive
126 // member including the header, so the size of any name following the header
127 // is checked to make sure it does not overflow.
getName(uint64_t Size) const128 Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {
129 
130   // This can be called from the ArchiveMemberHeader constructor when the
131   // archive header is truncated to produce an error message with the name.
132   // Make sure the name field is not truncated.
133   if (Size < offsetof(ArMemHdrType, Name) + sizeof(ArMemHdr->Name)) {
134     uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
135                       Parent->getData().data();
136     return malformedError("archive header truncated before the name field "
137                           "for archive member header at offset " +
138                           Twine(ArchiveOffset));
139   }
140 
141   // The raw name itself can be invalid.
142   Expected<StringRef> NameOrErr = getRawName();
143   if (!NameOrErr)
144     return NameOrErr.takeError();
145   StringRef Name = NameOrErr.get();
146 
147   // Check if it's a special name.
148   if (Name[0] == '/') {
149     if (Name.size() == 1) // Linker member.
150       return Name;
151     if (Name.size() == 2 && Name[1] == '/') // String table.
152       return Name;
153     // It's a long name.
154     // Get the string table offset.
155     std::size_t StringOffset;
156     if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) {
157       std::string Buf;
158       raw_string_ostream OS(Buf);
159       OS.write_escaped(Name.substr(1).rtrim(' '));
160       OS.flush();
161       uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
162                                Parent->getData().data();
163       return malformedError("long name offset characters after the '/' are "
164                             "not all decimal numbers: '" + Buf + "' for "
165                             "archive member header at offset " +
166                             Twine(ArchiveOffset));
167     }
168 
169     // Verify it.
170     if (StringOffset >= Parent->getStringTable().size()) {
171       uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
172                                Parent->getData().data();
173       return malformedError("long name offset " + Twine(StringOffset) + " past "
174                             "the end of the string table for archive member "
175                             "header at offset " + Twine(ArchiveOffset));
176     }
177 
178     // GNU long file names end with a "/\n".
179     if (Parent->kind() == Archive::K_GNU ||
180         Parent->kind() == Archive::K_GNU64) {
181       size_t End = Parent->getStringTable().find('\n', /*From=*/StringOffset);
182       if (End == StringRef::npos || End < 1 ||
183           Parent->getStringTable()[End - 1] != '/') {
184         return malformedError("string table at long name offset " +
185                               Twine(StringOffset) + "not terminated");
186       }
187       return Parent->getStringTable().slice(StringOffset, End - 1);
188     }
189     return Parent->getStringTable().begin() + StringOffset;
190   }
191 
192   if (Name.startswith("#1/")) {
193     uint64_t NameLength;
194     if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) {
195       std::string Buf;
196       raw_string_ostream OS(Buf);
197       OS.write_escaped(Name.substr(3).rtrim(' '));
198       OS.flush();
199       uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
200                         Parent->getData().data();
201       return malformedError("long name length characters after the #1/ are "
202                             "not all decimal numbers: '" + Buf + "' for "
203                             "archive member header at offset " +
204                             Twine(ArchiveOffset));
205     }
206     if (getSizeOf() + NameLength > Size) {
207       uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
208                         Parent->getData().data();
209       return malformedError("long name length: " + Twine(NameLength) +
210                             " extends past the end of the member or archive "
211                             "for archive member header at offset " +
212                             Twine(ArchiveOffset));
213     }
214     return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(),
215                      NameLength).rtrim('\0');
216   }
217 
218   // It is not a long name so trim the blanks at the end of the name.
219   if (Name[Name.size() - 1] != '/')
220     return Name.rtrim(' ');
221 
222   // It's a simple name.
223   return Name.drop_back(1);
224 }
225 
getSize() const226 Expected<uint64_t> ArchiveMemberHeader::getSize() const {
227   uint64_t Ret;
228   if (StringRef(ArMemHdr->Size,
229                 sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) {
230     std::string Buf;
231     raw_string_ostream OS(Buf);
232     OS.write_escaped(StringRef(ArMemHdr->Size,
233                                sizeof(ArMemHdr->Size)).rtrim(" "));
234     OS.flush();
235     uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
236                       Parent->getData().data();
237     return malformedError("characters in size field in archive header are not "
238                           "all decimal numbers: '" + Buf + "' for archive "
239                           "member header at offset " + Twine(Offset));
240   }
241   return Ret;
242 }
243 
getAccessMode() const244 Expected<sys::fs::perms> ArchiveMemberHeader::getAccessMode() const {
245   unsigned Ret;
246   if (StringRef(ArMemHdr->AccessMode,
247                 sizeof(ArMemHdr->AccessMode)).rtrim(' ').getAsInteger(8, Ret)) {
248     std::string Buf;
249     raw_string_ostream OS(Buf);
250     OS.write_escaped(StringRef(ArMemHdr->AccessMode,
251                                sizeof(ArMemHdr->AccessMode)).rtrim(" "));
252     OS.flush();
253     uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
254                       Parent->getData().data();
255     return malformedError("characters in AccessMode field in archive header "
256                           "are not all decimal numbers: '" + Buf + "' for the "
257                           "archive member header at offset " + Twine(Offset));
258   }
259   return static_cast<sys::fs::perms>(Ret);
260 }
261 
262 Expected<sys::TimePoint<std::chrono::seconds>>
getLastModified() const263 ArchiveMemberHeader::getLastModified() const {
264   unsigned Seconds;
265   if (StringRef(ArMemHdr->LastModified,
266                 sizeof(ArMemHdr->LastModified)).rtrim(' ')
267           .getAsInteger(10, Seconds)) {
268     std::string Buf;
269     raw_string_ostream OS(Buf);
270     OS.write_escaped(StringRef(ArMemHdr->LastModified,
271                                sizeof(ArMemHdr->LastModified)).rtrim(" "));
272     OS.flush();
273     uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
274                       Parent->getData().data();
275     return malformedError("characters in LastModified field in archive header "
276                           "are not all decimal numbers: '" + Buf + "' for the "
277                           "archive member header at offset " + Twine(Offset));
278   }
279 
280   return sys::toTimePoint(Seconds);
281 }
282 
getUID() const283 Expected<unsigned> ArchiveMemberHeader::getUID() const {
284   unsigned Ret;
285   StringRef User = StringRef(ArMemHdr->UID, sizeof(ArMemHdr->UID)).rtrim(' ');
286   if (User.empty())
287     return 0;
288   if (User.getAsInteger(10, Ret)) {
289     std::string Buf;
290     raw_string_ostream OS(Buf);
291     OS.write_escaped(User);
292     OS.flush();
293     uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
294                       Parent->getData().data();
295     return malformedError("characters in UID field in archive header "
296                           "are not all decimal numbers: '" + Buf + "' for the "
297                           "archive member header at offset " + Twine(Offset));
298   }
299   return Ret;
300 }
301 
getGID() const302 Expected<unsigned> ArchiveMemberHeader::getGID() const {
303   unsigned Ret;
304   StringRef Group = StringRef(ArMemHdr->GID, sizeof(ArMemHdr->GID)).rtrim(' ');
305   if (Group.empty())
306     return 0;
307   if (Group.getAsInteger(10, Ret)) {
308     std::string Buf;
309     raw_string_ostream OS(Buf);
310     OS.write_escaped(Group);
311     OS.flush();
312     uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
313                       Parent->getData().data();
314     return malformedError("characters in GID field in archive header "
315                           "are not all decimal numbers: '" + Buf + "' for the "
316                           "archive member header at offset " + Twine(Offset));
317   }
318   return Ret;
319 }
320 
// Builds a Child from an already delimited member: \p Data spans the member
// starting at its header and \p StartOfFile is the offset of the file
// contents inside Data. The header is constructed without an error
// out-parameter, so header problems are not reported from here.
Archive::Child::Child(const Archive *Parent, StringRef Data,
                      uint16_t StartOfFile)
    : Parent(Parent), Header(Parent, Data.data(), Data.size(), nullptr),
      Data(Data), StartOfFile(StartOfFile) {}
326 
Child(const Archive * Parent,const char * Start,Error * Err)327 Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
328     : Parent(Parent),
329       Header(Parent, Start,
330              Parent
331                ? Parent->getData().size() - (Start - Parent->getData().data())
332                : 0, Err) {
333   if (!Start)
334     return;
335 
336   // If we are pointed to real data, Start is not a nullptr, then there must be
337   // a non-null Err pointer available to report malformed data on.  Only in
338   // the case sentinel value is being constructed is Err is permitted to be a
339   // nullptr.
340   assert(Err && "Err can't be nullptr if Start is not a nullptr");
341 
342   ErrorAsOutParameter ErrAsOutParam(Err);
343 
344   // If there was an error in the construction of the Header
345   // then just return with the error now set.
346   if (*Err)
347     return;
348 
349   uint64_t Size = Header.getSizeOf();
350   Data = StringRef(Start, Size);
351   Expected<bool> isThinOrErr = isThinMember();
352   if (!isThinOrErr) {
353     *Err = isThinOrErr.takeError();
354     return;
355   }
356   bool isThin = isThinOrErr.get();
357   if (!isThin) {
358     Expected<uint64_t> MemberSize = getRawSize();
359     if (!MemberSize) {
360       *Err = MemberSize.takeError();
361       return;
362     }
363     Size += MemberSize.get();
364     Data = StringRef(Start, Size);
365   }
366 
367   // Setup StartOfFile and PaddingBytes.
368   StartOfFile = Header.getSizeOf();
369   // Don't include attached name.
370   Expected<StringRef> NameOrErr = getRawName();
371   if (!NameOrErr){
372     *Err = NameOrErr.takeError();
373     return;
374   }
375   StringRef Name = NameOrErr.get();
376   if (Name.startswith("#1/")) {
377     uint64_t NameSize;
378     if (Name.substr(3).rtrim(' ').getAsInteger(10, NameSize)) {
379       std::string Buf;
380       raw_string_ostream OS(Buf);
381       OS.write_escaped(Name.substr(3).rtrim(' '));
382       OS.flush();
383       uint64_t Offset = Start - Parent->getData().data();
384       *Err = malformedError("long name length characters after the #1/ are "
385                             "not all decimal numbers: '" + Buf + "' for "
386                             "archive member header at offset " +
387                             Twine(Offset));
388       return;
389     }
390     StartOfFile += NameSize;
391   }
392 }
393 
getSize() const394 Expected<uint64_t> Archive::Child::getSize() const {
395   if (Parent->IsThin) {
396     Expected<uint32_t> Size = Header.getSize();
397     if (!Size)
398       return Size.takeError();
399     return Size.get();
400   }
401   return Data.size() - StartOfFile;
402 }
403 
getRawSize() const404 Expected<uint64_t> Archive::Child::getRawSize() const {
405   return Header.getSize();
406 }
407 
isThinMember() const408 Expected<bool> Archive::Child::isThinMember() const {
409   Expected<StringRef> NameOrErr = Header.getRawName();
410   if (!NameOrErr)
411     return NameOrErr.takeError();
412   StringRef Name = NameOrErr.get();
413   return Parent->IsThin && Name != "/" && Name != "//";
414 }
415 
getFullName() const416 Expected<std::string> Archive::Child::getFullName() const {
417   Expected<bool> isThin = isThinMember();
418   if (!isThin)
419     return isThin.takeError();
420   assert(isThin.get());
421   Expected<StringRef> NameOrErr = getName();
422   if (!NameOrErr)
423     return NameOrErr.takeError();
424   StringRef Name = *NameOrErr;
425   if (sys::path::is_absolute(Name))
426     return Name;
427 
428   SmallString<128> FullName = sys::path::parent_path(
429       Parent->getMemoryBufferRef().getBufferIdentifier());
430   sys::path::append(FullName, Name);
431   return StringRef(FullName);
432 }
433 
getBuffer() const434 Expected<StringRef> Archive::Child::getBuffer() const {
435   Expected<bool> isThinOrErr = isThinMember();
436   if (!isThinOrErr)
437     return isThinOrErr.takeError();
438   bool isThin = isThinOrErr.get();
439   if (!isThin) {
440     Expected<uint32_t> Size = getSize();
441     if (!Size)
442       return Size.takeError();
443     return StringRef(Data.data() + StartOfFile, Size.get());
444   }
445   Expected<std::string> FullNameOrErr = getFullName();
446   if (!FullNameOrErr)
447     return FullNameOrErr.takeError();
448   const std::string &FullName = *FullNameOrErr;
449   ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
450   if (std::error_code EC = Buf.getError())
451     return errorCodeToError(EC);
452   Parent->ThinBuffers.push_back(std::move(*Buf));
453   return Parent->ThinBuffers.back()->getBuffer();
454 }
455 
getNext() const456 Expected<Archive::Child> Archive::Child::getNext() const {
457   size_t SpaceToSkip = Data.size();
458   // If it's odd, add 1 to make it even.
459   if (SpaceToSkip & 1)
460     ++SpaceToSkip;
461 
462   const char *NextLoc = Data.data() + SpaceToSkip;
463 
464   // Check to see if this is at the end of the archive.
465   if (NextLoc == Parent->Data.getBufferEnd())
466     return Child(nullptr, nullptr, nullptr);
467 
468   // Check to see if this is past the end of the archive.
469   if (NextLoc > Parent->Data.getBufferEnd()) {
470     std::string Msg("offset to next archive member past the end of the archive "
471                     "after member ");
472     Expected<StringRef> NameOrErr = getName();
473     if (!NameOrErr) {
474       consumeError(NameOrErr.takeError());
475       uint64_t Offset = Data.data() - Parent->getData().data();
476       return malformedError(Msg + "at offset " + Twine(Offset));
477     } else
478       return malformedError(Msg + NameOrErr.get());
479   }
480 
481   Error Err = Error::success();
482   Child Ret(Parent, NextLoc, &Err);
483   if (Err)
484     return std::move(Err);
485   return Ret;
486 }
487 
getChildOffset() const488 uint64_t Archive::Child::getChildOffset() const {
489   const char *a = Parent->Data.getBuffer().data();
490   const char *c = Data.data();
491   uint64_t offset = c - a;
492   return offset;
493 }
494 
getName() const495 Expected<StringRef> Archive::Child::getName() const {
496   Expected<uint64_t> RawSizeOrErr = getRawSize();
497   if (!RawSizeOrErr)
498     return RawSizeOrErr.takeError();
499   uint64_t RawSize = RawSizeOrErr.get();
500   Expected<StringRef> NameOrErr = Header.getName(Header.getSizeOf() + RawSize);
501   if (!NameOrErr)
502     return NameOrErr.takeError();
503   StringRef Name = NameOrErr.get();
504   return Name;
505 }
506 
getMemoryBufferRef() const507 Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
508   Expected<StringRef> NameOrErr = getName();
509   if (!NameOrErr)
510     return NameOrErr.takeError();
511   StringRef Name = NameOrErr.get();
512   Expected<StringRef> Buf = getBuffer();
513   if (!Buf)
514     return createFileError(Name, Buf.takeError());
515   return MemoryBufferRef(*Buf, Name);
516 }
517 
518 Expected<std::unique_ptr<Binary>>
getAsBinary(LLVMContext * Context) const519 Archive::Child::getAsBinary(LLVMContext *Context) const {
520   Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
521   if (!BuffOrErr)
522     return BuffOrErr.takeError();
523 
524   auto BinaryOrErr = createBinary(BuffOrErr.get(), Context);
525   if (BinaryOrErr)
526     return std::move(*BinaryOrErr);
527   return BinaryOrErr.takeError();
528 }
529 
create(MemoryBufferRef Source)530 Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
531   Error Err = Error::success();
532   std::unique_ptr<Archive> Ret(new Archive(Source, Err));
533   if (Err)
534     return std::move(Err);
535   return std::move(Ret);
536 }
537 
setFirstRegular(const Child & C)538 void Archive::setFirstRegular(const Child &C) {
539   FirstRegularData = C.Data;
540   FirstRegularStartOfFile = C.StartOfFile;
541 }
542 
Archive(MemoryBufferRef Source,Error & Err)543 Archive::Archive(MemoryBufferRef Source, Error &Err)
544     : Binary(Binary::ID_Archive, Source) {
545   ErrorAsOutParameter ErrAsOutParam(&Err);
546   StringRef Buffer = Data.getBuffer();
547   // Check for sufficient magic.
548   if (Buffer.startswith(ThinMagic)) {
549     IsThin = true;
550   } else if (Buffer.startswith(Magic)) {
551     IsThin = false;
552   } else {
553     Err = make_error<GenericBinaryError>("file too small to be an archive",
554                                          object_error::invalid_file_type);
555     return;
556   }
557 
558   // Make sure Format is initialized before any call to
559   // ArchiveMemberHeader::getName() is made.  This could be a valid empty
560   // archive which is the same in all formats.  So claiming it to be gnu to is
561   // fine if not totally correct before we look for a string table or table of
562   // contents.
563   Format = K_GNU;
564 
565   // Get the special members.
566   child_iterator I = child_begin(Err, false);
567   if (Err)
568     return;
569   child_iterator E = child_end();
570 
571   // See if this is a valid empty archive and if so return.
572   if (I == E) {
573     Err = Error::success();
574     return;
575   }
576   const Child *C = &*I;
577 
578   auto Increment = [&]() {
579     ++I;
580     if (Err)
581       return true;
582     C = &*I;
583     return false;
584   };
585 
586   Expected<StringRef> NameOrErr = C->getRawName();
587   if (!NameOrErr) {
588     Err = NameOrErr.takeError();
589     return;
590   }
591   StringRef Name = NameOrErr.get();
592 
593   // Below is the pattern that is used to figure out the archive format
594   // GNU archive format
595   //  First member : / (may exist, if it exists, points to the symbol table )
596   //  Second member : // (may exist, if it exists, points to the string table)
597   //  Note : The string table is used if the filename exceeds 15 characters
598   // BSD archive format
599   //  First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
600   //  There is no string table, if the filename exceeds 15 characters or has a
601   //  embedded space, the filename has #1/<size>, The size represents the size
602   //  of the filename that needs to be read after the archive header
603   // COFF archive format
604   //  First member : /
605   //  Second member : / (provides a directory of symbols)
606   //  Third member : // (may exist, if it exists, contains the string table)
607   //  Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
608   //  even if the string table is empty. However, lib.exe does not in fact
609   //  seem to create the third member if there's no member whose filename
610   //  exceeds 15 characters. So the third member is optional.
611 
612   if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") {
613     if (Name == "__.SYMDEF")
614       Format = K_BSD;
615     else // Name == "__.SYMDEF_64"
616       Format = K_DARWIN64;
617     // We know that the symbol table is not an external file, but we still must
618     // check any Expected<> return value.
619     Expected<StringRef> BufOrErr = C->getBuffer();
620     if (!BufOrErr) {
621       Err = BufOrErr.takeError();
622       return;
623     }
624     SymbolTable = BufOrErr.get();
625     if (Increment())
626       return;
627     setFirstRegular(*C);
628 
629     Err = Error::success();
630     return;
631   }
632 
633   if (Name.startswith("#1/")) {
634     Format = K_BSD;
635     // We know this is BSD, so getName will work since there is no string table.
636     Expected<StringRef> NameOrErr = C->getName();
637     if (!NameOrErr) {
638       Err = NameOrErr.takeError();
639       return;
640     }
641     Name = NameOrErr.get();
642     if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
643       // We know that the symbol table is not an external file, but we still
644       // must check any Expected<> return value.
645       Expected<StringRef> BufOrErr = C->getBuffer();
646       if (!BufOrErr) {
647         Err = BufOrErr.takeError();
648         return;
649       }
650       SymbolTable = BufOrErr.get();
651       if (Increment())
652         return;
653     }
654     else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") {
655       Format = K_DARWIN64;
656       // We know that the symbol table is not an external file, but we still
657       // must check any Expected<> return value.
658       Expected<StringRef> BufOrErr = C->getBuffer();
659       if (!BufOrErr) {
660         Err = BufOrErr.takeError();
661         return;
662       }
663       SymbolTable = BufOrErr.get();
664       if (Increment())
665         return;
666     }
667     setFirstRegular(*C);
668     return;
669   }
670 
671   // MIPS 64-bit ELF archives use a special format of a symbol table.
672   // This format is marked by `ar_name` field equals to "/SYM64/".
673   // For detailed description see page 96 in the following document:
674   // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf
675 
676   bool has64SymTable = false;
677   if (Name == "/" || Name == "/SYM64/") {
678     // We know that the symbol table is not an external file, but we still
679     // must check any Expected<> return value.
680     Expected<StringRef> BufOrErr = C->getBuffer();
681     if (!BufOrErr) {
682       Err = BufOrErr.takeError();
683       return;
684     }
685     SymbolTable = BufOrErr.get();
686     if (Name == "/SYM64/")
687       has64SymTable = true;
688 
689     if (Increment())
690       return;
691     if (I == E) {
692       Err = Error::success();
693       return;
694     }
695     Expected<StringRef> NameOrErr = C->getRawName();
696     if (!NameOrErr) {
697       Err = NameOrErr.takeError();
698       return;
699     }
700     Name = NameOrErr.get();
701   }
702 
703   if (Name == "//") {
704     Format = has64SymTable ? K_GNU64 : K_GNU;
705     // The string table is never an external member, but we still
706     // must check any Expected<> return value.
707     Expected<StringRef> BufOrErr = C->getBuffer();
708     if (!BufOrErr) {
709       Err = BufOrErr.takeError();
710       return;
711     }
712     StringTable = BufOrErr.get();
713     if (Increment())
714       return;
715     setFirstRegular(*C);
716     Err = Error::success();
717     return;
718   }
719 
720   if (Name[0] != '/') {
721     Format = has64SymTable ? K_GNU64 : K_GNU;
722     setFirstRegular(*C);
723     Err = Error::success();
724     return;
725   }
726 
727   if (Name != "/") {
728     Err = errorCodeToError(object_error::parse_failed);
729     return;
730   }
731 
732   Format = K_COFF;
733   // We know that the symbol table is not an external file, but we still
734   // must check any Expected<> return value.
735   Expected<StringRef> BufOrErr = C->getBuffer();
736   if (!BufOrErr) {
737     Err = BufOrErr.takeError();
738     return;
739   }
740   SymbolTable = BufOrErr.get();
741 
742   if (Increment())
743     return;
744 
745   if (I == E) {
746     setFirstRegular(*C);
747     Err = Error::success();
748     return;
749   }
750 
751   NameOrErr = C->getRawName();
752   if (!NameOrErr) {
753     Err = NameOrErr.takeError();
754     return;
755   }
756   Name = NameOrErr.get();
757 
758   if (Name == "//") {
759     // The string table is never an external member, but we still
760     // must check any Expected<> return value.
761     Expected<StringRef> BufOrErr = C->getBuffer();
762     if (!BufOrErr) {
763       Err = BufOrErr.takeError();
764       return;
765     }
766     StringTable = BufOrErr.get();
767     if (Increment())
768       return;
769   }
770 
771   setFirstRegular(*C);
772   Err = Error::success();
773 }
774 
child_begin(Error & Err,bool SkipInternal) const775 Archive::child_iterator Archive::child_begin(Error &Err,
776                                              bool SkipInternal) const {
777   if (isEmpty())
778     return child_end();
779 
780   if (SkipInternal)
781     return child_iterator::itr(
782         Child(this, FirstRegularData, FirstRegularStartOfFile), Err);
783 
784   const char *Loc = Data.getBufferStart() + strlen(Magic);
785   Child C(this, Loc, &Err);
786   if (Err)
787     return child_end();
788   return child_iterator::itr(C, Err);
789 }
790 
child_end() const791 Archive::child_iterator Archive::child_end() const {
792   return child_iterator::end(Child(nullptr, nullptr, nullptr));
793 }
794 
getName() const795 StringRef Archive::Symbol::getName() const {
796   return Parent->getSymbolTable().begin() + StringIndex;
797 }
798 
getMember() const799 Expected<Archive::Child> Archive::Symbol::getMember() const {
800   const char *Buf = Parent->getSymbolTable().begin();
801   const char *Offsets = Buf;
802   if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64)
803     Offsets += sizeof(uint64_t);
804   else
805     Offsets += sizeof(uint32_t);
806   uint64_t Offset = 0;
807   if (Parent->kind() == K_GNU) {
808     Offset = read32be(Offsets + SymbolIndex * 4);
809   } else if (Parent->kind() == K_GNU64) {
810     Offset = read64be(Offsets + SymbolIndex * 8);
811   } else if (Parent->kind() == K_BSD) {
812     // The SymbolIndex is an index into the ranlib structs that start at
813     // Offsets (the first uint32_t is the number of bytes of the ranlib
814     // structs).  The ranlib structs are a pair of uint32_t's the first
815     // being a string table offset and the second being the offset into
816     // the archive of the member that defines the symbol.  Which is what
817     // is needed here.
818     Offset = read32le(Offsets + SymbolIndex * 8 + 4);
819   } else if (Parent->kind() == K_DARWIN64) {
820     // The SymbolIndex is an index into the ranlib_64 structs that start at
821     // Offsets (the first uint64_t is the number of bytes of the ranlib_64
822     // structs).  The ranlib_64 structs are a pair of uint64_t's the first
823     // being a string table offset and the second being the offset into
824     // the archive of the member that defines the symbol.  Which is what
825     // is needed here.
826     Offset = read64le(Offsets + SymbolIndex * 16 + 8);
827   } else {
828     // Skip offsets.
829     uint32_t MemberCount = read32le(Buf);
830     Buf += MemberCount * 4 + 4;
831 
832     uint32_t SymbolCount = read32le(Buf);
833     if (SymbolIndex >= SymbolCount)
834       return errorCodeToError(object_error::parse_failed);
835 
836     // Skip SymbolCount to get to the indices table.
837     const char *Indices = Buf + 4;
838 
839     // Get the index of the offset in the file member offset table for this
840     // symbol.
841     uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
842     // Subtract 1 since OffsetIndex is 1 based.
843     --OffsetIndex;
844 
845     if (OffsetIndex >= MemberCount)
846       return errorCodeToError(object_error::parse_failed);
847 
848     Offset = read32le(Offsets + OffsetIndex * 4);
849   }
850 
851   const char *Loc = Parent->getData().begin() + Offset;
852   Error Err = Error::success();
853   Child C(Parent, Loc, &Err);
854   if (Err)
855     return std::move(Err);
856   return C;
857 }
858 
getNext() const859 Archive::Symbol Archive::Symbol::getNext() const {
860   Symbol t(*this);
861   if (Parent->kind() == K_BSD) {
862     // t.StringIndex is an offset from the start of the __.SYMDEF or
863     // "__.SYMDEF SORTED" member into the string table for the ranlib
864     // struct indexed by t.SymbolIndex .  To change t.StringIndex to the
865     // offset in the string table for t.SymbolIndex+1 we subtract the
866     // its offset from the start of the string table for t.SymbolIndex
867     // and add the offset of the string table for t.SymbolIndex+1.
868 
869     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
870     // which is the number of bytes of ranlib structs that follow.  The ranlib
871     // structs are a pair of uint32_t's the first being a string table offset
872     // and the second being the offset into the archive of the member that
873     // define the symbol. After that the next uint32_t is the byte count of
874     // the string table followed by the string table.
875     const char *Buf = Parent->getSymbolTable().begin();
876     uint32_t RanlibCount = 0;
877     RanlibCount = read32le(Buf) / 8;
878     // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
879     // don't change the t.StringIndex as we don't want to reference a ranlib
880     // past RanlibCount.
881     if (t.SymbolIndex + 1 < RanlibCount) {
882       const char *Ranlibs = Buf + 4;
883       uint32_t CurRanStrx = 0;
884       uint32_t NextRanStrx = 0;
885       CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8);
886       NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8);
887       t.StringIndex -= CurRanStrx;
888       t.StringIndex += NextRanStrx;
889     }
890   } else {
891     // Go to one past next null.
892     t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1;
893   }
894   ++t.SymbolIndex;
895   return t;
896 }
897 
symbol_begin() const898 Archive::symbol_iterator Archive::symbol_begin() const {
899   if (!hasSymbolTable())
900     return symbol_iterator(Symbol(this, 0, 0));
901 
902   const char *buf = getSymbolTable().begin();
903   if (kind() == K_GNU) {
904     uint32_t symbol_count = 0;
905     symbol_count = read32be(buf);
906     buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
907   } else if (kind() == K_GNU64) {
908     uint64_t symbol_count = read64be(buf);
909     buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t)));
910   } else if (kind() == K_BSD) {
911     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
912     // which is the number of bytes of ranlib structs that follow.  The ranlib
913     // structs are a pair of uint32_t's the first being a string table offset
914     // and the second being the offset into the archive of the member that
915     // define the symbol. After that the next uint32_t is the byte count of
916     // the string table followed by the string table.
917     uint32_t ranlib_count = 0;
918     ranlib_count = read32le(buf) / 8;
919     const char *ranlibs = buf + 4;
920     uint32_t ran_strx = 0;
921     ran_strx = read32le(ranlibs);
922     buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
923     // Skip the byte count of the string table.
924     buf += sizeof(uint32_t);
925     buf += ran_strx;
926   } else if (kind() == K_DARWIN64) {
927     // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t
928     // which is the number of bytes of ranlib_64 structs that follow.  The
929     // ranlib_64 structs are a pair of uint64_t's the first being a string
930     // table offset and the second being the offset into the archive of the
931     // member that define the symbol. After that the next uint64_t is the byte
932     // count of the string table followed by the string table.
933     uint64_t ranlib_count = 0;
934     ranlib_count = read64le(buf) / 16;
935     const char *ranlibs = buf + 8;
936     uint64_t ran_strx = 0;
937     ran_strx = read64le(ranlibs);
938     buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t))));
939     // Skip the byte count of the string table.
940     buf += sizeof(uint64_t);
941     buf += ran_strx;
942   } else {
943     uint32_t member_count = 0;
944     uint32_t symbol_count = 0;
945     member_count = read32le(buf);
946     buf += 4 + (member_count * 4); // Skip offsets.
947     symbol_count = read32le(buf);
948     buf += 4 + (symbol_count * 2); // Skip indices.
949   }
950   uint32_t string_start_offset = buf - getSymbolTable().begin();
951   return symbol_iterator(Symbol(this, 0, string_start_offset));
952 }
953 
symbol_end() const954 Archive::symbol_iterator Archive::symbol_end() const {
955   return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0));
956 }
957 
getNumberOfSymbols() const958 uint32_t Archive::getNumberOfSymbols() const {
959   if (!hasSymbolTable())
960     return 0;
961   const char *buf = getSymbolTable().begin();
962   if (kind() == K_GNU)
963     return read32be(buf);
964   if (kind() == K_GNU64)
965     return read64be(buf);
966   if (kind() == K_BSD)
967     return read32le(buf) / 8;
968   if (kind() == K_DARWIN64)
969     return read64le(buf) / 16;
970   uint32_t member_count = 0;
971   member_count = read32le(buf);
972   buf += 4 + (member_count * 4); // Skip offsets.
973   return read32le(buf);
974 }
975 
findSym(StringRef name) const976 Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const {
977   Archive::symbol_iterator bs = symbol_begin();
978   Archive::symbol_iterator es = symbol_end();
979 
980   for (; bs != es; ++bs) {
981     StringRef SymName = bs->getName();
982     if (SymName == name) {
983       if (auto MemberOrErr = bs->getMember())
984         return Child(*MemberOrErr);
985       else
986         return MemberOrErr.takeError();
987     }
988   }
989   return Optional<Child>();
990 }
991 
992 // Returns true if archive file contains no member file.
isEmpty() const993 bool Archive::isEmpty() const { return Data.getBufferSize() == 8; }
994 
hasSymbolTable() const995 bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); }
996