• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- tools/dsymutil/DwarfLinker.cpp - Dwarf debug info linker -----------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include "DebugMap.h"
10 #include "BinaryHolder.h"
11 #include "DebugMap.h"
12 #include "dsymutil.h"
13 #include "MachOUtils.h"
14 #include "NonRelocatableStringpool.h"
15 #include "llvm/ADT/IntervalMap.h"
16 #include "llvm/ADT/StringMap.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/CodeGen/AsmPrinter.h"
19 #include "llvm/CodeGen/DIE.h"
20 #include "llvm/Config/config.h"
21 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
22 #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
23 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
24 #include "llvm/MC/MCAsmBackend.h"
25 #include "llvm/MC/MCAsmInfo.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCCodeEmitter.h"
28 #include "llvm/MC/MCDwarf.h"
29 #include "llvm/MC/MCInstrInfo.h"
30 #include "llvm/MC/MCObjectFileInfo.h"
31 #include "llvm/MC/MCRegisterInfo.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/MCTargetOptionsCommandFlags.h"
35 #include "llvm/Object/MachO.h"
36 #include "llvm/Support/Dwarf.h"
37 #include "llvm/Support/LEB128.h"
38 #include "llvm/Support/TargetRegistry.h"
39 #include "llvm/Target/TargetMachine.h"
40 #include "llvm/Target/TargetOptions.h"
41 #include <string>
42 #include <tuple>
43 
44 namespace llvm {
45 namespace dsymutil {
46 
47 namespace {
48 
49 template <typename KeyT, typename ValT>
50 using HalfOpenIntervalMap =
51     IntervalMap<KeyT, ValT, IntervalMapImpl::NodeSizer<KeyT, ValT>::LeafSize,
52                 IntervalMapHalfOpenInfo<KeyT>>;
53 
54 typedef HalfOpenIntervalMap<uint64_t, int64_t> FunctionIntervals;
55 
56 // FIXME: Delete this structure.
57 struct PatchLocation {
58   DIE::value_iterator I;
59 
60   PatchLocation() = default;
PatchLocationllvm::dsymutil::__anondabe43b60111::PatchLocation61   PatchLocation(DIE::value_iterator I) : I(I) {}
62 
setllvm::dsymutil::__anondabe43b60111::PatchLocation63   void set(uint64_t New) const {
64     assert(I);
65     const auto &Old = *I;
66     assert(Old.getType() == DIEValue::isInteger);
67     *I = DIEValue(Old.getAttribute(), Old.getForm(), DIEInteger(New));
68   }
69 
getllvm::dsymutil::__anondabe43b60111::PatchLocation70   uint64_t get() const {
71     assert(I);
72     return I->getDIEInteger().getValue();
73   }
74 };
75 
76 class CompileUnit;
77 struct DeclMapInfo;
78 
79 /// A DeclContext is a named program scope that is used for ODR
80 /// uniquing of types.
81 /// The set of DeclContext for the ODR-subject parts of a Dwarf link
82 /// is expanded (and uniqued) with each new object file processed. We
83 /// need to determine the context of each DIE in an linked object file
84 /// to see if the corresponding type has already been emitted.
85 ///
86 /// The contexts are conceptually organised as a tree (eg. a function
87 /// scope is contained in a namespace scope that contains other
88 /// scopes), but storing/accessing them in an actual tree is too
89 /// inefficient: we need to be able to very quickly query a context
90 /// for a given child context by name. Storing a StringMap in each
91 /// DeclContext would be too space inefficient.
92 /// The solution here is to give each DeclContext a link to its parent
93 /// (this allows to walk up the tree), but to query the existance of a
94 /// specific DeclContext using a separate DenseMap keyed on the hash
95 /// of the fully qualified name of the context.
96 class DeclContext {
97   unsigned QualifiedNameHash;
98   uint32_t Line;
99   uint32_t ByteSize;
100   uint16_t Tag;
101   StringRef Name;
102   StringRef File;
103   const DeclContext &Parent;
104   const DWARFDebugInfoEntryMinimal *LastSeenDIE;
105   uint32_t LastSeenCompileUnitID;
106   uint32_t CanonicalDIEOffset;
107 
108   friend DeclMapInfo;
109 
110 public:
111   typedef DenseSet<DeclContext *, DeclMapInfo> Map;
112 
DeclContext()113   DeclContext()
114       : QualifiedNameHash(0), Line(0), ByteSize(0),
115         Tag(dwarf::DW_TAG_compile_unit), Name(), File(), Parent(*this),
116         LastSeenDIE(nullptr), LastSeenCompileUnitID(0), CanonicalDIEOffset(0) {}
117 
DeclContext(unsigned Hash,uint32_t Line,uint32_t ByteSize,uint16_t Tag,StringRef Name,StringRef File,const DeclContext & Parent,const DWARFDebugInfoEntryMinimal * LastSeenDIE=nullptr,unsigned CUId=0)118   DeclContext(unsigned Hash, uint32_t Line, uint32_t ByteSize, uint16_t Tag,
119               StringRef Name, StringRef File, const DeclContext &Parent,
120               const DWARFDebugInfoEntryMinimal *LastSeenDIE = nullptr,
121               unsigned CUId = 0)
122       : QualifiedNameHash(Hash), Line(Line), ByteSize(ByteSize), Tag(Tag),
123         Name(Name), File(File), Parent(Parent), LastSeenDIE(LastSeenDIE),
124         LastSeenCompileUnitID(CUId), CanonicalDIEOffset(0) {}
125 
getQualifiedNameHash() const126   uint32_t getQualifiedNameHash() const { return QualifiedNameHash; }
127 
128   bool setLastSeenDIE(CompileUnit &U, const DWARFDebugInfoEntryMinimal *Die);
129 
getCanonicalDIEOffset() const130   uint32_t getCanonicalDIEOffset() const { return CanonicalDIEOffset; }
setCanonicalDIEOffset(uint32_t Offset)131   void setCanonicalDIEOffset(uint32_t Offset) { CanonicalDIEOffset = Offset; }
132 
getTag() const133   uint16_t getTag() const { return Tag; }
getName() const134   StringRef getName() const { return Name; }
135 };
136 
137 /// Info type for the DenseMap storing the DeclContext pointers.
138 struct DeclMapInfo : private DenseMapInfo<DeclContext *> {
139   using DenseMapInfo<DeclContext *>::getEmptyKey;
140   using DenseMapInfo<DeclContext *>::getTombstoneKey;
141 
getHashValuellvm::dsymutil::__anondabe43b60111::DeclMapInfo142   static unsigned getHashValue(const DeclContext *Ctxt) {
143     return Ctxt->QualifiedNameHash;
144   }
145 
isEqualllvm::dsymutil::__anondabe43b60111::DeclMapInfo146   static bool isEqual(const DeclContext *LHS, const DeclContext *RHS) {
147     if (RHS == getEmptyKey() || RHS == getTombstoneKey())
148       return RHS == LHS;
149     return LHS->QualifiedNameHash == RHS->QualifiedNameHash &&
150            LHS->Line == RHS->Line && LHS->ByteSize == RHS->ByteSize &&
151            LHS->Name.data() == RHS->Name.data() &&
152            LHS->File.data() == RHS->File.data() &&
153            LHS->Parent.QualifiedNameHash == RHS->Parent.QualifiedNameHash;
154   }
155 };
156 
157 /// This class gives a tree-like API to the DenseMap that stores the
158 /// DeclContext objects. It also holds the BumpPtrAllocator where
159 /// these objects will be allocated.
160 class DeclContextTree {
161   BumpPtrAllocator Allocator;
162   DeclContext Root;
163   DeclContext::Map Contexts;
164 
165 public:
166   /// Get the child of \a Context described by \a DIE in \a Unit. The
167   /// required strings will be interned in \a StringPool.
168   /// \returns The child DeclContext along with one bit that is set if
169   /// this context is invalid.
170   /// An invalid context means it shouldn't be considered for uniquing, but its
171   /// not returning null, because some children of that context might be
172   /// uniquing candidates.  FIXME: The invalid bit along the return value is to
173   /// emulate some dsymutil-classic functionality.
174   PointerIntPair<DeclContext *, 1>
175   getChildDeclContext(DeclContext &Context,
176                       const DWARFDebugInfoEntryMinimal *DIE, CompileUnit &Unit,
177                       NonRelocatableStringpool &StringPool, bool InClangModule);
178 
getRoot()179   DeclContext &getRoot() { return Root; }
180 };
181 
182 /// \brief Stores all information relating to a compile unit, be it in
183 /// its original instance in the object file to its brand new cloned
184 /// and linked DIE tree.
185 class CompileUnit {
186 public:
187   /// \brief Information gathered about a DIE in the object file.
188   struct DIEInfo {
189     int64_t AddrAdjust; ///< Address offset to apply to the described entity.
190     DeclContext *Ctxt;  ///< ODR Declaration context.
191     DIE *Clone;         ///< Cloned version of that DIE.
192     uint32_t ParentIdx; ///< The index of this DIE's parent.
193     bool Keep : 1;      ///< Is the DIE part of the linked output?
194     bool InDebugMap : 1;///< Was this DIE's entity found in the map?
195     bool Prune : 1;     ///< Is this a pure forward declaration we can strip?
196   };
197 
CompileUnit(DWARFUnit & OrigUnit,unsigned ID,bool CanUseODR,StringRef ClangModuleName)198   CompileUnit(DWARFUnit &OrigUnit, unsigned ID, bool CanUseODR,
199               StringRef ClangModuleName)
200       : OrigUnit(OrigUnit), ID(ID), LowPc(UINT64_MAX), HighPc(0), RangeAlloc(),
201         Ranges(RangeAlloc), ClangModuleName(ClangModuleName) {
202     Info.resize(OrigUnit.getNumDIEs());
203 
204     const auto *CUDie = OrigUnit.getUnitDIE(false);
205     unsigned Lang = CUDie->getAttributeValueAsUnsignedConstant(
206         &OrigUnit, dwarf::DW_AT_language, 0);
207     HasODR = CanUseODR && (Lang == dwarf::DW_LANG_C_plus_plus ||
208                            Lang == dwarf::DW_LANG_C_plus_plus_03 ||
209                            Lang == dwarf::DW_LANG_C_plus_plus_11 ||
210                            Lang == dwarf::DW_LANG_C_plus_plus_14 ||
211                            Lang == dwarf::DW_LANG_ObjC_plus_plus);
212   }
213 
CompileUnit(CompileUnit && RHS)214   CompileUnit(CompileUnit &&RHS)
215       : OrigUnit(RHS.OrigUnit), Info(std::move(RHS.Info)),
216         CUDie(std::move(RHS.CUDie)), StartOffset(RHS.StartOffset),
217         NextUnitOffset(RHS.NextUnitOffset), RangeAlloc(), Ranges(RangeAlloc) {
218     // The CompileUnit container has been 'reserve()'d with the right
219     // size. We cannot move the IntervalMap anyway.
220     llvm_unreachable("CompileUnits should not be moved.");
221   }
222 
getOrigUnit() const223   DWARFUnit &getOrigUnit() const { return OrigUnit; }
224 
getUniqueID() const225   unsigned getUniqueID() const { return ID; }
226 
getOutputUnitDIE() const227   DIE *getOutputUnitDIE() const { return CUDie; }
setOutputUnitDIE(DIE * Die)228   void setOutputUnitDIE(DIE *Die) { CUDie = Die; }
229 
hasODR() const230   bool hasODR() const { return HasODR; }
isClangModule() const231   bool isClangModule() const { return !ClangModuleName.empty(); }
getClangModuleName() const232   const std::string &getClangModuleName() const { return ClangModuleName; }
233 
getInfo(unsigned Idx)234   DIEInfo &getInfo(unsigned Idx) { return Info[Idx]; }
getInfo(unsigned Idx) const235   const DIEInfo &getInfo(unsigned Idx) const { return Info[Idx]; }
236 
getStartOffset() const237   uint64_t getStartOffset() const { return StartOffset; }
getNextUnitOffset() const238   uint64_t getNextUnitOffset() const { return NextUnitOffset; }
setStartOffset(uint64_t DebugInfoSize)239   void setStartOffset(uint64_t DebugInfoSize) { StartOffset = DebugInfoSize; }
240 
getLowPc() const241   uint64_t getLowPc() const { return LowPc; }
getHighPc() const242   uint64_t getHighPc() const { return HighPc; }
243 
getUnitRangesAttribute() const244   Optional<PatchLocation> getUnitRangesAttribute() const {
245     return UnitRangeAttribute;
246   }
getFunctionRanges() const247   const FunctionIntervals &getFunctionRanges() const { return Ranges; }
getRangesAttributes() const248   const std::vector<PatchLocation> &getRangesAttributes() const {
249     return RangeAttributes;
250   }
251 
252   const std::vector<std::pair<PatchLocation, int64_t>> &
getLocationAttributes() const253   getLocationAttributes() const {
254     return LocationAttributes;
255   }
256 
setHasInterestingContent()257   void setHasInterestingContent() { HasInterestingContent = true; }
hasInterestingContent()258   bool hasInterestingContent() { return HasInterestingContent; }
259 
260   /// Mark every DIE in this unit as kept. This function also
261   /// marks variables as InDebugMap so that they appear in the
262   /// reconstructed accelerator tables.
263   void markEverythingAsKept();
264 
265   /// \brief Compute the end offset for this unit. Must be
266   /// called after the CU's DIEs have been cloned.
267   /// \returns the next unit offset (which is also the current
268   /// debug_info section size).
269   uint64_t computeNextUnitOffset();
270 
271   /// \brief Keep track of a forward reference to DIE \p Die in \p
272   /// RefUnit by \p Attr. The attribute should be fixed up later to
273   /// point to the absolute offset of \p Die in the debug_info section
274   /// or to the canonical offset of \p Ctxt if it is non-null.
275   void noteForwardReference(DIE *Die, const CompileUnit *RefUnit,
276                             DeclContext *Ctxt, PatchLocation Attr);
277 
278   /// \brief Apply all fixups recored by noteForwardReference().
279   void fixupForwardReferences();
280 
281   /// \brief Add a function range [\p LowPC, \p HighPC) that is
282   /// relocatad by applying offset \p PCOffset.
283   void addFunctionRange(uint64_t LowPC, uint64_t HighPC, int64_t PCOffset);
284 
285   /// \brief Keep track of a DW_AT_range attribute that we will need to
286   /// patch up later.
287   void noteRangeAttribute(const DIE &Die, PatchLocation Attr);
288 
289   /// \brief Keep track of a location attribute pointing to a location
290   /// list in the debug_loc section.
291   void noteLocationAttribute(PatchLocation Attr, int64_t PcOffset);
292 
293   /// \brief Add a name accelerator entry for \p Die with \p Name
294   /// which is stored in the string table at \p Offset.
295   void addNameAccelerator(const DIE *Die, const char *Name, uint32_t Offset,
296                           bool SkipPubnamesSection = false);
297 
298   /// \brief Add a type accelerator entry for \p Die with \p Name
299   /// which is stored in the string table at \p Offset.
300   void addTypeAccelerator(const DIE *Die, const char *Name, uint32_t Offset);
301 
302   struct AccelInfo {
303     StringRef Name;      ///< Name of the entry.
304     const DIE *Die;      ///< DIE this entry describes.
305     uint32_t NameOffset; ///< Offset of Name in the string pool.
306     bool SkipPubSection; ///< Emit this entry only in the apple_* sections.
307 
AccelInfollvm::dsymutil::__anondabe43b60111::CompileUnit::AccelInfo308     AccelInfo(StringRef Name, const DIE *Die, uint32_t NameOffset,
309               bool SkipPubSection = false)
310         : Name(Name), Die(Die), NameOffset(NameOffset),
311           SkipPubSection(SkipPubSection) {}
312   };
313 
getPubnames() const314   const std::vector<AccelInfo> &getPubnames() const { return Pubnames; }
getPubtypes() const315   const std::vector<AccelInfo> &getPubtypes() const { return Pubtypes; }
316 
317   /// Get the full path for file \a FileNum in the line table
getResolvedPath(unsigned FileNum)318   StringRef getResolvedPath(unsigned FileNum) {
319     if (FileNum >= ResolvedPaths.size())
320       return StringRef();
321     return ResolvedPaths[FileNum];
322   }
323 
324   /// Set the fully resolved path for the line-table's file \a FileNum
325   /// to \a Path.
setResolvedPath(unsigned FileNum,StringRef Path)326   void setResolvedPath(unsigned FileNum, StringRef Path) {
327     if (ResolvedPaths.size() <= FileNum)
328       ResolvedPaths.resize(FileNum + 1);
329     ResolvedPaths[FileNum] = Path;
330   }
331 
332 private:
333   DWARFUnit &OrigUnit;
334   unsigned ID;
335   std::vector<DIEInfo> Info; ///< DIE info indexed by DIE index.
336   DIE *CUDie;                ///< Root of the linked DIE tree.
337 
338   uint64_t StartOffset;
339   uint64_t NextUnitOffset;
340 
341   uint64_t LowPc;
342   uint64_t HighPc;
343 
344   /// \brief A list of attributes to fixup with the absolute offset of
345   /// a DIE in the debug_info section.
346   ///
347   /// The offsets for the attributes in this array couldn't be set while
348   /// cloning because for cross-cu forward refences the target DIE's
349   /// offset isn't known you emit the reference attribute.
350   std::vector<std::tuple<DIE *, const CompileUnit *, DeclContext *,
351                          PatchLocation>> ForwardDIEReferences;
352 
353   FunctionIntervals::Allocator RangeAlloc;
354   /// \brief The ranges in that interval map are the PC ranges for
355   /// functions in this unit, associated with the PC offset to apply
356   /// to the addresses to get the linked address.
357   FunctionIntervals Ranges;
358 
359   /// \brief DW_AT_ranges attributes to patch after we have gathered
360   /// all the unit's function addresses.
361   /// @{
362   std::vector<PatchLocation> RangeAttributes;
363   Optional<PatchLocation> UnitRangeAttribute;
364   /// @}
365 
366   /// \brief Location attributes that need to be transfered from th
367   /// original debug_loc section to the liked one. They are stored
368   /// along with the PC offset that is to be applied to their
369   /// function's address.
370   std::vector<std::pair<PatchLocation, int64_t>> LocationAttributes;
371 
372   /// \brief Accelerator entries for the unit, both for the pub*
373   /// sections and the apple* ones.
374   /// @{
375   std::vector<AccelInfo> Pubnames;
376   std::vector<AccelInfo> Pubtypes;
377   /// @}
378 
379   /// Cached resolved paths from the line table.
380   /// Note, the StringRefs here point in to the intern (uniquing) string pool.
381   /// This means that a StringRef returned here doesn't need to then be uniqued
382   /// for the purposes of getting a unique address for each string.
383   std::vector<StringRef> ResolvedPaths;
384 
385   /// Is this unit subject to the ODR rule?
386   bool HasODR;
387   /// Did a DIE actually contain a valid reloc?
388   bool HasInterestingContent;
389   /// If this is a Clang module, this holds the module's name.
390   std::string ClangModuleName;
391 };
392 
markEverythingAsKept()393 void CompileUnit::markEverythingAsKept() {
394   for (auto &I : Info)
395     // Mark everything that wasn't explicity marked for pruning.
396     I.Keep = !I.Prune;
397 }
398 
computeNextUnitOffset()399 uint64_t CompileUnit::computeNextUnitOffset() {
400   NextUnitOffset = StartOffset + 11 /* Header size */;
401   // The root DIE might be null, meaning that the Unit had nothing to
402   // contribute to the linked output. In that case, we will emit the
403   // unit header without any actual DIE.
404   if (CUDie)
405     NextUnitOffset += CUDie->getSize();
406   return NextUnitOffset;
407 }
408 
409 /// \brief Keep track of a forward cross-cu reference from this unit
410 /// to \p Die that lives in \p RefUnit.
noteForwardReference(DIE * Die,const CompileUnit * RefUnit,DeclContext * Ctxt,PatchLocation Attr)411 void CompileUnit::noteForwardReference(DIE *Die, const CompileUnit *RefUnit,
412                                        DeclContext *Ctxt, PatchLocation Attr) {
413   ForwardDIEReferences.emplace_back(Die, RefUnit, Ctxt, Attr);
414 }
415 
416 /// \brief Apply all fixups recorded by noteForwardReference().
fixupForwardReferences()417 void CompileUnit::fixupForwardReferences() {
418   for (const auto &Ref : ForwardDIEReferences) {
419     DIE *RefDie;
420     const CompileUnit *RefUnit;
421     PatchLocation Attr;
422     DeclContext *Ctxt;
423     std::tie(RefDie, RefUnit, Ctxt, Attr) = Ref;
424     if (Ctxt && Ctxt->getCanonicalDIEOffset())
425       Attr.set(Ctxt->getCanonicalDIEOffset());
426     else
427       Attr.set(RefDie->getOffset() + RefUnit->getStartOffset());
428   }
429 }
430 
addFunctionRange(uint64_t FuncLowPc,uint64_t FuncHighPc,int64_t PcOffset)431 void CompileUnit::addFunctionRange(uint64_t FuncLowPc, uint64_t FuncHighPc,
432                                    int64_t PcOffset) {
433   Ranges.insert(FuncLowPc, FuncHighPc, PcOffset);
434   this->LowPc = std::min(LowPc, FuncLowPc + PcOffset);
435   this->HighPc = std::max(HighPc, FuncHighPc + PcOffset);
436 }
437 
noteRangeAttribute(const DIE & Die,PatchLocation Attr)438 void CompileUnit::noteRangeAttribute(const DIE &Die, PatchLocation Attr) {
439   if (Die.getTag() != dwarf::DW_TAG_compile_unit)
440     RangeAttributes.push_back(Attr);
441   else
442     UnitRangeAttribute = Attr;
443 }
444 
noteLocationAttribute(PatchLocation Attr,int64_t PcOffset)445 void CompileUnit::noteLocationAttribute(PatchLocation Attr, int64_t PcOffset) {
446   LocationAttributes.emplace_back(Attr, PcOffset);
447 }
448 
449 /// \brief Add a name accelerator entry for \p Die with \p Name
450 /// which is stored in the string table at \p Offset.
addNameAccelerator(const DIE * Die,const char * Name,uint32_t Offset,bool SkipPubSection)451 void CompileUnit::addNameAccelerator(const DIE *Die, const char *Name,
452                                      uint32_t Offset, bool SkipPubSection) {
453   Pubnames.emplace_back(Name, Die, Offset, SkipPubSection);
454 }
455 
456 /// \brief Add a type accelerator entry for \p Die with \p Name
457 /// which is stored in the string table at \p Offset.
addTypeAccelerator(const DIE * Die,const char * Name,uint32_t Offset)458 void CompileUnit::addTypeAccelerator(const DIE *Die, const char *Name,
459                                      uint32_t Offset) {
460   Pubtypes.emplace_back(Name, Die, Offset, false);
461 }
462 
463 /// \brief The Dwarf streaming logic
464 ///
465 /// All interactions with the MC layer that is used to build the debug
466 /// information binary representation are handled in this class.
467 class DwarfStreamer {
468   /// \defgroup MCObjects MC layer objects constructed by the streamer
469   /// @{
470   std::unique_ptr<MCRegisterInfo> MRI;
471   std::unique_ptr<MCAsmInfo> MAI;
472   std::unique_ptr<MCObjectFileInfo> MOFI;
473   std::unique_ptr<MCContext> MC;
474   MCAsmBackend *MAB; // Owned by MCStreamer
475   std::unique_ptr<MCInstrInfo> MII;
476   std::unique_ptr<MCSubtargetInfo> MSTI;
477   MCCodeEmitter *MCE; // Owned by MCStreamer
478   MCStreamer *MS;     // Owned by AsmPrinter
479   std::unique_ptr<TargetMachine> TM;
480   std::unique_ptr<AsmPrinter> Asm;
481   /// @}
482 
483   /// \brief the file we stream the linked Dwarf to.
484   std::unique_ptr<raw_fd_ostream> OutFile;
485 
486   uint32_t RangesSectionSize;
487   uint32_t LocSectionSize;
488   uint32_t LineSectionSize;
489   uint32_t FrameSectionSize;
490 
491   /// \brief Emit the pubnames or pubtypes section contribution for \p
492   /// Unit into \p Sec. The data is provided in \p Names.
493   void emitPubSectionForUnit(MCSection *Sec, StringRef Name,
494                              const CompileUnit &Unit,
495                              const std::vector<CompileUnit::AccelInfo> &Names);
496 
497 public:
498   /// \brief Actually create the streamer and the ouptut file.
499   ///
500   /// This could be done directly in the constructor, but it feels
501   /// more natural to handle errors through return value.
502   bool init(Triple TheTriple, StringRef OutputFilename);
503 
504   /// \brief Dump the file to the disk.
505   bool finish(const DebugMap &);
506 
getAsmPrinter() const507   AsmPrinter &getAsmPrinter() const { return *Asm; }
508 
509   /// \brief Set the current output section to debug_info and change
510   /// the MC Dwarf version to \p DwarfVersion.
511   void switchToDebugInfoSection(unsigned DwarfVersion);
512 
513   /// \brief Emit the compilation unit header for \p Unit in the
514   /// debug_info section.
515   ///
516   /// As a side effect, this also switches the current Dwarf version
517   /// of the MC layer to the one of U.getOrigUnit().
518   void emitCompileUnitHeader(CompileUnit &Unit);
519 
520   /// \brief Recursively emit the DIE tree rooted at \p Die.
521   void emitDIE(DIE &Die);
522 
523   /// \brief Emit the abbreviation table \p Abbrevs to the
524   /// debug_abbrev section.
525   void emitAbbrevs(const std::vector<std::unique_ptr<DIEAbbrev>> &Abbrevs);
526 
527   /// \brief Emit the string table described by \p Pool.
528   void emitStrings(const NonRelocatableStringpool &Pool);
529 
530   /// \brief Emit debug_ranges for \p FuncRange by translating the
531   /// original \p Entries.
532   void emitRangesEntries(
533       int64_t UnitPcOffset, uint64_t OrigLowPc,
534       const FunctionIntervals::const_iterator &FuncRange,
535       const std::vector<DWARFDebugRangeList::RangeListEntry> &Entries,
536       unsigned AddressSize);
537 
538   /// \brief Emit debug_aranges entries for \p Unit and if \p
539   /// DoRangesSection is true, also emit the debug_ranges entries for
540   /// the DW_TAG_compile_unit's DW_AT_ranges attribute.
541   void emitUnitRangesEntries(CompileUnit &Unit, bool DoRangesSection);
542 
getRangesSectionSize() const543   uint32_t getRangesSectionSize() const { return RangesSectionSize; }
544 
545   /// \brief Emit the debug_loc contribution for \p Unit by copying
546   /// the entries from \p Dwarf and offseting them. Update the
547   /// location attributes to point to the new entries.
548   void emitLocationsForUnit(const CompileUnit &Unit, DWARFContext &Dwarf);
549 
550   /// \brief Emit the line table described in \p Rows into the
551   /// debug_line section.
552   void emitLineTableForUnit(MCDwarfLineTableParams Params,
553                             StringRef PrologueBytes, unsigned MinInstLength,
554                             std::vector<DWARFDebugLine::Row> &Rows,
555                             unsigned AdddressSize);
556 
getLineSectionSize() const557   uint32_t getLineSectionSize() const { return LineSectionSize; }
558 
559   /// \brief Emit the .debug_pubnames contribution for \p Unit.
560   void emitPubNamesForUnit(const CompileUnit &Unit);
561 
562   /// \brief Emit the .debug_pubtypes contribution for \p Unit.
563   void emitPubTypesForUnit(const CompileUnit &Unit);
564 
565   /// \brief Emit a CIE.
566   void emitCIE(StringRef CIEBytes);
567 
568   /// \brief Emit an FDE with data \p Bytes.
569   void emitFDE(uint32_t CIEOffset, uint32_t AddreSize, uint32_t Address,
570                StringRef Bytes);
571 
getFrameSectionSize() const572   uint32_t getFrameSectionSize() const { return FrameSectionSize; }
573 };
574 
init(Triple TheTriple,StringRef OutputFilename)575 bool DwarfStreamer::init(Triple TheTriple, StringRef OutputFilename) {
576   std::string ErrorStr;
577   std::string TripleName;
578   StringRef Context = "dwarf streamer init";
579 
580   // Get the target.
581   const Target *TheTarget =
582       TargetRegistry::lookupTarget(TripleName, TheTriple, ErrorStr);
583   if (!TheTarget)
584     return error(ErrorStr, Context);
585   TripleName = TheTriple.getTriple();
586 
587   // Create all the MC Objects.
588   MRI.reset(TheTarget->createMCRegInfo(TripleName));
589   if (!MRI)
590     return error(Twine("no register info for target ") + TripleName, Context);
591 
592   MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName));
593   if (!MAI)
594     return error("no asm info for target " + TripleName, Context);
595 
596   MOFI.reset(new MCObjectFileInfo);
597   MC.reset(new MCContext(MAI.get(), MRI.get(), MOFI.get()));
598   MOFI->InitMCObjectFileInfo(TheTriple, /*PIC*/ false, CodeModel::Default, *MC);
599 
600   MAB = TheTarget->createMCAsmBackend(*MRI, TripleName, "");
601   if (!MAB)
602     return error("no asm backend for target " + TripleName, Context);
603 
604   MII.reset(TheTarget->createMCInstrInfo());
605   if (!MII)
606     return error("no instr info info for target " + TripleName, Context);
607 
608   MSTI.reset(TheTarget->createMCSubtargetInfo(TripleName, "", ""));
609   if (!MSTI)
610     return error("no subtarget info for target " + TripleName, Context);
611 
612   MCE = TheTarget->createMCCodeEmitter(*MII, *MRI, *MC);
613   if (!MCE)
614     return error("no code emitter for target " + TripleName, Context);
615 
616   // Create the output file.
617   std::error_code EC;
618   OutFile =
619       llvm::make_unique<raw_fd_ostream>(OutputFilename, EC, sys::fs::F_None);
620   if (EC)
621     return error(Twine(OutputFilename) + ": " + EC.message(), Context);
622 
623   MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags();
624   MS = TheTarget->createMCObjectStreamer(
625       TheTriple, *MC, *MAB, *OutFile, MCE, *MSTI, MCOptions.MCRelaxAll,
626       MCOptions.MCIncrementalLinkerCompatible,
627       /*DWARFMustBeAtTheEnd*/ false);
628   if (!MS)
629     return error("no object streamer for target " + TripleName, Context);
630 
631   // Finally create the AsmPrinter we'll use to emit the DIEs.
632   TM.reset(TheTarget->createTargetMachine(TripleName, "", "", TargetOptions(),
633                                           None));
634   if (!TM)
635     return error("no target machine for target " + TripleName, Context);
636 
637   Asm.reset(TheTarget->createAsmPrinter(*TM, std::unique_ptr<MCStreamer>(MS)));
638   if (!Asm)
639     return error("no asm printer for target " + TripleName, Context);
640 
641   RangesSectionSize = 0;
642   LocSectionSize = 0;
643   LineSectionSize = 0;
644   FrameSectionSize = 0;
645 
646   return true;
647 }
648 
finish(const DebugMap & DM)649 bool DwarfStreamer::finish(const DebugMap &DM) {
650   if (DM.getTriple().isOSDarwin() && !DM.getBinaryPath().empty())
651     return MachOUtils::generateDsymCompanion(DM, *MS, *OutFile);
652 
653   MS->Finish();
654   return true;
655 }
656 
657 /// \brief Set the current output section to debug_info and change
658 /// the MC Dwarf version to \p DwarfVersion.
switchToDebugInfoSection(unsigned DwarfVersion)659 void DwarfStreamer::switchToDebugInfoSection(unsigned DwarfVersion) {
660   MS->SwitchSection(MOFI->getDwarfInfoSection());
661   MC->setDwarfVersion(DwarfVersion);
662 }
663 
664 /// \brief Emit the compilation unit header for \p Unit in the
665 /// debug_info section.
666 ///
667 /// A Dwarf scetion header is encoded as:
668 ///  uint32_t   Unit length (omiting this field)
669 ///  uint16_t   Version
670 ///  uint32_t   Abbreviation table offset
671 ///  uint8_t    Address size
672 ///
673 /// Leading to a total of 11 bytes.
emitCompileUnitHeader(CompileUnit & Unit)674 void DwarfStreamer::emitCompileUnitHeader(CompileUnit &Unit) {
675   unsigned Version = Unit.getOrigUnit().getVersion();
676   switchToDebugInfoSection(Version);
677 
678   // Emit size of content not including length itself. The size has
679   // already been computed in CompileUnit::computeOffsets(). Substract
680   // 4 to that size to account for the length field.
681   Asm->EmitInt32(Unit.getNextUnitOffset() - Unit.getStartOffset() - 4);
682   Asm->EmitInt16(Version);
683   // We share one abbreviations table across all units so it's always at the
684   // start of the section.
685   Asm->EmitInt32(0);
686   Asm->EmitInt8(Unit.getOrigUnit().getAddressByteSize());
687 }
688 
689 /// \brief Emit the \p Abbrevs array as the shared abbreviation table
690 /// for the linked Dwarf file.
emitAbbrevs(const std::vector<std::unique_ptr<DIEAbbrev>> & Abbrevs)691 void DwarfStreamer::emitAbbrevs(
692     const std::vector<std::unique_ptr<DIEAbbrev>> &Abbrevs) {
693   MS->SwitchSection(MOFI->getDwarfAbbrevSection());
694   Asm->emitDwarfAbbrevs(Abbrevs);
695 }
696 
697 /// \brief Recursively emit the DIE tree rooted at \p Die.
emitDIE(DIE & Die)698 void DwarfStreamer::emitDIE(DIE &Die) {
699   MS->SwitchSection(MOFI->getDwarfInfoSection());
700   Asm->emitDwarfDIE(Die);
701 }
702 
703 /// \brief Emit the debug_str section stored in \p Pool.
emitStrings(const NonRelocatableStringpool & Pool)704 void DwarfStreamer::emitStrings(const NonRelocatableStringpool &Pool) {
705   Asm->OutStreamer->SwitchSection(MOFI->getDwarfStrSection());
706   for (auto *Entry = Pool.getFirstEntry(); Entry;
707        Entry = Pool.getNextEntry(Entry))
708     Asm->OutStreamer->EmitBytes(
709         StringRef(Entry->getKey().data(), Entry->getKey().size() + 1));
710 }
711 
712 /// \brief Emit the debug_range section contents for \p FuncRange by
713 /// translating the original \p Entries. The debug_range section
714 /// format is totally trivial, consisting just of pairs of address
715 /// sized addresses describing the ranges.
emitRangesEntries(int64_t UnitPcOffset,uint64_t OrigLowPc,const FunctionIntervals::const_iterator & FuncRange,const std::vector<DWARFDebugRangeList::RangeListEntry> & Entries,unsigned AddressSize)716 void DwarfStreamer::emitRangesEntries(
717     int64_t UnitPcOffset, uint64_t OrigLowPc,
718     const FunctionIntervals::const_iterator &FuncRange,
719     const std::vector<DWARFDebugRangeList::RangeListEntry> &Entries,
720     unsigned AddressSize) {
721   MS->SwitchSection(MC->getObjectFileInfo()->getDwarfRangesSection());
722 
723   // Offset each range by the right amount.
724   int64_t PcOffset = Entries.empty() ? 0 : FuncRange.value() + UnitPcOffset;
725   for (const auto &Range : Entries) {
726     if (Range.isBaseAddressSelectionEntry(AddressSize)) {
727       warn("unsupported base address selection operation",
728            "emitting debug_ranges");
729       break;
730     }
731     // Do not emit empty ranges.
732     if (Range.StartAddress == Range.EndAddress)
733       continue;
734 
735     // All range entries should lie in the function range.
736     if (!(Range.StartAddress + OrigLowPc >= FuncRange.start() &&
737           Range.EndAddress + OrigLowPc <= FuncRange.stop()))
738       warn("inconsistent range data.", "emitting debug_ranges");
739     MS->EmitIntValue(Range.StartAddress + PcOffset, AddressSize);
740     MS->EmitIntValue(Range.EndAddress + PcOffset, AddressSize);
741     RangesSectionSize += 2 * AddressSize;
742   }
743 
744   // Add the terminator entry.
745   MS->EmitIntValue(0, AddressSize);
746   MS->EmitIntValue(0, AddressSize);
747   RangesSectionSize += 2 * AddressSize;
748 }
749 
750 /// \brief Emit the debug_aranges contribution of a unit and
751 /// if \p DoDebugRanges is true the debug_range contents for a
752 /// compile_unit level DW_AT_ranges attribute (Which are basically the
753 /// same thing with a different base address).
754 /// Just aggregate all the ranges gathered inside that unit.
emitUnitRangesEntries(CompileUnit & Unit,bool DoDebugRanges)755 void DwarfStreamer::emitUnitRangesEntries(CompileUnit &Unit,
756                                           bool DoDebugRanges) {
757   unsigned AddressSize = Unit.getOrigUnit().getAddressByteSize();
758   // Gather the ranges in a vector, so that we can simplify them. The
759   // IntervalMap will have coalesced the non-linked ranges, but here
760   // we want to coalesce the linked addresses.
761   std::vector<std::pair<uint64_t, uint64_t>> Ranges;
762   const auto &FunctionRanges = Unit.getFunctionRanges();
763   for (auto Range = FunctionRanges.begin(), End = FunctionRanges.end();
764        Range != End; ++Range)
765     Ranges.push_back(std::make_pair(Range.start() + Range.value(),
766                                     Range.stop() + Range.value()));
767 
768   // The object addresses where sorted, but again, the linked
769   // addresses might end up in a different order.
770   std::sort(Ranges.begin(), Ranges.end());
771 
772   if (!Ranges.empty()) {
773     MS->SwitchSection(MC->getObjectFileInfo()->getDwarfARangesSection());
774 
775     MCSymbol *BeginLabel = Asm->createTempSymbol("Barange");
776     MCSymbol *EndLabel = Asm->createTempSymbol("Earange");
777 
778     unsigned HeaderSize =
779         sizeof(int32_t) + // Size of contents (w/o this field
780         sizeof(int16_t) + // DWARF ARange version number
781         sizeof(int32_t) + // Offset of CU in the .debug_info section
782         sizeof(int8_t) +  // Pointer Size (in bytes)
783         sizeof(int8_t);   // Segment Size (in bytes)
784 
785     unsigned TupleSize = AddressSize * 2;
786     unsigned Padding = OffsetToAlignment(HeaderSize, TupleSize);
787 
788     Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); // Arange length
789     Asm->OutStreamer->EmitLabel(BeginLabel);
790     Asm->EmitInt16(dwarf::DW_ARANGES_VERSION); // Version number
791     Asm->EmitInt32(Unit.getStartOffset());     // Corresponding unit's offset
792     Asm->EmitInt8(AddressSize);                // Address size
793     Asm->EmitInt8(0);                          // Segment size
794 
795     Asm->OutStreamer->emitFill(Padding, 0x0);
796 
797     for (auto Range = Ranges.begin(), End = Ranges.end(); Range != End;
798          ++Range) {
799       uint64_t RangeStart = Range->first;
800       MS->EmitIntValue(RangeStart, AddressSize);
801       while ((Range + 1) != End && Range->second == (Range + 1)->first)
802         ++Range;
803       MS->EmitIntValue(Range->second - RangeStart, AddressSize);
804     }
805 
806     // Emit terminator
807     Asm->OutStreamer->EmitIntValue(0, AddressSize);
808     Asm->OutStreamer->EmitIntValue(0, AddressSize);
809     Asm->OutStreamer->EmitLabel(EndLabel);
810   }
811 
812   if (!DoDebugRanges)
813     return;
814 
815   MS->SwitchSection(MC->getObjectFileInfo()->getDwarfRangesSection());
816   // Offset each range by the right amount.
817   int64_t PcOffset = -Unit.getLowPc();
818   // Emit coalesced ranges.
819   for (auto Range = Ranges.begin(), End = Ranges.end(); Range != End; ++Range) {
820     MS->EmitIntValue(Range->first + PcOffset, AddressSize);
821     while (Range + 1 != End && Range->second == (Range + 1)->first)
822       ++Range;
823     MS->EmitIntValue(Range->second + PcOffset, AddressSize);
824     RangesSectionSize += 2 * AddressSize;
825   }
826 
827   // Add the terminator entry.
828   MS->EmitIntValue(0, AddressSize);
829   MS->EmitIntValue(0, AddressSize);
830   RangesSectionSize += 2 * AddressSize;
831 }
832 
833 /// \brief Emit location lists for \p Unit and update attribtues to
834 /// point to the new entries.
emitLocationsForUnit(const CompileUnit & Unit,DWARFContext & Dwarf)835 void DwarfStreamer::emitLocationsForUnit(const CompileUnit &Unit,
836                                          DWARFContext &Dwarf) {
837   const auto &Attributes = Unit.getLocationAttributes();
838 
839   if (Attributes.empty())
840     return;
841 
842   MS->SwitchSection(MC->getObjectFileInfo()->getDwarfLocSection());
843 
844   unsigned AddressSize = Unit.getOrigUnit().getAddressByteSize();
845   const DWARFSection &InputSec = Dwarf.getLocSection();
846   DataExtractor Data(InputSec.Data, Dwarf.isLittleEndian(), AddressSize);
847   DWARFUnit &OrigUnit = Unit.getOrigUnit();
848   const auto *OrigUnitDie = OrigUnit.getUnitDIE(false);
849   int64_t UnitPcOffset = 0;
850   uint64_t OrigLowPc = OrigUnitDie->getAttributeValueAsAddress(
851       &OrigUnit, dwarf::DW_AT_low_pc, -1ULL);
852   if (OrigLowPc != -1ULL)
853     UnitPcOffset = int64_t(OrigLowPc) - Unit.getLowPc();
854 
855   for (const auto &Attr : Attributes) {
856     uint32_t Offset = Attr.first.get();
857     Attr.first.set(LocSectionSize);
858     // This is the quantity to add to the old location address to get
859     // the correct address for the new one.
860     int64_t LocPcOffset = Attr.second + UnitPcOffset;
861     while (Data.isValidOffset(Offset)) {
862       uint64_t Low = Data.getUnsigned(&Offset, AddressSize);
863       uint64_t High = Data.getUnsigned(&Offset, AddressSize);
864       LocSectionSize += 2 * AddressSize;
865       if (Low == 0 && High == 0) {
866         Asm->OutStreamer->EmitIntValue(0, AddressSize);
867         Asm->OutStreamer->EmitIntValue(0, AddressSize);
868         break;
869       }
870       Asm->OutStreamer->EmitIntValue(Low + LocPcOffset, AddressSize);
871       Asm->OutStreamer->EmitIntValue(High + LocPcOffset, AddressSize);
872       uint64_t Length = Data.getU16(&Offset);
873       Asm->OutStreamer->EmitIntValue(Length, 2);
874       // Just copy the bytes over.
875       Asm->OutStreamer->EmitBytes(
876           StringRef(InputSec.Data.substr(Offset, Length)));
877       Offset += Length;
878       LocSectionSize += Length + 2;
879     }
880   }
881 }
882 
emitLineTableForUnit(MCDwarfLineTableParams Params,StringRef PrologueBytes,unsigned MinInstLength,std::vector<DWARFDebugLine::Row> & Rows,unsigned PointerSize)883 void DwarfStreamer::emitLineTableForUnit(MCDwarfLineTableParams Params,
884                                          StringRef PrologueBytes,
885                                          unsigned MinInstLength,
886                                          std::vector<DWARFDebugLine::Row> &Rows,
887                                          unsigned PointerSize) {
888   // Switch to the section where the table will be emitted into.
889   MS->SwitchSection(MC->getObjectFileInfo()->getDwarfLineSection());
890   MCSymbol *LineStartSym = MC->createTempSymbol();
891   MCSymbol *LineEndSym = MC->createTempSymbol();
892 
893   // The first 4 bytes is the total length of the information for this
894   // compilation unit (not including these 4 bytes for the length).
895   Asm->EmitLabelDifference(LineEndSym, LineStartSym, 4);
896   Asm->OutStreamer->EmitLabel(LineStartSym);
897   // Copy Prologue.
898   MS->EmitBytes(PrologueBytes);
899   LineSectionSize += PrologueBytes.size() + 4;
900 
901   SmallString<128> EncodingBuffer;
902   raw_svector_ostream EncodingOS(EncodingBuffer);
903 
904   if (Rows.empty()) {
905     // We only have the dummy entry, dsymutil emits an entry with a 0
906     // address in that case.
907     MCDwarfLineAddr::Encode(*MC, Params, INT64_MAX, 0, EncodingOS);
908     MS->EmitBytes(EncodingOS.str());
909     LineSectionSize += EncodingBuffer.size();
910     MS->EmitLabel(LineEndSym);
911     return;
912   }
913 
914   // Line table state machine fields
915   unsigned FileNum = 1;
916   unsigned LastLine = 1;
917   unsigned Column = 0;
918   unsigned IsStatement = 1;
919   unsigned Isa = 0;
920   uint64_t Address = -1ULL;
921 
922   unsigned RowsSinceLastSequence = 0;
923 
924   for (unsigned Idx = 0; Idx < Rows.size(); ++Idx) {
925     auto &Row = Rows[Idx];
926 
927     int64_t AddressDelta;
928     if (Address == -1ULL) {
929       MS->EmitIntValue(dwarf::DW_LNS_extended_op, 1);
930       MS->EmitULEB128IntValue(PointerSize + 1);
931       MS->EmitIntValue(dwarf::DW_LNE_set_address, 1);
932       MS->EmitIntValue(Row.Address, PointerSize);
933       LineSectionSize += 2 + PointerSize + getULEB128Size(PointerSize + 1);
934       AddressDelta = 0;
935     } else {
936       AddressDelta = (Row.Address - Address) / MinInstLength;
937     }
938 
939     // FIXME: code copied and transfromed from
940     // MCDwarf.cpp::EmitDwarfLineTable. We should find a way to share
941     // this code, but the current compatibility requirement with
942     // classic dsymutil makes it hard. Revisit that once this
943     // requirement is dropped.
944 
945     if (FileNum != Row.File) {
946       FileNum = Row.File;
947       MS->EmitIntValue(dwarf::DW_LNS_set_file, 1);
948       MS->EmitULEB128IntValue(FileNum);
949       LineSectionSize += 1 + getULEB128Size(FileNum);
950     }
951     if (Column != Row.Column) {
952       Column = Row.Column;
953       MS->EmitIntValue(dwarf::DW_LNS_set_column, 1);
954       MS->EmitULEB128IntValue(Column);
955       LineSectionSize += 1 + getULEB128Size(Column);
956     }
957 
958     // FIXME: We should handle the discriminator here, but dsymutil
959     // doesn' consider it, thus ignore it for now.
960 
961     if (Isa != Row.Isa) {
962       Isa = Row.Isa;
963       MS->EmitIntValue(dwarf::DW_LNS_set_isa, 1);
964       MS->EmitULEB128IntValue(Isa);
965       LineSectionSize += 1 + getULEB128Size(Isa);
966     }
967     if (IsStatement != Row.IsStmt) {
968       IsStatement = Row.IsStmt;
969       MS->EmitIntValue(dwarf::DW_LNS_negate_stmt, 1);
970       LineSectionSize += 1;
971     }
972     if (Row.BasicBlock) {
973       MS->EmitIntValue(dwarf::DW_LNS_set_basic_block, 1);
974       LineSectionSize += 1;
975     }
976 
977     if (Row.PrologueEnd) {
978       MS->EmitIntValue(dwarf::DW_LNS_set_prologue_end, 1);
979       LineSectionSize += 1;
980     }
981 
982     if (Row.EpilogueBegin) {
983       MS->EmitIntValue(dwarf::DW_LNS_set_epilogue_begin, 1);
984       LineSectionSize += 1;
985     }
986 
987     int64_t LineDelta = int64_t(Row.Line) - LastLine;
988     if (!Row.EndSequence) {
989       MCDwarfLineAddr::Encode(*MC, Params, LineDelta, AddressDelta, EncodingOS);
990       MS->EmitBytes(EncodingOS.str());
991       LineSectionSize += EncodingBuffer.size();
992       EncodingBuffer.resize(0);
993       Address = Row.Address;
994       LastLine = Row.Line;
995       RowsSinceLastSequence++;
996     } else {
997       if (LineDelta) {
998         MS->EmitIntValue(dwarf::DW_LNS_advance_line, 1);
999         MS->EmitSLEB128IntValue(LineDelta);
1000         LineSectionSize += 1 + getSLEB128Size(LineDelta);
1001       }
1002       if (AddressDelta) {
1003         MS->EmitIntValue(dwarf::DW_LNS_advance_pc, 1);
1004         MS->EmitULEB128IntValue(AddressDelta);
1005         LineSectionSize += 1 + getULEB128Size(AddressDelta);
1006       }
1007       MCDwarfLineAddr::Encode(*MC, Params, INT64_MAX, 0, EncodingOS);
1008       MS->EmitBytes(EncodingOS.str());
1009       LineSectionSize += EncodingBuffer.size();
1010       EncodingBuffer.resize(0);
1011       Address = -1ULL;
1012       LastLine = FileNum = IsStatement = 1;
1013       RowsSinceLastSequence = Column = Isa = 0;
1014     }
1015   }
1016 
1017   if (RowsSinceLastSequence) {
1018     MCDwarfLineAddr::Encode(*MC, Params, INT64_MAX, 0, EncodingOS);
1019     MS->EmitBytes(EncodingOS.str());
1020     LineSectionSize += EncodingBuffer.size();
1021     EncodingBuffer.resize(0);
1022   }
1023 
1024   MS->EmitLabel(LineEndSym);
1025 }
1026 
1027 /// \brief Emit the pubnames or pubtypes section contribution for \p
1028 /// Unit into \p Sec. The data is provided in \p Names.
emitPubSectionForUnit(MCSection * Sec,StringRef SecName,const CompileUnit & Unit,const std::vector<CompileUnit::AccelInfo> & Names)1029 void DwarfStreamer::emitPubSectionForUnit(
1030     MCSection *Sec, StringRef SecName, const CompileUnit &Unit,
1031     const std::vector<CompileUnit::AccelInfo> &Names) {
1032   if (Names.empty())
1033     return;
1034 
1035   // Start the dwarf pubnames section.
1036   Asm->OutStreamer->SwitchSection(Sec);
1037   MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + SecName + "_begin");
1038   MCSymbol *EndLabel = Asm->createTempSymbol("pub" + SecName + "_end");
1039 
1040   bool HeaderEmitted = false;
1041   // Emit the pubnames for this compilation unit.
1042   for (const auto &Name : Names) {
1043     if (Name.SkipPubSection)
1044       continue;
1045 
1046     if (!HeaderEmitted) {
1047       // Emit the header.
1048       Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); // Length
1049       Asm->OutStreamer->EmitLabel(BeginLabel);
1050       Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION); // Version
1051       Asm->EmitInt32(Unit.getStartOffset());      // Unit offset
1052       Asm->EmitInt32(Unit.getNextUnitOffset() - Unit.getStartOffset()); // Size
1053       HeaderEmitted = true;
1054     }
1055     Asm->EmitInt32(Name.Die->getOffset());
1056     Asm->OutStreamer->EmitBytes(
1057         StringRef(Name.Name.data(), Name.Name.size() + 1));
1058   }
1059 
1060   if (!HeaderEmitted)
1061     return;
1062   Asm->EmitInt32(0); // End marker.
1063   Asm->OutStreamer->EmitLabel(EndLabel);
1064 }
1065 
1066 /// \brief Emit .debug_pubnames for \p Unit.
emitPubNamesForUnit(const CompileUnit & Unit)1067 void DwarfStreamer::emitPubNamesForUnit(const CompileUnit &Unit) {
1068   emitPubSectionForUnit(MC->getObjectFileInfo()->getDwarfPubNamesSection(),
1069                         "names", Unit, Unit.getPubnames());
1070 }
1071 
1072 /// \brief Emit .debug_pubtypes for \p Unit.
emitPubTypesForUnit(const CompileUnit & Unit)1073 void DwarfStreamer::emitPubTypesForUnit(const CompileUnit &Unit) {
1074   emitPubSectionForUnit(MC->getObjectFileInfo()->getDwarfPubTypesSection(),
1075                         "types", Unit, Unit.getPubtypes());
1076 }
1077 
1078 /// \brief Emit a CIE into the debug_frame section.
emitCIE(StringRef CIEBytes)1079 void DwarfStreamer::emitCIE(StringRef CIEBytes) {
1080   MS->SwitchSection(MC->getObjectFileInfo()->getDwarfFrameSection());
1081 
1082   MS->EmitBytes(CIEBytes);
1083   FrameSectionSize += CIEBytes.size();
1084 }
1085 
1086 /// \brief Emit a FDE into the debug_frame section. \p FDEBytes
1087 /// contains the FDE data without the length, CIE offset and address
1088 /// which will be replaced with the paramter values.
emitFDE(uint32_t CIEOffset,uint32_t AddrSize,uint32_t Address,StringRef FDEBytes)1089 void DwarfStreamer::emitFDE(uint32_t CIEOffset, uint32_t AddrSize,
1090                             uint32_t Address, StringRef FDEBytes) {
1091   MS->SwitchSection(MC->getObjectFileInfo()->getDwarfFrameSection());
1092 
1093   MS->EmitIntValue(FDEBytes.size() + 4 + AddrSize, 4);
1094   MS->EmitIntValue(CIEOffset, 4);
1095   MS->EmitIntValue(Address, AddrSize);
1096   MS->EmitBytes(FDEBytes);
1097   FrameSectionSize += FDEBytes.size() + 8 + AddrSize;
1098 }
1099 
1100 /// \brief The core of the Dwarf linking logic.
1101 ///
1102 /// The link of the dwarf information from the object files will be
1103 /// driven by the selection of 'root DIEs', which are DIEs that
1104 /// describe variables or functions that are present in the linked
1105 /// binary (and thus have entries in the debug map). All the debug
1106 /// information that will be linked (the DIEs, but also the line
1107 /// tables, ranges, ...) is derived from that set of root DIEs.
1108 ///
1109 /// The root DIEs are identified because they contain relocations that
1110 /// correspond to a debug map entry at specific places (the low_pc for
1111 /// a function, the location for a variable). These relocations are
1112 /// called ValidRelocs in the DwarfLinker and are gathered as a very
1113 /// first step when we start processing a DebugMapObject.
1114 class DwarfLinker {
1115 public:
DwarfLinker(StringRef OutputFilename,const LinkOptions & Options)1116   DwarfLinker(StringRef OutputFilename, const LinkOptions &Options)
1117       : OutputFilename(OutputFilename), Options(Options),
1118         BinHolder(Options.Verbose), LastCIEOffset(0) {}
1119 
1120   /// \brief Link the contents of the DebugMap.
1121   bool link(const DebugMap &);
1122 
1123   void reportWarning(const Twine &Warning, const DWARFUnit *Unit = nullptr,
1124                      const DWARFDebugInfoEntryMinimal *DIE = nullptr) const;
1125 
1126 private:
1127   /// \brief Called at the start of a debug object link.
1128   void startDebugObject(DWARFContext &, DebugMapObject &);
1129 
1130   /// \brief Called at the end of a debug object link.
1131   void endDebugObject();
1132 
1133   /// Keeps track of relocations.
1134   class RelocationManager {
1135     struct ValidReloc {
1136       uint32_t Offset;
1137       uint32_t Size;
1138       uint64_t Addend;
1139       const DebugMapObject::DebugMapEntry *Mapping;
1140 
ValidRelocllvm::dsymutil::__anondabe43b60111::DwarfLinker::RelocationManager::ValidReloc1141       ValidReloc(uint32_t Offset, uint32_t Size, uint64_t Addend,
1142                  const DebugMapObject::DebugMapEntry *Mapping)
1143           : Offset(Offset), Size(Size), Addend(Addend), Mapping(Mapping) {}
1144 
operator <llvm::dsymutil::__anondabe43b60111::DwarfLinker::RelocationManager::ValidReloc1145       bool operator<(const ValidReloc &RHS) const {
1146         return Offset < RHS.Offset;
1147       }
1148     };
1149 
1150     DwarfLinker &Linker;
1151 
1152     /// \brief The valid relocations for the current DebugMapObject.
1153     /// This vector is sorted by relocation offset.
1154     std::vector<ValidReloc> ValidRelocs;
1155 
1156     /// \brief Index into ValidRelocs of the next relocation to
1157     /// consider. As we walk the DIEs in ascending file offset and as
1158     /// ValidRelocs is sorted by file offset, keeping this index
1159     /// up to date is all we have to do to have a cheap lookup during the
1160     /// root DIE selection and during DIE cloning.
1161     unsigned NextValidReloc;
1162 
1163   public:
RelocationManager(DwarfLinker & Linker)1164     RelocationManager(DwarfLinker &Linker)
1165         : Linker(Linker), NextValidReloc(0) {}
1166 
hasValidRelocs() const1167     bool hasValidRelocs() const { return !ValidRelocs.empty(); }
1168     /// \brief Reset the NextValidReloc counter.
resetValidRelocs()1169     void resetValidRelocs() { NextValidReloc = 0; }
1170 
1171     /// \defgroup FindValidRelocations Translate debug map into a list
1172     /// of relevant relocations
1173     ///
1174     /// @{
1175     bool findValidRelocsInDebugInfo(const object::ObjectFile &Obj,
1176                                     const DebugMapObject &DMO);
1177 
1178     bool findValidRelocs(const object::SectionRef &Section,
1179                          const object::ObjectFile &Obj,
1180                          const DebugMapObject &DMO);
1181 
1182     void findValidRelocsMachO(const object::SectionRef &Section,
1183                               const object::MachOObjectFile &Obj,
1184                               const DebugMapObject &DMO);
1185     /// @}
1186 
1187     bool hasValidRelocation(uint32_t StartOffset, uint32_t EndOffset,
1188                             CompileUnit::DIEInfo &Info);
1189 
1190     bool applyValidRelocs(MutableArrayRef<char> Data, uint32_t BaseOffset,
1191                           bool isLittleEndian);
1192   };
1193 
1194   /// \defgroup FindRootDIEs Find DIEs corresponding to debug map entries.
1195   ///
1196   /// @{
1197   /// \brief Recursively walk the \p DIE tree and look for DIEs to
1198   /// keep. Store that information in \p CU's DIEInfo.
1199   void lookForDIEsToKeep(RelocationManager &RelocMgr,
1200                          const DWARFDebugInfoEntryMinimal &DIE,
1201                          const DebugMapObject &DMO, CompileUnit &CU,
1202                          unsigned Flags);
1203 
1204   /// If this compile unit is really a skeleton CU that points to a
1205   /// clang module, register it in ClangModules and return true.
1206   ///
1207   /// A skeleton CU is a CU without children, a DW_AT_gnu_dwo_name
1208   /// pointing to the module, and a DW_AT_gnu_dwo_id with the module
1209   /// hash.
1210   bool registerModuleReference(const DWARFDebugInfoEntryMinimal &CUDie,
1211                                const DWARFUnit &Unit, DebugMap &ModuleMap,
1212                                unsigned Indent = 0);
1213 
1214   /// Recursively add the debug info in this clang module .pcm
1215   /// file (and all the modules imported by it in a bottom-up fashion)
1216   /// to Units.
1217   void loadClangModule(StringRef Filename, StringRef ModulePath,
1218                        StringRef ModuleName, uint64_t DwoId,
1219                        DebugMap &ModuleMap, unsigned Indent = 0);
1220 
1221   /// \brief Flags passed to DwarfLinker::lookForDIEsToKeep
1222   enum TravesalFlags {
1223     TF_Keep = 1 << 0,            ///< Mark the traversed DIEs as kept.
1224     TF_InFunctionScope = 1 << 1, ///< Current scope is a fucntion scope.
1225     TF_DependencyWalk = 1 << 2,  ///< Walking the dependencies of a kept DIE.
1226     TF_ParentWalk = 1 << 3,      ///< Walking up the parents of a kept DIE.
1227     TF_ODR = 1 << 4,             ///< Use the ODR whhile keeping dependants.
1228     TF_SkipPC = 1 << 5,          ///< Skip all location attributes.
1229   };
1230 
1231   /// \brief Mark the passed DIE as well as all the ones it depends on
1232   /// as kept.
1233   void keepDIEAndDependencies(RelocationManager &RelocMgr,
1234                                const DWARFDebugInfoEntryMinimal &DIE,
1235                                CompileUnit::DIEInfo &MyInfo,
1236                                const DebugMapObject &DMO, CompileUnit &CU,
1237                                bool UseODR);
1238 
1239   unsigned shouldKeepDIE(RelocationManager &RelocMgr,
1240                          const DWARFDebugInfoEntryMinimal &DIE,
1241                          CompileUnit &Unit, CompileUnit::DIEInfo &MyInfo,
1242                          unsigned Flags);
1243 
1244   unsigned shouldKeepVariableDIE(RelocationManager &RelocMgr,
1245                                  const DWARFDebugInfoEntryMinimal &DIE,
1246                                  CompileUnit &Unit,
1247                                  CompileUnit::DIEInfo &MyInfo, unsigned Flags);
1248 
1249   unsigned shouldKeepSubprogramDIE(RelocationManager &RelocMgr,
1250                                    const DWARFDebugInfoEntryMinimal &DIE,
1251                                    CompileUnit &Unit,
1252                                    CompileUnit::DIEInfo &MyInfo,
1253                                    unsigned Flags);
1254 
1255   bool hasValidRelocation(uint32_t StartOffset, uint32_t EndOffset,
1256                           CompileUnit::DIEInfo &Info);
1257   /// @}
1258 
1259   /// \defgroup Linking Methods used to link the debug information
1260   ///
1261   /// @{
1262 
1263   class DIECloner {
1264     DwarfLinker &Linker;
1265     RelocationManager &RelocMgr;
1266     /// Allocator used for all the DIEValue objects.
1267     BumpPtrAllocator &DIEAlloc;
1268     MutableArrayRef<CompileUnit> CompileUnits;
1269     LinkOptions Options;
1270 
1271   public:
DIECloner(DwarfLinker & Linker,RelocationManager & RelocMgr,BumpPtrAllocator & DIEAlloc,MutableArrayRef<CompileUnit> CompileUnits,LinkOptions & Options)1272     DIECloner(DwarfLinker &Linker, RelocationManager &RelocMgr,
1273               BumpPtrAllocator &DIEAlloc,
1274               MutableArrayRef<CompileUnit> CompileUnits, LinkOptions &Options)
1275         : Linker(Linker), RelocMgr(RelocMgr), DIEAlloc(DIEAlloc),
1276           CompileUnits(CompileUnits), Options(Options) {}
1277 
1278     /// Recursively clone \p InputDIE into a tree of DIE objects
1279     /// where useless (as decided by lookForDIEsToKeep()) bits have been
1280     /// stripped out and addresses have been rewritten according to the
1281     /// debug map.
1282     ///
1283     /// \param OutOffset is the offset of the cloned DIE in the output
1284     /// compile unit.
1285     /// \param PCOffset (while cloning a function scope) is the offset
1286     /// applied to the entry point of the function to get the linked address.
1287     ///
1288     /// \returns the root of the cloned tree or null if nothing was selected.
1289     DIE *cloneDIE(const DWARFDebugInfoEntryMinimal &InputDIE, CompileUnit &U,
1290                   int64_t PCOffset, uint32_t OutOffset, unsigned Flags);
1291 
1292     /// Construct the output DIE tree by cloning the DIEs we
1293     /// chose to keep above. If there are no valid relocs, then there's
1294     /// nothing to clone/emit.
1295     void cloneAllCompileUnits(DWARFContextInMemory &DwarfContext);
1296 
1297   private:
1298     typedef DWARFAbbreviationDeclaration::AttributeSpec AttributeSpec;
1299 
1300     /// Information gathered and exchanged between the various
1301     /// clone*Attributes helpers about the attributes of a particular DIE.
1302     struct AttributesInfo {
1303       const char *Name, *MangledName;         ///< Names.
1304       uint32_t NameOffset, MangledNameOffset; ///< Offsets in the string pool.
1305 
1306       uint64_t OrigLowPc;  ///< Value of AT_low_pc in the input DIE
1307       uint64_t OrigHighPc; ///< Value of AT_high_pc in the input DIE
1308       int64_t PCOffset; ///< Offset to apply to PC addresses inside a function.
1309 
1310       bool HasLowPc;      ///< Does the DIE have a low_pc attribute?
1311       bool IsDeclaration; ///< Is this DIE only a declaration?
1312 
AttributesInfollvm::dsymutil::__anondabe43b60111::DwarfLinker::DIECloner::AttributesInfo1313       AttributesInfo()
1314           : Name(nullptr), MangledName(nullptr), NameOffset(0),
1315             MangledNameOffset(0), OrigLowPc(UINT64_MAX), OrigHighPc(0),
1316             PCOffset(0), HasLowPc(false), IsDeclaration(false) {}
1317     };
1318 
1319     /// Helper for cloneDIE.
1320     unsigned cloneAttribute(DIE &Die,
1321                             const DWARFDebugInfoEntryMinimal &InputDIE,
1322                             CompileUnit &U, const DWARFFormValue &Val,
1323                             const AttributeSpec AttrSpec, unsigned AttrSize,
1324                             AttributesInfo &AttrInfo);
1325 
1326     /// Clone a string attribute described by \p AttrSpec and add
1327     /// it to \p Die.
1328     /// \returns the size of the new attribute.
1329     unsigned cloneStringAttribute(DIE &Die, AttributeSpec AttrSpec,
1330                                   const DWARFFormValue &Val,
1331                                   const DWARFUnit &U);
1332 
1333     /// Clone an attribute referencing another DIE and add
1334     /// it to \p Die.
1335     /// \returns the size of the new attribute.
1336     unsigned
1337     cloneDieReferenceAttribute(DIE &Die,
1338                                const DWARFDebugInfoEntryMinimal &InputDIE,
1339                                AttributeSpec AttrSpec, unsigned AttrSize,
1340                                const DWARFFormValue &Val, CompileUnit &Unit);
1341 
1342     /// Clone a block attribute described by \p AttrSpec and add
1343     /// it to \p Die.
1344     /// \returns the size of the new attribute.
1345     unsigned cloneBlockAttribute(DIE &Die, AttributeSpec AttrSpec,
1346                                  const DWARFFormValue &Val, unsigned AttrSize);
1347 
1348     /// Clone an address attribute described by \p AttrSpec and add
1349     /// it to \p Die.
1350     /// \returns the size of the new attribute.
1351     unsigned cloneAddressAttribute(DIE &Die, AttributeSpec AttrSpec,
1352                                    const DWARFFormValue &Val,
1353                                    const CompileUnit &Unit,
1354                                    AttributesInfo &Info);
1355 
1356     /// Clone a scalar attribute  and add it to \p Die.
1357     /// \returns the size of the new attribute.
1358     unsigned cloneScalarAttribute(DIE &Die,
1359                                   const DWARFDebugInfoEntryMinimal &InputDIE,
1360                                   CompileUnit &U, AttributeSpec AttrSpec,
1361                                   const DWARFFormValue &Val, unsigned AttrSize,
1362                                   AttributesInfo &Info);
1363 
1364     /// Get the potential name and mangled name for the entity
1365     /// described by \p Die and store them in \p Info if they are not
1366     /// already there.
1367     /// \returns whether a name was found.
1368     bool getDIENames(const DWARFDebugInfoEntryMinimal &Die, DWARFUnit &U,
1369                      AttributesInfo &Info);
1370 
1371     /// Create a copy of abbreviation Abbrev.
1372     void copyAbbrev(const DWARFAbbreviationDeclaration &Abbrev, bool hasODR);
1373   };
1374 
1375   /// \brief Assign an abbreviation number to \p Abbrev
1376   void AssignAbbrev(DIEAbbrev &Abbrev);
1377 
1378   /// \brief FoldingSet that uniques the abbreviations.
1379   FoldingSet<DIEAbbrev> AbbreviationsSet;
1380   /// \brief Storage for the unique Abbreviations.
1381   /// This is passed to AsmPrinter::emitDwarfAbbrevs(), thus it must
1382   /// remain a vector of unique_ptrs.
1383   std::vector<std::unique_ptr<DIEAbbrev>> Abbreviations;
1384 
1385   /// \brief Compute and emit debug_ranges section for \p Unit, and
1386   /// patch the attributes referencing it.
1387   void patchRangesForUnit(const CompileUnit &Unit, DWARFContext &Dwarf) const;
1388 
1389   /// \brief Generate and emit the DW_AT_ranges attribute for a
1390   /// compile_unit if it had one.
1391   void generateUnitRanges(CompileUnit &Unit) const;
1392 
1393   /// \brief Extract the line tables from the original dwarf, extract
1394   /// the relevant parts according to the linked function ranges and
1395   /// emit the result in the debug_line section.
1396   void patchLineTableForUnit(CompileUnit &Unit, DWARFContext &OrigDwarf);
1397 
1398   /// \brief Emit the accelerator entries for \p Unit.
1399   void emitAcceleratorEntriesForUnit(CompileUnit &Unit);
1400 
1401   /// \brief Patch the frame info for an object file and emit it.
1402   void patchFrameInfoForObject(const DebugMapObject &, DWARFContext &,
1403                                unsigned AddressSize);
1404 
1405   /// \brief DIELoc objects that need to be destructed (but not freed!).
1406   std::vector<DIELoc *> DIELocs;
1407   /// \brief DIEBlock objects that need to be destructed (but not freed!).
1408   std::vector<DIEBlock *> DIEBlocks;
1409   /// \brief Allocator used for all the DIEValue objects.
1410   BumpPtrAllocator DIEAlloc;
1411   /// @}
1412 
1413   /// ODR Contexts for that link.
1414   DeclContextTree ODRContexts;
1415 
1416   /// \defgroup Helpers Various helper methods.
1417   ///
1418   /// @{
1419   bool createStreamer(const Triple &TheTriple, StringRef OutputFilename);
1420 
1421   /// \brief Attempt to load a debug object from disk.
1422   ErrorOr<const object::ObjectFile &> loadObject(BinaryHolder &BinaryHolder,
1423                                                  DebugMapObject &Obj,
1424                                                  const DebugMap &Map);
1425   /// @}
1426 
1427   std::string OutputFilename;
1428   LinkOptions Options;
1429   BinaryHolder BinHolder;
1430   std::unique_ptr<DwarfStreamer> Streamer;
1431   uint64_t OutputDebugInfoSize;
1432   unsigned UnitID; ///< A unique ID that identifies each compile unit.
1433 
1434   /// The units of the current debug map object.
1435   std::vector<CompileUnit> Units;
1436 
1437   /// The debug map object currently under consideration.
1438   DebugMapObject *CurrentDebugObject;
1439 
1440   /// \brief The Dwarf string pool
1441   NonRelocatableStringpool StringPool;
1442 
1443   /// \brief This map is keyed by the entry PC of functions in that
1444   /// debug object and the associated value is a pair storing the
1445   /// corresponding end PC and the offset to apply to get the linked
1446   /// address.
1447   ///
1448   /// See startDebugObject() for a more complete description of its use.
1449   std::map<uint64_t, std::pair<uint64_t, int64_t>> Ranges;
1450 
1451   /// \brief The CIEs that have been emitted in the output
1452   /// section. The actual CIE data serves as the key to this StringMap,
1453   /// this takes care of comparing the semantics of CIEs defined in
1454   /// different object files.
1455   StringMap<uint32_t> EmittedCIEs;
1456 
1457   /// Offset of the last CIE that has been emitted in the output
1458   /// debug_frame section.
1459   uint32_t LastCIEOffset;
1460 
1461   /// Mapping the PCM filename to the DwoId.
1462   StringMap<uint64_t> ClangModules;
1463 
1464   bool ModuleCacheHintDisplayed = false;
1465   bool ArchiveHintDisplayed = false;
1466 };
1467 
1468 /// Similar to DWARFUnitSection::getUnitForOffset(), but returning our
1469 /// CompileUnit object instead.
getUnitForOffset(MutableArrayRef<CompileUnit> Units,unsigned Offset)1470 static CompileUnit *getUnitForOffset(MutableArrayRef<CompileUnit> Units,
1471                                      unsigned Offset) {
1472   auto CU =
1473       std::upper_bound(Units.begin(), Units.end(), Offset,
1474                        [](uint32_t LHS, const CompileUnit &RHS) {
1475                          return LHS < RHS.getOrigUnit().getNextUnitOffset();
1476                        });
1477   return CU != Units.end() ? &*CU : nullptr;
1478 }
1479 
1480 /// Resolve the DIE attribute reference that has been
1481 /// extracted in \p RefValue. The resulting DIE migh be in another
1482 /// CompileUnit which is stored into \p ReferencedCU.
1483 /// \returns null if resolving fails for any reason.
resolveDIEReference(const DwarfLinker & Linker,MutableArrayRef<CompileUnit> Units,const DWARFFormValue & RefValue,const DWARFUnit & Unit,const DWARFDebugInfoEntryMinimal & DIE,CompileUnit * & RefCU)1484 static const DWARFDebugInfoEntryMinimal *resolveDIEReference(
1485     const DwarfLinker &Linker, MutableArrayRef<CompileUnit> Units,
1486     const DWARFFormValue &RefValue, const DWARFUnit &Unit,
1487     const DWARFDebugInfoEntryMinimal &DIE, CompileUnit *&RefCU) {
1488   assert(RefValue.isFormClass(DWARFFormValue::FC_Reference));
1489   uint64_t RefOffset = *RefValue.getAsReference(&Unit);
1490 
1491   if ((RefCU = getUnitForOffset(Units, RefOffset)))
1492     if (const auto *RefDie = RefCU->getOrigUnit().getDIEForOffset(RefOffset))
1493       return RefDie;
1494 
1495   Linker.reportWarning("could not find referenced DIE", &Unit, &DIE);
1496   return nullptr;
1497 }
1498 
1499 /// \returns whether the passed \a Attr type might contain a DIE
1500 /// reference suitable for ODR uniquing.
isODRAttribute(uint16_t Attr)1501 static bool isODRAttribute(uint16_t Attr) {
1502   switch (Attr) {
1503   default:
1504     return false;
1505   case dwarf::DW_AT_type:
1506   case dwarf::DW_AT_containing_type:
1507   case dwarf::DW_AT_specification:
1508   case dwarf::DW_AT_abstract_origin:
1509   case dwarf::DW_AT_import:
1510     return true;
1511   }
1512   llvm_unreachable("Improper attribute.");
1513 }
1514 
1515 /// Set the last DIE/CU a context was seen in and, possibly invalidate
1516 /// the context if it is ambiguous.
1517 ///
1518 /// In the current implementation, we don't handle overloaded
1519 /// functions well, because the argument types are not taken into
1520 /// account when computing the DeclContext tree.
1521 ///
1522 /// Some of this is mitigated byt using mangled names that do contain
1523 /// the arguments types, but sometimes (eg. with function templates)
1524 /// we don't have that. In that case, just do not unique anything that
1525 /// refers to the contexts we are not able to distinguish.
1526 ///
1527 /// If a context that is not a namespace appears twice in the same CU,
1528 /// we know it is ambiguous. Make it invalid.
setLastSeenDIE(CompileUnit & U,const DWARFDebugInfoEntryMinimal * Die)1529 bool DeclContext::setLastSeenDIE(CompileUnit &U,
1530                                  const DWARFDebugInfoEntryMinimal *Die) {
1531   if (LastSeenCompileUnitID == U.getUniqueID()) {
1532     DWARFUnit &OrigUnit = U.getOrigUnit();
1533     uint32_t FirstIdx = OrigUnit.getDIEIndex(LastSeenDIE);
1534     U.getInfo(FirstIdx).Ctxt = nullptr;
1535     return false;
1536   }
1537 
1538   LastSeenCompileUnitID = U.getUniqueID();
1539   LastSeenDIE = Die;
1540   return true;
1541 }
1542 
getChildDeclContext(DeclContext & Context,const DWARFDebugInfoEntryMinimal * DIE,CompileUnit & U,NonRelocatableStringpool & StringPool,bool InClangModule)1543 PointerIntPair<DeclContext *, 1> DeclContextTree::getChildDeclContext(
1544     DeclContext &Context, const DWARFDebugInfoEntryMinimal *DIE, CompileUnit &U,
1545     NonRelocatableStringpool &StringPool, bool InClangModule) {
1546   unsigned Tag = DIE->getTag();
1547 
1548   // FIXME: dsymutil-classic compat: We should bail out here if we
1549   // have a specification or an abstract_origin. We will get the
1550   // parent context wrong here.
1551 
1552   switch (Tag) {
1553   default:
1554     // By default stop gathering child contexts.
1555     return PointerIntPair<DeclContext *, 1>(nullptr);
1556   case dwarf::DW_TAG_module:
1557     break;
1558   case dwarf::DW_TAG_compile_unit:
1559     return PointerIntPair<DeclContext *, 1>(&Context);
1560   case dwarf::DW_TAG_subprogram:
1561     // Do not unique anything inside CU local functions.
1562     if ((Context.getTag() == dwarf::DW_TAG_namespace ||
1563          Context.getTag() == dwarf::DW_TAG_compile_unit) &&
1564         !DIE->getAttributeValueAsUnsignedConstant(&U.getOrigUnit(),
1565                                                   dwarf::DW_AT_external, 0))
1566       return PointerIntPair<DeclContext *, 1>(nullptr);
1567   // Fallthrough
1568   case dwarf::DW_TAG_member:
1569   case dwarf::DW_TAG_namespace:
1570   case dwarf::DW_TAG_structure_type:
1571   case dwarf::DW_TAG_class_type:
1572   case dwarf::DW_TAG_union_type:
1573   case dwarf::DW_TAG_enumeration_type:
1574   case dwarf::DW_TAG_typedef:
1575     // Artificial things might be ambiguous, because they might be
1576     // created on demand. For example implicitely defined constructors
1577     // are ambiguous because of the way we identify contexts, and they
1578     // won't be generated everytime everywhere.
1579     if (DIE->getAttributeValueAsUnsignedConstant(&U.getOrigUnit(),
1580                                                  dwarf::DW_AT_artificial, 0))
1581       return PointerIntPair<DeclContext *, 1>(nullptr);
1582     break;
1583   }
1584 
1585   const char *Name = DIE->getName(&U.getOrigUnit(), DINameKind::LinkageName);
1586   const char *ShortName = DIE->getName(&U.getOrigUnit(), DINameKind::ShortName);
1587   StringRef NameRef;
1588   StringRef ShortNameRef;
1589   StringRef FileRef;
1590 
1591   if (Name)
1592     NameRef = StringPool.internString(Name);
1593   else if (Tag == dwarf::DW_TAG_namespace)
1594     // FIXME: For dsymutil-classic compatibility. I think uniquing
1595     // within anonymous namespaces is wrong. There is no ODR guarantee
1596     // there.
1597     NameRef = StringPool.internString("(anonymous namespace)");
1598 
1599   if (ShortName && ShortName != Name)
1600     ShortNameRef = StringPool.internString(ShortName);
1601   else
1602     ShortNameRef = NameRef;
1603 
1604   if (Tag != dwarf::DW_TAG_class_type && Tag != dwarf::DW_TAG_structure_type &&
1605       Tag != dwarf::DW_TAG_union_type &&
1606       Tag != dwarf::DW_TAG_enumeration_type && NameRef.empty())
1607     return PointerIntPair<DeclContext *, 1>(nullptr);
1608 
1609   unsigned Line = 0;
1610   unsigned ByteSize = UINT32_MAX;
1611 
1612   if (!InClangModule) {
1613     // Gather some discriminating data about the DeclContext we will be
1614     // creating: File, line number and byte size. This shouldn't be
1615     // necessary, because the ODR is just about names, but given that we
1616     // do some approximations with overloaded functions and anonymous
1617     // namespaces, use these additional data points to make the process
1618     // safer.  This is disabled for clang modules, because forward
1619     // declarations of module-defined types do not have a file and line.
1620     ByteSize = DIE->getAttributeValueAsUnsignedConstant(
1621         &U.getOrigUnit(), dwarf::DW_AT_byte_size, UINT64_MAX);
1622     if (Tag != dwarf::DW_TAG_namespace || !Name) {
1623       if (unsigned FileNum = DIE->getAttributeValueAsUnsignedConstant(
1624               &U.getOrigUnit(), dwarf::DW_AT_decl_file, 0)) {
1625         if (const auto *LT = U.getOrigUnit().getContext().getLineTableForUnit(
1626                 &U.getOrigUnit())) {
1627           // FIXME: dsymutil-classic compatibility. I'd rather not
1628           // unique anything in anonymous namespaces, but if we do, then
1629           // verify that the file and line correspond.
1630           if (!Name && Tag == dwarf::DW_TAG_namespace)
1631             FileNum = 1;
1632 
1633           // FIXME: Passing U.getOrigUnit().getCompilationDir()
1634           // instead of "" would allow more uniquing, but for now, do
1635           // it this way to match dsymutil-classic.
1636           std::string File;
1637           if (LT->getFileNameByIndex(
1638                   FileNum, "",
1639                   DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
1640                   File)) {
1641             Line = DIE->getAttributeValueAsUnsignedConstant(
1642                 &U.getOrigUnit(), dwarf::DW_AT_decl_line, 0);
1643             // Cache the resolved paths, because calling realpath is expansive.
1644             StringRef ResolvedPath = U.getResolvedPath(FileNum);
1645             if (!ResolvedPath.empty()) {
1646               FileRef = ResolvedPath;
1647             } else {
1648 #ifdef HAVE_REALPATH
1649               char RealPath[PATH_MAX + 1];
1650               RealPath[PATH_MAX] = 0;
1651               if (::realpath(File.c_str(), RealPath))
1652                 File = RealPath;
1653 #endif
1654               FileRef = StringPool.internString(File);
1655               U.setResolvedPath(FileNum, FileRef);
1656             }
1657           }
1658         }
1659       }
1660     }
1661   }
1662 
1663   if (!Line && NameRef.empty())
1664     return PointerIntPair<DeclContext *, 1>(nullptr);
1665 
1666   // We hash NameRef, which is the mangled name, in order to get most
1667   // overloaded functions resolve correctly.
1668   //
1669   // Strictly speaking, hashing the Tag is only necessary for a
1670   // DW_TAG_module, to prevent uniquing of a module and a namespace
1671   // with the same name.
1672   //
1673   // FIXME: dsymutil-classic won't unique the same type presented
1674   // once as a struct and once as a class. Using the Tag in the fully
1675   // qualified name hash to get the same effect.
1676   unsigned Hash = hash_combine(Context.getQualifiedNameHash(), Tag, NameRef);
1677 
1678   // FIXME: dsymutil-classic compatibility: when we don't have a name,
1679   // use the filename.
1680   if (Tag == dwarf::DW_TAG_namespace && NameRef == "(anonymous namespace)")
1681     Hash = hash_combine(Hash, FileRef);
1682 
1683   // Now look if this context already exists.
1684   DeclContext Key(Hash, Line, ByteSize, Tag, NameRef, FileRef, Context);
1685   auto ContextIter = Contexts.find(&Key);
1686 
1687   if (ContextIter == Contexts.end()) {
1688     // The context wasn't found.
1689     bool Inserted;
1690     DeclContext *NewContext =
1691         new (Allocator) DeclContext(Hash, Line, ByteSize, Tag, NameRef, FileRef,
1692                                     Context, DIE, U.getUniqueID());
1693     std::tie(ContextIter, Inserted) = Contexts.insert(NewContext);
1694     assert(Inserted && "Failed to insert DeclContext");
1695     (void)Inserted;
1696   } else if (Tag != dwarf::DW_TAG_namespace &&
1697              !(*ContextIter)->setLastSeenDIE(U, DIE)) {
1698     // The context was found, but it is ambiguous with another context
1699     // in the same file. Mark it invalid.
1700     return PointerIntPair<DeclContext *, 1>(*ContextIter, /* Invalid= */ 1);
1701   }
1702 
1703   assert(ContextIter != Contexts.end());
1704   // FIXME: dsymutil-classic compatibility. Union types aren't
1705   // uniques, but their children might be.
1706   if ((Tag == dwarf::DW_TAG_subprogram &&
1707        Context.getTag() != dwarf::DW_TAG_structure_type &&
1708        Context.getTag() != dwarf::DW_TAG_class_type) ||
1709       (Tag == dwarf::DW_TAG_union_type))
1710     return PointerIntPair<DeclContext *, 1>(*ContextIter, /* Invalid= */ 1);
1711 
1712   return PointerIntPair<DeclContext *, 1>(*ContextIter);
1713 }
1714 
getDIENames(const DWARFDebugInfoEntryMinimal & Die,DWARFUnit & U,AttributesInfo & Info)1715 bool DwarfLinker::DIECloner::getDIENames(const DWARFDebugInfoEntryMinimal &Die,
1716                                          DWARFUnit &U, AttributesInfo &Info) {
1717   // FIXME: a bit wasteful as the first getName might return the
1718   // short name.
1719   if (!Info.MangledName &&
1720       (Info.MangledName = Die.getName(&U, DINameKind::LinkageName)))
1721     Info.MangledNameOffset =
1722         Linker.StringPool.getStringOffset(Info.MangledName);
1723 
1724   if (!Info.Name && (Info.Name = Die.getName(&U, DINameKind::ShortName)))
1725     Info.NameOffset = Linker.StringPool.getStringOffset(Info.Name);
1726 
1727   return Info.Name || Info.MangledName;
1728 }
1729 
1730 /// \brief Report a warning to the user, optionaly including
1731 /// information about a specific \p DIE related to the warning.
reportWarning(const Twine & Warning,const DWARFUnit * Unit,const DWARFDebugInfoEntryMinimal * DIE) const1732 void DwarfLinker::reportWarning(const Twine &Warning, const DWARFUnit *Unit,
1733                                 const DWARFDebugInfoEntryMinimal *DIE) const {
1734   StringRef Context = "<debug map>";
1735   if (CurrentDebugObject)
1736     Context = CurrentDebugObject->getObjectFilename();
1737   warn(Warning, Context);
1738 
1739   if (!Options.Verbose || !DIE)
1740     return;
1741 
1742   errs() << "    in DIE:\n";
1743   DIE->dump(errs(), const_cast<DWARFUnit *>(Unit), 0 /* RecurseDepth */,
1744             6 /* Indent */);
1745 }
1746 
createStreamer(const Triple & TheTriple,StringRef OutputFilename)1747 bool DwarfLinker::createStreamer(const Triple &TheTriple,
1748                                  StringRef OutputFilename) {
1749   if (Options.NoOutput)
1750     return true;
1751 
1752   Streamer = llvm::make_unique<DwarfStreamer>();
1753   return Streamer->init(TheTriple, OutputFilename);
1754 }
1755 
1756 /// Recursive helper to build the global DeclContext information and
1757 /// gather the child->parent relationships in the original compile unit.
1758 ///
1759 /// \return true when this DIE and all of its children are only
1760 /// forward declarations to types defined in external clang modules
1761 /// (i.e., forward declarations that are children of a DW_TAG_module).
analyzeContextInfo(const DWARFDebugInfoEntryMinimal * DIE,unsigned ParentIdx,CompileUnit & CU,DeclContext * CurrentDeclContext,NonRelocatableStringpool & StringPool,DeclContextTree & Contexts,bool InImportedModule=false)1762 static bool analyzeContextInfo(const DWARFDebugInfoEntryMinimal *DIE,
1763                                unsigned ParentIdx, CompileUnit &CU,
1764                                DeclContext *CurrentDeclContext,
1765                                NonRelocatableStringpool &StringPool,
1766                                DeclContextTree &Contexts,
1767                                bool InImportedModule = false) {
1768   unsigned MyIdx = CU.getOrigUnit().getDIEIndex(DIE);
1769   CompileUnit::DIEInfo &Info = CU.getInfo(MyIdx);
1770 
1771   // Clang imposes an ODR on modules(!) regardless of the language:
1772   //  "The module-id should consist of only a single identifier,
1773   //   which provides the name of the module being defined. Each
1774   //   module shall have a single definition."
1775   //
1776   // This does not extend to the types inside the modules:
1777   //  "[I]n C, this implies that if two structs are defined in
1778   //   different submodules with the same name, those two types are
1779   //   distinct types (but may be compatible types if their
1780   //   definitions match)."
1781   //
1782   // We treat non-C++ modules like namespaces for this reason.
1783   if (DIE->getTag() == dwarf::DW_TAG_module && ParentIdx == 0 &&
1784       DIE->getAttributeValueAsString(&CU.getOrigUnit(), dwarf::DW_AT_name,
1785                                      "") != CU.getClangModuleName()) {
1786     InImportedModule = true;
1787   }
1788 
1789   Info.ParentIdx = ParentIdx;
1790   bool InClangModule = CU.isClangModule() || InImportedModule;
1791   if (CU.hasODR() || InClangModule) {
1792     if (CurrentDeclContext) {
1793       auto PtrInvalidPair = Contexts.getChildDeclContext(
1794           *CurrentDeclContext, DIE, CU, StringPool, InClangModule);
1795       CurrentDeclContext = PtrInvalidPair.getPointer();
1796       Info.Ctxt =
1797           PtrInvalidPair.getInt() ? nullptr : PtrInvalidPair.getPointer();
1798     } else
1799       Info.Ctxt = CurrentDeclContext = nullptr;
1800   }
1801 
1802   Info.Prune = InImportedModule;
1803   if (DIE->hasChildren())
1804     for (auto *Child = DIE->getFirstChild(); Child && !Child->isNULL();
1805          Child = Child->getSibling())
1806       Info.Prune &= analyzeContextInfo(Child, MyIdx, CU, CurrentDeclContext,
1807                                        StringPool, Contexts, InImportedModule);
1808 
1809   // Prune this DIE if it is either a forward declaration inside a
1810   // DW_TAG_module or a DW_TAG_module that contains nothing but
1811   // forward declarations.
1812   Info.Prune &= (DIE->getTag() == dwarf::DW_TAG_module) ||
1813                 DIE->getAttributeValueAsUnsignedConstant(
1814                     &CU.getOrigUnit(), dwarf::DW_AT_declaration, 0);
1815 
1816   // Don't prune it if there is no definition for the DIE.
1817   Info.Prune &= Info.Ctxt && Info.Ctxt->getCanonicalDIEOffset();
1818 
1819   return Info.Prune;
1820 }
1821 
dieNeedsChildrenToBeMeaningful(uint32_t Tag)1822 static bool dieNeedsChildrenToBeMeaningful(uint32_t Tag) {
1823   switch (Tag) {
1824   default:
1825     return false;
1826   case dwarf::DW_TAG_subprogram:
1827   case dwarf::DW_TAG_lexical_block:
1828   case dwarf::DW_TAG_subroutine_type:
1829   case dwarf::DW_TAG_structure_type:
1830   case dwarf::DW_TAG_class_type:
1831   case dwarf::DW_TAG_union_type:
1832     return true;
1833   }
1834   llvm_unreachable("Invalid Tag");
1835 }
1836 
getRefAddrSize(const DWARFUnit & U)1837 static unsigned getRefAddrSize(const DWARFUnit &U) {
1838   if (U.getVersion() == 2)
1839     return U.getAddressByteSize();
1840   return 4;
1841 }
1842 
startDebugObject(DWARFContext & Dwarf,DebugMapObject & Obj)1843 void DwarfLinker::startDebugObject(DWARFContext &Dwarf, DebugMapObject &Obj) {
1844   Units.reserve(Dwarf.getNumCompileUnits());
1845   // Iterate over the debug map entries and put all the ones that are
1846   // functions (because they have a size) into the Ranges map. This
1847   // map is very similar to the FunctionRanges that are stored in each
1848   // unit, with 2 notable differences:
1849   //  - obviously this one is global, while the other ones are per-unit.
1850   //  - this one contains not only the functions described in the DIE
1851   // tree, but also the ones that are only in the debug map.
1852   // The latter information is required to reproduce dsymutil's logic
1853   // while linking line tables. The cases where this information
1854   // matters look like bugs that need to be investigated, but for now
1855   // we need to reproduce dsymutil's behavior.
1856   // FIXME: Once we understood exactly if that information is needed,
1857   // maybe totally remove this (or try to use it to do a real
1858   // -gline-tables-only on Darwin.
1859   for (const auto &Entry : Obj.symbols()) {
1860     const auto &Mapping = Entry.getValue();
1861     if (Mapping.Size && Mapping.ObjectAddress)
1862       Ranges[*Mapping.ObjectAddress] = std::make_pair(
1863           *Mapping.ObjectAddress + Mapping.Size,
1864           int64_t(Mapping.BinaryAddress) - *Mapping.ObjectAddress);
1865   }
1866 }
1867 
endDebugObject()1868 void DwarfLinker::endDebugObject() {
1869   Units.clear();
1870   Ranges.clear();
1871 
1872   for (auto I = DIEBlocks.begin(), E = DIEBlocks.end(); I != E; ++I)
1873     (*I)->~DIEBlock();
1874   for (auto I = DIELocs.begin(), E = DIELocs.end(); I != E; ++I)
1875     (*I)->~DIELoc();
1876 
1877   DIEBlocks.clear();
1878   DIELocs.clear();
1879   DIEAlloc.Reset();
1880 }
1881 
isMachOPairedReloc(uint64_t RelocType,uint64_t Arch)1882 static bool isMachOPairedReloc(uint64_t RelocType, uint64_t Arch) {
1883   switch (Arch) {
1884   case Triple::x86:
1885     return RelocType == MachO::GENERIC_RELOC_SECTDIFF ||
1886            RelocType == MachO::GENERIC_RELOC_LOCAL_SECTDIFF;
1887   case Triple::x86_64:
1888     return RelocType == MachO::X86_64_RELOC_SUBTRACTOR;
1889   case Triple::arm:
1890   case Triple::thumb:
1891     return RelocType == MachO::ARM_RELOC_SECTDIFF ||
1892            RelocType == MachO::ARM_RELOC_LOCAL_SECTDIFF ||
1893            RelocType == MachO::ARM_RELOC_HALF ||
1894            RelocType == MachO::ARM_RELOC_HALF_SECTDIFF;
1895   case Triple::aarch64:
1896     return RelocType == MachO::ARM64_RELOC_SUBTRACTOR;
1897   default:
1898     return false;
1899   }
1900 }
1901 
1902 /// \brief Iterate over the relocations of the given \p Section and
1903 /// store the ones that correspond to debug map entries into the
1904 /// ValidRelocs array.
1905 void DwarfLinker::RelocationManager::
findValidRelocsMachO(const object::SectionRef & Section,const object::MachOObjectFile & Obj,const DebugMapObject & DMO)1906 findValidRelocsMachO(const object::SectionRef &Section,
1907                      const object::MachOObjectFile &Obj,
1908                      const DebugMapObject &DMO) {
1909   StringRef Contents;
1910   Section.getContents(Contents);
1911   DataExtractor Data(Contents, Obj.isLittleEndian(), 0);
1912   bool SkipNext = false;
1913 
1914   for (const object::RelocationRef &Reloc : Section.relocations()) {
1915     if (SkipNext) {
1916       SkipNext = false;
1917       continue;
1918     }
1919 
1920     object::DataRefImpl RelocDataRef = Reloc.getRawDataRefImpl();
1921     MachO::any_relocation_info MachOReloc = Obj.getRelocation(RelocDataRef);
1922 
1923     if (isMachOPairedReloc(Obj.getAnyRelocationType(MachOReloc),
1924                            Obj.getArch())) {
1925       SkipNext = true;
1926       Linker.reportWarning(" unsupported relocation in debug_info section.");
1927       continue;
1928     }
1929 
1930     unsigned RelocSize = 1 << Obj.getAnyRelocationLength(MachOReloc);
1931     uint64_t Offset64 = Reloc.getOffset();
1932     if ((RelocSize != 4 && RelocSize != 8)) {
1933       Linker.reportWarning(" unsupported relocation in debug_info section.");
1934       continue;
1935     }
1936     uint32_t Offset = Offset64;
1937     // Mach-o uses REL relocations, the addend is at the relocation offset.
1938     uint64_t Addend = Data.getUnsigned(&Offset, RelocSize);
1939     uint64_t SymAddress;
1940     int64_t SymOffset;
1941 
1942     if (Obj.isRelocationScattered(MachOReloc)) {
1943       // The address of the base symbol for scattered relocations is
1944       // stored in the reloc itself. The actual addend will store the
1945       // base address plus the offset.
1946       SymAddress = Obj.getScatteredRelocationValue(MachOReloc);
1947       SymOffset = int64_t(Addend) - SymAddress;
1948     } else {
1949       SymAddress = Addend;
1950       SymOffset = 0;
1951     }
1952 
1953     auto Sym = Reloc.getSymbol();
1954     if (Sym != Obj.symbol_end()) {
1955       Expected<StringRef> SymbolName = Sym->getName();
1956       if (!SymbolName) {
1957         consumeError(SymbolName.takeError());
1958         Linker.reportWarning("error getting relocation symbol name.");
1959         continue;
1960       }
1961       if (const auto *Mapping = DMO.lookupSymbol(*SymbolName))
1962         ValidRelocs.emplace_back(Offset64, RelocSize, Addend, Mapping);
1963     } else if (const auto *Mapping = DMO.lookupObjectAddress(SymAddress)) {
1964       // Do not store the addend. The addend was the address of the
1965       // symbol in the object file, the address in the binary that is
1966       // stored in the debug map doesn't need to be offseted.
1967       ValidRelocs.emplace_back(Offset64, RelocSize, SymOffset, Mapping);
1968     }
1969   }
1970 }
1971 
1972 /// \brief Dispatch the valid relocation finding logic to the
1973 /// appropriate handler depending on the object file format.
findValidRelocs(const object::SectionRef & Section,const object::ObjectFile & Obj,const DebugMapObject & DMO)1974 bool DwarfLinker::RelocationManager::findValidRelocs(
1975     const object::SectionRef &Section, const object::ObjectFile &Obj,
1976     const DebugMapObject &DMO) {
1977   // Dispatch to the right handler depending on the file type.
1978   if (auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Obj))
1979     findValidRelocsMachO(Section, *MachOObj, DMO);
1980   else
1981     Linker.reportWarning(Twine("unsupported object file type: ") +
1982                          Obj.getFileName());
1983 
1984   if (ValidRelocs.empty())
1985     return false;
1986 
1987   // Sort the relocations by offset. We will walk the DIEs linearly in
1988   // the file, this allows us to just keep an index in the relocation
1989   // array that we advance during our walk, rather than resorting to
1990   // some associative container. See DwarfLinker::NextValidReloc.
1991   std::sort(ValidRelocs.begin(), ValidRelocs.end());
1992   return true;
1993 }
1994 
1995 /// \brief Look for relocations in the debug_info section that match
1996 /// entries in the debug map. These relocations will drive the Dwarf
1997 /// link by indicating which DIEs refer to symbols present in the
1998 /// linked binary.
1999 /// \returns wether there are any valid relocations in the debug info.
2000 bool DwarfLinker::RelocationManager::
findValidRelocsInDebugInfo(const object::ObjectFile & Obj,const DebugMapObject & DMO)2001 findValidRelocsInDebugInfo(const object::ObjectFile &Obj,
2002                            const DebugMapObject &DMO) {
2003   // Find the debug_info section.
2004   for (const object::SectionRef &Section : Obj.sections()) {
2005     StringRef SectionName;
2006     Section.getName(SectionName);
2007     SectionName = SectionName.substr(SectionName.find_first_not_of("._"));
2008     if (SectionName != "debug_info")
2009       continue;
2010     return findValidRelocs(Section, Obj, DMO);
2011   }
2012   return false;
2013 }
2014 
2015 /// \brief Checks that there is a relocation against an actual debug
2016 /// map entry between \p StartOffset and \p NextOffset.
2017 ///
2018 /// This function must be called with offsets in strictly ascending
2019 /// order because it never looks back at relocations it already 'went past'.
2020 /// \returns true and sets Info.InDebugMap if it is the case.
2021 bool DwarfLinker::RelocationManager::
hasValidRelocation(uint32_t StartOffset,uint32_t EndOffset,CompileUnit::DIEInfo & Info)2022 hasValidRelocation(uint32_t StartOffset, uint32_t EndOffset,
2023                    CompileUnit::DIEInfo &Info) {
2024   assert(NextValidReloc == 0 ||
2025          StartOffset > ValidRelocs[NextValidReloc - 1].Offset);
2026   if (NextValidReloc >= ValidRelocs.size())
2027     return false;
2028 
2029   uint64_t RelocOffset = ValidRelocs[NextValidReloc].Offset;
2030 
2031   // We might need to skip some relocs that we didn't consider. For
2032   // example the high_pc of a discarded DIE might contain a reloc that
2033   // is in the list because it actually corresponds to the start of a
2034   // function that is in the debug map.
2035   while (RelocOffset < StartOffset && NextValidReloc < ValidRelocs.size() - 1)
2036     RelocOffset = ValidRelocs[++NextValidReloc].Offset;
2037 
2038   if (RelocOffset < StartOffset || RelocOffset >= EndOffset)
2039     return false;
2040 
2041   const auto &ValidReloc = ValidRelocs[NextValidReloc++];
2042   const auto &Mapping = ValidReloc.Mapping->getValue();
2043   uint64_t ObjectAddress =
2044       Mapping.ObjectAddress ? uint64_t(*Mapping.ObjectAddress) : UINT64_MAX;
2045   if (Linker.Options.Verbose)
2046     outs() << "Found valid debug map entry: " << ValidReloc.Mapping->getKey()
2047            << " " << format("\t%016" PRIx64 " => %016" PRIx64, ObjectAddress,
2048                             uint64_t(Mapping.BinaryAddress));
2049 
2050   Info.AddrAdjust = int64_t(Mapping.BinaryAddress) + ValidReloc.Addend;
2051   if (Mapping.ObjectAddress)
2052     Info.AddrAdjust -= ObjectAddress;
2053   Info.InDebugMap = true;
2054   return true;
2055 }
2056 
2057 /// \brief Get the starting and ending (exclusive) offset for the
2058 /// attribute with index \p Idx descibed by \p Abbrev. \p Offset is
2059 /// supposed to point to the position of the first attribute described
2060 /// by \p Abbrev.
2061 /// \return [StartOffset, EndOffset) as a pair.
2062 static std::pair<uint32_t, uint32_t>
getAttributeOffsets(const DWARFAbbreviationDeclaration * Abbrev,unsigned Idx,unsigned Offset,const DWARFUnit & Unit)2063 getAttributeOffsets(const DWARFAbbreviationDeclaration *Abbrev, unsigned Idx,
2064                     unsigned Offset, const DWARFUnit &Unit) {
2065   DataExtractor Data = Unit.getDebugInfoExtractor();
2066 
2067   for (unsigned i = 0; i < Idx; ++i)
2068     DWARFFormValue::skipValue(Abbrev->getFormByIndex(i), Data, &Offset, &Unit);
2069 
2070   uint32_t End = Offset;
2071   DWARFFormValue::skipValue(Abbrev->getFormByIndex(Idx), Data, &End, &Unit);
2072 
2073   return std::make_pair(Offset, End);
2074 }
2075 
2076 /// \brief Check if a variable describing DIE should be kept.
2077 /// \returns updated TraversalFlags.
shouldKeepVariableDIE(RelocationManager & RelocMgr,const DWARFDebugInfoEntryMinimal & DIE,CompileUnit & Unit,CompileUnit::DIEInfo & MyInfo,unsigned Flags)2078 unsigned DwarfLinker::shouldKeepVariableDIE(RelocationManager &RelocMgr,
2079                                             const DWARFDebugInfoEntryMinimal &DIE,
2080                                             CompileUnit &Unit,
2081                                             CompileUnit::DIEInfo &MyInfo,
2082                                             unsigned Flags) {
2083   const auto *Abbrev = DIE.getAbbreviationDeclarationPtr();
2084 
2085   // Global variables with constant value can always be kept.
2086   if (!(Flags & TF_InFunctionScope) &&
2087       Abbrev->findAttributeIndex(dwarf::DW_AT_const_value) != -1U) {
2088     MyInfo.InDebugMap = true;
2089     return Flags | TF_Keep;
2090   }
2091 
2092   uint32_t LocationIdx = Abbrev->findAttributeIndex(dwarf::DW_AT_location);
2093   if (LocationIdx == -1U)
2094     return Flags;
2095 
2096   uint32_t Offset = DIE.getOffset() + getULEB128Size(Abbrev->getCode());
2097   const DWARFUnit &OrigUnit = Unit.getOrigUnit();
2098   uint32_t LocationOffset, LocationEndOffset;
2099   std::tie(LocationOffset, LocationEndOffset) =
2100       getAttributeOffsets(Abbrev, LocationIdx, Offset, OrigUnit);
2101 
2102   // See if there is a relocation to a valid debug map entry inside
2103   // this variable's location. The order is important here. We want to
2104   // always check in the variable has a valid relocation, so that the
2105   // DIEInfo is filled. However, we don't want a static variable in a
2106   // function to force us to keep the enclosing function.
2107   if (!RelocMgr.hasValidRelocation(LocationOffset, LocationEndOffset, MyInfo) ||
2108       (Flags & TF_InFunctionScope))
2109     return Flags;
2110 
2111   if (Options.Verbose)
2112     DIE.dump(outs(), const_cast<DWARFUnit *>(&OrigUnit), 0, 8 /* Indent */);
2113 
2114   return Flags | TF_Keep;
2115 }
2116 
2117 /// \brief Check if a function describing DIE should be kept.
2118 /// \returns updated TraversalFlags.
shouldKeepSubprogramDIE(RelocationManager & RelocMgr,const DWARFDebugInfoEntryMinimal & DIE,CompileUnit & Unit,CompileUnit::DIEInfo & MyInfo,unsigned Flags)2119 unsigned DwarfLinker::shouldKeepSubprogramDIE(
2120     RelocationManager &RelocMgr,
2121     const DWARFDebugInfoEntryMinimal &DIE, CompileUnit &Unit,
2122     CompileUnit::DIEInfo &MyInfo, unsigned Flags) {
2123   const auto *Abbrev = DIE.getAbbreviationDeclarationPtr();
2124 
2125   Flags |= TF_InFunctionScope;
2126 
2127   uint32_t LowPcIdx = Abbrev->findAttributeIndex(dwarf::DW_AT_low_pc);
2128   if (LowPcIdx == -1U)
2129     return Flags;
2130 
2131   uint32_t Offset = DIE.getOffset() + getULEB128Size(Abbrev->getCode());
2132   const DWARFUnit &OrigUnit = Unit.getOrigUnit();
2133   uint32_t LowPcOffset, LowPcEndOffset;
2134   std::tie(LowPcOffset, LowPcEndOffset) =
2135       getAttributeOffsets(Abbrev, LowPcIdx, Offset, OrigUnit);
2136 
2137   uint64_t LowPc =
2138       DIE.getAttributeValueAsAddress(&OrigUnit, dwarf::DW_AT_low_pc, -1ULL);
2139   assert(LowPc != -1ULL && "low_pc attribute is not an address.");
2140   if (LowPc == -1ULL ||
2141       !RelocMgr.hasValidRelocation(LowPcOffset, LowPcEndOffset, MyInfo))
2142     return Flags;
2143 
2144   if (Options.Verbose)
2145     DIE.dump(outs(), const_cast<DWARFUnit *>(&OrigUnit), 0, 8 /* Indent */);
2146 
2147   Flags |= TF_Keep;
2148 
2149   DWARFFormValue HighPcValue;
2150   if (!DIE.getAttributeValue(&OrigUnit, dwarf::DW_AT_high_pc, HighPcValue)) {
2151     reportWarning("Function without high_pc. Range will be discarded.\n",
2152                   &OrigUnit, &DIE);
2153     return Flags;
2154   }
2155 
2156   uint64_t HighPc;
2157   if (HighPcValue.isFormClass(DWARFFormValue::FC_Address)) {
2158     HighPc = *HighPcValue.getAsAddress(&OrigUnit);
2159   } else {
2160     assert(HighPcValue.isFormClass(DWARFFormValue::FC_Constant));
2161     HighPc = LowPc + *HighPcValue.getAsUnsignedConstant();
2162   }
2163 
2164   // Replace the debug map range with a more accurate one.
2165   Ranges[LowPc] = std::make_pair(HighPc, MyInfo.AddrAdjust);
2166   Unit.addFunctionRange(LowPc, HighPc, MyInfo.AddrAdjust);
2167   return Flags;
2168 }
2169 
2170 /// \brief Check if a DIE should be kept.
2171 /// \returns updated TraversalFlags.
shouldKeepDIE(RelocationManager & RelocMgr,const DWARFDebugInfoEntryMinimal & DIE,CompileUnit & Unit,CompileUnit::DIEInfo & MyInfo,unsigned Flags)2172 unsigned DwarfLinker::shouldKeepDIE(RelocationManager &RelocMgr,
2173                                     const DWARFDebugInfoEntryMinimal &DIE,
2174                                     CompileUnit &Unit,
2175                                     CompileUnit::DIEInfo &MyInfo,
2176                                     unsigned Flags) {
2177   switch (DIE.getTag()) {
2178   case dwarf::DW_TAG_constant:
2179   case dwarf::DW_TAG_variable:
2180     return shouldKeepVariableDIE(RelocMgr, DIE, Unit, MyInfo, Flags);
2181   case dwarf::DW_TAG_subprogram:
2182     return shouldKeepSubprogramDIE(RelocMgr, DIE, Unit, MyInfo, Flags);
2183   case dwarf::DW_TAG_module:
2184   case dwarf::DW_TAG_imported_module:
2185   case dwarf::DW_TAG_imported_declaration:
2186   case dwarf::DW_TAG_imported_unit:
2187     // We always want to keep these.
2188     return Flags | TF_Keep;
2189   }
2190 
2191   return Flags;
2192 }
2193 
2194 /// \brief Mark the passed DIE as well as all the ones it depends on
2195 /// as kept.
2196 ///
2197 /// This function is called by lookForDIEsToKeep on DIEs that are
2198 /// newly discovered to be needed in the link. It recursively calls
2199 /// back to lookForDIEsToKeep while adding TF_DependencyWalk to the
2200 /// TraversalFlags to inform it that it's not doing the primary DIE
2201 /// tree walk.
keepDIEAndDependencies(RelocationManager & RelocMgr,const DWARFDebugInfoEntryMinimal & Die,CompileUnit::DIEInfo & MyInfo,const DebugMapObject & DMO,CompileUnit & CU,bool UseODR)2202 void DwarfLinker::keepDIEAndDependencies(RelocationManager &RelocMgr,
2203                                           const DWARFDebugInfoEntryMinimal &Die,
2204                                           CompileUnit::DIEInfo &MyInfo,
2205                                           const DebugMapObject &DMO,
2206                                           CompileUnit &CU, bool UseODR) {
2207   const DWARFUnit &Unit = CU.getOrigUnit();
2208   MyInfo.Keep = true;
2209 
2210   // First mark all the parent chain as kept.
2211   unsigned AncestorIdx = MyInfo.ParentIdx;
2212   while (!CU.getInfo(AncestorIdx).Keep) {
2213     unsigned ODRFlag = UseODR ? TF_ODR : 0;
2214     lookForDIEsToKeep(RelocMgr, *Unit.getDIEAtIndex(AncestorIdx), DMO, CU,
2215                       TF_ParentWalk | TF_Keep | TF_DependencyWalk | ODRFlag);
2216     AncestorIdx = CU.getInfo(AncestorIdx).ParentIdx;
2217   }
2218 
2219   // Then we need to mark all the DIEs referenced by this DIE's
2220   // attributes as kept.
2221   DataExtractor Data = Unit.getDebugInfoExtractor();
2222   const auto *Abbrev = Die.getAbbreviationDeclarationPtr();
2223   uint32_t Offset = Die.getOffset() + getULEB128Size(Abbrev->getCode());
2224 
2225   // Mark all DIEs referenced through atttributes as kept.
2226   for (const auto &AttrSpec : Abbrev->attributes()) {
2227     DWARFFormValue Val(AttrSpec.Form);
2228 
2229     if (!Val.isFormClass(DWARFFormValue::FC_Reference)) {
2230       DWARFFormValue::skipValue(AttrSpec.Form, Data, &Offset, &Unit);
2231       continue;
2232     }
2233 
2234     Val.extractValue(Data, &Offset, &Unit);
2235     CompileUnit *ReferencedCU;
2236     if (const auto *RefDIE =
2237             resolveDIEReference(*this, MutableArrayRef<CompileUnit>(Units), Val,
2238                                 Unit, Die, ReferencedCU)) {
2239       uint32_t RefIdx = ReferencedCU->getOrigUnit().getDIEIndex(RefDIE);
2240       CompileUnit::DIEInfo &Info = ReferencedCU->getInfo(RefIdx);
2241       // If the referenced DIE has a DeclContext that has already been
2242       // emitted, then do not keep the one in this CU. We'll link to
2243       // the canonical DIE in cloneDieReferenceAttribute.
2244       // FIXME: compatibility with dsymutil-classic. UseODR shouldn't
2245       // be necessary and could be advantageously replaced by
2246       // ReferencedCU->hasODR() && CU.hasODR().
2247       // FIXME: compatibility with dsymutil-classic. There is no
2248       // reason not to unique ref_addr references.
2249       if (AttrSpec.Form != dwarf::DW_FORM_ref_addr && UseODR && Info.Ctxt &&
2250           Info.Ctxt != ReferencedCU->getInfo(Info.ParentIdx).Ctxt &&
2251           Info.Ctxt->getCanonicalDIEOffset() && isODRAttribute(AttrSpec.Attr))
2252         continue;
2253 
2254       // Keep a module forward declaration if there is no definition.
2255       if (!(isODRAttribute(AttrSpec.Attr) && Info.Ctxt &&
2256             Info.Ctxt->getCanonicalDIEOffset()))
2257         Info.Prune = false;
2258 
2259       unsigned ODRFlag = UseODR ? TF_ODR : 0;
2260       lookForDIEsToKeep(RelocMgr, *RefDIE, DMO, *ReferencedCU,
2261                         TF_Keep | TF_DependencyWalk | ODRFlag);
2262     }
2263   }
2264 }
2265 
2266 /// \brief Recursively walk the \p DIE tree and look for DIEs to
2267 /// keep. Store that information in \p CU's DIEInfo.
2268 ///
2269 /// This function is the entry point of the DIE selection
2270 /// algorithm. It is expected to walk the DIE tree in file order and
2271 /// (though the mediation of its helper) call hasValidRelocation() on
2272 /// each DIE that might be a 'root DIE' (See DwarfLinker class
2273 /// comment).
2274 /// While walking the dependencies of root DIEs, this function is
2275 /// also called, but during these dependency walks the file order is
2276 /// not respected. The TF_DependencyWalk flag tells us which kind of
2277 /// traversal we are currently doing.
lookForDIEsToKeep(RelocationManager & RelocMgr,const DWARFDebugInfoEntryMinimal & Die,const DebugMapObject & DMO,CompileUnit & CU,unsigned Flags)2278 void DwarfLinker::lookForDIEsToKeep(RelocationManager &RelocMgr,
2279                                     const DWARFDebugInfoEntryMinimal &Die,
2280                                     const DebugMapObject &DMO, CompileUnit &CU,
2281                                     unsigned Flags) {
2282   unsigned Idx = CU.getOrigUnit().getDIEIndex(&Die);
2283   CompileUnit::DIEInfo &MyInfo = CU.getInfo(Idx);
2284   bool AlreadyKept = MyInfo.Keep;
2285   if (MyInfo.Prune)
2286     return;
2287 
2288   // If the Keep flag is set, we are marking a required DIE's
2289   // dependencies. If our target is already marked as kept, we're all
2290   // set.
2291   if ((Flags & TF_DependencyWalk) && AlreadyKept)
2292     return;
2293 
2294   // We must not call shouldKeepDIE while called from keepDIEAndDependencies,
2295   // because it would screw up the relocation finding logic.
2296   if (!(Flags & TF_DependencyWalk))
2297     Flags = shouldKeepDIE(RelocMgr, Die, CU, MyInfo, Flags);
2298 
2299   // If it is a newly kept DIE mark it as well as all its dependencies as kept.
2300   if (!AlreadyKept && (Flags & TF_Keep)) {
2301     bool UseOdr = (Flags & TF_DependencyWalk) ? (Flags & TF_ODR) : CU.hasODR();
2302     keepDIEAndDependencies(RelocMgr, Die, MyInfo, DMO, CU, UseOdr);
2303   }
2304   // The TF_ParentWalk flag tells us that we are currently walking up
2305   // the parent chain of a required DIE, and we don't want to mark all
2306   // the children of the parents as kept (consider for example a
2307   // DW_TAG_namespace node in the parent chain). There are however a
2308   // set of DIE types for which we want to ignore that directive and still
2309   // walk their children.
2310   if (dieNeedsChildrenToBeMeaningful(Die.getTag()))
2311     Flags &= ~TF_ParentWalk;
2312 
2313   if (!Die.hasChildren() || (Flags & TF_ParentWalk))
2314     return;
2315 
2316   for (auto *Child = Die.getFirstChild(); Child && !Child->isNULL();
2317        Child = Child->getSibling())
2318     lookForDIEsToKeep(RelocMgr, *Child, DMO, CU, Flags);
2319 }
2320 
2321 /// \brief Assign an abbreviation numer to \p Abbrev.
2322 ///
2323 /// Our DIEs get freed after every DebugMapObject has been processed,
2324 /// thus the FoldingSet we use to unique DIEAbbrevs cannot refer to
2325 /// the instances hold by the DIEs. When we encounter an abbreviation
2326 /// that we don't know, we create a permanent copy of it.
AssignAbbrev(DIEAbbrev & Abbrev)2327 void DwarfLinker::AssignAbbrev(DIEAbbrev &Abbrev) {
2328   // Check the set for priors.
2329   FoldingSetNodeID ID;
2330   Abbrev.Profile(ID);
2331   void *InsertToken;
2332   DIEAbbrev *InSet = AbbreviationsSet.FindNodeOrInsertPos(ID, InsertToken);
2333 
2334   // If it's newly added.
2335   if (InSet) {
2336     // Assign existing abbreviation number.
2337     Abbrev.setNumber(InSet->getNumber());
2338   } else {
2339     // Add to abbreviation list.
2340     Abbreviations.push_back(
2341         llvm::make_unique<DIEAbbrev>(Abbrev.getTag(), Abbrev.hasChildren()));
2342     for (const auto &Attr : Abbrev.getData())
2343       Abbreviations.back()->AddAttribute(Attr.getAttribute(), Attr.getForm());
2344     AbbreviationsSet.InsertNode(Abbreviations.back().get(), InsertToken);
2345     // Assign the unique abbreviation number.
2346     Abbrev.setNumber(Abbreviations.size());
2347     Abbreviations.back()->setNumber(Abbreviations.size());
2348   }
2349 }
2350 
cloneStringAttribute(DIE & Die,AttributeSpec AttrSpec,const DWARFFormValue & Val,const DWARFUnit & U)2351 unsigned DwarfLinker::DIECloner::cloneStringAttribute(DIE &Die,
2352                                                       AttributeSpec AttrSpec,
2353                                                       const DWARFFormValue &Val,
2354                                                       const DWARFUnit &U) {
2355   // Switch everything to out of line strings.
2356   const char *String = *Val.getAsCString(&U);
2357   unsigned Offset = Linker.StringPool.getStringOffset(String);
2358   Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr), dwarf::DW_FORM_strp,
2359                DIEInteger(Offset));
2360   return 4;
2361 }
2362 
cloneDieReferenceAttribute(DIE & Die,const DWARFDebugInfoEntryMinimal & InputDIE,AttributeSpec AttrSpec,unsigned AttrSize,const DWARFFormValue & Val,CompileUnit & Unit)2363 unsigned DwarfLinker::DIECloner::cloneDieReferenceAttribute(
2364     DIE &Die, const DWARFDebugInfoEntryMinimal &InputDIE,
2365     AttributeSpec AttrSpec, unsigned AttrSize, const DWARFFormValue &Val,
2366     CompileUnit &Unit) {
2367   const DWARFUnit &U = Unit.getOrigUnit();
2368   uint32_t Ref = *Val.getAsReference(&U);
2369   DIE *NewRefDie = nullptr;
2370   CompileUnit *RefUnit = nullptr;
2371   DeclContext *Ctxt = nullptr;
2372 
2373   const DWARFDebugInfoEntryMinimal *RefDie =
2374       resolveDIEReference(Linker, CompileUnits, Val, U, InputDIE, RefUnit);
2375 
2376   // If the referenced DIE is not found,  drop the attribute.
2377   if (!RefDie)
2378     return 0;
2379 
2380   unsigned Idx = RefUnit->getOrigUnit().getDIEIndex(RefDie);
2381   CompileUnit::DIEInfo &RefInfo = RefUnit->getInfo(Idx);
2382 
2383   // If we already have emitted an equivalent DeclContext, just point
2384   // at it.
2385   if (isODRAttribute(AttrSpec.Attr)) {
2386     Ctxt = RefInfo.Ctxt;
2387     if (Ctxt && Ctxt->getCanonicalDIEOffset()) {
2388       DIEInteger Attr(Ctxt->getCanonicalDIEOffset());
2389       Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
2390                    dwarf::DW_FORM_ref_addr, Attr);
2391       return getRefAddrSize(U);
2392     }
2393   }
2394 
2395   if (!RefInfo.Clone) {
2396     assert(Ref > InputDIE.getOffset());
2397     // We haven't cloned this DIE yet. Just create an empty one and
2398     // store it. It'll get really cloned when we process it.
2399     RefInfo.Clone = DIE::get(DIEAlloc, dwarf::Tag(RefDie->getTag()));
2400   }
2401   NewRefDie = RefInfo.Clone;
2402 
2403   if (AttrSpec.Form == dwarf::DW_FORM_ref_addr ||
2404       (Unit.hasODR() && isODRAttribute(AttrSpec.Attr))) {
2405     // We cannot currently rely on a DIEEntry to emit ref_addr
2406     // references, because the implementation calls back to DwarfDebug
2407     // to find the unit offset. (We don't have a DwarfDebug)
2408     // FIXME: we should be able to design DIEEntry reliance on
2409     // DwarfDebug away.
2410     uint64_t Attr;
2411     if (Ref < InputDIE.getOffset()) {
2412       // We must have already cloned that DIE.
2413       uint32_t NewRefOffset =
2414           RefUnit->getStartOffset() + NewRefDie->getOffset();
2415       Attr = NewRefOffset;
2416       Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
2417                    dwarf::DW_FORM_ref_addr, DIEInteger(Attr));
2418     } else {
2419       // A forward reference. Note and fixup later.
2420       Attr = 0xBADDEF;
2421       Unit.noteForwardReference(
2422           NewRefDie, RefUnit, Ctxt,
2423           Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
2424                        dwarf::DW_FORM_ref_addr, DIEInteger(Attr)));
2425     }
2426     return getRefAddrSize(U);
2427   }
2428 
2429   Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
2430                dwarf::Form(AttrSpec.Form), DIEEntry(*NewRefDie));
2431   return AttrSize;
2432 }
2433 
cloneBlockAttribute(DIE & Die,AttributeSpec AttrSpec,const DWARFFormValue & Val,unsigned AttrSize)2434 unsigned DwarfLinker::DIECloner::cloneBlockAttribute(DIE &Die,
2435                                                      AttributeSpec AttrSpec,
2436                                                      const DWARFFormValue &Val,
2437                                                      unsigned AttrSize) {
2438   DIEValueList *Attr;
2439   DIEValue Value;
2440   DIELoc *Loc = nullptr;
2441   DIEBlock *Block = nullptr;
2442   // Just copy the block data over.
2443   if (AttrSpec.Form == dwarf::DW_FORM_exprloc) {
2444     Loc = new (DIEAlloc) DIELoc;
2445     Linker.DIELocs.push_back(Loc);
2446   } else {
2447     Block = new (DIEAlloc) DIEBlock;
2448     Linker.DIEBlocks.push_back(Block);
2449   }
2450   Attr = Loc ? static_cast<DIEValueList *>(Loc)
2451              : static_cast<DIEValueList *>(Block);
2452 
2453   if (Loc)
2454     Value = DIEValue(dwarf::Attribute(AttrSpec.Attr),
2455                      dwarf::Form(AttrSpec.Form), Loc);
2456   else
2457     Value = DIEValue(dwarf::Attribute(AttrSpec.Attr),
2458                      dwarf::Form(AttrSpec.Form), Block);
2459   ArrayRef<uint8_t> Bytes = *Val.getAsBlock();
2460   for (auto Byte : Bytes)
2461     Attr->addValue(DIEAlloc, static_cast<dwarf::Attribute>(0),
2462                    dwarf::DW_FORM_data1, DIEInteger(Byte));
2463   // FIXME: If DIEBlock and DIELoc just reuses the Size field of
2464   // the DIE class, this if could be replaced by
2465   // Attr->setSize(Bytes.size()).
2466   if (Linker.Streamer) {
2467     auto *AsmPrinter = &Linker.Streamer->getAsmPrinter();
2468     if (Loc)
2469       Loc->ComputeSize(AsmPrinter);
2470     else
2471       Block->ComputeSize(AsmPrinter);
2472   }
2473   Die.addValue(DIEAlloc, Value);
2474   return AttrSize;
2475 }
2476 
cloneAddressAttribute(DIE & Die,AttributeSpec AttrSpec,const DWARFFormValue & Val,const CompileUnit & Unit,AttributesInfo & Info)2477 unsigned DwarfLinker::DIECloner::cloneAddressAttribute(
2478     DIE &Die, AttributeSpec AttrSpec, const DWARFFormValue &Val,
2479     const CompileUnit &Unit, AttributesInfo &Info) {
2480   uint64_t Addr = *Val.getAsAddress(&Unit.getOrigUnit());
2481   if (AttrSpec.Attr == dwarf::DW_AT_low_pc) {
2482     if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine ||
2483         Die.getTag() == dwarf::DW_TAG_lexical_block)
2484       // The low_pc of a block or inline subroutine might get
2485       // relocated because it happens to match the low_pc of the
2486       // enclosing subprogram. To prevent issues with that, always use
2487       // the low_pc from the input DIE if relocations have been applied.
2488       Addr = (Info.OrigLowPc != UINT64_MAX ? Info.OrigLowPc : Addr) +
2489              Info.PCOffset;
2490     else if (Die.getTag() == dwarf::DW_TAG_compile_unit) {
2491       Addr = Unit.getLowPc();
2492       if (Addr == UINT64_MAX)
2493         return 0;
2494     }
2495     Info.HasLowPc = true;
2496   } else if (AttrSpec.Attr == dwarf::DW_AT_high_pc) {
2497     if (Die.getTag() == dwarf::DW_TAG_compile_unit) {
2498       if (uint64_t HighPc = Unit.getHighPc())
2499         Addr = HighPc;
2500       else
2501         return 0;
2502     } else
2503       // If we have a high_pc recorded for the input DIE, use
2504       // it. Otherwise (when no relocations where applied) just use the
2505       // one we just decoded.
2506       Addr = (Info.OrigHighPc ? Info.OrigHighPc : Addr) + Info.PCOffset;
2507   }
2508 
2509   Die.addValue(DIEAlloc, static_cast<dwarf::Attribute>(AttrSpec.Attr),
2510                static_cast<dwarf::Form>(AttrSpec.Form), DIEInteger(Addr));
2511   return Unit.getOrigUnit().getAddressByteSize();
2512 }
2513 
cloneScalarAttribute(DIE & Die,const DWARFDebugInfoEntryMinimal & InputDIE,CompileUnit & Unit,AttributeSpec AttrSpec,const DWARFFormValue & Val,unsigned AttrSize,AttributesInfo & Info)2514 unsigned DwarfLinker::DIECloner::cloneScalarAttribute(
2515     DIE &Die, const DWARFDebugInfoEntryMinimal &InputDIE, CompileUnit &Unit,
2516     AttributeSpec AttrSpec, const DWARFFormValue &Val, unsigned AttrSize,
2517     AttributesInfo &Info) {
2518   uint64_t Value;
2519   if (AttrSpec.Attr == dwarf::DW_AT_high_pc &&
2520       Die.getTag() == dwarf::DW_TAG_compile_unit) {
2521     if (Unit.getLowPc() == -1ULL)
2522       return 0;
2523     // Dwarf >= 4 high_pc is an size, not an address.
2524     Value = Unit.getHighPc() - Unit.getLowPc();
2525   } else if (AttrSpec.Form == dwarf::DW_FORM_sec_offset)
2526     Value = *Val.getAsSectionOffset();
2527   else if (AttrSpec.Form == dwarf::DW_FORM_sdata)
2528     Value = *Val.getAsSignedConstant();
2529   else if (auto OptionalValue = Val.getAsUnsignedConstant())
2530     Value = *OptionalValue;
2531   else {
2532     Linker.reportWarning(
2533         "Unsupported scalar attribute form. Dropping attribute.",
2534         &Unit.getOrigUnit(), &InputDIE);
2535     return 0;
2536   }
2537   PatchLocation Patch =
2538       Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
2539                    dwarf::Form(AttrSpec.Form), DIEInteger(Value));
2540   if (AttrSpec.Attr == dwarf::DW_AT_ranges)
2541     Unit.noteRangeAttribute(Die, Patch);
2542 
2543   // A more generic way to check for location attributes would be
2544   // nice, but it's very unlikely that any other attribute needs a
2545   // location list.
2546   else if (AttrSpec.Attr == dwarf::DW_AT_location ||
2547            AttrSpec.Attr == dwarf::DW_AT_frame_base)
2548     Unit.noteLocationAttribute(Patch, Info.PCOffset);
2549   else if (AttrSpec.Attr == dwarf::DW_AT_declaration && Value)
2550     Info.IsDeclaration = true;
2551 
2552   return AttrSize;
2553 }
2554 
2555 /// \brief Clone \p InputDIE's attribute described by \p AttrSpec with
2556 /// value \p Val, and add it to \p Die.
2557 /// \returns the size of the cloned attribute.
cloneAttribute(DIE & Die,const DWARFDebugInfoEntryMinimal & InputDIE,CompileUnit & Unit,const DWARFFormValue & Val,const AttributeSpec AttrSpec,unsigned AttrSize,AttributesInfo & Info)2558 unsigned DwarfLinker::DIECloner::cloneAttribute(
2559     DIE &Die, const DWARFDebugInfoEntryMinimal &InputDIE, CompileUnit &Unit,
2560     const DWARFFormValue &Val, const AttributeSpec AttrSpec, unsigned AttrSize,
2561     AttributesInfo &Info) {
2562   const DWARFUnit &U = Unit.getOrigUnit();
2563 
2564   switch (AttrSpec.Form) {
2565   case dwarf::DW_FORM_strp:
2566   case dwarf::DW_FORM_string:
2567     return cloneStringAttribute(Die, AttrSpec, Val, U);
2568   case dwarf::DW_FORM_ref_addr:
2569   case dwarf::DW_FORM_ref1:
2570   case dwarf::DW_FORM_ref2:
2571   case dwarf::DW_FORM_ref4:
2572   case dwarf::DW_FORM_ref8:
2573     return cloneDieReferenceAttribute(Die, InputDIE, AttrSpec, AttrSize, Val,
2574                                       Unit);
2575   case dwarf::DW_FORM_block:
2576   case dwarf::DW_FORM_block1:
2577   case dwarf::DW_FORM_block2:
2578   case dwarf::DW_FORM_block4:
2579   case dwarf::DW_FORM_exprloc:
2580     return cloneBlockAttribute(Die, AttrSpec, Val, AttrSize);
2581   case dwarf::DW_FORM_addr:
2582     return cloneAddressAttribute(Die, AttrSpec, Val, Unit, Info);
2583   case dwarf::DW_FORM_data1:
2584   case dwarf::DW_FORM_data2:
2585   case dwarf::DW_FORM_data4:
2586   case dwarf::DW_FORM_data8:
2587   case dwarf::DW_FORM_udata:
2588   case dwarf::DW_FORM_sdata:
2589   case dwarf::DW_FORM_sec_offset:
2590   case dwarf::DW_FORM_flag:
2591   case dwarf::DW_FORM_flag_present:
2592     return cloneScalarAttribute(Die, InputDIE, Unit, AttrSpec, Val, AttrSize,
2593                                 Info);
2594   default:
2595     Linker.reportWarning(
2596         "Unsupported attribute form in cloneAttribute. Dropping.", &U,
2597         &InputDIE);
2598   }
2599 
2600   return 0;
2601 }
2602 
2603 /// \brief Apply the valid relocations found by findValidRelocs() to
2604 /// the buffer \p Data, taking into account that Data is at \p BaseOffset
2605 /// in the debug_info section.
2606 ///
2607 /// Like for findValidRelocs(), this function must be called with
2608 /// monotonic \p BaseOffset values.
2609 ///
2610 /// \returns wether any reloc has been applied.
2611 bool DwarfLinker::RelocationManager::
applyValidRelocs(MutableArrayRef<char> Data,uint32_t BaseOffset,bool isLittleEndian)2612 applyValidRelocs(MutableArrayRef<char> Data, uint32_t BaseOffset,
2613                  bool isLittleEndian) {
2614   assert((NextValidReloc == 0 ||
2615           BaseOffset > ValidRelocs[NextValidReloc - 1].Offset) &&
2616          "BaseOffset should only be increasing.");
2617   if (NextValidReloc >= ValidRelocs.size())
2618     return false;
2619 
2620   // Skip relocs that haven't been applied.
2621   while (NextValidReloc < ValidRelocs.size() &&
2622          ValidRelocs[NextValidReloc].Offset < BaseOffset)
2623     ++NextValidReloc;
2624 
2625   bool Applied = false;
2626   uint64_t EndOffset = BaseOffset + Data.size();
2627   while (NextValidReloc < ValidRelocs.size() &&
2628          ValidRelocs[NextValidReloc].Offset >= BaseOffset &&
2629          ValidRelocs[NextValidReloc].Offset < EndOffset) {
2630     const auto &ValidReloc = ValidRelocs[NextValidReloc++];
2631     assert(ValidReloc.Offset - BaseOffset < Data.size());
2632     assert(ValidReloc.Offset - BaseOffset + ValidReloc.Size <= Data.size());
2633     char Buf[8];
2634     uint64_t Value = ValidReloc.Mapping->getValue().BinaryAddress;
2635     Value += ValidReloc.Addend;
2636     for (unsigned i = 0; i != ValidReloc.Size; ++i) {
2637       unsigned Index = isLittleEndian ? i : (ValidReloc.Size - i - 1);
2638       Buf[i] = uint8_t(Value >> (Index * 8));
2639     }
2640     assert(ValidReloc.Size <= sizeof(Buf));
2641     memcpy(&Data[ValidReloc.Offset - BaseOffset], Buf, ValidReloc.Size);
2642     Applied = true;
2643   }
2644 
2645   return Applied;
2646 }
2647 
isTypeTag(uint16_t Tag)2648 static bool isTypeTag(uint16_t Tag) {
2649   switch (Tag) {
2650   case dwarf::DW_TAG_array_type:
2651   case dwarf::DW_TAG_class_type:
2652   case dwarf::DW_TAG_enumeration_type:
2653   case dwarf::DW_TAG_pointer_type:
2654   case dwarf::DW_TAG_reference_type:
2655   case dwarf::DW_TAG_string_type:
2656   case dwarf::DW_TAG_structure_type:
2657   case dwarf::DW_TAG_subroutine_type:
2658   case dwarf::DW_TAG_typedef:
2659   case dwarf::DW_TAG_union_type:
2660   case dwarf::DW_TAG_ptr_to_member_type:
2661   case dwarf::DW_TAG_set_type:
2662   case dwarf::DW_TAG_subrange_type:
2663   case dwarf::DW_TAG_base_type:
2664   case dwarf::DW_TAG_const_type:
2665   case dwarf::DW_TAG_constant:
2666   case dwarf::DW_TAG_file_type:
2667   case dwarf::DW_TAG_namelist:
2668   case dwarf::DW_TAG_packed_type:
2669   case dwarf::DW_TAG_volatile_type:
2670   case dwarf::DW_TAG_restrict_type:
2671   case dwarf::DW_TAG_interface_type:
2672   case dwarf::DW_TAG_unspecified_type:
2673   case dwarf::DW_TAG_shared_type:
2674     return true;
2675   default:
2676     break;
2677   }
2678   return false;
2679 }
2680 
2681 static bool
shouldSkipAttribute(DWARFAbbreviationDeclaration::AttributeSpec AttrSpec,uint16_t Tag,bool InDebugMap,bool SkipPC,bool InFunctionScope)2682 shouldSkipAttribute(DWARFAbbreviationDeclaration::AttributeSpec AttrSpec,
2683                     uint16_t Tag, bool InDebugMap, bool SkipPC,
2684                     bool InFunctionScope) {
2685   switch (AttrSpec.Attr) {
2686   default:
2687     return false;
2688   case dwarf::DW_AT_low_pc:
2689   case dwarf::DW_AT_high_pc:
2690   case dwarf::DW_AT_ranges:
2691     return SkipPC;
2692   case dwarf::DW_AT_location:
2693   case dwarf::DW_AT_frame_base:
2694     // FIXME: for some reason dsymutil-classic keeps the location
2695     // attributes when they are of block type (ie. not location
2696     // lists). This is totally wrong for globals where we will keep a
2697     // wrong address. It is mostly harmless for locals, but there is
2698     // no point in keeping these anyway when the function wasn't linked.
2699     return (SkipPC || (!InFunctionScope && Tag == dwarf::DW_TAG_variable &&
2700                        !InDebugMap)) &&
2701            !DWARFFormValue(AttrSpec.Form).isFormClass(DWARFFormValue::FC_Block);
2702   }
2703 }
2704 
cloneDIE(const DWARFDebugInfoEntryMinimal & InputDIE,CompileUnit & Unit,int64_t PCOffset,uint32_t OutOffset,unsigned Flags)2705 DIE *DwarfLinker::DIECloner::cloneDIE(
2706     const DWARFDebugInfoEntryMinimal &InputDIE, CompileUnit &Unit,
2707     int64_t PCOffset, uint32_t OutOffset, unsigned Flags) {
2708   DWARFUnit &U = Unit.getOrigUnit();
2709   unsigned Idx = U.getDIEIndex(&InputDIE);
2710   CompileUnit::DIEInfo &Info = Unit.getInfo(Idx);
2711 
2712   // Should the DIE appear in the output?
2713   if (!Unit.getInfo(Idx).Keep)
2714     return nullptr;
2715 
2716   uint32_t Offset = InputDIE.getOffset();
2717   // The DIE might have been already created by a forward reference
2718   // (see cloneDieReferenceAttribute()).
2719   DIE *Die = Info.Clone;
2720   if (!Die)
2721     Die = Info.Clone = DIE::get(DIEAlloc, dwarf::Tag(InputDIE.getTag()));
2722   assert(Die->getTag() == InputDIE.getTag());
2723   Die->setOffset(OutOffset);
2724   if ((Unit.hasODR() || Unit.isClangModule()) &&
2725       Die->getTag() != dwarf::DW_TAG_namespace && Info.Ctxt &&
2726       Info.Ctxt != Unit.getInfo(Info.ParentIdx).Ctxt &&
2727       !Info.Ctxt->getCanonicalDIEOffset()) {
2728     // We are about to emit a DIE that is the root of its own valid
2729     // DeclContext tree. Make the current offset the canonical offset
2730     // for this context.
2731     Info.Ctxt->setCanonicalDIEOffset(OutOffset + Unit.getStartOffset());
2732   }
2733 
2734   // Extract and clone every attribute.
2735   DataExtractor Data = U.getDebugInfoExtractor();
2736   // Point to the next DIE (generally there is always at least a NULL
2737   // entry after the current one). If this is a lone
2738   // DW_TAG_compile_unit without any children, point to the next unit.
2739   uint32_t NextOffset =
2740     (Idx + 1 < U.getNumDIEs())
2741     ? U.getDIEAtIndex(Idx + 1)->getOffset()
2742     : U.getNextUnitOffset();
2743   AttributesInfo AttrInfo;
2744 
2745   // We could copy the data only if we need to aply a relocation to
2746   // it. After testing, it seems there is no performance downside to
2747   // doing the copy unconditionally, and it makes the code simpler.
2748   SmallString<40> DIECopy(Data.getData().substr(Offset, NextOffset - Offset));
2749   Data = DataExtractor(DIECopy, Data.isLittleEndian(), Data.getAddressSize());
2750   // Modify the copy with relocated addresses.
2751   if (RelocMgr.applyValidRelocs(DIECopy, Offset, Data.isLittleEndian())) {
2752     // If we applied relocations, we store the value of high_pc that was
2753     // potentially stored in the input DIE. If high_pc is an address
2754     // (Dwarf version == 2), then it might have been relocated to a
2755     // totally unrelated value (because the end address in the object
2756     // file might be start address of another function which got moved
2757     // independantly by the linker). The computation of the actual
2758     // high_pc value is done in cloneAddressAttribute().
2759     AttrInfo.OrigHighPc =
2760         InputDIE.getAttributeValueAsAddress(&U, dwarf::DW_AT_high_pc, 0);
2761     // Also store the low_pc. It might get relocated in an
2762     // inline_subprogram that happens at the beginning of its
2763     // inlining function.
2764     AttrInfo.OrigLowPc =
2765         InputDIE.getAttributeValueAsAddress(&U, dwarf::DW_AT_low_pc, UINT64_MAX);
2766   }
2767 
2768   // Reset the Offset to 0 as we will be working on the local copy of
2769   // the data.
2770   Offset = 0;
2771 
2772   const auto *Abbrev = InputDIE.getAbbreviationDeclarationPtr();
2773   Offset += getULEB128Size(Abbrev->getCode());
2774 
2775   // We are entering a subprogram. Get and propagate the PCOffset.
2776   if (Die->getTag() == dwarf::DW_TAG_subprogram)
2777     PCOffset = Info.AddrAdjust;
2778   AttrInfo.PCOffset = PCOffset;
2779 
2780   if (Abbrev->getTag() == dwarf::DW_TAG_subprogram) {
2781     Flags |= TF_InFunctionScope;
2782     if (!Info.InDebugMap)
2783       Flags |= TF_SkipPC;
2784   }
2785 
2786   bool Copied = false;
2787   for (const auto &AttrSpec : Abbrev->attributes()) {
2788     if (shouldSkipAttribute(AttrSpec, Die->getTag(), Info.InDebugMap,
2789                             Flags & TF_SkipPC, Flags & TF_InFunctionScope)) {
2790       DWARFFormValue::skipValue(AttrSpec.Form, Data, &Offset, &U);
2791       // FIXME: dsymutil-classic keeps the old abbreviation around
2792       // even if it's not used. We can remove this (and the copyAbbrev
2793       // helper) as soon as bit-for-bit compatibility is not a goal anymore.
2794       if (!Copied) {
2795         copyAbbrev(*InputDIE.getAbbreviationDeclarationPtr(), Unit.hasODR());
2796         Copied = true;
2797       }
2798       continue;
2799     }
2800 
2801     DWARFFormValue Val(AttrSpec.Form);
2802     uint32_t AttrSize = Offset;
2803     Val.extractValue(Data, &Offset, &U);
2804     AttrSize = Offset - AttrSize;
2805 
2806     OutOffset +=
2807         cloneAttribute(*Die, InputDIE, Unit, Val, AttrSpec, AttrSize, AttrInfo);
2808   }
2809 
2810   // Look for accelerator entries.
2811   uint16_t Tag = InputDIE.getTag();
2812   // FIXME: This is slightly wrong. An inline_subroutine without a
2813   // low_pc, but with AT_ranges might be interesting to get into the
2814   // accelerator tables too. For now stick with dsymutil's behavior.
2815   if ((Info.InDebugMap || AttrInfo.HasLowPc) &&
2816       Tag != dwarf::DW_TAG_compile_unit &&
2817       getDIENames(InputDIE, Unit.getOrigUnit(), AttrInfo)) {
2818     if (AttrInfo.MangledName && AttrInfo.MangledName != AttrInfo.Name)
2819       Unit.addNameAccelerator(Die, AttrInfo.MangledName,
2820                               AttrInfo.MangledNameOffset,
2821                               Tag == dwarf::DW_TAG_inlined_subroutine);
2822     if (AttrInfo.Name)
2823       Unit.addNameAccelerator(Die, AttrInfo.Name, AttrInfo.NameOffset,
2824                               Tag == dwarf::DW_TAG_inlined_subroutine);
2825   } else if (isTypeTag(Tag) && !AttrInfo.IsDeclaration &&
2826              getDIENames(InputDIE, Unit.getOrigUnit(), AttrInfo)) {
2827     Unit.addTypeAccelerator(Die, AttrInfo.Name, AttrInfo.NameOffset);
2828   }
2829 
2830   // Determine whether there are any children that we want to keep.
2831   bool HasChildren = false;
2832   for (auto *Child = InputDIE.getFirstChild(); Child && !Child->isNULL();
2833        Child = Child->getSibling()) {
2834     unsigned Idx = U.getDIEIndex(Child);
2835     if (Unit.getInfo(Idx).Keep) {
2836       HasChildren = true;
2837       break;
2838     }
2839   }
2840 
2841   DIEAbbrev NewAbbrev = Die->generateAbbrev();
2842   if (HasChildren)
2843     NewAbbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
2844   // Assign a permanent abbrev number
2845   Linker.AssignAbbrev(NewAbbrev);
2846   Die->setAbbrevNumber(NewAbbrev.getNumber());
2847 
2848   // Add the size of the abbreviation number to the output offset.
2849   OutOffset += getULEB128Size(Die->getAbbrevNumber());
2850 
2851   if (!HasChildren) {
2852     // Update our size.
2853     Die->setSize(OutOffset - Die->getOffset());
2854     return Die;
2855   }
2856 
2857   // Recursively clone children.
2858   for (auto *Child = InputDIE.getFirstChild(); Child && !Child->isNULL();
2859        Child = Child->getSibling()) {
2860     if (DIE *Clone = cloneDIE(*Child, Unit, PCOffset, OutOffset, Flags)) {
2861       Die->addChild(Clone);
2862       OutOffset = Clone->getOffset() + Clone->getSize();
2863     }
2864   }
2865 
2866   // Account for the end of children marker.
2867   OutOffset += sizeof(int8_t);
2868   // Update our size.
2869   Die->setSize(OutOffset - Die->getOffset());
2870   return Die;
2871 }
2872 
2873 /// \brief Patch the input object file relevant debug_ranges entries
2874 /// and emit them in the output file. Update the relevant attributes
2875 /// to point at the new entries.
patchRangesForUnit(const CompileUnit & Unit,DWARFContext & OrigDwarf) const2876 void DwarfLinker::patchRangesForUnit(const CompileUnit &Unit,
2877                                      DWARFContext &OrigDwarf) const {
2878   DWARFDebugRangeList RangeList;
2879   const auto &FunctionRanges = Unit.getFunctionRanges();
2880   unsigned AddressSize = Unit.getOrigUnit().getAddressByteSize();
2881   DataExtractor RangeExtractor(OrigDwarf.getRangeSection(),
2882                                OrigDwarf.isLittleEndian(), AddressSize);
2883   auto InvalidRange = FunctionRanges.end(), CurrRange = InvalidRange;
2884   DWARFUnit &OrigUnit = Unit.getOrigUnit();
2885   const auto *OrigUnitDie = OrigUnit.getUnitDIE(false);
2886   uint64_t OrigLowPc = OrigUnitDie->getAttributeValueAsAddress(
2887       &OrigUnit, dwarf::DW_AT_low_pc, -1ULL);
2888   // Ranges addresses are based on the unit's low_pc. Compute the
2889   // offset we need to apply to adapt to the new unit's low_pc.
2890   int64_t UnitPcOffset = 0;
2891   if (OrigLowPc != -1ULL)
2892     UnitPcOffset = int64_t(OrigLowPc) - Unit.getLowPc();
2893 
2894   for (const auto &RangeAttribute : Unit.getRangesAttributes()) {
2895     uint32_t Offset = RangeAttribute.get();
2896     RangeAttribute.set(Streamer->getRangesSectionSize());
2897     RangeList.extract(RangeExtractor, &Offset);
2898     const auto &Entries = RangeList.getEntries();
2899     if (!Entries.empty()) {
2900       const DWARFDebugRangeList::RangeListEntry &First = Entries.front();
2901 
2902       if (CurrRange == InvalidRange ||
2903           First.StartAddress + OrigLowPc < CurrRange.start() ||
2904           First.StartAddress + OrigLowPc >= CurrRange.stop()) {
2905         CurrRange = FunctionRanges.find(First.StartAddress + OrigLowPc);
2906         if (CurrRange == InvalidRange ||
2907             CurrRange.start() > First.StartAddress + OrigLowPc) {
2908           reportWarning("no mapping for range.");
2909           continue;
2910         }
2911       }
2912     }
2913 
2914     Streamer->emitRangesEntries(UnitPcOffset, OrigLowPc, CurrRange, Entries,
2915                                 AddressSize);
2916   }
2917 }
2918 
2919 /// \brief Generate the debug_aranges entries for \p Unit and if the
2920 /// unit has a DW_AT_ranges attribute, also emit the debug_ranges
2921 /// contribution for this attribute.
2922 /// FIXME: this could actually be done right in patchRangesForUnit,
2923 /// but for the sake of initial bit-for-bit compatibility with legacy
2924 /// dsymutil, we have to do it in a delayed pass.
generateUnitRanges(CompileUnit & Unit) const2925 void DwarfLinker::generateUnitRanges(CompileUnit &Unit) const {
2926   auto Attr = Unit.getUnitRangesAttribute();
2927   if (Attr)
2928     Attr->set(Streamer->getRangesSectionSize());
2929   Streamer->emitUnitRangesEntries(Unit, static_cast<bool>(Attr));
2930 }
2931 
2932 /// \brief Insert the new line info sequence \p Seq into the current
2933 /// set of already linked line info \p Rows.
insertLineSequence(std::vector<DWARFDebugLine::Row> & Seq,std::vector<DWARFDebugLine::Row> & Rows)2934 static void insertLineSequence(std::vector<DWARFDebugLine::Row> &Seq,
2935                                std::vector<DWARFDebugLine::Row> &Rows) {
2936   if (Seq.empty())
2937     return;
2938 
2939   if (!Rows.empty() && Rows.back().Address < Seq.front().Address) {
2940     Rows.insert(Rows.end(), Seq.begin(), Seq.end());
2941     Seq.clear();
2942     return;
2943   }
2944 
2945   auto InsertPoint = std::lower_bound(
2946       Rows.begin(), Rows.end(), Seq.front(),
2947       [](const DWARFDebugLine::Row &LHS, const DWARFDebugLine::Row &RHS) {
2948         return LHS.Address < RHS.Address;
2949       });
2950 
2951   // FIXME: this only removes the unneeded end_sequence if the
2952   // sequences have been inserted in order. using a global sort like
2953   // described in patchLineTableForUnit() and delaying the end_sequene
2954   // elimination to emitLineTableForUnit() we can get rid of all of them.
2955   if (InsertPoint != Rows.end() &&
2956       InsertPoint->Address == Seq.front().Address && InsertPoint->EndSequence) {
2957     *InsertPoint = Seq.front();
2958     Rows.insert(InsertPoint + 1, Seq.begin() + 1, Seq.end());
2959   } else {
2960     Rows.insert(InsertPoint, Seq.begin(), Seq.end());
2961   }
2962 
2963   Seq.clear();
2964 }
2965 
patchStmtList(DIE & Die,DIEInteger Offset)2966 static void patchStmtList(DIE &Die, DIEInteger Offset) {
2967   for (auto &V : Die.values())
2968     if (V.getAttribute() == dwarf::DW_AT_stmt_list) {
2969       V = DIEValue(V.getAttribute(), V.getForm(), Offset);
2970       return;
2971     }
2972 
2973   llvm_unreachable("Didn't find DW_AT_stmt_list in cloned DIE!");
2974 }
2975 
2976 /// \brief Extract the line table for \p Unit from \p OrigDwarf, and
2977 /// recreate a relocated version of these for the address ranges that
2978 /// are present in the binary.
patchLineTableForUnit(CompileUnit & Unit,DWARFContext & OrigDwarf)2979 void DwarfLinker::patchLineTableForUnit(CompileUnit &Unit,
2980                                         DWARFContext &OrigDwarf) {
2981   const DWARFDebugInfoEntryMinimal *CUDie = Unit.getOrigUnit().getUnitDIE();
2982   uint64_t StmtList = CUDie->getAttributeValueAsSectionOffset(
2983       &Unit.getOrigUnit(), dwarf::DW_AT_stmt_list, -1ULL);
2984   if (StmtList == -1ULL)
2985     return;
2986 
2987   // Update the cloned DW_AT_stmt_list with the correct debug_line offset.
2988   if (auto *OutputDIE = Unit.getOutputUnitDIE())
2989     patchStmtList(*OutputDIE, DIEInteger(Streamer->getLineSectionSize()));
2990 
2991   // Parse the original line info for the unit.
2992   DWARFDebugLine::LineTable LineTable;
2993   uint32_t StmtOffset = StmtList;
2994   StringRef LineData = OrigDwarf.getLineSection().Data;
2995   DataExtractor LineExtractor(LineData, OrigDwarf.isLittleEndian(),
2996                               Unit.getOrigUnit().getAddressByteSize());
2997   LineTable.parse(LineExtractor, &OrigDwarf.getLineSection().Relocs,
2998                   &StmtOffset);
2999 
3000   // This vector is the output line table.
3001   std::vector<DWARFDebugLine::Row> NewRows;
3002   NewRows.reserve(LineTable.Rows.size());
3003 
3004   // Current sequence of rows being extracted, before being inserted
3005   // in NewRows.
3006   std::vector<DWARFDebugLine::Row> Seq;
3007   const auto &FunctionRanges = Unit.getFunctionRanges();
3008   auto InvalidRange = FunctionRanges.end(), CurrRange = InvalidRange;
3009 
3010   // FIXME: This logic is meant to generate exactly the same output as
3011   // Darwin's classic dsynutil. There is a nicer way to implement this
3012   // by simply putting all the relocated line info in NewRows and simply
3013   // sorting NewRows before passing it to emitLineTableForUnit. This
3014   // should be correct as sequences for a function should stay
3015   // together in the sorted output. There are a few corner cases that
3016   // look suspicious though, and that required to implement the logic
3017   // this way. Revisit that once initial validation is finished.
3018 
3019   // Iterate over the object file line info and extract the sequences
3020   // that correspond to linked functions.
3021   for (auto &Row : LineTable.Rows) {
3022     // Check wether we stepped out of the range. The range is
3023     // half-open, but consider accept the end address of the range if
3024     // it is marked as end_sequence in the input (because in that
3025     // case, the relocation offset is accurate and that entry won't
3026     // serve as the start of another function).
3027     if (CurrRange == InvalidRange || Row.Address < CurrRange.start() ||
3028         Row.Address > CurrRange.stop() ||
3029         (Row.Address == CurrRange.stop() && !Row.EndSequence)) {
3030       // We just stepped out of a known range. Insert a end_sequence
3031       // corresponding to the end of the range.
3032       uint64_t StopAddress = CurrRange != InvalidRange
3033                                  ? CurrRange.stop() + CurrRange.value()
3034                                  : -1ULL;
3035       CurrRange = FunctionRanges.find(Row.Address);
3036       bool CurrRangeValid =
3037           CurrRange != InvalidRange && CurrRange.start() <= Row.Address;
3038       if (!CurrRangeValid) {
3039         CurrRange = InvalidRange;
3040         if (StopAddress != -1ULL) {
3041           // Try harder by looking in the DebugMapObject function
3042           // ranges map. There are corner cases where this finds a
3043           // valid entry. It's unclear if this is right or wrong, but
3044           // for now do as dsymutil.
3045           // FIXME: Understand exactly what cases this addresses and
3046           // potentially remove it along with the Ranges map.
3047           auto Range = Ranges.lower_bound(Row.Address);
3048           if (Range != Ranges.begin() && Range != Ranges.end())
3049             --Range;
3050 
3051           if (Range != Ranges.end() && Range->first <= Row.Address &&
3052               Range->second.first >= Row.Address) {
3053             StopAddress = Row.Address + Range->second.second;
3054           }
3055         }
3056       }
3057       if (StopAddress != -1ULL && !Seq.empty()) {
3058         // Insert end sequence row with the computed end address, but
3059         // the same line as the previous one.
3060         auto NextLine = Seq.back();
3061         NextLine.Address = StopAddress;
3062         NextLine.EndSequence = 1;
3063         NextLine.PrologueEnd = 0;
3064         NextLine.BasicBlock = 0;
3065         NextLine.EpilogueBegin = 0;
3066         Seq.push_back(NextLine);
3067         insertLineSequence(Seq, NewRows);
3068       }
3069 
3070       if (!CurrRangeValid)
3071         continue;
3072     }
3073 
3074     // Ignore empty sequences.
3075     if (Row.EndSequence && Seq.empty())
3076       continue;
3077 
3078     // Relocate row address and add it to the current sequence.
3079     Row.Address += CurrRange.value();
3080     Seq.emplace_back(Row);
3081 
3082     if (Row.EndSequence)
3083       insertLineSequence(Seq, NewRows);
3084   }
3085 
3086   // Finished extracting, now emit the line tables.
3087   uint32_t PrologueEnd = StmtList + 10 + LineTable.Prologue.PrologueLength;
3088   // FIXME: LLVM hardcodes it's prologue values. We just copy the
3089   // prologue over and that works because we act as both producer and
3090   // consumer. It would be nicer to have a real configurable line
3091   // table emitter.
3092   if (LineTable.Prologue.Version != 2 ||
3093       LineTable.Prologue.DefaultIsStmt != DWARF2_LINE_DEFAULT_IS_STMT ||
3094       LineTable.Prologue.OpcodeBase > 13)
3095     reportWarning("line table paramters mismatch. Cannot emit.");
3096   else {
3097     MCDwarfLineTableParams Params;
3098     Params.DWARF2LineOpcodeBase = LineTable.Prologue.OpcodeBase;
3099     Params.DWARF2LineBase = LineTable.Prologue.LineBase;
3100     Params.DWARF2LineRange = LineTable.Prologue.LineRange;
3101     Streamer->emitLineTableForUnit(Params,
3102                                    LineData.slice(StmtList + 4, PrologueEnd),
3103                                    LineTable.Prologue.MinInstLength, NewRows,
3104                                    Unit.getOrigUnit().getAddressByteSize());
3105   }
3106 }
3107 
emitAcceleratorEntriesForUnit(CompileUnit & Unit)3108 void DwarfLinker::emitAcceleratorEntriesForUnit(CompileUnit &Unit) {
3109   Streamer->emitPubNamesForUnit(Unit);
3110   Streamer->emitPubTypesForUnit(Unit);
3111 }
3112 
3113 /// \brief Read the frame info stored in the object, and emit the
3114 /// patched frame descriptions for the linked binary.
3115 ///
3116 /// This is actually pretty easy as the data of the CIEs and FDEs can
3117 /// be considered as black boxes and moved as is. The only thing to do
3118 /// is to patch the addresses in the headers.
patchFrameInfoForObject(const DebugMapObject & DMO,DWARFContext & OrigDwarf,unsigned AddrSize)3119 void DwarfLinker::patchFrameInfoForObject(const DebugMapObject &DMO,
3120                                           DWARFContext &OrigDwarf,
3121                                           unsigned AddrSize) {
3122   StringRef FrameData = OrigDwarf.getDebugFrameSection();
3123   if (FrameData.empty())
3124     return;
3125 
3126   DataExtractor Data(FrameData, OrigDwarf.isLittleEndian(), 0);
3127   uint32_t InputOffset = 0;
3128 
3129   // Store the data of the CIEs defined in this object, keyed by their
3130   // offsets.
3131   DenseMap<uint32_t, StringRef> LocalCIES;
3132 
3133   while (Data.isValidOffset(InputOffset)) {
3134     uint32_t EntryOffset = InputOffset;
3135     uint32_t InitialLength = Data.getU32(&InputOffset);
3136     if (InitialLength == 0xFFFFFFFF)
3137       return reportWarning("Dwarf64 bits no supported");
3138 
3139     uint32_t CIEId = Data.getU32(&InputOffset);
3140     if (CIEId == 0xFFFFFFFF) {
3141       // This is a CIE, store it.
3142       StringRef CIEData = FrameData.substr(EntryOffset, InitialLength + 4);
3143       LocalCIES[EntryOffset] = CIEData;
3144       // The -4 is to account for the CIEId we just read.
3145       InputOffset += InitialLength - 4;
3146       continue;
3147     }
3148 
3149     uint32_t Loc = Data.getUnsigned(&InputOffset, AddrSize);
3150 
3151     // Some compilers seem to emit frame info that doesn't start at
3152     // the function entry point, thus we can't just lookup the address
3153     // in the debug map. Use the linker's range map to see if the FDE
3154     // describes something that we can relocate.
3155     auto Range = Ranges.upper_bound(Loc);
3156     if (Range != Ranges.begin())
3157       --Range;
3158     if (Range == Ranges.end() || Range->first > Loc ||
3159         Range->second.first <= Loc) {
3160       // The +4 is to account for the size of the InitialLength field itself.
3161       InputOffset = EntryOffset + InitialLength + 4;
3162       continue;
3163     }
3164 
3165     // This is an FDE, and we have a mapping.
3166     // Have we already emitted a corresponding CIE?
3167     StringRef CIEData = LocalCIES[CIEId];
3168     if (CIEData.empty())
3169       return reportWarning("Inconsistent debug_frame content. Dropping.");
3170 
3171     // Look if we already emitted a CIE that corresponds to the
3172     // referenced one (the CIE data is the key of that lookup).
3173     auto IteratorInserted = EmittedCIEs.insert(
3174         std::make_pair(CIEData, Streamer->getFrameSectionSize()));
3175     // If there is no CIE yet for this ID, emit it.
3176     if (IteratorInserted.second ||
3177         // FIXME: dsymutil-classic only caches the last used CIE for
3178         // reuse. Mimic that behavior for now. Just removing that
3179         // second half of the condition and the LastCIEOffset variable
3180         // makes the code DTRT.
3181         LastCIEOffset != IteratorInserted.first->getValue()) {
3182       LastCIEOffset = Streamer->getFrameSectionSize();
3183       IteratorInserted.first->getValue() = LastCIEOffset;
3184       Streamer->emitCIE(CIEData);
3185     }
3186 
3187     // Emit the FDE with updated address and CIE pointer.
3188     // (4 + AddrSize) is the size of the CIEId + initial_location
3189     // fields that will get reconstructed by emitFDE().
3190     unsigned FDERemainingBytes = InitialLength - (4 + AddrSize);
3191     Streamer->emitFDE(IteratorInserted.first->getValue(), AddrSize,
3192                       Loc + Range->second.second,
3193                       FrameData.substr(InputOffset, FDERemainingBytes));
3194     InputOffset += FDERemainingBytes;
3195   }
3196 }
3197 
copyAbbrev(const DWARFAbbreviationDeclaration & Abbrev,bool hasODR)3198 void DwarfLinker::DIECloner::copyAbbrev(
3199     const DWARFAbbreviationDeclaration &Abbrev, bool hasODR) {
3200   DIEAbbrev Copy(dwarf::Tag(Abbrev.getTag()),
3201                  dwarf::Form(Abbrev.hasChildren()));
3202 
3203   for (const auto &Attr : Abbrev.attributes()) {
3204     uint16_t Form = Attr.Form;
3205     if (hasODR && isODRAttribute(Attr.Attr))
3206       Form = dwarf::DW_FORM_ref_addr;
3207     Copy.AddAttribute(dwarf::Attribute(Attr.Attr), dwarf::Form(Form));
3208   }
3209 
3210   Linker.AssignAbbrev(Copy);
3211 }
3212 
getDwoId(const DWARFDebugInfoEntryMinimal & CUDie,const DWARFUnit & Unit)3213 static uint64_t getDwoId(const DWARFDebugInfoEntryMinimal &CUDie,
3214                          const DWARFUnit &Unit) {
3215   uint64_t DwoId =
3216       CUDie.getAttributeValueAsUnsignedConstant(&Unit, dwarf::DW_AT_dwo_id, 0);
3217   if (!DwoId)
3218     DwoId = CUDie.getAttributeValueAsUnsignedConstant(&Unit,
3219                                                       dwarf::DW_AT_GNU_dwo_id, 0);
3220   return DwoId;
3221 }
3222 
registerModuleReference(const DWARFDebugInfoEntryMinimal & CUDie,const DWARFUnit & Unit,DebugMap & ModuleMap,unsigned Indent)3223 bool DwarfLinker::registerModuleReference(
3224     const DWARFDebugInfoEntryMinimal &CUDie, const DWARFUnit &Unit,
3225     DebugMap &ModuleMap, unsigned Indent) {
3226   std::string PCMfile =
3227       CUDie.getAttributeValueAsString(&Unit, dwarf::DW_AT_dwo_name, "");
3228   if (PCMfile.empty())
3229     PCMfile =
3230         CUDie.getAttributeValueAsString(&Unit, dwarf::DW_AT_GNU_dwo_name, "");
3231   if (PCMfile.empty())
3232     return false;
3233 
3234   // Clang module DWARF skeleton CUs abuse this for the path to the module.
3235   std::string PCMpath =
3236       CUDie.getAttributeValueAsString(&Unit, dwarf::DW_AT_comp_dir, "");
3237   uint64_t DwoId = getDwoId(CUDie, Unit);
3238 
3239   std::string Name =
3240       CUDie.getAttributeValueAsString(&Unit, dwarf::DW_AT_name, "");
3241   if (Name.empty()) {
3242     reportWarning("Anonymous module skeleton CU for " + PCMfile);
3243     return true;
3244   }
3245 
3246   if (Options.Verbose) {
3247     outs().indent(Indent);
3248     outs() << "Found clang module reference " << PCMfile;
3249   }
3250 
3251   auto Cached = ClangModules.find(PCMfile);
3252   if (Cached != ClangModules.end()) {
3253     // FIXME: Until PR27449 (https://llvm.org/bugs/show_bug.cgi?id=27449) is
3254     // fixed in clang, only warn about DWO_id mismatches in verbose mode.
3255     // ASTFileSignatures will change randomly when a module is rebuilt.
3256     if (Options.Verbose && (Cached->second != DwoId))
3257       reportWarning(Twine("hash mismatch: this object file was built against a "
3258                           "different version of the module ") + PCMfile);
3259     if (Options.Verbose)
3260       outs() << " [cached].\n";
3261     return true;
3262   }
3263   if (Options.Verbose)
3264     outs() << " ...\n";
3265 
3266   // Cyclic dependencies are disallowed by Clang, but we still
3267   // shouldn't run into an infinite loop, so mark it as processed now.
3268   ClangModules.insert({PCMfile, DwoId});
3269   loadClangModule(PCMfile, PCMpath, Name, DwoId, ModuleMap, Indent + 2);
3270   return true;
3271 }
3272 
3273 ErrorOr<const object::ObjectFile &>
loadObject(BinaryHolder & BinaryHolder,DebugMapObject & Obj,const DebugMap & Map)3274 DwarfLinker::loadObject(BinaryHolder &BinaryHolder, DebugMapObject &Obj,
3275                         const DebugMap &Map) {
3276   auto ErrOrObjs =
3277       BinaryHolder.GetObjectFiles(Obj.getObjectFilename(), Obj.getTimestamp());
3278   if (std::error_code EC = ErrOrObjs.getError()) {
3279     reportWarning(Twine(Obj.getObjectFilename()) + ": " + EC.message());
3280     return EC;
3281   }
3282   auto ErrOrObj = BinaryHolder.Get(Map.getTriple());
3283   if (std::error_code EC = ErrOrObj.getError())
3284     reportWarning(Twine(Obj.getObjectFilename()) + ": " + EC.message());
3285   return ErrOrObj;
3286 }
3287 
loadClangModule(StringRef Filename,StringRef ModulePath,StringRef ModuleName,uint64_t DwoId,DebugMap & ModuleMap,unsigned Indent)3288 void DwarfLinker::loadClangModule(StringRef Filename, StringRef ModulePath,
3289                                   StringRef ModuleName, uint64_t DwoId,
3290                                   DebugMap &ModuleMap, unsigned Indent) {
3291   SmallString<80> Path(Options.PrependPath);
3292   if (sys::path::is_relative(Filename))
3293     sys::path::append(Path, ModulePath, Filename);
3294   else
3295     sys::path::append(Path, Filename);
3296   BinaryHolder ObjHolder(Options.Verbose);
3297   auto &Obj =
3298       ModuleMap.addDebugMapObject(Path, sys::TimeValue::PosixZeroTime());
3299   auto ErrOrObj = loadObject(ObjHolder, Obj, ModuleMap);
3300   if (!ErrOrObj) {
3301     // Try and emit more helpful warnings by applying some heuristics.
3302     StringRef ObjFile = CurrentDebugObject->getObjectFilename();
3303     bool isClangModule = sys::path::extension(Filename).equals(".pcm");
3304     bool isArchive = ObjFile.endswith(")");
3305     if (isClangModule) {
3306       StringRef ModuleCacheDir = sys::path::parent_path(Path);
3307       if (sys::fs::exists(ModuleCacheDir)) {
3308         // If the module's parent directory exists, we assume that the module
3309         // cache has expired and was pruned by clang.  A more adventurous
3310         // dsymutil would invoke clang to rebuild the module now.
3311         if (!ModuleCacheHintDisplayed) {
3312           errs() << "note: The clang module cache may have expired since this "
3313                     "object file was built. Rebuilding the object file will "
3314                     "rebuild the module cache.\n";
3315           ModuleCacheHintDisplayed = true;
3316         }
3317       } else if (isArchive) {
3318         // If the module cache directory doesn't exist at all and the object
3319         // file is inside a static library, we assume that the static library
3320         // was built on a different machine. We don't want to discourage module
3321         // debugging for convenience libraries within a project though.
3322         if (!ArchiveHintDisplayed) {
3323           errs() << "note: Linking a static library that was built with "
3324                     "-gmodules, but the module cache was not found.  "
3325                     "Redistributable static libraries should never be built "
3326                     "with module debugging enabled.  The debug experience will "
3327                     "be degraded due to incomplete debug information.\n";
3328           ArchiveHintDisplayed = true;
3329         }
3330       }
3331     }
3332     return;
3333   }
3334 
3335   std::unique_ptr<CompileUnit> Unit;
3336 
3337   // Setup access to the debug info.
3338   DWARFContextInMemory DwarfContext(*ErrOrObj);
3339   RelocationManager RelocMgr(*this);
3340   for (const auto &CU : DwarfContext.compile_units()) {
3341     auto *CUDie = CU->getUnitDIE(false);
3342     // Recursively get all modules imported by this one.
3343     if (!registerModuleReference(*CUDie, *CU, ModuleMap, Indent)) {
3344       if (Unit) {
3345         errs() << Filename << ": Clang modules are expected to have exactly"
3346                << " 1 compile unit.\n";
3347         exitDsymutil(1);
3348       }
3349       // FIXME: Until PR27449 (https://llvm.org/bugs/show_bug.cgi?id=27449) is
3350       // fixed in clang, only warn about DWO_id mismatches in verbose mode.
3351       // ASTFileSignatures will change randomly when a module is rebuilt.
3352       uint64_t PCMDwoId = getDwoId(*CUDie, *CU);
3353       if (PCMDwoId != DwoId) {
3354         if (Options.Verbose)
3355           reportWarning(
3356               Twine("hash mismatch: this object file was built against a "
3357                     "different version of the module ") + Filename);
3358         // Update the cache entry with the DwoId of the module loaded from disk.
3359         ClangModules[Filename] = PCMDwoId;
3360       }
3361 
3362       // Add this module.
3363       Unit = llvm::make_unique<CompileUnit>(*CU, UnitID++, !Options.NoODR,
3364                                             ModuleName);
3365       Unit->setHasInterestingContent();
3366       analyzeContextInfo(CUDie, 0, *Unit, &ODRContexts.getRoot(), StringPool,
3367                          ODRContexts);
3368       // Keep everything.
3369       Unit->markEverythingAsKept();
3370     }
3371   }
3372   if (Options.Verbose) {
3373     outs().indent(Indent);
3374     outs() << "cloning .debug_info from " << Filename << "\n";
3375   }
3376 
3377   DIECloner(*this, RelocMgr, DIEAlloc, MutableArrayRef<CompileUnit>(*Unit),
3378             Options)
3379       .cloneAllCompileUnits(DwarfContext);
3380 }
3381 
cloneAllCompileUnits(DWARFContextInMemory & DwarfContext)3382 void DwarfLinker::DIECloner::cloneAllCompileUnits(
3383     DWARFContextInMemory &DwarfContext) {
3384   if (!Linker.Streamer)
3385     return;
3386 
3387   for (auto &CurrentUnit : CompileUnits) {
3388     const auto *InputDIE = CurrentUnit.getOrigUnit().getUnitDIE();
3389     CurrentUnit.setStartOffset(Linker.OutputDebugInfoSize);
3390     DIE *OutputDIE = cloneDIE(*InputDIE, CurrentUnit, 0 /* PC offset */,
3391                               11 /* Unit Header size */, 0);
3392     CurrentUnit.setOutputUnitDIE(OutputDIE);
3393     Linker.OutputDebugInfoSize = CurrentUnit.computeNextUnitOffset();
3394     if (Linker.Options.NoOutput)
3395       continue;
3396     // FIXME: for compatibility with the classic dsymutil, we emit
3397     // an empty line table for the unit, even if the unit doesn't
3398     // actually exist in the DIE tree.
3399     Linker.patchLineTableForUnit(CurrentUnit, DwarfContext);
3400     if (!OutputDIE)
3401       continue;
3402     Linker.patchRangesForUnit(CurrentUnit, DwarfContext);
3403     Linker.Streamer->emitLocationsForUnit(CurrentUnit, DwarfContext);
3404     Linker.emitAcceleratorEntriesForUnit(CurrentUnit);
3405   }
3406 
3407   if (Linker.Options.NoOutput)
3408     return;
3409 
3410   // Emit all the compile unit's debug information.
3411   for (auto &CurrentUnit : CompileUnits) {
3412     Linker.generateUnitRanges(CurrentUnit);
3413     CurrentUnit.fixupForwardReferences();
3414     Linker.Streamer->emitCompileUnitHeader(CurrentUnit);
3415     if (!CurrentUnit.getOutputUnitDIE())
3416       continue;
3417     Linker.Streamer->emitDIE(*CurrentUnit.getOutputUnitDIE());
3418   }
3419 }
3420 
link(const DebugMap & Map)3421 bool DwarfLinker::link(const DebugMap &Map) {
3422 
3423   if (!createStreamer(Map.getTriple(), OutputFilename))
3424     return false;
3425 
3426   // Size of the DIEs (and headers) generated for the linked output.
3427   OutputDebugInfoSize = 0;
3428   // A unique ID that identifies each compile unit.
3429   UnitID = 0;
3430   DebugMap ModuleMap(Map.getTriple(), Map.getBinaryPath());
3431 
3432   for (const auto &Obj : Map.objects()) {
3433     CurrentDebugObject = Obj.get();
3434 
3435     if (Options.Verbose)
3436       outs() << "DEBUG MAP OBJECT: " << Obj->getObjectFilename() << "\n";
3437     auto ErrOrObj = loadObject(BinHolder, *Obj, Map);
3438     if (!ErrOrObj)
3439       continue;
3440 
3441     // Look for relocations that correspond to debug map entries.
3442     RelocationManager RelocMgr(*this);
3443     if (!RelocMgr.findValidRelocsInDebugInfo(*ErrOrObj, *Obj)) {
3444       if (Options.Verbose)
3445         outs() << "No valid relocations found. Skipping.\n";
3446       continue;
3447     }
3448 
3449     // Setup access to the debug info.
3450     DWARFContextInMemory DwarfContext(*ErrOrObj);
3451     startDebugObject(DwarfContext, *Obj);
3452 
3453     // In a first phase, just read in the debug info and load all clang modules.
3454     for (const auto &CU : DwarfContext.compile_units()) {
3455       auto *CUDie = CU->getUnitDIE(false);
3456       if (Options.Verbose) {
3457         outs() << "Input compilation unit:";
3458         CUDie->dump(outs(), CU.get(), 0);
3459       }
3460 
3461       if (!registerModuleReference(*CUDie, *CU, ModuleMap))
3462         Units.emplace_back(*CU, UnitID++, !Options.NoODR, "");
3463     }
3464 
3465     // Now build the DIE parent links that we will use during the next phase.
3466     for (auto &CurrentUnit : Units)
3467       analyzeContextInfo(CurrentUnit.getOrigUnit().getUnitDIE(), 0, CurrentUnit,
3468                          &ODRContexts.getRoot(), StringPool, ODRContexts);
3469 
3470     // Then mark all the DIEs that need to be present in the linked
3471     // output and collect some information about them. Note that this
3472     // loop can not be merged with the previous one becaue cross-cu
3473     // references require the ParentIdx to be setup for every CU in
3474     // the object file before calling this.
3475     for (auto &CurrentUnit : Units)
3476       lookForDIEsToKeep(RelocMgr, *CurrentUnit.getOrigUnit().getUnitDIE(), *Obj,
3477                         CurrentUnit, 0);
3478 
3479     // The calls to applyValidRelocs inside cloneDIE will walk the
3480     // reloc array again (in the same way findValidRelocsInDebugInfo()
3481     // did). We need to reset the NextValidReloc index to the beginning.
3482     RelocMgr.resetValidRelocs();
3483     if (RelocMgr.hasValidRelocs())
3484       DIECloner(*this, RelocMgr, DIEAlloc, Units, Options)
3485           .cloneAllCompileUnits(DwarfContext);
3486     if (!Options.NoOutput && !Units.empty())
3487       patchFrameInfoForObject(*Obj, DwarfContext,
3488                               Units[0].getOrigUnit().getAddressByteSize());
3489 
3490     // Clean-up before starting working on the next object.
3491     endDebugObject();
3492   }
3493 
3494   // Emit everything that's global.
3495   if (!Options.NoOutput) {
3496     Streamer->emitAbbrevs(Abbreviations);
3497     Streamer->emitStrings(StringPool);
3498   }
3499 
3500   return Options.NoOutput ? true : Streamer->finish(Map);
3501 }
3502 }
3503 
3504 /// \brief Get the offset of string \p S in the string table. This
3505 /// can insert a new element or return the offset of a preexisitng
3506 /// one.
getStringOffset(StringRef S)3507 uint32_t NonRelocatableStringpool::getStringOffset(StringRef S) {
3508   if (S.empty() && !Strings.empty())
3509     return 0;
3510 
3511   std::pair<uint32_t, StringMapEntryBase *> Entry(0, nullptr);
3512   MapTy::iterator It;
3513   bool Inserted;
3514 
3515   // A non-empty string can't be at offset 0, so if we have an entry
3516   // with a 0 offset, it must be a previously interned string.
3517   std::tie(It, Inserted) = Strings.insert(std::make_pair(S, Entry));
3518   if (Inserted || It->getValue().first == 0) {
3519     // Set offset and chain at the end of the entries list.
3520     It->getValue().first = CurrentEndOffset;
3521     CurrentEndOffset += S.size() + 1; // +1 for the '\0'.
3522     Last->getValue().second = &*It;
3523     Last = &*It;
3524   }
3525   return It->getValue().first;
3526 }
3527 
3528 /// \brief Put \p S into the StringMap so that it gets permanent
3529 /// storage, but do not actually link it in the chain of elements
3530 /// that go into the output section. A latter call to
3531 /// getStringOffset() with the same string will chain it though.
internString(StringRef S)3532 StringRef NonRelocatableStringpool::internString(StringRef S) {
3533   std::pair<uint32_t, StringMapEntryBase *> Entry(0, nullptr);
3534   auto InsertResult = Strings.insert(std::make_pair(S, Entry));
3535   return InsertResult.first->getKey();
3536 }
3537 
warn(const Twine & Warning,const Twine & Context)3538 void warn(const Twine &Warning, const Twine &Context) {
3539   errs() << Twine("while processing ") + Context + ":\n";
3540   errs() << Twine("warning: ") + Warning + "\n";
3541 }
3542 
error(const Twine & Error,const Twine & Context)3543 bool error(const Twine &Error, const Twine &Context) {
3544   errs() << Twine("while processing ") + Context + ":\n";
3545   errs() << Twine("error: ") + Error + "\n";
3546   return false;
3547 }
3548 
linkDwarf(StringRef OutputFilename,const DebugMap & DM,const LinkOptions & Options)3549 bool linkDwarf(StringRef OutputFilename, const DebugMap &DM,
3550                const LinkOptions &Options) {
3551   DwarfLinker Linker(OutputFilename, Options);
3552   return Linker.link(DM);
3553 }
3554 }
3555 }
3556