//===- lib/ReaderWriter/MachO/CompactUnwindPass.cpp -------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file A pass to convert MachO's __compact_unwind sections into the final /// __unwind_info format used during runtime. See /// mach-o/compact_unwind_encoding.h for more details on the formats involved. /// //===----------------------------------------------------------------------===// #include "ArchHandler.h" #include "File.h" #include "MachONormalizedFileBinaryUtils.h" #include "MachOPasses.h" #include "lld/Common/LLVM.h" #include "lld/Core/DefinedAtom.h" #include "lld/Core/File.h" #include "lld/Core/Reference.h" #include "lld/Core/Simple.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #define DEBUG_TYPE "macho-compact-unwind" namespace lld { namespace mach_o { namespace { struct CompactUnwindEntry { const Atom *rangeStart; const Atom *personalityFunction; const Atom *lsdaLocation; const Atom *ehFrame; uint32_t rangeLength; // There are 3 types of compact unwind entry, distinguished by the encoding // value: 0 indicates a function with no unwind info; // _archHandler.dwarfCompactUnwindType() indicates that the entry defers to // __eh_frame, and that the ehFrame entry will be valid; any other value is a // real compact unwind entry -- personalityFunction will be set and // lsdaLocation may be. uint32_t encoding; CompactUnwindEntry(const DefinedAtom *function) : rangeStart(function), personalityFunction(nullptr), lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(function->size()), encoding(0) {} CompactUnwindEntry() : rangeStart(nullptr), personalityFunction(nullptr), lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(0), encoding(0) {} }; struct UnwindInfoPage { ArrayRef entries; }; } class UnwindInfoAtom : public SimpleDefinedAtom { public: UnwindInfoAtom(ArchHandler &archHandler, const File &file, bool isBig, std::vector &personalities, std::vector &commonEncodings, std::vector &pages, uint32_t numLSDAs) : SimpleDefinedAtom(file), _archHandler(archHandler), _commonEncodingsOffset(7 * sizeof(uint32_t)), _personalityArrayOffset(_commonEncodingsOffset + commonEncodings.size() * sizeof(uint32_t)), _topLevelIndexOffset(_personalityArrayOffset + personalities.size() * sizeof(uint32_t)), _lsdaIndexOffset(_topLevelIndexOffset + 3 * (pages.size() + 1) * sizeof(uint32_t)), _firstPageOffset(_lsdaIndexOffset + 2 * numLSDAs * sizeof(uint32_t)), _isBig(isBig) { addHeader(commonEncodings.size(), personalities.size(), pages.size()); addCommonEncodings(commonEncodings); addPersonalityFunctions(personalities); addTopLevelIndexes(pages); addLSDAIndexes(pages, numLSDAs); addSecondLevelPages(pages); } ~UnwindInfoAtom() override = default; ContentType contentType() const override { return DefinedAtom::typeProcessedUnwindInfo; } Alignment alignment() const override { return 4; } uint64_t size() const override { return _contents.size(); } ContentPermissions permissions() const override { return DefinedAtom::permR__; } ArrayRef rawContent() const override { return _contents; } void addHeader(uint32_t numCommon, uint32_t numPersonalities, uint32_t numPages) { using normalized::write32; uint32_t headerSize = 7 * sizeof(uint32_t); _contents.resize(headerSize); uint8_t *headerEntries = _contents.data(); // version write32(headerEntries, 1, _isBig); // commonEncodingsArraySectionOffset write32(headerEntries + sizeof(uint32_t), _commonEncodingsOffset, _isBig); // commonEncodingsArrayCount write32(headerEntries + 2 * sizeof(uint32_t), numCommon, _isBig); // personalityArraySectionOffset write32(headerEntries + 3 * sizeof(uint32_t), _personalityArrayOffset, _isBig); // personalityArrayCount write32(headerEntries + 4 * sizeof(uint32_t), numPersonalities, _isBig); // indexSectionOffset write32(headerEntries + 5 * sizeof(uint32_t), _topLevelIndexOffset, _isBig); // indexCount write32(headerEntries + 6 * sizeof(uint32_t), numPages + 1, _isBig); } /// Add the list of common encodings to the section; this is simply an array /// of uint32_t compact values. Size has already been specified in the header. void addCommonEncodings(std::vector &commonEncodings) { using normalized::write32; _contents.resize(_commonEncodingsOffset + commonEncodings.size() * sizeof(uint32_t)); uint8_t *commonEncodingsArea = reinterpret_cast(_contents.data() + _commonEncodingsOffset); for (uint32_t encoding : commonEncodings) { write32(commonEncodingsArea, encoding, _isBig); commonEncodingsArea += sizeof(uint32_t); } } void addPersonalityFunctions(std::vector personalities) { _contents.resize(_personalityArrayOffset + personalities.size() * sizeof(uint32_t)); for (unsigned i = 0; i < personalities.size(); ++i) addImageReferenceIndirect(_personalityArrayOffset + i * sizeof(uint32_t), personalities[i]); } void addTopLevelIndexes(std::vector &pages) { using normalized::write32; uint32_t numIndexes = pages.size() + 1; _contents.resize(_topLevelIndexOffset + numIndexes * 3 * sizeof(uint32_t)); uint32_t pageLoc = _firstPageOffset; // The most difficult job here is calculating the LSDAs; everything else // follows fairly naturally, but we can't state where the first uint8_t *indexData = &_contents[_topLevelIndexOffset]; uint32_t numLSDAs = 0; for (unsigned i = 0; i < pages.size(); ++i) { // functionOffset addImageReference(_topLevelIndexOffset + 3 * i * sizeof(uint32_t), pages[i].entries[0].rangeStart); // secondLevelPagesSectionOffset write32(indexData + (3 * i + 1) * sizeof(uint32_t), pageLoc, _isBig); write32(indexData + (3 * i + 2) * sizeof(uint32_t), _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig); for (auto &entry : pages[i].entries) if (entry.lsdaLocation) ++numLSDAs; } // Finally, write out the final sentinel index auto &finalEntry = pages[pages.size() - 1].entries.back(); addImageReference(_topLevelIndexOffset + 3 * pages.size() * sizeof(uint32_t), finalEntry.rangeStart, finalEntry.rangeLength); // secondLevelPagesSectionOffset => 0 write32(indexData + (3 * pages.size() + 2) * sizeof(uint32_t), _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig); } void addLSDAIndexes(std::vector &pages, uint32_t numLSDAs) { _contents.resize(_lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t)); uint32_t curOffset = _lsdaIndexOffset; for (auto &page : pages) { for (auto &entry : page.entries) { if (!entry.lsdaLocation) continue; addImageReference(curOffset, entry.rangeStart); addImageReference(curOffset + sizeof(uint32_t), entry.lsdaLocation); curOffset += 2 * sizeof(uint32_t); } } } void addSecondLevelPages(std::vector &pages) { for (auto &page : pages) { addRegularSecondLevelPage(page); } } void addRegularSecondLevelPage(const UnwindInfoPage &page) { uint32_t curPageOffset = _contents.size(); const int16_t headerSize = sizeof(uint32_t) + 2 * sizeof(uint16_t); uint32_t curPageSize = headerSize + 2 * page.entries.size() * sizeof(uint32_t); _contents.resize(curPageOffset + curPageSize); using normalized::write32; using normalized::write16; // 2 => regular page write32(&_contents[curPageOffset], 2, _isBig); // offset of 1st entry write16(&_contents[curPageOffset + 4], headerSize, _isBig); write16(&_contents[curPageOffset + 6], page.entries.size(), _isBig); uint32_t pagePos = curPageOffset + headerSize; for (auto &entry : page.entries) { addImageReference(pagePos, entry.rangeStart); write32(_contents.data() + pagePos + sizeof(uint32_t), entry.encoding, _isBig); if ((entry.encoding & 0x0f000000U) == _archHandler.dwarfCompactUnwindType()) addEhFrameReference(pagePos + sizeof(uint32_t), entry.ehFrame); pagePos += 2 * sizeof(uint32_t); } } void addEhFrameReference(uint32_t offset, const Atom *dest, Reference::Addend addend = 0) { addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), _archHandler.unwindRefToEhFrameKind(), offset, dest, addend); } void addImageReference(uint32_t offset, const Atom *dest, Reference::Addend addend = 0) { addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), _archHandler.imageOffsetKind(), offset, dest, addend); } void addImageReferenceIndirect(uint32_t offset, const Atom *dest) { addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), _archHandler.imageOffsetKindIndirect(), offset, dest, 0); } private: mach_o::ArchHandler &_archHandler; std::vector _contents; uint32_t _commonEncodingsOffset; uint32_t _personalityArrayOffset; uint32_t _topLevelIndexOffset; uint32_t _lsdaIndexOffset; uint32_t _firstPageOffset; bool _isBig; }; /// Pass for instantiating and optimizing GOT slots. /// class CompactUnwindPass : public Pass { public: CompactUnwindPass(const MachOLinkingContext &context) : _ctx(context), _archHandler(_ctx.archHandler()), _file(*_ctx.make_file("")), _isBig(MachOLinkingContext::isBigEndian(_ctx.arch())) { _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); } private: llvm::Error perform(SimpleFile &mergedFile) override { LLVM_DEBUG(llvm::dbgs() << "MachO Compact Unwind pass\n"); std::map unwindLocs; std::map dwarfFrames; std::vector personalities; uint32_t numLSDAs = 0; // First collect all __compact_unwind and __eh_frame entries, addressable by // the function referred to. collectCompactUnwindEntries(mergedFile, unwindLocs, personalities, numLSDAs); collectDwarfFrameEntries(mergedFile, dwarfFrames); // Skip rest of pass if no unwind info. if (unwindLocs.empty() && dwarfFrames.empty()) return llvm::Error::success(); // FIXME: if there are more than 4 personality functions then we need to // defer to DWARF info for the ones we don't put in the list. They should // also probably be sorted by frequency. assert(personalities.size() <= 4); // TODO: Find common encodings for use by compressed pages. std::vector commonEncodings; // Now sort the entries by final address and fixup the compact encoding to // its final form (i.e. set personality function bits & create DWARF // references where needed). std::vector unwindInfos = createUnwindInfoEntries( mergedFile, unwindLocs, personalities, dwarfFrames); // Remove any unused eh-frame atoms. pruneUnusedEHFrames(mergedFile, unwindInfos, unwindLocs, dwarfFrames); // Finally, we can start creating pages based on these entries. LLVM_DEBUG(llvm::dbgs() << " Splitting entries into pages\n"); // FIXME: we split the entries into pages naively: lots of 4k pages followed // by a small one. ld64 tried to minimize space and align them to real 4k // boundaries. That might be worth doing, or perhaps we could perform some // minor balancing for expected number of lookups. std::vector pages; auto remainingInfos = llvm::makeArrayRef(unwindInfos); do { pages.push_back(UnwindInfoPage()); // FIXME: we only create regular pages at the moment. These can hold up to // 1021 entries according to the documentation. unsigned entriesInPage = std::min(1021U, (unsigned)remainingInfos.size()); pages.back().entries = remainingInfos.slice(0, entriesInPage); remainingInfos = remainingInfos.slice(entriesInPage); LLVM_DEBUG(llvm::dbgs() << " Page from " << pages.back().entries[0].rangeStart->name() << " to " << pages.back().entries.back().rangeStart->name() << " + " << llvm::format("0x%x", pages.back().entries.back().rangeLength) << " has " << entriesInPage << " entries\n"); } while (!remainingInfos.empty()); auto *unwind = new (_file.allocator()) UnwindInfoAtom(_archHandler, _file, _isBig, personalities, commonEncodings, pages, numLSDAs); mergedFile.addAtom(*unwind); // Finally, remove all __compact_unwind atoms now that we've processed them. mergedFile.removeDefinedAtomsIf([](const DefinedAtom *atom) { return atom->contentType() == DefinedAtom::typeCompactUnwindInfo; }); return llvm::Error::success(); } void collectCompactUnwindEntries( const SimpleFile &mergedFile, std::map &unwindLocs, std::vector &personalities, uint32_t &numLSDAs) { LLVM_DEBUG(llvm::dbgs() << " Collecting __compact_unwind entries\n"); for (const DefinedAtom *atom : mergedFile.defined()) { if (atom->contentType() != DefinedAtom::typeCompactUnwindInfo) continue; auto unwindEntry = extractCompactUnwindEntry(atom); unwindLocs.insert(std::make_pair(unwindEntry.rangeStart, unwindEntry)); LLVM_DEBUG(llvm::dbgs() << " Entry for " << unwindEntry.rangeStart->name() << ", encoding=" << llvm::format("0x%08x", unwindEntry.encoding)); if (unwindEntry.personalityFunction) LLVM_DEBUG(llvm::dbgs() << ", personality=" << unwindEntry.personalityFunction->name() << ", lsdaLoc=" << unwindEntry.lsdaLocation->name()); LLVM_DEBUG(llvm::dbgs() << '\n'); // Count number of LSDAs we see, since we need to know how big the index // will be while laying out the section. if (unwindEntry.lsdaLocation) ++numLSDAs; // Gather the personality functions now, so that they're in deterministic // order (derived from the DefinedAtom order). if (unwindEntry.personalityFunction && !llvm::count(personalities, unwindEntry.personalityFunction)) personalities.push_back(unwindEntry.personalityFunction); } } CompactUnwindEntry extractCompactUnwindEntry(const DefinedAtom *atom) { CompactUnwindEntry entry; for (const Reference *ref : *atom) { switch (ref->offsetInAtom()) { case 0: // FIXME: there could legitimately be functions with multiple encoding // entries. However, nothing produces them at the moment. assert(ref->addend() == 0 && "unexpected offset into function"); entry.rangeStart = ref->target(); break; case 0x10: assert(ref->addend() == 0 && "unexpected offset into personality fn"); entry.personalityFunction = ref->target(); break; case 0x18: assert(ref->addend() == 0 && "unexpected offset into LSDA atom"); entry.lsdaLocation = ref->target(); break; } } if (atom->rawContent().size() < 4 * sizeof(uint32_t)) return entry; using normalized::read32; entry.rangeLength = read32(atom->rawContent().data() + 2 * sizeof(uint32_t), _isBig); entry.encoding = read32(atom->rawContent().data() + 3 * sizeof(uint32_t), _isBig); return entry; } void collectDwarfFrameEntries(const SimpleFile &mergedFile, std::map &dwarfFrames) { for (const DefinedAtom *ehFrameAtom : mergedFile.defined()) { if (ehFrameAtom->contentType() != DefinedAtom::typeCFI) continue; if (ArchHandler::isDwarfCIE(_isBig, ehFrameAtom)) continue; if (const Atom *function = _archHandler.fdeTargetFunction(ehFrameAtom)) dwarfFrames[function] = ehFrameAtom; } } /// Every atom defined in __TEXT,__text needs an entry in the final /// __unwind_info section (in order). These comes from two sources: /// + Input __compact_unwind sections where possible (after adding the /// personality function offset which is only known now). /// + A synthesised reference to __eh_frame if there's no __compact_unwind /// or too many personality functions to be accommodated. std::vector createUnwindInfoEntries( const SimpleFile &mergedFile, const std::map &unwindLocs, const std::vector &personalities, const std::map &dwarfFrames) { std::vector unwindInfos; LLVM_DEBUG(llvm::dbgs() << " Creating __unwind_info entries\n"); // The final order in the __unwind_info section must be derived from the // order of typeCode atoms, since that's how they'll be put into the object // file eventually (yuck!). for (const DefinedAtom *atom : mergedFile.defined()) { if (atom->contentType() != DefinedAtom::typeCode) continue; unwindInfos.push_back(finalizeUnwindInfoEntryForAtom( atom, unwindLocs, personalities, dwarfFrames)); LLVM_DEBUG(llvm::dbgs() << " Entry for " << atom->name() << ", final encoding=" << llvm::format("0x%08x", unwindInfos.back().encoding) << '\n'); } return unwindInfos; } /// Remove unused EH frames. /// /// An EH frame is considered unused if there is a corresponding compact /// unwind atom that doesn't require the EH frame. void pruneUnusedEHFrames( SimpleFile &mergedFile, const std::vector &unwindInfos, const std::map &unwindLocs, const std::map &dwarfFrames) { // Worklist of all 'used' FDEs. std::vector usedDwarfWorklist; // We have to check two conditions when building the worklist: // (1) EH frames used by compact unwind entries. for (auto &entry : unwindInfos) if (entry.ehFrame) usedDwarfWorklist.push_back(cast(entry.ehFrame)); // (2) EH frames that reference functions with no corresponding compact // unwind info. for (auto &entry : dwarfFrames) if (!unwindLocs.count(entry.first)) usedDwarfWorklist.push_back(cast(entry.second)); // Add all transitively referenced CFI atoms by processing the worklist. std::set usedDwarfFrames; while (!usedDwarfWorklist.empty()) { const DefinedAtom *cfiAtom = usedDwarfWorklist.back(); usedDwarfWorklist.pop_back(); usedDwarfFrames.insert(cfiAtom); for (const auto *ref : *cfiAtom) { const DefinedAtom *cfiTarget = dyn_cast(ref->target()); if (cfiTarget->contentType() == DefinedAtom::typeCFI) usedDwarfWorklist.push_back(cfiTarget); } } // Finally, delete all unreferenced CFI atoms. mergedFile.removeDefinedAtomsIf([&](const DefinedAtom *atom) { if ((atom->contentType() == DefinedAtom::typeCFI) && !usedDwarfFrames.count(atom)) return true; return false; }); } CompactUnwindEntry finalizeUnwindInfoEntryForAtom( const DefinedAtom *function, const std::map &unwindLocs, const std::vector &personalities, const std::map &dwarfFrames) { auto unwindLoc = unwindLocs.find(function); CompactUnwindEntry entry; if (unwindLoc == unwindLocs.end()) { // Default entry has correct encoding (0 => no unwind), but we need to // synthesise the function. entry.rangeStart = function; entry.rangeLength = function->size(); } else entry = unwindLoc->second; // If there's no __compact_unwind entry, or it explicitly says to use // __eh_frame, we need to try and fill in the correct DWARF atom. if (entry.encoding == _archHandler.dwarfCompactUnwindType() || entry.encoding == 0) { auto dwarfFrame = dwarfFrames.find(function); if (dwarfFrame != dwarfFrames.end()) { entry.encoding = _archHandler.dwarfCompactUnwindType(); entry.ehFrame = dwarfFrame->second; } } auto personality = llvm::find(personalities, entry.personalityFunction); uint32_t personalityIdx = personality == personalities.end() ? 0 : personality - personalities.begin() + 1; // FIXME: We should also use DWARF when there isn't enough room for the // personality function in the compact encoding. assert(personalityIdx < 4 && "too many personality functions"); entry.encoding |= personalityIdx << 28; if (entry.lsdaLocation) entry.encoding |= 1U << 30; return entry; } const MachOLinkingContext &_ctx; mach_o::ArchHandler &_archHandler; MachOFile &_file; bool _isBig; }; void addCompactUnwindPass(PassManager &pm, const MachOLinkingContext &ctx) { assert(ctx.needsCompactUnwindPass()); pm.add(std::make_unique(ctx)); } } // end namespace mach_o } // end namespace lld