1 //===- DebugTypes.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "DebugTypes.h"
10 #include "Chunks.h"
11 #include "Driver.h"
12 #include "InputFiles.h"
13 #include "PDB.h"
14 #include "TypeMerger.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "lld/Common/Timer.h"
18 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
19 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
20 #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
21 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
22 #include "llvm/DebugInfo/PDB/GenericError.h"
23 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
24 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
25 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
26 #include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
27 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
28 #include "llvm/Support/FormatVariadic.h"
29 #include "llvm/Support/Parallel.h"
30 #include "llvm/Support/Path.h"
31
32 using namespace llvm;
33 using namespace llvm::codeview;
34 using namespace lld;
35 using namespace lld::coff;
36
37 namespace {
38 class TypeServerIpiSource;
39
// The TypeServerSource class represents a PDB type server, a file referenced by
// OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
// files, therefore there must be only one instance per OBJ lot. The file path
// is discovered from the dependent OBJ's debug type stream. The
// TypeServerSource object is then queued and loaded by the COFF Driver. The
// debug type stream for such PDB files will be merged first in the final PDB,
// before any dependent OBJ.
47 class TypeServerSource : public TpiSource {
48 public:
TypeServerSource(PDBInputFile * f)49 explicit TypeServerSource(PDBInputFile *f)
50 : TpiSource(PDB, nullptr), pdbInputFile(f) {
51 if (f->loadErr && *f->loadErr)
52 return;
53 pdb::PDBFile &file = f->session->getPDBFile();
54 auto expectedInfo = file.getPDBInfoStream();
55 if (!expectedInfo)
56 return;
57 auto it = mappings.emplace(expectedInfo->getGuid(), this);
58 assert(it.second);
59 (void)it;
60 }
61
62 Error mergeDebugT(TypeMerger *m) override;
63
64 void loadGHashes() override;
65 void remapTpiWithGHashes(GHashState *g) override;
66
isDependency() const67 bool isDependency() const override { return true; }
68
69 PDBInputFile *pdbInputFile = nullptr;
70
71 // TpiSource for IPI stream.
72 TypeServerIpiSource *ipiSrc = nullptr;
73
74 static std::map<codeview::GUID, TypeServerSource *> mappings;
75 };
76
77 // Companion to TypeServerSource. Stores the index map for the IPI stream in the
78 // PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
79 // invariant of one type index space per source.
80 class TypeServerIpiSource : public TpiSource {
81 public:
TypeServerIpiSource()82 explicit TypeServerIpiSource() : TpiSource(PDBIpi, nullptr) {}
83
84 friend class TypeServerSource;
85
86 // All of the TpiSource methods are no-ops. The parent TypeServerSource
87 // handles both TPI and IPI.
mergeDebugT(TypeMerger * m)88 Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
loadGHashes()89 void loadGHashes() override {}
remapTpiWithGHashes(GHashState * g)90 void remapTpiWithGHashes(GHashState *g) override {}
isDependency() const91 bool isDependency() const override { return true; }
92 };
93
94 // This class represents the debug type stream of an OBJ file that depends on a
95 // PDB type server (see TypeServerSource).
96 class UseTypeServerSource : public TpiSource {
97 Expected<TypeServerSource *> getTypeServerSource();
98
99 public:
UseTypeServerSource(ObjFile * f,TypeServer2Record ts)100 UseTypeServerSource(ObjFile *f, TypeServer2Record ts)
101 : TpiSource(UsingPDB, f), typeServerDependency(ts) {}
102
103 Error mergeDebugT(TypeMerger *m) override;
104
105 // No need to load ghashes from /Zi objects.
loadGHashes()106 void loadGHashes() override {}
107 void remapTpiWithGHashes(GHashState *g) override;
108
109 // Information about the PDB type server dependency, that needs to be loaded
110 // in before merging this OBJ.
111 TypeServer2Record typeServerDependency;
112 };
113
// This class represents the debug type stream of a Microsoft precompiled
// headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
// PDB, before any other OBJs that depend on this. Note that only MSVC generates
// such files, clang does not.
118 class PrecompSource : public TpiSource {
119 public:
PrecompSource(ObjFile * f)120 PrecompSource(ObjFile *f) : TpiSource(PCH, f) {
121 if (!f->pchSignature || !*f->pchSignature)
122 fatal(toString(f) +
123 " claims to be a PCH object, but does not have a valid signature");
124 auto it = mappings.emplace(*f->pchSignature, this);
125 if (!it.second)
126 fatal("a PCH object with the same signature has already been provided (" +
127 toString(it.first->second->file) + " and " + toString(file) + ")");
128 }
129
130 void loadGHashes() override;
131
isDependency() const132 bool isDependency() const override { return true; }
133
134 static std::map<uint32_t, PrecompSource *> mappings;
135 };
136
137 // This class represents the debug type stream of an OBJ file that depends on a
138 // Microsoft precompiled headers OBJ (see PrecompSource).
139 class UsePrecompSource : public TpiSource {
140 public:
UsePrecompSource(ObjFile * f,PrecompRecord precomp)141 UsePrecompSource(ObjFile *f, PrecompRecord precomp)
142 : TpiSource(UsingPCH, f), precompDependency(precomp) {}
143
144 Error mergeDebugT(TypeMerger *m) override;
145
146 void loadGHashes() override;
147 void remapTpiWithGHashes(GHashState *g) override;
148
149 private:
150 Error mergeInPrecompHeaderObj();
151
152 public:
153 // Information about the Precomp OBJ dependency, that needs to be loaded in
154 // before merging this OBJ.
155 PrecompRecord precompDependency;
156 };
157 } // namespace
158
159 std::vector<TpiSource *> TpiSource::instances;
160 ArrayRef<TpiSource *> TpiSource::dependencySources;
161 ArrayRef<TpiSource *> TpiSource::objectSources;
162
TpiSource(TpiKind k,ObjFile * f)163 TpiSource::TpiSource(TpiKind k, ObjFile *f)
164 : kind(k), tpiSrcIdx(instances.size()), file(f) {
165 instances.push_back(this);
166 }
167
168 // Vtable key method.
~TpiSource()169 TpiSource::~TpiSource() {
170 // Silence any assertions about unchecked errors.
171 consumeError(std::move(typeMergingError));
172 }
173
sortDependencies()174 void TpiSource::sortDependencies() {
175 // Order dependencies first, but preserve the existing order.
176 std::vector<TpiSource *> deps;
177 std::vector<TpiSource *> objs;
178 for (TpiSource *s : instances)
179 (s->isDependency() ? deps : objs).push_back(s);
180 uint32_t numDeps = deps.size();
181 uint32_t numObjs = objs.size();
182 instances = std::move(deps);
183 instances.insert(instances.end(), objs.begin(), objs.end());
184 for (uint32_t i = 0, e = instances.size(); i < e; ++i)
185 instances[i]->tpiSrcIdx = i;
186 dependencySources = makeArrayRef(instances.data(), numDeps);
187 objectSources = makeArrayRef(instances.data() + numDeps, numObjs);
188 }
189
makeTpiSource(ObjFile * file)190 TpiSource *lld::coff::makeTpiSource(ObjFile *file) {
191 return make<TpiSource>(TpiSource::Regular, file);
192 }
193
makeTypeServerSource(PDBInputFile * pdbInputFile)194 TpiSource *lld::coff::makeTypeServerSource(PDBInputFile *pdbInputFile) {
195 // Type server sources come in pairs: the TPI stream, and the IPI stream.
196 auto *tpiSource = make<TypeServerSource>(pdbInputFile);
197 if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
198 tpiSource->ipiSrc = make<TypeServerIpiSource>();
199 return tpiSource;
200 }
201
makeUseTypeServerSource(ObjFile * file,TypeServer2Record ts)202 TpiSource *lld::coff::makeUseTypeServerSource(ObjFile *file,
203 TypeServer2Record ts) {
204 return make<UseTypeServerSource>(file, ts);
205 }
206
makePrecompSource(ObjFile * file)207 TpiSource *lld::coff::makePrecompSource(ObjFile *file) {
208 return make<PrecompSource>(file);
209 }
210
makeUsePrecompSource(ObjFile * file,PrecompRecord precomp)211 TpiSource *lld::coff::makeUsePrecompSource(ObjFile *file,
212 PrecompRecord precomp) {
213 return make<UsePrecompSource>(file, precomp);
214 }
215
216 std::map<codeview::GUID, TypeServerSource *> TypeServerSource::mappings;
217
218 std::map<uint32_t, PrecompSource *> PrecompSource::mappings;
219
remapTypeIndex(TypeIndex & ti,TiRefKind refKind) const220 bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
221 if (ti.isSimple())
222 return true;
223
224 // This can be an item index or a type index. Choose the appropriate map.
225 ArrayRef<TypeIndex> tpiOrIpiMap =
226 (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
227 if (ti.toArrayIndex() >= tpiOrIpiMap.size())
228 return false;
229 ti = tpiOrIpiMap[ti.toArrayIndex()];
230 return true;
231 }
232
remapRecord(MutableArrayRef<uint8_t> rec,ArrayRef<TiReference> typeRefs)233 void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
234 ArrayRef<TiReference> typeRefs) {
235 MutableArrayRef<uint8_t> contents = rec.drop_front(sizeof(RecordPrefix));
236 for (const TiReference &ref : typeRefs) {
237 unsigned byteSize = ref.Count * sizeof(TypeIndex);
238 if (contents.size() < ref.Offset + byteSize)
239 fatal("symbol record too short");
240
241 MutableArrayRef<TypeIndex> indices(
242 reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
243 for (TypeIndex &ti : indices) {
244 if (!remapTypeIndex(ti, ref.Kind)) {
245 if (config->verbose) {
246 uint16_t kind =
247 reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
248 StringRef fname = file ? file->getName() : "<unknown PDB>";
249 log("failed to remap type index in record of kind 0x" +
250 utohexstr(kind) + " in " + fname + " with bad " +
251 (ref.Kind == TiRefKind::IndexRef ? "item" : "type") +
252 " index 0x" + utohexstr(ti.getIndex()));
253 }
254 ti = TypeIndex(SimpleTypeKind::NotTranslated);
255 continue;
256 }
257 }
258 }
259 }
260
remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec)261 void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
262 // TODO: Handle errors similar to symbols.
263 SmallVector<TiReference, 32> typeRefs;
264 discoverTypeIndices(CVType(rec), typeRefs);
265 remapRecord(rec, typeRefs);
266 }
267
remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec)268 bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
269 // Discover type index references in the record. Skip it if we don't
270 // know where they are.
271 SmallVector<TiReference, 32> typeRefs;
272 if (!discoverTypeIndicesInSymbol(rec, typeRefs))
273 return false;
274 remapRecord(rec, typeRefs);
275 return true;
276 }
277
278 // A COFF .debug$H section is currently a clang extension. This function checks
279 // if a .debug$H section is in a format that we expect / understand, so that we
280 // can ignore any sections which are coincidentally also named .debug$H but do
281 // not contain a format we recognize.
canUseDebugH(ArrayRef<uint8_t> debugH)282 static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
283 if (debugH.size() < sizeof(object::debug_h_header))
284 return false;
285 auto *header =
286 reinterpret_cast<const object::debug_h_header *>(debugH.data());
287 debugH = debugH.drop_front(sizeof(object::debug_h_header));
288 return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
289 header->Version == 0 &&
290 header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::SHA1_8) &&
291 (debugH.size() % 8 == 0);
292 }
293
getDebugH(ObjFile * file)294 static Optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
295 SectionChunk *sec =
296 SectionChunk::findByName(file->getDebugChunks(), ".debug$H");
297 if (!sec)
298 return llvm::None;
299 ArrayRef<uint8_t> contents = sec->getContents();
300 if (!canUseDebugH(contents))
301 return None;
302 return contents;
303 }
304
305 static ArrayRef<GloballyHashedType>
getHashesFromDebugH(ArrayRef<uint8_t> debugH)306 getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
307 assert(canUseDebugH(debugH));
308 debugH = debugH.drop_front(sizeof(object::debug_h_header));
309 uint32_t count = debugH.size() / sizeof(GloballyHashedType);
310 return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
311 }
312
313 // Merge .debug$T for a generic object file.
mergeDebugT(TypeMerger * m)314 Error TpiSource::mergeDebugT(TypeMerger *m) {
315 assert(!config->debugGHashes &&
316 "use remapTpiWithGHashes when ghash is enabled");
317
318 CVTypeArray types;
319 BinaryStreamReader reader(file->debugTypes, support::little);
320 cantFail(reader.readArray(types, reader.getLength()));
321
322 // When dealing with PCH.OBJ, some indices were already merged.
323 unsigned nbHeadIndices = indexMapStorage.size();
324
325 if (auto err = mergeTypeAndIdRecords(
326 m->idTable, m->typeTable, indexMapStorage, types, file->pchSignature))
327 fatal("codeview::mergeTypeAndIdRecords failed: " +
328 toString(std::move(err)));
329
330 // In an object, there is only one mapping for both types and items.
331 tpiMap = indexMapStorage;
332 ipiMap = indexMapStorage;
333
334 if (config->showSummary) {
335 nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
336 nbTypeRecordsBytes = reader.getLength();
337 // Count how many times we saw each type record in our input. This
338 // calculation requires a second pass over the type records to classify each
339 // record as a type or index. This is slow, but this code executes when
340 // collecting statistics.
341 m->tpiCounts.resize(m->getTypeTable().size());
342 m->ipiCounts.resize(m->getIDTable().size());
343 uint32_t srcIdx = nbHeadIndices;
344 for (CVType &ty : types) {
345 TypeIndex dstIdx = tpiMap[srcIdx++];
346 // Type merging may fail, so a complex source type may become the simple
347 // NotTranslated type, which cannot be used as an array index.
348 if (dstIdx.isSimple())
349 continue;
350 SmallVectorImpl<uint32_t> &counts =
351 isIdRecord(ty.kind()) ? m->ipiCounts : m->tpiCounts;
352 ++counts[dstIdx.toArrayIndex()];
353 }
354 }
355
356 return Error::success();
357 }
358
359 // Merge types from a type server PDB.
mergeDebugT(TypeMerger * m)360 Error TypeServerSource::mergeDebugT(TypeMerger *m) {
361 assert(!config->debugGHashes &&
362 "use remapTpiWithGHashes when ghash is enabled");
363
364 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
365 Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
366 if (auto e = expectedTpi.takeError())
367 fatal("Type server does not have TPI stream: " + toString(std::move(e)));
368 pdb::TpiStream *maybeIpi = nullptr;
369 if (pdbFile.hasPDBIpiStream()) {
370 Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
371 if (auto e = expectedIpi.takeError())
372 fatal("Error getting type server IPI stream: " + toString(std::move(e)));
373 maybeIpi = &*expectedIpi;
374 }
375
376 // Merge TPI first, because the IPI stream will reference type indices.
377 if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage,
378 expectedTpi->typeArray()))
379 fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err)));
380 tpiMap = indexMapStorage;
381
382 // Merge IPI.
383 if (maybeIpi) {
384 if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage,
385 maybeIpi->typeArray()))
386 fatal("codeview::mergeIdRecords failed: " + toString(std::move(err)));
387 ipiMap = ipiSrc->indexMapStorage;
388 }
389
390 if (config->showSummary) {
391 nbTypeRecords = tpiMap.size() + ipiMap.size();
392 nbTypeRecordsBytes =
393 expectedTpi->typeArray().getUnderlyingStream().getLength() +
394 (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
395 : 0);
396
397 // Count how many times we saw each type record in our input. If a
398 // destination type index is present in the source to destination type index
399 // map, that means we saw it once in the input. Add it to our histogram.
400 m->tpiCounts.resize(m->getTypeTable().size());
401 m->ipiCounts.resize(m->getIDTable().size());
402 for (TypeIndex ti : tpiMap)
403 if (!ti.isSimple())
404 ++m->tpiCounts[ti.toArrayIndex()];
405 for (TypeIndex ti : ipiMap)
406 if (!ti.isSimple())
407 ++m->ipiCounts[ti.toArrayIndex()];
408 }
409
410 return Error::success();
411 }
412
getTypeServerSource()413 Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
414 const codeview::GUID &tsId = typeServerDependency.getGuid();
415 StringRef tsPath = typeServerDependency.getName();
416
417 TypeServerSource *tsSrc;
418 auto it = TypeServerSource::mappings.find(tsId);
419 if (it != TypeServerSource::mappings.end()) {
420 tsSrc = it->second;
421 } else {
422 // The file failed to load, lookup by name
423 PDBInputFile *pdb = PDBInputFile::findFromRecordPath(tsPath, file);
424 if (!pdb)
425 return createFileError(tsPath, errorCodeToError(std::error_code(
426 ENOENT, std::generic_category())));
427 // If an error occurred during loading, throw it now
428 if (pdb->loadErr && *pdb->loadErr)
429 return createFileError(tsPath, std::move(*pdb->loadErr));
430
431 tsSrc = (TypeServerSource *)pdb->debugTypesObj;
432 }
433 return tsSrc;
434 }
435
mergeDebugT(TypeMerger * m)436 Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
437 Expected<TypeServerSource *> tsSrc = getTypeServerSource();
438 if (!tsSrc)
439 return tsSrc.takeError();
440
441 pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile();
442 auto expectedInfo = pdbSession.getPDBInfoStream();
443 if (!expectedInfo)
444 return expectedInfo.takeError();
445
446 // Just because a file with a matching name was found and it was an actual
447 // PDB file doesn't mean it matches. For it to match the InfoStream's GUID
448 // must match the GUID specified in the TypeServer2 record.
449 if (expectedInfo->getGuid() != typeServerDependency.getGuid())
450 return createFileError(
451 typeServerDependency.getName(),
452 make_error<pdb::PDBError>(pdb::pdb_error_code::signature_out_of_date));
453
454 // Reuse the type index map of the type server.
455 tpiMap = (*tsSrc)->tpiMap;
456 ipiMap = (*tsSrc)->ipiMap;
457 return Error::success();
458 }
459
equalsPath(StringRef path1,StringRef path2)460 static bool equalsPath(StringRef path1, StringRef path2) {
461 #if defined(_WIN32)
462 return path1.equals_lower(path2);
463 #else
464 return path1.equals(path2);
465 #endif
466 }
467
468 // Find by name an OBJ provided on the command line
findObjByName(StringRef fileNameOnly)469 static PrecompSource *findObjByName(StringRef fileNameOnly) {
470 SmallString<128> currentPath;
471 for (auto kv : PrecompSource::mappings) {
472 StringRef currentFileName = sys::path::filename(kv.second->file->getName(),
473 sys::path::Style::windows);
474
475 // Compare based solely on the file name (link.exe behavior)
476 if (equalsPath(currentFileName, fileNameOnly))
477 return kv.second;
478 }
479 return nullptr;
480 }
481
findPrecompSource(ObjFile * file,PrecompRecord & pr)482 static PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr) {
483 // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
484 // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly,
485 // the paths embedded in the OBJs are in the Windows format.
486 SmallString<128> prFileName =
487 sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows);
488
489 auto it = PrecompSource::mappings.find(pr.getSignature());
490 if (it != PrecompSource::mappings.end()) {
491 return it->second;
492 }
493 // Lookup by name
494 return findObjByName(prFileName);
495 }
496
findPrecompMap(ObjFile * file,PrecompRecord & pr)497 static Expected<PrecompSource *> findPrecompMap(ObjFile *file,
498 PrecompRecord &pr) {
499 PrecompSource *precomp = findPrecompSource(file, pr);
500
501 if (!precomp)
502 return createFileError(
503 pr.getPrecompFilePath(),
504 make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
505
506 if (pr.getSignature() != file->pchSignature)
507 return createFileError(
508 toString(file),
509 make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
510
511 if (pr.getSignature() != *precomp->file->pchSignature)
512 return createFileError(
513 toString(precomp->file),
514 make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
515
516 return precomp;
517 }
518
519 /// Merges a precompiled headers TPI map into the current TPI map. The
520 /// precompiled headers object will also be loaded and remapped in the
521 /// process.
mergeInPrecompHeaderObj()522 Error UsePrecompSource::mergeInPrecompHeaderObj() {
523 auto e = findPrecompMap(file, precompDependency);
524 if (!e)
525 return e.takeError();
526
527 PrecompSource *precompSrc = *e;
528 if (precompSrc->tpiMap.empty())
529 return Error::success();
530
531 assert(precompDependency.getStartTypeIndex() ==
532 TypeIndex::FirstNonSimpleIndex);
533 assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
534 // Use the previously remapped index map from the precompiled headers.
535 indexMapStorage.append(precompSrc->tpiMap.begin(),
536 precompSrc->tpiMap.begin() +
537 precompDependency.getTypesCount());
538
539 return Error::success();
540 }
541
mergeDebugT(TypeMerger * m)542 Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
543 // This object was compiled with /Yu, so process the corresponding
544 // precompiled headers object (/Yc) first. Some type indices in the current
545 // object are referencing data in the precompiled headers object, so we need
546 // both to be loaded.
547 if (Error e = mergeInPrecompHeaderObj())
548 return e;
549
550 return TpiSource::mergeDebugT(m);
551 }
552
countTypeServerPDBs()553 uint32_t TpiSource::countTypeServerPDBs() {
554 return TypeServerSource::mappings.size();
555 }
556
countPrecompObjs()557 uint32_t TpiSource::countPrecompObjs() {
558 return PrecompSource::mappings.size();
559 }
560
clear()561 void TpiSource::clear() {
562 // Clean up any owned ghash allocations.
563 clearGHashes();
564 TpiSource::instances.clear();
565 TypeServerSource::mappings.clear();
566 PrecompSource::mappings.clear();
567 }
568
569 //===----------------------------------------------------------------------===//
// Parallel GHash type merging implementation.
571 //===----------------------------------------------------------------------===//
572
loadGHashes()573 void TpiSource::loadGHashes() {
574 if (Optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) {
575 ghashes = getHashesFromDebugH(*debugH);
576 ownedGHashes = false;
577 } else {
578 CVTypeArray types;
579 BinaryStreamReader reader(file->debugTypes, support::little);
580 cantFail(reader.readArray(types, reader.getLength()));
581 assignGHashesFromVector(GloballyHashedType::hashTypes(types));
582 }
583
584 fillIsItemIndexFromDebugT();
585 }
586
587 // Copies ghashes from a vector into an array. These are long lived, so it's
588 // worth the time to copy these into an appropriately sized vector to reduce
589 // memory usage.
assignGHashesFromVector(std::vector<GloballyHashedType> && hashVec)590 void TpiSource::assignGHashesFromVector(
591 std::vector<GloballyHashedType> &&hashVec) {
592 GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()];
593 memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType));
594 ghashes = makeArrayRef(hashes, hashVec.size());
595 ownedGHashes = true;
596 }
597
598 // Faster way to iterate type records. forEachTypeChecked is faster than
599 // iterating CVTypeArray. It avoids virtual readBytes calls in inner loops.
forEachTypeChecked(ArrayRef<uint8_t> types,function_ref<void (const CVType &)> fn)600 static void forEachTypeChecked(ArrayRef<uint8_t> types,
601 function_ref<void(const CVType &)> fn) {
602 checkError(
603 forEachCodeViewRecord<CVType>(types, [fn](const CVType &ty) -> Error {
604 fn(ty);
605 return Error::success();
606 }));
607 }
608
609 // Walk over file->debugTypes and fill in the isItemIndex bit vector.
610 // TODO: Store this information in .debug$H so that we don't have to recompute
611 // it. This is the main bottleneck slowing down parallel ghashing with one
612 // thread over single-threaded ghashing.
fillIsItemIndexFromDebugT()613 void TpiSource::fillIsItemIndexFromDebugT() {
614 uint32_t index = 0;
615 isItemIndex.resize(ghashes.size());
616 forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
617 if (isIdRecord(ty.kind()))
618 isItemIndex.set(index);
619 ++index;
620 });
621 }
622
mergeTypeRecord(TypeIndex curIndex,CVType ty)623 void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) {
624 // Decide if the merged type goes into TPI or IPI.
625 bool isItem = isIdRecord(ty.kind());
626 MergedInfo &merged = isItem ? mergedIpi : mergedTpi;
627
628 // Copy the type into our mutable buffer.
629 assert(ty.length() <= codeview::MaxRecordLength);
630 size_t offset = merged.recs.size();
631 size_t newSize = alignTo(ty.length(), 4);
632 merged.recs.resize(offset + newSize);
633 auto newRec = makeMutableArrayRef(&merged.recs[offset], newSize);
634 memcpy(newRec.data(), ty.data().data(), newSize);
635
636 // Fix up the record prefix and padding bytes if it required resizing.
637 if (newSize != ty.length()) {
638 reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2;
639 for (size_t i = ty.length(); i < newSize; ++i)
640 newRec[i] = LF_PAD0 + (newSize - i);
641 }
642
643 // Remap the type indices in the new record.
644 remapTypesInTypeRecord(newRec);
645 uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec)));
646 merged.recSizes.push_back(static_cast<uint16_t>(newSize));
647 merged.recHashes.push_back(pdbHash);
648
649 // Retain a mapping from PDB function id to PDB function type. This mapping is
650 // used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32
651 // symbols.
652 if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) {
653 bool success = ty.length() >= 12;
654 TypeIndex funcId = curIndex;
655 if (success)
656 success &= remapTypeIndex(funcId, TiRefKind::IndexRef);
657 TypeIndex funcType =
658 *reinterpret_cast<const TypeIndex *>(&newRec.data()[8]);
659 if (success) {
660 funcIdToType.push_back({funcId, funcType});
661 } else {
662 StringRef fname = file ? file->getName() : "<unknown PDB>";
663 warn("corrupt LF_[M]FUNC_ID record 0x" + utohexstr(curIndex.getIndex()) +
664 " in " + fname);
665 }
666 }
667 }
668
mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,TypeIndex beginIndex)669 void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
670 TypeIndex beginIndex) {
671 // Re-sort the list of unique types by index.
672 if (kind == PDB)
673 assert(std::is_sorted(uniqueTypes.begin(), uniqueTypes.end()));
674 else
675 llvm::sort(uniqueTypes);
676
677 // Accumulate all the unique types into one buffer in mergedTypes.
678 uint32_t ghashIndex = 0;
679 auto nextUniqueIndex = uniqueTypes.begin();
680 assert(mergedTpi.recs.empty());
681 assert(mergedIpi.recs.empty());
682 forEachTypeChecked(typeRecords, [&](const CVType &ty) {
683 if (nextUniqueIndex != uniqueTypes.end() &&
684 *nextUniqueIndex == ghashIndex) {
685 mergeTypeRecord(beginIndex + ghashIndex, ty);
686 ++nextUniqueIndex;
687 }
688 ++ghashIndex;
689 });
690 assert(nextUniqueIndex == uniqueTypes.end() &&
691 "failed to merge all desired records");
692 assert(uniqueTypes.size() ==
693 mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
694 "missing desired record");
695 }
696
remapTpiWithGHashes(GHashState * g)697 void TpiSource::remapTpiWithGHashes(GHashState *g) {
698 assert(config->debugGHashes && "ghashes must be enabled");
699 fillMapFromGHashes(g, indexMapStorage);
700 tpiMap = indexMapStorage;
701 ipiMap = indexMapStorage;
702 mergeUniqueTypeRecords(file->debugTypes);
703 // TODO: Free all unneeded ghash resources now that we have a full index map.
704
705 if (config->showSummary) {
706 nbTypeRecords = ghashes.size();
707 nbTypeRecordsBytes = file->debugTypes.size();
708 }
709 }
710
711 // PDBs do not actually store global hashes, so when merging a type server
712 // PDB we have to synthesize global hashes. To do this, we first synthesize
713 // global hashes for the TPI stream, since it is independent, then we
714 // synthesize hashes for the IPI stream, using the hashes for the TPI stream
715 // as inputs.
loadGHashes()716 void TypeServerSource::loadGHashes() {
717 // Don't hash twice.
718 if (!ghashes.empty())
719 return;
720 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
721
722 // Hash TPI stream.
723 Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
724 if (auto e = expectedTpi.takeError())
725 fatal("Type server does not have TPI stream: " + toString(std::move(e)));
726 assignGHashesFromVector(
727 GloballyHashedType::hashTypes(expectedTpi->typeArray()));
728 isItemIndex.resize(ghashes.size());
729
730 // Hash IPI stream, which depends on TPI ghashes.
731 if (!pdbFile.hasPDBIpiStream())
732 return;
733 Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
734 if (auto e = expectedIpi.takeError())
735 fatal("error retrieving IPI stream: " + toString(std::move(e)));
736 ipiSrc->assignGHashesFromVector(
737 GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes));
738
739 // The IPI stream isItemIndex bitvector should be all ones.
740 ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size());
741 ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size());
742 }
743
744 // Flatten discontiguous PDB type arrays to bytes so that we can use
745 // forEachTypeChecked instead of CVTypeArray iteration. Copying all types from
746 // type servers is faster than iterating all object files compiled with /Z7 with
747 // CVTypeArray, which has high overheads due to the virtual interface of
748 // BinaryStream::readBytes.
typeArrayToBytes(const CVTypeArray & types)749 static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
750 BinaryStreamRef stream = types.getUnderlyingStream();
751 ArrayRef<uint8_t> debugTypes;
752 checkError(stream.readBytes(0, stream.getLength(), debugTypes));
753 return debugTypes;
754 }
755
756 // Merge types from a type server PDB.
remapTpiWithGHashes(GHashState * g)757 void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
758 assert(config->debugGHashes && "ghashes must be enabled");
759
760 // IPI merging depends on TPI, so do TPI first, then do IPI. No need to
761 // propagate errors, those should've been handled during ghash loading.
762 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
763 pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream());
764 fillMapFromGHashes(g, indexMapStorage);
765 tpiMap = indexMapStorage;
766 mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray()));
767 if (pdbFile.hasPDBIpiStream()) {
768 pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream());
769 ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size());
770 ipiSrc->fillMapFromGHashes(g, ipiSrc->indexMapStorage);
771 ipiMap = ipiSrc->indexMapStorage;
772 ipiSrc->tpiMap = tpiMap;
773 ipiSrc->ipiMap = ipiMap;
774 ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray()));
775
776 if (config->showSummary) {
777 nbTypeRecords = ipiSrc->ghashes.size();
778 nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
779 }
780 }
781
782 if (config->showSummary) {
783 nbTypeRecords += ghashes.size();
784 nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
785 }
786 }
787
remapTpiWithGHashes(GHashState * g)788 void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
789 // No remapping to do with /Zi objects. Simply use the index map from the type
790 // server. Errors should have been reported earlier. Symbols from this object
791 // will be ignored.
792 Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
793 if (!maybeTsSrc) {
794 typeMergingError =
795 joinErrors(std::move(typeMergingError), maybeTsSrc.takeError());
796 return;
797 }
798 TypeServerSource *tsSrc = *maybeTsSrc;
799 tpiMap = tsSrc->tpiMap;
800 ipiMap = tsSrc->ipiMap;
801 }
802
loadGHashes()803 void PrecompSource::loadGHashes() {
804 if (getDebugH(file)) {
805 warn("ignoring .debug$H section; pch with ghash is not implemented");
806 }
807
808 uint32_t ghashIdx = 0;
809 std::vector<GloballyHashedType> hashVec;
810 forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
811 // Remember the index of the LF_ENDPRECOMP record so it can be excluded from
812 // the PDB. There must be an entry in the list of ghashes so that the type
813 // indexes of the following records in the /Yc PCH object line up.
814 if (ty.kind() == LF_ENDPRECOMP)
815 endPrecompGHashIdx = ghashIdx;
816
817 hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
818 isItemIndex.push_back(isIdRecord(ty.kind()));
819 ++ghashIdx;
820 });
821 assignGHashesFromVector(std::move(hashVec));
822 }
823
loadGHashes()824 void UsePrecompSource::loadGHashes() {
825 PrecompSource *pchSrc = findPrecompSource(file, precompDependency);
826 if (!pchSrc)
827 return;
828
829 // To compute ghashes of a /Yu object file, we need to build on the the
830 // ghashes of the /Yc PCH object. After we are done hashing, discard the
831 // ghashes from the PCH source so we don't unnecessarily try to deduplicate
832 // them.
833 std::vector<GloballyHashedType> hashVec =
834 pchSrc->ghashes.take_front(precompDependency.getTypesCount());
835 forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
836 hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
837 isItemIndex.push_back(isIdRecord(ty.kind()));
838 });
839 hashVec.erase(hashVec.begin(),
840 hashVec.begin() + precompDependency.getTypesCount());
841 assignGHashesFromVector(std::move(hashVec));
842 }
843
remapTpiWithGHashes(GHashState * g)844 void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
845 // This object was compiled with /Yu, so process the corresponding
846 // precompiled headers object (/Yc) first. Some type indices in the current
847 // object are referencing data in the precompiled headers object, so we need
848 // both to be loaded.
849 if (Error e = mergeInPrecompHeaderObj()) {
850 typeMergingError = joinErrors(std::move(typeMergingError), std::move(e));
851 return;
852 }
853
854 fillMapFromGHashes(g, indexMapStorage);
855 tpiMap = indexMapStorage;
856 ipiMap = indexMapStorage;
857 mergeUniqueTypeRecords(file->debugTypes,
858 TypeIndex(precompDependency.getStartTypeIndex() +
859 precompDependency.getTypesCount()));
860 if (config->showSummary) {
861 nbTypeRecords = ghashes.size();
862 nbTypeRecordsBytes = file->debugTypes.size();
863 }
864 }
865
866 namespace {
867 /// A concurrent hash table for global type hashing. It is based on this paper:
868 /// Concurrent Hash Tables: Fast and General(?)!
869 /// https://dl.acm.org/doi/10.1145/3309206
870 ///
871 /// This hash table is meant to be used in two phases:
872 /// 1. concurrent insertions
873 /// 2. concurrent reads
874 /// It does not support lookup, deletion, or rehashing. It uses linear probing.
875 ///
876 /// The paper describes storing a key-value pair in two machine words.
877 /// Generally, the values stored in this map are type indices, and we can use
878 /// those values to recover the ghash key from a side table. This allows us to
879 /// shrink the table entries further at the cost of some loads, and sidesteps
880 /// the need for a 128 bit atomic compare-and-swap operation.
881 ///
882 /// During insertion, a priority function is used to decide which insertion
883 /// should be preferred. This ensures that the output is deterministic. For
884 /// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
885 ///
886 class GHashCell;
887 struct GHashTable {
888 GHashCell *table = nullptr;
889 uint32_t tableSize = 0;
890
891 GHashTable() = default;
892 ~GHashTable();
893
894 /// Initialize the table with the given size. Because the table cannot be
895 /// resized, the initial size of the table must be large enough to contain all
896 /// inputs, or insertion may not be able to find an empty cell.
897 void init(uint32_t newTableSize);
898
899 /// Insert the cell with the given ghash into the table. Return the insertion
900 /// position in the table. It is safe for the caller to store the insertion
901 /// position because the table cannot be resized.
902 uint32_t insert(GloballyHashedType ghash, GHashCell newCell);
903 };
904
905 /// A ghash table cell for deduplicating types from TpiSources.
906 class GHashCell {
907 uint64_t data = 0;
908
909 public:
910 GHashCell() = default;
911
912 // Construct data most to least significant so that sorting works well:
913 // - isItem
914 // - tpiSrcIdx
915 // - ghashIdx
916 // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
917 // non-zero representation.
GHashCell(bool isItem,uint32_t tpiSrcIdx,uint32_t ghashIdx)918 GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
919 : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) |
920 ghashIdx) {
921 assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
922 assert(ghashIdx == getGHashIdx() && "round trip failure");
923 }
924
GHashCell(uint64_t data)925 explicit GHashCell(uint64_t data) : data(data) {}
926
927 // The empty cell is all zeros.
isEmpty() const928 bool isEmpty() const { return data == 0ULL; }
929
930 /// Extract the tpiSrcIdx.
getTpiSrcIdx() const931 uint32_t getTpiSrcIdx() const {
932 return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1;
933 }
934
935 /// Extract the index into the ghash array of the TpiSource.
getGHashIdx() const936 uint32_t getGHashIdx() const { return (uint32_t)data; }
937
isItem() const938 bool isItem() const { return data & (1ULL << 63U); }
939
940 /// Get the ghash key for this cell.
getGHash() const941 GloballyHashedType getGHash() const {
942 return TpiSource::instances[getTpiSrcIdx()]->ghashes[getGHashIdx()];
943 }
944
945 /// The priority function for the cell. The data is stored such that lower
946 /// tpiSrcIdx and ghashIdx values are preferred, which means that type record
947 /// from earlier sources are more likely to prevail.
operator <(const GHashCell & l,const GHashCell & r)948 friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
949 return l.data < r.data;
950 }
951 };
952 } // namespace
953
namespace lld {
namespace coff {
/// This type is just a wrapper around GHashTable with external linkage so it
/// can be used from a header. GHashTable itself is defined inside an anonymous
/// namespace in this file, so only pointers to this wrapper are passed to the
/// remapTpiWithGHashes / fillMapFromGHashes functions.
struct GHashState {
  /// The ghash table shared by all sources during ghash type merging.
  GHashTable table;
};
} // namespace coff
} // namespace lld
963
// Release the cell array. `table` defaults to nullptr, in which case delete[]
// is a no-op.
GHashTable::~GHashTable() { delete[] table; }
965
init(uint32_t newTableSize)966 void GHashTable::init(uint32_t newTableSize) {
967 table = new GHashCell[newTableSize];
968 memset(table, 0, newTableSize * sizeof(GHashCell));
969 tableSize = newTableSize;
970 }
971
insert(GloballyHashedType ghash,GHashCell newCell)972 uint32_t GHashTable::insert(GloballyHashedType ghash, GHashCell newCell) {
973 assert(!newCell.isEmpty() && "cannot insert empty cell value");
974
975 // FIXME: The low bytes of SHA1 have low entropy for short records, which
976 // type records are. Swap the byte order for better entropy. A better ghash
977 // won't need this.
978 uint32_t startIdx =
979 ByteSwap_64(*reinterpret_cast<uint64_t *>(&ghash)) % tableSize;
980
981 // Do a linear probe starting at startIdx.
982 uint32_t idx = startIdx;
983 while (true) {
984 // Run a compare and swap loop. There are four cases:
985 // - cell is empty: CAS into place and return
986 // - cell has matching key, earlier priority: do nothing, return
987 // - cell has matching key, later priority: CAS into place and return
988 // - cell has non-matching key: hash collision, probe next cell
989 auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
990 GHashCell oldCell(cellPtr->load());
991 while (oldCell.isEmpty() || oldCell.getGHash() == ghash) {
992 // Check if there is an existing ghash entry with a higher priority
993 // (earlier ordering). If so, this is a duplicate, we are done.
994 if (!oldCell.isEmpty() && oldCell < newCell)
995 return idx;
996 // Either the cell is empty, or our value is higher priority. Try to
997 // compare and swap. If it succeeds, we are done.
998 if (cellPtr->compare_exchange_weak(oldCell, newCell))
999 return idx;
1000 // If the CAS failed, check this cell again.
1001 }
1002
1003 // Advance the probe. Wrap around to the beginning if we run off the end.
1004 ++idx;
1005 idx = idx == tableSize ? 0 : idx;
1006 if (idx == startIdx) {
1007 // If this becomes an issue, we could mark failure and rehash from the
1008 // beginning with a bigger table. There is no difference between rehashing
1009 // internally and starting over.
1010 report_fatal_error("ghash table is full");
1011 }
1012 }
1013 llvm_unreachable("left infloop");
1014 }
1015
// Construct the merger; both the type table and the id table are built on the
// caller-supplied allocator.
TypeMerger::TypeMerger(llvm::BumpPtrAllocator &alloc)
    : typeTable(alloc), idTable(alloc) {}
1018
// Defaulted destructor, defined out of line in this file.
TypeMerger::~TypeMerger() = default;
1020
// Merge the type records of all TpiSources using global type hashes. The work
// proceeds in phases:
//   1. load ghashes for every source,
//   2. insert every ghash into one shared table to find duplicates,
//   3. sort the surviving cells to assign destination PDB type indices,
//   4. let every source remap and merge its unique records,
//   5. gather the per-source function-id-to-type maps and drop the ghashes.
void TypeMerger::mergeTypesWithGHash() {
  // Load ghashes. Do type servers and PCH objects first.
  {
    ScopedTimer t1(loadGHashTimer);
    parallelForEach(TpiSource::dependencySources,
                    [&](TpiSource *source) { source->loadGHashes(); });
    parallelForEach(TpiSource::objectSources,
                    [&](TpiSource *source) { source->loadGHashes(); });
  }

  ScopedTimer t2(mergeGHashTimer);
  GHashState ghashState;

  // Estimate the size of hash table needed to deduplicate ghashes. This *must*
  // be larger than the number of unique types, or hash table insertion may not
  // be able to find a vacant slot. Summing the input types guarantees this, but
  // it is a gross overestimate. The table size could be reduced to save memory,
  // but it would require implementing rehashing, and this table is generally
  // small compared to total memory usage, at eight bytes per input type record,
  // and most input type records are larger than eight bytes.
  size_t tableSize = 0;
  for (TpiSource *source : TpiSource::instances)
    tableSize += source->ghashes.size();

  // Cap the table size so that we can use 32-bit cell indices. Type indices are
  // also 32-bit, so this is an inherent PDB file format limit anyway.
  tableSize = std::min(size_t(INT32_MAX), tableSize);
  ghashState.table.init(static_cast<uint32_t>(tableSize));

  // Insert ghashes in parallel. During concurrent insertion, we cannot observe
  // the contents of the hash table cell, but we can remember the insertion
  // position. Because the table does not rehash, the position will not change
  // under insertion. After insertion is done, the value of the cell can be read
  // to retrieve the final PDB type index.
  parallelForEachN(0, TpiSource::instances.size(), [&](size_t tpiSrcIdx) {
    TpiSource *source = TpiSource::instances[tpiSrcIdx];
    source->indexMapStorage.resize(source->ghashes.size());
    for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
      // Records the source wants left out of the PDB never enter the table;
      // they map to the simple NotTranslated index instead.
      if (source->shouldOmitFromPdb(i)) {
        source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
        continue;
      }
      GloballyHashedType ghash = source->ghashes[i];
      bool isItem = source->isItemIndex.test(i);
      uint32_t cellIdx =
          ghashState.table.insert(ghash, GHashCell(isItem, tpiSrcIdx, i));

      // Store the ghash cell index as a type index in indexMapStorage. Later
      // we will replace it with the PDB type index.
      source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx);
    }
  });

  // Collect all non-empty cells and sort them. This will implicitly assign
  // destination type indices, and partition the entries into type records and
  // item records. It arranges types in this order:
  // - type records
  //   - source 0, type 0...
  //   - source 1, type 1...
  // - item records
  //   - source 0, type 1...
  //   - source 1, type 0...
  std::vector<GHashCell> entries;
  for (const GHashCell &cell :
       makeArrayRef(ghashState.table.table, tableSize)) {
    if (!cell.isEmpty())
      entries.push_back(cell);
  }
  parallelSort(entries, std::less<GHashCell>());
  // The ratio is guarded so that an empty table does not divide by zero.
  log(formatv("ghash table load factor: {0:p} (size {1} / capacity {2})\n",
              tableSize ? double(entries.size()) / tableSize : 0,
              entries.size(), tableSize));

  // Find out how many type and item indices there are. GHashCell(true, 0, 0)
  // is the smallest possible item cell, so the lower bound is the first item
  // record in the sorted list.
  auto mid =
      std::lower_bound(entries.begin(), entries.end(), GHashCell(true, 0, 0));
  assert((mid == entries.end() || mid->isItem()) &&
         (mid == entries.begin() || !std::prev(mid)->isItem()) &&
         "midpoint is not midpoint");
  uint32_t numTypes = std::distance(entries.begin(), mid);
  uint32_t numItems = std::distance(mid, entries.end());
  log("Tpi record count: " + Twine(numTypes));
  log("Ipi record count: " + Twine(numItems));

  // Make a list of the "unique" type records to merge for each tpi source. Type
  // merging will skip indices not on this list. Store the destination PDB type
  // index for these unique types in the tpiMap for each source. The entries for
  // non-unique types will be filled in prior to type merging.
  for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
    auto &cell = entries[i];
    uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
    TpiSource *source = TpiSource::instances[tpiSrcIdx];
    source->uniqueTypes.push_back(cell.getGHashIdx());

    // Update the ghash table to store the destination PDB type index in the
    // table. Types and items are numbered independently, each starting from
    // zero, and the index goes into the slot of the cell that otherwise holds
    // the ghash index.
    uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
    uint32_t ghashCellIndex =
        source->indexMapStorage[cell.getGHashIdx()].toArrayIndex();
    ghashState.table.table[ghashCellIndex] =
        GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
  }

  // Remap all types. Dependency sources are visited with for_each before the
  // object sources are handed to parallelForEach.
  for_each(TpiSource::dependencySources, [&](TpiSource *source) {
    source->remapTpiWithGHashes(&ghashState);
  });
  parallelForEach(TpiSource::objectSources, [&](TpiSource *source) {
    source->remapTpiWithGHashes(&ghashState);
  });

  // Build a global map from function ID to function type, emptying each
  // source's own map once its entries have been copied over.
  for (TpiSource *source : TpiSource::instances) {
    for (auto idToType : source->funcIdToType)
      funcIdToType.insert(idToType);
    source->funcIdToType.clear();
  }

  TpiSource::clearGHashes();
}
1141
1142 /// Given the index into the ghash table for a particular type, return the type
1143 /// index for that type in the output PDB.
loadPdbTypeIndexFromCell(GHashState * g,uint32_t ghashCellIdx)1144 static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
1145 uint32_t ghashCellIdx) {
1146 GHashCell cell = g->table.table[ghashCellIdx];
1147 return TypeIndex::fromArrayIndex(cell.getGHashIdx());
1148 }
1149
1150 // Fill in a TPI or IPI index map using ghashes. For each source type, use its
1151 // ghash to lookup its final type index in the PDB, and store that in the map.
fillMapFromGHashes(GHashState * g,SmallVectorImpl<TypeIndex> & mapToFill)1152 void TpiSource::fillMapFromGHashes(GHashState *g,
1153 SmallVectorImpl<TypeIndex> &mapToFill) {
1154 for (size_t i = 0, e = ghashes.size(); i < e; ++i) {
1155 TypeIndex fakeCellIndex = indexMapStorage[i];
1156 if (fakeCellIndex.isSimple())
1157 mapToFill[i] = fakeCellIndex;
1158 else
1159 mapToFill[i] = loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex());
1160 }
1161 }
1162
clearGHashes()1163 void TpiSource::clearGHashes() {
1164 for (TpiSource *src : TpiSource::instances) {
1165 if (src->ownedGHashes)
1166 delete[] src->ghashes.data();
1167 src->ghashes = {};
1168 src->isItemIndex.clear();
1169 src->uniqueTypes.clear();
1170 }
1171 }
1172