1 //===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 #include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
11
12 #include "llvm/DebugInfo/CodeView/RecordName.h"
13 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
14 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
15 #include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
16 #include "llvm/DebugInfo/MSF/MSFBuilder.h"
17 #include "llvm/DebugInfo/MSF/MSFCommon.h"
18 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
19 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
20 #include "llvm/DebugInfo/PDB/Native/Hash.h"
21 #include "llvm/Support/BinaryItemStream.h"
22 #include "llvm/Support/BinaryStreamWriter.h"
23 #include <algorithm>
24 #include <vector>
25
26 using namespace llvm;
27 using namespace llvm::msf;
28 using namespace llvm::pdb;
29 using namespace llvm::codeview;
30
31 struct llvm::pdb::GSIHashStreamBuilder {
32 std::vector<CVSymbol> Records;
33 uint32_t StreamIndex;
34 std::vector<PSHashRecord> HashRecords;
35 std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;
36 std::vector<support::ulittle32_t> HashBuckets;
37
38 uint32_t calculateSerializedLength() const;
39 uint32_t calculateRecordByteSize() const;
40 Error commit(BinaryStreamWriter &Writer);
41 void finalizeBuckets(uint32_t RecordZeroOffset);
42
addSymbolllvm::pdb::GSIHashStreamBuilder43 template <typename T> void addSymbol(const T &Symbol, MSFBuilder &Msf) {
44 T Copy(Symbol);
45 Records.push_back(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(),
46 CodeViewContainer::Pdb));
47 }
addSymbolllvm::pdb::GSIHashStreamBuilder48 void addSymbol(const CVSymbol &Symbol) { Records.push_back(Symbol); }
49 };
50
calculateSerializedLength() const51 uint32_t GSIHashStreamBuilder::calculateSerializedLength() const {
52 uint32_t Size = sizeof(GSIHashHeader);
53 Size += HashRecords.size() * sizeof(PSHashRecord);
54 Size += HashBitmap.size() * sizeof(uint32_t);
55 Size += HashBuckets.size() * sizeof(uint32_t);
56 return Size;
57 }
58
calculateRecordByteSize() const59 uint32_t GSIHashStreamBuilder::calculateRecordByteSize() const {
60 uint32_t Size = 0;
61 for (const auto &Sym : Records)
62 Size += Sym.length();
63 return Size;
64 }
65
commit(BinaryStreamWriter & Writer)66 Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) {
67 GSIHashHeader Header;
68 Header.VerSignature = GSIHashHeader::HdrSignature;
69 Header.VerHdr = GSIHashHeader::HdrVersion;
70 Header.HrSize = HashRecords.size() * sizeof(PSHashRecord);
71 Header.NumBuckets = HashBitmap.size() * 4 + HashBuckets.size() * 4;
72
73 if (auto EC = Writer.writeObject(Header))
74 return EC;
75
76 if (auto EC = Writer.writeArray(makeArrayRef(HashRecords)))
77 return EC;
78 if (auto EC = Writer.writeArray(makeArrayRef(HashBitmap)))
79 return EC;
80 if (auto EC = Writer.writeArray(makeArrayRef(HashBuckets)))
81 return EC;
82 return Error::success();
83 }
84
isAsciiString(StringRef S)85 static bool isAsciiString(StringRef S) {
86 return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; });
87 }
88
89 // See `caseInsensitiveComparePchPchCchCch` in gsi.cpp
gsiRecordLess(StringRef S1,StringRef S2)90 static bool gsiRecordLess(StringRef S1, StringRef S2) {
91 size_t LS = S1.size();
92 size_t RS = S2.size();
93 // Shorter strings always compare less than longer strings.
94 if (LS != RS)
95 return LS < RS;
96
97 // If either string contains non ascii characters, memcmp them.
98 if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2)))
99 return memcmp(S1.data(), S2.data(), LS) < 0;
100
101 // Both strings are ascii, perform a case-insenstive comparison.
102 return S1.compare_lower(S2.data()) < 0;
103 }
104
finalizeBuckets(uint32_t RecordZeroOffset)105 void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
106 std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1>
107 TmpBuckets;
108 uint32_t SymOffset = RecordZeroOffset;
109 for (const CVSymbol &Sym : Records) {
110 PSHashRecord HR;
111 // Add one when writing symbol offsets to disk. See GSI1::fixSymRecs.
112 HR.Off = SymOffset + 1;
113 HR.CRef = 1; // Always use a refcount of 1.
114
115 // Hash the name to figure out which bucket this goes into.
116 StringRef Name = getSymbolName(Sym);
117 size_t BucketIdx = hashStringV1(Name) % IPHR_HASH;
118 TmpBuckets[BucketIdx].push_back(std::make_pair(Name, HR));
119 SymOffset += Sym.length();
120 }
121
122 // Compute the three tables: the hash records in bucket and chain order, the
123 // bucket presence bitmap, and the bucket chain start offsets.
124 HashRecords.reserve(Records.size());
125 for (ulittle32_t &Word : HashBitmap)
126 Word = 0;
127 for (size_t BucketIdx = 0; BucketIdx < IPHR_HASH + 1; ++BucketIdx) {
128 auto &Bucket = TmpBuckets[BucketIdx];
129 if (Bucket.empty())
130 continue;
131 HashBitmap[BucketIdx / 32] |= 1U << (BucketIdx % 32);
132
133 // Calculate what the offset of the first hash record in the chain would
134 // be if it were inflated to contain 32-bit pointers. On a 32-bit system,
135 // each record would be 12 bytes. See HROffsetCalc in gsi.h.
136 const int SizeOfHROffsetCalc = 12;
137 ulittle32_t ChainStartOff =
138 ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc);
139 HashBuckets.push_back(ChainStartOff);
140
141 // Sort each bucket by memcmp of the symbol's name. It's important that
142 // we use the same sorting algorithm as is used by the reference
143 // implementation to ensure that the search for a record within a bucket
144 // can properly early-out when it detects the record won't be found. The
145 // algorithm used here corredsponds to the function
146 // caseInsensitiveComparePchPchCchCch in the reference implementation.
147 llvm::sort(Bucket.begin(), Bucket.end(),
148 [](const std::pair<StringRef, PSHashRecord> &Left,
149 const std::pair<StringRef, PSHashRecord> &Right) {
150 return gsiRecordLess(Left.first, Right.first);
151 });
152
153 for (const auto &Entry : Bucket)
154 HashRecords.push_back(Entry.second);
155 }
156 }
157
GSIStreamBuilder(msf::MSFBuilder & Msf)158 GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf)
159 : Msf(Msf), PSH(llvm::make_unique<GSIHashStreamBuilder>()),
160 GSH(llvm::make_unique<GSIHashStreamBuilder>()) {}
161
~GSIStreamBuilder()162 GSIStreamBuilder::~GSIStreamBuilder() {}
163
calculatePublicsHashStreamSize() const164 uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const {
165 uint32_t Size = 0;
166 Size += sizeof(PublicsStreamHeader);
167 Size += PSH->calculateSerializedLength();
168 Size += PSH->Records.size() * sizeof(uint32_t); // AddrMap
169 // FIXME: Add thunk map and section offsets for incremental linking.
170
171 return Size;
172 }
173
calculateGlobalsHashStreamSize() const174 uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const {
175 return GSH->calculateSerializedLength();
176 }
177
finalizeMsfLayout()178 Error GSIStreamBuilder::finalizeMsfLayout() {
179 // First we write public symbol records, then we write global symbol records.
180 uint32_t PSHZero = 0;
181 uint32_t GSHZero = PSH->calculateRecordByteSize();
182
183 PSH->finalizeBuckets(PSHZero);
184 GSH->finalizeBuckets(GSHZero);
185
186 Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize());
187 if (!Idx)
188 return Idx.takeError();
189 GSH->StreamIndex = *Idx;
190 Idx = Msf.addStream(calculatePublicsHashStreamSize());
191 if (!Idx)
192 return Idx.takeError();
193 PSH->StreamIndex = *Idx;
194
195 uint32_t RecordBytes =
196 GSH->calculateRecordByteSize() + PSH->calculateRecordByteSize();
197
198 Idx = Msf.addStream(RecordBytes);
199 if (!Idx)
200 return Idx.takeError();
201 RecordStreamIdx = *Idx;
202 return Error::success();
203 }
204
comparePubSymByAddrAndName(const std::pair<const CVSymbol *,const PublicSym32 * > & LS,const std::pair<const CVSymbol *,const PublicSym32 * > & RS)205 static bool comparePubSymByAddrAndName(
206 const std::pair<const CVSymbol *, const PublicSym32 *> &LS,
207 const std::pair<const CVSymbol *, const PublicSym32 *> &RS) {
208 if (LS.second->Segment != RS.second->Segment)
209 return LS.second->Segment < RS.second->Segment;
210 if (LS.second->Offset != RS.second->Offset)
211 return LS.second->Offset < RS.second->Offset;
212
213 return LS.second->Name < RS.second->Name;
214 }
215
216 /// Compute the address map. The address map is an array of symbol offsets
217 /// sorted so that it can be binary searched by address.
computeAddrMap(ArrayRef<CVSymbol> Records)218 static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) {
219 // Make a vector of pointers to the symbols so we can sort it by address.
220 // Also gather the symbol offsets while we're at it.
221
222 std::vector<PublicSym32> DeserializedPublics;
223 std::vector<std::pair<const CVSymbol *, const PublicSym32 *>> PublicsByAddr;
224 std::vector<uint32_t> SymOffsets;
225 DeserializedPublics.reserve(Records.size());
226 PublicsByAddr.reserve(Records.size());
227 SymOffsets.reserve(Records.size());
228
229 uint32_t SymOffset = 0;
230 for (const CVSymbol &Sym : Records) {
231 assert(Sym.kind() == SymbolKind::S_PUB32);
232 DeserializedPublics.push_back(
233 cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym)));
234 PublicsByAddr.emplace_back(&Sym, &DeserializedPublics.back());
235 SymOffsets.push_back(SymOffset);
236 SymOffset += Sym.length();
237 }
238 std::stable_sort(PublicsByAddr.begin(), PublicsByAddr.end(),
239 comparePubSymByAddrAndName);
240
241 // Fill in the symbol offsets in the appropriate order.
242 std::vector<ulittle32_t> AddrMap;
243 AddrMap.reserve(Records.size());
244 for (auto &Sym : PublicsByAddr) {
245 ptrdiff_t Idx = std::distance(Records.data(), Sym.first);
246 assert(Idx >= 0 && size_t(Idx) < Records.size());
247 AddrMap.push_back(ulittle32_t(SymOffsets[Idx]));
248 }
249 return AddrMap;
250 }
251
getPublicsStreamIndex() const252 uint32_t GSIStreamBuilder::getPublicsStreamIndex() const {
253 return PSH->StreamIndex;
254 }
255
getGlobalsStreamIndex() const256 uint32_t GSIStreamBuilder::getGlobalsStreamIndex() const {
257 return GSH->StreamIndex;
258 }
259
addPublicSymbol(const PublicSym32 & Pub)260 void GSIStreamBuilder::addPublicSymbol(const PublicSym32 &Pub) {
261 PSH->addSymbol(Pub, Msf);
262 }
263
addGlobalSymbol(const ProcRefSym & Sym)264 void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) {
265 GSH->addSymbol(Sym, Msf);
266 }
267
addGlobalSymbol(const DataSym & Sym)268 void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) {
269 GSH->addSymbol(Sym, Msf);
270 }
271
addGlobalSymbol(const ConstantSym & Sym)272 void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) {
273 GSH->addSymbol(Sym, Msf);
274 }
275
addGlobalSymbol(const UDTSym & Sym)276 void GSIStreamBuilder::addGlobalSymbol(const UDTSym &Sym) {
277 GSH->addSymbol(Sym, Msf);
278 }
279
addGlobalSymbol(const codeview::CVSymbol & Sym)280 void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Sym) {
281 GSH->addSymbol(Sym);
282 }
283
writeRecords(BinaryStreamWriter & Writer,ArrayRef<CVSymbol> Records)284 static Error writeRecords(BinaryStreamWriter &Writer,
285 ArrayRef<CVSymbol> Records) {
286 BinaryItemStream<CVSymbol> ItemStream(support::endianness::little);
287 ItemStream.setItems(Records);
288 BinaryStreamRef RecordsRef(ItemStream);
289 return Writer.writeStreamRef(RecordsRef);
290 }
291
commitSymbolRecordStream(WritableBinaryStreamRef Stream)292 Error GSIStreamBuilder::commitSymbolRecordStream(
293 WritableBinaryStreamRef Stream) {
294 BinaryStreamWriter Writer(Stream);
295
296 // Write public symbol records first, followed by global symbol records. This
297 // must match the order that we assume in finalizeMsfLayout when computing
298 // PSHZero and GSHZero.
299 if (auto EC = writeRecords(Writer, PSH->Records))
300 return EC;
301 if (auto EC = writeRecords(Writer, GSH->Records))
302 return EC;
303
304 return Error::success();
305 }
306
commitPublicsHashStream(WritableBinaryStreamRef Stream)307 Error GSIStreamBuilder::commitPublicsHashStream(
308 WritableBinaryStreamRef Stream) {
309 BinaryStreamWriter Writer(Stream);
310 PublicsStreamHeader Header;
311
312 // FIXME: Fill these in. They are for incremental linking.
313 Header.NumThunks = 0;
314 Header.SizeOfThunk = 0;
315 Header.ISectThunkTable = 0;
316 Header.OffThunkTable = 0;
317 Header.NumSections = 0;
318 Header.SymHash = PSH->calculateSerializedLength();
319 Header.AddrMap = PSH->Records.size() * 4;
320 if (auto EC = Writer.writeObject(Header))
321 return EC;
322
323 if (auto EC = PSH->commit(Writer))
324 return EC;
325
326 std::vector<ulittle32_t> AddrMap = computeAddrMap(PSH->Records);
327 if (auto EC = Writer.writeArray(makeArrayRef(AddrMap)))
328 return EC;
329
330 return Error::success();
331 }
332
commitGlobalsHashStream(WritableBinaryStreamRef Stream)333 Error GSIStreamBuilder::commitGlobalsHashStream(
334 WritableBinaryStreamRef Stream) {
335 BinaryStreamWriter Writer(Stream);
336 return GSH->commit(Writer);
337 }
338
commit(const msf::MSFLayout & Layout,WritableBinaryStreamRef Buffer)339 Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout,
340 WritableBinaryStreamRef Buffer) {
341 auto GS = WritableMappedBlockStream::createIndexedStream(
342 Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator());
343 auto PS = WritableMappedBlockStream::createIndexedStream(
344 Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator());
345 auto PRS = WritableMappedBlockStream::createIndexedStream(
346 Layout, Buffer, getRecordStreamIdx(), Msf.getAllocator());
347
348 if (auto EC = commitSymbolRecordStream(*PRS))
349 return EC;
350 if (auto EC = commitGlobalsHashStream(*GS))
351 return EC;
352 if (auto EC = commitPublicsHashStream(*PS))
353 return EC;
354 return Error::success();
355 }
356