1 //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/DebugInfo/MSF/MSFCommon.h"
14 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
15 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
16 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
17 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
18 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
19 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
20 #include "llvm/DebugInfo/PDB/Native/RawError.h"
21 #include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
22 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
23 #include "llvm/Support/BinaryStream.h"
24 #include "llvm/Support/BinaryStreamArray.h"
25 #include "llvm/Support/BinaryStreamReader.h"
26 #include "llvm/Support/Endian.h"
27 #include "llvm/Support/Error.h"
28 #include "llvm/Support/Path.h"
29 #include <algorithm>
30 #include <cassert>
31 #include <cstdint>
32
33 using namespace llvm;
34 using namespace llvm::codeview;
35 using namespace llvm::msf;
36 using namespace llvm::pdb;
37
38 namespace {
39 typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
40 } // end anonymous namespace
41
PDBFile(StringRef Path,std::unique_ptr<BinaryStream> PdbFileBuffer,BumpPtrAllocator & Allocator)42 PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer,
43 BumpPtrAllocator &Allocator)
44 : FilePath(Path), Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {}
45
// The destructor is defaulted here, in the implementation file, rather than in
// the header.
// NOTE(review): presumably this is so the types held by the class's
// unique_ptr members only need to be complete in this file - confirm against
// the header.
PDBFile::~PDBFile() = default;
47
getFilePath() const48 StringRef PDBFile::getFilePath() const { return FilePath; }
49
getFileDirectory() const50 StringRef PDBFile::getFileDirectory() const {
51 return sys::path::parent_path(FilePath);
52 }
53
getBlockSize() const54 uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
55
getFreeBlockMapBlock() const56 uint32_t PDBFile::getFreeBlockMapBlock() const {
57 return ContainerLayout.SB->FreeBlockMapBlock;
58 }
59
getBlockCount() const60 uint32_t PDBFile::getBlockCount() const {
61 return ContainerLayout.SB->NumBlocks;
62 }
63
getNumDirectoryBytes() const64 uint32_t PDBFile::getNumDirectoryBytes() const {
65 return ContainerLayout.SB->NumDirectoryBytes;
66 }
67
getBlockMapIndex() const68 uint32_t PDBFile::getBlockMapIndex() const {
69 return ContainerLayout.SB->BlockMapAddr;
70 }
71
getUnknown1() const72 uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }
73
getNumDirectoryBlocks() const74 uint32_t PDBFile::getNumDirectoryBlocks() const {
75 return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes,
76 ContainerLayout.SB->BlockSize);
77 }
78
getBlockMapOffset() const79 uint64_t PDBFile::getBlockMapOffset() const {
80 return (uint64_t)ContainerLayout.SB->BlockMapAddr *
81 ContainerLayout.SB->BlockSize;
82 }
83
getNumStreams() const84 uint32_t PDBFile::getNumStreams() const {
85 return ContainerLayout.StreamSizes.size();
86 }
87
getMaxStreamSize() const88 uint32_t PDBFile::getMaxStreamSize() const {
89 return *std::max_element(ContainerLayout.StreamSizes.begin(),
90 ContainerLayout.StreamSizes.end());
91 }
92
getStreamByteSize(uint32_t StreamIndex) const93 uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {
94 return ContainerLayout.StreamSizes[StreamIndex];
95 }
96
97 ArrayRef<support::ulittle32_t>
getStreamBlockList(uint32_t StreamIndex) const98 PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
99 return ContainerLayout.StreamMap[StreamIndex];
100 }
101
getFileSize() const102 uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); }
103
getBlockData(uint32_t BlockIndex,uint32_t NumBytes) const104 Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,
105 uint32_t NumBytes) const {
106 uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize());
107
108 ArrayRef<uint8_t> Result;
109 if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result))
110 return std::move(EC);
111 return Result;
112 }
113
setBlockData(uint32_t BlockIndex,uint32_t Offset,ArrayRef<uint8_t> Data) const114 Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,
115 ArrayRef<uint8_t> Data) const {
116 return make_error<RawError>(raw_error_code::not_writable,
117 "PDBFile is immutable");
118 }
119
parseFileHeaders()120 Error PDBFile::parseFileHeaders() {
121 BinaryStreamReader Reader(*Buffer);
122
123 // Initialize SB.
124 const msf::SuperBlock *SB = nullptr;
125 if (auto EC = Reader.readObject(SB)) {
126 consumeError(std::move(EC));
127 return make_error<RawError>(raw_error_code::corrupt_file,
128 "Does not contain superblock");
129 }
130
131 if (auto EC = msf::validateSuperBlock(*SB))
132 return EC;
133
134 if (Buffer->getLength() % SB->BlockSize != 0)
135 return make_error<RawError>(raw_error_code::corrupt_file,
136 "File size is not a multiple of block size");
137 ContainerLayout.SB = SB;
138
139 // Initialize Free Page Map.
140 ContainerLayout.FreePageMap.resize(SB->NumBlocks);
141 // The Fpm exists either at block 1 or block 2 of the MSF. However, this
142 // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and
143 // thusly an equal number of total blocks in the file. For a block size
144 // of 4KiB (very common), this would yield 32KiB total blocks in file, for a
145 // maximum file size of 32KiB * 4KiB = 128MiB. Obviously this won't do, so
146 // the Fpm is split across the file at `getBlockSize()` intervals. As a
147 // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
148 // for any non-negative integer k is an Fpm block. In theory, we only really
149 // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
150 // current versions of the MSF format already expect the Fpm to be arranged
151 // at getBlockSize() intervals, so we have to be compatible.
152 // See the function fpmPn() for more information:
153 // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
154 auto FpmStream =
155 MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator);
156 BinaryStreamReader FpmReader(*FpmStream);
157 ArrayRef<uint8_t> FpmBytes;
158 if (auto EC = FpmReader.readBytes(FpmBytes, FpmReader.bytesRemaining()))
159 return EC;
160 uint32_t BlocksRemaining = getBlockCount();
161 uint32_t BI = 0;
162 for (auto Byte : FpmBytes) {
163 uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U);
164 for (uint32_t I = 0; I < BlocksThisByte; ++I) {
165 if (Byte & (1 << I))
166 ContainerLayout.FreePageMap[BI] = true;
167 --BlocksRemaining;
168 ++BI;
169 }
170 }
171
172 Reader.setOffset(getBlockMapOffset());
173 if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,
174 getNumDirectoryBlocks()))
175 return EC;
176
177 return Error::success();
178 }
179
parseStreamData()180 Error PDBFile::parseStreamData() {
181 assert(ContainerLayout.SB);
182 if (DirectoryStream)
183 return Error::success();
184
185 uint32_t NumStreams = 0;
186
187 // Normally you can't use a MappedBlockStream without having fully parsed the
188 // PDB file, because it accesses the directory and various other things, which
189 // is exactly what we are attempting to parse. By specifying a custom
190 // subclass of IPDBStreamData which only accesses the fields that have already
191 // been parsed, we can avoid this and reuse MappedBlockStream.
192 auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer,
193 Allocator);
194 BinaryStreamReader Reader(*DS);
195 if (auto EC = Reader.readInteger(NumStreams))
196 return EC;
197
198 if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams))
199 return EC;
200 for (uint32_t I = 0; I < NumStreams; ++I) {
201 uint32_t StreamSize = getStreamByteSize(I);
202 // FIXME: What does StreamSize ~0U mean?
203 uint64_t NumExpectedStreamBlocks =
204 StreamSize == UINT32_MAX
205 ? 0
206 : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize);
207
208 // For convenience, we store the block array contiguously. This is because
209 // if someone calls setStreamMap(), it is more convenient to be able to call
210 // it with an ArrayRef instead of setting up a StreamRef. Since the
211 // DirectoryStream is cached in the class and thus lives for the life of the
212 // class, we can be guaranteed that readArray() will return a stable
213 // reference, even if it has to allocate from its internal pool.
214 ArrayRef<support::ulittle32_t> Blocks;
215 if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))
216 return EC;
217 for (uint32_t Block : Blocks) {
218 uint64_t BlockEndOffset =
219 (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;
220 if (BlockEndOffset > getFileSize())
221 return make_error<RawError>(raw_error_code::corrupt_file,
222 "Stream block map is corrupt.");
223 }
224 ContainerLayout.StreamMap.push_back(Blocks);
225 }
226
227 // We should have read exactly SB->NumDirectoryBytes bytes.
228 assert(Reader.bytesRemaining() == 0);
229 DirectoryStream = std::move(DS);
230 return Error::success();
231 }
232
getDirectoryBlockArray() const233 ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
234 return ContainerLayout.DirectoryBlocks;
235 }
236
createIndexedStream(uint16_t SN)237 std::unique_ptr<MappedBlockStream> PDBFile::createIndexedStream(uint16_t SN) {
238 if (SN == kInvalidStreamIndex)
239 return nullptr;
240 return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN,
241 Allocator);
242 }
243
getStreamLayout(uint32_t StreamIdx) const244 MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const {
245 MSFStreamLayout Result;
246 auto Blocks = getStreamBlockList(StreamIdx);
247 Result.Blocks.assign(Blocks.begin(), Blocks.end());
248 Result.Length = getStreamByteSize(StreamIdx);
249 return Result;
250 }
251
getFpmStreamLayout() const252 msf::MSFStreamLayout PDBFile::getFpmStreamLayout() const {
253 return msf::getFpmStreamLayout(ContainerLayout);
254 }
255
getPDBGlobalsStream()256 Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
257 if (!Globals) {
258 auto DbiS = getPDBDbiStream();
259 if (!DbiS)
260 return DbiS.takeError();
261
262 auto GlobalS = safelyCreateIndexedStream(
263 ContainerLayout, *Buffer, DbiS->getGlobalSymbolStreamIndex());
264 if (!GlobalS)
265 return GlobalS.takeError();
266 auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(*GlobalS));
267 if (auto EC = TempGlobals->reload())
268 return std::move(EC);
269 Globals = std::move(TempGlobals);
270 }
271 return *Globals;
272 }
273
getPDBInfoStream()274 Expected<InfoStream &> PDBFile::getPDBInfoStream() {
275 if (!Info) {
276 auto InfoS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamPDB);
277 if (!InfoS)
278 return InfoS.takeError();
279 auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS));
280 if (auto EC = TempInfo->reload())
281 return std::move(EC);
282 Info = std::move(TempInfo);
283 }
284 return *Info;
285 }
286
getPDBDbiStream()287 Expected<DbiStream &> PDBFile::getPDBDbiStream() {
288 if (!Dbi) {
289 auto DbiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamDBI);
290 if (!DbiS)
291 return DbiS.takeError();
292 auto TempDbi = llvm::make_unique<DbiStream>(std::move(*DbiS));
293 if (auto EC = TempDbi->reload(this))
294 return std::move(EC);
295 Dbi = std::move(TempDbi);
296 }
297 return *Dbi;
298 }
299
getPDBTpiStream()300 Expected<TpiStream &> PDBFile::getPDBTpiStream() {
301 if (!Tpi) {
302 auto TpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamTPI);
303 if (!TpiS)
304 return TpiS.takeError();
305 auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS));
306 if (auto EC = TempTpi->reload())
307 return std::move(EC);
308 Tpi = std::move(TempTpi);
309 }
310 return *Tpi;
311 }
312
getPDBIpiStream()313 Expected<TpiStream &> PDBFile::getPDBIpiStream() {
314 if (!Ipi) {
315 if (!hasPDBIpiStream())
316 return make_error<RawError>(raw_error_code::no_stream);
317
318 auto IpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamIPI);
319 if (!IpiS)
320 return IpiS.takeError();
321 auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS));
322 if (auto EC = TempIpi->reload())
323 return std::move(EC);
324 Ipi = std::move(TempIpi);
325 }
326 return *Ipi;
327 }
328
getPDBPublicsStream()329 Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
330 if (!Publics) {
331 auto DbiS = getPDBDbiStream();
332 if (!DbiS)
333 return DbiS.takeError();
334
335 auto PublicS = safelyCreateIndexedStream(
336 ContainerLayout, *Buffer, DbiS->getPublicSymbolStreamIndex());
337 if (!PublicS)
338 return PublicS.takeError();
339 auto TempPublics = llvm::make_unique<PublicsStream>(std::move(*PublicS));
340 if (auto EC = TempPublics->reload())
341 return std::move(EC);
342 Publics = std::move(TempPublics);
343 }
344 return *Publics;
345 }
346
getPDBSymbolStream()347 Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
348 if (!Symbols) {
349 auto DbiS = getPDBDbiStream();
350 if (!DbiS)
351 return DbiS.takeError();
352
353 uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
354 auto SymbolS =
355 safelyCreateIndexedStream(ContainerLayout, *Buffer, SymbolStreamNum);
356 if (!SymbolS)
357 return SymbolS.takeError();
358
359 auto TempSymbols = llvm::make_unique<SymbolStream>(std::move(*SymbolS));
360 if (auto EC = TempSymbols->reload())
361 return std::move(EC);
362 Symbols = std::move(TempSymbols);
363 }
364 return *Symbols;
365 }
366
getStringTable()367 Expected<PDBStringTable &> PDBFile::getStringTable() {
368 if (!Strings) {
369 auto IS = getPDBInfoStream();
370 if (!IS)
371 return IS.takeError();
372
373 Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
374 if (!ExpectedNSI)
375 return ExpectedNSI.takeError();
376 uint32_t NameStreamIndex = *ExpectedNSI;
377
378 auto NS =
379 safelyCreateIndexedStream(ContainerLayout, *Buffer, NameStreamIndex);
380 if (!NS)
381 return NS.takeError();
382
383 auto N = llvm::make_unique<PDBStringTable>();
384 BinaryStreamReader Reader(**NS);
385 if (auto EC = N->reload(Reader))
386 return std::move(EC);
387 assert(Reader.bytesRemaining() == 0);
388 StringTableStream = std::move(*NS);
389 Strings = std::move(N);
390 }
391 return *Strings;
392 }
393
getPointerSize()394 uint32_t PDBFile::getPointerSize() {
395 auto DbiS = getPDBDbiStream();
396 if (!DbiS)
397 return 0;
398 PDB_Machine Machine = DbiS->getMachineType();
399 if (Machine == PDB_Machine::Amd64)
400 return 8;
401 return 4;
402 }
403
hasPDBDbiStream() const404 bool PDBFile::hasPDBDbiStream() const { return StreamDBI < getNumStreams(); }
405
hasPDBGlobalsStream()406 bool PDBFile::hasPDBGlobalsStream() {
407 auto DbiS = getPDBDbiStream();
408 if (!DbiS) {
409 consumeError(DbiS.takeError());
410 return false;
411 }
412
413 return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
414 }
415
hasPDBInfoStream() const416 bool PDBFile::hasPDBInfoStream() const { return StreamPDB < getNumStreams(); }
417
hasPDBIpiStream() const418 bool PDBFile::hasPDBIpiStream() const {
419 if (!hasPDBInfoStream())
420 return false;
421
422 if (StreamIPI >= getNumStreams())
423 return false;
424
425 auto &InfoStream = cantFail(const_cast<PDBFile *>(this)->getPDBInfoStream());
426 return InfoStream.containsIdStream();
427 }
428
hasPDBPublicsStream()429 bool PDBFile::hasPDBPublicsStream() {
430 auto DbiS = getPDBDbiStream();
431 if (!DbiS) {
432 consumeError(DbiS.takeError());
433 return false;
434 }
435 return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
436 }
437
hasPDBSymbolStream()438 bool PDBFile::hasPDBSymbolStream() {
439 auto DbiS = getPDBDbiStream();
440 if (!DbiS)
441 return false;
442 return DbiS->getSymRecordStreamIndex() < getNumStreams();
443 }
444
hasPDBTpiStream() const445 bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }
446
hasPDBStringTable()447 bool PDBFile::hasPDBStringTable() {
448 auto IS = getPDBInfoStream();
449 if (!IS)
450 return false;
451 Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
452 if (!ExpectedNSI) {
453 consumeError(ExpectedNSI.takeError());
454 return false;
455 }
456 assert(*ExpectedNSI < getNumStreams());
457 return true;
458 }
459
460 /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
461 /// stream with that index actually exists. If it does not, the return value
462 /// will have an MSFError with code msf_error_code::no_stream. Else, the return
463 /// value will contain the stream returned by createIndexedStream().
464 Expected<std::unique_ptr<MappedBlockStream>>
safelyCreateIndexedStream(const MSFLayout & Layout,BinaryStreamRef MsfData,uint32_t StreamIndex) const465 PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout,
466 BinaryStreamRef MsfData,
467 uint32_t StreamIndex) const {
468 if (StreamIndex >= getNumStreams())
469 return make_error<RawError>(raw_error_code::no_stream);
470 return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex,
471 Allocator);
472 }
473