1 //===-- TypeStreamMerger.cpp ------------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
11 #include "llvm/ADT/SmallString.h"
12 #include "llvm/ADT/StringExtras.h"
13 #include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
14 #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
15 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
16 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
17 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
18 #include "llvm/Support/Error.h"
19
20 using namespace llvm;
21 using namespace llvm::codeview;
22
slotForIndex(TypeIndex Idx)23 static inline size_t slotForIndex(TypeIndex Idx) {
24 assert(!Idx.isSimple() && "simple type indices have no slots");
25 return Idx.getIndex() - TypeIndex::FirstNonSimpleIndex;
26 }
27
28 namespace {
29
30 /// Implementation of CodeView type stream merging.
31 ///
32 /// A CodeView type stream is a series of records that reference each other
33 /// through type indices. A type index is either "simple", meaning it is less
34 /// than 0x1000 and refers to a builtin type, or it is complex, meaning it
35 /// refers to a prior type record in the current stream. The type index of a
36 /// record is equal to the number of records before it in the stream plus
37 /// 0x1000.
38 ///
39 /// Type records are only allowed to use type indices smaller than their own, so
40 /// a type stream is effectively a topologically sorted DAG. Cycles occuring in
41 /// the type graph of the source program are resolved with forward declarations
42 /// of composite types. This class implements the following type stream merging
43 /// algorithm, which relies on this DAG structure:
44 ///
45 /// - Begin with a new empty stream, and a new empty hash table that maps from
46 /// type record contents to new type index.
47 /// - For each new type stream, maintain a map from source type index to
48 /// destination type index.
49 /// - For each record, copy it and rewrite its type indices to be valid in the
50 /// destination type stream.
51 /// - If the new type record is not already present in the destination stream
52 /// hash table, append it to the destination type stream, assign it the next
53 /// type index, and update the two hash tables.
54 /// - If the type record already exists in the destination stream, discard it
55 /// and update the type index map to forward the source type index to the
56 /// existing destination type index.
57 ///
58 /// As an additional complication, type stream merging actually produces two
59 /// streams: an item (or IPI) stream and a type stream, as this is what is
60 /// actually stored in the final PDB. We choose which records go where by
61 /// looking at the record kind.
62 class TypeStreamMerger {
63 public:
TypeStreamMerger(SmallVectorImpl<TypeIndex> & SourceToDest)64 explicit TypeStreamMerger(SmallVectorImpl<TypeIndex> &SourceToDest)
65 : IndexMap(SourceToDest) {
66 SourceToDest.clear();
67 }
68
69 static const TypeIndex Untranslated;
70
71 // Local hashing entry points
72 Error mergeTypesAndIds(MergingTypeTableBuilder &DestIds,
73 MergingTypeTableBuilder &DestTypes,
74 const CVTypeArray &IdsAndTypes);
75 Error mergeIdRecords(MergingTypeTableBuilder &Dest,
76 ArrayRef<TypeIndex> TypeSourceToDest,
77 const CVTypeArray &Ids);
78 Error mergeTypeRecords(MergingTypeTableBuilder &Dest,
79 const CVTypeArray &Types);
80
81 // Global hashing entry points
82 Error mergeTypesAndIds(GlobalTypeTableBuilder &DestIds,
83 GlobalTypeTableBuilder &DestTypes,
84 const CVTypeArray &IdsAndTypes,
85 ArrayRef<GloballyHashedType> Hashes);
86 Error mergeIdRecords(GlobalTypeTableBuilder &Dest,
87 ArrayRef<TypeIndex> TypeSourceToDest,
88 const CVTypeArray &Ids,
89 ArrayRef<GloballyHashedType> Hashes);
90 Error mergeTypeRecords(GlobalTypeTableBuilder &Dest, const CVTypeArray &Types,
91 ArrayRef<GloballyHashedType> Hashes);
92
93 private:
94 Error doit(const CVTypeArray &Types);
95
96 Error remapAllTypes(const CVTypeArray &Types);
97
98 Error remapType(const CVType &Type);
99
100 void addMapping(TypeIndex Idx);
101
remapTypeIndex(TypeIndex & Idx)102 inline bool remapTypeIndex(TypeIndex &Idx) {
103 // If we're mapping a pure index stream, then IndexMap only contains
104 // mappings from OldIdStream -> NewIdStream, in which case we will need to
105 // use the special mapping from OldTypeStream -> NewTypeStream which was
106 // computed externally. Regardless, we use this special map if and only if
107 // we are doing an id-only mapping.
108 if (!hasTypeStream())
109 return remapIndex(Idx, TypeLookup);
110
111 assert(TypeLookup.empty());
112 return remapIndex(Idx, IndexMap);
113 }
remapItemIndex(TypeIndex & Idx)114 inline bool remapItemIndex(TypeIndex &Idx) {
115 assert(hasIdStream());
116 return remapIndex(Idx, IndexMap);
117 }
118
hasTypeStream() const119 bool hasTypeStream() const {
120 return (UseGlobalHashes) ? (!!DestGlobalTypeStream) : (!!DestTypeStream);
121 }
122
hasIdStream() const123 bool hasIdStream() const {
124 return (UseGlobalHashes) ? (!!DestGlobalIdStream) : (!!DestIdStream);
125 }
126
127 ArrayRef<uint8_t> remapIndices(const CVType &OriginalType,
128 MutableArrayRef<uint8_t> Storage);
129
remapIndex(TypeIndex & Idx,ArrayRef<TypeIndex> Map)130 inline bool remapIndex(TypeIndex &Idx, ArrayRef<TypeIndex> Map) {
131 if (LLVM_LIKELY(remapIndexSimple(Idx, Map)))
132 return true;
133
134 return remapIndexFallback(Idx, Map);
135 }
136
remapIndexSimple(TypeIndex & Idx,ArrayRef<TypeIndex> Map) const137 inline bool remapIndexSimple(TypeIndex &Idx, ArrayRef<TypeIndex> Map) const {
138 // Simple types are unchanged.
139 if (Idx.isSimple())
140 return true;
141
142 // Check if this type index refers to a record we've already translated
143 // successfully. If it refers to a type later in the stream or a record we
144 // had to defer, defer it until later pass.
145 unsigned MapPos = slotForIndex(Idx);
146 if (LLVM_UNLIKELY(MapPos >= Map.size() || Map[MapPos] == Untranslated))
147 return false;
148
149 Idx = Map[MapPos];
150 return true;
151 }
152
153 bool remapIndexFallback(TypeIndex &Idx, ArrayRef<TypeIndex> Map);
154
errorCorruptRecord() const155 Error errorCorruptRecord() const {
156 return llvm::make_error<CodeViewError>(cv_error_code::corrupt_record);
157 }
158
159 Optional<Error> LastError;
160
161 bool UseGlobalHashes = false;
162
163 bool IsSecondPass = false;
164
165 unsigned NumBadIndices = 0;
166
167 TypeIndex CurIndex{TypeIndex::FirstNonSimpleIndex};
168
169 MergingTypeTableBuilder *DestIdStream = nullptr;
170 MergingTypeTableBuilder *DestTypeStream = nullptr;
171
172 GlobalTypeTableBuilder *DestGlobalIdStream = nullptr;
173 GlobalTypeTableBuilder *DestGlobalTypeStream = nullptr;
174
175 ArrayRef<GloballyHashedType> GlobalHashes;
176
177 // If we're only mapping id records, this array contains the mapping for
178 // type records.
179 ArrayRef<TypeIndex> TypeLookup;
180
181 /// Map from source type index to destination type index. Indexed by source
182 /// type index minus 0x1000.
183 SmallVectorImpl<TypeIndex> &IndexMap;
184
185 /// Temporary storage that we use to copy a record's data while re-writing
186 /// its type indices.
187 SmallVector<uint8_t, 256> RemapStorage;
188 };
189
190 } // end anonymous namespace
191
192 const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated);
193
isIdRecord(TypeLeafKind K)194 static bool isIdRecord(TypeLeafKind K) {
195 switch (K) {
196 case TypeLeafKind::LF_FUNC_ID:
197 case TypeLeafKind::LF_MFUNC_ID:
198 case TypeLeafKind::LF_STRING_ID:
199 case TypeLeafKind::LF_SUBSTR_LIST:
200 case TypeLeafKind::LF_BUILDINFO:
201 case TypeLeafKind::LF_UDT_SRC_LINE:
202 case TypeLeafKind::LF_UDT_MOD_SRC_LINE:
203 return true;
204 default:
205 return false;
206 }
207 }
208
addMapping(TypeIndex Idx)209 void TypeStreamMerger::addMapping(TypeIndex Idx) {
210 if (!IsSecondPass) {
211 assert(IndexMap.size() == slotForIndex(CurIndex) &&
212 "visitKnownRecord should add one index map entry");
213 IndexMap.push_back(Idx);
214 } else {
215 assert(slotForIndex(CurIndex) < IndexMap.size());
216 IndexMap[slotForIndex(CurIndex)] = Idx;
217 }
218 }
219
remapIndexFallback(TypeIndex & Idx,ArrayRef<TypeIndex> Map)220 bool TypeStreamMerger::remapIndexFallback(TypeIndex &Idx,
221 ArrayRef<TypeIndex> Map) {
222 size_t MapPos = slotForIndex(Idx);
223
224 // If this is the second pass and this index isn't in the map, then it points
225 // outside the current type stream, and this is a corrupt record.
226 if (IsSecondPass && MapPos >= Map.size()) {
227 // FIXME: Print a more useful error. We can give the current record and the
228 // index that we think its pointing to.
229 if (LastError)
230 LastError = joinErrors(std::move(*LastError), errorCorruptRecord());
231 else
232 LastError = errorCorruptRecord();
233 }
234
235 ++NumBadIndices;
236
237 // This type index is invalid. Remap this to "not translated by cvpack",
238 // and return failure.
239 Idx = Untranslated;
240 return false;
241 }
242
243 // Local hashing entry points
mergeTypeRecords(MergingTypeTableBuilder & Dest,const CVTypeArray & Types)244 Error TypeStreamMerger::mergeTypeRecords(MergingTypeTableBuilder &Dest,
245 const CVTypeArray &Types) {
246 DestTypeStream = &Dest;
247 UseGlobalHashes = false;
248
249 return doit(Types);
250 }
251
mergeIdRecords(MergingTypeTableBuilder & Dest,ArrayRef<TypeIndex> TypeSourceToDest,const CVTypeArray & Ids)252 Error TypeStreamMerger::mergeIdRecords(MergingTypeTableBuilder &Dest,
253 ArrayRef<TypeIndex> TypeSourceToDest,
254 const CVTypeArray &Ids) {
255 DestIdStream = &Dest;
256 TypeLookup = TypeSourceToDest;
257 UseGlobalHashes = false;
258
259 return doit(Ids);
260 }
261
mergeTypesAndIds(MergingTypeTableBuilder & DestIds,MergingTypeTableBuilder & DestTypes,const CVTypeArray & IdsAndTypes)262 Error TypeStreamMerger::mergeTypesAndIds(MergingTypeTableBuilder &DestIds,
263 MergingTypeTableBuilder &DestTypes,
264 const CVTypeArray &IdsAndTypes) {
265 DestIdStream = &DestIds;
266 DestTypeStream = &DestTypes;
267 UseGlobalHashes = false;
268 return doit(IdsAndTypes);
269 }
270
271 // Global hashing entry points
mergeTypeRecords(GlobalTypeTableBuilder & Dest,const CVTypeArray & Types,ArrayRef<GloballyHashedType> Hashes)272 Error TypeStreamMerger::mergeTypeRecords(GlobalTypeTableBuilder &Dest,
273 const CVTypeArray &Types,
274 ArrayRef<GloballyHashedType> Hashes) {
275 DestGlobalTypeStream = &Dest;
276 UseGlobalHashes = true;
277 GlobalHashes = Hashes;
278
279 return doit(Types);
280 }
281
mergeIdRecords(GlobalTypeTableBuilder & Dest,ArrayRef<TypeIndex> TypeSourceToDest,const CVTypeArray & Ids,ArrayRef<GloballyHashedType> Hashes)282 Error TypeStreamMerger::mergeIdRecords(GlobalTypeTableBuilder &Dest,
283 ArrayRef<TypeIndex> TypeSourceToDest,
284 const CVTypeArray &Ids,
285 ArrayRef<GloballyHashedType> Hashes) {
286 DestGlobalIdStream = &Dest;
287 TypeLookup = TypeSourceToDest;
288 UseGlobalHashes = true;
289 GlobalHashes = Hashes;
290
291 return doit(Ids);
292 }
293
mergeTypesAndIds(GlobalTypeTableBuilder & DestIds,GlobalTypeTableBuilder & DestTypes,const CVTypeArray & IdsAndTypes,ArrayRef<GloballyHashedType> Hashes)294 Error TypeStreamMerger::mergeTypesAndIds(GlobalTypeTableBuilder &DestIds,
295 GlobalTypeTableBuilder &DestTypes,
296 const CVTypeArray &IdsAndTypes,
297 ArrayRef<GloballyHashedType> Hashes) {
298 DestGlobalIdStream = &DestIds;
299 DestGlobalTypeStream = &DestTypes;
300 UseGlobalHashes = true;
301 GlobalHashes = Hashes;
302 return doit(IdsAndTypes);
303 }
304
doit(const CVTypeArray & Types)305 Error TypeStreamMerger::doit(const CVTypeArray &Types) {
306 if (auto EC = remapAllTypes(Types))
307 return EC;
308
309 // If we found bad indices but no other errors, try doing another pass and see
310 // if we can resolve the indices that weren't in the map on the first pass.
311 // This may require multiple passes, but we should always make progress. MASM
312 // is the only known CodeView producer that makes type streams that aren't
313 // topologically sorted. The standard library contains MASM-produced objects,
314 // so this is important to handle correctly, but we don't have to be too
315 // efficient. MASM type streams are usually very small.
316 while (!LastError && NumBadIndices > 0) {
317 unsigned BadIndicesRemaining = NumBadIndices;
318 IsSecondPass = true;
319 NumBadIndices = 0;
320 CurIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex);
321
322 if (auto EC = remapAllTypes(Types))
323 return EC;
324
325 assert(NumBadIndices <= BadIndicesRemaining &&
326 "second pass found more bad indices");
327 if (!LastError && NumBadIndices == BadIndicesRemaining) {
328 return llvm::make_error<CodeViewError>(
329 cv_error_code::corrupt_record, "input type graph contains cycles");
330 }
331 }
332
333 if (LastError)
334 return std::move(*LastError);
335 return Error::success();
336 }
337
remapAllTypes(const CVTypeArray & Types)338 Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) {
339 BinaryStreamRef Stream = Types.getUnderlyingStream();
340 ArrayRef<uint8_t> Buffer;
341 cantFail(Stream.readBytes(0, Stream.getLength(), Buffer));
342
343 return forEachCodeViewRecord<CVType>(
344 Buffer, [this](const CVType &T) { return remapType(T); });
345 }
346
remapType(const CVType & Type)347 Error TypeStreamMerger::remapType(const CVType &Type) {
348 auto DoSerialize =
349 [this, Type](MutableArrayRef<uint8_t> Storage) -> ArrayRef<uint8_t> {
350 return remapIndices(Type, Storage);
351 };
352
353 TypeIndex DestIdx = Untranslated;
354 if (LLVM_LIKELY(UseGlobalHashes)) {
355 GlobalTypeTableBuilder &Dest =
356 isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream;
357 GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()];
358 DestIdx = Dest.insertRecordAs(H, Type.RecordData.size(), DoSerialize);
359 } else {
360 MergingTypeTableBuilder &Dest =
361 isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream;
362
363 RemapStorage.resize(Type.RecordData.size());
364 ArrayRef<uint8_t> Result = DoSerialize(RemapStorage);
365 if (!Result.empty())
366 DestIdx = Dest.insertRecordBytes(Result);
367 }
368 addMapping(DestIdx);
369
370 ++CurIndex;
371 assert((IsSecondPass || IndexMap.size() == slotForIndex(CurIndex)) &&
372 "visitKnownRecord should add one index map entry");
373 return Error::success();
374 }
375
376 ArrayRef<uint8_t>
remapIndices(const CVType & OriginalType,MutableArrayRef<uint8_t> Storage)377 TypeStreamMerger::remapIndices(const CVType &OriginalType,
378 MutableArrayRef<uint8_t> Storage) {
379 SmallVector<TiReference, 4> Refs;
380 discoverTypeIndices(OriginalType.RecordData, Refs);
381 if (Refs.empty())
382 return OriginalType.RecordData;
383
384 ::memcpy(Storage.data(), OriginalType.RecordData.data(),
385 OriginalType.RecordData.size());
386
387 uint8_t *DestContent = Storage.data() + sizeof(RecordPrefix);
388
389 for (auto &Ref : Refs) {
390 TypeIndex *DestTIs =
391 reinterpret_cast<TypeIndex *>(DestContent + Ref.Offset);
392
393 for (size_t I = 0; I < Ref.Count; ++I) {
394 TypeIndex &TI = DestTIs[I];
395 bool Success = (Ref.Kind == TiRefKind::IndexRef) ? remapItemIndex(TI)
396 : remapTypeIndex(TI);
397 if (LLVM_UNLIKELY(!Success))
398 return {};
399 }
400 }
401 return Storage;
402 }
403
mergeTypeRecords(MergingTypeTableBuilder & Dest,SmallVectorImpl<TypeIndex> & SourceToDest,const CVTypeArray & Types)404 Error llvm::codeview::mergeTypeRecords(MergingTypeTableBuilder &Dest,
405 SmallVectorImpl<TypeIndex> &SourceToDest,
406 const CVTypeArray &Types) {
407 TypeStreamMerger M(SourceToDest);
408 return M.mergeTypeRecords(Dest, Types);
409 }
410
mergeIdRecords(MergingTypeTableBuilder & Dest,ArrayRef<TypeIndex> TypeSourceToDest,SmallVectorImpl<TypeIndex> & SourceToDest,const CVTypeArray & Ids)411 Error llvm::codeview::mergeIdRecords(MergingTypeTableBuilder &Dest,
412 ArrayRef<TypeIndex> TypeSourceToDest,
413 SmallVectorImpl<TypeIndex> &SourceToDest,
414 const CVTypeArray &Ids) {
415 TypeStreamMerger M(SourceToDest);
416 return M.mergeIdRecords(Dest, TypeSourceToDest, Ids);
417 }
418
mergeTypeAndIdRecords(MergingTypeTableBuilder & DestIds,MergingTypeTableBuilder & DestTypes,SmallVectorImpl<TypeIndex> & SourceToDest,const CVTypeArray & IdsAndTypes)419 Error llvm::codeview::mergeTypeAndIdRecords(
420 MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes,
421 SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes) {
422 TypeStreamMerger M(SourceToDest);
423 return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes);
424 }
425
mergeTypeAndIdRecords(GlobalTypeTableBuilder & DestIds,GlobalTypeTableBuilder & DestTypes,SmallVectorImpl<TypeIndex> & SourceToDest,const CVTypeArray & IdsAndTypes,ArrayRef<GloballyHashedType> Hashes)426 Error llvm::codeview::mergeTypeAndIdRecords(
427 GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes,
428 SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
429 ArrayRef<GloballyHashedType> Hashes) {
430 TypeStreamMerger M(SourceToDest);
431 return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, Hashes);
432 }
433
mergeTypeRecords(GlobalTypeTableBuilder & Dest,SmallVectorImpl<TypeIndex> & SourceToDest,const CVTypeArray & Types,ArrayRef<GloballyHashedType> Hashes)434 Error llvm::codeview::mergeTypeRecords(GlobalTypeTableBuilder &Dest,
435 SmallVectorImpl<TypeIndex> &SourceToDest,
436 const CVTypeArray &Types,
437 ArrayRef<GloballyHashedType> Hashes) {
438 TypeStreamMerger M(SourceToDest);
439 return M.mergeTypeRecords(Dest, Types, Hashes);
440 }
441
mergeIdRecords(GlobalTypeTableBuilder & Dest,ArrayRef<TypeIndex> Types,SmallVectorImpl<TypeIndex> & SourceToDest,const CVTypeArray & Ids,ArrayRef<GloballyHashedType> Hashes)442 Error llvm::codeview::mergeIdRecords(GlobalTypeTableBuilder &Dest,
443 ArrayRef<TypeIndex> Types,
444 SmallVectorImpl<TypeIndex> &SourceToDest,
445 const CVTypeArray &Ids,
446 ArrayRef<GloballyHashedType> Hashes) {
447 TypeStreamMerger M(SourceToDest);
448 return M.mergeIdRecords(Dest, Types, Ids, Hashes);
449 }
450