1 //===- TypeHashing.h ---------------------------------------------*- C++-*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H 11 #define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H 12 13 #include "llvm/ADT/DenseMapInfo.h" 14 #include "llvm/ADT/Hashing.h" 15 16 #include "llvm/DebugInfo/CodeView/CodeView.h" 17 #include "llvm/DebugInfo/CodeView/TypeCollection.h" 18 #include "llvm/DebugInfo/CodeView/TypeIndex.h" 19 20 #include "llvm/Support/FormatProviders.h" 21 22 #include <type_traits> 23 24 namespace llvm { 25 namespace codeview { 26 27 /// A locally hashed type represents a straightforward hash code of a serialized 28 /// record. The record is simply serialized, and then the bytes are hashed by 29 /// a standard algorithm. This is sufficient for the case of de-duplicating 30 /// records within a single sequence of types, because if two records both have 31 /// a back-reference to the same type in the same stream, they will both have 32 /// the same numeric value for the TypeIndex of the back reference. 33 struct LocallyHashedType { 34 hash_code Hash; 35 ArrayRef<uint8_t> RecordData; 36 37 /// Given a type, compute its local hash. 38 static LocallyHashedType hashType(ArrayRef<uint8_t> RecordData); 39 40 /// Given a sequence of types, compute all of the local hashes. 41 template <typename Range> hashTypesLocallyHashedType42 static std::vector<LocallyHashedType> hashTypes(Range &&Records) { 43 std::vector<LocallyHashedType> Hashes; 44 Hashes.reserve(std::distance(std::begin(Records), std::end(Records))); 45 for (const auto &R : Records) 46 Hashes.push_back(hashType(R)); 47 48 return Hashes; 49 } 50 51 static std::vector<LocallyHashedType> hashTypeCollectionLocallyHashedType52 hashTypeCollection(TypeCollection &Types) { 53 std::vector<LocallyHashedType> Hashes; 54 Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) { 55 Hashes.push_back(hashType(Type.RecordData)); 56 }); 57 return Hashes; 58 } 59 }; 60 61 enum class GlobalTypeHashAlg : uint16_t { 62 SHA1 = 0, // standard 20-byte SHA1 hash 63 SHA1_8 // last 8-bytes of standard SHA1 hash 64 }; 65 66 /// A globally hashed type represents a hash value that is sufficient to 67 /// uniquely identify a record across multiple type streams or type sequences. 68 /// This works by, for any given record A which references B, replacing the 69 /// TypeIndex that refers to B with a previously-computed global hash for B. As 70 /// this is a recursive algorithm (e.g. the global hash of B also depends on the 71 /// global hashes of the types that B refers to), a global hash can uniquely 72 /// identify identify that A occurs in another stream that has a completely 73 /// different graph structure. Although the hash itself is slower to compute, 74 /// probing is much faster with a globally hashed type, because the hash itself 75 /// is considered "as good as" the original type. Since type records can be 76 /// quite large, this makes the equality comparison of the hash much faster than 77 /// equality comparison of a full record. 78 struct GloballyHashedType { 79 GloballyHashedType() = default; GloballyHashedTypeGloballyHashedType80 GloballyHashedType(StringRef H) 81 : GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {} GloballyHashedTypeGloballyHashedType82 GloballyHashedType(ArrayRef<uint8_t> H) { 83 assert(H.size() == 8); 84 ::memcpy(Hash.data(), H.data(), 8); 85 } 86 std::array<uint8_t, 8> Hash; 87 88 /// Given a sequence of bytes representing a record, compute a global hash for 89 /// this record. Due to the nature of global hashes incorporating the hashes 90 /// of referenced records, this function requires a list of types and ids 91 /// that RecordData might reference, indexable by TypeIndex. 92 static GloballyHashedType hashType(ArrayRef<uint8_t> RecordData, 93 ArrayRef<GloballyHashedType> PreviousTypes, 94 ArrayRef<GloballyHashedType> PreviousIds); 95 96 /// Given a sequence of bytes representing a record, compute a global hash for 97 /// this record. Due to the nature of global hashes incorporating the hashes 98 /// of referenced records, this function requires a list of types and ids 99 /// that RecordData might reference, indexable by TypeIndex. hashTypeGloballyHashedType100 static GloballyHashedType hashType(CVType Type, 101 ArrayRef<GloballyHashedType> PreviousTypes, 102 ArrayRef<GloballyHashedType> PreviousIds) { 103 return hashType(Type.RecordData, PreviousTypes, PreviousIds); 104 } 105 106 /// Given a sequence of combined type and ID records, compute global hashes 107 /// for each of them, returning the results in a vector of hashed types. 108 template <typename Range> hashTypesGloballyHashedType109 static std::vector<GloballyHashedType> hashTypes(Range &&Records) { 110 std::vector<GloballyHashedType> Hashes; 111 for (const auto &R : Records) 112 Hashes.push_back(hashType(R, Hashes, Hashes)); 113 114 return Hashes; 115 } 116 117 /// Given a sequence of combined type and ID records, compute global hashes 118 /// for each of them, returning the results in a vector of hashed types. 119 template <typename Range> 120 static std::vector<GloballyHashedType> hashIdsGloballyHashedType121 hashIds(Range &&Records, ArrayRef<GloballyHashedType> TypeHashes) { 122 std::vector<GloballyHashedType> IdHashes; 123 for (const auto &R : Records) 124 IdHashes.push_back(hashType(R, TypeHashes, IdHashes)); 125 126 return IdHashes; 127 } 128 129 static std::vector<GloballyHashedType> hashTypeCollectionGloballyHashedType130 hashTypeCollection(TypeCollection &Types) { 131 std::vector<GloballyHashedType> Hashes; 132 Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) { 133 Hashes.push_back(hashType(Type.RecordData, Hashes, Hashes)); 134 }); 135 return Hashes; 136 } 137 }; 138 #if defined(_MSC_VER) 139 // is_trivially_copyable is not available in older versions of libc++, but it is 140 // available in all supported versions of MSVC, so at least this gives us some 141 // coverage. 142 static_assert(std::is_trivially_copyable<GloballyHashedType>::value, 143 "GloballyHashedType must be trivially copyable so that we can " 144 "reinterpret_cast arrays of hash data to arrays of " 145 "GloballyHashedType"); 146 #endif 147 } // namespace codeview 148 149 template <> struct DenseMapInfo<codeview::LocallyHashedType> { 150 static codeview::LocallyHashedType Empty; 151 static codeview::LocallyHashedType Tombstone; 152 153 static codeview::LocallyHashedType getEmptyKey() { return Empty; } 154 155 static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; } 156 157 static unsigned getHashValue(codeview::LocallyHashedType Val) { 158 return Val.Hash; 159 } 160 161 static bool isEqual(codeview::LocallyHashedType LHS, 162 codeview::LocallyHashedType RHS) { 163 if (LHS.Hash != RHS.Hash) 164 return false; 165 return LHS.RecordData == RHS.RecordData; 166 } 167 }; 168 169 template <> struct DenseMapInfo<codeview::GloballyHashedType> { 170 static codeview::GloballyHashedType Empty; 171 static codeview::GloballyHashedType Tombstone; 172 173 static codeview::GloballyHashedType getEmptyKey() { return Empty; } 174 175 static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; } 176 177 static unsigned getHashValue(codeview::GloballyHashedType Val) { 178 return *reinterpret_cast<const unsigned *>(Val.Hash.data()); 179 } 180 181 static bool isEqual(codeview::GloballyHashedType LHS, 182 codeview::GloballyHashedType RHS) { 183 return LHS.Hash == RHS.Hash; 184 } 185 }; 186 187 template <> struct format_provider<codeview::LocallyHashedType> { 188 public: 189 static void format(const codeview::LocallyHashedType &V, 190 llvm::raw_ostream &Stream, StringRef Style) { 191 write_hex(Stream, V.Hash, HexPrintStyle::Upper, 8); 192 } 193 }; 194 195 template <> struct format_provider<codeview::GloballyHashedType> { 196 public: 197 static void format(const codeview::GloballyHashedType &V, 198 llvm::raw_ostream &Stream, StringRef Style) { 199 for (uint8_t B : V.Hash) { 200 write_hex(Stream, B, HexPrintStyle::Upper, 2); 201 } 202 } 203 }; 204 205 } // namespace llvm 206 207 #endif 208