1 //===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_SUPPORT_BINARYSTREAMARRAY_H 10 #define LLVM_SUPPORT_BINARYSTREAMARRAY_H 11 12 #include "llvm/ADT/ArrayRef.h" 13 #include "llvm/ADT/iterator.h" 14 #include "llvm/Support/BinaryStreamRef.h" 15 #include "llvm/Support/Error.h" 16 #include <cassert> 17 #include <cstdint> 18 19 /// Lightweight arrays that are backed by an arbitrary BinaryStream. This file 20 /// provides two different array implementations. 21 /// 22 /// VarStreamArray - Arrays of variable length records. The user specifies 23 /// an Extractor type that can extract a record from a given offset and 24 /// return the number of bytes consumed by the record. 25 /// 26 /// FixedStreamArray - Arrays of fixed length records. This is similar in 27 /// spirit to ArrayRef<T>, but since it is backed by a BinaryStream, the 28 /// elements of the array need not be laid out in contiguous memory. 29 namespace llvm { 30 31 /// VarStreamArrayExtractor is intended to be specialized to provide customized 32 /// extraction logic. On input it receives a BinaryStreamRef pointing to the 33 /// beginning of the next record, but where the length of the record is not yet 34 /// known. Upon completion, it should return an appropriate Error instance if 35 /// a record could not be extracted, or if one could be extracted it should 36 /// return success and set Len to the number of bytes this record occupied in 37 /// the underlying stream, and it should fill out the fields of the value type 38 /// Item appropriately to represent the current record. 39 /// 40 /// You can specialize this template for your own custom value types to avoid 41 /// having to specify a second template argument to VarStreamArray (documented 42 /// below). 43 template <typename T> struct VarStreamArrayExtractor { 44 // Method intentionally deleted. You must provide an explicit specialization 45 // with the following method implemented. 46 Error operator()(BinaryStreamRef Stream, uint32_t &Len, 47 T &Item) const = delete; 48 }; 49 50 /// VarStreamArray represents an array of variable length records backed by a 51 /// stream. This could be a contiguous sequence of bytes in memory, it could 52 /// be a file on disk, or it could be a PDB stream where bytes are stored as 53 /// discontiguous blocks in a file. Usually it is desirable to treat arrays 54 /// as contiguous blocks of memory, but doing so with large PDB files, for 55 /// example, could mean allocating huge amounts of memory just to allow 56 /// re-ordering of stream data to be contiguous before iterating over it. By 57 /// abstracting this out, we need not duplicate this memory, and we can 58 /// iterate over arrays in arbitrarily formatted streams. Elements are parsed 59 /// lazily on iteration, so there is no upfront cost associated with building 60 /// or copying a VarStreamArray, no matter how large it may be. 61 /// 62 /// You create a VarStreamArray by specifying a ValueType and an Extractor type. 63 /// If you do not specify an Extractor type, you are expected to specialize 64 /// VarStreamArrayExtractor<T> for your ValueType. 65 /// 66 /// By default an Extractor is default constructed in the class, but in some 67 /// cases you might find it useful for an Extractor to maintain state across 68 /// extractions. In this case you can provide your own Extractor through a 69 /// secondary constructor. The following examples show various ways of 70 /// creating a VarStreamArray. 71 /// 72 /// // Will use VarStreamArrayExtractor<MyType> as the extractor. 73 /// VarStreamArray<MyType> MyTypeArray; 74 /// 75 /// // Will use a default-constructed MyExtractor as the extractor. 76 /// VarStreamArray<MyType, MyExtractor> MyTypeArray2; 77 /// 78 /// // Will use the specific instance of MyExtractor provided. 79 /// // MyExtractor need not be default-constructible in this case. 80 /// MyExtractor E(SomeContext); 81 /// VarStreamArray<MyType, MyExtractor> MyTypeArray3(E); 82 /// 83 84 template <typename ValueType, typename Extractor> class VarStreamArrayIterator; 85 86 template <typename ValueType, 87 typename Extractor = VarStreamArrayExtractor<ValueType>> 88 class VarStreamArray { 89 friend class VarStreamArrayIterator<ValueType, Extractor>; 90 91 public: 92 typedef VarStreamArrayIterator<ValueType, Extractor> Iterator; 93 94 VarStreamArray() = default; 95 VarStreamArray(const Extractor & E)96 explicit VarStreamArray(const Extractor &E) : E(E) {} 97 98 explicit VarStreamArray(BinaryStreamRef Stream, uint32_t Skew = 0) Stream(Stream)99 : Stream(Stream), Skew(Skew) {} 100 101 VarStreamArray(BinaryStreamRef Stream, const Extractor &E, uint32_t Skew = 0) Stream(Stream)102 : Stream(Stream), E(E), Skew(Skew) {} 103 104 Iterator begin(bool *HadError = nullptr) const { 105 return Iterator(*this, E, Skew, nullptr); 106 } 107 valid()108 bool valid() const { return Stream.valid(); } 109 skew()110 uint32_t skew() const { return Skew; } end()111 Iterator end() const { return Iterator(E); } 112 empty()113 bool empty() const { return Stream.getLength() == 0; } 114 substream(uint32_t Begin,uint32_t End)115 VarStreamArray<ValueType, Extractor> substream(uint32_t Begin, 116 uint32_t End) const { 117 assert(Begin >= Skew); 118 // We should never cut off the beginning of the stream since it might be 119 // skewed, meaning the initial bytes are important. 120 BinaryStreamRef NewStream = Stream.slice(0, End); 121 return {NewStream, E, Begin}; 122 } 123 124 /// given an offset into the array's underlying stream, return an 125 /// iterator to the record at that offset. This is considered unsafe 126 /// since the behavior is undefined if \p Offset does not refer to the 127 /// beginning of a valid record. at(uint32_t Offset)128 Iterator at(uint32_t Offset) const { 129 return Iterator(*this, E, Offset, nullptr); 130 } 131 getExtractor()132 const Extractor &getExtractor() const { return E; } getExtractor()133 Extractor &getExtractor() { return E; } 134 getUnderlyingStream()135 BinaryStreamRef getUnderlyingStream() const { return Stream; } 136 void setUnderlyingStream(BinaryStreamRef NewStream, uint32_t NewSkew = 0) { 137 Stream = NewStream; 138 Skew = NewSkew; 139 } 140 drop_front()141 void drop_front() { Skew += begin()->length(); } 142 143 private: 144 BinaryStreamRef Stream; 145 Extractor E; 146 uint32_t Skew = 0; 147 }; 148 149 template <typename ValueType, typename Extractor> 150 class VarStreamArrayIterator 151 : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>, 152 std::forward_iterator_tag, ValueType> { 153 typedef VarStreamArrayIterator<ValueType, Extractor> IterType; 154 typedef VarStreamArray<ValueType, Extractor> ArrayType; 155 156 public: VarStreamArrayIterator(const ArrayType & Array,const Extractor & E,uint32_t Offset,bool * HadError)157 VarStreamArrayIterator(const ArrayType &Array, const Extractor &E, 158 uint32_t Offset, bool *HadError) 159 : IterRef(Array.Stream.drop_front(Offset)), Extract(E), 160 Array(&Array), AbsOffset(Offset), HadError(HadError) { 161 if (IterRef.getLength() == 0) 162 moveToEnd(); 163 else { 164 auto EC = Extract(IterRef, ThisLen, ThisValue); 165 if (EC) { 166 consumeError(std::move(EC)); 167 markError(); 168 } 169 } 170 } 171 172 VarStreamArrayIterator() = default; VarStreamArrayIterator(const Extractor & E)173 explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {} 174 ~VarStreamArrayIterator() = default; 175 176 bool operator==(const IterType &R) const { 177 if (Array && R.Array) { 178 // Both have a valid array, make sure they're same. 179 assert(Array == R.Array); 180 return IterRef == R.IterRef; 181 } 182 183 // Both iterators are at the end. 184 if (!Array && !R.Array) 185 return true; 186 187 // One is not at the end and one is. 188 return false; 189 } 190 191 const ValueType &operator*() const { 192 assert(Array && !HasError); 193 return ThisValue; 194 } 195 196 ValueType &operator*() { 197 assert(Array && !HasError); 198 return ThisValue; 199 } 200 201 IterType &operator+=(unsigned N) { 202 for (unsigned I = 0; I < N; ++I) { 203 // We are done with the current record, discard it so that we are 204 // positioned at the next record. 205 AbsOffset += ThisLen; 206 IterRef = IterRef.drop_front(ThisLen); 207 if (IterRef.getLength() == 0) { 208 // There is nothing after the current record, we must make this an end 209 // iterator. 210 moveToEnd(); 211 } else { 212 // There is some data after the current record. 213 auto EC = Extract(IterRef, ThisLen, ThisValue); 214 if (EC) { 215 consumeError(std::move(EC)); 216 markError(); 217 } else if (ThisLen == 0) { 218 // An empty record? Make this an end iterator. 219 moveToEnd(); 220 } 221 } 222 } 223 return *this; 224 } 225 offset()226 uint32_t offset() const { return AbsOffset; } getRecordLength()227 uint32_t getRecordLength() const { return ThisLen; } 228 229 private: moveToEnd()230 void moveToEnd() { 231 Array = nullptr; 232 ThisLen = 0; 233 } markError()234 void markError() { 235 moveToEnd(); 236 HasError = true; 237 if (HadError != nullptr) 238 *HadError = true; 239 } 240 241 ValueType ThisValue; 242 BinaryStreamRef IterRef; 243 Extractor Extract; 244 const ArrayType *Array{nullptr}; 245 uint32_t ThisLen{0}; 246 uint32_t AbsOffset{0}; 247 bool HasError{false}; 248 bool *HadError{nullptr}; 249 }; 250 251 template <typename T> class FixedStreamArrayIterator; 252 253 /// FixedStreamArray is similar to VarStreamArray, except with each record 254 /// having a fixed-length. As with VarStreamArray, there is no upfront 255 /// cost associated with building or copying a FixedStreamArray, as the 256 /// memory for each element is not read from the backing stream until that 257 /// element is iterated. 258 template <typename T> class FixedStreamArray { 259 friend class FixedStreamArrayIterator<T>; 260 261 public: 262 typedef FixedStreamArrayIterator<T> Iterator; 263 264 FixedStreamArray() = default; FixedStreamArray(BinaryStreamRef Stream)265 explicit FixedStreamArray(BinaryStreamRef Stream) : Stream(Stream) { 266 assert(Stream.getLength() % sizeof(T) == 0); 267 } 268 269 bool operator==(const FixedStreamArray<T> &Other) const { 270 return Stream == Other.Stream; 271 } 272 273 bool operator!=(const FixedStreamArray<T> &Other) const { 274 return !(*this == Other); 275 } 276 277 FixedStreamArray(const FixedStreamArray &) = default; 278 FixedStreamArray &operator=(const FixedStreamArray &) = default; 279 280 const T &operator[](uint32_t Index) const { 281 assert(Index < size()); 282 uint32_t Off = Index * sizeof(T); 283 ArrayRef<uint8_t> Data; 284 if (auto EC = Stream.readBytes(Off, sizeof(T), Data)) { 285 assert(false && "Unexpected failure reading from stream"); 286 // This should never happen since we asserted that the stream length was 287 // an exact multiple of the element size. 288 consumeError(std::move(EC)); 289 } 290 assert(isAddrAligned(Align::Of<T>(), Data.data())); 291 return *reinterpret_cast<const T *>(Data.data()); 292 } 293 size()294 uint32_t size() const { return Stream.getLength() / sizeof(T); } 295 empty()296 bool empty() const { return size() == 0; } 297 begin()298 FixedStreamArrayIterator<T> begin() const { 299 return FixedStreamArrayIterator<T>(*this, 0); 300 } 301 end()302 FixedStreamArrayIterator<T> end() const { 303 return FixedStreamArrayIterator<T>(*this, size()); 304 } 305 front()306 const T &front() const { return *begin(); } back()307 const T &back() const { 308 FixedStreamArrayIterator<T> I = end(); 309 return *(--I); 310 } 311 getUnderlyingStream()312 BinaryStreamRef getUnderlyingStream() const { return Stream; } 313 314 private: 315 BinaryStreamRef Stream; 316 }; 317 318 template <typename T> 319 class FixedStreamArrayIterator 320 : public iterator_facade_base<FixedStreamArrayIterator<T>, 321 std::random_access_iterator_tag, const T> { 322 323 public: FixedStreamArrayIterator(const FixedStreamArray<T> & Array,uint32_t Index)324 FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index) 325 : Array(Array), Index(Index) {} 326 327 FixedStreamArrayIterator<T>(const FixedStreamArrayIterator<T> &Other) 328 : Array(Other.Array), Index(Other.Index) {} 329 FixedStreamArrayIterator<T> & 330 operator=(const FixedStreamArrayIterator<T> &Other) { 331 Array = Other.Array; 332 Index = Other.Index; 333 return *this; 334 } 335 336 const T &operator*() const { return Array[Index]; } 337 const T &operator*() { return Array[Index]; } 338 339 bool operator==(const FixedStreamArrayIterator<T> &R) const { 340 assert(Array == R.Array); 341 return (Index == R.Index) && (Array == R.Array); 342 } 343 344 FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) { 345 Index += N; 346 return *this; 347 } 348 349 FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) { 350 assert(std::ptrdiff_t(Index) >= N); 351 Index -= N; 352 return *this; 353 } 354 355 std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const { 356 assert(Array == R.Array); 357 assert(Index >= R.Index); 358 return Index - R.Index; 359 } 360 361 bool operator<(const FixedStreamArrayIterator<T> &RHS) const { 362 assert(Array == RHS.Array); 363 return Index < RHS.Index; 364 } 365 366 private: 367 FixedStreamArray<T> Array; 368 uint32_t Index; 369 }; 370 371 } // namespace llvm 372 373 #endif // LLVM_SUPPORT_BINARYSTREAMARRAY_H 374