1 //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "llvm/Bitstream/BitstreamReader.h"
10 #include "llvm/ADT/StringRef.h"
11 #include <cassert>
12 #include <string>
13
14 using namespace llvm;
15
16 //===----------------------------------------------------------------------===//
17 // BitstreamCursor implementation
18 //===----------------------------------------------------------------------===//
19
20 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
EnterSubBlock(unsigned BlockID,unsigned * NumWordsP)21 Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
22 // Save the current block's state on BlockScope.
23 BlockScope.push_back(Block(CurCodeSize));
24 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
25
26 // Add the abbrevs specific to this block to the CurAbbrevs list.
27 if (BlockInfo) {
28 if (const BitstreamBlockInfo::BlockInfo *Info =
29 BlockInfo->getBlockInfo(BlockID)) {
30 CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(),
31 Info->Abbrevs.end());
32 }
33 }
34
35 // Get the codesize of this block.
36 Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth);
37 if (!MaybeVBR)
38 return MaybeVBR.takeError();
39 CurCodeSize = MaybeVBR.get();
40
41 if (CurCodeSize > MaxChunkSize)
42 return llvm::createStringError(
43 std::errc::illegal_byte_sequence,
44 "can't read more than %zu at a time, trying to read %u", +MaxChunkSize,
45 CurCodeSize);
46
47 SkipToFourByteBoundary();
48 Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth);
49 if (!MaybeNum)
50 return MaybeNum.takeError();
51 word_t NumWords = MaybeNum.get();
52 if (NumWordsP)
53 *NumWordsP = NumWords;
54
55 if (CurCodeSize == 0)
56 return llvm::createStringError(
57 std::errc::illegal_byte_sequence,
58 "can't enter sub-block: current code size is 0");
59 if (AtEndOfStream())
60 return llvm::createStringError(
61 std::errc::illegal_byte_sequence,
62 "can't enter sub block: already at end of stream");
63
64 return Error::success();
65 }
66
readAbbreviatedField(BitstreamCursor & Cursor,const BitCodeAbbrevOp & Op)67 static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor,
68 const BitCodeAbbrevOp &Op) {
69 assert(!Op.isLiteral() && "Not to be used with literals!");
70
71 // Decode the value as we are commanded.
72 switch (Op.getEncoding()) {
73 case BitCodeAbbrevOp::Array:
74 case BitCodeAbbrevOp::Blob:
75 llvm_unreachable("Should not reach here");
76 case BitCodeAbbrevOp::Fixed:
77 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
78 return Cursor.Read((unsigned)Op.getEncodingData());
79 case BitCodeAbbrevOp::VBR:
80 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
81 return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
82 case BitCodeAbbrevOp::Char6:
83 if (Expected<unsigned> Res = Cursor.Read(6))
84 return BitCodeAbbrevOp::DecodeChar6(Res.get());
85 else
86 return Res.takeError();
87 }
88 llvm_unreachable("invalid abbreviation encoding");
89 }
90
91 /// skipRecord - Read the current record and discard it.
skipRecord(unsigned AbbrevID)92 Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
93 // Skip unabbreviated records by reading past their entries.
94 if (AbbrevID == bitc::UNABBREV_RECORD) {
95 Expected<uint32_t> MaybeCode = ReadVBR(6);
96 if (!MaybeCode)
97 return MaybeCode.takeError();
98 unsigned Code = MaybeCode.get();
99 Expected<uint32_t> MaybeVBR = ReadVBR(6);
100 if (!MaybeVBR)
101 return MaybeVBR.get();
102 unsigned NumElts = MaybeVBR.get();
103 for (unsigned i = 0; i != NumElts; ++i)
104 if (Expected<uint64_t> Res = ReadVBR64(6))
105 ; // Skip!
106 else
107 return Res.takeError();
108 return Code;
109 }
110
111 const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
112 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
113 unsigned Code;
114 if (CodeOp.isLiteral())
115 Code = CodeOp.getLiteralValue();
116 else {
117 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
118 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
119 return llvm::createStringError(
120 std::errc::illegal_byte_sequence,
121 "Abbreviation starts with an Array or a Blob");
122 Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp);
123 if (!MaybeCode)
124 return MaybeCode.takeError();
125 Code = MaybeCode.get();
126 }
127
128 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
129 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
130 if (Op.isLiteral())
131 continue;
132
133 if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
134 Op.getEncoding() != BitCodeAbbrevOp::Blob) {
135 if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op))
136 continue;
137 else
138 return MaybeField.takeError();
139 }
140
141 if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
142 // Array case. Read the number of elements as a vbr6.
143 Expected<uint32_t> MaybeNum = ReadVBR(6);
144 if (!MaybeNum)
145 return MaybeNum.takeError();
146 unsigned NumElts = MaybeNum.get();
147
148 // Get the element encoding.
149 assert(i+2 == e && "array op not second to last?");
150 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
151
152 // Read all the elements.
153 // Decode the value as we are commanded.
154 switch (EltEnc.getEncoding()) {
155 default:
156 report_fatal_error("Array element type can't be an Array or a Blob");
157 case BitCodeAbbrevOp::Fixed:
158 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
159 if (Error Err = JumpToBit(GetCurrentBitNo() +
160 NumElts * EltEnc.getEncodingData()))
161 return std::move(Err);
162 break;
163 case BitCodeAbbrevOp::VBR:
164 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
165 for (; NumElts; --NumElts)
166 if (Expected<uint64_t> Res =
167 ReadVBR64((unsigned)EltEnc.getEncodingData()))
168 ; // Skip!
169 else
170 return Res.takeError();
171 break;
172 case BitCodeAbbrevOp::Char6:
173 if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6))
174 return std::move(Err);
175 break;
176 }
177 continue;
178 }
179
180 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
181 // Blob case. Read the number of bytes as a vbr6.
182 Expected<uint32_t> MaybeNum = ReadVBR(6);
183 if (!MaybeNum)
184 return MaybeNum.takeError();
185 unsigned NumElts = MaybeNum.get();
186 SkipToFourByteBoundary(); // 32-bit alignment
187
188 // Figure out where the end of this blob will be including tail padding.
189 size_t NewEnd = GetCurrentBitNo()+((NumElts+3)&~3)*8;
190
191 // If this would read off the end of the bitcode file, just set the
192 // record to empty and return.
193 if (!canSkipToPos(NewEnd/8)) {
194 skipToEnd();
195 break;
196 }
197
198 // Skip over the blob.
199 if (Error Err = JumpToBit(NewEnd))
200 return std::move(Err);
201 }
202 return Code;
203 }
204
readRecord(unsigned AbbrevID,SmallVectorImpl<uint64_t> & Vals,StringRef * Blob)205 Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
206 SmallVectorImpl<uint64_t> &Vals,
207 StringRef *Blob) {
208 if (AbbrevID == bitc::UNABBREV_RECORD) {
209 Expected<uint32_t> MaybeCode = ReadVBR(6);
210 if (!MaybeCode)
211 return MaybeCode.takeError();
212 uint32_t Code = MaybeCode.get();
213 Expected<uint32_t> MaybeNumElts = ReadVBR(6);
214 if (!MaybeNumElts)
215 return MaybeNumElts.takeError();
216 uint32_t NumElts = MaybeNumElts.get();
217
218 for (unsigned i = 0; i != NumElts; ++i)
219 if (Expected<uint64_t> MaybeVal = ReadVBR64(6))
220 Vals.push_back(MaybeVal.get());
221 else
222 return MaybeVal.takeError();
223 return Code;
224 }
225
226 const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
227
228 // Read the record code first.
229 assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
230 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
231 unsigned Code;
232 if (CodeOp.isLiteral())
233 Code = CodeOp.getLiteralValue();
234 else {
235 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
236 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
237 report_fatal_error("Abbreviation starts with an Array or a Blob");
238 if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp))
239 Code = MaybeCode.get();
240 else
241 return MaybeCode.takeError();
242 }
243
244 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
245 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
246 if (Op.isLiteral()) {
247 Vals.push_back(Op.getLiteralValue());
248 continue;
249 }
250
251 if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
252 Op.getEncoding() != BitCodeAbbrevOp::Blob) {
253 if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op))
254 Vals.push_back(MaybeVal.get());
255 else
256 return MaybeVal.takeError();
257 continue;
258 }
259
260 if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
261 // Array case. Read the number of elements as a vbr6.
262 Expected<uint32_t> MaybeNumElts = ReadVBR(6);
263 if (!MaybeNumElts)
264 return MaybeNumElts.takeError();
265 uint32_t NumElts = MaybeNumElts.get();
266
267 // Get the element encoding.
268 if (i + 2 != e)
269 report_fatal_error("Array op not second to last");
270 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
271 if (!EltEnc.isEncoding())
272 report_fatal_error(
273 "Array element type has to be an encoding of a type");
274
275 // Read all the elements.
276 switch (EltEnc.getEncoding()) {
277 default:
278 report_fatal_error("Array element type can't be an Array or a Blob");
279 case BitCodeAbbrevOp::Fixed:
280 for (; NumElts; --NumElts)
281 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal =
282 Read((unsigned)EltEnc.getEncodingData()))
283 Vals.push_back(MaybeVal.get());
284 else
285 return MaybeVal.takeError();
286 break;
287 case BitCodeAbbrevOp::VBR:
288 for (; NumElts; --NumElts)
289 if (Expected<uint64_t> MaybeVal =
290 ReadVBR64((unsigned)EltEnc.getEncodingData()))
291 Vals.push_back(MaybeVal.get());
292 else
293 return MaybeVal.takeError();
294 break;
295 case BitCodeAbbrevOp::Char6:
296 for (; NumElts; --NumElts)
297 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6))
298 Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get()));
299 else
300 return MaybeVal.takeError();
301 }
302 continue;
303 }
304
305 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
306 // Blob case. Read the number of bytes as a vbr6.
307 Expected<uint32_t> MaybeNumElts = ReadVBR(6);
308 if (!MaybeNumElts)
309 return MaybeNumElts.takeError();
310 uint32_t NumElts = MaybeNumElts.get();
311 SkipToFourByteBoundary(); // 32-bit alignment
312
313 // Figure out where the end of this blob will be including tail padding.
314 size_t CurBitPos = GetCurrentBitNo();
315 size_t NewEnd = CurBitPos+((NumElts+3)&~3)*8;
316
317 // If this would read off the end of the bitcode file, just set the
318 // record to empty and return.
319 if (!canSkipToPos(NewEnd/8)) {
320 Vals.append(NumElts, 0);
321 skipToEnd();
322 break;
323 }
324
325 // Otherwise, inform the streamer that we need these bytes in memory. Skip
326 // over tail padding first, in case jumping to NewEnd invalidates the Blob
327 // pointer.
328 if (Error Err = JumpToBit(NewEnd))
329 return std::move(Err);
330 const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts);
331
332 // If we can return a reference to the data, do so to avoid copying it.
333 if (Blob) {
334 *Blob = StringRef(Ptr, NumElts);
335 } else {
336 // Otherwise, unpack into Vals with zero extension.
337 for (; NumElts; --NumElts)
338 Vals.push_back((unsigned char)*Ptr++);
339 }
340 }
341
342 return Code;
343 }
344
ReadAbbrevRecord()345 Error BitstreamCursor::ReadAbbrevRecord() {
346 auto Abbv = std::make_shared<BitCodeAbbrev>();
347 Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5);
348 if (!MaybeNumOpInfo)
349 return MaybeNumOpInfo.takeError();
350 unsigned NumOpInfo = MaybeNumOpInfo.get();
351 for (unsigned i = 0; i != NumOpInfo; ++i) {
352 Expected<word_t> MaybeIsLiteral = Read(1);
353 if (!MaybeIsLiteral)
354 return MaybeIsLiteral.takeError();
355 bool IsLiteral = MaybeIsLiteral.get();
356 if (IsLiteral) {
357 Expected<uint64_t> MaybeOp = ReadVBR64(8);
358 if (!MaybeOp)
359 return MaybeOp.takeError();
360 Abbv->Add(BitCodeAbbrevOp(MaybeOp.get()));
361 continue;
362 }
363
364 Expected<word_t> MaybeEncoding = Read(3);
365 if (!MaybeEncoding)
366 return MaybeEncoding.takeError();
367 BitCodeAbbrevOp::Encoding E =
368 (BitCodeAbbrevOp::Encoding)MaybeEncoding.get();
369 if (BitCodeAbbrevOp::hasEncodingData(E)) {
370 Expected<uint64_t> MaybeData = ReadVBR64(5);
371 if (!MaybeData)
372 return MaybeData.takeError();
373 uint64_t Data = MaybeData.get();
374
375 // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
376 // and vbr(0) as a literal zero. This is decoded the same way, and avoids
377 // a slow path in Read() to have to handle reading zero bits.
378 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
379 Data == 0) {
380 Abbv->Add(BitCodeAbbrevOp(0));
381 continue;
382 }
383
384 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
385 Data > MaxChunkSize)
386 report_fatal_error(
387 "Fixed or VBR abbrev record with size > MaxChunkData");
388
389 Abbv->Add(BitCodeAbbrevOp(E, Data));
390 } else
391 Abbv->Add(BitCodeAbbrevOp(E));
392 }
393
394 if (Abbv->getNumOperandInfos() == 0)
395 report_fatal_error("Abbrev record with no operands");
396 CurAbbrevs.push_back(std::move(Abbv));
397
398 return Error::success();
399 }
400
401 Expected<Optional<BitstreamBlockInfo>>
ReadBlockInfoBlock(bool ReadBlockInfoNames)402 BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
403 if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID))
404 return std::move(Err);
405
406 BitstreamBlockInfo NewBlockInfo;
407
408 SmallVector<uint64_t, 64> Record;
409 BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr;
410
411 // Read all the records for this module.
412 while (true) {
413 Expected<BitstreamEntry> MaybeEntry =
414 advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
415 if (!MaybeEntry)
416 return MaybeEntry.takeError();
417 BitstreamEntry Entry = MaybeEntry.get();
418
419 switch (Entry.Kind) {
420 case llvm::BitstreamEntry::SubBlock: // Handled for us already.
421 case llvm::BitstreamEntry::Error:
422 return None;
423 case llvm::BitstreamEntry::EndBlock:
424 return std::move(NewBlockInfo);
425 case llvm::BitstreamEntry::Record:
426 // The interesting case.
427 break;
428 }
429
430 // Read abbrev records, associate them with CurBID.
431 if (Entry.ID == bitc::DEFINE_ABBREV) {
432 if (!CurBlockInfo) return None;
433 if (Error Err = ReadAbbrevRecord())
434 return std::move(Err);
435
436 // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the
437 // appropriate BlockInfo.
438 CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back()));
439 CurAbbrevs.pop_back();
440 continue;
441 }
442
443 // Read a record.
444 Record.clear();
445 Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record);
446 if (!MaybeBlockInfo)
447 return MaybeBlockInfo.takeError();
448 switch (MaybeBlockInfo.get()) {
449 default:
450 break; // Default behavior, ignore unknown content.
451 case bitc::BLOCKINFO_CODE_SETBID:
452 if (Record.size() < 1)
453 return None;
454 CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
455 break;
456 case bitc::BLOCKINFO_CODE_BLOCKNAME: {
457 if (!CurBlockInfo)
458 return None;
459 if (!ReadBlockInfoNames)
460 break; // Ignore name.
461 std::string Name;
462 for (unsigned i = 0, e = Record.size(); i != e; ++i)
463 Name += (char)Record[i];
464 CurBlockInfo->Name = Name;
465 break;
466 }
467 case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
468 if (!CurBlockInfo) return None;
469 if (!ReadBlockInfoNames)
470 break; // Ignore name.
471 std::string Name;
472 for (unsigned i = 1, e = Record.size(); i != e; ++i)
473 Name += (char)Record[i];
474 CurBlockInfo->RecordNames.push_back(std::make_pair((unsigned)Record[0],
475 Name));
476 break;
477 }
478 }
479 }
480 }
481