1 // Copyright (c) 2017 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // Contains utils for reading, writing and debug printing bit streams.
16
17 #ifndef LIBSPIRV_UTIL_BIT_STREAM_H_
18 #define LIBSPIRV_UTIL_BIT_STREAM_H_
19
20 #include <algorithm>
21 #include <bitset>
22 #include <cstdint>
23 #include <functional>
24 #include <string>
25 #include <sstream>
26 #include <vector>
27
28 namespace spvutils {
29
30 // Returns rounded down log2(val). log2(0) is considered 0.
31 size_t Log2U64(uint64_t val);
32
33 // Terminology:
34 // Bits - usually used for a uint64 word, first bit is the lowest.
35 // Stream - std::string of '0' and '1', read left-to-right,
36 // i.e. first bit is at the front and not at the end as in
37 // std::bitset::to_string().
38 // Bitset - std::bitset corresponding to uint64 bits and to reverse(stream).
39
40 // Converts number of bits to a respective number of chunks of size N.
41 // For example NumBitsToNumWords<8> returns how many bytes are needed to store
42 // |num_bits|.
43 template <size_t N>
NumBitsToNumWords(size_t num_bits)44 inline size_t NumBitsToNumWords(size_t num_bits) {
45 return (num_bits + (N - 1)) / N;
46 }
47
48 // Returns value of the same type as |in|, where all but the first |num_bits|
49 // are set to zero.
50 template <typename T>
GetLowerBits(T in,size_t num_bits)51 inline T GetLowerBits(T in, size_t num_bits) {
52 return sizeof(T) * 8 == num_bits ? in : in & T((T(1) << num_bits) - T(1));
53 }
54
55 // Encodes signed integer as unsigned in zigzag order:
56 // 0 -> 0
57 // -1 -> 1
58 // 1 -> 2
59 // -2 -> 3
60 // 2 -> 4
61 // Motivation: -1 is 0xFF...FF what doesn't work very well with
62 // WriteVariableWidth which prefers to have as many 0 bits as possible.
EncodeZigZag(int64_t val)63 inline uint64_t EncodeZigZag(int64_t val) {
64 return (val << 1) ^ (val >> 63);
65 }
66
67 // Decodes signed integer encoded with EncodeZigZag.
DecodeZigZag(uint64_t val)68 inline int64_t DecodeZigZag(uint64_t val) {
69 if (val & 1) {
70 // Negative.
71 // 1 -> -1
72 // 3 -> -2
73 // 5 -> -3
74 return -1 - (val >> 1);
75 } else {
76 // Non-negative.
77 // 0 -> 0
78 // 2 -> 1
79 // 4 -> 2
80 return val >> 1;
81 }
82 }
83
84 // Encodes signed integer as unsigned. This is a generalized version of
85 // EncodeZigZag, designed to favor small positive numbers.
86 // Values are transformed in blocks of 2^|block_exponent|.
87 // If |block_exponent| is zero, then this degenerates into normal EncodeZigZag.
88 // Example when |block_exponent| is 1 (return value is the index):
89 // 0, 1, -1, -2, 2, 3, -3, -4, 4, 5, -5, -6, 6, 7, -7, -8
90 // Example when |block_exponent| is 2:
91 // 0, 1, 2, 3, -1, -2, -3, -4, 4, 5, 6, 7, -5, -6, -7, -8
EncodeZigZag(int64_t val,size_t block_exponent)92 inline uint64_t EncodeZigZag(int64_t val, size_t block_exponent) {
93 assert(block_exponent < 64);
94 const uint64_t uval = static_cast<uint64_t>(val >= 0 ? val : -val - 1);
95 const uint64_t block_num = ((uval >> block_exponent) << 1) + (val >= 0 ? 0 : 1);
96 const uint64_t pos = GetLowerBits(uval, block_exponent);
97 return (block_num << block_exponent) + pos;
98 }
99
100 // Decodes signed integer encoded with EncodeZigZag. |block_exponent| must be
101 // the same.
DecodeZigZag(uint64_t val,size_t block_exponent)102 inline int64_t DecodeZigZag(uint64_t val, size_t block_exponent) {
103 assert(block_exponent < 64);
104 const uint64_t block_num = val >> block_exponent;
105 const uint64_t pos = GetLowerBits(val, block_exponent);
106 if (block_num & 1) {
107 // Negative.
108 return -1LL - ((block_num >> 1) << block_exponent) - pos;
109 } else {
110 // Positive.
111 return ((block_num >> 1) << block_exponent) + pos;
112 }
113 }
114
115 // Converts |buffer| to a stream of '0' and '1'.
116 template <typename T>
BufferToStream(const std::vector<T> & buffer)117 std::string BufferToStream(const std::vector<T>& buffer) {
118 std::stringstream ss;
119 for (auto it = buffer.begin(); it != buffer.end(); ++it) {
120 std::string str = std::bitset<sizeof(T) * 8>(*it).to_string();
121 // Strings generated by std::bitset::to_string are read right to left.
122 // Reversing to left to right.
123 std::reverse(str.begin(), str.end());
124 ss << str;
125 }
126 return ss.str();
127 }
128
129 // Converts a left-to-right input string of '0' and '1' to a buffer of |T|
130 // words.
131 template <typename T>
StreamToBuffer(std::string str)132 std::vector<T> StreamToBuffer(std::string str) {
133 // The input string is left-to-right, the input argument of std::bitset needs
134 // to right-to-left. Instead of reversing tokens, reverse the entire string
135 // and iterate tokens from end to begin.
136 std::reverse(str.begin(), str.end());
137 const int word_size = static_cast<int>(sizeof(T) * 8);
138 const int str_length = static_cast<int>(str.length());
139 std::vector<T> buffer;
140 buffer.reserve(NumBitsToNumWords<sizeof(T)>(str.length()));
141 for (int index = str_length - word_size; index >= 0; index -= word_size) {
142 buffer.push_back(static_cast<T>(std::bitset<sizeof(T) * 8>(
143 str, index, word_size).to_ullong()));
144 }
145 const size_t suffix_length = str.length() % word_size;
146 if (suffix_length != 0) {
147 buffer.push_back(static_cast<T>(std::bitset<sizeof(T) * 8>(
148 str, 0, suffix_length).to_ullong()));
149 }
150 return buffer;
151 }
152
153 // Adds '0' chars at the end of the string until the size is a multiple of N.
154 template <size_t N>
PadToWord(std::string && str)155 inline std::string PadToWord(std::string&& str) {
156 const size_t tail_length = str.size() % N;
157 if (tail_length != 0)
158 str += std::string(N - tail_length, '0');
159 return str;
160 }
161
162 // Adds '0' chars at the end of the string until the size is a multiple of N.
163 template <size_t N>
PadToWord(const std::string & str)164 inline std::string PadToWord(const std::string& str) {
165 return PadToWord<N>(std::string(str));
166 }
167
168 // Converts a left-to-right stream of bits to std::bitset.
169 template <size_t N>
StreamToBitset(std::string str)170 inline std::bitset<N> StreamToBitset(std::string str) {
171 std::reverse(str.begin(), str.end());
172 return std::bitset<N>(str);
173 }
174
175 // Converts first |num_bits| of std::bitset to a left-to-right stream of bits.
176 template <size_t N>
177 inline std::string BitsetToStream(const std::bitset<N>& bits, size_t num_bits = N) {
178 std::string str = bits.to_string().substr(N - num_bits);
179 std::reverse(str.begin(), str.end());
180 return str;
181 }
182
183 // Converts a left-to-right stream of bits to uint64.
StreamToBits(std::string str)184 inline uint64_t StreamToBits(std::string str) {
185 std::reverse(str.begin(), str.end());
186 return std::bitset<64>(str).to_ullong();
187 }
188
189 // Converts first |num_bits| stored in uint64 to a left-to-right stream of bits.
190 inline std::string BitsToStream(uint64_t bits, size_t num_bits = 64) {
191 std::bitset<64> bitset(bits);
192 return BitsetToStream(bitset, num_bits);
193 }
194
195 // Base class for writing sequences of bits.
196 class BitWriterInterface {
197 public:
BitWriterInterface()198 BitWriterInterface() {}
~BitWriterInterface()199 virtual ~BitWriterInterface() {}
200
201 // Writes lower |num_bits| in |bits| to the stream.
202 // |num_bits| must be no greater than 64.
203 virtual void WriteBits(uint64_t bits, size_t num_bits) = 0;
204
205 // Writes left-to-right string of '0' and '1' to stream.
206 // String length must be no greater than 64.
207 // Note: "01" will be writen as 0x2, not 0x1. The string doesn't represent
208 // numbers but a stream of bits in the order they come from encoder.
WriteStream(const std::string & bits)209 virtual void WriteStream(const std::string& bits) {
210 WriteBits(StreamToBits(bits), bits.length());
211 }
212
213 // Writes lower |num_bits| in |bits| to the stream.
214 // |num_bits| must be no greater than 64.
215 template <size_t N>
216 void WriteBitset(const std::bitset<N>& bits, size_t num_bits = N) {
217 WriteBits(bits.to_ullong(), num_bits);
218 }
219
220 // Writes bits from value of type |T| to the stream. No encoding is done.
221 // Always writes 8 * sizeof(T) bits.
222 template <typename T>
WriteUnencoded(T val)223 void WriteUnencoded(T val) {
224 static_assert(sizeof(T) <= 64, "Type size too large");
225 uint64_t bits = 0;
226 memcpy(&bits, &val, sizeof(T));
227 WriteBits(bits, sizeof(T) * 8);
228 }
229
230 // Writes |val| in chunks of size |chunk_length| followed by a signal bit:
231 // 0 - no more chunks to follow
232 // 1 - more chunks to follow
233 // for example 255 is encoded into 1111 1 1111 0 for chunk length 4.
234 // The last chunk can be truncated and signal bit omitted, if the entire
235 // payload (for example 16 bit for uint16_t has already been written).
236 void WriteVariableWidthU64(uint64_t val, size_t chunk_length);
237 void WriteVariableWidthU32(uint32_t val, size_t chunk_length);
238 void WriteVariableWidthU16(uint16_t val, size_t chunk_length);
239 void WriteVariableWidthU8(uint8_t val, size_t chunk_length);
240 void WriteVariableWidthS64(
241 int64_t val, size_t chunk_length, size_t zigzag_exponent);
242 void WriteVariableWidthS32(
243 int32_t val, size_t chunk_length, size_t zigzag_exponent);
244 void WriteVariableWidthS16(
245 int16_t val, size_t chunk_length, size_t zigzag_exponent);
246 void WriteVariableWidthS8(
247 int8_t val, size_t chunk_length, size_t zigzag_exponent);
248
249 // Writes |val| using fixed bit width. Bit width is determined by |max_val|:
250 // max_val 0 -> bit width 1
251 // max_val 1 -> bit width 1
252 // max_val 2 -> bit width 2
253 // max_val 3 -> bit width 2
254 // max_val 4 -> bit width 3
255 // max_val 5 -> bit width 3
256 // max_val 8 -> bit width 4
257 // max_val n -> bit width 1 + floor(log2(n))
258 // |val| needs to be <= |max_val|.
259 void WriteFixedWidth(uint64_t val, uint64_t max_val);
260
261 // Returns number of bits written.
262 virtual size_t GetNumBits() const = 0;
263
264 // Provides direct access to the buffer data if implemented.
GetData()265 virtual const uint8_t* GetData() const {
266 return nullptr;
267 }
268
269 // Returns buffer size in bytes.
GetDataSizeBytes()270 size_t GetDataSizeBytes() const {
271 return NumBitsToNumWords<8>(GetNumBits());
272 }
273
274 // Generates and returns byte array containing written bits.
275 virtual std::vector<uint8_t> GetDataCopy() const = 0;
276
277 BitWriterInterface(const BitWriterInterface&) = delete;
278 BitWriterInterface& operator=(const BitWriterInterface&) = delete;
279 };
280
281 // This class is an implementation of BitWriterInterface, using
282 // std::vector<uint64_t> to store written bits.
283 class BitWriterWord64 : public BitWriterInterface {
284 public:
285 explicit BitWriterWord64(size_t reserve_bits = 64);
286
287 void WriteBits(uint64_t bits, size_t num_bits) override;
288
GetNumBits()289 size_t GetNumBits() const override {
290 return end_;
291 }
292
GetData()293 const uint8_t* GetData() const override {
294 return reinterpret_cast<const uint8_t*>(buffer_.data());
295 }
296
GetDataCopy()297 std::vector<uint8_t> GetDataCopy() const override {
298 return std::vector<uint8_t>(GetData(), GetData() + GetDataSizeBytes());
299 }
300
301 // Returns written stream as std::string, padded with zeroes so that the
302 // length is a multiple of 64.
GetStreamPadded64()303 std::string GetStreamPadded64() const {
304 return BufferToStream(buffer_);
305 }
306
307 // Sets callback to emit bit sequences after every write.
SetCallback(std::function<void (const std::string &)> callback)308 void SetCallback(std::function<void(const std::string&)> callback) {
309 callback_ = callback;
310 }
311
312 protected:
313 // Sends string generated from arguments to callback_ if defined.
EmitSequence(uint64_t bits,size_t num_bits)314 void EmitSequence(uint64_t bits, size_t num_bits) const {
315 if (callback_)
316 callback_(BitsToStream(bits, num_bits));
317 }
318
319 private:
320 std::vector<uint64_t> buffer_;
321 // Total number of bits written so far. Named 'end' as analogy to std::end().
322 size_t end_;
323
324 // If not null, the writer will use the callback to emit the written bit
325 // sequence as a string of '0' and '1'.
326 std::function<void(const std::string&)> callback_;
327 };
328
329 // Base class for reading sequences of bits.
330 class BitReaderInterface {
331 public:
BitReaderInterface()332 BitReaderInterface() {}
~BitReaderInterface()333 virtual ~BitReaderInterface() {}
334
335 // Reads |num_bits| from the stream, stores them in |bits|.
336 // Returns number of read bits. |num_bits| must be no greater than 64.
337 virtual size_t ReadBits(uint64_t* bits, size_t num_bits) = 0;
338
339 // Reads |num_bits| from the stream, stores them in |bits|.
340 // Returns number of read bits. |num_bits| must be no greater than 64.
341 template <size_t N>
342 size_t ReadBitset(std::bitset<N>* bits, size_t num_bits = N) {
343 uint64_t val = 0;
344 size_t num_read = ReadBits(&val, num_bits);
345 if (num_read) {
346 *bits = std::bitset<N>(val);
347 }
348 return num_read;
349 }
350
351 // Reads |num_bits| from the stream, returns string in left-to-right order.
352 // The length of the returned string may be less than |num_bits| if end was
353 // reached.
ReadStream(size_t num_bits)354 std::string ReadStream(size_t num_bits) {
355 uint64_t bits = 0;
356 size_t num_read = ReadBits(&bits, num_bits);
357 return BitsToStream(bits, num_read);
358 }
359
360 // Reads 8 * sizeof(T) bits and stores them in |val|.
361 template <typename T>
ReadUnencoded(T * val)362 bool ReadUnencoded(T* val) {
363 static_assert(sizeof(T) <= 64, "Type size too large");
364 uint64_t bits = 0;
365 const size_t num_read = ReadBits(&bits, sizeof(T) * 8);
366 if (num_read != sizeof(T) * 8)
367 return false;
368 memcpy(val, &bits, sizeof(T));
369 return true;
370 }
371
372 // Returns number of bits already read.
373 virtual size_t GetNumReadBits() const = 0;
374
375 // These two functions define 'hard' and 'soft' EOF.
376 //
377 // Returns true if the end of the buffer was reached.
378 virtual bool ReachedEnd() const = 0;
379 // Returns true if we reached the end of the buffer or are nearing it and only
380 // zero bits are left to read. Implementations of this function are allowed to
381 // commit a "false negative" error if the end of the buffer was not reached,
382 // i.e. it can return false even if indeed only zeroes are left.
383 // It is assumed that the consumer expects that
384 // the buffer stream ends with padding zeroes, and would accept this as a
385 // 'soft' EOF. Implementations of this class do not necessarily need to
386 // implement this, default behavior can simply delegate to ReachedEnd().
OnlyZeroesLeft()387 virtual bool OnlyZeroesLeft() const {
388 return ReachedEnd();
389 }
390
391 // Reads value encoded with WriteVariableWidthXXX (see BitWriterInterface).
392 // Reader and writer must use the same |chunk_length| and variable type.
393 // Returns true on success, false if the bit stream ends prematurely.
394 bool ReadVariableWidthU64(uint64_t* val, size_t chunk_length);
395 bool ReadVariableWidthU32(uint32_t* val, size_t chunk_length);
396 bool ReadVariableWidthU16(uint16_t* val, size_t chunk_length);
397 bool ReadVariableWidthU8(uint8_t* val, size_t chunk_length);
398 bool ReadVariableWidthS64(
399 int64_t* val, size_t chunk_length, size_t zigzag_exponent);
400 bool ReadVariableWidthS32(
401 int32_t* val, size_t chunk_length, size_t zigzag_exponent);
402 bool ReadVariableWidthS16(
403 int16_t* val, size_t chunk_length, size_t zigzag_exponent);
404 bool ReadVariableWidthS8(
405 int8_t* val, size_t chunk_length, size_t zigzag_exponent);
406
407 // Reads value written by WriteFixedWidth (|max_val| needs to be the same).
408 // Returns true on success, false if the bit stream ends prematurely.
409 bool ReadFixedWidth(uint64_t* val, uint64_t max_val);
410
411 BitReaderInterface(const BitReaderInterface&) = delete;
412 BitReaderInterface& operator=(const BitReaderInterface&) = delete;
413 };
414
415 // This class is an implementation of BitReaderInterface which accepts both
416 // uint8_t and uint64_t buffers as input. uint64_t buffers are consumed and
417 // owned. uint8_t buffers are copied.
418 class BitReaderWord64 : public BitReaderInterface {
419 public:
420 // Consumes and owns the buffer.
421 explicit BitReaderWord64(std::vector<uint64_t>&& buffer);
422
423 // Copies the buffer and casts it to uint64.
424 // Consuming the original buffer and casting it to uint64 is difficult,
425 // as it would potentially cause data misalignment and poor performance.
426 explicit BitReaderWord64(const std::vector<uint8_t>& buffer);
427 BitReaderWord64(const void* buffer, size_t num_bytes);
428
429 size_t ReadBits(uint64_t* bits, size_t num_bits) override;
430
GetNumReadBits()431 size_t GetNumReadBits() const override {
432 return pos_;
433 }
434
435 bool ReachedEnd() const override;
436 bool OnlyZeroesLeft() const override;
437
438 BitReaderWord64() = delete;
439 private:
440 const std::vector<uint64_t> buffer_;
441 size_t pos_;
442 };
443
444 } // namespace spvutils
445
446 #endif // LIBSPIRV_UTIL_BIT_STREAM_H_
447