• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2017 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Contains utils for reading, writing and debug printing bit streams.
16 
17 #ifndef SOURCE_COMP_BIT_STREAM_H_
18 #define SOURCE_COMP_BIT_STREAM_H_
19 
20 #include <algorithm>
21 #include <bitset>
22 #include <cassert>
23 #include <cstdint>
24 #include <cstring>
25 #include <functional>
26 #include <sstream>
27 #include <string>
28 #include <utility>
29 #include <vector>
30 
31 namespace spvtools {
32 namespace comp {
33 
34 // Terminology:
35 // Bits - usually used for a uint64 word, first bit is the lowest.
36 // Stream - std::string of '0' and '1', read left-to-right,
37 //          i.e. first bit is at the front and not at the end as in
38 //          std::bitset::to_string().
39 // Bitset - std::bitset corresponding to uint64 bits and to reverse(stream).
40 
41 // Converts number of bits to a respective number of chunks of size N.
42 // For example NumBitsToNumWords<8> returns how many bytes are needed to store
43 // |num_bits|.
44 template <size_t N>
NumBitsToNumWords(size_t num_bits)45 inline size_t NumBitsToNumWords(size_t num_bits) {
46   return (num_bits + (N - 1)) / N;
47 }
48 
49 // Returns value of the same type as |in|, where all but the first |num_bits|
50 // are set to zero.
51 template <typename T>
GetLowerBits(T in,size_t num_bits)52 inline T GetLowerBits(T in, size_t num_bits) {
53   return sizeof(T) * 8 == num_bits ? in : in & T((T(1) << num_bits) - T(1));
54 }
55 
56 // Encodes signed integer as unsigned. This is a generalized version of
57 // EncodeZigZag, designed to favor small positive numbers.
58 // Values are transformed in blocks of 2^|block_exponent|.
59 // If |block_exponent| is zero, then this degenerates into normal EncodeZigZag.
60 // Example when |block_exponent| is 1 (return value is the index):
61 // 0, 1, -1, -2, 2, 3, -3, -4, 4, 5, -5, -6, 6, 7, -7, -8
62 // Example when |block_exponent| is 2:
63 // 0, 1, 2, 3, -1, -2, -3, -4, 4, 5, 6, 7, -5, -6, -7, -8
EncodeZigZag(int64_t val,size_t block_exponent)64 inline uint64_t EncodeZigZag(int64_t val, size_t block_exponent) {
65   assert(block_exponent < 64);
66   const uint64_t uval = static_cast<uint64_t>(val >= 0 ? val : -val - 1);
67   const uint64_t block_num =
68       ((uval >> block_exponent) << 1) + (val >= 0 ? 0 : 1);
69   const uint64_t pos = GetLowerBits(uval, block_exponent);
70   return (block_num << block_exponent) + pos;
71 }
72 
73 // Decodes signed integer encoded with EncodeZigZag. |block_exponent| must be
74 // the same.
DecodeZigZag(uint64_t val,size_t block_exponent)75 inline int64_t DecodeZigZag(uint64_t val, size_t block_exponent) {
76   assert(block_exponent < 64);
77   const uint64_t block_num = val >> block_exponent;
78   const uint64_t pos = GetLowerBits(val, block_exponent);
79   if (block_num & 1) {
80     // Negative.
81     return -1LL - ((block_num >> 1) << block_exponent) - pos;
82   } else {
83     // Positive.
84     return ((block_num >> 1) << block_exponent) + pos;
85   }
86 }
87 
88 // Converts first |num_bits| stored in uint64 to a left-to-right stream of bits.
89 inline std::string BitsToStream(uint64_t bits, size_t num_bits = 64) {
90   std::bitset<64> bitset(bits);
91   std::string str = bitset.to_string().substr(64 - num_bits);
92   std::reverse(str.begin(), str.end());
93   return str;
94 }
95 
96 // Base class for writing sequences of bits.
97 class BitWriterInterface {
98  public:
99   BitWriterInterface() = default;
100   virtual ~BitWriterInterface() = default;
101 
102   // Writes lower |num_bits| in |bits| to the stream.
103   // |num_bits| must be no greater than 64.
104   virtual void WriteBits(uint64_t bits, size_t num_bits) = 0;
105 
106   // Writes bits from value of type |T| to the stream. No encoding is done.
107   // Always writes 8 * sizeof(T) bits.
108   template <typename T>
WriteUnencoded(T val)109   void WriteUnencoded(T val) {
110     static_assert(sizeof(T) <= 64, "Type size too large");
111     uint64_t bits = 0;
112     memcpy(&bits, &val, sizeof(T));
113     WriteBits(bits, sizeof(T) * 8);
114   }
115 
116   // Writes |val| in chunks of size |chunk_length| followed by a signal bit:
117   // 0 - no more chunks to follow
118   // 1 - more chunks to follow
119   // for example 255 is encoded into 1111 1 1111 0 for chunk length 4.
120   // The last chunk can be truncated and signal bit omitted, if the entire
121   // payload (for example 16 bit for uint16_t has already been written).
122   void WriteVariableWidthU64(uint64_t val, size_t chunk_length);
123   void WriteVariableWidthU32(uint32_t val, size_t chunk_length);
124   void WriteVariableWidthU16(uint16_t val, size_t chunk_length);
125   void WriteVariableWidthS64(int64_t val, size_t chunk_length,
126                              size_t zigzag_exponent);
127 
128   // Returns number of bits written.
129   virtual size_t GetNumBits() const = 0;
130 
131   // Provides direct access to the buffer data if implemented.
GetData()132   virtual const uint8_t* GetData() const { return nullptr; }
133 
134   // Returns buffer size in bytes.
GetDataSizeBytes()135   size_t GetDataSizeBytes() const { return NumBitsToNumWords<8>(GetNumBits()); }
136 
137   // Generates and returns byte array containing written bits.
138   virtual std::vector<uint8_t> GetDataCopy() const = 0;
139 
140   BitWriterInterface(const BitWriterInterface&) = delete;
141   BitWriterInterface& operator=(const BitWriterInterface&) = delete;
142 };
143 
144 // This class is an implementation of BitWriterInterface, using
145 // std::vector<uint64_t> to store written bits.
146 class BitWriterWord64 : public BitWriterInterface {
147  public:
148   explicit BitWriterWord64(size_t reserve_bits = 64);
149 
150   void WriteBits(uint64_t bits, size_t num_bits) override;
151 
GetNumBits()152   size_t GetNumBits() const override { return end_; }
153 
GetData()154   const uint8_t* GetData() const override {
155     return reinterpret_cast<const uint8_t*>(buffer_.data());
156   }
157 
GetDataCopy()158   std::vector<uint8_t> GetDataCopy() const override {
159     return std::vector<uint8_t>(GetData(), GetData() + GetDataSizeBytes());
160   }
161 
162   // Sets callback to emit bit sequences after every write.
SetCallback(std::function<void (const std::string &)> callback)163   void SetCallback(std::function<void(const std::string&)> callback) {
164     callback_ = callback;
165   }
166 
167  protected:
168   // Sends string generated from arguments to callback_ if defined.
EmitSequence(uint64_t bits,size_t num_bits)169   void EmitSequence(uint64_t bits, size_t num_bits) const {
170     if (callback_) callback_(BitsToStream(bits, num_bits));
171   }
172 
173  private:
174   std::vector<uint64_t> buffer_;
175   // Total number of bits written so far. Named 'end' as analogy to std::end().
176   size_t end_;
177 
178   // If not null, the writer will use the callback to emit the written bit
179   // sequence as a string of '0' and '1'.
180   std::function<void(const std::string&)> callback_;
181 };
182 
183 // Base class for reading sequences of bits.
184 class BitReaderInterface {
185  public:
BitReaderInterface()186   BitReaderInterface() {}
~BitReaderInterface()187   virtual ~BitReaderInterface() {}
188 
189   // Reads |num_bits| from the stream, stores them in |bits|.
190   // Returns number of read bits. |num_bits| must be no greater than 64.
191   virtual size_t ReadBits(uint64_t* bits, size_t num_bits) = 0;
192 
193   // Reads 8 * sizeof(T) bits and stores them in |val|.
194   template <typename T>
ReadUnencoded(T * val)195   bool ReadUnencoded(T* val) {
196     static_assert(sizeof(T) <= 64, "Type size too large");
197     uint64_t bits = 0;
198     const size_t num_read = ReadBits(&bits, sizeof(T) * 8);
199     if (num_read != sizeof(T) * 8) return false;
200     memcpy(val, &bits, sizeof(T));
201     return true;
202   }
203 
204   // Returns number of bits already read.
205   virtual size_t GetNumReadBits() const = 0;
206 
207   // These two functions define 'hard' and 'soft' EOF.
208   //
209   // Returns true if the end of the buffer was reached.
210   virtual bool ReachedEnd() const = 0;
211   // Returns true if we reached the end of the buffer or are nearing it and only
212   // zero bits are left to read. Implementations of this function are allowed to
213   // commit a "false negative" error if the end of the buffer was not reached,
214   // i.e. it can return false even if indeed only zeroes are left.
215   // It is assumed that the consumer expects that
216   // the buffer stream ends with padding zeroes, and would accept this as a
217   // 'soft' EOF. Implementations of this class do not necessarily need to
218   // implement this, default behavior can simply delegate to ReachedEnd().
OnlyZeroesLeft()219   virtual bool OnlyZeroesLeft() const { return ReachedEnd(); }
220 
221   // Reads value encoded with WriteVariableWidthXXX (see BitWriterInterface).
222   // Reader and writer must use the same |chunk_length| and variable type.
223   // Returns true on success, false if the bit stream ends prematurely.
224   bool ReadVariableWidthU64(uint64_t* val, size_t chunk_length);
225   bool ReadVariableWidthU32(uint32_t* val, size_t chunk_length);
226   bool ReadVariableWidthU16(uint16_t* val, size_t chunk_length);
227   bool ReadVariableWidthS64(int64_t* val, size_t chunk_length,
228                             size_t zigzag_exponent);
229 
230   BitReaderInterface(const BitReaderInterface&) = delete;
231   BitReaderInterface& operator=(const BitReaderInterface&) = delete;
232 };
233 
234 // This class is an implementation of BitReaderInterface which accepts both
235 // uint8_t and uint64_t buffers as input. uint64_t buffers are consumed and
236 // owned. uint8_t buffers are copied.
237 class BitReaderWord64 : public BitReaderInterface {
238  public:
239   // Consumes and owns the buffer.
240   explicit BitReaderWord64(std::vector<uint64_t>&& buffer);
241 
242   // Copies the buffer and casts it to uint64.
243   // Consuming the original buffer and casting it to uint64 is difficult,
244   // as it would potentially cause data misalignment and poor performance.
245   explicit BitReaderWord64(const std::vector<uint8_t>& buffer);
246   BitReaderWord64(const void* buffer, size_t num_bytes);
247 
248   size_t ReadBits(uint64_t* bits, size_t num_bits) override;
249 
GetNumReadBits()250   size_t GetNumReadBits() const override { return pos_; }
251 
252   bool ReachedEnd() const override;
253   bool OnlyZeroesLeft() const override;
254 
255   BitReaderWord64() = delete;
256 
257   // Sets callback to emit bit sequences after every read.
SetCallback(std::function<void (const std::string &)> callback)258   void SetCallback(std::function<void(const std::string&)> callback) {
259     callback_ = callback;
260   }
261 
262  protected:
263   // Sends string generated from arguments to callback_ if defined.
EmitSequence(uint64_t bits,size_t num_bits)264   void EmitSequence(uint64_t bits, size_t num_bits) const {
265     if (callback_) callback_(BitsToStream(bits, num_bits));
266   }
267 
268  private:
269   const std::vector<uint64_t> buffer_;
270   size_t pos_;
271 
272   // If not null, the reader will use the callback to emit the read bit
273   // sequence as a string of '0' and '1'.
274   std::function<void(const std::string&)> callback_;
275 };
276 
277 }  // namespace comp
278 }  // namespace spvtools
279 
280 #endif  // SOURCE_COMP_BIT_STREAM_H_
281