1 /*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef ART_LIBARTBASE_BASE_LEB128_H_
18 #define ART_LIBARTBASE_BASE_LEB128_H_
19
20 #include <vector>
21
22 #include <android-base/logging.h>
23
24 #include "bit_utils.h"
25 #include "globals.h"
26 #include "macros.h"
27
28 namespace art {
29
30 // Reads an unsigned LEB128 value, updating the given pointer to point
31 // just past the end of the read value. This function tolerates
32 // non-zero high-order bits in the fifth encoded byte.
DecodeUnsignedLeb128(const uint8_t ** data)33 static inline uint32_t DecodeUnsignedLeb128(const uint8_t** data) {
34 const uint8_t* ptr = *data;
35 int result = *(ptr++);
36 if (UNLIKELY(result > 0x7f)) {
37 int cur = *(ptr++);
38 result = (result & 0x7f) | ((cur & 0x7f) << 7);
39 if (cur > 0x7f) {
40 cur = *(ptr++);
41 result |= (cur & 0x7f) << 14;
42 if (cur > 0x7f) {
43 cur = *(ptr++);
44 result |= (cur & 0x7f) << 21;
45 if (cur > 0x7f) {
46 // Note: We don't check to see if cur is out of range here,
47 // meaning we tolerate garbage in the four high-order bits.
48 cur = *(ptr++);
49 result |= cur << 28;
50 }
51 }
52 }
53 }
54 *data = ptr;
55 return static_cast<uint32_t>(result);
56 }
57
DecodeUnsignedLeb128WithoutMovingCursor(const uint8_t * data)58 static inline uint32_t DecodeUnsignedLeb128WithoutMovingCursor(const uint8_t* data) {
59 return DecodeUnsignedLeb128(&data);
60 }
61
DecodeUnsignedLeb128Checked(const uint8_t ** data,const void * end,uint32_t * out)62 static inline bool DecodeUnsignedLeb128Checked(const uint8_t** data,
63 const void* end,
64 uint32_t* out) {
65 const uint8_t* ptr = *data;
66 if (ptr >= end) {
67 return false;
68 }
69 int result = *(ptr++);
70 if (UNLIKELY(result > 0x7f)) {
71 if (ptr >= end) {
72 return false;
73 }
74 int cur = *(ptr++);
75 result = (result & 0x7f) | ((cur & 0x7f) << 7);
76 if (cur > 0x7f) {
77 if (ptr >= end) {
78 return false;
79 }
80 cur = *(ptr++);
81 result |= (cur & 0x7f) << 14;
82 if (cur > 0x7f) {
83 if (ptr >= end) {
84 return false;
85 }
86 cur = *(ptr++);
87 result |= (cur & 0x7f) << 21;
88 if (cur > 0x7f) {
89 if (ptr >= end) {
90 return false;
91 }
92 // Note: We don't check to see if cur is out of range here,
93 // meaning we tolerate garbage in the four high-order bits.
94 cur = *(ptr++);
95 result |= cur << 28;
96 }
97 }
98 }
99 }
100 *data = ptr;
101 *out = static_cast<uint32_t>(result);
102 return true;
103 }
104
105 // Reads an unsigned LEB128 + 1 value. updating the given pointer to point
106 // just past the end of the read value. This function tolerates
107 // non-zero high-order bits in the fifth encoded byte.
108 // It is possible for this function to return -1.
DecodeUnsignedLeb128P1(const uint8_t ** data)109 static inline int32_t DecodeUnsignedLeb128P1(const uint8_t** data) {
110 return DecodeUnsignedLeb128(data) - 1;
111 }
112
113 // Reads a signed LEB128 value, updating the given pointer to point
114 // just past the end of the read value. This function tolerates
115 // non-zero high-order bits in the fifth encoded byte.
DecodeSignedLeb128(const uint8_t ** data)116 static inline int32_t DecodeSignedLeb128(const uint8_t** data) {
117 const uint8_t* ptr = *data;
118 int32_t result = *(ptr++);
119 if (result <= 0x7f) {
120 result = (result << 25) >> 25;
121 } else {
122 int cur = *(ptr++);
123 result = (result & 0x7f) | ((cur & 0x7f) << 7);
124 if (cur <= 0x7f) {
125 result = (result << 18) >> 18;
126 } else {
127 cur = *(ptr++);
128 result |= (cur & 0x7f) << 14;
129 if (cur <= 0x7f) {
130 result = (result << 11) >> 11;
131 } else {
132 cur = *(ptr++);
133 result |= (cur & 0x7f) << 21;
134 if (cur <= 0x7f) {
135 result = (result << 4) >> 4;
136 } else {
137 // Note: We don't check to see if cur is out of range here,
138 // meaning we tolerate garbage in the four high-order bits.
139 cur = *(ptr++);
140 result |= cur << 28;
141 }
142 }
143 }
144 }
145 *data = ptr;
146 return result;
147 }
148
DecodeSignedLeb128Checked(const uint8_t ** data,const void * end,int32_t * out)149 static inline bool DecodeSignedLeb128Checked(const uint8_t** data,
150 const void* end,
151 int32_t* out) {
152 const uint8_t* ptr = *data;
153 if (ptr >= end) {
154 return false;
155 }
156 int32_t result = *(ptr++);
157 if (result <= 0x7f) {
158 result = (result << 25) >> 25;
159 } else {
160 if (ptr >= end) {
161 return false;
162 }
163 int cur = *(ptr++);
164 result = (result & 0x7f) | ((cur & 0x7f) << 7);
165 if (cur <= 0x7f) {
166 result = (result << 18) >> 18;
167 } else {
168 if (ptr >= end) {
169 return false;
170 }
171 cur = *(ptr++);
172 result |= (cur & 0x7f) << 14;
173 if (cur <= 0x7f) {
174 result = (result << 11) >> 11;
175 } else {
176 if (ptr >= end) {
177 return false;
178 }
179 cur = *(ptr++);
180 result |= (cur & 0x7f) << 21;
181 if (cur <= 0x7f) {
182 result = (result << 4) >> 4;
183 } else {
184 if (ptr >= end) {
185 return false;
186 }
187 // Note: We don't check to see if cur is out of range here,
188 // meaning we tolerate garbage in the four high-order bits.
189 cur = *(ptr++);
190 result |= cur << 28;
191 }
192 }
193 }
194 }
195 *data = ptr;
196 *out = static_cast<uint32_t>(result);
197 return true;
198 }
199
200 // Returns the number of bytes needed to encode the value in unsigned LEB128.
UnsignedLeb128Size(uint32_t data)201 static inline uint32_t UnsignedLeb128Size(uint32_t data) {
202 // bits_to_encode = (data != 0) ? 32 - CLZ(x) : 1 // 32 - CLZ(data | 1)
203 // bytes = ceil(bits_to_encode / 7.0); // (6 + bits_to_encode) / 7
204 uint32_t x = 6 + 32 - CLZ(data | 1U);
205 // Division by 7 is done by (x * 37) >> 8 where 37 = ceil(256 / 7).
206 // This works for 0 <= x < 256 / (7 * 37 - 256), i.e. 0 <= x <= 85.
207 return (x * 37) >> 8;
208 }
209
IsLeb128Terminator(const uint8_t * ptr)210 static inline bool IsLeb128Terminator(const uint8_t* ptr) {
211 return *ptr <= 0x7f;
212 }
213
214 // Returns the first byte of a Leb128 value assuming that:
215 // (1) `end_ptr` points to the first byte after the Leb128 value, and
216 // (2) there is another Leb128 value before this one.
217 template <typename T>
ReverseSearchUnsignedLeb128(T * end_ptr)218 static inline T* ReverseSearchUnsignedLeb128(T* end_ptr) {
219 static_assert(std::is_same<typename std::remove_const<T>::type, uint8_t>::value,
220 "T must be a uint8_t");
221 T* ptr = end_ptr;
222
223 // Move one byte back, check that this is the terminating byte.
224 ptr--;
225 DCHECK(IsLeb128Terminator(ptr));
226
227 // Keep moving back while the previous byte is not a terminating byte.
228 // Fail after reading five bytes in case there isn't another Leb128 value
229 // before this one.
230 while (!IsLeb128Terminator(ptr - 1)) {
231 ptr--;
232 DCHECK_LE(static_cast<ptrdiff_t>(end_ptr - ptr), 5);
233 }
234
235 return ptr;
236 }
237
238 // Returns the number of bytes needed to encode the value in unsigned LEB128.
SignedLeb128Size(int32_t data)239 static inline uint32_t SignedLeb128Size(int32_t data) {
240 // Like UnsignedLeb128Size(), but we need one bit beyond the highest bit that differs from sign.
241 data = data ^ (data >> 31);
242 uint32_t x = 1 /* we need to encode the sign bit */ + 6 + 32 - CLZ(data | 1U);
243 return (x * 37) >> 8;
244 }
245
EncodeUnsignedLeb128(uint8_t * dest,uint32_t value)246 static inline uint8_t* EncodeUnsignedLeb128(uint8_t* dest, uint32_t value) {
247 uint8_t out = value & 0x7f;
248 value >>= 7;
249 while (value != 0) {
250 *dest++ = out | 0x80;
251 out = value & 0x7f;
252 value >>= 7;
253 }
254 *dest++ = out;
255 return dest;
256 }
257
258 template <typename Vector>
EncodeUnsignedLeb128(Vector * dest,uint32_t value)259 static inline void EncodeUnsignedLeb128(Vector* dest, uint32_t value) {
260 static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
261 uint8_t out = value & 0x7f;
262 value >>= 7;
263 while (value != 0) {
264 dest->push_back(out | 0x80);
265 out = value & 0x7f;
266 value >>= 7;
267 }
268 dest->push_back(out);
269 }
270
271 // Overwrite encoded Leb128 with a new value. The new value must be less than
272 // or equal to the old value to ensure that it fits the allocated space.
UpdateUnsignedLeb128(uint8_t * dest,uint32_t value)273 static inline void UpdateUnsignedLeb128(uint8_t* dest, uint32_t value) {
274 const uint8_t* old_end = dest;
275 uint32_t old_value = DecodeUnsignedLeb128(&old_end);
276 DCHECK_LE(UnsignedLeb128Size(value), UnsignedLeb128Size(old_value));
277 for (uint8_t* end = EncodeUnsignedLeb128(dest, value); end < old_end; end++) {
278 // Use longer encoding than necessary to fill the allocated space.
279 end[-1] |= 0x80;
280 end[0] = 0;
281 }
282 }
283
EncodeSignedLeb128(uint8_t * dest,int32_t value)284 static inline uint8_t* EncodeSignedLeb128(uint8_t* dest, int32_t value) {
285 uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
286 uint8_t out = value & 0x7f;
287 while (extra_bits != 0u) {
288 *dest++ = out | 0x80;
289 value >>= 7;
290 out = value & 0x7f;
291 extra_bits >>= 7;
292 }
293 *dest++ = out;
294 return dest;
295 }
296
297 template<typename Vector>
EncodeSignedLeb128(Vector * dest,int32_t value)298 static inline void EncodeSignedLeb128(Vector* dest, int32_t value) {
299 static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
300 uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
301 uint8_t out = value & 0x7f;
302 while (extra_bits != 0u) {
303 dest->push_back(out | 0x80);
304 value >>= 7;
305 out = value & 0x7f;
306 extra_bits >>= 7;
307 }
308 dest->push_back(out);
309 }
310
311 // An encoder that pushes int32_t/uint32_t data onto the given std::vector.
312 template <typename Vector = std::vector<uint8_t>>
313 class Leb128Encoder {
314 static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
315
316 public:
Leb128Encoder(Vector * data)317 explicit Leb128Encoder(Vector* data) : data_(data) {
318 DCHECK(data != nullptr);
319 }
320
Reserve(uint32_t size)321 void Reserve(uint32_t size) {
322 data_->reserve(size);
323 }
324
PushBackUnsigned(uint32_t value)325 void PushBackUnsigned(uint32_t value) {
326 EncodeUnsignedLeb128(data_, value);
327 }
328
329 template<typename It>
InsertBackUnsigned(It cur,It end)330 void InsertBackUnsigned(It cur, It end) {
331 for (; cur != end; ++cur) {
332 PushBackUnsigned(*cur);
333 }
334 }
335
PushBackSigned(int32_t value)336 void PushBackSigned(int32_t value) {
337 EncodeSignedLeb128(data_, value);
338 }
339
340 template<typename It>
InsertBackSigned(It cur,It end)341 void InsertBackSigned(It cur, It end) {
342 for (; cur != end; ++cur) {
343 PushBackSigned(*cur);
344 }
345 }
346
GetData()347 const Vector& GetData() const {
348 return *data_;
349 }
350
351 protected:
352 Vector* const data_;
353
354 private:
355 DISALLOW_COPY_AND_ASSIGN(Leb128Encoder);
356 };
357
358 // An encoder with an API similar to vector<uint32_t> where the data is captured in ULEB128 format.
359 template <typename Vector = std::vector<uint8_t>>
360 class Leb128EncodingVector final : private Vector,
361 public Leb128Encoder<Vector> {
362 static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
363
364 public:
Leb128EncodingVector()365 Leb128EncodingVector() : Leb128Encoder<Vector>(this) { }
366
Leb128EncodingVector(const typename Vector::allocator_type & alloc)367 explicit Leb128EncodingVector(const typename Vector::allocator_type& alloc)
368 : Vector(alloc),
369 Leb128Encoder<Vector>(this) { }
370
371 private:
372 DISALLOW_COPY_AND_ASSIGN(Leb128EncodingVector);
373 };
374
375 } // namespace art
376
377 #endif // ART_LIBARTBASE_BASE_LEB128_H_
378