/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_LIBARTBASE_BASE_BIT_MEMORY_REGION_H_
#define ART_LIBARTBASE_BASE_BIT_MEMORY_REGION_H_

#include "memory_region.h"

#include "bit_utils.h"
#include "memory_tool.h"

#include <array>

namespace art {

// Bit memory region is a bit-offset subregion of a normal memory region. This is useful for
// abstracting away the bit start offset to avoid needing to pass it as an argument everywhere.
class BitMemoryRegion final : public ValueObject {
 public:
  BitMemoryRegion() = default;
  ALWAYS_INLINE BitMemoryRegion(uint8_t* data, ssize_t bit_start, size_t bit_size) {
    // Normalize the data pointer. Note that bit_start may be negative.
    data_ = AlignDown(data + (bit_start >> kBitsPerByteLog2), kPageSize);
    bit_start_ = bit_start + kBitsPerByte * (data - data_);
    bit_size_ = bit_size;
  }
  ALWAYS_INLINE explicit BitMemoryRegion(MemoryRegion region)
      : BitMemoryRegion(region.begin(), /* bit_start */ 0, region.size_in_bits()) {
  }
  ALWAYS_INLINE BitMemoryRegion(MemoryRegion region, size_t bit_offset, size_t bit_length)
      : BitMemoryRegion(region) {
    *this = Subregion(bit_offset, bit_length);
  }

  ALWAYS_INLINE bool IsValid() const { return data_ != nullptr; }

  const uint8_t* data() const {
    DCHECK_ALIGNED(bit_start_, kBitsPerByte);
    return data_ + bit_start_ / kBitsPerByte;
  }

  size_t size_in_bits() const {
    return bit_size_;
  }

  void Resize(size_t bit_size) {
    bit_size_ = bit_size;
  }

  ALWAYS_INLINE BitMemoryRegion Subregion(size_t bit_offset, size_t bit_length) const {
    DCHECK_LE(bit_offset, bit_size_);
    DCHECK_LE(bit_length, bit_size_ - bit_offset);
    BitMemoryRegion result = *this;
    result.bit_start_ += bit_offset;
    result.bit_size_ = bit_length;
    return result;
  }

  ALWAYS_INLINE BitMemoryRegion Subregion(size_t bit_offset) const {
    DCHECK_LE(bit_offset, bit_size_);
    BitMemoryRegion result = *this;
    result.bit_start_ += bit_offset;
    result.bit_size_ -= bit_offset;
    return result;
  }
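
  // Illustrative sketch (added for exposition; not part of the original header):
  // carving a subregion copies no data, it only adjusts the bit cursor.
  // Assuming a caller-owned 16-byte buffer:
  //
  //   uint8_t buffer[16] = {};
  //   BitMemoryRegion region(buffer, /* bit_start */ 0, /* bit_size */ 128);
  //   BitMemoryRegion sub = region.Subregion(/* bit_offset */ 4, /* bit_length */ 8);
  //   // `sub` now covers bits 4..11, straddling the first two bytes of `buffer`.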

  // Load a single bit in the region. The bit at offset 0 is the least
  // significant bit in the first byte.
  ALWAYS_INLINE bool LoadBit(size_t bit_offset) const {
    DCHECK_LT(bit_offset, bit_size_);
    size_t index = (bit_start_ + bit_offset) / kBitsPerByte;
    size_t shift = (bit_start_ + bit_offset) % kBitsPerByte;
    return ((data_[index] >> shift) & 1) != 0;
  }

  ALWAYS_INLINE void StoreBit(size_t bit_offset, bool value) {
    DCHECK_LT(bit_offset, bit_size_);
    size_t index = (bit_start_ + bit_offset) / kBitsPerByte;
    size_t shift = (bit_start_ + bit_offset) % kBitsPerByte;
    data_[index] &= ~(1 << shift);  // Clear bit.
    data_[index] |= (value ? 1 : 0) << shift;  // Set bit.
    DCHECK_EQ(value, LoadBit(bit_offset));
  }

  // Load `bit_length` bits from `data` starting at the given `bit_offset`.
  // The least significant bit is stored in the smallest memory offset.
  template<typename Result = size_t>
  ATTRIBUTE_NO_SANITIZE_ADDRESS  // We might touch extra bytes due to the alignment.
  ATTRIBUTE_NO_SANITIZE_HWADDRESS  // HWASan uses a different attribute.
  ALWAYS_INLINE Result LoadBits(size_t bit_offset, size_t bit_length) const {
    static_assert(std::is_integral_v<Result>, "Result must be integral");
    static_assert(std::is_unsigned_v<Result>, "Result must be unsigned");
    DCHECK(IsAligned<sizeof(Result)>(data_));
    DCHECK_LE(bit_offset, bit_size_);
    DCHECK_LE(bit_length, bit_size_ - bit_offset);
    DCHECK_LE(bit_length, BitSizeOf<Result>());
    if (bit_length == 0) {
      return 0;
    }
    // Load the naturally-aligned value which contains the least significant bit.
    Result* data = reinterpret_cast<Result*>(data_);
    size_t width = BitSizeOf<Result>();
    size_t index = (bit_start_ + bit_offset) / width;
    size_t shift = (bit_start_ + bit_offset) % width;
    Result value = data[index] >> shift;
    // Load the extra value containing the most significant bit (it might be the same one).
    // We cannot just load the following value as that could potentially cause a SIGSEGV.
    Result extra = data[index + (shift + (bit_length - 1)) / width];
    // Mask to clear unwanted bits (the 1s are needed to avoid undefined shift).
    Result clear = (std::numeric_limits<Result>::max() << 1) << (bit_length - 1);
    // Prepend the extra value. We add the explicit '& (width - 1)' so that the shift is defined.
    // It is a no-op for `shift != 0`, and if `shift == 0` then `value == extra` because
    // `bit_length <= width` causes `value` and `extra` to be read from the same location.
    // The '& (width - 1)' is implied by the shift instruction on ARM and removed by the compiler.
    return (value | (extra << ((width - shift) & (width - 1)))) & ~clear;
  }
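
  // Worked example (added for exposition; the concrete numbers are illustrative):
  // with Result = uint32_t (width 32), bit_start_ + bit_offset = 30 and
  // bit_length = 8, we get index = 0 and shift = 30. `value` holds the top two
  // bits of data[0]; `extra` is data[(30 + 7) / 32] = data[1]; ~clear = 0xFF.
  // The result is (value | (extra << 2)) & 0xFF: two bits taken from data[0]
  // followed by the low six bits of data[1].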

  // Store `bit_length` bits in `data` starting at the given `bit_offset`.
  // The least significant bit is stored in the smallest memory offset.
  ALWAYS_INLINE void StoreBits(size_t bit_offset, size_t value, size_t bit_length) {
    DCHECK_LE(bit_offset, bit_size_);
    DCHECK_LE(bit_length, bit_size_ - bit_offset);
    DCHECK_LE(bit_length, BitSizeOf<size_t>());
    DCHECK_LE(value, MaxInt<size_t>(bit_length));
    if (bit_length == 0) {
      return;
    }
    // Write data byte by byte to avoid races with other threads
    // on bytes that do not overlap with this region.
    size_t mask = std::numeric_limits<size_t>::max() >> (BitSizeOf<size_t>() - bit_length);
    size_t index = (bit_start_ + bit_offset) / kBitsPerByte;
    size_t shift = (bit_start_ + bit_offset) % kBitsPerByte;
    data_[index] &= ~(mask << shift);  // Clear bits.
    data_[index] |= (value << shift);  // Set bits.
    size_t finished_bits = kBitsPerByte - shift;
    for (int i = 1; finished_bits < bit_length; i++, finished_bits += kBitsPerByte) {
      data_[index + i] &= ~(mask >> finished_bits);  // Clear bits.
      data_[index + i] |= (value >> finished_bits);  // Set bits.
    }
    DCHECK_EQ(value, LoadBits(bit_offset, bit_length));
  }

  // Copy bits from the other bit region.
  ALWAYS_INLINE void CopyBits(const BitMemoryRegion& src) {
    DCHECK_EQ(size_in_bits(), src.size_in_bits());
    // Hopefully, the loads of the unused `value` shall be optimized away.
    VisitChunks(
        [this, &src](size_t offset, size_t num_bits, size_t value ATTRIBUTE_UNUSED) ALWAYS_INLINE {
          StoreChunk(offset, src.LoadBits(offset, num_bits), num_bits);
          return true;
        });
  }

  // And bits from the other bit region.
  ALWAYS_INLINE void AndBits(const BitMemoryRegion& src) {
    DCHECK_EQ(size_in_bits(), src.size_in_bits());
    VisitChunks([this, &src](size_t offset, size_t num_bits, size_t value) ALWAYS_INLINE {
      StoreChunk(offset, value & src.LoadBits(offset, num_bits), num_bits);
      return true;
    });
  }

  // Or bits from the other bit region.
  ALWAYS_INLINE void OrBits(const BitMemoryRegion& src) {
    DCHECK_EQ(size_in_bits(), src.size_in_bits());
    VisitChunks([this, &src](size_t offset, size_t num_bits, size_t value) ALWAYS_INLINE {
      StoreChunk(offset, value | src.LoadBits(offset, num_bits), num_bits);
      return true;
    });
  }

  // Xor bits from the other bit region.
  ALWAYS_INLINE void XorBits(const BitMemoryRegion& src) {
    DCHECK_EQ(size_in_bits(), src.size_in_bits());
    VisitChunks([this, &src](size_t offset, size_t num_bits, size_t value) ALWAYS_INLINE {
      StoreChunk(offset, value ^ src.LoadBits(offset, num_bits), num_bits);
      return true;
    });
  }

  // Count the number of set bits within this region.
  ALWAYS_INLINE size_t PopCount() const {
    size_t result = 0u;
    VisitChunks([&](size_t offset ATTRIBUTE_UNUSED,
                    size_t num_bits ATTRIBUTE_UNUSED,
                    size_t value) ALWAYS_INLINE {
      result += POPCOUNT(value);
      return true;
    });
    return result;
  }

  // Count the number of set bits within the given bit range.
  ALWAYS_INLINE size_t PopCount(size_t bit_offset, size_t bit_length) const {
    return Subregion(bit_offset, bit_length).PopCount();
  }

  // Check if this region has all bits clear.
  ALWAYS_INLINE bool HasAllBitsClear() const {
    return VisitChunks([](size_t offset ATTRIBUTE_UNUSED,
                          size_t num_bits ATTRIBUTE_UNUSED,
                          size_t value) ALWAYS_INLINE {
      return value == 0u;
    });
  }

  // Check if this region has any bit set.
  ALWAYS_INLINE bool HasSomeBitSet() const {
    return !HasAllBitsClear();
  }
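
  // Illustrative sketch (added for exposition; not part of the original
  // header): the whole-region operations visit aligned size_t chunks, so
  // intersecting two equally sized regions and counting the surviving bits
  // could look like:
  //
  //   a.AndBits(b);                  // a &= b, chunk by chunk.
  //   size_t common = a.PopCount();  // Bits set in both `a` and `b`.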

  // Check if there is any bit set within the given bit range.
  ALWAYS_INLINE bool HasSomeBitSet(size_t bit_offset, size_t bit_length) const {
    return Subregion(bit_offset, bit_length).HasSomeBitSet();
  }

  static int Compare(const BitMemoryRegion& lhs, const BitMemoryRegion& rhs) {
    if (lhs.size_in_bits() != rhs.size_in_bits()) {
      return (lhs.size_in_bits() < rhs.size_in_bits()) ? -1 : 1;
    }
    int result = 0;
    bool equals = lhs.VisitChunks(
        [&](size_t offset, size_t num_bits, size_t lhs_value) ALWAYS_INLINE {
          size_t rhs_value = rhs.LoadBits(offset, num_bits);
          if (lhs_value == rhs_value) {
            return true;
          }
          // We have found a difference. To avoid the comparison being dependent on how the region
          // is split into chunks, check the lowest bit that differs. (Android is little-endian.)
          int bit = CTZ(lhs_value ^ rhs_value);
          result = ((rhs_value >> bit) & 1u) != 0u ? 1 : -1;
          return false;  // Stop iterating.
        });
    DCHECK_EQ(equals, result == 0);
    return result;
  }

  static bool Equals(const BitMemoryRegion& lhs, const BitMemoryRegion& rhs) {
    if (lhs.size_in_bits() != rhs.size_in_bits()) {
      return false;
    }
    return lhs.VisitChunks([&rhs](size_t offset, size_t num_bits, size_t lhs_value) ALWAYS_INLINE {
      return lhs_value == rhs.LoadBits(offset, num_bits);
    });
  }
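
  // Worked example (added for exposition; the bit patterns are illustrative):
  // for 3-bit regions lhs = {1, 1, 0} and rhs = {1, 0, 1} (bits listed from
  // offset 0), lhs_value = 0b011 and rhs_value = 0b101. They differ at bits 1
  // and 2, so CTZ picks bit 1; rhs has that bit clear, so Compare returns -1.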

 private:
  // Visit the region in aligned `size_t` chunks. The first and last chunk may have fewer bits.
  //
  // Returns `true` if the iteration visited all chunks successfully, i.e. none of the
  // calls to `visitor(offset, num_bits, value)` returned `false`; otherwise `false`.
  template <typename VisitorType>
  ATTRIBUTE_NO_SANITIZE_ADDRESS  // We might touch extra bytes due to the alignment.
  ATTRIBUTE_NO_SANITIZE_HWADDRESS  // HWASan uses a different attribute.
  ALWAYS_INLINE bool VisitChunks(VisitorType&& visitor) const {
    constexpr size_t kChunkSize = BitSizeOf<size_t>();
    size_t remaining_bits = bit_size_;
    if (remaining_bits == 0) {
      return true;
    }
    DCHECK(IsAligned<sizeof(size_t)>(data_));
    const size_t* data = reinterpret_cast<const size_t*>(data_);
    size_t offset = 0u;
    size_t bit_start = bit_start_;
    data += bit_start / kChunkSize;
    if ((bit_start % kChunkSize) != 0u) {
      size_t leading_bits = kChunkSize - (bit_start % kChunkSize);
      size_t value = (*data) >> (bit_start % kChunkSize);
      if (leading_bits > remaining_bits) {
        leading_bits = remaining_bits;
        value = value & ~(std::numeric_limits<size_t>::max() << remaining_bits);
      }
      if (!visitor(offset, leading_bits, value)) {
        return false;
      }
      offset += leading_bits;
      remaining_bits -= leading_bits;
      ++data;
    }
    while (remaining_bits >= kChunkSize) {
      size_t value = *data;
      if (!visitor(offset, kChunkSize, value)) {
        return false;
      }
      offset += kChunkSize;
      remaining_bits -= kChunkSize;
      ++data;
    }
    if (remaining_bits != 0u) {
      size_t value = (*data) & ~(std::numeric_limits<size_t>::max() << remaining_bits);
      if (!visitor(offset, remaining_bits, value)) {
        return false;
      }
    }
    return true;
  }

  ALWAYS_INLINE void StoreChunk(size_t bit_offset, size_t value, size_t bit_length) {
    if (bit_length == BitSizeOf<size_t>()) {
      DCHECK_ALIGNED(bit_start_ + bit_offset, BitSizeOf<size_t>());
      uint8_t* data = data_ + (bit_start_ + bit_offset) / kBitsPerByte;
      DCHECK_ALIGNED(data, sizeof(size_t));
      reinterpret_cast<size_t*>(data)[0] = value;
    } else {
      StoreBits(bit_offset, value, bit_length);
    }
  }

  uint8_t* data_ = nullptr;  // The pointer is page aligned.
  size_t bit_start_ = 0;
  size_t bit_size_ = 0;
};

constexpr uint32_t kVarintBits = 4;  // Minimum number of bits used for varint.
constexpr uint32_t kVarintMax = 11;  // Maximum value which is stored "inline".

class BitMemoryReader {
 public:
  BitMemoryReader(BitMemoryReader&&) = default;
  explicit BitMemoryReader(BitMemoryRegion data)
      : finished_region_(data.Subregion(0, 0) /* set the length to zero */ ) {
  }
  explicit BitMemoryReader(const uint8_t* data, ssize_t bit_offset = 0)
      : finished_region_(const_cast<uint8_t*>(data), bit_offset, /* bit_length */ 0) {
  }

  const uint8_t* data() const { return finished_region_.data(); }

  BitMemoryRegion GetReadRegion() const { return finished_region_; }

  size_t NumberOfReadBits() const { return finished_region_.size_in_bits(); }

  ALWAYS_INLINE BitMemoryRegion ReadRegion(size_t bit_length) {
    size_t bit_offset = finished_region_.size_in_bits();
    finished_region_.Resize(bit_offset + bit_length);
    return finished_region_.Subregion(bit_offset, bit_length);
  }

  template<typename Result = size_t>
  ALWAYS_INLINE Result ReadBits(size_t bit_length) {
    return ReadRegion(bit_length).LoadBits<Result>(/* bit_offset */ 0, bit_length);
  }

  ALWAYS_INLINE bool ReadBit() {
    return ReadRegion(/* bit_length */ 1).LoadBit(/* bit_offset */ 0);
  }
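
  // Illustrative sketch (added for exposition; not part of the original
  // header): the reader advances an implicit cursor, so consecutive reads
  // consume contiguous bits.
  //
  //   BitMemoryReader reader(data);
  //   bool flag = reader.ReadBit();         // Bit 0.
  //   uint32_t field = reader.ReadBits(7);  // Bits 1..7.
  //   DCHECK_EQ(reader.NumberOfReadBits(), 8u);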

  // Read a variable-length bit-packed integer.
  // The first four bits determine the variable length of the encoded integer:
  //   Values 0..11 represent the result as-is, with no further following bits.
  //   Values 12..15 mean the result is in the next 8/16/24/32 bits respectively.
  ALWAYS_INLINE uint32_t ReadVarint() {
    uint32_t x = ReadBits(kVarintBits);
    return (x <= kVarintMax) ? x : ReadBits((x - kVarintMax) * kBitsPerByte);
  }

  // Read N 'interleaved' varints (different from reading consecutive varints).
  // All small values are stored first and the large values are stored after them.
  // This requires fewer bit-reads compared to storing the varints individually.
  template<size_t N>
  ALWAYS_INLINE std::array<uint32_t, N> ReadInterleavedVarints() {
    static_assert(N * kVarintBits <= sizeof(uint64_t) * kBitsPerByte, "N too big");
    std::array<uint32_t, N> values;
    // StackMap BitTable uses over 8 varints in the header, so we need uint64_t.
    uint64_t data = ReadBits<uint64_t>(N * kVarintBits);
    for (size_t i = 0; i < N; i++) {
      values[i] = BitFieldExtract(data, i * kVarintBits, kVarintBits);
    }
    // Do the second part in its own loop as that seems to produce better code in clang.
    for (size_t i = 0; i < N; i++) {
      if (UNLIKELY(values[i] > kVarintMax)) {
        values[i] = ReadBits((values[i] - kVarintMax) * kBitsPerByte);
      }
    }
    return values;
  }
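
  // Worked example (added for exposition; the values are illustrative):
  // a plain varint for 53 is the 4-bit header 12 (kVarintMax + 1, i.e. "one
  // byte follows") followed by 8 bits holding 53. For interleaved varints
  // {3, 300}, the stream is the 4-bit headers [3, 13] followed by a single
  // 16-bit payload holding 300; all headers can be fetched with one bit-read
  // before any payload is touched.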

 private:
  // Represents all of the bits which were read so far. There is no upper bound.
  // Therefore, by definition, the "cursor" is always at the end of the region.
  BitMemoryRegion finished_region_;

  DISALLOW_COPY_AND_ASSIGN(BitMemoryReader);
};

template<typename Vector>
class BitMemoryWriter {
 public:
  explicit BitMemoryWriter(Vector* out, size_t bit_offset = 0)
      : out_(out), bit_start_(bit_offset), bit_offset_(bit_offset) {
    DCHECK_EQ(NumberOfWrittenBits(), 0u);
  }

  void Truncate(size_t bit_offset) {
    DCHECK_GE(bit_offset, bit_start_);
    DCHECK_LE(bit_offset, bit_offset_);
    bit_offset_ = bit_offset;
    DCHECK_LE(BitsToBytesRoundUp(bit_offset), out_->size());
    out_->resize(BitsToBytesRoundUp(bit_offset));  // Shrink.
  }

  BitMemoryRegion GetWrittenRegion() const {
    return BitMemoryRegion(out_->data(), bit_start_, bit_offset_ - bit_start_);
  }

  const uint8_t* data() const { return out_->data(); }

  size_t NumberOfWrittenBits() const { return bit_offset_ - bit_start_; }

  ALWAYS_INLINE BitMemoryRegion Allocate(size_t bit_length) {
    out_->resize(BitsToBytesRoundUp(bit_offset_ + bit_length));
    BitMemoryRegion region(out_->data(), bit_offset_, bit_length);
    DCHECK_LE(bit_length, std::numeric_limits<size_t>::max() - bit_offset_) << "Overflow";
    bit_offset_ += bit_length;
    return region;
  }

  ALWAYS_INLINE void WriteRegion(const BitMemoryRegion& region) {
    Allocate(region.size_in_bits()).CopyBits(region);
  }

  ALWAYS_INLINE void WriteBits(uint32_t value, size_t bit_length) {
    Allocate(bit_length).StoreBits(/* bit_offset */ 0, value, bit_length);
  }

  ALWAYS_INLINE void WriteBit(bool value) {
    Allocate(1).StoreBit(/* bit_offset */ 0, value);
  }

  template<size_t N>
  ALWAYS_INLINE void WriteInterleavedVarints(std::array<uint32_t, N> values) {
    // Write small values (or the number of bytes needed for the large values).
    for (uint32_t value : values) {
      if (value > kVarintMax) {
        WriteBits(kVarintMax + BitsToBytesRoundUp(MinimumBitsToStore(value)), kVarintBits);
      } else {
        WriteBits(value, kVarintBits);
      }
    }
    // Write large values.
    for (uint32_t value : values) {
      if (value > kVarintMax) {
        WriteBits(value, BitsToBytesRoundUp(MinimumBitsToStore(value)) * kBitsPerByte);
      }
    }
  }

  ALWAYS_INLINE void WriteVarint(uint32_t value) {
    WriteInterleavedVarints<1>({value});
  }

  void WriteBytesAligned(const uint8_t* bytes, size_t length) {
    DCHECK_ALIGNED(bit_start_, kBitsPerByte);
    DCHECK_ALIGNED(bit_offset_, kBitsPerByte);
    DCHECK_EQ(BitsToBytesRoundUp(bit_offset_), out_->size());
    out_->insert(out_->end(), bytes, bytes + length);
    bit_offset_ += length * kBitsPerByte;
  }

  ALWAYS_INLINE void ByteAlign() {
    DCHECK_ALIGNED(bit_start_, kBitsPerByte);
    bit_offset_ = RoundUp(bit_offset_, kBitsPerByte);
  }

 private:
  Vector* out_;
  size_t bit_start_;
  size_t bit_offset_;

  DISALLOW_COPY_AND_ASSIGN(BitMemoryWriter);
};
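
// Illustrative round-trip sketch (added for exposition; not part of the
// original header). Assuming std::vector<uint8_t> as the backing vector:
//
//   std::vector<uint8_t> buffer;
//   BitMemoryWriter<std::vector<uint8_t>> writer(&buffer);
//   writer.WriteVarint(42u);                // Header 12, then 8 bits of 42.
//   writer.WriteBits(0b101u, /* bit_length */ 3);
//   BitMemoryReader reader(buffer.data());
//   uint32_t varint = reader.ReadVarint();  // 42
//   uint32_t bits = reader.ReadBits(3);     // 0b101

}  // namespace art

#endif  // ART_LIBARTBASE_BASE_BIT_MEMORY_REGION_H_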