1 /* 2 * Copyright (C) 2019 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_ 18 #define SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_ 19 20 #include <stddef.h> 21 #include <stdint.h> 22 #include <stdio.h> 23 24 #include <algorithm> 25 #include <array> 26 #include <optional> 27 #include <vector> 28 29 #include "perfetto/base/logging.h" 30 31 namespace perfetto { 32 namespace trace_processor { 33 34 namespace internal { 35 36 class BaseIterator; 37 class AllBitsIterator; 38 class SetBitsIterator; 39 40 } // namespace internal 41 42 // A bitvector which compactly stores a vector of bools using a single bit 43 // for each bool. 44 class BitVector { 45 public: 46 using AllBitsIterator = internal::AllBitsIterator; 47 using SetBitsIterator = internal::SetBitsIterator; 48 49 static constexpr uint32_t kBitsInWord = 64; 50 51 // Builder class which allows efficiently creating a BitVector by appending 52 // words. Using this class is generally far more efficient than trying to set 53 // bits directly in a BitVector or even appending one bit at a time. 54 class Builder { 55 public: 56 // Creates a Builder for building a BitVector of |size| bits. Builder(uint32_t size)57 explicit Builder(uint32_t size) : words_(WordCount(size)), size_(size) {} 58 59 // Skips forward |n| bits leaving them as zeroed. Skip(uint32_t n)60 void Skip(uint32_t n) { 61 PERFETTO_DCHECK(global_bit_offset_ + n <= size_); 62 global_bit_offset_ += n; 63 } 64 65 // Appends a single bit to the builder. 66 // Note: |AppendWord| is far more efficient than this method so should be 67 // preferred. Append(bool value)68 void Append(bool value) { 69 PERFETTO_DCHECK(global_bit_offset_ < size_); 70 71 words_[global_bit_offset_ / BitWord::kBits] |= 72 static_cast<uint64_t>(value) << global_bit_offset_ % BitWord::kBits; 73 global_bit_offset_++; 74 } 75 76 // Appends a whole word to the Builder. Builder has to end on a word 77 // boundary before calling this function. AppendWord(uint64_t word)78 void AppendWord(uint64_t word) { 79 PERFETTO_DCHECK(global_bit_offset_ % BitWord::kBits == 0); 80 PERFETTO_DCHECK(global_bit_offset_ + BitWord::kBits <= size_); 81 words_[global_bit_offset_ / BitWord::kBits] = word; 82 global_bit_offset_ += BitWord::kBits; 83 } 84 85 // Creates a BitVector from this Builder. Build()86 BitVector Build() && { 87 Address addr = IndexToAddress(size_ - 1); 88 uint32_t no_blocks = addr.block_idx + 1; 89 std::vector<uint32_t> counts(no_blocks); 90 91 // Calculate counts only for full blocks. 92 for (uint32_t i = 1; i < no_blocks; ++i) { 93 counts[i] = counts[i - 1] + 94 ConstBlock(&words_[Block::kWords * (i - 1)]).CountSetBits(); 95 } 96 return BitVector{std::move(words_), std::move(counts), size_}; 97 } 98 99 // Returns the number of bits which are in complete words which can be 100 // appended to this builder before having to fallback to |Append| due to 101 // being close to the end. BitsInCompleteWordsUntilFull()102 uint32_t BitsInCompleteWordsUntilFull() { 103 uint32_t next_word = WordCount(global_bit_offset_); 104 uint32_t end_word = WordFloor(size_); 105 uint32_t complete_words = next_word < end_word ? end_word - next_word : 0; 106 return complete_words * BitWord::kBits; 107 } 108 109 // Returns the number of bits which should be appended using |Append| either 110 // hitting a word boundary (and thus able to use |AppendWord|) or until the 111 // bitvector is full (i.e. no more Appends should happen), whichever would 112 // happen first. BitsUntilWordBoundaryOrFull()113 uint32_t BitsUntilWordBoundaryOrFull() { 114 if (global_bit_offset_ == 0 && size_ < BitWord::kBits) { 115 return size_; 116 } 117 uint8_t word_bit_offset = global_bit_offset_ % BitWord::kBits; 118 return std::min(BitsUntilFull(), 119 (BitWord::kBits - word_bit_offset) % BitWord::kBits); 120 } 121 122 // Returns the number of bits which should be appended using |Append| before 123 // hitting a word boundary (and thus able to use |AppendWord|) or until the 124 // bitvector is full (i.e. no more Appends should happen). BitsUntilFull()125 uint32_t BitsUntilFull() { return size_ - global_bit_offset_; } 126 127 private: 128 std::vector<uint64_t> words_; 129 uint32_t global_bit_offset_ = 0; 130 uint32_t size_ = 0; 131 }; 132 133 // Creates an empty bitvector. 134 BitVector(); 135 136 explicit BitVector(std::initializer_list<bool> init); 137 138 // Creates a bitvector of |count| size filled with |value|. 139 explicit BitVector(uint32_t count, bool value = false); 140 141 // Enable moving bitvectors as they have no unmovable state. 142 BitVector(BitVector&&) noexcept = default; 143 BitVector& operator=(BitVector&&) = default; 144 145 // Create a copy of the bitvector. 146 BitVector Copy() const; 147 148 // Create a bitwise Not copy of the bitvector. 149 BitVector Not() const; 150 151 // Returns the size of the bitvector. size()152 uint32_t size() const { return static_cast<uint32_t>(size_); } 153 154 // Returns whether the bit at |idx| is set. IsSet(uint32_t idx)155 bool IsSet(uint32_t idx) const { 156 PERFETTO_DCHECK(idx < size()); 157 158 Address addr = IndexToAddress(idx); 159 return ConstBlockFromIndex(addr.block_idx).IsSet(addr.block_offset); 160 } 161 162 // Returns the number of set bits in the bitvector. CountSetBits()163 uint32_t CountSetBits() const { return CountSetBits(size()); } 164 165 // Returns the number of set bits between the start of the bitvector 166 // (inclusive) and the index |end| (exclusive). CountSetBits(uint32_t end)167 uint32_t CountSetBits(uint32_t end) const { 168 if (end == 0) 169 return 0; 170 171 // Although the external interface we present uses an exclusive |end|, 172 // internally it's a lot nicer to work with an inclusive |end| (mainly 173 // because we get block rollovers on exclusive ends which means we need 174 // to have if checks to ensure we don't overflow the number of blocks). 175 Address addr = IndexToAddress(end - 1); 176 177 // Add the number of set bits until the start of the block to the number 178 // of set bits until the end address inside the block. 179 return counts_[addr.block_idx] + 180 ConstBlockFromIndex(addr.block_idx).CountSetBits(addr.block_offset); 181 } 182 183 // Returns the index of the |n|th set bit. Should only be called with |n| < 184 // CountSetBits(). IndexOfNthSet(uint32_t n)185 uint32_t IndexOfNthSet(uint32_t n) const { 186 PERFETTO_DCHECK(n < CountSetBits()); 187 188 // First search for the block which, up until the start of it, has more than 189 // n bits set. Note that this should never return |counts.begin()| as 190 // that should always be 0. 191 // TODO(lalitm): investigate whether we can make this faster with small 192 // binary search followed by a linear search instead of binary searching the 193 // full way. 194 auto it = std::upper_bound(counts_.begin(), counts_.end(), n); 195 PERFETTO_DCHECK(it != counts_.begin()); 196 197 // Go back one block to find the block which has the bit we are looking for. 198 uint32_t block_idx = 199 static_cast<uint32_t>(std::distance(counts_.begin(), it) - 1); 200 201 // Figure out how many set bits forward we are looking inside the block 202 // by taking away the number of bits at the start of the block from n. 203 uint32_t set_in_block = n - counts_[block_idx]; 204 205 // Compute the address of the bit in the block then convert the full 206 // address back to an index. 207 BlockOffset block_offset = 208 ConstBlockFromIndex(block_idx).IndexOfNthSet(set_in_block); 209 return AddressToIndex(Address{block_idx, block_offset}); 210 } 211 212 // Sets the bit at index |idx| to true and returns the previous value. Set(uint32_t idx)213 bool Set(uint32_t idx) { 214 // Set the bit to the correct value inside the block but store the old 215 // bit to help fix the counts. 216 auto addr = IndexToAddress(idx); 217 bool old_value = 218 ConstBlockFromIndex(addr.block_idx).IsSet(addr.block_offset); 219 220 // If the old value was unset, set the bit and add one to the count. 221 if (PERFETTO_LIKELY(!old_value)) { 222 BlockFromIndex(addr.block_idx).Set(addr.block_offset); 223 224 uint32_t size = static_cast<uint32_t>(counts_.size()); 225 for (uint32_t i = addr.block_idx + 1; i < size; ++i) { 226 counts_[i]++; 227 } 228 } 229 return old_value; 230 } 231 232 // Sets the bit at index |idx| to false. Clear(uint32_t idx)233 void Clear(uint32_t idx) { 234 // Set the bit to the correct value inside the block but store the old 235 // bit to help fix the counts. 236 auto addr = IndexToAddress(idx); 237 bool old_value = 238 ConstBlockFromIndex(addr.block_idx).IsSet(addr.block_offset); 239 240 // If the old value was set, clear the bit and subtract one from all the 241 // counts. 242 if (PERFETTO_LIKELY(old_value)) { 243 BlockFromIndex(addr.block_idx).Clear(addr.block_offset); 244 245 uint32_t size = static_cast<uint32_t>(counts_.size()); 246 for (uint32_t i = addr.block_idx + 1; i < size; ++i) { 247 counts_[i]--; 248 } 249 } 250 } 251 252 // Appends true to the bitvector. AppendTrue()253 void AppendTrue() { 254 AppendFalse(); 255 Address addr = IndexToAddress(size() - 1); 256 BlockFromIndex(addr.block_idx).Set(addr.block_offset); 257 } 258 259 // Appends false to the bitvector. AppendFalse()260 void AppendFalse() { 261 Address addr = IndexToAddress(size_); 262 uint32_t old_blocks_size = BlockCount(); 263 uint32_t new_blocks_size = addr.block_idx + 1; 264 265 if (PERFETTO_UNLIKELY(new_blocks_size > old_blocks_size)) { 266 uint32_t t = CountSetBits(); 267 words_.resize(words_.size() + Block::kWords); 268 counts_.emplace_back(t); 269 } 270 271 size_++; 272 // We don't need to clear the bit as we ensure that anything after 273 // size_ is always set to false. 274 } 275 276 // Resizes the BitVector to the given |size|. 277 // Truncates the BitVector if |size| < |size()| or fills the new space with 278 // |filler| if |size| > |size()|. Calling this method is a noop if |size| == 279 // |size()|. 280 void Resize(uint32_t new_size, bool filler = false); 281 282 // Creates a BitVector of size |end| with the bits between |start| and |end| 283 // filled by calling the filler function |f(index of bit)|. 284 // 285 // As an example, suppose Range(3, 7, [](x) { return x < 5 }). This would 286 // result in the following bitvector: 287 // [0 0 0 1 1 0 0] 288 template <typename Filler = bool(uint32_t)> Range(uint32_t start,uint32_t end,Filler f)289 static BitVector Range(uint32_t start, uint32_t end, Filler f) { 290 // Compute the block index and bitvector index where we start and end 291 // working one block at a time. 292 uint32_t start_fast_block = BlockCount(start); 293 uint32_t start_fast_idx = BlockToIndex(start_fast_block); 294 BitVector bv(start, false); 295 296 // Minimum value of start_fast_idx is numer of bits in block, so we need to 297 // seperate calculation for shorter ranges. 298 if (start_fast_idx > end) { 299 for (uint32_t i = start; i < end; ++i) { 300 bv.Append(f(i)); 301 } 302 bv.counts_.emplace_back(bv.CountSetBits()); 303 bv.size_ = end; 304 return bv; 305 } 306 307 uint32_t end_fast_block = BlockFloor(end); 308 uint32_t end_fast_idx = BlockToIndex(end_fast_block); 309 310 // Fill up to |start_fast_index| with values from the filler. 311 for (uint32_t i = start; i < start_fast_idx; ++i) { 312 bv.Append(f(i)); 313 } 314 315 // Assert words_ vector is full and size_ is properly calculated. 316 PERFETTO_DCHECK(bv.words_.size() % Block::kWords == 0); 317 PERFETTO_DCHECK(bv.words_.size() * BitWord::kBits == bv.size_); 318 319 // At this point we can work one block at a time. 320 bv.words_.resize(bv.words_.size() + 321 Block::kWords * (end_fast_block - start_fast_block)); 322 for (uint32_t i = start_fast_block; i < end_fast_block; ++i) { 323 uint64_t* block_start_word = &bv.words_[i * Block::kWords]; 324 Block(block_start_word).FromFiller(bv.size_, f); 325 bv.counts_.emplace_back(bv.CountSetBits()); 326 bv.size_ += Block::kBits; 327 } 328 329 // Add the last few elements to finish up to |end|. 330 for (uint32_t i = end_fast_idx; i < end; ++i) { 331 bv.Append(f(i)); 332 } 333 334 return bv; 335 } 336 337 // Creates a BitVector of size |end| bit the bits between |start| and |end| 338 // filled with corresponding bits |this| BitVector. 339 BitVector IntersectRange(uint32_t range_start, uint32_t range_end) const; 340 341 // Requests the removal of unused capacity. 342 // Matches the semantics of std::vector::shrink_to_fit. ShrinkToFit()343 void ShrinkToFit() { 344 words_.shrink_to_fit(); 345 counts_.shrink_to_fit(); 346 } 347 348 // Updates the ith set bit of this bitvector with the value of 349 // |other.IsSet(i)|. 350 // 351 // This is the best way to batch update all the bits which are set; for 352 // example when filtering rows, we want to filter all rows which are currently 353 // included but ignore rows which have already been excluded. 354 // 355 // For example suppose the following: 356 // this: 1 1 0 0 1 0 1 357 // other: 0 1 1 0 358 // This will change this to the following: 359 // this: 0 1 0 0 1 0 0 360 // TODO(lalitm): investigate whether we should just change this to And. 361 void UpdateSetBits(const BitVector& other); 362 363 // Iterate all the bits in the BitVector. 364 // 365 // Usage: 366 // for (auto it = bv.IterateAllBits(); it; it.Next()) { 367 // ... 368 // } 369 AllBitsIterator IterateAllBits() const; 370 371 // Iterate all the set bits in the BitVector. 372 // 373 // Usage: 374 // for (auto it = bv.IterateSetBits(); it; it.Next()) { 375 // ... 376 // } 377 SetBitsIterator IterateSetBits() const; 378 379 // Returns the approximate cost (in bytes) of storing a bitvector with size 380 // |n|. This can be used to make decisions about whether using a BitVector is 381 // worthwhile. 382 // This cost should not be treated as exact - it just gives an indication of 383 // the memory needed. ApproxBytesCost(uint32_t n)384 static constexpr uint32_t ApproxBytesCost(uint32_t n) { 385 // The two main things making up a bitvector is the cost of the blocks of 386 // bits and the cost of the counts vector. 387 return BlockCount(n) * Block::kBits + BlockCount(n) * sizeof(uint32_t); 388 } 389 390 private: 391 friend class internal::BaseIterator; 392 friend class internal::AllBitsIterator; 393 friend class internal::SetBitsIterator; 394 395 // Represents the offset of a bit within a block. 396 struct BlockOffset { 397 uint16_t word_idx; 398 uint16_t bit_idx; 399 }; 400 401 // Represents the address of a bit within the bitvector. 402 struct Address { 403 uint32_t block_idx; 404 BlockOffset block_offset; 405 }; 406 407 // Represents the smallest collection of bits we can refer to as 408 // one unit. 409 // 410 // Currently, this is implemented as a 64 bit integer as this is the 411 // largest type which we can assume to be present on all platforms. 412 class BitWord { 413 public: 414 static constexpr uint32_t kBits = 64; 415 BitWord(uint64_t * word)416 explicit BitWord(uint64_t* word) : word_(word) {} 417 418 // Bitwise ors the given |mask| to the current value. Or(uint64_t mask)419 void Or(uint64_t mask) { *word_ |= mask; } 420 421 // Sets the bit at the given index to true. Set(uint32_t idx)422 void Set(uint32_t idx) { 423 PERFETTO_DCHECK(idx < kBits); 424 425 // Or the value for the true shifted up to |idx| with the word. 426 Or(1ull << idx); 427 } 428 429 // Sets the bit at the given index to false. Clear(uint32_t idx)430 void Clear(uint32_t idx) { 431 PERFETTO_DCHECK(idx < kBits); 432 433 // And the integer of all bits set apart from |idx| with the word. 434 *word_ &= ~(1ull << idx); 435 } 436 437 // Clears all the bits (i.e. sets the atom to zero). ClearAll()438 void ClearAll() { *word_ = 0; } 439 440 // Retains all bits up to and including the bit at |idx| and clears 441 // all bits after this point. ClearAfter(uint32_t idx)442 void ClearAfter(uint32_t idx) { 443 PERFETTO_DCHECK(idx < kBits); 444 *word_ = WordUntil(idx); 445 } 446 447 // Sets all bits between the bit at |start| and |end| (inclusive). Set(uint32_t start,uint32_t end)448 void Set(uint32_t start, uint32_t end) { 449 uint32_t diff = end - start; 450 *word_ |= (MaskAllBitsSetUntil(diff) << static_cast<uint64_t>(start)); 451 } 452 453 // Return a mask of all the bits up to and including bit at |idx|. MaskAllBitsSetUntil(uint32_t idx)454 static uint64_t MaskAllBitsSetUntil(uint32_t idx) { 455 // Start with 1 and shift it up (idx + 1) bits we get: 456 // top : 00000000010000000 457 uint64_t top = 1ull << ((idx + 1ull) % kBits); 458 459 // We need to handle the case where idx == 63. In this case |top| will be 460 // zero because 1 << ((idx + 1) % 64) == 1 << (64 % 64) == 1. 461 // In this case, we actually want top == 0. We can do this by shifting 462 // down by (idx + 1) / kBits - this will be a noop for every index other 463 // than idx == 63. This should also be free on x86 because of the mod 464 // instruction above. 465 top = top >> ((idx + 1) / kBits); 466 467 // Then if we take away 1, we get precisely the mask we want. 468 return top - 1u; 469 } 470 471 private: 472 // Returns the bits up to and including the bit at |idx|. WordUntil(uint32_t idx)473 uint64_t WordUntil(uint32_t idx) const { 474 PERFETTO_DCHECK(idx < kBits); 475 476 // To understand what is happeninng here, consider an example. 477 // Suppose we want to all the bits up to the 7th bit in the atom 478 // 7th 479 // | 480 // v 481 // atom: 01010101011111000 482 // 483 // The easiest way to do this would be if we had a mask with only 484 // the bottom 7 bits set: 485 // mask: 00000000001111111 486 uint64_t mask = MaskAllBitsSetUntil(idx); 487 488 // Finish up by and'ing the atom with the computed mask. 489 return *word_ & mask; 490 } 491 492 uint64_t* word_; 493 }; 494 495 class ConstBitWord { 496 public: 497 static constexpr uint32_t kBits = 64; 498 ConstBitWord(const uint64_t * word)499 explicit ConstBitWord(const uint64_t* word) : word_(word) {} 500 501 // Returns whether the bit at the given index is set. IsSet(uint32_t idx)502 bool IsSet(uint32_t idx) const { 503 PERFETTO_DCHECK(idx < kBits); 504 return (*word_ >> idx) & 1ull; 505 } 506 507 // Bitwise not. Not()508 uint64_t Not() const { return ~(*word_); } 509 510 // Returns the index of the nth set bit. 511 // Undefined if |n| >= |CountSetBits()|. IndexOfNthSet(uint32_t n)512 uint16_t IndexOfNthSet(uint32_t n) const { 513 PERFETTO_DCHECK(n < kBits); 514 515 // The below code is very dense but essentially computes the nth set 516 // bit inside |atom| in the "broadword" style of programming (sometimes 517 // referred to as "SIMD within a register"). 518 // 519 // Instead of treating a uint64 as an individual unit, broadword 520 // algorithms treat them as a packed vector of uint8. By doing this, they 521 // allow branchless algorithms when considering bits of a uint64. 522 // 523 // In benchmarks, this algorithm has found to be the fastest, portable 524 // way of computing the nth set bit (if we were only targetting new 525 // versions of x64, we could also use pdep + ctz but unfortunately 526 // this would fail on WASM - this about 2.5-3x faster on x64). 527 // 528 // The code below was taken from the paper 529 // http://vigna.di.unimi.it/ftp/papers/Broadword.pdf 530 uint64_t s = *word_ - ((*word_ & 0xAAAAAAAAAAAAAAAA) >> 1); 531 s = (s & 0x3333333333333333) + ((s >> 2) & 0x3333333333333333); 532 s = ((s + (s >> 4)) & 0x0F0F0F0F0F0F0F0F) * L8; 533 534 uint64_t b = (BwLessThan(s, n * L8) >> 7) * L8 >> 53 & ~7ull; 535 uint64_t l = n - ((s << 8) >> b & 0xFF); 536 s = (BwGtZero(((*word_ >> b & 0xFF) * L8) & 0x8040201008040201) >> 7) * 537 L8; 538 539 uint64_t ret = b + ((BwLessThan(s, l * L8) >> 7) * L8 >> 56); 540 541 return static_cast<uint16_t>(ret); 542 } 543 544 // Returns the number of set bits. CountSetBits()545 uint32_t CountSetBits() const { 546 return static_cast<uint32_t>(PERFETTO_POPCOUNT(*word_)); 547 } 548 549 // Returns the number of set bits up to and including the bit at |idx|. CountSetBits(uint32_t idx)550 uint32_t CountSetBits(uint32_t idx) const { 551 PERFETTO_DCHECK(idx < kBits); 552 return static_cast<uint32_t>(PERFETTO_POPCOUNT(WordUntil(idx))); 553 } 554 555 private: 556 // Constant with all the low bit of every byte set. 557 static constexpr uint64_t L8 = 0x0101010101010101; 558 559 // Constant with all the high bit of every byte set. 560 static constexpr uint64_t H8 = 0x8080808080808080; 561 562 // Returns a packed uint64 encoding whether each byte of x is less 563 // than each corresponding byte of y. 564 // This is computed in the "broadword" style of programming; see 565 // IndexOfNthSet for details on this. BwLessThan(uint64_t x,uint64_t y)566 static uint64_t BwLessThan(uint64_t x, uint64_t y) { 567 return (((y | H8) - (x & ~H8)) ^ x ^ y) & H8; 568 } 569 570 // Returns a packed uint64 encoding whether each byte of x is greater 571 // than or equal zero. 572 // This is computed in the "broadword" style of programming; see 573 // IndexOfNthSet for details on this. BwGtZero(uint64_t x)574 static uint64_t BwGtZero(uint64_t x) { return (((x | H8) - L8) | x) & H8; } 575 576 // Returns the bits up to and including the bit at |idx|. WordUntil(uint32_t idx)577 uint64_t WordUntil(uint32_t idx) const { 578 PERFETTO_DCHECK(idx < kBits); 579 580 // To understand what is happeninng here, consider an example. 581 // Suppose we want to all the bits up to the 7th bit in the atom 582 // 7th 583 // | 584 // v 585 // atom: 01010101011111000 586 // 587 // The easiest way to do this would be if we had a mask with only 588 // the bottom 7 bits set: 589 // mask: 00000000001111111 590 uint64_t mask = BitWord::MaskAllBitsSetUntil(idx); 591 592 // Finish up by and'ing the atom with the computed mask. 593 return *word_ & mask; 594 } 595 596 const uint64_t* word_; 597 }; 598 599 // Represents a group of bits with a bitcount such that it is 600 // efficient to work on these bits. 601 // 602 // On x86 architectures we generally target for trace processor, the 603 // size of a cache line is 64 bytes (or 512 bits). For this reason, 604 // we make the size of the block contain 8 atoms as 8 * 64 == 512. 605 // 606 // TODO(lalitm): investigate whether we should tune this value for 607 // WASM and ARM. 608 class Block { 609 public: 610 // See class documentation for how these constants are chosen. 611 static constexpr uint16_t kWords = 8; 612 static constexpr uint32_t kBits = kWords * BitWord::kBits; 613 Block(uint64_t * start_word)614 explicit Block(uint64_t* start_word) : start_word_(start_word) {} 615 616 // Sets the bit at the given address to true. Set(const BlockOffset & addr)617 void Set(const BlockOffset& addr) { 618 PERFETTO_DCHECK(addr.word_idx < kWords); 619 BitWord(&start_word_[addr.word_idx]).Set(addr.bit_idx); 620 } 621 622 // Sets the bit at the given address to false. Clear(const BlockOffset & addr)623 void Clear(const BlockOffset& addr) { 624 PERFETTO_DCHECK(addr.word_idx < kWords); 625 626 BitWord(&start_word_[addr.word_idx]).Clear(addr.bit_idx); 627 } 628 629 // Retains all bits up to and including the bit at |addr| and clears 630 // all bits after this point. ClearAfter(const BlockOffset & offset)631 void ClearAfter(const BlockOffset& offset) { 632 PERFETTO_DCHECK(offset.word_idx < kWords); 633 634 // In the first atom, keep the bits until the address specified. 635 BitWord(&start_word_[offset.word_idx]).ClearAfter(offset.bit_idx); 636 637 // For all subsequent atoms, we just clear the whole atom. 638 for (uint32_t i = offset.word_idx + 1; i < kWords; ++i) { 639 BitWord(&start_word_[i]).ClearAll(); 640 } 641 } 642 643 // Set all the bits between the offsets given by |start| and |end| 644 // (inclusive). Set(const BlockOffset & start,const BlockOffset & end)645 void Set(const BlockOffset& start, const BlockOffset& end) { 646 if (start.word_idx == end.word_idx) { 647 // If there is only one word we will change, just set the range within 648 // the word. 649 BitWord(&start_word_[start.word_idx]).Set(start.bit_idx, end.bit_idx); 650 return; 651 } 652 653 // Otherwise, we have more than one word to set. To do this, we will 654 // do this in three steps. 655 656 // First, we set the first word from the start to the end of the word. 657 BitWord(&start_word_[start.word_idx]) 658 .Set(start.bit_idx, BitWord::kBits - 1); 659 660 // Next, we set all words (except the last). 661 for (uint32_t i = start.word_idx + 1; i < end.word_idx; ++i) { 662 BitWord(&start_word_[i]).Set(0, BitWord::kBits - 1); 663 } 664 665 // Finally, we set the word block from the start to the end offset. 666 BitWord(&start_word_[end.word_idx]).Set(0, end.bit_idx); 667 } 668 669 template <typename Filler> FromFiller(uint32_t offset,Filler f)670 void FromFiller(uint32_t offset, Filler f) { 671 // We choose to iterate the bits as the outer loop as this allows us 672 // to reuse the mask and the bit offset between iterations of the loop. 673 // This makes a small (but noticable) impact in the performance of this 674 // function. 675 for (uint32_t i = 0; i < BitWord::kBits; ++i) { 676 uint64_t mask = 1ull << i; 677 uint32_t offset_with_bit = offset + i; 678 for (uint32_t j = 0; j < Block::kWords; ++j) { 679 bool res = f(offset_with_bit + j * BitWord::kBits); 680 BitWord(&start_word_[j]).Or(res ? mask : 0); 681 } 682 } 683 } 684 ReplaceWith(Block block)685 void ReplaceWith(Block block) { 686 for (uint32_t i = 0; i < BitWord::kBits; ++i) { 687 start_word_[i] = block.start_word()[i]; 688 } 689 } 690 start_word()691 uint64_t* start_word() { return start_word_; } 692 693 private: 694 uint64_t* start_word_; 695 }; 696 697 class ConstBlock { 698 public: 699 // See class documentation for how these constants are chosen. 700 static constexpr uint16_t kWords = Block::kWords; 701 static constexpr uint32_t kBits = kWords * BitWord::kBits; 702 ConstBlock(const uint64_t * start_word)703 explicit ConstBlock(const uint64_t* start_word) : start_word_(start_word) {} ConstBlock(Block block)704 explicit ConstBlock(Block block) : start_word_(block.start_word()) {} 705 706 // Returns whether the bit at the given address is set. IsSet(const BlockOffset & addr)707 bool IsSet(const BlockOffset& addr) const { 708 PERFETTO_DCHECK(addr.word_idx < kWords); 709 return ConstBitWord(start_word_ + addr.word_idx).IsSet(addr.bit_idx); 710 } 711 712 // Gets the offset of the nth set bit in this block. IndexOfNthSet(uint32_t n)713 BlockOffset IndexOfNthSet(uint32_t n) const { 714 uint32_t count = 0; 715 for (uint16_t i = 0; i < kWords; ++i) { 716 // Keep a running count of all the set bits in the atom. 717 uint32_t value = count + ConstBitWord(start_word_ + i).CountSetBits(); 718 if (value <= n) { 719 count = value; 720 continue; 721 } 722 723 // The running count of set bits is more than |n|. That means this 724 // atom contains the bit we are looking for. 725 726 // Take away the number of set bits to the start of this atom from 727 // |n|. 728 uint32_t set_in_atom = n - count; 729 730 // Figure out the index of the set bit inside the atom and create the 731 // address of this bit from that. 732 uint16_t bit_idx = 733 ConstBitWord(start_word_ + i).IndexOfNthSet(set_in_atom); 734 PERFETTO_DCHECK(bit_idx < 64); 735 return BlockOffset{i, bit_idx}; 736 } 737 PERFETTO_FATAL("Index out of bounds"); 738 } 739 740 // Gets the number of set bits within a block up to and including the bit 741 // at the given address. CountSetBits(const BlockOffset & addr)742 uint32_t CountSetBits(const BlockOffset& addr) const { 743 PERFETTO_DCHECK(addr.word_idx < kWords); 744 745 // Count all the set bits in the atom until we reach the last atom 746 // index. 747 uint32_t count = 0; 748 for (uint32_t i = 0; i < addr.word_idx; ++i) { 749 count += ConstBitWord(&start_word_[i]).CountSetBits(); 750 } 751 752 // For the last atom, only count the bits upto and including the bit 753 // index. 754 return count + ConstBitWord(&start_word_[addr.word_idx]) 755 .CountSetBits(addr.bit_idx); 756 } 757 758 // Gets the number of set bits within a block up. CountSetBits()759 uint32_t CountSetBits() const { 760 uint32_t count = 0; 761 for (uint32_t i = 0; i < kWords; ++i) { 762 count += ConstBitWord(&start_word_[i]).CountSetBits(); 763 } 764 return count; 765 } 766 767 private: 768 const uint64_t* start_word_; 769 }; 770 771 BitVector(std::vector<uint64_t> words, 772 std::vector<uint32_t> counts, 773 uint32_t size); 774 775 BitVector(const BitVector&) = delete; 776 BitVector& operator=(const BitVector&) = delete; 777 778 // Returns the number of 8 elements blocks in the bitvector. BlockCount()779 uint32_t BlockCount() { 780 return static_cast<uint32_t>(words_.size()) / Block::kWords; 781 } 782 BlockFromIndex(uint32_t idx)783 Block BlockFromIndex(uint32_t idx) { 784 PERFETTO_DCHECK(Block::kWords * (idx + 1) <= words_.size()); 785 786 uint64_t* start_word = &words_[Block::kWords * idx]; 787 return Block(start_word); 788 } 789 ConstBlockFromIndex(uint32_t idx)790 ConstBlock ConstBlockFromIndex(uint32_t idx) const { 791 PERFETTO_DCHECK(Block::kWords * (idx + 1) <= words_.size()); 792 793 return ConstBlock(&words_[Block::kWords * idx]); 794 } 795 796 // Set all the bits between the addresses given by |start| and |end| 797 // (inclusive). 798 // Note: this method does not update the counts vector - that is the 799 // responsibility of the caller. Set(const Address & start,const Address & end)800 void Set(const Address& start, const Address& end) { 801 static constexpr BlockOffset kFirstBlockOffset = BlockOffset{0, 0}; 802 static constexpr BlockOffset kLastBlockOffset = 803 BlockOffset{Block::kWords - 1, BitWord::kBits - 1}; 804 805 if (start.block_idx == end.block_idx) { 806 // If there is only one block we will change, just set the range within 807 // the block. 808 BlockFromIndex(start.block_idx).Set(start.block_offset, end.block_offset); 809 return; 810 } 811 812 // Otherwise, we have more than one block to set. To do this, we will 813 // do this in three steps. 814 815 // First, we set the first block from the start to the end of the block. 816 BlockFromIndex(start.block_idx).Set(start.block_offset, kLastBlockOffset); 817 818 // Next, we set all blocks (except the last). 819 for (uint32_t cur_block_idx = start.block_idx + 1; 820 cur_block_idx < end.block_idx; ++cur_block_idx) { 821 BlockFromIndex(cur_block_idx).Set(kFirstBlockOffset, kLastBlockOffset); 822 } 823 824 // Finally, we set the last block from the start to the end offset. 825 BlockFromIndex(end.block_idx).Set(kFirstBlockOffset, end.block_offset); 826 } 827 828 // Helper function to append a bit. Generally, prefer to call AppendTrue 829 // or AppendFalse instead of this function if you know the type - they will 830 // be faster. Append(bool value)831 void Append(bool value) { 832 if (value) { 833 AppendTrue(); 834 } else { 835 AppendFalse(); 836 } 837 } 838 839 // Returns the index of the word which would store |idx|. WordFloor(uint32_t idx)840 static constexpr uint32_t WordFloor(uint32_t idx) { 841 return idx / BitWord::kBits; 842 } 843 844 // Returns number of words (int64_t) required to store |bit_count| bits. WordCount(uint32_t bit_count)845 static uint32_t WordCount(uint32_t bit_count) { 846 // See |BlockCount| for an explanation of this trick. 847 return (bit_count + BitWord::kBits - 1) / BitWord::kBits; 848 } 849 IndexToAddress(uint32_t idx)850 static Address IndexToAddress(uint32_t idx) { 851 Address a; 852 a.block_idx = idx / Block::kBits; 853 854 uint16_t bit_idx_inside_block = idx % Block::kBits; 855 a.block_offset.word_idx = bit_idx_inside_block / BitWord::kBits; 856 a.block_offset.bit_idx = bit_idx_inside_block % BitWord::kBits; 857 return a; 858 } 859 AddressToIndex(Address addr)860 static uint32_t AddressToIndex(Address addr) { 861 return addr.block_idx * Block::kBits + 862 addr.block_offset.word_idx * BitWord::kBits + 863 addr.block_offset.bit_idx; 864 } 865 866 // Returns number of blocks (arrays of 8 int64_t) required to store 867 // |bit_count| bits. 868 // 869 // This is useful to be able to find indices where "fast" algorithms can 870 // start which work on entire blocks. BlockCount(uint32_t bit_count)871 static constexpr uint32_t BlockCount(uint32_t bit_count) { 872 // Adding |Block::kBits - 1| gives us a quick way to get the count. We 873 // do this instead of adding 1 at the end because that gives incorrect 874 // answers for bit_count % Block::kBits == 0. 875 return (bit_count + Block::kBits - 1) / Block::kBits; 876 } 877 878 // Returns the index of the block which would store |idx|. BlockFloor(uint32_t idx)879 static constexpr uint32_t BlockFloor(uint32_t idx) { 880 return idx / Block::kBits; 881 } 882 883 // Converts a block index to a index in the BitVector. BlockToIndex(uint32_t block_idx)884 static constexpr uint32_t BlockToIndex(uint32_t block_idx) { 885 return block_idx * Block::kBits; 886 } 887 888 uint32_t size_ = 0; 889 // See class documentation for how these constants are chosen. 890 static constexpr uint16_t kWordsInBlock = Block::kWords; 891 static constexpr uint32_t kBitsInBlock = kWordsInBlock * BitWord::kBits; 892 std::vector<uint32_t> counts_; 893 std::vector<uint64_t> words_; 894 }; 895 896 } // namespace trace_processor 897 } // namespace perfetto 898 899 #endif // SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_ 900