1 /* 2 * Copyright (C) 2019 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_ 18 #define SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_ 19 20 #include <stddef.h> 21 #include <stdint.h> 22 #include <stdio.h> 23 24 #include <algorithm> 25 #include <array> 26 #include <vector> 27 28 #include "perfetto/base/logging.h" 29 30 namespace perfetto { 31 namespace trace_processor { 32 33 namespace internal { 34 35 class BaseIterator; 36 class AllBitsIterator; 37 class SetBitsIterator; 38 39 } // namespace internal 40 41 // A bitvector which compactly stores a vector of bools using a single bit 42 // for each bool. 43 class BitVector { 44 public: 45 using AllBitsIterator = internal::AllBitsIterator; 46 using SetBitsIterator = internal::SetBitsIterator; 47 48 // Creates an empty bitvector. 49 BitVector(); 50 51 explicit BitVector(std::initializer_list<bool> init); 52 53 // Creates a bitvector of |count| size filled with |value|. 54 BitVector(uint32_t count, bool value = false); 55 56 // Enable moving bitvectors as they have no unmovable state. 57 BitVector(BitVector&&) noexcept = default; 58 BitVector& operator=(BitVector&&) = default; 59 60 // Create a copy of the bitvector. 61 BitVector Copy() const; 62 63 // Returns the size of the bitvector. size()64 uint32_t size() const { return static_cast<uint32_t>(size_); } 65 66 // Returns whether the bit at |idx| is set. IsSet(uint32_t idx)67 bool IsSet(uint32_t idx) const { 68 PERFETTO_DCHECK(idx < size()); 69 70 Address a = IndexToAddress(idx); 71 return blocks_[a.block_idx].IsSet(a.block_offset); 72 } 73 74 // Returns the number of set bits in the bitvector. GetNumBitsSet()75 uint32_t GetNumBitsSet() const { return GetNumBitsSet(size()); } 76 77 // Returns the number of set bits between the start of the bitvector 78 // (inclusive) and the index |end| (exclusive). GetNumBitsSet(uint32_t end)79 uint32_t GetNumBitsSet(uint32_t end) const { 80 if (end == 0) 81 return 0; 82 83 // Although the external interface we present uses an exclusive |end|, 84 // internally it's a lot nicer to work with an inclusive |end| (mainly 85 // because we get block rollovers on exclusive ends which means we need 86 // to have if checks to ensure we don't overflow the number of blocks). 87 Address addr = IndexToAddress(end - 1); 88 uint32_t idx = addr.block_idx; 89 90 // Add the number of set bits until the start of the block to the number 91 // of set bits until the end address inside the block. 92 return counts_[idx] + blocks_[idx].GetNumBitsSet(addr.block_offset); 93 } 94 95 // Returns the index of the |n|th set bit. Should only be called with |n| < 96 // GetNumBitsSet(). IndexOfNthSet(uint32_t n)97 uint32_t IndexOfNthSet(uint32_t n) const { 98 PERFETTO_DCHECK(n < GetNumBitsSet()); 99 100 // First search for the block which, up until the start of it, has more than 101 // n bits set. Note that this should never return |counts.begin()| as 102 // that should always be 0. 103 // TODO(lalitm): investigate whether we can make this faster with small 104 // binary search followed by a linear search instead of binary searching the 105 // full way. 106 auto it = std::upper_bound(counts_.begin(), counts_.end(), n); 107 PERFETTO_DCHECK(it != counts_.begin()); 108 109 // Go back one block to find the block which has the bit we are looking for. 110 uint32_t block_idx = 111 static_cast<uint32_t>(std::distance(counts_.begin(), it) - 1); 112 113 // Figure out how many set bits forward we are looking inside the block 114 // by taking away the number of bits at the start of the block from n. 115 uint32_t set_in_block = n - counts_[block_idx]; 116 117 // Compute the address of the bit in the block then convert the full 118 // address back to an index. 119 BlockOffset block_offset = blocks_[block_idx].IndexOfNthSet(set_in_block); 120 return AddressToIndex(Address{block_idx, block_offset}); 121 } 122 123 // Sets the bit at index |idx| to true. Set(uint32_t idx)124 void Set(uint32_t idx) { 125 // Set the bit to the correct value inside the block but store the old 126 // bit to help fix the counts. 127 auto addr = IndexToAddress(idx); 128 bool old_value = blocks_[addr.block_idx].IsSet(addr.block_offset); 129 130 // If the old value was unset, set the bit and add one to the count. 131 if (PERFETTO_LIKELY(!old_value)) { 132 blocks_[addr.block_idx].Set(addr.block_offset); 133 134 uint32_t size = static_cast<uint32_t>(counts_.size()); 135 for (uint32_t i = addr.block_idx + 1; i < size; ++i) { 136 counts_[i]++; 137 } 138 } 139 } 140 141 // Sets the bit at index |idx| to false. Clear(uint32_t idx)142 void Clear(uint32_t idx) { 143 // Set the bit to the correct value inside the block but store the old 144 // bit to help fix the counts. 145 auto addr = IndexToAddress(idx); 146 bool old_value = blocks_[addr.block_idx].IsSet(addr.block_offset); 147 148 // If the old value was set, clear the bit and subtract one from all the 149 // counts. 150 if (PERFETTO_LIKELY(old_value)) { 151 blocks_[addr.block_idx].Clear(addr.block_offset); 152 153 uint32_t size = static_cast<uint32_t>(counts_.size()); 154 for (uint32_t i = addr.block_idx + 1; i < size; ++i) { 155 counts_[i]--; 156 } 157 } 158 } 159 160 // Appends true to the bitvector. AppendTrue()161 void AppendTrue() { 162 Address addr = IndexToAddress(size_); 163 uint32_t old_blocks_size = static_cast<uint32_t>(blocks_.size()); 164 uint32_t new_blocks_size = addr.block_idx + 1; 165 166 if (PERFETTO_UNLIKELY(new_blocks_size > old_blocks_size)) { 167 uint32_t t = GetNumBitsSet(); 168 blocks_.emplace_back(); 169 counts_.emplace_back(t); 170 } 171 172 size_++; 173 blocks_[addr.block_idx].Set(addr.block_offset); 174 } 175 176 // Appends false to the bitvector. AppendFalse()177 void AppendFalse() { 178 Address addr = IndexToAddress(size_); 179 uint32_t old_blocks_size = static_cast<uint32_t>(blocks_.size()); 180 uint32_t new_blocks_size = addr.block_idx + 1; 181 182 if (PERFETTO_UNLIKELY(new_blocks_size > old_blocks_size)) { 183 uint32_t t = GetNumBitsSet(); 184 blocks_.emplace_back(); 185 counts_.emplace_back(t); 186 } 187 188 size_++; 189 // We don't need to clear the bit as we ensure that anything after 190 // size_ is always set to false. 191 } 192 193 // Resizes the BitVector to the given |size|. 194 // Truncates the BitVector if |size| < |size()| or fills the new space with 195 // |value| if |size| > |size()|. Calling this method is a noop if |size| == 196 // |size()|. 197 void Resize(uint32_t size, bool value = false) { 198 uint32_t old_size = size_; 199 if (size == old_size) 200 return; 201 202 // Empty bitvectors should be memory efficient so we don't keep any data 203 // around in the bitvector. 204 if (size == 0) { 205 blocks_.clear(); 206 counts_.clear(); 207 size_ = 0; 208 return; 209 } 210 211 // Compute the address of the new last bit in the bitvector. 212 Address last_addr = IndexToAddress(size - 1); 213 uint32_t old_blocks_size = static_cast<uint32_t>(counts_.size()); 214 uint32_t new_blocks_size = last_addr.block_idx + 1; 215 216 // Then, resize the block and count vectors to have the correct 217 // number of entries. 218 blocks_.resize(new_blocks_size); 219 counts_.resize(new_blocks_size); 220 221 if (size > old_size) { 222 if (value) { 223 // If the new space should be filled with true, then set all the bits 224 // between the address of the old size and the new last address. 225 const Address& start = IndexToAddress(old_size); 226 Set(start, last_addr); 227 228 // We then need to update the counts vector to match the changes we 229 // made to the blocks. 230 231 // We start by adding the bits we set in the first block to the 232 // cummulative count before the range we changed. 233 Address end_of_block = {start.block_idx, 234 {Block::kWords - 1, BitWord::kBits - 1}}; 235 uint32_t count_in_block_after_end = 236 AddressToIndex(end_of_block) - AddressToIndex(start) + 1; 237 uint32_t set_count = GetNumBitsSet() + count_in_block_after_end; 238 239 for (uint32_t i = start.block_idx + 1; i <= last_addr.block_idx; ++i) { 240 // Set the count to the cummulative count so far. 241 counts_[i] = set_count; 242 243 // Add a full block of set bits to the count. 244 set_count += Block::kBits; 245 } 246 } else { 247 // If the newly added bits are false, we just need to update the 248 // counts vector with the current size of the bitvector for all 249 // the newly added blocks. 250 if (new_blocks_size > old_blocks_size) { 251 uint32_t count = GetNumBitsSet(); 252 for (uint32_t i = old_blocks_size; i < new_blocks_size; ++i) { 253 counts_[i] = count; 254 } 255 } 256 } 257 } else { 258 // Throw away all the bits after the new last bit. We do this to make 259 // future lookup, append and resize operations not have to worrying about 260 // trailing garbage bits in the last block. 261 blocks_[last_addr.block_idx].ClearAfter(last_addr.block_offset); 262 } 263 264 // Actually update the size. 265 size_ = size; 266 } 267 268 // Creates a BitVector of size |end| with the bits between |start| and |end| 269 // filled by calling the filler function |f(index of bit)|. 270 // 271 // As an example, suppose Range(3, 7, [](x) { return x < 5 }). This would 272 // result in the following bitvector: 273 // [0 0 0 1 1 0 0 0] 274 template <typename Filler = bool(uint32_t)> Range(uint32_t start,uint32_t end,Filler f)275 static BitVector Range(uint32_t start, uint32_t end, Filler f) { 276 // Compute the block index and bitvector index where we start and end 277 // working one block at a time. 278 uint32_t start_fast_block = BlockCeil(start); 279 uint32_t start_fast_idx = BlockToIndex(start_fast_block); 280 uint32_t end_fast_block = BlockFloor(end); 281 uint32_t end_fast_idx = BlockToIndex(end_fast_block); 282 283 // First, create the BitVector up to |start| then fill up to 284 // |start_fast_index| with values from the filler. 285 BitVector bv(start, false); 286 for (uint32_t i = start; i < start_fast_idx; ++i) { 287 bv.Append(f(i)); 288 } 289 290 // At this point we can work one block at a time. 291 for (uint32_t i = start_fast_block; i < end_fast_block; ++i) { 292 bv.counts_.emplace_back(bv.GetNumBitsSet()); 293 bv.blocks_.emplace_back(Block::FromFiller(bv.size_, f)); 294 bv.size_ += Block::kBits; 295 } 296 297 // Add the last few elements to finish up to |end|. 298 for (uint32_t i = end_fast_idx; i < end; ++i) { 299 bv.Append(f(i)); 300 } 301 return bv; 302 } 303 304 // Updates the ith set bit of this bitvector with the value of 305 // |other.IsSet(i)|. 306 // 307 // This is the best way to batch update all the bits which are set; for 308 // example when filtering rows, we want to filter all rows which are currently 309 // included but ignore rows which have already been excluded. 310 // 311 // For example suppose the following: 312 // this: 1 1 0 0 1 0 1 313 // other: 0 1 1 0 314 // This will change this to the following: 315 // this: 0 1 0 0 1 0 0 316 // TODO(lalitm): investigate whether we should just change this to And. 317 void UpdateSetBits(const BitVector& other); 318 319 // Iterate all the bits in the BitVector. 320 // 321 // Usage: 322 // for (auto it = bv.IterateAllBits(); it; it.Next()) { 323 // ... 324 // } 325 AllBitsIterator IterateAllBits() const; 326 327 // Iterate all the set bits in the BitVector. 328 // 329 // Usage: 330 // for (auto it = bv.IterateSetBits(); it; it.Next()) { 331 // ... 332 // } 333 SetBitsIterator IterateSetBits() const; 334 335 // Returns the approximate cost (in bytes) of storing a bitvector with size 336 // |n|. This can be used to make decisions about whether using a BitVector is 337 // worthwhile. 338 // This cost should not be treated as exact - it just gives an indication of 339 // the memory needed. ApproxBytesCost(uint32_t n)340 static constexpr uint32_t ApproxBytesCost(uint32_t n) { 341 // The two main things making up a bitvector is the cost of the blocks of 342 // bits and the cost of the counts vector. 343 return BlockCeil(n) * Block::kBits + BlockCeil(n) * sizeof(uint32_t); 344 } 345 346 private: 347 friend class internal::BaseIterator; 348 friend class internal::AllBitsIterator; 349 friend class internal::SetBitsIterator; 350 351 // Represents the offset of a bit within a block. 352 struct BlockOffset { 353 uint16_t word_idx; 354 uint16_t bit_idx; 355 }; 356 357 // Represents the address of a bit within the bitvector. 358 struct Address { 359 uint32_t block_idx; 360 BlockOffset block_offset; 361 }; 362 363 // Represents the smallest collection of bits we can refer to as 364 // one unit. 365 // 366 // Currently, this is implemented as a 64 bit integer as this is the 367 // largest type which we can assume to be present on all platforms. 368 class BitWord { 369 public: 370 static constexpr uint32_t kBits = 64; 371 372 // Returns whether the bit at the given index is set. IsSet(uint32_t idx)373 bool IsSet(uint32_t idx) const { 374 PERFETTO_DCHECK(idx < kBits); 375 return (word_ >> idx) & 1ull; 376 } 377 378 // Bitwise ors the given |mask| to the current value. Or(uint64_t mask)379 void Or(uint64_t mask) { word_ |= mask; } 380 381 // Sets the bit at the given index to true. Set(uint32_t idx)382 void Set(uint32_t idx) { 383 PERFETTO_DCHECK(idx < kBits); 384 385 // Or the value for the true shifted up to |idx| with the word. 386 Or(1ull << idx); 387 } 388 389 // Sets the bit at the given index to false. Clear(uint32_t idx)390 void Clear(uint32_t idx) { 391 PERFETTO_DCHECK(idx < kBits); 392 393 // And the integer of all bits set apart from |idx| with the word. 394 word_ &= ~(1ull << idx); 395 } 396 397 // Clears all the bits (i.e. sets the atom to zero). ClearAll()398 void ClearAll() { word_ = 0; } 399 400 // Returns the index of the nth set bit. 401 // Undefined if |n| >= |GetNumBitsSet()|. IndexOfNthSet(uint32_t n)402 uint16_t IndexOfNthSet(uint32_t n) const { 403 PERFETTO_DCHECK(n < kBits); 404 405 // The below code is very dense but essentially computes the nth set 406 // bit inside |atom| in the "broadword" style of programming (sometimes 407 // referred to as "SIMD within a register"). 408 // 409 // Instead of treating a uint64 as an individual unit, broadword 410 // algorithms treat them as a packed vector of uint8. By doing this, they 411 // allow branchless algorithms when considering bits of a uint64. 412 // 413 // In benchmarks, this algorithm has found to be the fastest, portable 414 // way of computing the nth set bit (if we were only targetting new 415 // versions of x64, we could also use pdep + ctz but unfortunately 416 // this would fail on WASM - this about 2.5-3x faster on x64). 417 // 418 // The code below was taken from the paper 419 // http://vigna.di.unimi.it/ftp/papers/Broadword.pdf 420 uint64_t s = word_ - ((word_ & 0xAAAAAAAAAAAAAAAA) >> 1); 421 s = (s & 0x3333333333333333) + ((s >> 2) & 0x3333333333333333); 422 s = ((s + (s >> 4)) & 0x0F0F0F0F0F0F0F0F) * L8; 423 424 uint64_t b = (BwLessThan(s, n * L8) >> 7) * L8 >> 53 & ~7ull; 425 uint64_t l = n - ((s << 8) >> b & 0xFF); 426 s = (BwGtZero(((word_ >> b & 0xFF) * L8) & 0x8040201008040201) >> 7) * L8; 427 428 uint64_t ret = b + ((BwLessThan(s, l * L8) >> 7) * L8 >> 56); 429 430 return static_cast<uint16_t>(ret); 431 } 432 433 // Returns the number of set bits. GetNumBitsSet()434 uint32_t GetNumBitsSet() const { 435 return static_cast<uint32_t>(PERFETTO_POPCOUNT(word_)); 436 } 437 438 // Returns the number of set bits up to and including the bit at |idx|. GetNumBitsSet(uint32_t idx)439 uint32_t GetNumBitsSet(uint32_t idx) const { 440 PERFETTO_DCHECK(idx < kBits); 441 return static_cast<uint32_t>(PERFETTO_POPCOUNT(WordUntil(idx))); 442 } 443 444 // Retains all bits up to and including the bit at |idx| and clears 445 // all bits after this point. ClearAfter(uint32_t idx)446 void ClearAfter(uint32_t idx) { 447 PERFETTO_DCHECK(idx < kBits); 448 word_ = WordUntil(idx); 449 } 450 451 // Sets all bits between the bit at |start| and |end| (inclusive). Set(uint32_t start,uint32_t end)452 void Set(uint32_t start, uint32_t end) { 453 uint32_t diff = end - start; 454 word_ |= (MaskAllBitsSetUntil(diff) << static_cast<uint64_t>(start)); 455 } 456 457 private: 458 // Constant with all the low bit of every byte set. 459 static constexpr uint64_t L8 = 0x0101010101010101; 460 461 // Constant with all the high bit of every byte set. 462 static constexpr uint64_t H8 = 0x8080808080808080; 463 464 // Returns a packed uint64 encoding whether each byte of x is less 465 // than each corresponding byte of y. 466 // This is computed in the "broadword" style of programming; see 467 // IndexOfNthSet for details on this. BwLessThan(uint64_t x,uint64_t y)468 static uint64_t BwLessThan(uint64_t x, uint64_t y) { 469 return (((y | H8) - (x & ~H8)) ^ x ^ y) & H8; 470 } 471 472 // Returns a packed uint64 encoding whether each byte of x is greater 473 // than or equal zero. 474 // This is computed in the "broadword" style of programming; see 475 // IndexOfNthSet for details on this. BwGtZero(uint64_t x)476 static uint64_t BwGtZero(uint64_t x) { return (((x | H8) - L8) | x) & H8; } 477 478 // Returns the bits up to and including the bit at |idx|. WordUntil(uint32_t idx)479 uint64_t WordUntil(uint32_t idx) const { 480 PERFETTO_DCHECK(idx < kBits); 481 482 // To understand what is happeninng here, consider an example. 483 // Suppose we want to all the bits up to the 7th bit in the atom 484 // 7th 485 // | 486 // v 487 // atom: 01010101011111000 488 // 489 // The easiest way to do this would be if we had a mask with only 490 // the bottom 7 bits set: 491 // mask: 00000000001111111 492 uint64_t mask = MaskAllBitsSetUntil(idx); 493 494 // Finish up by anding the the atom with the computed msk. 495 return word_ & mask; 496 } 497 498 // Return a mask of all the bits up to and including bit at |idx|. MaskAllBitsSetUntil(uint32_t idx)499 static uint64_t MaskAllBitsSetUntil(uint32_t idx) { 500 // Start with 1 and shift it up (idx + 1) bits we get: 501 // top : 00000000010000000 502 uint64_t top = 1ull << ((idx + 1ull) % kBits); 503 504 // We need to handle the case where idx == 63. In this case |top| will be 505 // zero because 1 << ((idx + 1) % 64) == 1 << (64 % 64) == 1. 506 // In this case, we actually want top == 0. We can do this by shifting 507 // down by (idx + 1) / kBits - this will be a noop for every index other 508 // than idx == 63. This should also be free on x86 because of the mod 509 // instruction above. 510 top = top >> ((idx + 1) / kBits); 511 512 // Then if we take away 1, we get precisely the mask we want. 513 return top - 1u; 514 } 515 516 uint64_t word_ = 0; 517 }; 518 519 // Represents a group of bits with a bitcount such that it is 520 // efficient to work on these bits. 521 // 522 // On x86 architectures we generally target for trace processor, the 523 // size of a cache line is 64 bytes (or 512 bits). For this reason, 524 // we make the size of the block contain 8 atoms as 8 * 64 == 512. 525 // 526 // TODO(lalitm): investigate whether we should tune this value for 527 // WASM and ARM. 528 class Block { 529 public: 530 // See class documentation for how these constants are chosen. 531 static constexpr uint16_t kWords = 8; 532 static constexpr uint32_t kBits = kWords * BitWord::kBits; 533 534 // Returns whether the bit at the given address is set. IsSet(const BlockOffset & addr)535 bool IsSet(const BlockOffset& addr) const { 536 PERFETTO_DCHECK(addr.word_idx < kWords); 537 538 return words_[addr.word_idx].IsSet(addr.bit_idx); 539 } 540 541 // Sets the bit at the given address to true. Set(const BlockOffset & addr)542 void Set(const BlockOffset& addr) { 543 PERFETTO_DCHECK(addr.word_idx < kWords); 544 545 words_[addr.word_idx].Set(addr.bit_idx); 546 } 547 548 // Sets the bit at the given address to false. Clear(const BlockOffset & addr)549 void Clear(const BlockOffset& addr) { 550 PERFETTO_DCHECK(addr.word_idx < kWords); 551 552 words_[addr.word_idx].Clear(addr.bit_idx); 553 } 554 555 // Gets the offset of the nth set bit in this block. IndexOfNthSet(uint32_t n)556 BlockOffset IndexOfNthSet(uint32_t n) const { 557 uint32_t count = 0; 558 for (uint16_t i = 0; i < kWords; ++i) { 559 // Keep a running count of all the set bits in the atom. 560 uint32_t value = count + words_[i].GetNumBitsSet(); 561 if (value <= n) { 562 count = value; 563 continue; 564 } 565 566 // The running count of set bits is more than |n|. That means this atom 567 // contains the bit we are looking for. 568 569 // Take away the number of set bits to the start of this atom from |n|. 570 uint32_t set_in_atom = n - count; 571 572 // Figure out the index of the set bit inside the atom and create the 573 // address of this bit from that. 574 uint16_t bit_idx = words_[i].IndexOfNthSet(set_in_atom); 575 PERFETTO_DCHECK(bit_idx < 64); 576 return BlockOffset{i, bit_idx}; 577 } 578 PERFETTO_FATAL("Index out of bounds"); 579 } 580 581 // Gets the number of set bits within a block up to and including the bit 582 // at the given address. GetNumBitsSet(const BlockOffset & addr)583 uint32_t GetNumBitsSet(const BlockOffset& addr) const { 584 PERFETTO_DCHECK(addr.word_idx < kWords); 585 586 // Count all the set bits in the atom until we reach the last atom 587 // index. 588 uint32_t count = 0; 589 for (uint32_t i = 0; i < addr.word_idx; ++i) { 590 count += words_[i].GetNumBitsSet(); 591 } 592 593 // For the last atom, only count the bits upto and including the bit 594 // index. 595 return count + words_[addr.word_idx].GetNumBitsSet(addr.bit_idx); 596 } 597 598 // Retains all bits up to and including the bit at |addr| and clears 599 // all bits after this point. ClearAfter(const BlockOffset & offset)600 void ClearAfter(const BlockOffset& offset) { 601 PERFETTO_DCHECK(offset.word_idx < kWords); 602 603 // In the first atom, keep the bits until the address specified. 604 words_[offset.word_idx].ClearAfter(offset.bit_idx); 605 606 // For all subsequent atoms, we just clear the whole atom. 607 for (uint32_t i = offset.word_idx + 1; i < kWords; ++i) { 608 words_[i].ClearAll(); 609 } 610 } 611 612 // Set all the bits between the offsets given by |start| and |end| 613 // (inclusive). Set(const BlockOffset & start,const BlockOffset & end)614 void Set(const BlockOffset& start, const BlockOffset& end) { 615 if (start.word_idx == end.word_idx) { 616 // If there is only one word we will change, just set the range within 617 // the word. 618 words_[start.word_idx].Set(start.bit_idx, end.bit_idx); 619 return; 620 } 621 622 // Otherwise, we have more than one word to set. To do this, we will 623 // do this in three steps. 624 625 // First, we set the first word from the start to the end of the word. 626 words_[start.word_idx].Set(start.bit_idx, BitWord::kBits - 1); 627 628 // Next, we set all words (except the last). 629 for (uint32_t i = start.word_idx + 1; i < end.word_idx; ++i) { 630 words_[i].Set(0, BitWord::kBits - 1); 631 } 632 633 // Finally, we set the word block from the start to the end offset. 634 words_[end.word_idx].Set(0, end.bit_idx); 635 } 636 637 template <typename Filler> FromFiller(uint32_t offset,Filler f)638 static Block FromFiller(uint32_t offset, Filler f) { 639 // We choose to iterate the bits as the outer loop as this allows us 640 // to reuse the mask and the bit offset between iterations of the loop. 641 // This makes a small (but noticable) impact in the performance of this 642 // function. 643 Block b; 644 for (uint32_t i = 0; i < BitWord::kBits; ++i) { 645 uint64_t mask = 1ull << i; 646 uint32_t offset_with_bit = offset + i; 647 for (uint32_t j = 0; j < Block::kWords; ++j) { 648 bool res = f(offset_with_bit + j * BitWord::kBits); 649 b.words_[j].Or(res ? mask : 0); 650 } 651 } 652 return b; 653 } 654 655 private: 656 std::array<BitWord, kWords> words_{}; 657 }; 658 659 BitVector(std::vector<Block> blocks, 660 std::vector<uint32_t> counts, 661 uint32_t size); 662 663 BitVector(const BitVector&) = delete; 664 BitVector& operator=(const BitVector&) = delete; 665 666 // Set all the bits between the addresses given by |start| and |end| 667 // (inclusive). 668 // Note: this method does not update the counts vector - that is the 669 // responibility of the caller. Set(const Address & start,const Address & end)670 void Set(const Address& start, const Address& end) { 671 static constexpr BlockOffset kFirstBlockOffset = BlockOffset{0, 0}; 672 static constexpr BlockOffset kLastBlockOffset = 673 BlockOffset{Block::kWords - 1, BitWord::kBits - 1}; 674 675 if (start.block_idx == end.block_idx) { 676 // If there is only one block we will change, just set the range within 677 // the block. 678 blocks_[start.block_idx].Set(start.block_offset, end.block_offset); 679 return; 680 } 681 682 // Otherwise, we have more than one block to set. To do this, we will 683 // do this in three steps. 684 685 // First, we set the first block from the start to the end of the block. 686 blocks_[start.block_idx].Set(start.block_offset, kLastBlockOffset); 687 688 // Next, we set all blocks (except the last). 689 for (uint32_t i = start.block_idx + 1; i < end.block_idx; ++i) { 690 blocks_[i].Set(kFirstBlockOffset, kLastBlockOffset); 691 } 692 693 // Finally, we set the last block from the start to the end offset. 694 blocks_[end.block_idx].Set(kFirstBlockOffset, end.block_offset); 695 } 696 697 // Helper function to append a bit. Generally, prefer to call AppendTrue 698 // or AppendFalse instead of this function if you know the type - they will 699 // be faster. Append(bool value)700 void Append(bool value) { 701 if (value) { 702 AppendTrue(); 703 } else { 704 AppendFalse(); 705 } 706 } 707 IndexToAddress(uint32_t idx)708 static Address IndexToAddress(uint32_t idx) { 709 Address a; 710 a.block_idx = idx / Block::kBits; 711 712 uint16_t bit_idx_inside_block = idx % Block::kBits; 713 a.block_offset.word_idx = bit_idx_inside_block / BitWord::kBits; 714 a.block_offset.bit_idx = bit_idx_inside_block % BitWord::kBits; 715 return a; 716 } 717 AddressToIndex(Address addr)718 static uint32_t AddressToIndex(Address addr) { 719 return addr.block_idx * Block::kBits + 720 addr.block_offset.word_idx * BitWord::kBits + 721 addr.block_offset.bit_idx; 722 } 723 724 // Rounds |idx| up to the nearest block boundary and returns the block 725 // index. If |idx| is already on a block boundary, the current block is 726 // returned. 727 // 728 // This is useful to be able to find indices where "fast" algorithms can start 729 // which work on entire blocks. BlockCeil(uint32_t idx)730 static constexpr uint32_t BlockCeil(uint32_t idx) { 731 // Adding |Block::kBits - 1| gives us a quick way to get the ceil. We 732 // do this instead of adding 1 at the end because that gives incorrect 733 // answers for index % Block::kBits == 0. 734 return (idx + Block::kBits - 1) / Block::kBits; 735 } 736 737 // Returns the index of the block which would store |idx|. BlockFloor(uint32_t idx)738 static constexpr uint32_t BlockFloor(uint32_t idx) { 739 return idx / Block::kBits; 740 } 741 742 // Converts a block index to a index in the BitVector. BlockToIndex(uint32_t block)743 static constexpr uint32_t BlockToIndex(uint32_t block) { 744 return block * Block::kBits; 745 } 746 747 uint32_t size_ = 0; 748 std::vector<uint32_t> counts_; 749 std::vector<Block> blocks_; 750 }; 751 752 } // namespace trace_processor 753 } // namespace perfetto 754 755 #endif // SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_ 756