• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_
18 #define SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_
19 
20 #include <stddef.h>
21 #include <stdint.h>
22 #include <stdio.h>
23 
24 #include <algorithm>
25 #include <array>
26 #include <optional>
27 #include <vector>
28 
29 #include "perfetto/base/logging.h"
30 
31 namespace perfetto {
32 namespace trace_processor {
33 
34 namespace internal {
35 
36 class BaseIterator;
37 class AllBitsIterator;
38 class SetBitsIterator;
39 
40 }  // namespace internal
41 
42 // A bitvector which compactly stores a vector of bools using a single bit
43 // for each bool.
44 class BitVector {
45  public:
46   using AllBitsIterator = internal::AllBitsIterator;
47   using SetBitsIterator = internal::SetBitsIterator;
48 
49   static constexpr uint32_t kBitsInWord = 64;
50 
51   // Builder class which allows efficiently creating a BitVector by appending
52   // words. Using this class is generally far more efficient than trying to set
53   // bits directly in a BitVector or even appending one bit at a time.
54   class Builder {
55    public:
56     // Creates a Builder for building a BitVector of |size| bits.
Builder(uint32_t size)57     explicit Builder(uint32_t size) : words_(WordCount(size)), size_(size) {}
58 
59     // Skips forward |n| bits leaving them as zeroed.
Skip(uint32_t n)60     void Skip(uint32_t n) {
61       PERFETTO_DCHECK(global_bit_offset_ + n <= size_);
62       global_bit_offset_ += n;
63     }
64 
65     // Appends a single bit to the builder.
66     // Note: |AppendWord| is far more efficient than this method so should be
67     // preferred.
Append(bool value)68     void Append(bool value) {
69       PERFETTO_DCHECK(global_bit_offset_ < size_);
70 
71       words_[global_bit_offset_ / BitWord::kBits] |=
72           static_cast<uint64_t>(value) << global_bit_offset_ % BitWord::kBits;
73       global_bit_offset_++;
74     }
75 
76     // Appends a whole word to the Builder. Builder has to end on a word
77     // boundary before calling this function.
AppendWord(uint64_t word)78     void AppendWord(uint64_t word) {
79       PERFETTO_DCHECK(global_bit_offset_ % BitWord::kBits == 0);
80       PERFETTO_DCHECK(global_bit_offset_ + BitWord::kBits <= size_);
81       words_[global_bit_offset_ / BitWord::kBits] = word;
82       global_bit_offset_ += BitWord::kBits;
83     }
84 
85     // Creates a BitVector from this Builder.
Build()86     BitVector Build() && {
87       Address addr = IndexToAddress(size_ - 1);
88       uint32_t no_blocks = addr.block_idx + 1;
89       std::vector<uint32_t> counts(no_blocks);
90 
91       // Calculate counts only for full blocks.
92       for (uint32_t i = 1; i < no_blocks; ++i) {
93         counts[i] = counts[i - 1] +
94                     ConstBlock(&words_[Block::kWords * (i - 1)]).CountSetBits();
95       }
96       return BitVector{std::move(words_), std::move(counts), size_};
97     }
98 
99     // Returns the number of bits which are in complete words which can be
100     // appended to this builder before having to fallback to |Append| due to
101     // being close to the end.
BitsInCompleteWordsUntilFull()102     uint32_t BitsInCompleteWordsUntilFull() {
103       uint32_t next_word = WordCount(global_bit_offset_);
104       uint32_t end_word = WordFloor(size_);
105       uint32_t complete_words = next_word < end_word ? end_word - next_word : 0;
106       return complete_words * BitWord::kBits;
107     }
108 
109     // Returns the number of bits which should be appended using |Append| either
110     // hitting a word boundary (and thus able to use |AppendWord|) or until the
111     // bitvector is full (i.e. no more Appends should happen), whichever would
112     // happen first.
BitsUntilWordBoundaryOrFull()113     uint32_t BitsUntilWordBoundaryOrFull() {
114       if (global_bit_offset_ == 0 && size_ < BitWord::kBits) {
115         return size_;
116       }
117       uint8_t word_bit_offset = global_bit_offset_ % BitWord::kBits;
118       return std::min(BitsUntilFull(),
119                       (BitWord::kBits - word_bit_offset) % BitWord::kBits);
120     }
121 
122     // Returns the number of bits which should be appended using |Append| before
123     // hitting a word boundary (and thus able to use |AppendWord|) or until the
124     // bitvector is full (i.e. no more Appends should happen).
BitsUntilFull()125     uint32_t BitsUntilFull() { return size_ - global_bit_offset_; }
126 
127    private:
128     std::vector<uint64_t> words_;
129     uint32_t global_bit_offset_ = 0;
130     uint32_t size_ = 0;
131   };
132 
133   // Creates an empty bitvector.
134   BitVector();
135 
136   explicit BitVector(std::initializer_list<bool> init);
137 
138   // Creates a bitvector of |count| size filled with |value|.
139   explicit BitVector(uint32_t count, bool value = false);
140 
141   // Enable moving bitvectors as they have no unmovable state.
142   BitVector(BitVector&&) noexcept = default;
143   BitVector& operator=(BitVector&&) = default;
144 
145   // Create a copy of the bitvector.
146   BitVector Copy() const;
147 
148   // Create a bitwise Not copy of the bitvector.
149   BitVector Not() const;
150 
151   // Returns the size of the bitvector.
size()152   uint32_t size() const { return static_cast<uint32_t>(size_); }
153 
154   // Returns whether the bit at |idx| is set.
IsSet(uint32_t idx)155   bool IsSet(uint32_t idx) const {
156     PERFETTO_DCHECK(idx < size());
157 
158     Address addr = IndexToAddress(idx);
159     return ConstBlockFromIndex(addr.block_idx).IsSet(addr.block_offset);
160   }
161 
162   // Returns the number of set bits in the bitvector.
CountSetBits()163   uint32_t CountSetBits() const { return CountSetBits(size()); }
164 
165   // Returns the number of set bits between the start of the bitvector
166   // (inclusive) and the index |end| (exclusive).
CountSetBits(uint32_t end)167   uint32_t CountSetBits(uint32_t end) const {
168     if (end == 0)
169       return 0;
170 
171     // Although the external interface we present uses an exclusive |end|,
172     // internally it's a lot nicer to work with an inclusive |end| (mainly
173     // because we get block rollovers on exclusive ends which means we need
174     // to have if checks to ensure we don't overflow the number of blocks).
175     Address addr = IndexToAddress(end - 1);
176 
177     // Add the number of set bits until the start of the block to the number
178     // of set bits until the end address inside the block.
179     return counts_[addr.block_idx] +
180            ConstBlockFromIndex(addr.block_idx).CountSetBits(addr.block_offset);
181   }
182 
183   // Returns the index of the |n|th set bit. Should only be called with |n| <
184   // CountSetBits().
IndexOfNthSet(uint32_t n)185   uint32_t IndexOfNthSet(uint32_t n) const {
186     PERFETTO_DCHECK(n < CountSetBits());
187 
188     // First search for the block which, up until the start of it, has more than
189     // n bits set. Note that this should never return |counts.begin()| as
190     // that should always be 0.
191     // TODO(lalitm): investigate whether we can make this faster with small
192     // binary search followed by a linear search instead of binary searching the
193     // full way.
194     auto it = std::upper_bound(counts_.begin(), counts_.end(), n);
195     PERFETTO_DCHECK(it != counts_.begin());
196 
197     // Go back one block to find the block which has the bit we are looking for.
198     uint32_t block_idx =
199         static_cast<uint32_t>(std::distance(counts_.begin(), it) - 1);
200 
201     // Figure out how many set bits forward we are looking inside the block
202     // by taking away the number of bits at the start of the block from n.
203     uint32_t set_in_block = n - counts_[block_idx];
204 
205     // Compute the address of the bit in the block then convert the full
206     // address back to an index.
207     BlockOffset block_offset =
208         ConstBlockFromIndex(block_idx).IndexOfNthSet(set_in_block);
209     return AddressToIndex(Address{block_idx, block_offset});
210   }
211 
212   // Sets the bit at index |idx| to true and returns the previous value.
Set(uint32_t idx)213   bool Set(uint32_t idx) {
214     // Set the bit to the correct value inside the block but store the old
215     // bit to help fix the counts.
216     auto addr = IndexToAddress(idx);
217     bool old_value =
218         ConstBlockFromIndex(addr.block_idx).IsSet(addr.block_offset);
219 
220     // If the old value was unset, set the bit and add one to the count.
221     if (PERFETTO_LIKELY(!old_value)) {
222       BlockFromIndex(addr.block_idx).Set(addr.block_offset);
223 
224       uint32_t size = static_cast<uint32_t>(counts_.size());
225       for (uint32_t i = addr.block_idx + 1; i < size; ++i) {
226         counts_[i]++;
227       }
228     }
229     return old_value;
230   }
231 
232   // Sets the bit at index |idx| to false.
Clear(uint32_t idx)233   void Clear(uint32_t idx) {
234     // Set the bit to the correct value inside the block but store the old
235     // bit to help fix the counts.
236     auto addr = IndexToAddress(idx);
237     bool old_value =
238         ConstBlockFromIndex(addr.block_idx).IsSet(addr.block_offset);
239 
240     // If the old value was set, clear the bit and subtract one from all the
241     // counts.
242     if (PERFETTO_LIKELY(old_value)) {
243       BlockFromIndex(addr.block_idx).Clear(addr.block_offset);
244 
245       uint32_t size = static_cast<uint32_t>(counts_.size());
246       for (uint32_t i = addr.block_idx + 1; i < size; ++i) {
247         counts_[i]--;
248       }
249     }
250   }
251 
252   // Appends true to the bitvector.
AppendTrue()253   void AppendTrue() {
254     AppendFalse();
255     Address addr = IndexToAddress(size() - 1);
256     BlockFromIndex(addr.block_idx).Set(addr.block_offset);
257   }
258 
259   // Appends false to the bitvector.
AppendFalse()260   void AppendFalse() {
261     Address addr = IndexToAddress(size_);
262     uint32_t old_blocks_size = BlockCount();
263     uint32_t new_blocks_size = addr.block_idx + 1;
264 
265     if (PERFETTO_UNLIKELY(new_blocks_size > old_blocks_size)) {
266       uint32_t t = CountSetBits();
267       words_.resize(words_.size() + Block::kWords);
268       counts_.emplace_back(t);
269     }
270 
271     size_++;
272     // We don't need to clear the bit as we ensure that anything after
273     // size_ is always set to false.
274   }
275 
276   // Resizes the BitVector to the given |size|.
277   // Truncates the BitVector if |size| < |size()| or fills the new space with
278   // |filler| if |size| > |size()|. Calling this method is a noop if |size| ==
279   // |size()|.
280   void Resize(uint32_t new_size, bool filler = false);
281 
282   // Creates a BitVector of size |end| with the bits between |start| and |end|
283   // filled by calling the filler function |f(index of bit)|.
284   //
285   // As an example, suppose Range(3, 7, [](x) { return x < 5 }). This would
286   // result in the following bitvector:
287   // [0 0 0 1 1 0 0]
288   template <typename Filler = bool(uint32_t)>
Range(uint32_t start,uint32_t end,Filler f)289   static BitVector Range(uint32_t start, uint32_t end, Filler f) {
290     // Compute the block index and bitvector index where we start and end
291     // working one block at a time.
292     uint32_t start_fast_block = BlockCount(start);
293     uint32_t start_fast_idx = BlockToIndex(start_fast_block);
294     BitVector bv(start, false);
295 
296     // Minimum value of start_fast_idx is numer of bits in block, so we need to
297     // seperate calculation for shorter ranges.
298     if (start_fast_idx > end) {
299       for (uint32_t i = start; i < end; ++i) {
300         bv.Append(f(i));
301       }
302       bv.counts_.emplace_back(bv.CountSetBits());
303       bv.size_ = end;
304       return bv;
305     }
306 
307     uint32_t end_fast_block = BlockFloor(end);
308     uint32_t end_fast_idx = BlockToIndex(end_fast_block);
309 
310     // Fill up to |start_fast_index| with values from the filler.
311     for (uint32_t i = start; i < start_fast_idx; ++i) {
312       bv.Append(f(i));
313     }
314 
315     // Assert words_ vector is full and size_ is properly calculated.
316     PERFETTO_DCHECK(bv.words_.size() % Block::kWords == 0);
317     PERFETTO_DCHECK(bv.words_.size() * BitWord::kBits == bv.size_);
318 
319     // At this point we can work one block at a time.
320     bv.words_.resize(bv.words_.size() +
321                      Block::kWords * (end_fast_block - start_fast_block));
322     for (uint32_t i = start_fast_block; i < end_fast_block; ++i) {
323       uint64_t* block_start_word = &bv.words_[i * Block::kWords];
324       Block(block_start_word).FromFiller(bv.size_, f);
325       bv.counts_.emplace_back(bv.CountSetBits());
326       bv.size_ += Block::kBits;
327     }
328 
329     // Add the last few elements to finish up to |end|.
330     for (uint32_t i = end_fast_idx; i < end; ++i) {
331       bv.Append(f(i));
332     }
333 
334     return bv;
335   }
336 
337   // Creates a BitVector of size |end| bit the bits between |start| and |end|
338   // filled with corresponding bits |this| BitVector.
339   BitVector IntersectRange(uint32_t range_start, uint32_t range_end) const;
340 
341   // Requests the removal of unused capacity.
342   // Matches the semantics of std::vector::shrink_to_fit.
ShrinkToFit()343   void ShrinkToFit() {
344     words_.shrink_to_fit();
345     counts_.shrink_to_fit();
346   }
347 
348   // Updates the ith set bit of this bitvector with the value of
349   // |other.IsSet(i)|.
350   //
351   // This is the best way to batch update all the bits which are set; for
352   // example when filtering rows, we want to filter all rows which are currently
353   // included but ignore rows which have already been excluded.
354   //
355   // For example suppose the following:
356   // this:  1 1 0 0 1 0 1
357   // other: 0 1 1 0
358   // This will change this to the following:
359   // this:  0 1 0 0 1 0 0
360   // TODO(lalitm): investigate whether we should just change this to And.
361   void UpdateSetBits(const BitVector& other);
362 
363   // Iterate all the bits in the BitVector.
364   //
365   // Usage:
366   // for (auto it = bv.IterateAllBits(); it; it.Next()) {
367   //   ...
368   // }
369   AllBitsIterator IterateAllBits() const;
370 
371   // Iterate all the set bits in the BitVector.
372   //
373   // Usage:
374   // for (auto it = bv.IterateSetBits(); it; it.Next()) {
375   //   ...
376   // }
377   SetBitsIterator IterateSetBits() const;
378 
379   // Returns the approximate cost (in bytes) of storing a bitvector with size
380   // |n|. This can be used to make decisions about whether using a BitVector is
381   // worthwhile.
382   // This cost should not be treated as exact - it just gives an indication of
383   // the memory needed.
ApproxBytesCost(uint32_t n)384   static constexpr uint32_t ApproxBytesCost(uint32_t n) {
385     // The two main things making up a bitvector is the cost of the blocks of
386     // bits and the cost of the counts vector.
387     return BlockCount(n) * Block::kBits + BlockCount(n) * sizeof(uint32_t);
388   }
389 
390  private:
391   friend class internal::BaseIterator;
392   friend class internal::AllBitsIterator;
393   friend class internal::SetBitsIterator;
394 
395   // Represents the offset of a bit within a block.
396   struct BlockOffset {
397     uint16_t word_idx;
398     uint16_t bit_idx;
399   };
400 
401   // Represents the address of a bit within the bitvector.
402   struct Address {
403     uint32_t block_idx;
404     BlockOffset block_offset;
405   };
406 
407   // Represents the smallest collection of bits we can refer to as
408   // one unit.
409   //
410   // Currently, this is implemented as a 64 bit integer as this is the
411   // largest type which we can assume to be present on all platforms.
412   class BitWord {
413    public:
414     static constexpr uint32_t kBits = 64;
415 
BitWord(uint64_t * word)416     explicit BitWord(uint64_t* word) : word_(word) {}
417 
418     // Bitwise ors the given |mask| to the current value.
Or(uint64_t mask)419     void Or(uint64_t mask) { *word_ |= mask; }
420 
421     // Sets the bit at the given index to true.
Set(uint32_t idx)422     void Set(uint32_t idx) {
423       PERFETTO_DCHECK(idx < kBits);
424 
425       // Or the value for the true shifted up to |idx| with the word.
426       Or(1ull << idx);
427     }
428 
429     // Sets the bit at the given index to false.
Clear(uint32_t idx)430     void Clear(uint32_t idx) {
431       PERFETTO_DCHECK(idx < kBits);
432 
433       // And the integer of all bits set apart from |idx| with the word.
434       *word_ &= ~(1ull << idx);
435     }
436 
437     // Clears all the bits (i.e. sets the atom to zero).
ClearAll()438     void ClearAll() { *word_ = 0; }
439 
440     // Retains all bits up to and including the bit at |idx| and clears
441     // all bits after this point.
ClearAfter(uint32_t idx)442     void ClearAfter(uint32_t idx) {
443       PERFETTO_DCHECK(idx < kBits);
444       *word_ = WordUntil(idx);
445     }
446 
447     // Sets all bits between the bit at |start| and |end| (inclusive).
Set(uint32_t start,uint32_t end)448     void Set(uint32_t start, uint32_t end) {
449       uint32_t diff = end - start;
450       *word_ |= (MaskAllBitsSetUntil(diff) << static_cast<uint64_t>(start));
451     }
452 
453     // Return a mask of all the bits up to and including bit at |idx|.
MaskAllBitsSetUntil(uint32_t idx)454     static uint64_t MaskAllBitsSetUntil(uint32_t idx) {
455       // Start with 1 and shift it up (idx + 1) bits we get:
456       // top : 00000000010000000
457       uint64_t top = 1ull << ((idx + 1ull) % kBits);
458 
459       // We need to handle the case where idx == 63. In this case |top| will be
460       // zero because 1 << ((idx + 1) % 64) == 1 << (64 % 64) == 1.
461       // In this case, we actually want top == 0. We can do this by shifting
462       // down by (idx + 1) / kBits - this will be a noop for every index other
463       // than idx == 63. This should also be free on x86 because of the mod
464       // instruction above.
465       top = top >> ((idx + 1) / kBits);
466 
467       // Then if we take away 1, we get precisely the mask we want.
468       return top - 1u;
469     }
470 
471    private:
472     // Returns the bits up to and including the bit at |idx|.
WordUntil(uint32_t idx)473     uint64_t WordUntil(uint32_t idx) const {
474       PERFETTO_DCHECK(idx < kBits);
475 
476       // To understand what is happeninng here, consider an example.
477       // Suppose we want to all the bits up to the 7th bit in the atom
478       //               7th
479       //                |
480       //                v
481       // atom: 01010101011111000
482       //
483       // The easiest way to do this would be if we had a mask with only
484       // the bottom 7 bits set:
485       // mask: 00000000001111111
486       uint64_t mask = MaskAllBitsSetUntil(idx);
487 
488       // Finish up by and'ing the atom with the computed mask.
489       return *word_ & mask;
490     }
491 
492     uint64_t* word_;
493   };
494 
495   class ConstBitWord {
496    public:
497     static constexpr uint32_t kBits = 64;
498 
ConstBitWord(const uint64_t * word)499     explicit ConstBitWord(const uint64_t* word) : word_(word) {}
500 
501     // Returns whether the bit at the given index is set.
IsSet(uint32_t idx)502     bool IsSet(uint32_t idx) const {
503       PERFETTO_DCHECK(idx < kBits);
504       return (*word_ >> idx) & 1ull;
505     }
506 
507     // Bitwise not.
Not()508     uint64_t Not() const { return ~(*word_); }
509 
510     // Returns the index of the nth set bit.
511     // Undefined if |n| >= |CountSetBits()|.
IndexOfNthSet(uint32_t n)512     uint16_t IndexOfNthSet(uint32_t n) const {
513       PERFETTO_DCHECK(n < kBits);
514 
515       // The below code is very dense but essentially computes the nth set
516       // bit inside |atom| in the "broadword" style of programming (sometimes
517       // referred to as "SIMD within a register").
518       //
519       // Instead of treating a uint64 as an individual unit, broadword
520       // algorithms treat them as a packed vector of uint8. By doing this, they
521       // allow branchless algorithms when considering bits of a uint64.
522       //
523       // In benchmarks, this algorithm has found to be the fastest, portable
524       // way of computing the nth set bit (if we were only targetting new
525       // versions of x64, we could also use pdep + ctz but unfortunately
526       // this would fail on WASM - this about 2.5-3x faster on x64).
527       //
528       // The code below was taken from the paper
529       // http://vigna.di.unimi.it/ftp/papers/Broadword.pdf
530       uint64_t s = *word_ - ((*word_ & 0xAAAAAAAAAAAAAAAA) >> 1);
531       s = (s & 0x3333333333333333) + ((s >> 2) & 0x3333333333333333);
532       s = ((s + (s >> 4)) & 0x0F0F0F0F0F0F0F0F) * L8;
533 
534       uint64_t b = (BwLessThan(s, n * L8) >> 7) * L8 >> 53 & ~7ull;
535       uint64_t l = n - ((s << 8) >> b & 0xFF);
536       s = (BwGtZero(((*word_ >> b & 0xFF) * L8) & 0x8040201008040201) >> 7) *
537           L8;
538 
539       uint64_t ret = b + ((BwLessThan(s, l * L8) >> 7) * L8 >> 56);
540 
541       return static_cast<uint16_t>(ret);
542     }
543 
544     // Returns the number of set bits.
CountSetBits()545     uint32_t CountSetBits() const {
546       return static_cast<uint32_t>(PERFETTO_POPCOUNT(*word_));
547     }
548 
549     // Returns the number of set bits up to and including the bit at |idx|.
CountSetBits(uint32_t idx)550     uint32_t CountSetBits(uint32_t idx) const {
551       PERFETTO_DCHECK(idx < kBits);
552       return static_cast<uint32_t>(PERFETTO_POPCOUNT(WordUntil(idx)));
553     }
554 
555    private:
556     // Constant with all the low bit of every byte set.
557     static constexpr uint64_t L8 = 0x0101010101010101;
558 
559     // Constant with all the high bit of every byte set.
560     static constexpr uint64_t H8 = 0x8080808080808080;
561 
562     // Returns a packed uint64 encoding whether each byte of x is less
563     // than each corresponding byte of y.
564     // This is computed in the "broadword" style of programming; see
565     // IndexOfNthSet for details on this.
BwLessThan(uint64_t x,uint64_t y)566     static uint64_t BwLessThan(uint64_t x, uint64_t y) {
567       return (((y | H8) - (x & ~H8)) ^ x ^ y) & H8;
568     }
569 
570     // Returns a packed uint64 encoding whether each byte of x is greater
571     // than or equal zero.
572     // This is computed in the "broadword" style of programming; see
573     // IndexOfNthSet for details on this.
BwGtZero(uint64_t x)574     static uint64_t BwGtZero(uint64_t x) { return (((x | H8) - L8) | x) & H8; }
575 
576     // Returns the bits up to and including the bit at |idx|.
WordUntil(uint32_t idx)577     uint64_t WordUntil(uint32_t idx) const {
578       PERFETTO_DCHECK(idx < kBits);
579 
580       // To understand what is happeninng here, consider an example.
581       // Suppose we want to all the bits up to the 7th bit in the atom
582       //               7th
583       //                |
584       //                v
585       // atom: 01010101011111000
586       //
587       // The easiest way to do this would be if we had a mask with only
588       // the bottom 7 bits set:
589       // mask: 00000000001111111
590       uint64_t mask = BitWord::MaskAllBitsSetUntil(idx);
591 
592       // Finish up by and'ing the atom with the computed mask.
593       return *word_ & mask;
594     }
595 
596     const uint64_t* word_;
597   };
598 
599   // Represents a group of bits with a bitcount such that it is
600   // efficient to work on these bits.
601   //
602   // On x86 architectures we generally target for trace processor, the
603   // size of a cache line is 64 bytes (or 512 bits). For this reason,
604   // we make the size of the block contain 8 atoms as 8 * 64 == 512.
605   //
606   // TODO(lalitm): investigate whether we should tune this value for
607   // WASM and ARM.
608   class Block {
609    public:
610     // See class documentation for how these constants are chosen.
611     static constexpr uint16_t kWords = 8;
612     static constexpr uint32_t kBits = kWords * BitWord::kBits;
613 
Block(uint64_t * start_word)614     explicit Block(uint64_t* start_word) : start_word_(start_word) {}
615 
616     // Sets the bit at the given address to true.
Set(const BlockOffset & addr)617     void Set(const BlockOffset& addr) {
618       PERFETTO_DCHECK(addr.word_idx < kWords);
619       BitWord(&start_word_[addr.word_idx]).Set(addr.bit_idx);
620     }
621 
622     // Sets the bit at the given address to false.
Clear(const BlockOffset & addr)623     void Clear(const BlockOffset& addr) {
624       PERFETTO_DCHECK(addr.word_idx < kWords);
625 
626       BitWord(&start_word_[addr.word_idx]).Clear(addr.bit_idx);
627     }
628 
629     // Retains all bits up to and including the bit at |addr| and clears
630     // all bits after this point.
ClearAfter(const BlockOffset & offset)631     void ClearAfter(const BlockOffset& offset) {
632       PERFETTO_DCHECK(offset.word_idx < kWords);
633 
634       // In the first atom, keep the bits until the address specified.
635       BitWord(&start_word_[offset.word_idx]).ClearAfter(offset.bit_idx);
636 
637       // For all subsequent atoms, we just clear the whole atom.
638       for (uint32_t i = offset.word_idx + 1; i < kWords; ++i) {
639         BitWord(&start_word_[i]).ClearAll();
640       }
641     }
642 
643     // Set all the bits between the offsets given by |start| and |end|
644     // (inclusive).
Set(const BlockOffset & start,const BlockOffset & end)645     void Set(const BlockOffset& start, const BlockOffset& end) {
646       if (start.word_idx == end.word_idx) {
647         // If there is only one word we will change, just set the range within
648         // the word.
649         BitWord(&start_word_[start.word_idx]).Set(start.bit_idx, end.bit_idx);
650         return;
651       }
652 
653       // Otherwise, we have more than one word to set. To do this, we will
654       // do this in three steps.
655 
656       // First, we set the first word from the start to the end of the word.
657       BitWord(&start_word_[start.word_idx])
658           .Set(start.bit_idx, BitWord::kBits - 1);
659 
660       // Next, we set all words (except the last).
661       for (uint32_t i = start.word_idx + 1; i < end.word_idx; ++i) {
662         BitWord(&start_word_[i]).Set(0, BitWord::kBits - 1);
663       }
664 
665       // Finally, we set the word block from the start to the end offset.
666       BitWord(&start_word_[end.word_idx]).Set(0, end.bit_idx);
667     }
668 
669     template <typename Filler>
FromFiller(uint32_t offset,Filler f)670     void FromFiller(uint32_t offset, Filler f) {
671       // We choose to iterate the bits as the outer loop as this allows us
672       // to reuse the mask and the bit offset between iterations of the loop.
673       // This makes a small (but noticable) impact in the performance of this
674       // function.
675       for (uint32_t i = 0; i < BitWord::kBits; ++i) {
676         uint64_t mask = 1ull << i;
677         uint32_t offset_with_bit = offset + i;
678         for (uint32_t j = 0; j < Block::kWords; ++j) {
679           bool res = f(offset_with_bit + j * BitWord::kBits);
680           BitWord(&start_word_[j]).Or(res ? mask : 0);
681         }
682       }
683     }
684 
ReplaceWith(Block block)685     void ReplaceWith(Block block) {
686       for (uint32_t i = 0; i < BitWord::kBits; ++i) {
687         start_word_[i] = block.start_word()[i];
688       }
689     }
690 
start_word()691     uint64_t* start_word() { return start_word_; }
692 
693    private:
694     uint64_t* start_word_;
695   };
696 
697   class ConstBlock {
698    public:
699     // See class documentation for how these constants are chosen.
700     static constexpr uint16_t kWords = Block::kWords;
701     static constexpr uint32_t kBits = kWords * BitWord::kBits;
702 
ConstBlock(const uint64_t * start_word)703     explicit ConstBlock(const uint64_t* start_word) : start_word_(start_word) {}
ConstBlock(Block block)704     explicit ConstBlock(Block block) : start_word_(block.start_word()) {}
705 
706     // Returns whether the bit at the given address is set.
IsSet(const BlockOffset & addr)707     bool IsSet(const BlockOffset& addr) const {
708       PERFETTO_DCHECK(addr.word_idx < kWords);
709       return ConstBitWord(start_word_ + addr.word_idx).IsSet(addr.bit_idx);
710     }
711 
712     // Gets the offset of the nth set bit in this block.
IndexOfNthSet(uint32_t n)713     BlockOffset IndexOfNthSet(uint32_t n) const {
714       uint32_t count = 0;
715       for (uint16_t i = 0; i < kWords; ++i) {
716         // Keep a running count of all the set bits in the atom.
717         uint32_t value = count + ConstBitWord(start_word_ + i).CountSetBits();
718         if (value <= n) {
719           count = value;
720           continue;
721         }
722 
723         // The running count of set bits is more than |n|. That means this
724         // atom contains the bit we are looking for.
725 
726         // Take away the number of set bits to the start of this atom from
727         // |n|.
728         uint32_t set_in_atom = n - count;
729 
730         // Figure out the index of the set bit inside the atom and create the
731         // address of this bit from that.
732         uint16_t bit_idx =
733             ConstBitWord(start_word_ + i).IndexOfNthSet(set_in_atom);
734         PERFETTO_DCHECK(bit_idx < 64);
735         return BlockOffset{i, bit_idx};
736       }
737       PERFETTO_FATAL("Index out of bounds");
738     }
739 
740     // Gets the number of set bits within a block up to and including the bit
741     // at the given address.
CountSetBits(const BlockOffset & addr)742     uint32_t CountSetBits(const BlockOffset& addr) const {
743       PERFETTO_DCHECK(addr.word_idx < kWords);
744 
745       // Count all the set bits in the atom until we reach the last atom
746       // index.
747       uint32_t count = 0;
748       for (uint32_t i = 0; i < addr.word_idx; ++i) {
749         count += ConstBitWord(&start_word_[i]).CountSetBits();
750       }
751 
752       // For the last atom, only count the bits upto and including the bit
753       // index.
754       return count + ConstBitWord(&start_word_[addr.word_idx])
755                          .CountSetBits(addr.bit_idx);
756     }
757 
758     // Gets the number of set bits within a block up.
CountSetBits()759     uint32_t CountSetBits() const {
760       uint32_t count = 0;
761       for (uint32_t i = 0; i < kWords; ++i) {
762         count += ConstBitWord(&start_word_[i]).CountSetBits();
763       }
764       return count;
765     }
766 
767    private:
768     const uint64_t* start_word_;
769   };
770 
771   BitVector(std::vector<uint64_t> words,
772             std::vector<uint32_t> counts,
773             uint32_t size);
774 
775   BitVector(const BitVector&) = delete;
776   BitVector& operator=(const BitVector&) = delete;
777 
778   // Returns the number of 8 elements blocks in the bitvector.
BlockCount()779   uint32_t BlockCount() {
780     return static_cast<uint32_t>(words_.size()) / Block::kWords;
781   }
782 
BlockFromIndex(uint32_t idx)783   Block BlockFromIndex(uint32_t idx) {
784     PERFETTO_DCHECK(Block::kWords * (idx + 1) <= words_.size());
785 
786     uint64_t* start_word = &words_[Block::kWords * idx];
787     return Block(start_word);
788   }
789 
ConstBlockFromIndex(uint32_t idx)790   ConstBlock ConstBlockFromIndex(uint32_t idx) const {
791     PERFETTO_DCHECK(Block::kWords * (idx + 1) <= words_.size());
792 
793     return ConstBlock(&words_[Block::kWords * idx]);
794   }
795 
796   // Set all the bits between the addresses given by |start| and |end|
797   // (inclusive).
798   // Note: this method does not update the counts vector - that is the
799   // responsibility of the caller.
Set(const Address & start,const Address & end)800   void Set(const Address& start, const Address& end) {
801     static constexpr BlockOffset kFirstBlockOffset = BlockOffset{0, 0};
802     static constexpr BlockOffset kLastBlockOffset =
803         BlockOffset{Block::kWords - 1, BitWord::kBits - 1};
804 
805     if (start.block_idx == end.block_idx) {
806       // If there is only one block we will change, just set the range within
807       // the block.
808       BlockFromIndex(start.block_idx).Set(start.block_offset, end.block_offset);
809       return;
810     }
811 
812     // Otherwise, we have more than one block to set. To do this, we will
813     // do this in three steps.
814 
815     // First, we set the first block from the start to the end of the block.
816     BlockFromIndex(start.block_idx).Set(start.block_offset, kLastBlockOffset);
817 
818     // Next, we set all blocks (except the last).
819     for (uint32_t cur_block_idx = start.block_idx + 1;
820          cur_block_idx < end.block_idx; ++cur_block_idx) {
821       BlockFromIndex(cur_block_idx).Set(kFirstBlockOffset, kLastBlockOffset);
822     }
823 
824     // Finally, we set the last block from the start to the end offset.
825     BlockFromIndex(end.block_idx).Set(kFirstBlockOffset, end.block_offset);
826   }
827 
828   // Helper function to append a bit. Generally, prefer to call AppendTrue
829   // or AppendFalse instead of this function if you know the type - they will
830   // be faster.
Append(bool value)831   void Append(bool value) {
832     if (value) {
833       AppendTrue();
834     } else {
835       AppendFalse();
836     }
837   }
838 
839   // Returns the index of the word which would store |idx|.
WordFloor(uint32_t idx)840   static constexpr uint32_t WordFloor(uint32_t idx) {
841     return idx / BitWord::kBits;
842   }
843 
844   // Returns number of words (int64_t) required to store |bit_count| bits.
WordCount(uint32_t bit_count)845   static uint32_t WordCount(uint32_t bit_count) {
846     // See |BlockCount| for an explanation of this trick.
847     return (bit_count + BitWord::kBits - 1) / BitWord::kBits;
848   }
849 
IndexToAddress(uint32_t idx)850   static Address IndexToAddress(uint32_t idx) {
851     Address a;
852     a.block_idx = idx / Block::kBits;
853 
854     uint16_t bit_idx_inside_block = idx % Block::kBits;
855     a.block_offset.word_idx = bit_idx_inside_block / BitWord::kBits;
856     a.block_offset.bit_idx = bit_idx_inside_block % BitWord::kBits;
857     return a;
858   }
859 
AddressToIndex(Address addr)860   static uint32_t AddressToIndex(Address addr) {
861     return addr.block_idx * Block::kBits +
862            addr.block_offset.word_idx * BitWord::kBits +
863            addr.block_offset.bit_idx;
864   }
865 
866   // Returns number of blocks (arrays of 8 int64_t) required to store
867   // |bit_count| bits.
868   //
869   // This is useful to be able to find indices where "fast" algorithms can
870   // start which work on entire blocks.
BlockCount(uint32_t bit_count)871   static constexpr uint32_t BlockCount(uint32_t bit_count) {
872     // Adding |Block::kBits - 1| gives us a quick way to get the count. We
873     // do this instead of adding 1 at the end because that gives incorrect
874     // answers for bit_count % Block::kBits == 0.
875     return (bit_count + Block::kBits - 1) / Block::kBits;
876   }
877 
878   // Returns the index of the block which would store |idx|.
BlockFloor(uint32_t idx)879   static constexpr uint32_t BlockFloor(uint32_t idx) {
880     return idx / Block::kBits;
881   }
882 
883   // Converts a block index to a index in the BitVector.
BlockToIndex(uint32_t block_idx)884   static constexpr uint32_t BlockToIndex(uint32_t block_idx) {
885     return block_idx * Block::kBits;
886   }
887 
888   uint32_t size_ = 0;
889   // See class documentation for how these constants are chosen.
890   static constexpr uint16_t kWordsInBlock = Block::kWords;
891   static constexpr uint32_t kBitsInBlock = kWordsInBlock * BitWord::kBits;
892   std::vector<uint32_t> counts_;
893   std::vector<uint64_t> words_;
894 };
895 
896 }  // namespace trace_processor
897 }  // namespace perfetto
898 
899 #endif  // SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_
900