• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_CONTAINERS_ROW_MAP_H_
18 #define SRC_TRACE_PROCESSOR_CONTAINERS_ROW_MAP_H_
19 
20 #include <stdint.h>
21 
22 #include <memory>
23 #include <optional>
24 #include <variant>
25 #include <vector>
26 
27 #include "perfetto/base/logging.h"
28 #include "src/trace_processor/containers/bit_vector.h"
29 #include "src/trace_processor/containers/bit_vector_iterators.h"
30 
31 namespace perfetto {
32 namespace trace_processor {
33 
34 // Stores a list of row indicies in a space efficient manner. One or more
35 // columns can refer to the same RowMap. The RowMap defines the access pattern
36 // to iterate on rows.
37 //
38 // Naming convention:
39 //
40 // As both the input and output of RowMap is a uint32_t, it can be quite
41 // confusing to reason about what parameters/return values of the functions
42 // of RowMap actually means. To help with this, we define a strict convention
43 // of naming.
44 //
45 // row:     input - that is, rows are what are passed into operator[]; named as
46 //          such because a "row" number in a table is converted to an index to
47 //          lookup in the backing vectors.
48 // index:   output - that is, indices are what are returned from operator[];
49 //          named as such because an "index" is what's used to lookup data
50 //          from the backing vectors.
51 //
52 // Implementation details:
53 //
54 // Behind the scenes, this class is impelemented using one of three backing
55 // data-structures:
56 // 1. A start and end index (internally named 'range')
57 // 1. BitVector
58 // 2. std::vector<uint32_t> (internally named IndexVector).
59 //
60 // Generally the preference for data structures is range > BitVector >
61 // std::vector<uint32>; this ordering is based mainly on memory efficiency as we
62 // expect RowMaps to be large.
63 //
64 // However, BitVector and std::vector<uint32_t> allow things which are not
65 // possible with the data-structures preferred to them:
66 //  * a range (as the name suggests) can only store a compact set of indices
67 //  with no holes. A BitVector works around this limitation by storing a 1 at an
68 //  index where that row is part of the RowMap and 0 otherwise.
69 //  * as soon as ordering or duplicate rows come into play, we cannot use a
70 //   BitVector anymore as ordering/duplicate row information cannot be captured
71 //   by a BitVector.
72 //
73 // For small, sparse RowMaps, it is possible that a std::vector<uint32_t> is
74 // more efficient than a BitVector; in this case, we will make a best effort
75 // switch to it but the cases where this happens is not precisely defined.
76 class RowMap {
77  public:
78   using InputRow = uint32_t;
79   using OutputIndex = uint32_t;
80   using IndexVector = std::vector<OutputIndex>;
81 
82   struct Range {
RangeRange83     Range(OutputIndex start_index, OutputIndex end_index)
84         : start(start_index), end(end_index) {}
RangeRange85     Range() : start(0), end(0) {}
86 
87     OutputIndex start = 0;  // This is an inclusive index.
88     OutputIndex end = 0;    // This is an exclusive index.
89 
sizeRange90     uint32_t size() const {
91       PERFETTO_DCHECK(end >= start);
92       return end - start;
93     }
94   };
95 
96   // Allows efficient iteration over the rows of a RowMap.
97   //
98   // Note: you should usually prefer to use the methods on RowMap directly (if
99   // they exist for the task being attempted) to avoid the lookup for the mode
100   // of the RowMap on every method call.
101   class Iterator {
102    public:
103     explicit Iterator(const RowMap* rm);
104 
105     Iterator(Iterator&&) noexcept = default;
106     Iterator& operator=(Iterator&&) = default;
107 
108     // Forwards the iterator to the next row of the RowMap.
Next()109     void Next() {
110       if (std::get_if<Range>(&rm_->data_)) {
111         ++ordinal_;
112       } else if (std::get_if<BitVector>(&rm_->data_)) {
113         set_bits_it_->Next();
114       } else if (std::get_if<IndexVector>(&rm_->data_)) {
115         ++ordinal_;
116       }
117     }
118 
119     // Returns if the iterator is still valid.
120     operator bool() const {
121       if (auto* range = std::get_if<Range>(&rm_->data_)) {
122         return ordinal_ < range->end;
123       }
124       if (std::get_if<BitVector>(&rm_->data_)) {
125         return bool(*set_bits_it_);
126       }
127       if (auto* vec = std::get_if<IndexVector>(&rm_->data_)) {
128         return ordinal_ < vec->size();
129       }
130       PERFETTO_FATAL("Didn't match any variant type.");
131     }
132 
133     // Returns the index pointed to by this iterator.
index()134     OutputIndex index() const {
135       if (std::get_if<Range>(&rm_->data_)) {
136         return ordinal_;
137       }
138       if (std::get_if<BitVector>(&rm_->data_)) {
139         return set_bits_it_->index();
140       }
141       if (auto* vec = std::get_if<IndexVector>(&rm_->data_)) {
142         return (*vec)[ordinal_];
143       }
144       PERFETTO_FATAL("Didn't match any variant type.");
145     }
146 
147     // Returns the row of the index the iterator points to.
row()148     InputRow row() const {
149       if (auto* range = std::get_if<Range>(&rm_->data_)) {
150         return ordinal_ - range->start;
151       }
152       if (std::get_if<BitVector>(&rm_->data_)) {
153         return set_bits_it_->ordinal();
154       }
155       if (std::get_if<IndexVector>(&rm_->data_)) {
156         return ordinal_;
157       }
158       PERFETTO_FATAL("Didn't match any variant type.");
159     }
160 
161    private:
162     Iterator(const Iterator&) = delete;
163     Iterator& operator=(const Iterator&) = delete;
164 
165     // Ordinal will not be used for BitVector based RowMap.
166     uint32_t ordinal_ = 0;
167     // Not nullptr for BitVector based RowMap.
168     std::unique_ptr<BitVector::SetBitsIterator> set_bits_it_;
169 
170     const RowMap* rm_ = nullptr;
171   };
172 
173   // Enum to allow users of RowMap to decide whether they want to optimize for
174   // memory usage or for speed of lookups.
175   enum class OptimizeFor {
176     kMemory,
177     kLookupSpeed,
178   };
179 
180   // Creates an empty RowMap.
181   // By default this will be implemented using a range.
182   RowMap();
183 
184   // Creates a RowMap containing the range of indices between |start| and |end|
185   // i.e. all indices between |start| (inclusive) and |end| (exclusive).
186   RowMap(OutputIndex start,
187          OutputIndex end,
188          OptimizeFor optimize_for = OptimizeFor::kMemory);
189 
190   // Creates a RowMap backed by a BitVector.
191   explicit RowMap(BitVector);
192 
193   // Creates a RowMap backed by an std::vector<uint32_t>.
194   explicit RowMap(IndexVector);
195 
196   RowMap(const RowMap&) noexcept = delete;
197   RowMap& operator=(const RowMap&) = delete;
198 
199   RowMap(RowMap&&) noexcept = default;
200   RowMap& operator=(RowMap&&) = default;
201 
202   // Creates a RowMap containing just |index|.
203   // By default this will be implemented using a range.
SingleRow(OutputIndex index)204   static RowMap SingleRow(OutputIndex index) {
205     return RowMap(index, index + 1);
206   }
207 
208   // Creates a copy of the RowMap.
209   // We have an explicit copy function because RowMap can hold onto large chunks
210   // of memory and we want to be very explicit when making a copy to avoid
211   // accidental leaks and copies.
212   RowMap Copy() const;
213 
214   // Returns the size of the RowMap; that is the number of indices in the
215   // RowMap.
size()216   uint32_t size() const {
217     if (auto* range = std::get_if<Range>(&data_)) {
218       return range->size();
219     }
220     if (auto* bv = std::get_if<BitVector>(&data_)) {
221       return bv->CountSetBits();
222     }
223     if (auto* vec = std::get_if<IndexVector>(&data_)) {
224       return static_cast<uint32_t>(vec->size());
225     }
226     NoVariantMatched();
227   }
228 
229   // Returns whether this rowmap is empty.
empty()230   bool empty() const { return size() == 0; }
231 
232   // Returns the index at the given |row|.
Get(InputRow row)233   OutputIndex Get(InputRow row) const {
234     if (auto* range = std::get_if<Range>(&data_)) {
235       return GetRange(*range, row);
236     }
237     if (auto* bv = std::get_if<BitVector>(&data_)) {
238       return GetBitVector(*bv, row);
239     }
240     if (auto* vec = std::get_if<IndexVector>(&data_)) {
241       return GetIndexVector(*vec, row);
242     }
243     NoVariantMatched();
244   }
245 
246   // Returns whether the RowMap contains the given index.
Contains(OutputIndex index)247   bool Contains(OutputIndex index) const {
248     if (auto* range = std::get_if<Range>(&data_)) {
249       return index >= range->start && index < range->end;
250     }
251     if (auto* bv = std::get_if<BitVector>(&data_)) {
252       return index < bv->size() && bv->IsSet(index);
253     }
254     if (auto* vec = std::get_if<IndexVector>(&data_)) {
255       return std::find(vec->begin(), vec->end(), index) != vec->end();
256     }
257     NoVariantMatched();
258   }
259 
260   // Returns the first row of the given |index| in the RowMap.
RowOf(OutputIndex index)261   std::optional<InputRow> RowOf(OutputIndex index) const {
262     if (auto* range = std::get_if<Range>(&data_)) {
263       if (index < range->start || index >= range->end)
264         return std::nullopt;
265       return index - range->start;
266     }
267     if (auto* bv = std::get_if<BitVector>(&data_)) {
268       return index < bv->size() && bv->IsSet(index)
269                  ? std::make_optional(bv->CountSetBits(index))
270                  : std::nullopt;
271     }
272     if (auto* vec = std::get_if<IndexVector>(&data_)) {
273       auto it = std::find(vec->begin(), vec->end(), index);
274       return it != vec->end() ? std::make_optional(static_cast<InputRow>(
275                                     std::distance(vec->begin(), it)))
276                               : std::nullopt;
277     }
278     NoVariantMatched();
279   }
280 
281   // Performs an ordered insert of the index into the current RowMap
282   // (precondition: this RowMap is ordered based on the indices it contains).
283   //
284   // Example:
285   // this = [1, 5, 10, 11, 20]
286   // Insert(10)  // this = [1, 5, 10, 11, 20]
287   // Insert(12)  // this = [1, 5, 10, 11, 12, 20]
288   // Insert(21)  // this = [1, 5, 10, 11, 12, 20, 21]
289   // Insert(2)   // this = [1, 2, 5, 10, 11, 12, 20, 21]
290   //
291   // Speecifically, this means that it is only valid to call Insert on a RowMap
292   // which is sorted by the indices it contains; this is automatically true when
293   // the RowMap is in range or BitVector mode but is a required condition for
294   // IndexVector mode.
Insert(OutputIndex index)295   void Insert(OutputIndex index) {
296     if (auto* range = std::get_if<Range>(&data_)) {
297       if (index == range->end) {
298         // Fast path: if we're just appending to the end
299         // of the range, we can stay in range mode and
300         // just bump the end index.
301         range->end++;
302         return;
303       }
304 
305       // Slow path: the insert is somewhere else other
306       // than the end. This means we need to switch to
307       // using a BitVector instead.
308       BitVector bv;
309       bv.Resize(range->start, false);
310       bv.Resize(range->end, true);
311       InsertIntoBitVector(bv, index);
312       data_ = std::move(bv);
313       return;
314     }
315     if (auto* bv = std::get_if<BitVector>(&data_)) {
316       InsertIntoBitVector(*bv, index);
317       return;
318     }
319     if (auto* vec = std::get_if<IndexVector>(&data_)) {
320       PERFETTO_DCHECK(std::is_sorted(vec->begin(), vec->end()));
321       auto it = std::upper_bound(vec->begin(), vec->end(), index);
322       vec->insert(it, index);
323       return;
324     }
325     NoVariantMatched();
326   }
327 
328   // Updates this RowMap by 'picking' the indices given by |picker|.
329   // This is easiest to explain with an example; suppose we have the following
330   // RowMaps:
331   // this  : [0, 1, 4, 10, 11]
332   // picker: [0, 3, 4, 4, 2]
333   //
334   // After calling Apply(picker), we now have the following:
335   // this  : [0, 10, 11, 11, 4]
336   //
337   // Conceptually, we are performing the following algorithm:
338   // RowMap rm = Copy()
339   // for (p : picker)
340   //   rm[i++] = this[p]
341   // return rm;
SelectRows(const RowMap & selector)342   RowMap SelectRows(const RowMap& selector) const {
343     uint32_t size = selector.size();
344 
345     // If the selector is empty, just return an empty RowMap.
346     if (size == 0u)
347       return RowMap();
348 
349     // If the selector is just picking a single row, just return that row
350     // without any additional overhead.
351     if (size == 1u)
352       return RowMap::SingleRow(Get(selector.Get(0)));
353 
354     // For all other cases, go into the slow-path.
355     return SelectRowsSlow(selector);
356   }
357 
358   // Intersects the range [start_index, end_index) with |this| writing the
359   // result into |this|. By "intersect", we mean to keep only the indices
360   // present in both this RowMap and in the Range [start_index, end_index). The
361   // order of the preserved indices will be the same as |this|.
362   //
363   // Conceptually, we are performing the following algorithm:
364   // for (idx : this)
365   //   if (start_index <= idx && idx < end_index)
366   //     continue;
367   //   Remove(idx)
368   void Intersect(const RowMap& second);
369 
370   // Intersects this RowMap with |index|. If this RowMap contained |index|, then
371   // it will *only* contain |index|. Otherwise, it will be empty.
IntersectExact(OutputIndex index)372   void IntersectExact(OutputIndex index) {
373     if (Contains(index)) {
374       *this = RowMap(index, index + 1);
375     } else {
376       Clear();
377     }
378   }
379 
380   // Clears this RowMap by resetting it to a newly constructed state.
Clear()381   void Clear() { *this = RowMap(); }
382 
383   template <typename Comparator = bool(uint32_t, uint32_t)>
StableSort(IndexVector * out,Comparator c)384   void StableSort(IndexVector* out, Comparator c) const {
385     if (auto* range = std::get_if<Range>(&data_)) {
386       std::stable_sort(out->begin(), out->end(),
387                        [range, c](uint32_t a, uint32_t b) {
388                          return c(GetRange(*range, a), GetRange(*range, b));
389                        });
390       return;
391     }
392     if (auto* bv = std::get_if<BitVector>(&data_)) {
393       std::stable_sort(out->begin(), out->end(),
394                        [&bv, c](uint32_t a, uint32_t b) {
395                          return c(GetBitVector(*bv, a), GetBitVector(*bv, b));
396                        });
397       return;
398     }
399     if (auto* vec = std::get_if<IndexVector>(&data_)) {
400       std::stable_sort(
401           out->begin(), out->end(), [vec, c](uint32_t a, uint32_t b) {
402             return c(GetIndexVector(*vec, a), GetIndexVector(*vec, b));
403           });
404       return;
405     }
406     NoVariantMatched();
407   }
408 
409   // Filters the indices in |out| by keeping those which meet |p|.
410   template <typename Predicate = bool(OutputIndex)>
Filter(Predicate p)411   void Filter(Predicate p) {
412     if (auto* range = std::get_if<Range>(&data_)) {
413       data_ = FilterRange(p, *range);
414       return;
415     }
416     if (auto* bv = std::get_if<BitVector>(&data_)) {
417       for (auto it = bv->IterateSetBits(); it; it.Next()) {
418         if (!p(it.index()))
419           it.Clear();
420       }
421       return;
422     }
423     if (auto* vec = std::get_if<IndexVector>(&data_)) {
424       auto ret = std::remove_if(vec->begin(), vec->end(),
425                                 [p](uint32_t i) { return !p(i); });
426       vec->erase(ret, vec->end());
427       return;
428     }
429     NoVariantMatched();
430   }
431 
432   // Returns the iterator over the rows in this RowMap.
IterateRows()433   Iterator IterateRows() const { return Iterator(this); }
434 
435   // Returns if the RowMap is internally represented using a range.
IsRange()436   bool IsRange() const { return std::holds_alternative<Range>(data_); }
437 
438   // Returns if the RowMap is internally represented using a BitVector.
IsBitVector()439   bool IsBitVector() const { return std::holds_alternative<BitVector>(data_); }
440 
441   // Returns if the RowMap is internally represented using an index vector.
IsIndexVector()442   bool IsIndexVector() const {
443     return std::holds_alternative<IndexVector>(data_);
444   }
445 
446  private:
447   using Variant = std::variant<Range, BitVector, IndexVector>;
448 
449   explicit RowMap(Range);
450 
451   explicit RowMap(Variant);
452 
453   // TODO(lalitm): remove this when the coupling between RowMap and
454   // ColumnStorage Selector is broken (after filtering is moved out of here).
455   friend class ColumnStorageOverlay;
456 
457   template <typename Predicate>
FilterRange(Predicate p,Range r)458   Variant FilterRange(Predicate p, Range r) {
459     uint32_t count = r.size();
460 
461     // Optimization: if we are only going to scan a few indices, it's not
462     // worth the haslle of working with a BitVector.
463     constexpr uint32_t kSmallRangeLimit = 2048;
464     bool is_small_range = count < kSmallRangeLimit;
465 
466     // Optimization: weif the cost of a BitVector is more than the highest
467     // possible cost an index vector could have, use the index vector.
468     uint32_t bit_vector_cost = BitVector::ApproxBytesCost(r.end);
469     uint32_t index_vector_cost_ub = sizeof(uint32_t) * count;
470 
471     // If either of the conditions hold which make it better to use an
472     // index vector, use it instead. Alternatively, if we are optimizing for
473     // lookup speed, we also want to use an index vector.
474     if (is_small_range || index_vector_cost_ub <= bit_vector_cost ||
475         optimize_for_ == OptimizeFor::kLookupSpeed) {
476       // Try and strike a good balance between not making the vector too
477       // big and good performance.
478       IndexVector iv(std::min(kSmallRangeLimit, count));
479 
480       uint32_t out_i = 0;
481       for (uint32_t i = 0; i < count; ++i) {
482         // If we reach the capacity add another small set of indices.
483         if (PERFETTO_UNLIKELY(out_i == iv.size()))
484           iv.resize(iv.size() + kSmallRangeLimit);
485 
486         // We keep this branch free by always writing the index but only
487         // incrementing the out index if the return value is true.
488         bool value = p(i + r.start);
489         iv[out_i] = i + r.start;
490         out_i += value;
491       }
492 
493       // Make the vector the correct size and as small as possible.
494       iv.resize(out_i);
495       iv.shrink_to_fit();
496 
497       return std::move(iv);
498     }
499 
500     // Otherwise, create a bitvector which spans the full range using
501     // |p| as the filler for the bits between start and end.
502     return BitVector::Range(r.start, r.end, p);
503   }
504 
GetRange(Range r,InputRow row)505   PERFETTO_ALWAYS_INLINE static OutputIndex GetRange(Range r, InputRow row) {
506     return r.start + row;
507   }
GetBitVector(const BitVector & bv,uint32_t row)508   PERFETTO_ALWAYS_INLINE static OutputIndex GetBitVector(const BitVector& bv,
509                                                          uint32_t row) {
510     return bv.IndexOfNthSet(row);
511   }
GetIndexVector(const IndexVector & vec,uint32_t row)512   PERFETTO_ALWAYS_INLINE static OutputIndex GetIndexVector(
513       const IndexVector& vec,
514       uint32_t row) {
515     return vec[row];
516   }
517 
518   RowMap SelectRowsSlow(const RowMap& selector) const;
519 
InsertIntoBitVector(BitVector & bv,OutputIndex row)520   static void InsertIntoBitVector(BitVector& bv, OutputIndex row) {
521     if (row == bv.size()) {
522       bv.AppendTrue();
523       return;
524     }
525     if (row > bv.size())
526       bv.Resize(row + 1, false);
527     bv.Set(row);
528   }
529 
NoVariantMatched()530   PERFETTO_NORETURN void NoVariantMatched() const {
531     PERFETTO_FATAL("Didn't match any variant type.");
532   }
533 
534   Variant data_;
535   OptimizeFor optimize_for_ = OptimizeFor::kMemory;
536 };
537 
538 }  // namespace trace_processor
539 }  // namespace perfetto
540 
541 #endif  // SRC_TRACE_PROCESSOR_CONTAINERS_ROW_MAP_H_
542