• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2024 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_CONTAINERS_IMPLICIT_SEGMENT_FOREST_H_
18 #define SRC_TRACE_PROCESSOR_CONTAINERS_IMPLICIT_SEGMENT_FOREST_H_
19 
20 #include <cstddef>
21 #include <cstdint>
22 #include <vector>
23 
24 #include "perfetto/base/logging.h"
25 
26 #if PERFETTO_BUILDFLAG(PERFETTO_X64_CPU_OPT)
27 #include <immintrin.h>
28 #endif
29 
30 namespace perfetto::trace_processor {
31 
32 // An implementation of a segment tree data structure [1] with:
33 // 1) parent-child relationships are implicit, saving memory.
34 // 2) the requirement for the number of values being a power of two, turning
35 //    the tree into a forest.
36 //
37 // Segment trees are a very powerful data structure allowing O(log(n)) aggregate
38 // queries to be performed on an arbitrary range of elements in an array.
39 // Specifically, for `T x[n]`, and an associative and commutative operation
40 // AggOp (e.g. +, *, min, max, etc.), segment trees can compute
41 // ```
42 //   T y = AggOp()(x[i], x[i + 1], x[i + 2], ..., x[j])
43 // ```
44 // in O(log(n)) time.
45 //
46 // Practically, in trace processor, this is useful for computing aggregations
47 // over events in a trace. For example:
48 // ```
49 // struct Slice { int64_t ts; int64_t dur; };
50 // struct MaxDurSlice {
51 //   Slice operator()(const Slice& a, const Slice& b) {
52 //     return a.dur < b.dur ? b : a;
53 //   }
54 // }
55 // using MipMap = ImplicitSegmentForest<Slice, MaxDurSlice>;
56 // ```
57 // allows building a "mipmap" [2] of a track in a trace in a UI. The UI can show
58 // a representation of the items in the track when very zoomed out while
59 // skipping the rendering slices which are smaller than one pixel.
60 //
61 // The design and implementation of this class takes heavy inspiration from
62 // Tristan Hume's "IForestIndex" data structure [3] as described in his blog
63 // post [4].
64 //
65 // [1] https://en.algorithmica.org/hpc/data-structures/segment-trees/
66 // [2] https://en.wikipedia.org/wiki/Mipmap
67 // [3]
68 // https://github.com/trishume/gigatrace/blob/dfde0d7244f356bdc9aeefb387d904dd8b09d94a/src/iforest.rs
69 // [4] https://thume.ca/2021/03/14/iforests/
70 template <typename T, typename AggOp>
71 class ImplicitSegmentForest {
72  public:
73   // Computes the aggregation (as specified by operator() in AggOp) over all
74   // elements in the tree between the indices [start, end). Requires that
75   // start < end.
76   //
77   // Complexity:
78   // This function performs O(log(n)) operations (n = end - start).
79   //
80   // Returns:
81   //  1) values[start]: if start + 1 == end
82   //  2) AggOp()(values[start], ..., values[end - 1]) otherwise
Query(uint32_t start,uint32_t end)83   T Query(uint32_t start, uint32_t end) const {
84     PERFETTO_DCHECK(start < end);
85 
86     const uint32_t in_start = start * 2;
87     const uint32_t in_end = end * 2;
88 
89     uint32_t first_skip = LargestPrefixInsideSkip(in_start, in_end);
90     T aggregated = values_[AggNode(in_start, first_skip)];
91     for (uint32_t i = in_start + first_skip; i < in_end;) {
92       uint32_t skip = LargestPrefixInsideSkip(i, in_end);
93       aggregated = AggOp()(aggregated, values_[AggNode(i, skip)]);
94       i += skip;
95     }
96     return aggregated;
97   }
98 
99   // Pushes a new element to right-most part of the tree. This index of this
100   // element can be used in future calls to |Query|.
Push(T v)101   void Push(T v) {
102     values_.emplace_back(std::move(v));
103 
104     size_t len = values_.size();
105     auto levels_to_index =
106         static_cast<uint32_t>(Tzcnt(static_cast<uint64_t>(~len))) - 1;
107 
108     size_t cur = len - 1;
109     for (uint32_t level = 0; level < levels_to_index; ++level) {
110       size_t prev_higher_level = cur - (1 << level);
111       values_[prev_higher_level] =
112           AggOp()(values_[prev_higher_level], values_[cur]);
113       cur = prev_higher_level;
114     }
115     values_.emplace_back(values_[len - (1 << levels_to_index)]);
116   }
117 
118   // Returns the value at |n| in the tree: this corresponds to the |n|th
119   // element |Push|-ed into the tree.
120   const T& operator[](uint32_t n) { return values_[n * 2]; }
121 
122   // Returns the number of elements pushed into the forest.
size()123   uint32_t size() const { return static_cast<uint32_t>(values_.size() / 2); }
124 
125  private:
Lsp(uint32_t x)126   static uint32_t Lsp(uint32_t x) { return x & ~(x - 1); }
Msp(uint32_t x)127   static uint32_t Msp(uint32_t x) {
128     return (1u << (sizeof(x) * 8 - 1)) >> Lzcnt32(x);
129   }
LargestPrefixInsideSkip(uint32_t min,uint32_t max)130   static uint32_t LargestPrefixInsideSkip(uint32_t min, uint32_t max) {
131     return Lsp(min | Msp(max - min));
132   }
AggNode(uint32_t i,uint32_t offset)133   static uint32_t AggNode(uint32_t i, uint32_t offset) {
134     return i + (offset >> 1) - 1;
135   }
136 
Lzcnt32(uint32_t value)137   static PERFETTO_ALWAYS_INLINE uint32_t Lzcnt32(uint32_t value) {
138 #if defined(__GNUC__) || defined(__clang__)
139     return value ? static_cast<uint32_t>(__builtin_clz(value)) : 32u;
140 #else
141     unsigned long out;
142     return _BitScanReverse(&out, value) ? 31 - out : 32u;
143 #endif
144   }
145 
Tzcnt(uint64_t value)146   static PERFETTO_ALWAYS_INLINE uint32_t Tzcnt(uint64_t value) {
147 #if PERFETTO_BUILDFLAG(PERFETTO_X64_CPU_OPT)
148     return static_cast<uint32_t>(_tzcnt_u64(value));
149 #elif defined(__GNUC__) || defined(__clang__)
150     return value ? static_cast<uint32_t>(__builtin_ctzll(value)) : 64u;
151 #else
152     unsigned long out;
153     return _BitScanForward64(&out, value) ? static_cast<uint32_t>(out) : 64u;
154 #endif
155   }
156 
157   std::vector<T> values_;
158 };
159 
160 }  // namespace perfetto::trace_processor
161 
162 #endif  // SRC_TRACE_PROCESSOR_CONTAINERS_IMPLICIT_SEGMENT_FOREST_H_
163