1 /* 2 * Copyright (C) 2024 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACE_PROCESSOR_CONTAINERS_IMPLICIT_SEGMENT_FOREST_H_ 18 #define SRC_TRACE_PROCESSOR_CONTAINERS_IMPLICIT_SEGMENT_FOREST_H_ 19 20 #include <cstddef> 21 #include <cstdint> 22 #include <vector> 23 24 #include "perfetto/base/logging.h" 25 26 #if PERFETTO_BUILDFLAG(PERFETTO_X64_CPU_OPT) 27 #include <immintrin.h> 28 #endif 29 30 namespace perfetto::trace_processor { 31 32 // An implementation of a segment tree data structure [1] with: 33 // 1) parent-child relationships are implicit, saving memory. 34 // 2) the requirement for the number of values being a power of two, turning 35 // the tree into a forest. 36 // 37 // Segment trees are a very powerful data structure allowing O(log(n)) aggregate 38 // queries to be performed on an arbitrary range of elements in an array. 39 // Specifically, for `T x[n]`, and an associative and commutative operation 40 // AggOp (e.g. +, *, min, max, etc.), segment trees can compute 41 // ``` 42 // T y = AggOp()(x[i], x[i + 1], x[i + 2], ..., x[j]) 43 // ``` 44 // in O(log(n)) time. 45 // 46 // Practically, in trace processor, this is useful for computing aggregations 47 // over events in a trace. For example: 48 // ``` 49 // struct Slice { int64_t ts; int64_t dur; }; 50 // struct MaxDurSlice { 51 // Slice operator()(const Slice& a, const Slice& b) { 52 // return a.dur < b.dur ? b : a; 53 // } 54 // } 55 // using MipMap = ImplicitSegmentForest<Slice, MaxDurSlice>; 56 // ``` 57 // allows building a "mipmap" [2] of a track in a trace in a UI. The UI can show 58 // a representation of the items in the track when very zoomed out while 59 // skipping the rendering slices which are smaller than one pixel. 60 // 61 // The design and implementation of this class takes heavy inspiration from 62 // Tristan Hume's "IForestIndex" data structure [3] as described in his blog 63 // post [4]. 64 // 65 // [1] https://en.algorithmica.org/hpc/data-structures/segment-trees/ 66 // [2] https://en.wikipedia.org/wiki/Mipmap 67 // [3] 68 // https://github.com/trishume/gigatrace/blob/dfde0d7244f356bdc9aeefb387d904dd8b09d94a/src/iforest.rs 69 // [4] https://thume.ca/2021/03/14/iforests/ 70 template <typename T, typename AggOp> 71 class ImplicitSegmentForest { 72 public: 73 // Computes the aggregation (as specified by operator() in AggOp) over all 74 // elements in the tree between the indices [start, end). Requires that 75 // start < end. 76 // 77 // Complexity: 78 // This function performs O(log(n)) operations (n = end - start). 79 // 80 // Returns: 81 // 1) values[start]: if start + 1 == end 82 // 2) AggOp()(values[start], ..., values[end - 1]) otherwise Query(uint32_t start,uint32_t end)83 T Query(uint32_t start, uint32_t end) const { 84 PERFETTO_DCHECK(start < end); 85 86 const uint32_t in_start = start * 2; 87 const uint32_t in_end = end * 2; 88 89 uint32_t first_skip = LargestPrefixInsideSkip(in_start, in_end); 90 T aggregated = values_[AggNode(in_start, first_skip)]; 91 for (uint32_t i = in_start + first_skip; i < in_end;) { 92 uint32_t skip = LargestPrefixInsideSkip(i, in_end); 93 aggregated = AggOp()(aggregated, values_[AggNode(i, skip)]); 94 i += skip; 95 } 96 return aggregated; 97 } 98 99 // Pushes a new element to right-most part of the tree. This index of this 100 // element can be used in future calls to |Query|. Push(T v)101 void Push(T v) { 102 values_.emplace_back(std::move(v)); 103 104 size_t len = values_.size(); 105 auto levels_to_index = 106 static_cast<uint32_t>(Tzcnt(static_cast<uint64_t>(~len))) - 1; 107 108 size_t cur = len - 1; 109 for (uint32_t level = 0; level < levels_to_index; ++level) { 110 size_t prev_higher_level = cur - (1 << level); 111 values_[prev_higher_level] = 112 AggOp()(values_[prev_higher_level], values_[cur]); 113 cur = prev_higher_level; 114 } 115 values_.emplace_back(values_[len - (1 << levels_to_index)]); 116 } 117 118 // Returns the value at |n| in the tree: this corresponds to the |n|th 119 // element |Push|-ed into the tree. 120 const T& operator[](uint32_t n) { return values_[n * 2]; } 121 122 // Returns the number of elements pushed into the forest. size()123 uint32_t size() const { return static_cast<uint32_t>(values_.size() / 2); } 124 125 private: Lsp(uint32_t x)126 static uint32_t Lsp(uint32_t x) { return x & ~(x - 1); } Msp(uint32_t x)127 static uint32_t Msp(uint32_t x) { 128 return (1u << (sizeof(x) * 8 - 1)) >> Lzcnt32(x); 129 } LargestPrefixInsideSkip(uint32_t min,uint32_t max)130 static uint32_t LargestPrefixInsideSkip(uint32_t min, uint32_t max) { 131 return Lsp(min | Msp(max - min)); 132 } AggNode(uint32_t i,uint32_t offset)133 static uint32_t AggNode(uint32_t i, uint32_t offset) { 134 return i + (offset >> 1) - 1; 135 } 136 Lzcnt32(uint32_t value)137 static PERFETTO_ALWAYS_INLINE uint32_t Lzcnt32(uint32_t value) { 138 #if defined(__GNUC__) || defined(__clang__) 139 return value ? static_cast<uint32_t>(__builtin_clz(value)) : 32u; 140 #else 141 unsigned long out; 142 return _BitScanReverse(&out, value) ? 31 - out : 32u; 143 #endif 144 } 145 Tzcnt(uint64_t value)146 static PERFETTO_ALWAYS_INLINE uint32_t Tzcnt(uint64_t value) { 147 #if PERFETTO_BUILDFLAG(PERFETTO_X64_CPU_OPT) 148 return static_cast<uint32_t>(_tzcnt_u64(value)); 149 #elif defined(__GNUC__) || defined(__clang__) 150 return value ? static_cast<uint32_t>(__builtin_ctzll(value)) : 64u; 151 #else 152 unsigned long out; 153 return _BitScanForward64(&out, value) ? static_cast<uint32_t>(out) : 64u; 154 #endif 155 } 156 157 std::vector<T> values_; 158 }; 159 160 } // namespace perfetto::trace_processor 161 162 #endif // SRC_TRACE_PROCESSOR_CONTAINERS_IMPLICIT_SEGMENT_FOREST_H_ 163