1-- 2-- Copyright 2024 The Android Open Source Project 3-- 4-- Licensed under the Apache License, Version 2.0 (the "License"); 5-- you may not use this file except in compliance with the License. 6-- You may obtain a copy of the License at 7-- 8-- https://www.apache.org/licenses/LICENSE-2.0 9-- 10-- Unless required by applicable law or agreed to in writing, software 11-- distributed under the License is distributed on an "AS IS" BASIS, 12-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13-- See the License for the specific language governing permissions and 14-- limitations under the License. 15 16-- Computes the "reachable" set of nodes in a directed graph from a given set 17-- of starting nodes by performing a depth-first search on the graph. The 18-- returned nodes are structured as a tree with parent-child relationships 19-- corresponding to the order in which nodes were encountered by the DFS. 20-- 21-- While this macro can be used directly by end users (hence being public), 22-- it is primarily intended as a lower-level building block upon which higher 23-- level functions/macros in the standard library can be built. 24-- 25-- Example usage on traces containing heap graphs: 26-- ``` 27-- -- Compute the reachable nodes from the first heap root. 28-- SELECT * 29-- FROM graph_reachable_dfs!( 30-- ( 31-- SELECT 32-- owner_id AS source_node_id, 33-- owned_id as dest_node_id 34-- FROM heap_graph_reference 35-- WHERE owned_id IS NOT NULL 36-- ), 37-- (SELECT id FROM heap_graph_object WHERE root_type IS NOT NULL) 38-- ); 39-- ``` 40CREATE PERFETTO MACRO graph_reachable_dfs( 41 -- A table/view/subquery corresponding to a directed graph on which the 42 -- reachability search should be performed. This table must have the columns 43 -- "source_node_id" and "dest_node_id" corresponding to the two nodes on 44 -- either end of the edges in the graph. 45 -- 46 -- Note: the columns must contain uint32 similar to ids in trace processor 47 -- tables (i.e. the values should be relatively dense and close to zero). The 48 -- implementation makes assumptions on this for performance reasons and, if 49 -- this criteria is not, can lead to enormous amounts of memory being 50 -- allocated. 51 graph_table TableOrSubquery, 52 -- A table/view/subquery corresponding to the list of start nodes for 53 -- the BFS. This table must have a single column "node_id". 54 start_nodes TableOrSubquery 55) 56-- The returned table has the schema (node_id LONG, parent_node_id LONG). 57-- |node_id| is the id of the node from the input graph and |parent_node_id| 58-- is the id of the node which was the first encountered predecessor in a DFS 59-- search of the graph. 60RETURNS TableOrSubquery AS 61( 62 -- Rename the generic columns of __intrinsic_table_ptr to the actual columns. 63 SELECT 64 c0 AS node_id, 65 c1 AS parent_node_id 66 FROM __intrinsic_table_ptr( 67 __intrinsic_dfs( 68 ( 69 SELECT 70 __intrinsic_graph_agg(g.source_node_id, g.dest_node_id) 71 FROM $graph_table AS g 72 ), 73 ( 74 SELECT 75 __intrinsic_array_agg(t.node_id) AS arr 76 FROM $start_nodes AS t 77 ) 78 ) 79 ) 80 -- Bind the dynamic columns in the |__intrinsic_table_ptr| to the columns of 81 -- the DFS table. 82 WHERE 83 __intrinsic_table_ptr_bind(c0, 'node_id') 84 AND __intrinsic_table_ptr_bind(c1, 'parent_node_id') 85); 86 87-- Computes the "reachable" set of nodes in a directed graph from a given 88-- starting node by performing a breadth-first search on the graph. The returned 89-- nodes are structured as a tree with parent-child relationships corresponding 90-- to the order in which nodes were encountered by the BFS. 91-- 92-- While this macro can be used directly by end users (hence being public), 93-- it is primarily intended as a lower-level building block upon which higher 94-- level functions/macros in the standard library can be built. 95-- 96-- Example usage on traces containing heap graphs: 97-- ``` 98-- -- Compute the reachable nodes from all heap roots. 99-- SELECT * 100-- FROM graph_reachable_bfs!( 101-- ( 102-- SELECT 103-- owner_id AS source_node_id, 104-- owned_id as dest_node_id 105-- FROM heap_graph_reference 106-- WHERE owned_id IS NOT NULL 107-- ), 108-- (SELECT id FROM heap_graph_object WHERE root_type IS NOT NULL) 109-- ); 110-- ``` 111CREATE PERFETTO MACRO graph_reachable_bfs( 112 -- A table/view/subquery corresponding to a directed graph on which the 113 -- reachability search should be performed. This table must have the columns 114 -- "source_node_id" and "dest_node_id" corresponding to the two nodes on 115 -- either end of the edges in the graph. 116 -- 117 -- Note: the columns must contain uint32 similar to ids in trace processor 118 -- tables (i.e. the values should be relatively dense and close to zero). The 119 -- implementation makes assumptions on this for performance reasons and, if 120 -- this criteria is not, can lead to enormous amounts of memory being 121 -- allocated. 122 graph_table TableOrSubquery, 123 -- A table/view/subquery corresponding to the list of start nodes for 124 -- the BFS. This table must have a single column "node_id". 125 start_nodes TableOrSubquery 126) 127-- The returned table has the schema (node_id LONG, parent_node_id LONG). 128-- |node_id| is the id of the node from the input graph and |parent_node_id| 129-- is the id of the node which was the first encountered predecessor in a BFS 130-- search of the graph. 131RETURNS TableOrSubquery AS 132( 133 -- Rename the generic columns of __intrinsic_table_ptr to the actual columns. 134 SELECT 135 c0 AS node_id, 136 c1 AS parent_node_id 137 FROM __intrinsic_table_ptr( 138 __intrinsic_bfs( 139 ( 140 SELECT 141 __intrinsic_graph_agg(g.source_node_id, g.dest_node_id) 142 FROM $graph_table AS g 143 ), 144 ( 145 SELECT 146 __intrinsic_array_agg(t.node_id) AS arr 147 FROM $start_nodes AS t 148 ) 149 ) 150 ) 151 -- Bind the dynamic columns in the |__intrinsic_table_ptr| to the columns of 152 -- the DFS table. 153 WHERE 154 __intrinsic_table_ptr_bind(c0, 'node_id') 155 AND __intrinsic_table_ptr_bind(c1, 'parent_node_id') 156); 157 158-- Computes the next sibling node in a directed graph. The next node under a parent node 159-- is determined by on the |sort_key|, which should be unique for every node under a parent. 160-- The order of the next sibling is undefined if the |sort_key| is not unique. 161-- 162-- Example usage: 163-- ``` 164-- -- Compute the next sibling: 165-- SELECT * 166-- FROM graph_next_sibling!( 167-- ( 168-- SELECT 169-- id AS node_id, 170-- parent_id AS node_parent_id, 171-- ts AS sort_key 172-- FROM slice 173-- ) 174-- ); 175-- ``` 176CREATE PERFETTO MACRO graph_next_sibling( 177 -- A table/view/subquery corresponding to a directed graph for which to find the next sibling. 178 -- This table must have the columns "node_id", "node_parent_id" and "sort_key". 179 graph_table TableOrSubquery 180) 181-- The returned table has the schema (node_id LONG, next_node_id LONG). 182-- |node_id| is the id of the node from the input graph and |next_node_id| 183-- is the id of the node which is its next sibling. 184RETURNS TableOrSubquery AS 185( 186 SELECT 187 node_id, 188 lead(node_id) OVER (PARTITION BY node_parent_id ORDER BY sort_key) AS next_node_id 189 FROM $graph_table 190); 191 192-- Computes the "reachable" set of nodes in a directed graph from a set of 193-- starting (root) nodes by performing a depth-first search from each root node on the graph. 194-- The search is bounded by the sum of edge weights on the path and the root node specifies the 195-- max weight (inclusive) allowed before stopping the search. 196-- The returned nodes are structured as a tree with parent-child relationships corresponding 197-- to the order in which nodes were encountered by the DFS. Each row also has the root node from 198-- which where the edge was encountered. 199-- 200-- While this macro can be used directly by end users (hence being public), 201-- it is primarily intended as a lower-level building block upon which higher 202-- level functions/macros in the standard library can be built. 203-- 204-- Example usage on traces with sched info: 205-- ``` 206-- -- Compute the reachable nodes from a sched wakeup chain 207-- INCLUDE PERFETTO MODULE sched.thread_executing_spans; 208-- 209-- SELECT * 210-- FROM 211-- graph_reachable_dfs_bounded 212-- !( 213-- ( 214-- SELECT 215-- id AS source_node_id, 216-- COALESCE(parent_id, id) AS dest_node_id, 217-- id - COALESCE(parent_id, id) AS edge_weight 218-- FROM _wakeup_chain 219-- ), 220-- ( 221-- SELECT 222-- id AS root_node_id, 223-- id - COALESCE(prev_id, id) AS root_target_weight 224-- FROM _wakeup_chain 225-- )); 226-- ``` 227CREATE PERFETTO MACRO graph_reachable_weight_bounded_dfs( 228 -- A table/view/subquery corresponding to a directed graph on which the 229 -- reachability search should be performed. This table must have the columns 230 -- "source_node_id" and "dest_node_id" corresponding to the two nodes on 231 -- either end of the edges in the graph and an "edge_weight" corresponding to the 232 -- weight of the edge between the node. 233 -- 234 -- Note: the columns must contain uint32 similar to ids in trace processor 235 -- tables (i.e. the values should be relatively dense and close to zero). The 236 -- implementation makes assumptions on this for performance reasons and, if 237 -- this criteria is not, can lead to enormous amounts of memory being 238 -- allocated. 239 graph_table TableOrSubquery, 240 -- A table/view/subquery corresponding to start nodes to |graph_table| which will be the 241 -- roots of the reachability trees. This table must have the columns 242 -- "root_node_id" and "root_target_weight" corresponding to the starting node id and the max 243 -- weight allowed on the tree. 244 -- 245 -- Note: the columns must contain uint32 similar to ids in trace processor 246 -- tables (i.e. the values should be relatively dense and close to zero). The 247 -- implementation makes assumptions on this for performance reasons and, if 248 -- this criteria is not, can lead to enormous amounts of memory being 249 -- allocated. 250 root_table TableOrSubquery, 251 -- Whether the target_weight is a floor weight or ceiling weight. 252 -- If it's floor, the search stops right after we exceed the target weight, and we 253 -- include the node that pushed just passed the target. If ceiling, the search stops 254 -- right before the target weight and the node that would have pushed us passed the 255 -- target is not included. 256 is_target_weight_floor Expr 257) 258-- The returned table has the schema (root_node_id, node_id LONG, parent_node_id LONG). 259-- |root_node_id| is the id of the starting node under which this edge was encountered. 260-- |node_id| is the id of the node from the input graph and |parent_node_id| 261-- is the id of the node which was the first encountered predecessor in a DFS 262-- search of the graph. 263RETURNS TableOrSubquery AS 264( 265 WITH 266 __temp_graph_table AS ( 267 SELECT 268 * 269 FROM $graph_table 270 ), 271 __temp_root_table AS ( 272 SELECT 273 * 274 FROM $root_table 275 ) 276 SELECT 277 dt.root_node_id, 278 dt.node_id, 279 dt.parent_node_id 280 FROM __intrinsic_dfs_weight_bounded( 281 ( 282 SELECT 283 repeatedfield(source_node_id) 284 FROM __temp_graph_table 285 ), 286 ( 287 SELECT 288 repeatedfield(dest_node_id) 289 FROM __temp_graph_table 290 ), 291 ( 292 SELECT 293 repeatedfield(edge_weight) 294 FROM __temp_graph_table 295 ), 296 ( 297 SELECT 298 repeatedfield(root_node_id) 299 FROM __temp_root_table 300 ), 301 ( 302 SELECT 303 repeatedfield(root_target_weight) 304 FROM __temp_root_table 305 ), 306 $is_target_weight_floor 307 ) AS dt 308); 309