• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1--
2-- Copyright 2024 The Android Open Source Project
3--
4-- Licensed under the Apache License, Version 2.0 (the "License");
5-- you may not use this file except in compliance with the License.
6-- You may obtain a copy of the License at
7--
8--     https://www.apache.org/licenses/LICENSE-2.0
9--
10-- Unless required by applicable law or agreed to in writing, software
11-- distributed under the License is distributed on an "AS IS" BASIS,
12-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13-- See the License for the specific language governing permissions and
14-- limitations under the License.
15
16-- Computes the "reachable" set of nodes in a directed graph from a given set
17-- of starting nodes by performing a depth-first search on the graph. The
18-- returned nodes are structured as a tree with parent-child relationships
19-- corresponding to the order in which nodes were encountered by the DFS.
20--
21-- While this macro can be used directly by end users (hence being public),
22-- it is primarily intended as a lower-level building block upon which higher
23-- level functions/macros in the standard library can be built.
24--
25-- Example usage on traces containing heap graphs:
26-- ```
27-- -- Compute the reachable nodes from the first heap root.
28-- SELECT *
29-- FROM graph_reachable_dfs!(
30--   (
31--     SELECT
32--       owner_id AS source_node_id,
33--       owned_id as dest_node_id
34--     FROM heap_graph_reference
35--     WHERE owned_id IS NOT NULL
36--   ),
37--   (SELECT id FROM heap_graph_object WHERE root_type IS NOT NULL)
38-- );
39-- ```
40CREATE PERFETTO MACRO graph_reachable_dfs(
41    -- A table/view/subquery corresponding to a directed graph on which the
42    -- reachability search should be performed. This table must have the columns
43    -- "source_node_id" and "dest_node_id" corresponding to the two nodes on
44    -- either end of the edges in the graph.
45    --
46    -- Note: the columns must contain uint32 similar to ids in trace processor
47    -- tables (i.e. the values should be relatively dense and close to zero). The
48    -- implementation makes assumptions on this for performance reasons and, if
49    -- this criteria is not, can lead to enormous amounts of memory being
50    -- allocated.
51    graph_table TableOrSubquery,
52    -- A table/view/subquery corresponding to the list of start nodes for
53    -- the BFS. This table must have a single column "node_id".
54    start_nodes TableOrSubquery
55)
56-- The returned table has the schema (node_id LONG, parent_node_id LONG).
57-- |node_id| is the id of the node from the input graph and |parent_node_id|
58-- is the id of the node which was the first encountered predecessor in a DFS
59-- search of the graph.
60RETURNS TableOrSubquery AS
61(
62  -- Rename the generic columns of __intrinsic_table_ptr to the actual columns.
63  SELECT
64    c0 AS node_id,
65    c1 AS parent_node_id
66  FROM __intrinsic_table_ptr(
67    __intrinsic_dfs(
68      (
69        SELECT
70          __intrinsic_graph_agg(g.source_node_id, g.dest_node_id)
71        FROM $graph_table AS g
72      ),
73      (
74        SELECT
75          __intrinsic_array_agg(t.node_id) AS arr
76        FROM $start_nodes AS t
77      )
78    )
79  )
80  -- Bind the dynamic columns in the |__intrinsic_table_ptr| to the columns of
81  -- the DFS table.
82  WHERE
83    __intrinsic_table_ptr_bind(c0, 'node_id')
84    AND __intrinsic_table_ptr_bind(c1, 'parent_node_id')
85);
86
87-- Computes the "reachable" set of nodes in a directed graph from a given
88-- starting node by performing a breadth-first search on the graph. The returned
89-- nodes are structured as a tree with parent-child relationships corresponding
90-- to the order in which nodes were encountered by the BFS.
91--
92-- While this macro can be used directly by end users (hence being public),
93-- it is primarily intended as a lower-level building block upon which higher
94-- level functions/macros in the standard library can be built.
95--
96-- Example usage on traces containing heap graphs:
97-- ```
98-- -- Compute the reachable nodes from all heap roots.
99-- SELECT *
100-- FROM graph_reachable_bfs!(
101--   (
102--     SELECT
103--       owner_id AS source_node_id,
104--       owned_id as dest_node_id
105--     FROM heap_graph_reference
106--     WHERE owned_id IS NOT NULL
107--   ),
108--   (SELECT id FROM heap_graph_object WHERE root_type IS NOT NULL)
109-- );
110-- ```
111CREATE PERFETTO MACRO graph_reachable_bfs(
112    -- A table/view/subquery corresponding to a directed graph on which the
113    -- reachability search should be performed. This table must have the columns
114    -- "source_node_id" and "dest_node_id" corresponding to the two nodes on
115    -- either end of the edges in the graph.
116    --
117    -- Note: the columns must contain uint32 similar to ids in trace processor
118    -- tables (i.e. the values should be relatively dense and close to zero). The
119    -- implementation makes assumptions on this for performance reasons and, if
120    -- this criteria is not, can lead to enormous amounts of memory being
121    -- allocated.
122    graph_table TableOrSubquery,
123    -- A table/view/subquery corresponding to the list of start nodes for
124    -- the BFS. This table must have a single column "node_id".
125    start_nodes TableOrSubquery
126)
127-- The returned table has the schema (node_id LONG, parent_node_id LONG).
128-- |node_id| is the id of the node from the input graph and |parent_node_id|
129-- is the id of the node which was the first encountered predecessor in a BFS
130-- search of the graph.
131RETURNS TableOrSubquery AS
132(
133  -- Rename the generic columns of __intrinsic_table_ptr to the actual columns.
134  SELECT
135    c0 AS node_id,
136    c1 AS parent_node_id
137  FROM __intrinsic_table_ptr(
138    __intrinsic_bfs(
139      (
140        SELECT
141          __intrinsic_graph_agg(g.source_node_id, g.dest_node_id)
142        FROM $graph_table AS g
143      ),
144      (
145        SELECT
146          __intrinsic_array_agg(t.node_id) AS arr
147        FROM $start_nodes AS t
148      )
149    )
150  )
151  -- Bind the dynamic columns in the |__intrinsic_table_ptr| to the columns of
152  -- the DFS table.
153  WHERE
154    __intrinsic_table_ptr_bind(c0, 'node_id')
155    AND __intrinsic_table_ptr_bind(c1, 'parent_node_id')
156);
157
158-- Computes the next sibling node in a directed graph. The next node under a parent node
159-- is determined by on the |sort_key|, which should be unique for every node under a parent.
160-- The order of the next sibling is undefined if the |sort_key| is not unique.
161--
162-- Example usage:
163-- ```
164-- -- Compute the next sibling:
165-- SELECT *
166-- FROM graph_next_sibling!(
167--   (
168--     SELECT
169--       id AS node_id,
170--       parent_id AS node_parent_id,
171--       ts AS sort_key
172--     FROM slice
173--   )
174-- );
175-- ```
176CREATE PERFETTO MACRO graph_next_sibling(
177    -- A table/view/subquery corresponding to a directed graph for which to find the next sibling.
178    -- This table must have the columns "node_id", "node_parent_id" and "sort_key".
179    graph_table TableOrSubquery
180)
181-- The returned table has the schema (node_id LONG, next_node_id LONG).
182-- |node_id| is the id of the node from the input graph and |next_node_id|
183-- is the id of the node which is its next sibling.
184RETURNS TableOrSubquery AS
185(
186  SELECT
187    node_id,
188    lead(node_id) OVER (PARTITION BY node_parent_id ORDER BY sort_key) AS next_node_id
189  FROM $graph_table
190);
191
192-- Computes the "reachable" set of nodes in a directed graph from a set of
193-- starting (root) nodes by performing a depth-first search from each root node on the graph.
194-- The search is bounded by the sum of edge weights on the path and the root node specifies the
195-- max weight (inclusive) allowed before stopping the search.
196-- The returned nodes are structured as a tree with parent-child relationships corresponding
197-- to the order in which nodes were encountered by the DFS. Each row also has the root node from
198-- which where the edge was encountered.
199--
200-- While this macro can be used directly by end users (hence being public),
201-- it is primarily intended as a lower-level building block upon which higher
202-- level functions/macros in the standard library can be built.
203--
204-- Example usage on traces with sched info:
205-- ```
206-- -- Compute the reachable nodes from a sched wakeup chain
207-- INCLUDE PERFETTO MODULE sched.thread_executing_spans;
208--
209-- SELECT *
210-- FROM
211--   graph_reachable_dfs_bounded
212--    !(
213--      (
214--        SELECT
215--          id AS source_node_id,
216--          COALESCE(parent_id, id) AS dest_node_id,
217--          id - COALESCE(parent_id, id) AS edge_weight
218--        FROM _wakeup_chain
219--      ),
220--      (
221--        SELECT
222--          id AS root_node_id,
223--          id - COALESCE(prev_id, id) AS root_target_weight
224--        FROM _wakeup_chain
225--      ));
226-- ```
227CREATE PERFETTO MACRO graph_reachable_weight_bounded_dfs(
228    -- A table/view/subquery corresponding to a directed graph on which the
229    -- reachability search should be performed. This table must have the columns
230    -- "source_node_id" and "dest_node_id" corresponding to the two nodes on
231    -- either end of the edges in the graph and an "edge_weight" corresponding to the
232    -- weight of the edge between the node.
233    --
234    -- Note: the columns must contain uint32 similar to ids in trace processor
235    -- tables (i.e. the values should be relatively dense and close to zero). The
236    -- implementation makes assumptions on this for performance reasons and, if
237    -- this criteria is not, can lead to enormous amounts of memory being
238    -- allocated.
239    graph_table TableOrSubquery,
240    -- A table/view/subquery corresponding to start nodes to |graph_table| which will be the
241    -- roots of the reachability trees. This table must have the columns
242    -- "root_node_id" and "root_target_weight" corresponding to the starting node id and the max
243    -- weight allowed on the tree.
244    --
245    -- Note: the columns must contain uint32 similar to ids in trace processor
246    -- tables (i.e. the values should be relatively dense and close to zero). The
247    -- implementation makes assumptions on this for performance reasons and, if
248    -- this criteria is not, can lead to enormous amounts of memory being
249    -- allocated.
250    root_table TableOrSubquery,
251    -- Whether the target_weight is a floor weight or ceiling weight.
252    -- If it's floor, the search stops right after we exceed the target weight, and we
253    -- include the node that pushed just passed the target. If ceiling, the search stops
254    -- right before the target weight and the node that would have pushed us passed the
255    -- target is not included.
256    is_target_weight_floor Expr
257)
258-- The returned table has the schema (root_node_id, node_id LONG, parent_node_id LONG).
259-- |root_node_id| is the id of the starting node under which this edge was encountered.
260-- |node_id| is the id of the node from the input graph and |parent_node_id|
261-- is the id of the node which was the first encountered predecessor in a DFS
262-- search of the graph.
263RETURNS TableOrSubquery AS
264(
265  WITH
266    __temp_graph_table AS (
267      SELECT
268        *
269      FROM $graph_table
270    ),
271    __temp_root_table AS (
272      SELECT
273        *
274      FROM $root_table
275    )
276  SELECT
277    dt.root_node_id,
278    dt.node_id,
279    dt.parent_node_id
280  FROM __intrinsic_dfs_weight_bounded(
281    (
282      SELECT
283        repeatedfield(source_node_id)
284      FROM __temp_graph_table
285    ),
286    (
287      SELECT
288        repeatedfield(dest_node_id)
289      FROM __temp_graph_table
290    ),
291    (
292      SELECT
293        repeatedfield(edge_weight)
294      FROM __temp_graph_table
295    ),
296    (
297      SELECT
298        repeatedfield(root_node_id)
299      FROM __temp_root_table
300    ),
301    (
302      SELECT
303        repeatedfield(root_target_weight)
304      FROM __temp_root_table
305    ),
306    $is_target_weight_floor
307  ) AS dt
308);
309