/*
 * Copyright © 2017 Rob Clark <robclark@freedesktop.org>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

/* NOTE: see https://gitlab.freedesktop.org/freedreno/freedreno/-/wikis/A5xx-Queries */

#include "freedreno_query_acc.h"
#include "freedreno_resource.h"

#include "fd5_context.h"
#include "fd5_emit.h"
#include "fd5_format.h"
#include "fd5_query.h"

struct PACKED fd5_query_sample {
   struct fd_acc_query_sample base;

   /* The RB_SAMPLE_COUNT_ADDR destination needs to be 16-byte aligned: */
   uint64_t pad;

   uint64_t start;
   uint64_t result;
   uint64_t stop;
};
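/* With the 8-byte pad, 'start' (and thus 'stop', 16 bytes later) lands on a
 * 16-byte boundary; the ASSERT_ALIGNED() checks below verify this at build
 * time.
 */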
FD_DEFINE_CAST(fd_acc_query_sample, fd5_query_sample);

/* offset of a single field of an array of fd5_query_sample: */
#define query_sample_idx(aq, idx, field)                                       \
   fd_resource((aq)->prsc)->bo,                                                \
      (idx * sizeof(struct fd5_query_sample)) +                                \
         offsetof(struct fd5_query_sample, field),                             \
      0, 0

/* offset of a single field of fd5_query_sample: */
#define query_sample(aq, field) query_sample_idx(aq, 0, field)

/*
 * Occlusion Query:
 *
 * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
 * interpret results
 */
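
/* Both use the same resume/pause pairing: resume() snapshots the current
 * sample-count into 'start', while pause() snapshots it into 'stop' and
 * accumulates (stop - start) into 'result', so a query spanning multiple
 * batches/tiles sums up correctly.
 */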

static void
occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->draw;

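   /* Arm the RB to copy its live sample-count to memory; the ZPASS_DONE
    * event below then triggers the write of the current count to 'start':
    */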
   OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   ASSERT_ALIGNED(struct fd5_query_sample, start, 16);

   OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
   OUT_RELOC(ring, query_sample(aq, start));

   fd5_event_write(batch, ring, ZPASS_DONE, false);
   fd_reset_wfi(batch);

   ctx->occlusion_queries_active++;
}

static void
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->draw;

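   /* Pre-fill 'stop' with a sentinel (~0) so that we can later detect, from
    * the CP, when the ZPASS_DONE sample-count write has actually landed:
    */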
   OUT_PKT7(ring, CP_MEM_WRITE, 4);
   OUT_RELOC(ring, query_sample(aq, stop));
   OUT_RING(ring, 0xffffffff);
   OUT_RING(ring, 0xffffffff);

   OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);

   OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   ASSERT_ALIGNED(struct fd5_query_sample, stop, 16);

   OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
   OUT_RELOC(ring, query_sample(aq, stop));

   fd5_event_write(batch, ring, ZPASS_DONE, false);
   fd_reset_wfi(batch);

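   /* Wait until the sample-count write has overwritten the sentinel.  The
    * magic values appear to be (going by the a5xx CP_WAIT_REG_MEM encoding,
    * an assumption): poll-memory + write-if-not-equal, with a poll interval
    * of 16 cycles in the last dword:
    */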
   OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
   OUT_RING(ring, 0x00000014); // XXX
   OUT_RELOC(ring, query_sample(aq, stop));
   OUT_RING(ring, 0xffffffff);
   OUT_RING(ring, 0xffffffff);
   OUT_RING(ring, 0x00000010); // XXX

   /* result += stop - start: */
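   /* (CP_MEM_TO_MEM with DOUBLE set operates on 64b values, and NEG_C
    * negates srcC, ie. dst = srcA + srcB - srcC)
    */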
   OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
   OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
   OUT_RELOC(ring, query_sample(aq, result)); /* dst */
   OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
   OUT_RELOC(ring, query_sample(aq, stop));   /* srcB */
   OUT_RELOC(ring, query_sample(aq, start));  /* srcC */

   assert(ctx->occlusion_queries_active > 0);
   ctx->occlusion_queries_active--;
}

static void
occlusion_counter_result(struct fd_acc_query *aq,
                         struct fd_acc_query_sample *s,
                         union pipe_query_result *result)
{
   struct fd5_query_sample *sp = fd5_query_sample(s);
   result->u64 = sp->result;
}

static void
occlusion_predicate_result(struct fd_acc_query *aq,
                           struct fd_acc_query_sample *s,
                           union pipe_query_result *result)
{
   struct fd5_query_sample *sp = fd5_query_sample(s);
   result->b = !!sp->result;
}

static const struct fd_acc_sample_provider occlusion_counter = {
   .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
   .size = sizeof(struct fd5_query_sample),
   .resume = occlusion_resume,
   .pause = occlusion_pause,
   .result = occlusion_counter_result,
};

static const struct fd_acc_sample_provider occlusion_predicate = {
   .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
   .size = sizeof(struct fd5_query_sample),
   .resume = occlusion_resume,
   .pause = occlusion_pause,
   .result = occlusion_predicate_result,
};

static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
   .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
   .size = sizeof(struct fd5_query_sample),
   .resume = occlusion_resume,
   .pause = occlusion_pause,
   .result = occlusion_predicate_result,
};

/*
 * Timestamp Queries:
 */

static void
timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   OUT_PKT7(ring, CP_EVENT_WRITE, 4);
   OUT_RING(ring,
            CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
   OUT_RELOC(ring, query_sample(aq, start));
   OUT_RING(ring, 0x00000000);

   fd_reset_wfi(batch);
}

static void
timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   OUT_PKT7(ring, CP_EVENT_WRITE, 4);
   OUT_RING(ring,
            CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
   OUT_RELOC(ring, query_sample(aq, stop));
   OUT_RING(ring, 0x00000000);

   fd_reset_wfi(batch);
   fd_wfi(batch, ring);

   /* result += stop - start: */
   OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
   OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
   OUT_RELOC(ring, query_sample(aq, result)); /* dst */
   OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
   OUT_RELOC(ring, query_sample(aq, stop));   /* srcB */
   OUT_RELOC(ring, query_sample(aq, start));  /* srcC */
}

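/* ticks_to_ns() scales the raw counter value to nanoseconds; in freedreno
 * this conversion is based on the fixed-rate (19.2 MHz) always-on RBBM
 * timer.
 */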
static void
time_elapsed_accumulate_result(struct fd_acc_query *aq,
                               struct fd_acc_query_sample *s,
                               union pipe_query_result *result)
{
   struct fd5_query_sample *sp = fd5_query_sample(s);
   result->u64 = ticks_to_ns(sp->result);
}

static void
timestamp_accumulate_result(struct fd_acc_query *aq,
                            struct fd_acc_query_sample *s,
                            union pipe_query_result *result)
{
   struct fd5_query_sample *sp = fd5_query_sample(s);
   result->u64 = ticks_to_ns(sp->result);
}

static const struct fd_acc_sample_provider time_elapsed = {
   .query_type = PIPE_QUERY_TIME_ELAPSED,
   .always = true,
   .size = sizeof(struct fd5_query_sample),
   .resume = timestamp_resume,
   .pause = timestamp_pause,
   .result = time_elapsed_accumulate_result,
};

/* NOTE: timestamp query isn't going to give terribly sensible results
 * on a tiler.  But it is needed by qapitrace profile heatmap.  If you
 * add in a binning pass, the results get even more non-sensical.  So
 * we just return the timestamp on the first tile and hope that is
 * kind of good enough.
 */

static const struct fd_acc_sample_provider timestamp = {
   .query_type = PIPE_QUERY_TIMESTAMP,
   .always = true,
   .size = sizeof(struct fd5_query_sample),
   .resume = timestamp_resume,
   .pause = timestamp_pause,
   .result = timestamp_accumulate_result,
};

/*
 * Performance Counter (batch) queries:
 *
 * Only one of these is active at a time, per the gallium batch_query API
 * design.  One perfcntr query tracks N query_types, each of which has a
 * 'fd_batch_query_entry' that maps it back to the associated group and
 * counter.
 */
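
/* The sample buffer for a perfcntr query is an array of fd5_query_sample,
 * one per tracked query_type: entry i snapshots into sample[i].start/stop
 * and accumulates into sample[i].result (addressed via query_sample_idx()).
 */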

struct fd_batch_query_entry {
   uint8_t gid; /* group-id */
   uint8_t cid; /* countable-id within the group */
};

struct fd_batch_query_data {
   struct fd_screen *screen;
   unsigned num_query_entries;
   struct fd_batch_query_entry query_entries[];
};

static void
perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_batch_query_data *data = aq->query_data;
   struct fd_screen *screen = data->screen;
   struct fd_ringbuffer *ring = batch->draw;

   unsigned counters_per_group[screen->num_perfcntr_groups];
   memset(counters_per_group, 0, sizeof(counters_per_group));

   fd_wfi(batch, ring);

   /* configure performance counters for the requested queries: */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      struct fd_batch_query_entry *entry = &data->query_entries[i];
      const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
      unsigned counter_idx = counters_per_group[entry->gid]++;

      assert(counter_idx < g->num_counters);

      OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
      OUT_RING(ring, g->countables[entry->cid].selector);
   }

   memset(counters_per_group, 0, sizeof(counters_per_group));

   /* and snapshot the start values */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      struct fd_batch_query_entry *entry = &data->query_entries[i];
      const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
      unsigned counter_idx = counters_per_group[entry->gid]++;
      const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

      OUT_PKT7(ring, CP_REG_TO_MEM, 3);
      OUT_RING(ring, CP_REG_TO_MEM_0_64B |
                        CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
      OUT_RELOC(ring, query_sample_idx(aq, i, start));
   }
}

static void
perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_batch_query_data *data = aq->query_data;
   struct fd_screen *screen = data->screen;
   struct fd_ringbuffer *ring = batch->draw;

   unsigned counters_per_group[screen->num_perfcntr_groups];
   memset(counters_per_group, 0, sizeof(counters_per_group));

   fd_wfi(batch, ring);

   /* TODO do we need to bother to turn anything off? */

   /* snapshot the end values: */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      struct fd_batch_query_entry *entry = &data->query_entries[i];
      const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
      unsigned counter_idx = counters_per_group[entry->gid]++;
      const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

      OUT_PKT7(ring, CP_REG_TO_MEM, 3);
      OUT_RING(ring, CP_REG_TO_MEM_0_64B |
                        CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
      OUT_RELOC(ring, query_sample_idx(aq, i, stop));
   }

   /* and compute the result: */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      /* result += stop - start: */
      OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
      OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
      OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* dst */
      OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* srcA */
      OUT_RELOC(ring, query_sample_idx(aq, i, stop));   /* srcB */
      OUT_RELOC(ring, query_sample_idx(aq, i, start));  /* srcC */
   }
}

static void
perfcntr_accumulate_result(struct fd_acc_query *aq,
                           struct fd_acc_query_sample *s,
                           union pipe_query_result *result)
{
   struct fd_batch_query_data *data = aq->query_data;
   struct fd5_query_sample *sp = fd5_query_sample(s);

   for (unsigned i = 0; i < data->num_query_entries; i++) {
      result->batch[i].u64 = sp[i].result;
   }
}

static const struct fd_acc_sample_provider perfcntr = {
   .query_type = FD_QUERY_FIRST_PERFCNTR,
   .always = true,
   .resume = perfcntr_resume,
   .pause = perfcntr_pause,
   .result = perfcntr_accumulate_result,
};
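/* Note: no fixed .size here; the sample buffer size depends on the number
 * of queries, and is set per-query in fd5_create_batch_query() below.
 */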

static struct pipe_query *
fd5_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
                       unsigned *query_types)
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd_screen *screen = ctx->screen;
   struct fd_query *q;
   struct fd_acc_query *aq;
   struct fd_batch_query_data *data;

   data = CALLOC_VARIANT_LENGTH_STRUCT(
      fd_batch_query_data, num_queries * sizeof(data->query_entries[0]));

   data->screen = screen;
   data->num_query_entries = num_queries;

   /* validate the requested query_types and ensure we don't try
    * to request more query_types of a given group than we have
    * counters:
    */
   unsigned counters_per_group[screen->num_perfcntr_groups];
   memset(counters_per_group, 0, sizeof(counters_per_group));

   for (unsigned i = 0; i < num_queries; i++) {
      unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;

      /* verify valid query_type, ie. is it actually a perfcntr? */
      if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
          (idx >= screen->num_perfcntr_queries)) {
         mesa_loge("invalid batch query query_type: %u", query_types[i]);
         goto error;
      }

      struct fd_batch_query_entry *entry = &data->query_entries[i];
      struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];

      entry->gid = pq->group_id;

      /* the perfcntr_queries[] table flattens all the countables
       * for each group in series, ie:
       *
       *   (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
       *
       * So to find the countable index just step back through the
       * table to find the first entry with the same group-id.
       */
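      /* e.g. for the table above, looking up (G1,C2) counts (G1,C1) and
       * (G1,C0) on the way back through the table, leaving entry->cid == 2.
       */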
      while (pq > screen->perfcntr_queries) {
         pq--;
         if (pq->group_id == entry->gid)
            entry->cid++;
      }

      if (counters_per_group[entry->gid] >=
          screen->perfcntr_groups[entry->gid].num_counters) {
         mesa_loge("too many counters for group %u\n", entry->gid);
         goto error;
      }

      counters_per_group[entry->gid]++;
   }

   q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
   aq = fd_acc_query(q);

   /* sample buffer size is based on # of queries: */
   aq->size = num_queries * sizeof(struct fd5_query_sample);
   aq->query_data = data;

   return (struct pipe_query *)q;

error:
   free(data);
   return NULL;
}

void
fd5_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->create_query = fd_acc_create_query;
   ctx->query_update_batch = fd_acc_query_update_batch;

   pctx->create_batch_query = fd5_create_batch_query;

   fd_acc_query_register_provider(pctx, &occlusion_counter);
   fd_acc_query_register_provider(pctx, &occlusion_predicate);
   fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);

   fd_acc_query_register_provider(pctx, &time_elapsed);
   fd_acc_query_register_provider(pctx, &timestamp);
}