/*
 * Copyright © 2017 Rob Clark <robclark@freedesktop.org>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

/* NOTE: see https://gitlab.freedesktop.org/freedreno/freedreno/-/wikis/A5xx-Queries */
#include "freedreno_query_acc.h"
#include "freedreno_resource.h"

#include "fd5_context.h"
#include "fd5_emit.h"
#include "fd5_format.h"
#include "fd5_query.h"
/* Layout of one accumulated query sample in the query's backing BO,
 * extending the common fd_acc_query_sample header.  For batch queries
 * these are laid out as an array, one per tracked query_type.
 */
struct PACKED fd5_query_sample {
   struct fd_acc_query_sample base;

   /* The RB_SAMPLE_COUNT_ADDR destination needs to be 16-byte aligned: */
   uint64_t pad;

   uint64_t start;   /* value latched when the query is resumed */
   uint64_t result;  /* running total, accumulated as (stop - start) */
   uint64_t stop;    /* value latched when the query is paused */
};
FD_DEFINE_CAST(fd_acc_query_sample, fd5_query_sample);
30 
/* offset of a single field of an array of fd5_query_sample:
 * expands to the (bo, offset, orval, shift) argument list that
 * OUT_RELOC() expects, addressing 'field' of sample number 'idx'
 * in the query's backing buffer.
 */
#define query_sample_idx(aq, idx, field)                                       \
   fd_resource((aq)->prsc)->bo,                                                \
      (idx * sizeof(struct fd5_query_sample)) +                                \
         offsetof(struct fd5_query_sample, field),                             \
      0, 0

/* offset of a single field of fd5_query_sample: */
#define query_sample(aq, field) query_sample_idx(aq, 0, field)
40 
/*
 * Occlusion Query:
 *
 * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
 * interpret results
 */
47 
/* Latch the current ZPASS sample count into 'start' when rendering
 * (re)starts for this query.
 */
static void
occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
   assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->draw;

   /* Arm the RB to copy out the sample count on the next ZPASS_DONE: */
   OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   /* the copy destination must be 16B aligned (see struct definition): */
   ASSERT_ALIGNED(struct fd5_query_sample, start, 16);

   OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
   OUT_RELOC(ring, query_sample(aq, start));

   fd5_event_write(batch, ring, ZPASS_DONE, false);
   fd_reset_wfi(batch);

   ctx->occlusion_queries_active++;
}
68 
/* Latch the current ZPASS sample count into 'stop' and accumulate
 * (stop - start) into 'result' when rendering for this query pauses.
 */
static void
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
   assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->draw;

   /* Pre-fill 'stop' with a sentinel so we can detect (below) when the
    * hw has actually overwritten it with the copied sample count:
    */
   OUT_PKT7(ring, CP_MEM_WRITE, 4);
   OUT_RELOC(ring, query_sample(aq, stop));
   OUT_RING(ring, 0xffffffff);
   OUT_RING(ring, 0xffffffff);

   OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);

   OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   /* the copy destination must be 16B aligned (see struct definition): */
   ASSERT_ALIGNED(struct fd5_query_sample, stop, 16);

   OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
   OUT_RELOC(ring, query_sample(aq, stop));

   fd5_event_write(batch, ring, ZPASS_DONE, false);
   fd_reset_wfi(batch);

   /* Stall until 'stop' no longer holds the sentinel, ie. until the
    * ZPASS_DONE copy has landed (magic packet-field values inherited
    * from the blob, hence the XXX):
    */
   OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
   OUT_RING(ring, 0x00000014); // XXX
   OUT_RELOC(ring, query_sample(aq, stop));
   OUT_RING(ring, 0xffffffff);
   OUT_RING(ring, 0xffffffff);
   OUT_RING(ring, 0x00000010); // XXX

   /* result += stop - start: */
   OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
   OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
   OUT_RELOC(ring, query_sample(aq, result)); /* dst */
   OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
   OUT_RELOC(ring, query_sample(aq, stop));   /* srcB */
   OUT_RELOC(ring, query_sample(aq, start));  /* srcC */

   assert(ctx->occlusion_queries_active > 0);
   ctx->occlusion_queries_active--;
}
112 
113 static void
occlusion_counter_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)114 occlusion_counter_result(struct fd_acc_query *aq,
115                          struct fd_acc_query_sample *s,
116                          union pipe_query_result *result)
117 {
118    struct fd5_query_sample *sp = fd5_query_sample(s);
119    result->u64 = sp->result;
120 }
121 
122 static void
occlusion_predicate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)123 occlusion_predicate_result(struct fd_acc_query *aq,
124                            struct fd_acc_query_sample *s,
125                            union pipe_query_result *result)
126 {
127    struct fd5_query_sample *sp = fd5_query_sample(s);
128    result->b = !!sp->result;
129 }
130 
/* The three occlusion query flavors share resume/pause logic and differ
 * only in how the accumulated result is interpreted:
 */
static const struct fd_acc_sample_provider occlusion_counter = {
   .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
   .size = sizeof(struct fd5_query_sample),
   .resume = occlusion_resume,
   .pause = occlusion_pause,
   .result = occlusion_counter_result,
};

static const struct fd_acc_sample_provider occlusion_predicate = {
   .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
   .size = sizeof(struct fd5_query_sample),
   .resume = occlusion_resume,
   .pause = occlusion_pause,
   .result = occlusion_predicate_result,
};

static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
   .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
   .size = sizeof(struct fd5_query_sample),
   .resume = occlusion_resume,
   .pause = occlusion_pause,
   .result = occlusion_predicate_result,
};
154 
/*
 * Timestamp Queries:
 */
158 
/* Capture the GPU timestamp into 'start' when rendering (re)starts: */
static void
timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   OUT_PKT7(ring, CP_EVENT_WRITE, 4);
   OUT_RING(ring,
            CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
   OUT_RELOC(ring, query_sample(aq, start));
   OUT_RING(ring, 0x00000000);

   fd_reset_wfi(batch);
}
172 
/* Capture the GPU timestamp into 'stop' and accumulate the elapsed
 * ticks (stop - start) into 'result' when rendering pauses:
 */
static void
timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   OUT_PKT7(ring, CP_EVENT_WRITE, 4);
   OUT_RING(ring,
            CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
   OUT_RELOC(ring, query_sample(aq, stop));
   OUT_RING(ring, 0x00000000);

   /* wait for the timestamp write to land before the CP reads it back: */
   fd_reset_wfi(batch);
   fd_wfi(batch, ring);

   /* result += stop - start: */
   OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
   OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
   OUT_RELOC(ring, query_sample(aq, result)); /* dst */
   OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
   OUT_RELOC(ring, query_sample(aq, stop));   /* srcB */
   OUT_RELOC(ring, query_sample(aq, start));  /* srcC */
}
195 
196 static void
time_elapsed_accumulate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)197 time_elapsed_accumulate_result(struct fd_acc_query *aq,
198                                struct fd_acc_query_sample *s,
199                                union pipe_query_result *result)
200 {
201    struct fd5_query_sample *sp = fd5_query_sample(s);
202    result->u64 = ticks_to_ns(sp->result);
203 }
204 
205 static void
timestamp_accumulate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)206 timestamp_accumulate_result(struct fd_acc_query *aq,
207                             struct fd_acc_query_sample *s,
208                             union pipe_query_result *result)
209 {
210    struct fd5_query_sample *sp = fd5_query_sample(s);
211    result->u64 = ticks_to_ns(sp->result);
212 }
213 
static const struct fd_acc_sample_provider time_elapsed = {
   .query_type = PIPE_QUERY_TIME_ELAPSED,
   .always = true,  /* sampled across the whole batch, not just draws */
   .size = sizeof(struct fd5_query_sample),
   .resume = timestamp_resume,
   .pause = timestamp_pause,
   .result = time_elapsed_accumulate_result,
};

/* NOTE: timestamp query isn't going to give terribly sensible results
 * on a tiler.  But it is needed by qapitrace profile heatmap.  If you
 * add in a binning pass, the results get even more non-sensical.  So
 * we just return the timestamp on the first tile and hope that is
 * kind of good enough.
 */

static const struct fd_acc_sample_provider timestamp = {
   .query_type = PIPE_QUERY_TIMESTAMP,
   .always = true,
   .size = sizeof(struct fd5_query_sample),
   .resume = timestamp_resume,
   .pause = timestamp_pause,
   .result = timestamp_accumulate_result,
};
238 
/*
 * Performance Counter (batch) queries:
 *
 * Only one of these is active at a time, per design of the gallium
 * batch_query API.  One perfcntr query tracks N query_types,
 * each of which has a 'fd_batch_query_entry' that maps it back to
 * the associated group and counter.
 */
247 
/* Maps one tracked query_type back to its hw counter group and the
 * countable within that group:
 */
struct fd_batch_query_entry {
   uint8_t gid; /* group-id */
   uint8_t cid; /* countable-id within the group */
};

/* Per batch-query driver data, covering all of its query_types: */
struct fd_batch_query_data {
   struct fd_screen *screen;
   unsigned num_query_entries;
   struct fd_batch_query_entry query_entries[];
};
258 
259 static void
perfcntr_resume(struct fd_acc_query * aq,struct fd_batch * batch)260 perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
261 {
262    struct fd_batch_query_data *data = aq->query_data;
263    struct fd_screen *screen = data->screen;
264    struct fd_ringbuffer *ring = batch->draw;
265 
266    unsigned counters_per_group[screen->num_perfcntr_groups];
267    memset(counters_per_group, 0, sizeof(counters_per_group));
268 
269    fd_wfi(batch, ring);
270 
271    /* configure performance counters for the requested queries: */
272    for (unsigned i = 0; i < data->num_query_entries; i++) {
273       struct fd_batch_query_entry *entry = &data->query_entries[i];
274       const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
275       unsigned counter_idx = counters_per_group[entry->gid]++;
276 
277       assert(counter_idx < g->num_counters);
278 
279       OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
280       OUT_RING(ring, g->countables[entry->cid].selector);
281    }
282 
283    memset(counters_per_group, 0, sizeof(counters_per_group));
284 
285    /* and snapshot the start values */
286    for (unsigned i = 0; i < data->num_query_entries; i++) {
287       struct fd_batch_query_entry *entry = &data->query_entries[i];
288       const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
289       unsigned counter_idx = counters_per_group[entry->gid]++;
290       const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
291 
292       OUT_PKT7(ring, CP_REG_TO_MEM, 3);
293       OUT_RING(ring, CP_REG_TO_MEM_0_64B |
294                         CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
295       OUT_RELOC(ring, query_sample_idx(aq, i, start));
296    }
297 }
298 
299 static void
perfcntr_pause(struct fd_acc_query * aq,struct fd_batch * batch)300 perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
301 {
302    struct fd_batch_query_data *data = aq->query_data;
303    struct fd_screen *screen = data->screen;
304    struct fd_ringbuffer *ring = batch->draw;
305 
306    unsigned counters_per_group[screen->num_perfcntr_groups];
307    memset(counters_per_group, 0, sizeof(counters_per_group));
308 
309    fd_wfi(batch, ring);
310 
311    /* TODO do we need to bother to turn anything off? */
312 
313    /* snapshot the end values: */
314    for (unsigned i = 0; i < data->num_query_entries; i++) {
315       struct fd_batch_query_entry *entry = &data->query_entries[i];
316       const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
317       unsigned counter_idx = counters_per_group[entry->gid]++;
318       const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
319 
320       OUT_PKT7(ring, CP_REG_TO_MEM, 3);
321       OUT_RING(ring, CP_REG_TO_MEM_0_64B |
322                         CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
323       OUT_RELOC(ring, query_sample_idx(aq, i, stop));
324    }
325 
326    /* and compute the result: */
327    for (unsigned i = 0; i < data->num_query_entries; i++) {
328       /* result += stop - start: */
329       OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
330       OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
331       OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* dst */
332       OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* srcA */
333       OUT_RELOC(ring, query_sample_idx(aq, i, stop));   /* srcB */
334       OUT_RELOC(ring, query_sample_idx(aq, i, start));  /* srcC */
335    }
336 }
337 
338 static void
perfcntr_accumulate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)339 perfcntr_accumulate_result(struct fd_acc_query *aq,
340                            struct fd_acc_query_sample *s,
341                            union pipe_query_result *result)
342 {
343    struct fd_batch_query_data *data = aq->query_data;
344    struct fd5_query_sample *sp = fd5_query_sample(s);
345 
346    for (unsigned i = 0; i < data->num_query_entries; i++) {
347       result->batch[i].u64 = sp[i].result;
348    }
349 }
350 
/* Note: no fixed .size here -- the sample buffer size depends on the
 * number of query_types and is set in fd5_create_batch_query():
 */
static const struct fd_acc_sample_provider perfcntr = {
   .query_type = FD_QUERY_FIRST_PERFCNTR,
   .always = true,
   .resume = perfcntr_resume,
   .pause = perfcntr_pause,
   .result = perfcntr_accumulate_result,
};
358 
359 static struct pipe_query *
fd5_create_batch_query(struct pipe_context * pctx,unsigned num_queries,unsigned * query_types)360 fd5_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
361                        unsigned *query_types)
362 {
363    struct fd_context *ctx = fd_context(pctx);
364    struct fd_screen *screen = ctx->screen;
365    struct fd_query *q;
366    struct fd_acc_query *aq;
367    struct fd_batch_query_data *data;
368 
369    data = CALLOC_VARIANT_LENGTH_STRUCT(
370       fd_batch_query_data, num_queries * sizeof(data->query_entries[0]));
371 
372    data->screen = screen;
373    data->num_query_entries = num_queries;
374 
375    /* validate the requested query_types and ensure we don't try
376     * to request more query_types of a given group than we have
377     * counters:
378     */
379    unsigned counters_per_group[screen->num_perfcntr_groups];
380    memset(counters_per_group, 0, sizeof(counters_per_group));
381 
382    for (unsigned i = 0; i < num_queries; i++) {
383       unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
384 
385       /* verify valid query_type, ie. is it actually a perfcntr? */
386       if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
387           (idx >= screen->num_perfcntr_queries)) {
388          mesa_loge("invalid batch query query_type: %u", query_types[i]);
389          goto error;
390       }
391 
392       struct fd_batch_query_entry *entry = &data->query_entries[i];
393       struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
394 
395       entry->gid = pq->group_id;
396 
397       /* the perfcntr_queries[] table flattens all the countables
398        * for each group in series, ie:
399        *
400        *   (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
401        *
402        * So to find the countable index just step back through the
403        * table to find the first entry with the same group-id.
404        */
405       while (pq > screen->perfcntr_queries) {
406          pq--;
407          if (pq->group_id == entry->gid)
408             entry->cid++;
409       }
410 
411       if (counters_per_group[entry->gid] >=
412           screen->perfcntr_groups[entry->gid].num_counters) {
413          mesa_loge("too many counters for group %u\n", entry->gid);
414          goto error;
415       }
416 
417       counters_per_group[entry->gid]++;
418    }
419 
420    q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
421    aq = fd_acc_query(q);
422 
423    /* sample buffer size is based on # of queries: */
424    aq->size = num_queries * sizeof(struct fd5_query_sample);
425    aq->query_data = data;
426 
427    return (struct pipe_query *)q;
428 
429 error:
430    free(data);
431    return NULL;
432 }
433 
434 void
fd5_query_context_init(struct pipe_context * pctx)435 fd5_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
436 {
437    struct fd_context *ctx = fd_context(pctx);
438 
439    ctx->create_query = fd_acc_create_query;
440    ctx->query_update_batch = fd_acc_query_update_batch;
441 
442    pctx->create_batch_query = fd5_create_batch_query;
443 
444    fd_acc_query_register_provider(pctx, &occlusion_counter);
445    fd_acc_query_register_provider(pctx, &occlusion_predicate);
446    fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);
447 
448    fd_acc_query_register_provider(pctx, &time_elapsed);
449    fd_acc_query_register_provider(pctx, &timestamp);
450 }
451