• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

/* NOTE: see https://gitlab.freedesktop.org/freedreno/freedreno/-/wikis/A5xx-Queries */

#include "freedreno_query_acc.h"
#include "freedreno_resource.h"

#include "fd5_context.h"
#include "fd5_emit.h"
#include "fd5_format.h"
#include "fd5_query.h"

37 struct PACKED fd5_query_sample {
38    struct fd_acc_query_sample base;
39 
40    /* The RB_SAMPLE_COUNT_ADDR destination needs to be 16-byte aligned: */
41    uint64_t pad;
42 
43    uint64_t start;
44    uint64_t result;
45    uint64_t stop;
46 };
47 DEFINE_CAST(fd_acc_query_sample, fd5_query_sample);
48 
/* Expands to the (bo, offset, 0, 0) argument list expected by OUT_RELOC for
 * one field of the idx'th fd5_query_sample in the query's sample buffer.
 * Note: idx is parenthesized since callers may pass an expression.
 */
#define query_sample_idx(aq, idx, field)                                       \
   fd_resource((aq)->prsc)->bo,                                                \
      ((idx) * sizeof(struct fd5_query_sample)) +                              \
         offsetof(struct fd5_query_sample, field),                             \
      0, 0

/* offset of a single field of fd5_query_sample: */
#define query_sample(aq, field) query_sample_idx(aq, 0, field)
58 
/*
 * Occlusion Query:
 *
 * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
 * interpret results
 */

66 static void
occlusion_resume(struct fd_acc_query * aq,struct fd_batch * batch)67 occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
68 {
69    struct fd_ringbuffer *ring = batch->draw;
70 
71    OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
72    OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);
73 
74    ASSERT_ALIGNED(struct fd5_query_sample, start, 16);
75 
76    OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
77    OUT_RELOC(ring, query_sample(aq, start));
78 
79    fd5_event_write(batch, ring, ZPASS_DONE, false);
80    fd_reset_wfi(batch);
81 
82    fd5_context(batch->ctx)->samples_passed_queries++;
83 }
84 
85 static void
occlusion_pause(struct fd_acc_query * aq,struct fd_batch * batch)86 occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
87 {
88    struct fd_ringbuffer *ring = batch->draw;
89 
90    OUT_PKT7(ring, CP_MEM_WRITE, 4);
91    OUT_RELOC(ring, query_sample(aq, stop));
92    OUT_RING(ring, 0xffffffff);
93    OUT_RING(ring, 0xffffffff);
94 
95    OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
96 
97    OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
98    OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);
99 
100    ASSERT_ALIGNED(struct fd5_query_sample, stop, 16);
101 
102    OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
103    OUT_RELOC(ring, query_sample(aq, stop));
104 
105    fd5_event_write(batch, ring, ZPASS_DONE, false);
106    fd_reset_wfi(batch);
107 
108    OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
109    OUT_RING(ring, 0x00000014); // XXX
110    OUT_RELOC(ring, query_sample(aq, stop));
111    OUT_RING(ring, 0xffffffff);
112    OUT_RING(ring, 0xffffffff);
113    OUT_RING(ring, 0x00000010); // XXX
114 
115    /* result += stop - start: */
116    OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
117    OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
118    OUT_RELOC(ring, query_sample(aq, result)); /* dst */
119    OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
120    OUT_RELOC(ring, query_sample(aq, stop));   /* srcB */
121    OUT_RELOC(ring, query_sample(aq, start));  /* srcC */
122 
123    fd5_context(batch->ctx)->samples_passed_queries--;
124 }
125 
126 static void
occlusion_counter_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)127 occlusion_counter_result(struct fd_acc_query *aq,
128                          struct fd_acc_query_sample *s,
129                          union pipe_query_result *result)
130 {
131    struct fd5_query_sample *sp = fd5_query_sample(s);
132    result->u64 = sp->result;
133 }
134 
135 static void
occlusion_predicate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)136 occlusion_predicate_result(struct fd_acc_query *aq,
137                            struct fd_acc_query_sample *s,
138                            union pipe_query_result *result)
139 {
140    struct fd5_query_sample *sp = fd5_query_sample(s);
141    result->b = !!sp->result;
142 }
143 
144 static const struct fd_acc_sample_provider occlusion_counter = {
145    .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
146    .size = sizeof(struct fd5_query_sample),
147    .resume = occlusion_resume,
148    .pause = occlusion_pause,
149    .result = occlusion_counter_result,
150 };
151 
152 static const struct fd_acc_sample_provider occlusion_predicate = {
153    .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
154    .size = sizeof(struct fd5_query_sample),
155    .resume = occlusion_resume,
156    .pause = occlusion_pause,
157    .result = occlusion_predicate_result,
158 };
159 
160 static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
161    .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
162    .size = sizeof(struct fd5_query_sample),
163    .resume = occlusion_resume,
164    .pause = occlusion_pause,
165    .result = occlusion_predicate_result,
166 };
167 
/*
 * Timestamp Queries:
 */

172 static void
timestamp_resume(struct fd_acc_query * aq,struct fd_batch * batch)173 timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
174 {
175    struct fd_ringbuffer *ring = batch->draw;
176 
177    OUT_PKT7(ring, CP_EVENT_WRITE, 4);
178    OUT_RING(ring,
179             CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
180    OUT_RELOC(ring, query_sample(aq, start));
181    OUT_RING(ring, 0x00000000);
182 
183    fd_reset_wfi(batch);
184 }
185 
186 static void
timestamp_pause(struct fd_acc_query * aq,struct fd_batch * batch)187 timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
188 {
189    struct fd_ringbuffer *ring = batch->draw;
190 
191    OUT_PKT7(ring, CP_EVENT_WRITE, 4);
192    OUT_RING(ring,
193             CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
194    OUT_RELOC(ring, query_sample(aq, stop));
195    OUT_RING(ring, 0x00000000);
196 
197    fd_reset_wfi(batch);
198    fd_wfi(batch, ring);
199 
200    /* result += stop - start: */
201    OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
202    OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
203    OUT_RELOC(ring, query_sample(aq, result)); /* dst */
204    OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
205    OUT_RELOC(ring, query_sample(aq, stop));   /* srcB */
206    OUT_RELOC(ring, query_sample(aq, start));  /* srcC */
207 }
208 
209 static void
time_elapsed_accumulate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)210 time_elapsed_accumulate_result(struct fd_acc_query *aq,
211                                struct fd_acc_query_sample *s,
212                                union pipe_query_result *result)
213 {
214    struct fd5_query_sample *sp = fd5_query_sample(s);
215    result->u64 = ticks_to_ns(sp->result);
216 }
217 
218 static void
timestamp_accumulate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)219 timestamp_accumulate_result(struct fd_acc_query *aq,
220                             struct fd_acc_query_sample *s,
221                             union pipe_query_result *result)
222 {
223    struct fd5_query_sample *sp = fd5_query_sample(s);
224    result->u64 = ticks_to_ns(sp->result);
225 }
226 
227 static const struct fd_acc_sample_provider time_elapsed = {
228    .query_type = PIPE_QUERY_TIME_ELAPSED,
229    .always = true,
230    .size = sizeof(struct fd5_query_sample),
231    .resume = timestamp_resume,
232    .pause = timestamp_pause,
233    .result = time_elapsed_accumulate_result,
234 };
235 
/* NOTE: timestamp query isn't going to give terribly sensible results
 * on a tiler.  But it is needed by qapitrace profile heatmap.  If you
 * add in a binning pass, the results get even more non-sensical.  So
 * we just return the timestamp on the first tile and hope that is
 * kind of good enough.
 */

243 static const struct fd_acc_sample_provider timestamp = {
244    .query_type = PIPE_QUERY_TIMESTAMP,
245    .always = true,
246    .size = sizeof(struct fd5_query_sample),
247    .resume = timestamp_resume,
248    .pause = timestamp_pause,
249    .result = timestamp_accumulate_result,
250 };
251 
/*
 * Performance Counter (batch) queries:
 *
 * Only one of these is active at a time, per design of the gallium
 * batch_query API design.  One perfcntr query tracks N query_types,
 * each of which has a 'fd_batch_query_entry' that maps it back to
 * the associated group and counter.
 */

/* Maps one requested query_type back to its perfcntr group/countable: */
struct fd_batch_query_entry {
   uint8_t gid; /* group-id */
   uint8_t cid; /* countable-id within the group */
};

/* Per-query private data for a batch (perfcntr) query; one entry per
 * requested query_type (flexible array member, allocated in
 * fd5_create_batch_query()):
 */
struct fd_batch_query_data {
   struct fd_screen *screen;
   unsigned num_query_entries;
   struct fd_batch_query_entry query_entries[];
};
271 
272 static void
perfcntr_resume(struct fd_acc_query * aq,struct fd_batch * batch)273 perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
274 {
275    struct fd_batch_query_data *data = aq->query_data;
276    struct fd_screen *screen = data->screen;
277    struct fd_ringbuffer *ring = batch->draw;
278 
279    unsigned counters_per_group[screen->num_perfcntr_groups];
280    memset(counters_per_group, 0, sizeof(counters_per_group));
281 
282    fd_wfi(batch, ring);
283 
284    /* configure performance counters for the requested queries: */
285    for (unsigned i = 0; i < data->num_query_entries; i++) {
286       struct fd_batch_query_entry *entry = &data->query_entries[i];
287       const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
288       unsigned counter_idx = counters_per_group[entry->gid]++;
289 
290       assert(counter_idx < g->num_counters);
291 
292       OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
293       OUT_RING(ring, g->countables[entry->cid].selector);
294    }
295 
296    memset(counters_per_group, 0, sizeof(counters_per_group));
297 
298    /* and snapshot the start values */
299    for (unsigned i = 0; i < data->num_query_entries; i++) {
300       struct fd_batch_query_entry *entry = &data->query_entries[i];
301       const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
302       unsigned counter_idx = counters_per_group[entry->gid]++;
303       const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
304 
305       OUT_PKT7(ring, CP_REG_TO_MEM, 3);
306       OUT_RING(ring, CP_REG_TO_MEM_0_64B |
307                         CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
308       OUT_RELOC(ring, query_sample_idx(aq, i, start));
309    }
310 }
311 
312 static void
perfcntr_pause(struct fd_acc_query * aq,struct fd_batch * batch)313 perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
314 {
315    struct fd_batch_query_data *data = aq->query_data;
316    struct fd_screen *screen = data->screen;
317    struct fd_ringbuffer *ring = batch->draw;
318 
319    unsigned counters_per_group[screen->num_perfcntr_groups];
320    memset(counters_per_group, 0, sizeof(counters_per_group));
321 
322    fd_wfi(batch, ring);
323 
324    /* TODO do we need to bother to turn anything off? */
325 
326    /* snapshot the end values: */
327    for (unsigned i = 0; i < data->num_query_entries; i++) {
328       struct fd_batch_query_entry *entry = &data->query_entries[i];
329       const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
330       unsigned counter_idx = counters_per_group[entry->gid]++;
331       const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
332 
333       OUT_PKT7(ring, CP_REG_TO_MEM, 3);
334       OUT_RING(ring, CP_REG_TO_MEM_0_64B |
335                         CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
336       OUT_RELOC(ring, query_sample_idx(aq, i, stop));
337    }
338 
339    /* and compute the result: */
340    for (unsigned i = 0; i < data->num_query_entries; i++) {
341       /* result += stop - start: */
342       OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
343       OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
344       OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* dst */
345       OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* srcA */
346       OUT_RELOC(ring, query_sample_idx(aq, i, stop));   /* srcB */
347       OUT_RELOC(ring, query_sample_idx(aq, i, start));  /* srcC */
348    }
349 }
350 
351 static void
perfcntr_accumulate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)352 perfcntr_accumulate_result(struct fd_acc_query *aq,
353                            struct fd_acc_query_sample *s,
354                            union pipe_query_result *result)
355 {
356    struct fd_batch_query_data *data = aq->query_data;
357    struct fd5_query_sample *sp = fd5_query_sample(s);
358 
359    for (unsigned i = 0; i < data->num_query_entries; i++) {
360       result->batch[i].u64 = sp[i].result;
361    }
362 }
363 
364 static const struct fd_acc_sample_provider perfcntr = {
365    .query_type = FD_QUERY_FIRST_PERFCNTR,
366    .always = true,
367    .resume = perfcntr_resume,
368    .pause = perfcntr_pause,
369    .result = perfcntr_accumulate_result,
370 };
371 
372 static struct pipe_query *
fd5_create_batch_query(struct pipe_context * pctx,unsigned num_queries,unsigned * query_types)373 fd5_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
374                        unsigned *query_types)
375 {
376    struct fd_context *ctx = fd_context(pctx);
377    struct fd_screen *screen = ctx->screen;
378    struct fd_query *q;
379    struct fd_acc_query *aq;
380    struct fd_batch_query_data *data;
381 
382    data = CALLOC_VARIANT_LENGTH_STRUCT(
383       fd_batch_query_data, num_queries * sizeof(data->query_entries[0]));
384 
385    data->screen = screen;
386    data->num_query_entries = num_queries;
387 
388    /* validate the requested query_types and ensure we don't try
389     * to request more query_types of a given group than we have
390     * counters:
391     */
392    unsigned counters_per_group[screen->num_perfcntr_groups];
393    memset(counters_per_group, 0, sizeof(counters_per_group));
394 
395    for (unsigned i = 0; i < num_queries; i++) {
396       unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
397 
398       /* verify valid query_type, ie. is it actually a perfcntr? */
399       if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
400           (idx >= screen->num_perfcntr_queries)) {
401          mesa_loge("invalid batch query query_type: %u", query_types[i]);
402          goto error;
403       }
404 
405       struct fd_batch_query_entry *entry = &data->query_entries[i];
406       struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
407 
408       entry->gid = pq->group_id;
409 
410       /* the perfcntr_queries[] table flattens all the countables
411        * for each group in series, ie:
412        *
413        *   (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
414        *
415        * So to find the countable index just step back through the
416        * table to find the first entry with the same group-id.
417        */
418       while (pq > screen->perfcntr_queries) {
419          pq--;
420          if (pq->group_id == entry->gid)
421             entry->cid++;
422       }
423 
424       if (counters_per_group[entry->gid] >=
425           screen->perfcntr_groups[entry->gid].num_counters) {
426          mesa_loge("too many counters for group %u\n", entry->gid);
427          goto error;
428       }
429 
430       counters_per_group[entry->gid]++;
431    }
432 
433    q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
434    aq = fd_acc_query(q);
435 
436    /* sample buffer size is based on # of queries: */
437    aq->size = num_queries * sizeof(struct fd5_query_sample);
438    aq->query_data = data;
439 
440    return (struct pipe_query *)q;
441 
442 error:
443    free(data);
444    return NULL;
445 }
446 
447 void
fd5_query_context_init(struct pipe_context * pctx)448 fd5_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
449 {
450    struct fd_context *ctx = fd_context(pctx);
451 
452    ctx->create_query = fd_acc_create_query;
453    ctx->query_update_batch = fd_acc_query_update_batch;
454 
455    pctx->create_batch_query = fd5_create_batch_query;
456 
457    fd_acc_query_register_provider(pctx, &occlusion_counter);
458    fd_acc_query_register_provider(pctx, &occlusion_predicate);
459    fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);
460 
461    fd_acc_query_register_provider(pctx, &time_elapsed);
462    fd_acc_query_register_provider(pctx, &timestamp);
463 }
464