• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2015 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #include "si_build_pm4.h"
26 #include "si_query.h"
27 #include "util/u_memory.h"
28 
29 #include "ac_perfcounter.h"
30 
31 struct si_query_group {
32    struct si_query_group *next;
33    struct ac_pc_block *block;
34    unsigned sub_gid;     /* only used during init */
35    unsigned result_base; /* only used during init */
36    int se;
37    int instance;
38    unsigned num_counters;
39    unsigned selectors[AC_QUERY_MAX_COUNTERS];
40 };
41 
/* Location of one user-visible counter inside a result slot, expressed in
 * units of uint64_t. The final value is the sum of 'qwords' samples starting
 * at 'base' and spaced 'stride' apart.
 */
struct si_query_counter {
   unsigned base;   /* index of the first sample */
   unsigned qwords; /* number of samples to accumulate */
   unsigned stride; /* in uint64s */
};
47 
48 struct si_query_pc {
49    struct si_query b;
50    struct si_query_buffer buffer;
51 
52    /* Size of the results in memory, in bytes. */
53    unsigned result_size;
54 
55    unsigned shaders;
56    unsigned num_counters;
57    struct si_query_counter *counters;
58    struct si_query_group *groups;
59 };
60 
si_pc_emit_instance(struct si_context * sctx,int se,int instance)61 static void si_pc_emit_instance(struct si_context *sctx, int se, int instance)
62 {
63    struct radeon_cmdbuf *cs = &sctx->gfx_cs;
64    unsigned value = S_030800_SH_BROADCAST_WRITES(1);
65 
66    if (se >= 0) {
67       value |= S_030800_SE_INDEX(se);
68    } else {
69       value |= S_030800_SE_BROADCAST_WRITES(1);
70    }
71 
72    if (sctx->chip_class >= GFX10) {
73       /* TODO: Expose counters from each shader array separately if needed. */
74       value |= S_030800_SA_BROADCAST_WRITES(1);
75    }
76 
77    if (instance >= 0) {
78       value |= S_030800_INSTANCE_INDEX(instance);
79    } else {
80       value |= S_030800_INSTANCE_BROADCAST_WRITES(1);
81    }
82 
83    radeon_begin(cs);
84    radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, value);
85    radeon_end();
86 }
87 
si_pc_emit_shaders(struct si_context * sctx,unsigned shaders)88 static void si_pc_emit_shaders(struct si_context *sctx, unsigned shaders)
89 {
90    struct radeon_cmdbuf *cs = &sctx->gfx_cs;
91 
92    radeon_begin(cs);
93    radeon_set_uconfig_reg_seq(R_036780_SQ_PERFCOUNTER_CTRL, 2, false);
94    radeon_emit(shaders & 0x7f);
95    radeon_emit(0xffffffff);
96    radeon_end();
97 }
98 
si_pc_emit_select(struct si_context * sctx,struct ac_pc_block * block,unsigned count,unsigned * selectors)99 static void si_pc_emit_select(struct si_context *sctx, struct ac_pc_block *block, unsigned count,
100                               unsigned *selectors)
101 {
102    struct ac_pc_block_base *regs = block->b->b;
103    struct radeon_cmdbuf *cs = &sctx->gfx_cs;
104    unsigned idx;
105 
106    assert(count <= regs->num_counters);
107 
108    /* Fake counters. */
109    if (!regs->select0)
110       return;
111 
112    radeon_begin(cs);
113 
114    for (idx = 0; idx < count; ++idx) {
115       radeon_set_uconfig_reg_seq(regs->select0[idx], 1, false);
116       radeon_emit(selectors[idx] | regs->select_or);
117    }
118 
119    for (idx = 0; idx < regs->num_spm_counters; idx++) {
120       radeon_set_uconfig_reg_seq(regs->select1[idx], 1, false);
121       radeon_emit(0);
122    }
123 
124    radeon_end();
125 }
126 
si_pc_emit_start(struct si_context * sctx,struct si_resource * buffer,uint64_t va)127 static void si_pc_emit_start(struct si_context *sctx, struct si_resource *buffer, uint64_t va)
128 {
129    struct radeon_cmdbuf *cs = &sctx->gfx_cs;
130 
131    si_cp_copy_data(sctx, &sctx->gfx_cs, COPY_DATA_DST_MEM, buffer, va - buffer->gpu_address,
132                    COPY_DATA_IMM, NULL, 1);
133 
134    radeon_begin(cs);
135    radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
136                           S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET));
137    radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
138    radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0));
139    radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
140                           S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING));
141    radeon_end();
142 }
143 
144 /* Note: The buffer was already added in si_pc_emit_start, so we don't have to
145  * do it again in here. */
si_pc_emit_stop(struct si_context * sctx,struct si_resource * buffer,uint64_t va)146 static void si_pc_emit_stop(struct si_context *sctx, struct si_resource *buffer, uint64_t va)
147 {
148    struct radeon_cmdbuf *cs = &sctx->gfx_cs;
149 
150    si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
151                      EOP_DATA_SEL_VALUE_32BIT, buffer, va, 0, SI_NOT_QUERY);
152    si_cp_wait_mem(sctx, cs, va, 0, 0xffffffff, WAIT_REG_MEM_EQUAL);
153 
154    radeon_begin(cs);
155    radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
156    radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
157    radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
158    radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
159    radeon_set_uconfig_reg(
160       R_036020_CP_PERFMON_CNTL,
161       S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) | S_036020_PERFMON_SAMPLE_ENABLE(1));
162    radeon_end();
163 }
164 
si_pc_emit_read(struct si_context * sctx,struct ac_pc_block * block,unsigned count,uint64_t va)165 static void si_pc_emit_read(struct si_context *sctx, struct ac_pc_block *block, unsigned count,
166                             uint64_t va)
167 {
168    struct ac_pc_block_base *regs = block->b->b;
169    struct radeon_cmdbuf *cs = &sctx->gfx_cs;
170    unsigned idx;
171    unsigned reg = regs->counter0_lo;
172    unsigned reg_delta = 8;
173 
174    radeon_begin(cs);
175 
176    if (regs->select0) {
177       for (idx = 0; idx < count; ++idx) {
178          if (regs->counters)
179             reg = regs->counters[idx];
180 
181          radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
182          radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
183                             COPY_DATA_COUNT_SEL); /* 64 bits */
184          radeon_emit(reg >> 2);
185          radeon_emit(0); /* unused */
186          radeon_emit(va);
187          radeon_emit(va >> 32);
188          va += sizeof(uint64_t);
189          reg += reg_delta;
190       }
191    } else {
192       /* Fake counters. */
193       for (idx = 0; idx < count; ++idx) {
194          radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
195          radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
196                      COPY_DATA_COUNT_SEL);
197          radeon_emit(0); /* immediate */
198          radeon_emit(0);
199          radeon_emit(va);
200          radeon_emit(va >> 32);
201          va += sizeof(uint64_t);
202       }
203    }
204    radeon_end();
205 }
206 
si_pc_query_destroy(struct si_context * sctx,struct si_query * squery)207 static void si_pc_query_destroy(struct si_context *sctx, struct si_query *squery)
208 {
209    struct si_query_pc *query = (struct si_query_pc *)squery;
210 
211    while (query->groups) {
212       struct si_query_group *group = query->groups;
213       query->groups = group->next;
214       FREE(group);
215    }
216 
217    FREE(query->counters);
218 
219    si_query_buffer_destroy(sctx->screen, &query->buffer);
220    FREE(query);
221 }
222 
si_inhibit_clockgating(struct si_context * sctx,struct radeon_cmdbuf * cs,bool inhibit)223 void si_inhibit_clockgating(struct si_context *sctx, struct radeon_cmdbuf *cs, bool inhibit)
224 {
225    radeon_begin(&sctx->gfx_cs);
226 
227    if (sctx->chip_class >= GFX10) {
228       radeon_set_uconfig_reg(R_037390_RLC_PERFMON_CLK_CNTL,
229                              S_037390_PERFMON_CLOCK_STATE(inhibit));
230    } else if (sctx->chip_class >= GFX8) {
231       radeon_set_uconfig_reg(R_0372FC_RLC_PERFMON_CLK_CNTL,
232                              S_0372FC_PERFMON_CLOCK_STATE(inhibit));
233    }
234    radeon_end();
235 }
236 
si_pc_query_resume(struct si_context * sctx,struct si_query * squery)237 static void si_pc_query_resume(struct si_context *sctx, struct si_query *squery)
238 /*
239                                    struct si_query_hw *hwquery,
240                                    struct si_resource *buffer, uint64_t va)*/
241 {
242    struct si_query_pc *query = (struct si_query_pc *)squery;
243    int current_se = -1;
244    int current_instance = -1;
245 
246    if (!si_query_buffer_alloc(sctx, &query->buffer, NULL, query->result_size))
247       return;
248    si_need_gfx_cs_space(sctx, 0);
249 
250    if (query->shaders)
251       si_pc_emit_shaders(sctx, query->shaders);
252 
253    si_inhibit_clockgating(sctx, &sctx->gfx_cs, true);
254 
255    for (struct si_query_group *group = query->groups; group; group = group->next) {
256       struct ac_pc_block *block = group->block;
257 
258       if (group->se != current_se || group->instance != current_instance) {
259          current_se = group->se;
260          current_instance = group->instance;
261          si_pc_emit_instance(sctx, group->se, group->instance);
262       }
263 
264       si_pc_emit_select(sctx, block, group->num_counters, group->selectors);
265    }
266 
267    if (current_se != -1 || current_instance != -1)
268       si_pc_emit_instance(sctx, -1, -1);
269 
270    uint64_t va = query->buffer.buf->gpu_address + query->buffer.results_end;
271    si_pc_emit_start(sctx, query->buffer.buf, va);
272 }
273 
si_pc_query_suspend(struct si_context * sctx,struct si_query * squery)274 static void si_pc_query_suspend(struct si_context *sctx, struct si_query *squery)
275 {
276    struct si_query_pc *query = (struct si_query_pc *)squery;
277 
278    if (!query->buffer.buf)
279       return;
280 
281    uint64_t va = query->buffer.buf->gpu_address + query->buffer.results_end;
282    query->buffer.results_end += query->result_size;
283 
284    si_pc_emit_stop(sctx, query->buffer.buf, va);
285 
286    for (struct si_query_group *group = query->groups; group; group = group->next) {
287       struct ac_pc_block *block = group->block;
288       unsigned se = group->se >= 0 ? group->se : 0;
289       unsigned se_end = se + 1;
290 
291       if ((block->b->b->flags & AC_PC_BLOCK_SE) && (group->se < 0))
292          se_end = sctx->screen->info.max_se;
293 
294       do {
295          unsigned instance = group->instance >= 0 ? group->instance : 0;
296 
297          do {
298             si_pc_emit_instance(sctx, se, instance);
299             si_pc_emit_read(sctx, block, group->num_counters, va);
300             va += sizeof(uint64_t) * group->num_counters;
301          } while (group->instance < 0 && ++instance < block->num_instances);
302       } while (++se < se_end);
303    }
304 
305    si_pc_emit_instance(sctx, -1, -1);
306 
307    si_inhibit_clockgating(sctx, &sctx->gfx_cs, false);
308 }
309 
si_pc_query_begin(struct si_context * ctx,struct si_query * squery)310 static bool si_pc_query_begin(struct si_context *ctx, struct si_query *squery)
311 {
312    struct si_query_pc *query = (struct si_query_pc *)squery;
313 
314    si_query_buffer_reset(ctx, &query->buffer);
315 
316    list_addtail(&query->b.active_list, &ctx->active_queries);
317    ctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend;
318 
319    si_pc_query_resume(ctx, squery);
320 
321    return true;
322 }
323 
si_pc_query_end(struct si_context * ctx,struct si_query * squery)324 static bool si_pc_query_end(struct si_context *ctx, struct si_query *squery)
325 {
326    struct si_query_pc *query = (struct si_query_pc *)squery;
327 
328    si_pc_query_suspend(ctx, squery);
329 
330    list_del(&squery->active_list);
331    ctx->num_cs_dw_queries_suspend -= squery->num_cs_dw_suspend;
332 
333    return query->buffer.buf != NULL;
334 }
335 
si_pc_query_add_result(struct si_query_pc * query,void * buffer,union pipe_query_result * result)336 static void si_pc_query_add_result(struct si_query_pc *query, void *buffer,
337                                    union pipe_query_result *result)
338 {
339    uint64_t *results = buffer;
340    unsigned i, j;
341 
342    for (i = 0; i < query->num_counters; ++i) {
343       struct si_query_counter *counter = &query->counters[i];
344 
345       for (j = 0; j < counter->qwords; ++j) {
346          uint32_t value = results[counter->base + j * counter->stride];
347          result->batch[i].u64 += value;
348       }
349    }
350 }
351 
si_pc_query_get_result(struct si_context * sctx,struct si_query * squery,bool wait,union pipe_query_result * result)352 static bool si_pc_query_get_result(struct si_context *sctx, struct si_query *squery, bool wait,
353                                    union pipe_query_result *result)
354 {
355    struct si_query_pc *query = (struct si_query_pc *)squery;
356 
357    memset(result, 0, sizeof(result->batch[0]) * query->num_counters);
358 
359    for (struct si_query_buffer *qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
360       unsigned usage = PIPE_MAP_READ | (wait ? 0 : PIPE_MAP_DONTBLOCK);
361       unsigned results_base = 0;
362       void *map;
363 
364       if (squery->b.flushed)
365          map = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL, usage);
366       else
367          map = si_buffer_map(sctx, qbuf->buf, usage);
368 
369       if (!map)
370          return false;
371 
372       while (results_base != qbuf->results_end) {
373          si_pc_query_add_result(query, map + results_base, result);
374          results_base += query->result_size;
375       }
376    }
377 
378    return true;
379 }
380 
381 static const struct si_query_ops batch_query_ops = {
382    .destroy = si_pc_query_destroy,
383    .begin = si_pc_query_begin,
384    .end = si_pc_query_end,
385    .get_result = si_pc_query_get_result,
386 
387    .suspend = si_pc_query_suspend,
388    .resume = si_pc_query_resume,
389 };
390 
get_group_state(struct si_screen * screen,struct si_query_pc * query,struct ac_pc_block * block,unsigned sub_gid)391 static struct si_query_group *get_group_state(struct si_screen *screen, struct si_query_pc *query,
392                                               struct ac_pc_block *block, unsigned sub_gid)
393 {
394    struct si_perfcounters *pc = screen->perfcounters;
395    struct si_query_group *group = query->groups;
396 
397    while (group) {
398       if (group->block == block && group->sub_gid == sub_gid)
399          return group;
400       group = group->next;
401    }
402 
403    group = CALLOC_STRUCT(si_query_group);
404    if (!group)
405       return NULL;
406 
407    group->block = block;
408    group->sub_gid = sub_gid;
409 
410    if (block->b->b->flags & AC_PC_BLOCK_SHADER) {
411       unsigned sub_gids = block->num_instances;
412       unsigned shader_id;
413       unsigned shaders;
414       unsigned query_shaders;
415 
416       if (ac_pc_block_has_per_se_groups(&pc->base, block))
417          sub_gids = sub_gids * screen->info.max_se;
418       shader_id = sub_gid / sub_gids;
419       sub_gid = sub_gid % sub_gids;
420 
421       shaders = ac_pc_shader_type_bits[shader_id];
422 
423       query_shaders = query->shaders & ~AC_PC_SHADERS_WINDOWING;
424       if (query_shaders && query_shaders != shaders) {
425          fprintf(stderr, "si_perfcounter: incompatible shader groups\n");
426          FREE(group);
427          return NULL;
428       }
429       query->shaders = shaders;
430    }
431 
432    if (block->b->b->flags & AC_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
433       // A non-zero value in query->shaders ensures that the shader
434       // masking is reset unless the user explicitly requests one.
435       query->shaders = AC_PC_SHADERS_WINDOWING;
436    }
437 
438    if (ac_pc_block_has_per_se_groups(&pc->base, block)) {
439       group->se = sub_gid / block->num_instances;
440       sub_gid = sub_gid % block->num_instances;
441    } else {
442       group->se = -1;
443    }
444 
445    if (ac_pc_block_has_per_instance_groups(&pc->base, block)) {
446       group->instance = sub_gid;
447    } else {
448       group->instance = -1;
449    }
450 
451    group->next = query->groups;
452    query->groups = group;
453 
454    return group;
455 }
456 
si_create_batch_query(struct pipe_context * ctx,unsigned num_queries,unsigned * query_types)457 struct pipe_query *si_create_batch_query(struct pipe_context *ctx, unsigned num_queries,
458                                          unsigned *query_types)
459 {
460    struct si_screen *screen = (struct si_screen *)ctx->screen;
461    struct si_perfcounters *pc = screen->perfcounters;
462    struct ac_pc_block *block;
463    struct si_query_group *group;
464    struct si_query_pc *query;
465    unsigned base_gid, sub_gid, sub_index;
466    unsigned i, j;
467 
468    if (!pc)
469       return NULL;
470 
471    query = CALLOC_STRUCT(si_query_pc);
472    if (!query)
473       return NULL;
474 
475    query->b.ops = &batch_query_ops;
476 
477    query->num_counters = num_queries;
478 
479    /* Collect selectors per group */
480    for (i = 0; i < num_queries; ++i) {
481       unsigned sub_gid;
482 
483       if (query_types[i] < SI_QUERY_FIRST_PERFCOUNTER)
484          goto error;
485 
486       block =
487          ac_lookup_counter(&pc->base, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, &base_gid, &sub_index);
488       if (!block)
489          goto error;
490 
491       sub_gid = sub_index / block->b->selectors;
492       sub_index = sub_index % block->b->selectors;
493 
494       group = get_group_state(screen, query, block, sub_gid);
495       if (!group)
496          goto error;
497 
498       if (group->num_counters >= block->b->b->num_counters) {
499          fprintf(stderr, "perfcounter group %s: too many selected\n", block->b->b->name);
500          goto error;
501       }
502       group->selectors[group->num_counters] = sub_index;
503       ++group->num_counters;
504    }
505 
506    /* Compute result bases and CS size per group */
507    query->b.num_cs_dw_suspend = pc->num_stop_cs_dwords;
508    query->b.num_cs_dw_suspend += pc->num_instance_cs_dwords;
509 
510    i = 0;
511    for (group = query->groups; group; group = group->next) {
512       struct ac_pc_block *block = group->block;
513       unsigned read_dw;
514       unsigned instances = 1;
515 
516       if ((block->b->b->flags & AC_PC_BLOCK_SE) && group->se < 0)
517          instances = screen->info.max_se;
518       if (group->instance < 0)
519          instances *= block->num_instances;
520 
521       group->result_base = i;
522       query->result_size += sizeof(uint64_t) * instances * group->num_counters;
523       i += instances * group->num_counters;
524 
525       read_dw = 6 * group->num_counters;
526       query->b.num_cs_dw_suspend += instances * read_dw;
527       query->b.num_cs_dw_suspend += instances * pc->num_instance_cs_dwords;
528    }
529 
530    if (query->shaders) {
531       if (query->shaders == AC_PC_SHADERS_WINDOWING)
532          query->shaders = 0xffffffff;
533    }
534 
535    /* Map user-supplied query array to result indices */
536    query->counters = CALLOC(num_queries, sizeof(*query->counters));
537    for (i = 0; i < num_queries; ++i) {
538       struct si_query_counter *counter = &query->counters[i];
539       struct ac_pc_block *block;
540 
541       block =
542          ac_lookup_counter(&pc->base, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, &base_gid, &sub_index);
543 
544       sub_gid = sub_index / block->b->selectors;
545       sub_index = sub_index % block->b->selectors;
546 
547       group = get_group_state(screen, query, block, sub_gid);
548       assert(group != NULL);
549 
550       for (j = 0; j < group->num_counters; ++j) {
551          if (group->selectors[j] == sub_index)
552             break;
553       }
554 
555       counter->base = group->result_base + j;
556       counter->stride = group->num_counters;
557 
558       counter->qwords = 1;
559       if ((block->b->b->flags & AC_PC_BLOCK_SE) && group->se < 0)
560          counter->qwords = screen->info.max_se;
561       if (group->instance < 0)
562          counter->qwords *= block->num_instances;
563    }
564 
565    return (struct pipe_query *)query;
566 
567 error:
568    si_pc_query_destroy((struct si_context *)ctx, &query->b);
569    return NULL;
570 }
571 
si_get_perfcounter_info(struct si_screen * screen,unsigned index,struct pipe_driver_query_info * info)572 int si_get_perfcounter_info(struct si_screen *screen, unsigned index,
573                             struct pipe_driver_query_info *info)
574 {
575    struct si_perfcounters *pc = screen->perfcounters;
576    struct ac_pc_block *block;
577    unsigned base_gid, sub;
578 
579    if (!pc)
580       return 0;
581 
582    if (!info) {
583       unsigned bid, num_queries = 0;
584 
585       for (bid = 0; bid < pc->base.num_blocks; ++bid) {
586          num_queries += pc->base.blocks[bid].b->selectors * pc->base.blocks[bid].num_groups;
587       }
588 
589       return num_queries;
590    }
591 
592    block = ac_lookup_counter(&pc->base, index, &base_gid, &sub);
593    if (!block)
594       return 0;
595 
596    if (!block->selector_names) {
597       if (!ac_init_block_names(&screen->info, &pc->base, block))
598          return 0;
599    }
600    info->name = block->selector_names + sub * block->selector_name_stride;
601    info->query_type = SI_QUERY_FIRST_PERFCOUNTER + index;
602    info->max_value.u64 = 0;
603    info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
604    info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
605    info->group_id = base_gid + sub / block->b->selectors;
606    info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
607    if (sub > 0 && sub + 1 < block->b->selectors * block->num_groups)
608       info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST;
609    return 1;
610 }
611 
si_get_perfcounter_group_info(struct si_screen * screen,unsigned index,struct pipe_driver_query_group_info * info)612 int si_get_perfcounter_group_info(struct si_screen *screen, unsigned index,
613                                   struct pipe_driver_query_group_info *info)
614 {
615    struct si_perfcounters *pc = screen->perfcounters;
616    struct ac_pc_block *block;
617 
618    if (!pc)
619       return 0;
620 
621    if (!info)
622       return pc->base.num_groups;
623 
624    block = ac_lookup_group(&pc->base, &index);
625    if (!block)
626       return 0;
627 
628    if (!block->group_names) {
629       if (!ac_init_block_names(&screen->info, &pc->base, block))
630          return 0;
631    }
632    info->name = block->group_names + index * block->group_name_stride;
633    info->num_queries = block->b->selectors;
634    info->max_active_queries = block->b->b->num_counters;
635    return 1;
636 }
637 
si_destroy_perfcounters(struct si_screen * screen)638 void si_destroy_perfcounters(struct si_screen *screen)
639 {
640    struct si_perfcounters *pc = screen->perfcounters;
641 
642    if (!pc)
643       return;
644 
645    ac_destroy_perfcounters(&pc->base);
646    FREE(pc);
647    screen->perfcounters = NULL;
648 }
649 
si_init_perfcounters(struct si_screen * screen)650 void si_init_perfcounters(struct si_screen *screen)
651 {
652    bool separate_se, separate_instance;
653 
654    separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false);
655    separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false);
656 
657    screen->perfcounters = CALLOC_STRUCT(si_perfcounters);
658    if (!screen->perfcounters)
659       return;
660 
661    screen->perfcounters->num_stop_cs_dwords = 14 + si_cp_write_fence_dwords(screen);
662    screen->perfcounters->num_instance_cs_dwords = 3;
663 
664    if (!ac_init_perfcounters(&screen->info, separate_se, separate_instance,
665                              &screen->perfcounters->base)) {
666       si_destroy_perfcounters(screen);
667    }
668 }
669