/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "si_build_pm4.h"
#include "si_query.h"
#include "util/u_memory.h"

#include "ac_perfcounter.h"

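/* Batch-query bookkeeping: one si_query_group per (block, SE, instance)
 * combination with its selected counters, and one si_query_counter per
 * user-requested query, mapping it to slots in the results buffer.
 */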
struct si_query_group {
   struct si_query_group *next;
   struct ac_pc_block *block;
   unsigned sub_gid;     /* only used during init */
   unsigned result_base; /* only used during init */
   int se;
   int instance;
   unsigned num_counters;
   unsigned selectors[AC_QUERY_MAX_COUNTERS];
};

struct si_query_counter {
   unsigned base;
   unsigned qwords;
   unsigned stride; /* in uint64s */
};

struct si_query_pc {
   struct si_query b;
   struct si_query_buffer buffer;

   /* Size of the results in memory, in bytes. */
   unsigned result_size;

   unsigned shaders;
   unsigned num_counters;
   struct si_query_counter *counters;
   struct si_query_group *groups;
};

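/* Program GRBM_GFX_INDEX so that subsequent register writes go to the given
 * shader engine/instance, or are broadcast when se/instance is negative.
 */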
static void si_pc_emit_instance(struct si_context *sctx, int se, int instance)
{
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
   unsigned value = S_030800_SH_BROADCAST_WRITES(1);

   if (se >= 0) {
      value |= S_030800_SE_INDEX(se);
   } else {
      value |= S_030800_SE_BROADCAST_WRITES(1);
   }

   if (sctx->chip_class >= GFX10) {
      /* TODO: Expose counters from each shader array separately if needed. */
      value |= S_030800_SA_BROADCAST_WRITES(1);
   }

   if (instance >= 0) {
      value |= S_030800_INSTANCE_INDEX(instance);
   } else {
      value |= S_030800_INSTANCE_BROADCAST_WRITES(1);
   }

   radeon_begin(cs);
   radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, value);
   radeon_end();
}

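/* Select which shader stage types the SQ blocks count for. The second
 * register in the written pair is filled with all ones (no further masking).
 */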
static void si_pc_emit_shaders(struct si_context *sctx, unsigned shaders)
{
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;

   radeon_begin(cs);
   radeon_set_uconfig_reg_seq(R_036780_SQ_PERFCOUNTER_CTRL, 2, false);
   radeon_emit(shaders & 0x7f);
   radeon_emit(0xffffffff);
   radeon_end();
}

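/* Write the event selectors for up to `count` counters of a block. Blocks
 * without select registers are software ("fake") counters and need no
 * programming; the SPM select registers are cleared since SPM is unused here.
 */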
static void si_pc_emit_select(struct si_context *sctx, struct ac_pc_block *block, unsigned count,
                              unsigned *selectors)
{
   struct ac_pc_block_base *regs = block->b->b;
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
   unsigned idx;

   assert(count <= regs->num_counters);

   /* Fake counters. */
   if (!regs->select0)
      return;

   radeon_begin(cs);

   for (idx = 0; idx < count; ++idx) {
      radeon_set_uconfig_reg_seq(regs->select0[idx], 1, false);
      radeon_emit(selectors[idx] | regs->select_or);
   }

   for (idx = 0; idx < regs->num_spm_counters; idx++) {
      radeon_set_uconfig_reg_seq(regs->select1[idx], 1, false);
      radeon_emit(0);
   }

   radeon_end();
}

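/* Start counting: write the fence value 1 into the results buffer, put the
 * perfmon state machine through DISABLE_AND_RESET, then begin counting.
 */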
static void si_pc_emit_start(struct si_context *sctx, struct si_resource *buffer, uint64_t va)
{
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;

   si_cp_copy_data(sctx, cs, COPY_DATA_DST_MEM, buffer, va - buffer->gpu_address,
                   COPY_DATA_IMM, NULL, 1);

   radeon_begin(cs);
   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET));
   radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
   radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0));
   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING));
   radeon_end();
}

/* Note: The buffer was already added in si_pc_emit_start, so we don't have to
 * do it again in here.
 *
 * The fence written at start holds 1; release_mem overwrites it with 0 at
 * bottom-of-pipe, and the CP waits for that before sampling, which guarantees
 * that all in-flight work has finished before the counters are stopped. */
static void si_pc_emit_stop(struct si_context *sctx, struct si_resource *buffer, uint64_t va)
{
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;

   si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
                     EOP_DATA_SEL_VALUE_32BIT, buffer, va, 0, SI_NOT_QUERY);
   si_cp_wait_mem(sctx, cs, va, 0, 0xffffffff, WAIT_REG_MEM_EQUAL);

   radeon_begin(cs);
   radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
   radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
   radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
   radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
   radeon_set_uconfig_reg(
      R_036020_CP_PERFMON_CNTL,
      S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) |
         S_036020_PERFMON_SAMPLE_ENABLE(1));
   radeon_end();
}

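/* Read a block's counters back into the results buffer: one CP COPY_DATA per
 * counter from the perf register space (64 bits each). Fake counters have no
 * registers and store zeros instead.
 */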
static void si_pc_emit_read(struct si_context *sctx, struct ac_pc_block *block, unsigned count,
                            uint64_t va)
{
   struct ac_pc_block_base *regs = block->b->b;
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
   unsigned idx;
   unsigned reg = regs->counter0_lo;
   unsigned reg_delta = 8;

   radeon_begin(cs);

   if (regs->select0) {
      for (idx = 0; idx < count; ++idx) {
         if (regs->counters)
            reg = regs->counters[idx];

         radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
         radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
                     COPY_DATA_COUNT_SEL); /* 64 bits */
         radeon_emit(reg >> 2);
         radeon_emit(0); /* unused */
         radeon_emit(va);
         radeon_emit(va >> 32);
         va += sizeof(uint64_t);
         reg += reg_delta;
      }
   } else {
      /* Fake counters. */
      for (idx = 0; idx < count; ++idx) {
         radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
         radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
                     COPY_DATA_COUNT_SEL);
         radeon_emit(0); /* immediate */
         radeon_emit(0);
         radeon_emit(va);
         radeon_emit(va >> 32);
         va += sizeof(uint64_t);
      }
   }
   radeon_end();
}

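/* Release everything owned by a batch query. */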
static void si_pc_query_destroy(struct si_context *sctx, struct si_query *squery)
{
   struct si_query_pc *query = (struct si_query_pc *)squery;

   while (query->groups) {
      struct si_query_group *group = query->groups;
      query->groups = group->next;
      FREE(group);
   }

   FREE(query->counters);

   si_query_buffer_destroy(sctx->screen, &query->buffer);
   FREE(query);
}

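/* Toggle clock-gating inhibition around perfcounter use (the intent is to
 * keep sampled blocks clocked while counting); the controlling RLC register
 * differs between GFX8 and GFX10.
 */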
void si_inhibit_clockgating(struct si_context *sctx, struct radeon_cmdbuf *cs, bool inhibit)
{
   radeon_begin(&sctx->gfx_cs);

   if (sctx->chip_class >= GFX10) {
      radeon_set_uconfig_reg(R_037390_RLC_PERFMON_CLK_CNTL,
                             S_037390_PERFMON_CLOCK_STATE(inhibit));
   } else if (sctx->chip_class >= GFX8) {
      radeon_set_uconfig_reg(R_0372FC_RLC_PERFMON_CLK_CNTL,
                             S_0372FC_PERFMON_CLOCK_STATE(inhibit));
   }
   radeon_end();
}

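/* (Re)start a performance counter query: allocate result space, program
 * shader-type windowing and per-group counter selects, then start counting.
 */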
static void si_pc_query_resume(struct si_context *sctx, struct si_query *squery)
{
   struct si_query_pc *query = (struct si_query_pc *)squery;
   int current_se = -1;
   int current_instance = -1;

   if (!si_query_buffer_alloc(sctx, &query->buffer, NULL, query->result_size))
      return;
   si_need_gfx_cs_space(sctx, 0);

   if (query->shaders)
      si_pc_emit_shaders(sctx, query->shaders);

   si_inhibit_clockgating(sctx, &sctx->gfx_cs, true);

   for (struct si_query_group *group = query->groups; group; group = group->next) {
      struct ac_pc_block *block = group->block;

      if (group->se != current_se || group->instance != current_instance) {
         current_se = group->se;
         current_instance = group->instance;
         si_pc_emit_instance(sctx, group->se, group->instance);
      }

      si_pc_emit_select(sctx, block, group->num_counters, group->selectors);
   }

   if (current_se != -1 || current_instance != -1)
      si_pc_emit_instance(sctx, -1, -1);

   uint64_t va = query->buffer.buf->gpu_address + query->buffer.results_end;
   si_pc_emit_start(sctx, query->buffer.buf, va);
}

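/* Stop counting and emit the readback: walk every group and iterate over all
 * shader engines / instances that broadcast groups cover, reading each one
 * separately.
 */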
static void si_pc_query_suspend(struct si_context *sctx, struct si_query *squery)
{
   struct si_query_pc *query = (struct si_query_pc *)squery;

   if (!query->buffer.buf)
      return;

   uint64_t va = query->buffer.buf->gpu_address + query->buffer.results_end;
   query->buffer.results_end += query->result_size;

   si_pc_emit_stop(sctx, query->buffer.buf, va);

   for (struct si_query_group *group = query->groups; group; group = group->next) {
      struct ac_pc_block *block = group->block;
      unsigned se = group->se >= 0 ? group->se : 0;
      unsigned se_end = se + 1;

      if ((block->b->b->flags & AC_PC_BLOCK_SE) && (group->se < 0))
         se_end = sctx->screen->info.max_se;

      do {
         unsigned instance = group->instance >= 0 ? group->instance : 0;

         do {
            si_pc_emit_instance(sctx, se, instance);
            si_pc_emit_read(sctx, block, group->num_counters, va);
            va += sizeof(uint64_t) * group->num_counters;
         } while (group->instance < 0 && ++instance < block->num_instances);
      } while (++se < se_end);
   }

   si_pc_emit_instance(sctx, -1, -1);

   si_inhibit_clockgating(sctx, &sctx->gfx_cs, false);
}

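/* pipe begin/end entry points; the CS space needed to suspend is accounted
 * here so active queries can be suspended at flush time.
 */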
static bool si_pc_query_begin(struct si_context *ctx, struct si_query *squery)
{
   struct si_query_pc *query = (struct si_query_pc *)squery;

   si_query_buffer_reset(ctx, &query->buffer);

   list_addtail(&query->b.active_list, &ctx->active_queries);
   ctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend;

   si_pc_query_resume(ctx, squery);

   return true;
}

static bool si_pc_query_end(struct si_context *ctx, struct si_query *squery)
{
   struct si_query_pc *query = (struct si_query_pc *)squery;

   si_pc_query_suspend(ctx, squery);

   list_del(&squery->active_list);
   ctx->num_cs_dw_queries_suspend -= squery->num_cs_dw_suspend;

   return query->buffer.buf != NULL;
}

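/* Accumulate a single snapshot into the result; each counter covers `qwords`
 * slots (one per SE/instance it was read from), spaced `stride` uint64s apart.
 */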
static void si_pc_query_add_result(struct si_query_pc *query, void *buffer,
                                   union pipe_query_result *result)
{
   uint64_t *results = buffer;
   unsigned i, j;

   for (i = 0; i < query->num_counters; ++i) {
      struct si_query_counter *counter = &query->counters[i];

      for (j = 0; j < counter->qwords; ++j) {
         /* Only the low 32 bits of each 64-bit slot are accumulated. */
         uint32_t value = results[counter->base + j * counter->stride];
         result->batch[i].u64 += value;
      }
   }
}

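/* Sum all snapshots from the (possibly chained) results buffers. */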
static bool si_pc_query_get_result(struct si_context *sctx, struct si_query *squery, bool wait,
                                   union pipe_query_result *result)
{
   struct si_query_pc *query = (struct si_query_pc *)squery;

   memset(result, 0, sizeof(result->batch[0]) * query->num_counters);

   for (struct si_query_buffer *qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
      unsigned usage = PIPE_MAP_READ | (wait ? 0 : PIPE_MAP_DONTBLOCK);
      unsigned results_base = 0;
      void *map;

      if (squery->b.flushed)
         map = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL, usage);
      else
         map = si_buffer_map(sctx, qbuf->buf, usage);

      if (!map)
         return false;

      while (results_base != qbuf->results_end) {
         si_pc_query_add_result(query, map + results_base, result);
         results_base += query->result_size;
      }
   }

   return true;
}

static const struct si_query_ops batch_query_ops = {
   .destroy = si_pc_query_destroy,
   .begin = si_pc_query_begin,
   .end = si_pc_query_end,
   .get_result = si_pc_query_get_result,

   .suspend = si_pc_query_suspend,
   .resume = si_pc_query_resume,
};

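/* Find or create the group state for (block, sub_gid). sub_gid encodes the
 * shader type, shader engine and instance, depending on the block's flags.
 */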
static struct si_query_group *get_group_state(struct si_screen *screen, struct si_query_pc *query,
                                              struct ac_pc_block *block, unsigned sub_gid)
{
   struct si_perfcounters *pc = screen->perfcounters;
   struct si_query_group *group = query->groups;

   while (group) {
      if (group->block == block && group->sub_gid == sub_gid)
         return group;
      group = group->next;
   }

   group = CALLOC_STRUCT(si_query_group);
   if (!group)
      return NULL;

   group->block = block;
   group->sub_gid = sub_gid;

   if (block->b->b->flags & AC_PC_BLOCK_SHADER) {
      unsigned sub_gids = block->num_instances;
      unsigned shader_id;
      unsigned shaders;
      unsigned query_shaders;

      if (ac_pc_block_has_per_se_groups(&pc->base, block))
         sub_gids = sub_gids * screen->info.max_se;
      shader_id = sub_gid / sub_gids;
      sub_gid = sub_gid % sub_gids;

      shaders = ac_pc_shader_type_bits[shader_id];

      query_shaders = query->shaders & ~AC_PC_SHADERS_WINDOWING;
      if (query_shaders && query_shaders != shaders) {
         fprintf(stderr, "si_perfcounter: incompatible shader groups\n");
         FREE(group);
         return NULL;
      }
      query->shaders = shaders;
   }

   if (block->b->b->flags & AC_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
      /* A non-zero value in query->shaders ensures that the shader
       * masking is reset unless the user explicitly requests one. */
      query->shaders = AC_PC_SHADERS_WINDOWING;
   }

   if (ac_pc_block_has_per_se_groups(&pc->base, block)) {
      group->se = sub_gid / block->num_instances;
      sub_gid = sub_gid % block->num_instances;
   } else {
      group->se = -1;
   }

   if (ac_pc_block_has_per_instance_groups(&pc->base, block)) {
      group->instance = sub_gid;
   } else {
      group->instance = -1;
   }

   group->next = query->groups;
   query->groups = group;

   return group;
}

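/* Entry point for batch queries (PIPE_DRIVER_QUERY_FLAG_BATCH): translate a
 * list of perfcounter query types into groups/selectors, compute the result
 * buffer layout, and reserve the CS space needed for suspend.
 */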
struct pipe_query *si_create_batch_query(struct pipe_context *ctx, unsigned num_queries,
                                         unsigned *query_types)
{
   struct si_screen *screen = (struct si_screen *)ctx->screen;
   struct si_perfcounters *pc = screen->perfcounters;
   struct ac_pc_block *block;
   struct si_query_group *group;
   struct si_query_pc *query;
   unsigned base_gid, sub_gid, sub_index;
   unsigned i, j;

   if (!pc)
      return NULL;

   query = CALLOC_STRUCT(si_query_pc);
   if (!query)
      return NULL;

   query->b.ops = &batch_query_ops;

   query->num_counters = num_queries;

   /* Collect selectors per group */
   for (i = 0; i < num_queries; ++i) {
      unsigned sub_gid;

      if (query_types[i] < SI_QUERY_FIRST_PERFCOUNTER)
         goto error;

      block = ac_lookup_counter(&pc->base, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER,
                                &base_gid, &sub_index);
      if (!block)
         goto error;

      sub_gid = sub_index / block->b->selectors;
      sub_index = sub_index % block->b->selectors;

      group = get_group_state(screen, query, block, sub_gid);
      if (!group)
         goto error;

      if (group->num_counters >= block->b->b->num_counters) {
         fprintf(stderr, "perfcounter group %s: too many selected\n", block->b->b->name);
         goto error;
      }
      group->selectors[group->num_counters] = sub_index;
      ++group->num_counters;
   }

   /* Compute result bases and CS size per group */
   query->b.num_cs_dw_suspend = pc->num_stop_cs_dwords;
   query->b.num_cs_dw_suspend += pc->num_instance_cs_dwords;

   i = 0;
   for (group = query->groups; group; group = group->next) {
      struct ac_pc_block *block = group->block;
      unsigned read_dw;
      unsigned instances = 1;

      if ((block->b->b->flags & AC_PC_BLOCK_SE) && group->se < 0)
         instances = screen->info.max_se;
      if (group->instance < 0)
         instances *= block->num_instances;

      group->result_base = i;
      query->result_size += sizeof(uint64_t) * instances * group->num_counters;
      i += instances * group->num_counters;

      read_dw = 6 * group->num_counters;
      query->b.num_cs_dw_suspend += instances * read_dw;
      query->b.num_cs_dw_suspend += instances * pc->num_instance_cs_dwords;
   }

   if (query->shaders == AC_PC_SHADERS_WINDOWING)
      query->shaders = 0xffffffff;

   /* Map user-supplied query array to result indices */
   query->counters = CALLOC(num_queries, sizeof(*query->counters));
   if (!query->counters)
      goto error;

   for (i = 0; i < num_queries; ++i) {
      struct si_query_counter *counter = &query->counters[i];
      struct ac_pc_block *block;

      block = ac_lookup_counter(&pc->base, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER,
                                &base_gid, &sub_index);

      sub_gid = sub_index / block->b->selectors;
      sub_index = sub_index % block->b->selectors;

      group = get_group_state(screen, query, block, sub_gid);
      assert(group != NULL);

      for (j = 0; j < group->num_counters; ++j) {
         if (group->selectors[j] == sub_index)
            break;
      }

      counter->base = group->result_base + j;
      counter->stride = group->num_counters;

      counter->qwords = 1;
      if ((block->b->b->flags & AC_PC_BLOCK_SE) && group->se < 0)
         counter->qwords = screen->info.max_se;
      if (group->instance < 0)
         counter->qwords *= block->num_instances;
   }

   return (struct pipe_query *)query;

error:
   si_pc_query_destroy((struct si_context *)ctx, &query->b);
   return NULL;
}

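/* Driver-query enumeration: with info == NULL, return the number of available
 * perfcounter queries; otherwise fill in the info for one query.
 */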
int si_get_perfcounter_info(struct si_screen *screen, unsigned index,
                            struct pipe_driver_query_info *info)
{
   struct si_perfcounters *pc = screen->perfcounters;
   struct ac_pc_block *block;
   unsigned base_gid, sub;

   if (!pc)
      return 0;

   if (!info) {
      unsigned bid, num_queries = 0;

      for (bid = 0; bid < pc->base.num_blocks; ++bid) {
         num_queries += pc->base.blocks[bid].b->selectors * pc->base.blocks[bid].num_groups;
      }

      return num_queries;
   }

   block = ac_lookup_counter(&pc->base, index, &base_gid, &sub);
   if (!block)
      return 0;

   if (!block->selector_names) {
      if (!ac_init_block_names(&screen->info, &pc->base, block))
         return 0;
   }
   info->name = block->selector_names + sub * block->selector_name_stride;
   info->query_type = SI_QUERY_FIRST_PERFCOUNTER + index;
   info->max_value.u64 = 0;
   info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
   info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
   info->group_id = base_gid + sub / block->b->selectors;
   info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
   if (sub > 0 && sub + 1 < block->b->selectors * block->num_groups)
      info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST;
   return 1;
}

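/* Same as above, but for perfcounter groups. */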
int si_get_perfcounter_group_info(struct si_screen *screen, unsigned index,
                                  struct pipe_driver_query_group_info *info)
{
   struct si_perfcounters *pc = screen->perfcounters;
   struct ac_pc_block *block;

   if (!pc)
      return 0;

   if (!info)
      return pc->base.num_groups;

   block = ac_lookup_group(&pc->base, &index);
   if (!block)
      return 0;

   if (!block->group_names) {
      if (!ac_init_block_names(&screen->info, &pc->base, block))
         return 0;
   }
   info->name = block->group_names + index * block->group_name_stride;
   info->num_queries = block->b->selectors;
   info->max_active_queries = block->b->b->num_counters;
   return 1;
}

void si_destroy_perfcounters(struct si_screen *screen)
{
   struct si_perfcounters *pc = screen->perfcounters;

   if (!pc)
      return;

   ac_destroy_perfcounters(&pc->base);
   FREE(pc);
   screen->perfcounters = NULL;
}

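/* Create screen->perfcounters. The RADEON_PC_SEPARATE_SE / _INSTANCE debug
 * options expose separate groups per shader engine / instance instead of
 * broadcasting across them.
 */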
void si_init_perfcounters(struct si_screen *screen)
{
   bool separate_se, separate_instance;

   separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false);
   separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false);

   screen->perfcounters = CALLOC_STRUCT(si_perfcounters);
   if (!screen->perfcounters)
      return;

   screen->perfcounters->num_stop_cs_dwords = 14 + si_cp_write_fence_dwords(screen);
   screen->perfcounters->num_instance_cs_dwords = 3;

   if (!ac_init_perfcounters(&screen->info, separate_se, separate_instance,
                             &screen->perfcounters->base)) {
      si_destroy_perfcounters(screen);
   }
}