1 /*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include "ac_gpu_info.h"
26 #include "ac_perfcounter.h"
27
28 #include "util/u_memory.h"
29 #include "macros.h"
30
/* cik_CB
 *
 * Description of the "CB" block: 4 counters, one SELECT register each in
 * select0. select1 lists a SELECT1 register for counter 0 only. No explicit
 * .counters list is given; only counter0_lo is set.
 */
static unsigned cik_CB_select0[] = {
   R_037004_CB_PERFCOUNTER0_SELECT,
   R_03700C_CB_PERFCOUNTER1_SELECT,
   R_037010_CB_PERFCOUNTER2_SELECT,
   R_037014_CB_PERFCOUNTER3_SELECT,
};
static unsigned cik_CB_select1[] = {
   R_037008_CB_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base cik_CB = {
   .gpu_block = CB,
   .name = "CB",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS,

   .select0 = cik_CB_select0,
   .select1 = cik_CB_select1,
   .counter0_lo = R_035018_CB_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x0,
};
55
/* cik_CPC
 *
 * Description of the "CPC" block: 2 counters, no flags. The tables are indexed
 * by counter number, not by register offset (PERFCOUNTER0 sits at a higher
 * address than PERFCOUNTER1), and the result registers are listed explicitly
 * in .counters instead of through counter0_lo.
 */
static unsigned cik_CPC_select0[] = {
   R_036024_CPC_PERFCOUNTER0_SELECT,
   R_03600C_CPC_PERFCOUNTER1_SELECT,
};
static unsigned cik_CPC_select1[] = {
   R_036010_CPC_PERFCOUNTER0_SELECT1,
};
static unsigned cik_CPC_counters[] = {
   R_034018_CPC_PERFCOUNTER0_LO,
   R_034010_CPC_PERFCOUNTER1_LO,
};
static struct ac_pc_block_base cik_CPC = {
   .gpu_block = CPC,
   .name = "CPC",
   .num_counters = 2,

   .select0 = cik_CPC_select0,
   .select1 = cik_CPC_select1,
   .counters = cik_CPC_counters,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x1,
};
81
/* cik_CPF
 *
 * Description of the "CPF" block: 2 counters, no flags. As with the other CP
 * blocks in this file, the tables are indexed by counter number (register
 * offsets are descending) and the result registers are listed explicitly in
 * .counters.
 */
static unsigned cik_CPF_select0[] = {
   R_03601C_CPF_PERFCOUNTER0_SELECT,
   R_036014_CPF_PERFCOUNTER1_SELECT,
};
static unsigned cik_CPF_select1[] = {
   R_036018_CPF_PERFCOUNTER0_SELECT1,
};
static unsigned cik_CPF_counters[] = {
   R_034028_CPF_PERFCOUNTER0_LO,
   R_034020_CPF_PERFCOUNTER1_LO,
};
static struct ac_pc_block_base cik_CPF = {
   .gpu_block = CPF,
   .name = "CPF",
   .num_counters = 2,

   .select0 = cik_CPF_select0,
   .select1 = cik_CPF_select1,
   .counters = cik_CPF_counters,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x2,
};
107
/* cik_CPG
 *
 * Description of the "CPG" block: 2 counters, no flags. Tables are indexed by
 * counter number (register offsets are descending); result registers are
 * listed explicitly in .counters.
 */
static unsigned cik_CPG_select0[] = {
   R_036008_CPG_PERFCOUNTER0_SELECT,
   R_036000_CPG_PERFCOUNTER1_SELECT,
};
static unsigned cik_CPG_select1[] = {
   R_036004_CPG_PERFCOUNTER0_SELECT1
};
static unsigned cik_CPG_counters[] = {
   R_034008_CPG_PERFCOUNTER0_LO,
   R_034000_CPG_PERFCOUNTER1_LO,
};
static struct ac_pc_block_base cik_CPG = {
   .gpu_block = CPG,
   .name = "CPG",
   .num_counters = 2,

   .select0 = cik_CPG_select0,
   .select1 = cik_CPG_select1,
   .counters = cik_CPG_counters,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x0,
};
133
/* cik_DB
 *
 * Description of the "DB" block: 4 counters with a SELECT register each;
 * SELECT1 registers exist for counters 0 and 1 only. The select tables are
 * also referenced by gfx10_DB further down in this file.
 */
static unsigned cik_DB_select0[] = {
   R_037100_DB_PERFCOUNTER0_SELECT,
   R_037108_DB_PERFCOUNTER1_SELECT,
   R_037110_DB_PERFCOUNTER2_SELECT,
   R_037118_DB_PERFCOUNTER3_SELECT,
};
static unsigned cik_DB_select1[] = {
   R_037104_DB_PERFCOUNTER0_SELECT1,
   R_03710C_DB_PERFCOUNTER1_SELECT1,
};
static struct ac_pc_block_base cik_DB = {
   .gpu_block = DB,
   .name = "DB",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS,

   .select0 = cik_DB_select0,
   .select1 = cik_DB_select1,
   .counter0_lo = R_035100_DB_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 3,
   .spm_block_select = 0x1,
};
159
/* cik_GDS
 *
 * Description of the "GDS" block: 4 counters, no flags. One SELECT register
 * per counter; a SELECT1 register is listed for counter 0 only.
 */
static unsigned cik_GDS_select0[] = {
   R_036A00_GDS_PERFCOUNTER0_SELECT,
   R_036A04_GDS_PERFCOUNTER1_SELECT,
   R_036A08_GDS_PERFCOUNTER2_SELECT,
   R_036A0C_GDS_PERFCOUNTER3_SELECT,
};
static unsigned cik_GDS_select1[] = {
   R_036A10_GDS_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base cik_GDS = {
   .gpu_block = GDS,
   .name = "GDS",
   .num_counters = 4,

   .select0 = cik_GDS_select0,
   .select1 = cik_GDS_select1,
   .counter0_lo = R_034A00_GDS_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x3,
};
183
/* cik_GRBM
 *
 * Description of the "GRBM" block: 2 counters, no flags. There is no select1
 * table and none of the num_spm_* / spm_block_select fields are set. Result
 * registers are listed explicitly in .counters.
 */
static unsigned cik_GRBM_select0[] = {
   R_036100_GRBM_PERFCOUNTER0_SELECT,
   R_036104_GRBM_PERFCOUNTER1_SELECT,
};
static unsigned cik_GRBM_counters[] = {
   R_034100_GRBM_PERFCOUNTER0_LO,
   R_03410C_GRBM_PERFCOUNTER1_LO,
};
static struct ac_pc_block_base cik_GRBM = {
   .gpu_block = GRBM,
   .name = "GRBM",
   .num_counters = 2,

   .select0 = cik_GRBM_select0,
   .counters = cik_GRBM_counters,
};
201
/* cik_GRBMSE
 *
 * Description of the "GRBMSE" block: 4 counters whose SELECT registers are the
 * GRBM_SE0..GRBM_SE3 PERFCOUNTER_SELECT registers. No select1 table and no
 * SPM fields; only counter0_lo is given for the results.
 */
static unsigned cik_GRBMSE_select0[] = {
   R_036108_GRBM_SE0_PERFCOUNTER_SELECT,
   R_03610C_GRBM_SE1_PERFCOUNTER_SELECT,
   R_036110_GRBM_SE2_PERFCOUNTER_SELECT,
   R_036114_GRBM_SE3_PERFCOUNTER_SELECT,
};
static struct ac_pc_block_base cik_GRBMSE = {
   .gpu_block = GRBMSE,
   .name = "GRBMSE",
   .num_counters = 4,

   .select0 = cik_GRBMSE_select0,
   .counter0_lo = R_034114_GRBM_SE0_PERFCOUNTER_LO,
};
217
/* cik_IA
 *
 * Description of the "IA" block: 4 counters, no flags. One SELECT register per
 * counter; a SELECT1 register is listed for counter 0 only.
 */
static unsigned cik_IA_select0[] = {
   R_036210_IA_PERFCOUNTER0_SELECT,
   R_036214_IA_PERFCOUNTER1_SELECT,
   R_036218_IA_PERFCOUNTER2_SELECT,
   R_03621C_IA_PERFCOUNTER3_SELECT,
};
static unsigned cik_IA_select1[] = {
   R_036220_IA_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base cik_IA = {
   .gpu_block = IA,
   .name = "IA",
   .num_counters = 4,

   .select0 = cik_IA_select0,
   .select1 = cik_IA_select1,
   .counter0_lo = R_034220_IA_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x6,
};
241
/* cik_PA_SC
 *
 * Description of the "PA_SC" block: 8 counters with a SELECT register each; a
 * SELECT1 register is listed for counter 0 only. Flagged AC_PC_BLOCK_SE.
 */
static unsigned cik_PA_SC_select0[] = {
   R_036500_PA_SC_PERFCOUNTER0_SELECT,
   R_036508_PA_SC_PERFCOUNTER1_SELECT,
   R_03650C_PA_SC_PERFCOUNTER2_SELECT,
   R_036510_PA_SC_PERFCOUNTER3_SELECT,
   R_036514_PA_SC_PERFCOUNTER4_SELECT,
   R_036518_PA_SC_PERFCOUNTER5_SELECT,
   R_03651C_PA_SC_PERFCOUNTER6_SELECT,
   R_036520_PA_SC_PERFCOUNTER7_SELECT,
};
static unsigned cik_PA_SC_select1[] = {
   R_036504_PA_SC_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base cik_PA_SC = {
   .gpu_block = PA_SC,
   .name = "PA_SC",
   .num_counters = 8,
   .flags = AC_PC_BLOCK_SE,

   .select0 = cik_PA_SC_select0,
   .select1 = cik_PA_SC_select1,
   .counter0_lo = R_034500_PA_SC_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x4,
};
270
/* cik_PA_SU
 *
 * Description of the "PA_SU" block: 4 counters with a SELECT register each;
 * SELECT1 registers are listed for counters 0 and 1. Flagged AC_PC_BLOCK_SE.
 * GFX10 uses the separate gfx10_PA_SU description instead (see groups_gfx10).
 */
static unsigned cik_PA_SU_select0[] = {
   R_036400_PA_SU_PERFCOUNTER0_SELECT,
   R_036408_PA_SU_PERFCOUNTER1_SELECT,
   R_036410_PA_SU_PERFCOUNTER2_SELECT,
   R_036414_PA_SU_PERFCOUNTER3_SELECT,
};
static unsigned cik_PA_SU_select1[] = {
   R_036404_PA_SU_PERFCOUNTER0_SELECT1,
   R_03640C_PA_SU_PERFCOUNTER1_SELECT1,
};
/* According to docs, PA_SU counters are only 48 bits wide. */
static struct ac_pc_block_base cik_PA_SU = {
   .gpu_block = PA_SU,
   .name = "PA_SU",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_SE,

   .select0 = cik_PA_SU_select0,
   .select1 = cik_PA_SU_select1,
   .counter0_lo = R_034400_PA_SU_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 3,
   .spm_block_select = 0x2,
};
297
/* cik_SPI
 *
 * Description of the "SPI" block: 6 counters with a SELECT register each
 * (counters 4 and 5 are at 0x036620/0x036624, after the SELECT1 registers);
 * SELECT1 registers are listed for counters 0-3. Flagged AC_PC_BLOCK_SE.
 */
static unsigned cik_SPI_select0[] = {
   R_036600_SPI_PERFCOUNTER0_SELECT,
   R_036604_SPI_PERFCOUNTER1_SELECT,
   R_036608_SPI_PERFCOUNTER2_SELECT,
   R_03660C_SPI_PERFCOUNTER3_SELECT,
   R_036620_SPI_PERFCOUNTER4_SELECT,
   R_036624_SPI_PERFCOUNTER5_SELECT,
};
static unsigned cik_SPI_select1[] = {
   R_036610_SPI_PERFCOUNTER0_SELECT1,
   R_036614_SPI_PERFCOUNTER1_SELECT1,
   R_036618_SPI_PERFCOUNTER2_SELECT1,
   R_03661C_SPI_PERFCOUNTER3_SELECT1
};
static struct ac_pc_block_base cik_SPI = {
   .gpu_block = SPI,
   .name = "SPI",
   .num_counters = 6,
   .flags = AC_PC_BLOCK_SE,

   .select0 = cik_SPI_select0,
   .select1 = cik_SPI_select1,
   .counter0_lo = R_034604_SPI_PERFCOUNTER0_LO,

   .num_spm_counters = 4,
   .num_spm_wires = 8,
   .spm_block_select = 0x8,
};
327
/* cik_SQ
 *
 * Description of the "SQ" block: 16 counters with a SELECT register each and
 * no select1 table. It is the only cik_* block flagged AC_PC_BLOCK_SHADER, and
 * it sets .select_or to the SQC_BANK_MASK, SQC_CLIENT_MASK and SIMD_MASK
 * fields, each with value 15. num_spm_counters is left unset (zero) while
 * num_spm_wires is 8. The select0 table is also referenced by gfx10_SQ.
 */
static unsigned cik_SQ_select0[] = {
   R_036700_SQ_PERFCOUNTER0_SELECT,
   R_036704_SQ_PERFCOUNTER1_SELECT,
   R_036708_SQ_PERFCOUNTER2_SELECT,
   R_03670C_SQ_PERFCOUNTER3_SELECT,
   R_036710_SQ_PERFCOUNTER4_SELECT,
   R_036714_SQ_PERFCOUNTER5_SELECT,
   R_036718_SQ_PERFCOUNTER6_SELECT,
   R_03671C_SQ_PERFCOUNTER7_SELECT,
   R_036720_SQ_PERFCOUNTER8_SELECT,
   R_036724_SQ_PERFCOUNTER9_SELECT,
   R_036728_SQ_PERFCOUNTER10_SELECT,
   R_03672C_SQ_PERFCOUNTER11_SELECT,
   R_036730_SQ_PERFCOUNTER12_SELECT,
   R_036734_SQ_PERFCOUNTER13_SELECT,
   R_036738_SQ_PERFCOUNTER14_SELECT,
   R_03673C_SQ_PERFCOUNTER15_SELECT,
};
static struct ac_pc_block_base cik_SQ = {
   .gpu_block = SQ,
   .name = "SQ",
   .num_counters = 16,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER,

   .select0 = cik_SQ_select0,
   .select_or = S_036700_SQC_BANK_MASK(15) | S_036700_SQC_CLIENT_MASK(15) | S_036700_SIMD_MASK(15),
   .counter0_lo = R_034700_SQ_PERFCOUNTER0_LO,

   .num_spm_wires = 8,
   .spm_block_select = 0x9,
};
360
/* cik_SX
 *
 * Description of the "SX" block: 4 counters with a SELECT register each;
 * SELECT1 registers are listed for counters 0 and 1. Flagged AC_PC_BLOCK_SE.
 */
static unsigned cik_SX_select0[] = {
   R_036900_SX_PERFCOUNTER0_SELECT,
   R_036904_SX_PERFCOUNTER1_SELECT,
   R_036908_SX_PERFCOUNTER2_SELECT,
   R_03690C_SX_PERFCOUNTER3_SELECT,
};
static unsigned cik_SX_select1[] = {
   R_036910_SX_PERFCOUNTER0_SELECT1,
   R_036914_SX_PERFCOUNTER1_SELECT1,
};
static struct ac_pc_block_base cik_SX = {
   .gpu_block = SX,
   .name = "SX",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_SE,

   .select0 = cik_SX_select0,
   .select1 = cik_SX_select1,
   .counter0_lo = R_034900_SX_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 4,
   .spm_block_select = 0x3,
};
386
/* cik_TA
 *
 * Description of the "TA" block: 2 counters with a SELECT register each; a
 * SELECT1 register is listed for counter 0 only. Flags combine AC_PC_BLOCK_SE,
 * AC_PC_BLOCK_INSTANCE_GROUPS and AC_PC_BLOCK_SHADER_WINDOWED.
 */
static unsigned cik_TA_select0[] = {
   R_036B00_TA_PERFCOUNTER0_SELECT,
   R_036B08_TA_PERFCOUNTER1_SELECT,
};
static unsigned cik_TA_select1[] = {
   R_036B04_TA_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base cik_TA = {
   .gpu_block = TA,
   .name = "TA",
   .num_counters = 2,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED,

   .select0 = cik_TA_select0,
   .select1 = cik_TA_select1,
   .counter0_lo = R_034B00_TA_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x5,
};
409
/* cik_TD
 *
 * Description of the "TD" block: 2 counters with a SELECT register each; a
 * SELECT1 register is listed for counter 0 only. Same flag combination as
 * cik_TA.
 */
static unsigned cik_TD_select0[] = {
   R_036C00_TD_PERFCOUNTER0_SELECT,
   R_036C08_TD_PERFCOUNTER1_SELECT,
};
static unsigned cik_TD_select1[] = {
   R_036C04_TD_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base cik_TD = {
   .gpu_block = TD,
   .name = "TD",
   .num_counters = 2,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED,

   .select0 = cik_TD_select0,
   .select1 = cik_TD_select1,
   .counter0_lo = R_034C00_TD_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x6,
};
432
/* cik_TCA
 *
 * Description of the "TCA" block: 4 counters with a SELECT register each;
 * SELECT1 registers are listed for counters 0 and 1. Flagged
 * AC_PC_BLOCK_INSTANCE_GROUPS only (no AC_PC_BLOCK_SE).
 */
static unsigned cik_TCA_select0[] = {
   R_036E40_TCA_PERFCOUNTER0_SELECT,
   R_036E48_TCA_PERFCOUNTER1_SELECT,
   R_036E50_TCA_PERFCOUNTER2_SELECT,
   R_036E54_TCA_PERFCOUNTER3_SELECT,
};
static unsigned cik_TCA_select1[] = {
   R_036E44_TCA_PERFCOUNTER0_SELECT1,
   R_036E4C_TCA_PERFCOUNTER1_SELECT1,
};
static struct ac_pc_block_base cik_TCA = {
   .gpu_block = TCA,
   .name = "TCA",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_INSTANCE_GROUPS,

   .select0 = cik_TCA_select0,
   .select1 = cik_TCA_select1,
   .counter0_lo = R_034E40_TCA_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 4,
   .spm_block_select = 0x5,
};
458
/* cik_TCC
 *
 * Description of the "TCC" block: 4 counters with a SELECT register each;
 * SELECT1 registers are listed for counters 0 and 1. Flagged
 * AC_PC_BLOCK_INSTANCE_GROUPS only (no AC_PC_BLOCK_SE).
 */
static unsigned cik_TCC_select0[] = {
   R_036E00_TCC_PERFCOUNTER0_SELECT,
   R_036E08_TCC_PERFCOUNTER1_SELECT,
   R_036E10_TCC_PERFCOUNTER2_SELECT,
   R_036E14_TCC_PERFCOUNTER3_SELECT,
};
static unsigned cik_TCC_select1[] = {
   R_036E04_TCC_PERFCOUNTER0_SELECT1,
   R_036E0C_TCC_PERFCOUNTER1_SELECT1,
};
static struct ac_pc_block_base cik_TCC = {
   .gpu_block = TCC,
   .name = "TCC",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_INSTANCE_GROUPS,

   .select0 = cik_TCC_select0,
   .select1 = cik_TCC_select1,
   .counter0_lo = R_034E00_TCC_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 4,
   .spm_block_select = 0x4,
};
484
/* cik_TCP
 *
 * Description of the "TCP" block: 4 counters with a SELECT register each;
 * SELECT1 registers are listed for counters 0 and 1. The select tables are
 * also referenced by gfx10_TCP further down in this file.
 */
static unsigned cik_TCP_select0[] = {
   R_036D00_TCP_PERFCOUNTER0_SELECT,
   R_036D08_TCP_PERFCOUNTER1_SELECT,
   R_036D10_TCP_PERFCOUNTER2_SELECT,
   R_036D14_TCP_PERFCOUNTER3_SELECT,
};
static unsigned cik_TCP_select1[] = {
   R_036D04_TCP_PERFCOUNTER0_SELECT1,
   R_036D0C_TCP_PERFCOUNTER1_SELECT1,
};
static struct ac_pc_block_base cik_TCP = {
   .gpu_block = TCP,
   .name = "TCP",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED,

   .select0 = cik_TCP_select0,
   .select1 = cik_TCP_select1,
   .counter0_lo = R_034D00_TCP_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 3,
   .spm_block_select = 0x7,
};
510
/* cik_VGT
 *
 * Description of the "VGT" block: 4 counters with a SELECT register each;
 * SELECT1 registers are listed for counters 0 and 1. Flagged AC_PC_BLOCK_SE.
 * Not referenced by groups_gfx10.
 */
static unsigned cik_VGT_select0[] = {
   R_036230_VGT_PERFCOUNTER0_SELECT,
   R_036234_VGT_PERFCOUNTER1_SELECT,
   R_036238_VGT_PERFCOUNTER2_SELECT,
   R_03623C_VGT_PERFCOUNTER3_SELECT,
};
static unsigned cik_VGT_select1[] = {
   R_036240_VGT_PERFCOUNTER0_SELECT1,
   R_036244_VGT_PERFCOUNTER1_SELECT1,
};
static struct ac_pc_block_base cik_VGT = {
   .gpu_block = VGT,
   .name = "VGT",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_SE,

   .select0 = cik_VGT_select0,
   .select1 = cik_VGT_select1,
   .counter0_lo = R_034240_VGT_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 3,
   .spm_block_select = 0xa,
};
536
/* cik_WD
 *
 * Description of the "WD" block: 4 counters with a SELECT register each, no
 * flags, no select1 table and no SPM fields.
 */
static unsigned cik_WD_select0[] = {
   R_036200_WD_PERFCOUNTER0_SELECT,
   R_036204_WD_PERFCOUNTER1_SELECT,
   R_036208_WD_PERFCOUNTER2_SELECT,
   R_03620C_WD_PERFCOUNTER3_SELECT,
};
static struct ac_pc_block_base cik_WD = {
   .gpu_block = WD,
   .name = "WD",
   .num_counters = 4,

   .select0 = cik_WD_select0,
   .counter0_lo = R_034200_WD_PERFCOUNTER0_LO,
};
552
/* cik_MC
 *
 * Description of the "MC" block. Only the identity and counter count are
 * given: no select/counter register tables are set, so those pointers stay
 * NULL. Listed in groups_CIK and groups_VI only.
 */
static struct ac_pc_block_base cik_MC = {
   .gpu_block = MC,
   .name = "MC",
   .num_counters = 4,
};
559
/* cik_SRBM
 *
 * Description of the "SRBM" block. Only the identity and counter count are
 * given: no select/counter register tables are set, so those pointers stay
 * NULL. Listed in groups_CIK and groups_VI only.
 */
static struct ac_pc_block_base cik_SRBM = {
   .gpu_block = SRBM,
   .name = "SRBM",
   .num_counters = 2,
};
566
/* gfx10_CHA
 *
 * Description of the "CHA" block (listed in groups_gfx10 only): 4 counters
 * with a SELECT register each; a SELECT1 register is listed for counter 0
 * only. No flags.
 */
static unsigned gfx10_CHA_select0[] = {
   R_037780_CHA_PERFCOUNTER0_SELECT,
   R_037788_CHA_PERFCOUNTER1_SELECT,
   R_03778C_CHA_PERFCOUNTER2_SELECT,
   R_037790_CHA_PERFCOUNTER3_SELECT,
};
static unsigned gfx10_CHA_select1[] = {
   R_037784_CHA_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base gfx10_CHA = {
   .gpu_block = CHA,
   .name = "CHA",
   .num_counters = 4,

   .select0 = gfx10_CHA_select0,
   .select1 = gfx10_CHA_select1,
   .counter0_lo = R_035800_CHA_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0xc,
};
590
/* gfx10_CHCG
 *
 * Description of the "CHCG" block (listed in groups_gfx10 only): 4 counters
 * with a SELECT register each; a SELECT1 register is listed for counter 0
 * only. No flags.
 */
static unsigned gfx10_CHCG_select0[] = {
   R_036F18_CHCG_PERFCOUNTER0_SELECT,
   R_036F20_CHCG_PERFCOUNTER1_SELECT,
   R_036F24_CHCG_PERFCOUNTER2_SELECT,
   R_036F28_CHCG_PERFCOUNTER3_SELECT,
};
static unsigned gfx10_CHCG_select1[] = {
   R_036F1C_CHCG_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base gfx10_CHCG = {
   .gpu_block = CHCG,
   .name = "CHCG",
   .num_counters = 4,

   .select0 = gfx10_CHCG_select0,
   .select1 = gfx10_CHCG_select1,
   .counter0_lo = R_034F20_CHCG_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0xe,
};
614
/* gfx10_CHC
 *
 * Description of the "CHC" block (listed in groups_gfx10 only): 4 counters
 * with a SELECT register each; a SELECT1 register is listed for counter 0
 * only. No flags.
 */
static unsigned gfx10_CHC_select0[] = {
   R_036F00_CHC_PERFCOUNTER0_SELECT,
   R_036F08_CHC_PERFCOUNTER1_SELECT,
   R_036F0C_CHC_PERFCOUNTER2_SELECT,
   R_036F10_CHC_PERFCOUNTER3_SELECT,
};
static unsigned gfx10_CHC_select1[] = {
   R_036F04_CHC_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base gfx10_CHC = {
   .gpu_block = CHC,
   .name = "CHC",
   .num_counters = 4,

   .select0 = gfx10_CHC_select0,
   .select1 = gfx10_CHC_select1,
   .counter0_lo = R_034F00_CHC_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0xd,
};
638
/* gfx10_DB
 *
 * GFX10 variant of the "DB" description. It reuses the cik_DB select tables
 * and the same counter0_lo; the only field that differs from cik_DB is
 * num_spm_wires (4 here, 3 there).
 */
static struct ac_pc_block_base gfx10_DB = {
   .gpu_block = DB,
   .name = "DB",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS,

   .select0 = cik_DB_select0,
   .select1 = cik_DB_select1,
   .counter0_lo = R_035100_DB_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 4,
   .spm_block_select = 0x1,
};
654
/* gfx10_GCR
 *
 * Description of the "GCR" block (listed in groups_gfx10 only): 2 counters
 * with a SELECT register each; a SELECT1 register is listed for counter 0
 * only. No flags.
 */
static unsigned gfx10_GCR_select0[] = {
   R_037580_GCR_PERFCOUNTER0_SELECT,
   R_037588_GCR_PERFCOUNTER1_SELECT,
};
static unsigned gfx10_GCR_select1[] = {
   R_037584_GCR_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base gfx10_GCR = {
   .gpu_block = GCR,
   .name = "GCR",
   .num_counters = 2,

   .select0 = gfx10_GCR_select0,
   .select1 = gfx10_GCR_select1,
   .counter0_lo = R_035480_GCR_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x4,
};
676
/* gfx10_GE
 *
 * Description of the "GE" block (listed in groups_gfx10 only): 12 counters
 * with a SELECT register each, spaced 8 bytes apart; SELECT1 registers are
 * listed for counters 0-3. No flags.
 */
static unsigned gfx10_GE_select0[] = {
   R_036200_GE_PERFCOUNTER0_SELECT,
   R_036208_GE_PERFCOUNTER1_SELECT,
   R_036210_GE_PERFCOUNTER2_SELECT,
   R_036218_GE_PERFCOUNTER3_SELECT,
   R_036220_GE_PERFCOUNTER4_SELECT,
   R_036228_GE_PERFCOUNTER5_SELECT,
   R_036230_GE_PERFCOUNTER6_SELECT,
   R_036238_GE_PERFCOUNTER7_SELECT,
   R_036240_GE_PERFCOUNTER8_SELECT,
   R_036248_GE_PERFCOUNTER9_SELECT,
   R_036250_GE_PERFCOUNTER10_SELECT,
   R_036258_GE_PERFCOUNTER11_SELECT,
};
static unsigned gfx10_GE_select1[] = {
   R_036204_GE_PERFCOUNTER0_SELECT1,
   R_03620C_GE_PERFCOUNTER1_SELECT1,
   R_036214_GE_PERFCOUNTER2_SELECT1,
   R_03621C_GE_PERFCOUNTER3_SELECT1,
};
static struct ac_pc_block_base gfx10_GE = {
   .gpu_block = GE,
   .name = "GE",
   .num_counters = 12,

   .select0 = gfx10_GE_select0,
   .select1 = gfx10_GE_select1,
   .counter0_lo = R_034200_GE_PERFCOUNTER0_LO,

   .num_spm_counters = 4,
   .num_spm_wires = 8,
   .spm_block_select = 0x6,
};
711
/* gfx10_GL1A
 *
 * Description of the "GL1A" block (listed in groups_gfx10 only): 4 counters
 * with a SELECT register each; a SELECT1 register is listed for counter 0
 * only. Flagged AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED.
 */
static unsigned gfx10_GL1A_select0[] = {
   R_037700_GL1A_PERFCOUNTER0_SELECT,
   R_037708_GL1A_PERFCOUNTER1_SELECT,
   R_03770C_GL1A_PERFCOUNTER2_SELECT,
   R_037710_GL1A_PERFCOUNTER3_SELECT,
};
static unsigned gfx10_GL1A_select1[] = {
   R_037704_GL1A_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base gfx10_GL1A = {
   .gpu_block = GL1A,
   .name = "GL1A",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED,

   .select0 = gfx10_GL1A_select0,
   .select1 = gfx10_GL1A_select1,
   .counter0_lo = R_035700_GL1A_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0xa,
};
736
/* gfx10_GL1C
 *
 * Description of the "GL1C" block (listed in groups_gfx10 only): 4 counters
 * with a SELECT register each; a SELECT1 register is listed for counter 0
 * only. Flagged AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED.
 */
static unsigned gfx10_GL1C_select0[] = {
   R_036E80_GL1C_PERFCOUNTER0_SELECT,
   R_036E88_GL1C_PERFCOUNTER1_SELECT,
   R_036E8C_GL1C_PERFCOUNTER2_SELECT,
   R_036E90_GL1C_PERFCOUNTER3_SELECT,
};
static unsigned gfx10_GL1C_select1[] = {
   R_036E84_GL1C_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base gfx10_GL1C = {
   .gpu_block = GL1C,
   .name = "GL1C",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED,

   .select0 = gfx10_GL1C_select0,
   .select1 = gfx10_GL1C_select1,
   .counter0_lo = R_034E80_GL1C_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0xc
};
761
/* gfx10_GL2A
 *
 * Description of the "GL2A" block (listed in groups_gfx10 only): 4 counters
 * with a SELECT register each; SELECT1 registers are listed for counters 0
 * and 1. No flags. The register offsets coincide with those in the cik_TCA
 * tables (0x036E40.., 0x034E40).
 */
static unsigned gfx10_GL2A_select0[] = {
   R_036E40_GL2A_PERFCOUNTER0_SELECT,
   R_036E48_GL2A_PERFCOUNTER1_SELECT,
   R_036E50_GL2A_PERFCOUNTER2_SELECT,
   R_036E54_GL2A_PERFCOUNTER3_SELECT,
};
static unsigned gfx10_GL2A_select1[] = {
   R_036E44_GL2A_PERFCOUNTER0_SELECT1,
   R_036E4C_GL2A_PERFCOUNTER1_SELECT1,
};
static struct ac_pc_block_base gfx10_GL2A = {
   .gpu_block = GL2A,
   .name = "GL2A",
   .num_counters = 4,

   .select0 = gfx10_GL2A_select0,
   .select1 = gfx10_GL2A_select1,
   .counter0_lo = R_034E40_GL2A_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 4,
   .spm_block_select = 0x7,
};
786
/* gfx10_GL2C
 *
 * Description of the "GL2C" block (listed in groups_gfx10 only): 4 counters
 * with a SELECT register each; SELECT1 registers are listed for counters 0
 * and 1. No flags. The register offsets coincide with those in the cik_TCC
 * tables (0x036E00.., 0x034E00).
 */
static unsigned gfx10_GL2C_select0[] = {
   R_036E00_GL2C_PERFCOUNTER0_SELECT,
   R_036E08_GL2C_PERFCOUNTER1_SELECT,
   R_036E10_GL2C_PERFCOUNTER2_SELECT,
   R_036E14_GL2C_PERFCOUNTER3_SELECT,
};
static unsigned gfx10_GL2C_select1[] = {
   R_036E04_GL2C_PERFCOUNTER0_SELECT1,
   R_036E0C_GL2C_PERFCOUNTER1_SELECT1,
};
static struct ac_pc_block_base gfx10_GL2C = {
   .gpu_block = GL2C,
   .name = "GL2C",
   .num_counters = 4,

   .select0 = gfx10_GL2C_select0,
   .select1 = gfx10_GL2C_select1,
   .counter0_lo = R_034E00_GL2C_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 4,
   .spm_block_select = 0x8,
};
811
/* gfx10_PA_PH
 *
 * Description of the "PA_PH" block (listed in groups_gfx10 only): 8 counters
 * with a SELECT register each; SELECT1 registers are listed for counters 0-3.
 * The SELECT1 registers of counters 1-3 are not adjacent to their SELECT
 * registers (they sit at 0x037640..0x037648). Flagged AC_PC_BLOCK_SE.
 */
static unsigned gfx10_PA_PH_select0[] = {
   R_037600_PA_PH_PERFCOUNTER0_SELECT,
   R_037608_PA_PH_PERFCOUNTER1_SELECT,
   R_03760C_PA_PH_PERFCOUNTER2_SELECT,
   R_037610_PA_PH_PERFCOUNTER3_SELECT,
   R_037614_PA_PH_PERFCOUNTER4_SELECT,
   R_037618_PA_PH_PERFCOUNTER5_SELECT,
   R_03761C_PA_PH_PERFCOUNTER6_SELECT,
   R_037620_PA_PH_PERFCOUNTER7_SELECT,
};
static unsigned gfx10_PA_PH_select1[] = {
   R_037604_PA_PH_PERFCOUNTER0_SELECT1,
   R_037640_PA_PH_PERFCOUNTER1_SELECT1,
   R_037644_PA_PH_PERFCOUNTER2_SELECT1,
   R_037648_PA_PH_PERFCOUNTER3_SELECT1,
};
static struct ac_pc_block_base gfx10_PA_PH = {
   .gpu_block = PA_PH,
   .name = "PA_PH",
   .num_counters = 8,
   .flags = AC_PC_BLOCK_SE,

   .select0 = gfx10_PA_PH_select0,
   .select1 = gfx10_PA_PH_select1,
   .counter0_lo = R_035600_PA_PH_PERFCOUNTER0_LO,

   .num_spm_counters = 4,
   .num_spm_wires = 8,
   .spm_block_select = 0x5,
};
843
/* gfx10_PA_SU
 *
 * GFX10 variant of the "PA_SU" description. Unlike cik_PA_SU it has its own
 * select tables: counter 3's SELECT register is at 0x036418 (0x036414 in the
 * cik table) and all four counters have a SELECT1 register. num_spm_counters
 * and num_spm_wires are 4 and 8 (2 and 3 in cik_PA_SU).
 */
static unsigned gfx10_PA_SU_select0[] = {
   R_036400_PA_SU_PERFCOUNTER0_SELECT,
   R_036408_PA_SU_PERFCOUNTER1_SELECT,
   R_036410_PA_SU_PERFCOUNTER2_SELECT,
   R_036418_PA_SU_PERFCOUNTER3_SELECT,
};
static unsigned gfx10_PA_SU_select1[] = {
   R_036404_PA_SU_PERFCOUNTER0_SELECT1,
   R_03640C_PA_SU_PERFCOUNTER1_SELECT1,
   R_036414_PA_SU_PERFCOUNTER2_SELECT1,
   R_03641C_PA_SU_PERFCOUNTER3_SELECT1,
};
static struct ac_pc_block_base gfx10_PA_SU = {
   .gpu_block = PA_SU,
   .name = "PA_SU",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_SE,

   .select0 = gfx10_PA_SU_select0,
   .select1 = gfx10_PA_SU_select1,
   .counter0_lo = R_034400_PA_SU_PERFCOUNTER0_LO,

   .num_spm_counters = 4,
   .num_spm_wires = 8,
   .spm_block_select = 0x2,
};
871
/* gfx10_RLC
 *
 * Description of the "RLC" block (listed in groups_gfx10 only): 2 counters
 * with a SELECT register each, no select1 table, no flags.
 * num_spm_counters is explicitly 0 and no other SPM field is set.
 */
static unsigned gfx10_RLC_select0[] = {
   R_037304_RLC_PERFCOUNTER0_SELECT,
   R_037308_RLC_PERFCOUNTER1_SELECT,
};
static struct ac_pc_block_base gfx10_RLC = {
   .gpu_block = RLC,
   .name = "RLC",
   .num_counters = 2,

   .select0 = gfx10_RLC_select0,
   .counter0_lo = R_035200_RLC_PERFCOUNTER0_LO,
   .num_spm_counters = 0,
};
886
/* gfx10_RMI
 *
 * Description of the "RMI" block (listed in groups_gfx10 only): 4 counters
 * with a SELECT register each. The two select1 entries are the SELECT1
 * registers of counters 0 and 2 (not 0 and 1 as in most other blocks here).
 * NOTE(review): how select1[1] is matched to a counter is decided by code
 * outside this view -- confirm that the consumer expects this layout.
 */
static unsigned gfx10_RMI_select0[] = {
   R_037400_RMI_PERFCOUNTER0_SELECT,
   R_037408_RMI_PERFCOUNTER1_SELECT,
   R_03740C_RMI_PERFCOUNTER2_SELECT,
   R_037414_RMI_PERFCOUNTER3_SELECT,
};
static unsigned gfx10_RMI_select1[] = {
   R_037404_RMI_PERFCOUNTER0_SELECT1,
   R_037410_RMI_PERFCOUNTER2_SELECT1,
};
static struct ac_pc_block_base gfx10_RMI = {
   .gpu_block = RMI,
   .name = "RMI",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS,

   .select0 = gfx10_RMI_select0,
   .select1 = gfx10_RMI_select1,
   .counter0_lo = R_035300_RMI_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 2,
   .spm_block_select = 0xb,
};
912
/* gfx10_SQ
 *
 * GFX10 variant of the "SQ" description. It reuses cik_SQ_select0 and the same
 * counter0_lo. Compared with cik_SQ, .select_or contains only
 * SQC_BANK_MASK(15) (no SQC_CLIENT_MASK / SIMD_MASK) and num_spm_wires is 16
 * instead of 8.
 */
static struct ac_pc_block_base gfx10_SQ = {
   .gpu_block = SQ,
   .name = "SQ",
   .num_counters = 16,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER,

   .select0 = cik_SQ_select0,
   .select_or = S_036700_SQC_BANK_MASK(15),
   .counter0_lo = R_034700_SQ_PERFCOUNTER0_LO,

   .num_spm_wires = 16,
   .spm_block_select = 0x9,
};
927
/* gfx10_TCP
 *
 * GFX10 variant of the "TCP" description. It reuses the cik_TCP select tables
 * and the same counter0_lo; the only field that differs from cik_TCP is
 * num_spm_wires (4 here, 3 there).
 */
static struct ac_pc_block_base gfx10_TCP = {
   .gpu_block = TCP,
   .name = "TCP",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED,

   .select0 = cik_TCP_select0,
   .select1 = cik_TCP_select1,
   .counter0_lo = R_034D00_TCP_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 4,
   .spm_block_select = 0x7,
};
943
/* gfx10_UTCL1
 *
 * Description of the "UTCL1" block (listed in groups_gfx10 only): 2 counters
 * with a SELECT register each and no select1 table. Flagged
 * AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED. num_spm_counters is
 * explicitly 0 and no other SPM field is set.
 */
static unsigned gfx10_UTCL1_select0[] = {
   R_03758C_UTCL1_PERFCOUNTER0_SELECT,
   R_037590_UTCL1_PERFCOUNTER1_SELECT,
};
static struct ac_pc_block_base gfx10_UTCL1 = {
   .gpu_block = UTCL1,
   .name = "UTCL1",
   .num_counters = 2,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED,

   .select0 = gfx10_UTCL1_select0,
   .counter0_lo = R_035470_UTCL1_PERFCOUNTER0_LO,
   .num_spm_counters = 0,
};
959
/* Both the number of instances and the number of selectors vary between chips
 * of the same class. We only differentiate by class here and simply expose the
 * maximum number over all chips in a class.
 *
 * Unfortunately, GPUPerfStudio uses the order of performance counter groups
 * blindly once it believes it has identified the hardware, so the order of
 * blocks here matters.
 */
/* Block list selected by ac_init_perfcounters() for GFX7.
 *
 * Each entry is initialized positionally: the block description first, then a
 * number that is presumably the per-block selector count read elsewhere as
 * ->selectors (confirm against struct ac_pc_block_gfxdescr). The third value,
 * given for cik_TCA only, is not used by the code visible here; it looks like
 * an instance count -- TODO confirm. Do not reorder entries.
 */
static struct ac_pc_block_gfxdescr groups_CIK[] = {
   {&cik_CB, 226},     {&cik_CPF, 17},    {&cik_DB, 257},  {&cik_GRBM, 34},   {&cik_GRBMSE, 15},
   {&cik_PA_SU, 153},  {&cik_PA_SC, 395}, {&cik_SPI, 186}, {&cik_SQ, 252},    {&cik_SX, 32},
   {&cik_TA, 111},     {&cik_TCA, 39, 2}, {&cik_TCC, 160}, {&cik_TD, 55},     {&cik_TCP, 154},
   {&cik_GDS, 121},    {&cik_VGT, 140},   {&cik_IA, 22},   {&cik_MC, 22},     {&cik_SRBM, 19},
   {&cik_WD, 22},      {&cik_CPG, 46},    {&cik_CPC, 22},

};
976
/* Block list selected by ac_init_perfcounters() for GFX8.
 *
 * Same blocks in the same order as groups_CIK; only the per-block numbers
 * differ. Entries are positional: block description, then a number that is
 * presumably the selector count (confirm against struct
 * ac_pc_block_gfxdescr); the third value on cik_TCA is not used by the code
 * visible here. Do not reorder entries.
 */
static struct ac_pc_block_gfxdescr groups_VI[] = {
   {&cik_CB, 405},     {&cik_CPF, 19},    {&cik_DB, 257},  {&cik_GRBM, 34},   {&cik_GRBMSE, 15},
   {&cik_PA_SU, 154},  {&cik_PA_SC, 397}, {&cik_SPI, 197}, {&cik_SQ, 273},    {&cik_SX, 34},
   {&cik_TA, 119},     {&cik_TCA, 35, 2}, {&cik_TCC, 192}, {&cik_TD, 55},     {&cik_TCP, 180},
   {&cik_GDS, 121},    {&cik_VGT, 147},   {&cik_IA, 24},   {&cik_MC, 22},     {&cik_SRBM, 27},
   {&cik_WD, 37},      {&cik_CPG, 48},    {&cik_CPC, 24},

};
985
/* Block list selected by ac_init_perfcounters() for GFX9.
 *
 * Compared with groups_VI, the cik_MC and cik_SRBM entries are absent. Entries
 * are positional: block description, then a number that is presumably the
 * selector count (confirm against struct ac_pc_block_gfxdescr); the third
 * value on cik_TCA is not used by the code visible here. Do not reorder
 * entries.
 */
static struct ac_pc_block_gfxdescr groups_gfx9[] = {
   {&cik_CB, 438},     {&cik_CPF, 32},    {&cik_DB, 328},  {&cik_GRBM, 38},   {&cik_GRBMSE, 16},
   {&cik_PA_SU, 292},  {&cik_PA_SC, 491}, {&cik_SPI, 196}, {&cik_SQ, 374},    {&cik_SX, 208},
   {&cik_TA, 119},     {&cik_TCA, 35, 2}, {&cik_TCC, 256}, {&cik_TD, 57},     {&cik_TCP, 85},
   {&cik_GDS, 121},    {&cik_VGT, 148},   {&cik_IA, 32},   {&cik_WD, 58},     {&cik_CPG, 59},
   {&cik_CPC, 35},
};
993
/* Block list selected by ac_init_perfcounters() for GFX10 and GFX10_3.
 *
 * Mixes cik_* descriptions that are reused unchanged with the gfx10_* ones
 * defined above; entries are listed alphabetically by block name rather than
 * in the groups_CIK order. Entries are positional: block description, then a
 * number that is presumably the selector count (confirm against struct
 * ac_pc_block_gfxdescr).
 */
static struct ac_pc_block_gfxdescr groups_gfx10[] = {
   {&cik_CB, 461},
   {&gfx10_CHA, 45},
   {&gfx10_CHCG, 35},
   {&gfx10_CHC, 35},
   {&cik_CPC, 47},
   {&cik_CPF, 40},
   {&cik_CPG, 82},
   {&gfx10_DB, 370},
   {&gfx10_GCR, 94},
   {&cik_GDS, 123},
   {&gfx10_GE, 315},
   {&gfx10_GL1A, 36},
   {&gfx10_GL1C, 64},
   {&gfx10_GL2A, 91},
   {&gfx10_GL2C, 235},
   {&cik_GRBM, 47},
   {&cik_GRBMSE, 19},
   {&gfx10_PA_PH, 960},
   {&cik_PA_SC, 552},
   {&gfx10_PA_SU, 266},
   {&gfx10_RLC, 7},
   {&gfx10_RMI, 258},
   {&cik_SPI, 329},
   {&gfx10_SQ, 509},
   {&cik_SX, 225},
   {&cik_TA, 226},
   {&gfx10_TCP, 77},
   {&cik_TD, 61},
   {&gfx10_UTCL1, 15},
};
1025
ac_lookup_counter(const struct ac_perfcounters * pc,unsigned index,unsigned * base_gid,unsigned * sub_index)1026 struct ac_pc_block *ac_lookup_counter(const struct ac_perfcounters *pc,
1027 unsigned index, unsigned *base_gid,
1028 unsigned *sub_index)
1029 {
1030 struct ac_pc_block *block = pc->blocks;
1031 unsigned bid;
1032
1033 *base_gid = 0;
1034 for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
1035 unsigned total = block->num_groups * block->b->selectors;
1036
1037 if (index < total) {
1038 *sub_index = index;
1039 return block;
1040 }
1041
1042 index -= total;
1043 *base_gid += block->num_groups;
1044 }
1045
1046 return NULL;
1047 }
1048
ac_lookup_group(const struct ac_perfcounters * pc,unsigned * index)1049 struct ac_pc_block *ac_lookup_group(const struct ac_perfcounters *pc,
1050 unsigned *index)
1051 {
1052 unsigned bid;
1053 struct ac_pc_block *block = pc->blocks;
1054
1055 for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
1056 if (*index < block->num_groups)
1057 return block;
1058 *index -= block->num_groups;
1059 }
1060
1061 return NULL;
1062 }
1063
ac_init_block_names(const struct radeon_info * info,const struct ac_perfcounters * pc,struct ac_pc_block * block)1064 bool ac_init_block_names(const struct radeon_info *info,
1065 const struct ac_perfcounters *pc,
1066 struct ac_pc_block *block)
1067 {
1068 bool per_instance_groups = ac_pc_block_has_per_instance_groups(pc, block);
1069 bool per_se_groups = ac_pc_block_has_per_se_groups(pc, block);
1070 unsigned i, j, k;
1071 unsigned groups_shader = 1, groups_se = 1, groups_instance = 1;
1072 unsigned namelen;
1073 char *groupname;
1074 char *p;
1075
1076 if (per_instance_groups)
1077 groups_instance = block->num_instances;
1078 if (per_se_groups)
1079 groups_se = info->max_se;
1080 if (block->b->b->flags & AC_PC_BLOCK_SHADER)
1081 groups_shader = ARRAY_SIZE(ac_pc_shader_type_bits);
1082
1083 namelen = strlen(block->b->b->name);
1084 block->group_name_stride = namelen + 1;
1085 if (block->b->b->flags & AC_PC_BLOCK_SHADER)
1086 block->group_name_stride += 3;
1087 if (per_se_groups) {
1088 assert(groups_se <= 10);
1089 block->group_name_stride += 1;
1090
1091 if (per_instance_groups)
1092 block->group_name_stride += 1;
1093 }
1094 if (per_instance_groups) {
1095 assert(groups_instance <= 100);
1096 block->group_name_stride += 2;
1097 }
1098
1099 block->group_names = MALLOC(block->num_groups * block->group_name_stride);
1100 if (!block->group_names)
1101 return false;
1102
1103 groupname = block->group_names;
1104 for (i = 0; i < groups_shader; ++i) {
1105 const char *shader_suffix = ac_pc_shader_type_suffixes[i];
1106 unsigned shaderlen = strlen(shader_suffix);
1107 for (j = 0; j < groups_se; ++j) {
1108 for (k = 0; k < groups_instance; ++k) {
1109 strcpy(groupname, block->b->b->name);
1110 p = groupname + namelen;
1111
1112 if (block->b->b->flags & AC_PC_BLOCK_SHADER) {
1113 strcpy(p, shader_suffix);
1114 p += shaderlen;
1115 }
1116
1117 if (per_se_groups) {
1118 p += sprintf(p, "%d", j);
1119 if (per_instance_groups)
1120 *p++ = '_';
1121 }
1122
1123 if (per_instance_groups)
1124 p += sprintf(p, "%d", k);
1125
1126 groupname += block->group_name_stride;
1127 }
1128 }
1129 }
1130
1131 assert(block->b->selectors <= 1000);
1132 block->selector_name_stride = block->group_name_stride + 4;
1133 block->selector_names =
1134 MALLOC(block->num_groups * block->b->selectors * block->selector_name_stride);
1135 if (!block->selector_names)
1136 return false;
1137
1138 groupname = block->group_names;
1139 p = block->selector_names;
1140 for (i = 0; i < block->num_groups; ++i) {
1141 for (j = 0; j < block->b->selectors; ++j) {
1142 sprintf(p, "%s_%03d", groupname, j);
1143 p += block->selector_name_stride;
1144 }
1145 groupname += block->group_name_stride;
1146 }
1147
1148 return true;
1149 }
1150
ac_init_perfcounters(const struct radeon_info * info,bool separate_se,bool separate_instance,struct ac_perfcounters * pc)1151 bool ac_init_perfcounters(const struct radeon_info *info,
1152 bool separate_se,
1153 bool separate_instance,
1154 struct ac_perfcounters *pc)
1155 {
1156 const struct ac_pc_block_gfxdescr *blocks;
1157 unsigned num_blocks;
1158
1159 switch (info->gfx_level) {
1160 case GFX7:
1161 blocks = groups_CIK;
1162 num_blocks = ARRAY_SIZE(groups_CIK);
1163 break;
1164 case GFX8:
1165 blocks = groups_VI;
1166 num_blocks = ARRAY_SIZE(groups_VI);
1167 break;
1168 case GFX9:
1169 blocks = groups_gfx9;
1170 num_blocks = ARRAY_SIZE(groups_gfx9);
1171 break;
1172 case GFX10:
1173 case GFX10_3:
1174 blocks = groups_gfx10;
1175 num_blocks = ARRAY_SIZE(groups_gfx10);
1176 break;
1177 case GFX6:
1178 default:
1179 return false; /* not implemented */
1180 }
1181
1182 pc->separate_se = separate_se;
1183 pc->separate_instance = separate_instance;
1184
1185 pc->blocks = CALLOC(num_blocks, sizeof(struct ac_pc_block));
1186 if (!pc->blocks)
1187 return false;
1188 pc->num_blocks = num_blocks;
1189
1190 for (unsigned i = 0; i < num_blocks; i++) {
1191 struct ac_pc_block *block = &pc->blocks[i];
1192
1193 block->b = &blocks[i];
1194 block->num_instances = MAX2(1, block->b->instances);
1195
1196 if (!strcmp(block->b->b->name, "CB") ||
1197 !strcmp(block->b->b->name, "DB") ||
1198 !strcmp(block->b->b->name, "RMI"))
1199 block->num_instances = info->max_se;
1200 else if (!strcmp(block->b->b->name, "TCC"))
1201 block->num_instances = info->max_tcc_blocks;
1202 else if (!strcmp(block->b->b->name, "IA"))
1203 block->num_instances = MAX2(1, info->max_se / 2);
1204 else if (!strcmp(block->b->b->name, "TA") ||
1205 !strcmp(block->b->b->name, "TCP") ||
1206 !strcmp(block->b->b->name, "TD")) {
1207 block->num_instances = MAX2(1, info->max_good_cu_per_sa);
1208 }
1209
1210 if (ac_pc_block_has_per_instance_groups(pc, block)) {
1211 block->num_groups = block->num_instances;
1212 } else {
1213 block->num_groups = 1;
1214 }
1215
1216 if (ac_pc_block_has_per_se_groups(pc, block))
1217 block->num_groups *= info->max_se;
1218 if (block->b->b->flags & AC_PC_BLOCK_SHADER)
1219 block->num_groups *= ARRAY_SIZE(ac_pc_shader_type_bits);
1220
1221 pc->num_groups += block->num_groups;
1222 }
1223
1224 return true;
1225 }
1226
ac_destroy_perfcounters(struct ac_perfcounters * pc)1227 void ac_destroy_perfcounters(struct ac_perfcounters *pc)
1228 {
1229 if (!pc)
1230 return;
1231
1232 for (unsigned i = 0; i < pc->num_blocks; ++i) {
1233 FREE(pc->blocks[i].group_names);
1234 FREE(pc->blocks[i].selector_names);
1235 }
1236 FREE(pc->blocks);
1237 }
1238
ac_pc_get_block(const struct ac_perfcounters * pc,enum ac_pc_gpu_block gpu_block)1239 struct ac_pc_block *ac_pc_get_block(const struct ac_perfcounters *pc,
1240 enum ac_pc_gpu_block gpu_block)
1241 {
1242 for (unsigned i = 0; i < pc->num_blocks; i++) {
1243 struct ac_pc_block *block = &pc->blocks[i];
1244 if (block->b->b->gpu_block == gpu_block) {
1245 return block;
1246 }
1247 }
1248 return NULL;
1249 }
1250