1 /*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #ifndef AC_PERFCOUNTER_H
26 #define AC_PERFCOUNTER_H
27
28 #include <stdbool.h>
29
30 #include "sid.h"
31
32 #include "ac_gpu_info.h"
33
/* Max counters per HW block. */
#define AC_QUERY_MAX_COUNTERS 16

/* Flag occupying bit 31 of an unsigned 32-bit value.
 * NOTE(review): presumably marks a shader mask as "windowed by shaders";
 * confirm against the users of this macro.
 */
#define AC_PC_SHADERS_WINDOWING (1u << 31)
38
/* Bit flags describing a performance-counter block. They are tested against
 * the `flags` member of struct ac_pc_block_base, so every value is a distinct
 * single bit and several may be OR'ed together.
 */
enum ac_pc_block_flags
{
   /* This block is part of the shader engine */
   AC_PC_BLOCK_SE = (1 << 0),

   /* Expose per-instance groups instead of summing all instances (within
    * an SE). */
   AC_PC_BLOCK_INSTANCE_GROUPS = (1 << 1),

   /* Expose per-SE groups instead of summing instances across SEs. */
   AC_PC_BLOCK_SE_GROUPS = (1 << 2),

   /* Shader block */
   AC_PC_BLOCK_SHADER = (1 << 3),

   /* Non-shader block with perfcounters windowed by shaders. */
   AC_PC_BLOCK_SHADER_WINDOWED = (1 << 4),
};
57
/* Identifiers of the GPU hardware blocks known to the perfcounter code.
 *
 * Every ID is spelled out explicitly so the numeric value of a block never
 * shifts when entries are added. GE1 is an alias with the same value as GE.
 * NUM_GPU_BLOCK is not a block: it is one past the highest ID and therefore
 * equals the number of distinct IDs.
 */
enum ac_pc_gpu_block {
   CPF = 0x0,
   IA = 0x1,
   VGT = 0x2,
   PA_SU = 0x3,
   PA_SC = 0x4,
   SPI = 0x5,
   SQ = 0x6,
   SX = 0x7,
   TA = 0x8,
   TD = 0x9,
   TCP = 0xA,
   TCC = 0xB,
   TCA = 0xC,
   DB = 0xD,
   CB = 0xE,
   GDS = 0xF,
   SRBM = 0x10,
   GRBM = 0x11,
   GRBMSE = 0x12,
   RLC = 0x13,
   DMA = 0x14,
   MC = 0x15,
   CPG = 0x16,
   CPC = 0x17,
   WD = 0x18,
   TCS = 0x19,
   ATC = 0x1A,
   ATCL2 = 0x1B,
   MCVML2 = 0x1C,
   EA = 0x1D,
   RPB = 0x1E,
   RMI = 0x1F,
   UMCCH = 0x20,
   GE = 0x21,
   GE1 = GE, /* alias: same ID as GE */
   GL1A = 0x22,
   GL1C = 0x23,
   GL1CG = 0x24,
   GL2A = 0x25,
   GL2C = 0x26,
   CHA = 0x27,
   CHC = 0x28,
   CHCG = 0x29,
   GUS = 0x2A,
   GCR = 0x2B,
   PA_PH = 0x2C,
   UTCL1 = 0x2D,
   GEDIST = 0x2E,
   GESE = 0x2F,
   DF = 0x30,
   NUM_GPU_BLOCK, /* count sentinel, keep last */
};
111
112 struct ac_pc_block_base {
113 enum ac_pc_gpu_block gpu_block;
114 const char *name;
115 unsigned num_counters;
116 unsigned flags;
117
118 unsigned select_or;
119 unsigned *select0;
120 unsigned counter0_lo;
121 unsigned *counters;
122
123 /* SPM */
124 unsigned num_spm_counters;
125 unsigned num_spm_wires;
126 unsigned *select1;
127 unsigned spm_block_select;
128 };
129
/* Pairs a block description with two per-entry counts.
 * NOTE(review): presumably one such entry exists per block and GPU
 * generation; confirm against the tables that define these descriptors.
 */
struct ac_pc_block_gfxdescr {
   struct ac_pc_block_base *b; /* the block description this entry refers to */
   unsigned selectors;         /* presumably the number of selectable events */
   unsigned instances;         /* presumably the number of block instances */
};
135
/* Runtime state for one perfcounter block: a pointer to its descriptor plus
 * instance/group counts and two name buffers.
 * NOTE(review): the name buffers look like packed arrays of fixed-width
 * strings indexed with the matching *_stride member; confirm in the code
 * that fills them (see ac_init_block_names()).
 */
struct ac_pc_block {
   const struct ac_pc_block_gfxdescr *b; /* descriptor for this block */
   unsigned num_instances;

   unsigned num_groups;
   char *group_names;
   unsigned group_name_stride;

   char *selector_names;
   unsigned selector_name_stride;
};
147
/* Top-level perfcounter state: the array of blocks and two global options. */
struct ac_perfcounters {
   unsigned num_groups;
   unsigned num_blocks;
   struct ac_pc_block *blocks;

   /* When set, blocks flagged AC_PC_BLOCK_SE get per-SE groups
    * (see ac_pc_block_has_per_se_groups()). */
   bool separate_se;
   /* When set, blocks with more than one instance get per-instance groups
    * (see ac_pc_block_has_per_instance_groups()). */
   bool separate_instance;
};
156
/* Name suffix for each shader type; index 0 is the empty suffix.
 *
 * The order is chosen to be compatible with GPUPerfStudio's hardcoding of
 * performance counter group IDs.
 */
static const char *const ac_pc_shader_type_suffixes[] = {"",    "_ES", "_GS", "_VS",
                                                         "_PS", "_LS", "_HS", "_CS"};
162
163 static const unsigned ac_pc_shader_type_bits[] = {
164 0x7f,
165 S_036780_ES_EN(1),
166 S_036780_GS_EN(1),
167 S_036780_VS_EN(1),
168 S_036780_PS_EN(1),
169 S_036780_LS_EN(1),
170 S_036780_HS_EN(1),
171 S_036780_CS_EN(1),
172 };
173
174 static inline bool
ac_pc_block_has_per_se_groups(const struct ac_perfcounters * pc,const struct ac_pc_block * block)175 ac_pc_block_has_per_se_groups(const struct ac_perfcounters *pc,
176 const struct ac_pc_block *block)
177 {
178 return block->b->b->flags & AC_PC_BLOCK_SE_GROUPS ||
179 (block->b->b->flags & AC_PC_BLOCK_SE && pc->separate_se);
180 }
181
182 static inline bool
ac_pc_block_has_per_instance_groups(const struct ac_perfcounters * pc,const struct ac_pc_block * block)183 ac_pc_block_has_per_instance_groups(const struct ac_perfcounters *pc,
184 const struct ac_pc_block *block)
185 {
186 return block->b->b->flags & AC_PC_BLOCK_INSTANCE_GROUPS ||
187 (block->num_instances > 1 && pc->separate_instance);
188 }
189
/* Find the block that owns the counter with the given index.
 * NOTE(review): base_gid and sub_index look like output parameters (first
 * group ID of the block and the counter's index within it); confirm against
 * the implementation.
 */
struct ac_pc_block *ac_lookup_counter(const struct ac_perfcounters *pc,
                                      unsigned index, unsigned *base_gid,
                                      unsigned *sub_index);

/* Find the block that owns the group with the given index.
 * NOTE(review): *index is passed by pointer and is presumably rewritten to a
 * block-relative group index; confirm against the implementation.
 */
struct ac_pc_block *ac_lookup_group(const struct ac_perfcounters *pc,
                                    unsigned *index);

/* Find the block for the given hardware block ID.
 * NOTE(review): presumably returns NULL when the block is not present;
 * confirm against the implementation.
 */
struct ac_pc_block *ac_pc_get_block(const struct ac_perfcounters *pc,
                                    enum ac_pc_gpu_block gpu_block);

/* Initialize the name data of `block`.
 * NOTE(review): presumably fills group_names / selector_names and returns
 * false on failure; confirm against the implementation.
 */
bool ac_init_block_names(const struct radeon_info *info,
                         const struct ac_perfcounters *pc,
                         struct ac_pc_block *block);

/* Initialize `pc` for the GPU described by `info`, with the requested
 * per-SE / per-instance options.
 * NOTE(review): presumably returns false on failure and must be paired with
 * ac_destroy_perfcounters(); confirm against the implementation.
 */
bool ac_init_perfcounters(const struct radeon_info *info,
                          bool separate_se,
                          bool separate_instance,
                          struct ac_perfcounters *pc);

/* Tear down a perfcounter state set up by ac_init_perfcounters(). */
void ac_destroy_perfcounters(struct ac_perfcounters *pc);
208
209 #endif
210