• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <inttypes.h>
25 
26 #include "radv_cs.h"
27 #include "radv_private.h"
28 #include "sid.h"
29 
30 #define SPM_RING_BASE_ALIGN 32
31 
32 static bool
radv_spm_init_bo(struct radv_device * device)33 radv_spm_init_bo(struct radv_device *device)
34 {
35    struct radeon_winsys *ws = device->ws;
36    uint64_t size = 32 * 1024 * 1024; /* Default to 1MB. */
37    uint16_t sample_interval = 4096;  /* Default to 4096 clk. */
38    VkResult result;
39 
40    device->spm.buffer_size = size;
41    device->spm.sample_interval = sample_interval;
42 
43    struct radeon_winsys_bo *bo = NULL;
44    result = ws->buffer_create(ws, size, 4096, RADEON_DOMAIN_VRAM,
45                               RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
46                               RADV_BO_PRIORITY_SCRATCH, 0, &bo);
47    device->spm.bo = bo;
48    if (result != VK_SUCCESS)
49       return false;
50 
51    result = ws->buffer_make_resident(ws, device->spm.bo, true);
52    if (result != VK_SUCCESS)
53       return false;
54 
55    device->spm.ptr = ws->buffer_map(device->spm.bo);
56    if (!device->spm.ptr)
57       return false;
58 
59    return true;
60 }
61 
/* Emit the packets that select which performance counters the SPM unit
 * samples. Instances are targeted one at a time through GRBM_GFX_INDEX;
 * global broadcast is restored before returning.
 */
static void
radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
{
   const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
   struct ac_spm *spm = &device->spm;

   /* GFX11+: program the per-WGP SQ counter selects. Each enabled WGP is
    * selected via its precomputed grbm_gfx_index before writing its
    * SQ_PERFCOUNTERn_SELECT registers.
    */
   if (gfx_level >= GFX11) {
      for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sq_wgp); instance++) {
         uint32_t num_counters = spm->sq_wgp[instance].num_counters;

         if (!num_counters)
            continue;

         radeon_check_space(device->ws, cs, 3 + num_counters * 3);

         radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, spm->sq_wgp[instance].grbm_gfx_index);

         for (uint32_t b = 0; b < num_counters; b++) {
            const struct ac_spm_counter_select *cntr_sel = &spm->sq_wgp[instance].counters[b];
            uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;

            /* SELECT registers are contiguous, 4 bytes apart. */
            radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, reg_base + b * 4, 1);
            radeon_emit(cs, cntr_sel->sel0);
         }
      }
   }

   /* SQG counters: one iteration per SE, broadcasting to all SHs and
    * instances within that SE.
    */
   for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sqg); instance++) {
      uint32_t num_counters = spm->sqg[instance].num_counters;

      if (!num_counters)
         continue;

      radeon_check_space(device->ws, cs, 3 + num_counters * 3);

      radeon_set_uconfig_reg(
         cs, R_030800_GRBM_GFX_INDEX,
         S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1) | S_030800_SE_INDEX(instance));

      for (uint32_t b = 0; b < num_counters; b++) {
         const struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[b];
         uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;

         radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, reg_base + b * 4, 1);
         radeon_emit(cs, cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
      }
   }

   /* Remaining (non-SQ) blocks: program the SELECT0/SELECT1 pair of every
    * active counter of every block instance. Register offsets come from the
    * per-block tables in ac_pc_block_base.
    */
   for (uint32_t b = 0; b < spm->num_block_sel; b++) {
      struct ac_spm_block_select *block_sel = &spm->block_sel[b];
      struct ac_pc_block_base *regs = block_sel->b->b->b;

      for (unsigned i = 0; i < block_sel->num_instances; i++) {
         struct ac_spm_block_instance *block_instance = &block_sel->instances[i];

         radeon_check_space(device->ws, cs, 3 + (AC_SPM_MAX_COUNTER_PER_BLOCK * 6));

         radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index);

         for (unsigned c = 0; c < block_instance->num_counters; c++) {
            const struct ac_spm_counter_select *cntr_sel = &block_instance->counters[c];

            if (!cntr_sel->active)
               continue;

            radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, regs->select0[c], 1);
            radeon_emit(cs, cntr_sel->sel0);

            radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, regs->select1[c], 1);
            radeon_emit(cs, cntr_sel->sel1);
         }
      }
   }

   /* Restore global broadcasting. */
   radeon_set_uconfig_reg(
      cs, R_030800_GRBM_GFX_INDEX,
      S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1));
}
141 
/* Emit the full SPM configuration: ring buffer registers, segment sizes,
 * the muxsel RAM contents for each segment, and finally the counter
 * selects (via radv_emit_spm_counters).
 *
 * Several RLC_SPM register offsets differ between pre-GFX11 and GFX11+,
 * hence the gfx_level checks below.
 */
void
radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
{
   const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
   struct ac_spm *spm = &device->spm;
   uint64_t va = radv_buffer_get_va(spm->bo);
   uint64_t ring_size = spm->buffer_size;

   /* It's required that the ring VA and the size are correctly aligned. */
   assert(!(va & (SPM_RING_BASE_ALIGN - 1)));
   assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1)));
   assert(spm->sample_interval >= 32);

   radeon_check_space(device->ws, cs, 27);

   /* Configure the SPM ring buffer. */
   radeon_set_uconfig_reg(cs, R_037200_RLC_SPM_PERFMON_CNTL,
                          S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */
                             S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */
   radeon_set_uconfig_reg(cs, R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va);
   radeon_set_uconfig_reg(cs, R_037208_RLC_SPM_PERFMON_RING_BASE_HI, S_037208_RING_BASE_HI(va >> 32));
   radeon_set_uconfig_reg(cs, R_03720C_RLC_SPM_PERFMON_RING_SIZE, ring_size);

   /* Configure the muxsel. */
   uint32_t total_muxsel_lines = 0;
   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
      total_muxsel_lines += spm->num_muxsel_lines[s];
   }

   radeon_set_uconfig_reg(cs, R_03726C_RLC_SPM_ACCUM_MODE, 0);

   if (device->physical_device->rad_info.gfx_level >= GFX11) {
      radeon_set_uconfig_reg(cs, R_03721C_RLC_SPM_PERFMON_SEGMENT_SIZE,
                             S_03721C_TOTAL_NUM_SEGMENT(total_muxsel_lines) |
                                S_03721C_GLOBAL_NUM_SEGMENT(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]) |
                                S_03721C_SE_NUM_SEGMENT(spm->max_se_muxsel_lines));

      radeon_set_uconfig_reg(cs, R_037210_RLC_SPM_RING_WRPTR, 0);
   } else {
      /* Pre-GFX11 splits segment sizes across three registers: per-SE line
       * counts in SE3TO0_SEGMENT_SIZE and the global/total counts in
       * GLB_SEGMENT_SIZE.
       */
      radeon_set_uconfig_reg(cs, R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
      radeon_set_uconfig_reg(cs, R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
                             S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE0]) |
                                S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE1]) |
                                S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE2]) |
                                S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE3]));
      radeon_set_uconfig_reg(cs, R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
                             S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) |
                                S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]));
   }

   /* Upload each muxsel ram to the RLC. */
   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
      unsigned rlc_muxsel_addr, rlc_muxsel_data;
      unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1);

      if (!spm->num_muxsel_lines[s])
         continue;

      /* Global segment broadcasts to all SEs; per-SE segments target the SE
       * whose index matches the segment type. The ADDR/DATA register pairs
       * moved between pre-GFX11 and GFX11+.
       */
      if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
         grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1);

         rlc_muxsel_addr =
            gfx_level >= GFX11 ? R_037220_RLC_SPM_GLOBAL_MUXSEL_ADDR : R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR;
         rlc_muxsel_data =
            gfx_level >= GFX11 ? R_037224_RLC_SPM_GLOBAL_MUXSEL_DATA : R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA;
      } else {
         grbm_gfx_index |= S_030800_SE_INDEX(s);

         rlc_muxsel_addr = gfx_level >= GFX11 ? R_037228_RLC_SPM_SE_MUXSEL_ADDR : R_03721C_RLC_SPM_SE_MUXSEL_ADDR;
         rlc_muxsel_data = gfx_level >= GFX11 ? R_03722C_RLC_SPM_SE_MUXSEL_DATA : R_037220_RLC_SPM_SE_MUXSEL_DATA;
      }

      radeon_check_space(device->ws, cs, 3 + spm->num_muxsel_lines[s] * (7 + AC_SPM_MUXSEL_LINE_SIZE));

      radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, grbm_gfx_index);

      for (unsigned l = 0; l < spm->num_muxsel_lines[s]; l++) {
         uint32_t *data = (uint32_t *)spm->muxsel_lines[s][l].muxsel;

         /* Select MUXSEL_ADDR to point to the next muxsel. */
         radeon_set_uconfig_reg_perfctr(gfx_level, qf, cs, rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE);

         /* Write the muxsel line configuration with MUXSEL_DATA. */
         radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0));
         radeon_emit(cs, S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME) |
                            S_370_WR_ONE_ADDR(1));
         radeon_emit(cs, rlc_muxsel_data >> 2);
         radeon_emit(cs, 0);
         radeon_emit_array(cs, data, AC_SPM_MUXSEL_LINE_SIZE);
      }
   }

   /* Select SPM counters. */
   radv_emit_spm_counters(device, cs, qf);
}
237 
238 bool
radv_spm_init(struct radv_device * device)239 radv_spm_init(struct radv_device *device)
240 {
241    const struct radeon_info *info = &device->physical_device->rad_info;
242    struct ac_perfcounters *pc = &device->physical_device->ac_perfcounters;
243 
244    /* We failed to initialize the performance counters. */
245    if (!pc->blocks)
246       return false;
247 
248    if (!ac_init_spm(info, pc, &device->spm))
249       return false;
250 
251    if (!radv_spm_init_bo(device))
252       return false;
253 
254    return true;
255 }
256 
257 void
radv_spm_finish(struct radv_device * device)258 radv_spm_finish(struct radv_device *device)
259 {
260    struct radeon_winsys *ws = device->ws;
261 
262    if (device->spm.bo) {
263       ws->buffer_make_resident(ws, device->spm.bo, false);
264       ws->buffer_destroy(ws, device->spm.bo);
265    }
266 
267    ac_destroy_spm(&device->spm);
268 }
269