/*
 * Copyright © 2021 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */

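/* Streaming performance monitor (SPM) support: this file manages the SPM
 * ring buffer and emits the packets that program the RLC SPM registers and
 * the per-block performance counter selects, so that counter samples are
 * streamed to memory and can later be read back as an SPM trace.
 */
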
#include <inttypes.h>

#include "radv_buffer.h"
#include "radv_cs.h"
#include "radv_spm.h"
#include "sid.h"

#define SPM_RING_BASE_ALIGN 32

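/* Create the SPM ring buffer BO in VRAM, make it resident and map it so the
 * trace can be read back by the CPU.
 */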
static bool
radv_spm_init_bo(struct radv_device *device)
{
   struct radeon_winsys *ws = device->ws;
   VkResult result;

   struct radeon_winsys_bo *bo = NULL;
   result = radv_bo_create(device, NULL, device->spm.buffer_size, 4096, RADEON_DOMAIN_VRAM,
                           RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
                           RADV_BO_PRIORITY_SCRATCH, 0, true, &bo);
   device->spm.bo = bo;
   if (result != VK_SUCCESS)
      return false;

   result = ws->buffer_make_resident(ws, device->spm.bo, true);
   if (result != VK_SUCCESS)
      return false;

   device->spm.ptr = radv_buffer_map(ws, device->spm.bo);
   if (!device->spm.ptr)
      return false;

   return true;
}

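/* Drop residency for the SPM ring buffer BO and destroy it, if it exists. */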
static void
radv_spm_finish_bo(struct radv_device *device)
{
   struct radeon_winsys *ws = device->ws;

   if (device->spm.bo) {
      ws->buffer_make_resident(ws, device->spm.bo, false);
      radv_bo_destroy(device, NULL, device->spm.bo);
   }
}

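/* Grow the SPM ring buffer when a trace did not fit: destroy the current BO,
 * double the buffer size and allocate a new one.
 */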
static bool
radv_spm_resize_bo(struct radv_device *device)
{
   /* Destroy the previous SPM bo. */
   radv_spm_finish_bo(device);

   /* Double the size of the SPM bo. */
   device->spm.buffer_size *= 2;

   fprintf(stderr,
           "Failed to get the SPM trace because the buffer "
           "was too small, resizing to %d KB\n",
           device->spm.buffer_size / 1024);

   /* Re-create the SPM bo. */
   return radv_spm_init_bo(device);
}

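/* Program the performance counter select registers for every enabled SPM
 * counter: the per-WGP SQ counters (GFX11+), the per-SE SQG counters and the
 * generic per-block counters, each targeted through GRBM_GFX_INDEX.
 */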
static void
radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
   struct ac_spm *spm = &device->spm;

   if (gfx_level >= GFX11) {
      for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sq_wgp); instance++) {
         uint32_t num_counters = spm->sq_wgp[instance].num_counters;

         if (!num_counters)
            continue;

         radeon_check_space(device->ws, cs, 3 + num_counters * 3);

         radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, spm->sq_wgp[instance].grbm_gfx_index);

         for (uint32_t b = 0; b < num_counters; b++) {
            const struct ac_spm_counter_select *cntr_sel = &spm->sq_wgp[instance].counters[b];
            uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;

            radeon_set_uconfig_perfctr_reg_seq(gfx_level, qf, cs, reg_base + b * 4, 1);
            radeon_emit(cs, cntr_sel->sel0);
         }
      }
   }

   for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sqg); instance++) {
      uint32_t num_counters = spm->sqg[instance].num_counters;

      if (!num_counters)
         continue;

      radeon_check_space(device->ws, cs, 3 + num_counters * 3);

      radeon_set_uconfig_reg(
         cs, R_030800_GRBM_GFX_INDEX,
         S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1) | S_030800_SE_INDEX(instance));

      for (uint32_t b = 0; b < num_counters; b++) {
         const struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[b];
         uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;

         radeon_set_uconfig_perfctr_reg_seq(gfx_level, qf, cs, reg_base + b * 4, 1);
         radeon_emit(cs, cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
      }
   }

   for (uint32_t b = 0; b < spm->num_block_sel; b++) {
      struct ac_spm_block_select *block_sel = &spm->block_sel[b];
      struct ac_pc_block_base *regs = block_sel->b->b->b;

      for (unsigned i = 0; i < block_sel->num_instances; i++) {
         struct ac_spm_block_instance *block_instance = &block_sel->instances[i];

         radeon_check_space(device->ws, cs, 3 + (AC_SPM_MAX_COUNTER_PER_BLOCK * 6));

         radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index);

         for (unsigned c = 0; c < block_instance->num_counters; c++) {
            const struct ac_spm_counter_select *cntr_sel = &block_instance->counters[c];

            if (!cntr_sel->active)
               continue;

            radeon_set_uconfig_perfctr_reg_seq(gfx_level, qf, cs, regs->select0[c], 1);
            radeon_emit(cs, cntr_sel->sel0);

            radeon_set_uconfig_perfctr_reg_seq(gfx_level, qf, cs, regs->select1[c], 1);
            radeon_emit(cs, cntr_sel->sel1);
         }
      }
   }

   /* Restore global broadcasting. */
   radeon_set_uconfig_reg(
      cs, R_030800_GRBM_GFX_INDEX,
      S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1));
}

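/* Emit the packets that configure SPM: the ring buffer address and size, the
 * sampling interval, the per-SE and global segment sizes, the muxsel RAM that
 * routes counter data into the ring, and finally the counter selects.
 */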
void
radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
   struct ac_spm *spm = &device->spm;
   uint64_t va = radv_buffer_get_va(spm->bo);
   uint64_t ring_size = spm->buffer_size;

   /* It's required that the ring VA and the size are correctly aligned. */
   assert(!(va & (SPM_RING_BASE_ALIGN - 1)));
   assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1)));
   assert(spm->sample_interval >= 32);

   radeon_check_space(device->ws, cs, 27);

   /* Configure the SPM ring buffer. */
   radeon_set_uconfig_reg(cs, R_037200_RLC_SPM_PERFMON_CNTL,
                          S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */
                             S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */
   radeon_set_uconfig_reg(cs, R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va);
   radeon_set_uconfig_reg(cs, R_037208_RLC_SPM_PERFMON_RING_BASE_HI, S_037208_RING_BASE_HI(va >> 32));
   radeon_set_uconfig_reg(cs, R_03720C_RLC_SPM_PERFMON_RING_SIZE, ring_size);

   /* Configure the muxsel. */
   uint32_t total_muxsel_lines = 0;
   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
      total_muxsel_lines += spm->num_muxsel_lines[s];
   }

   radeon_set_uconfig_reg(cs, R_03726C_RLC_SPM_ACCUM_MODE, 0);

   if (pdev->info.gfx_level >= GFX11) {
      radeon_set_uconfig_reg(cs, R_03721C_RLC_SPM_PERFMON_SEGMENT_SIZE,
                             S_03721C_TOTAL_NUM_SEGMENT(total_muxsel_lines) |
                                S_03721C_GLOBAL_NUM_SEGMENT(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]) |
                                S_03721C_SE_NUM_SEGMENT(spm->max_se_muxsel_lines));

      radeon_set_uconfig_reg(cs, R_037210_RLC_SPM_RING_WRPTR, 0);
   } else {
      radeon_set_uconfig_reg(cs, R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
      radeon_set_uconfig_reg(cs, R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
                             S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE0]) |
                                S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE1]) |
                                S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE2]) |
                                S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE3]));
      radeon_set_uconfig_reg(cs, R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
                             S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) |
                                S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]));
   }

   /* Upload each muxsel ram to the RLC. */
   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
      unsigned rlc_muxsel_addr, rlc_muxsel_data;
      unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1);

      if (!spm->num_muxsel_lines[s])
         continue;

      if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
         grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1);

         rlc_muxsel_addr =
            gfx_level >= GFX11 ? R_037220_RLC_SPM_GLOBAL_MUXSEL_ADDR : R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR;
         rlc_muxsel_data =
            gfx_level >= GFX11 ? R_037224_RLC_SPM_GLOBAL_MUXSEL_DATA : R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA;
      } else {
         grbm_gfx_index |= S_030800_SE_INDEX(s);

         rlc_muxsel_addr = gfx_level >= GFX11 ? R_037228_RLC_SPM_SE_MUXSEL_ADDR : R_03721C_RLC_SPM_SE_MUXSEL_ADDR;
         rlc_muxsel_data = gfx_level >= GFX11 ? R_03722C_RLC_SPM_SE_MUXSEL_DATA : R_037220_RLC_SPM_SE_MUXSEL_DATA;
      }

      radeon_check_space(device->ws, cs, 3 + spm->num_muxsel_lines[s] * (7 + AC_SPM_MUXSEL_LINE_SIZE));

      radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, grbm_gfx_index);

      for (unsigned l = 0; l < spm->num_muxsel_lines[s]; l++) {
         uint32_t *data = (uint32_t *)spm->muxsel_lines[s][l].muxsel;

         /* Select MUXSEL_ADDR to point to the next muxsel. */
         radeon_set_uconfig_perfctr_reg(gfx_level, qf, cs, rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE);

         /* Write the muxsel line configuration with MUXSEL_DATA. */
         radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0));
         radeon_emit(cs, S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME) |
                            S_370_WR_ONE_ADDR(1));
         radeon_emit(cs, rlc_muxsel_data >> 2);
         radeon_emit(cs, 0);
         radeon_emit_array(cs, data, AC_SPM_MUXSEL_LINE_SIZE);
      }
   }

   /* Select SPM counters. */
   radv_emit_spm_counters(device, cs, qf);
}

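/* Initialize SPM for the device: requires the performance counter blocks to
 * be available, then sets the default buffer size and sample interval and
 * allocates the ring buffer.
 */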
bool
radv_spm_init(struct radv_device *device)
{
   struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radeon_info *gpu_info = &pdev->info;
   struct ac_perfcounters *pc = &pdev->ac_perfcounters;

   /* We failed to initialize the performance counters. */
   if (!pc->blocks)
      return false;

   if (!ac_init_spm(gpu_info, pc, &device->spm))
      return false;

   device->spm.buffer_size = 32 * 1024 * 1024; /* Default to 32MB. */
   device->spm.sample_interval = 4096;         /* Default to 4096 clk. */

   if (!radv_spm_init_bo(device))
      return false;

   return true;
}

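/* Release the SPM ring buffer and the common SPM state. */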
void
radv_spm_finish(struct radv_device *device)
{
   radv_spm_finish_bo(device);

   ac_destroy_spm(&device->spm);
}

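/* Retrieve the recorded SPM trace. If the trace did not fit in the ring
 * buffer, the buffer is resized so that a subsequent capture can succeed,
 * and false is returned.
 */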
bool
radv_get_spm_trace(struct radv_queue *queue, struct ac_spm_trace *spm_trace)
{
   struct radv_device *device = radv_queue_device(queue);

   if (!ac_spm_get_trace(&device->spm, spm_trace)) {
      if (!radv_spm_resize_bo(device))
         fprintf(stderr, "radv: Failed to resize the SPM buffer.\n");
      return false;
   }

   return true;
}