1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39
40 #include "vega10_enum.h"
41
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47
48 #include "amdgpu_ras.h"
49
50 #include "amdgpu_ring_mux.h"
51 #include "gfx_v9_4.h"
52 #include "gfx_v9_0.h"
53 #include "gfx_v9_0_cleaner_shader.h"
54 #include "gfx_v9_4_2.h"
55
56 #include "asic_reg/pwr/pwr_10_0_offset.h"
57 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
58 #include "asic_reg/gc/gc_9_0_default.h"
59
60 #define GFX9_NUM_GFX_RINGS 1
61 #define GFX9_NUM_SW_GFX_RINGS 2
62 #define GFX9_MEC_HPD_SIZE 4096
63 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
64 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
65
66 #define mmGCEA_PROBE_MAP 0x070c
67 #define mmGCEA_PROBE_MAP_BASE_IDX 0
68
69 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
73 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
74 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
75
76 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
80 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
81 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
82
83 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
87 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
88 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
89
90 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
91 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
92 MODULE_FIRMWARE("amdgpu/raven_me.bin");
93 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
94 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
95 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
96
97 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
103 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
104
105 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
109 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
110 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
111 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
112
113 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
114 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
115
116 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
120 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
121
122 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
125 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
126 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
127 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
128
129 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
130 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
131 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
132 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
133 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
134
135 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03
136 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0
137 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04
138 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0
139 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09
140 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0
141 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a
142 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0
143 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b
144 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0
145 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c
146 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0
147
148 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir 0x0025
149 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX 1
150 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026
151 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1
152
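/*
 * GC status/context registers captured by the IP dump paths (e.g. after a
 * GPU hang) for post-mortem debugging.
 */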
153 static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
154 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
155 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
156 SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
157 SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
158 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
159 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
160 SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
161 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
162 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
163 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
164 SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
165 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
166 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
167 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
168 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
169 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
170 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
171 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
172 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
173 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
174 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
175 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_RPTR),
176 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
177 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
178 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
179 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
180 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
181 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
182 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
183 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
184 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
185 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
186 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
187 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
188 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
189 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
190 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
191 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
192 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
193 SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
194 SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
195 SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
196 SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
197 SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
198 SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
199 SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
200 SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
201 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
202 SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
203 SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
204 SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
205 SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
206 SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
207 SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
208 SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
209 SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
210 SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
211 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
212 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
213 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
214 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
215 SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
216 SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
217 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
218 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
219 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
220 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
221 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
222 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
223 SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
224 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
225 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
226 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
227 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
228 /* cp header registers */
229 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
230 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
231 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
232 SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
233 SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
234 /* SE status registers */
235 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
236 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
237 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
238 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
239 };
240
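/*
 * Per-queue (HQD) registers captured for each compute queue when dumping
 * the IP state.
 */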
241 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
242 /* compute queue registers */
243 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
244 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
245 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
246 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
247 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
248 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
249 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
250 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
251 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
252 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
253 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
254 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
255 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
256 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
257 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
258 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
259 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
260 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
261 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
262 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
263 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
264 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
265 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
266 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
267 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
268 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
269 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
270 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
271 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
272 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
273 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
274 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
275 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
276 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
277 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
278 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
279 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
280 };
281
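/*
 * GFX RAS sub-block indices as defined by the RAS TA interface; the
 * *_INDEX_START/*_INDEX_END values bracket the multi-instance ranges.
 */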
282 enum ta_ras_gfx_subblock {
283 /*CPC*/
284 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
285 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
286 TA_RAS_BLOCK__GFX_CPC_UCODE,
287 TA_RAS_BLOCK__GFX_DC_STATE_ME1,
288 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
289 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
290 TA_RAS_BLOCK__GFX_DC_STATE_ME2,
291 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
292 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
293 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
294 /* CPF*/
295 TA_RAS_BLOCK__GFX_CPF_INDEX_START,
296 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
297 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
298 TA_RAS_BLOCK__GFX_CPF_TAG,
299 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
300 /* CPG*/
301 TA_RAS_BLOCK__GFX_CPG_INDEX_START,
302 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
303 TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
304 TA_RAS_BLOCK__GFX_CPG_TAG,
305 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
306 /* GDS*/
307 TA_RAS_BLOCK__GFX_GDS_INDEX_START,
308 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
309 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
310 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
311 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
312 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
313 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
314 /* SPI*/
315 TA_RAS_BLOCK__GFX_SPI_SR_MEM,
316 /* SQ*/
317 TA_RAS_BLOCK__GFX_SQ_INDEX_START,
318 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
319 TA_RAS_BLOCK__GFX_SQ_LDS_D,
320 TA_RAS_BLOCK__GFX_SQ_LDS_I,
321 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
322 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
323 /* SQC (3 ranges)*/
324 TA_RAS_BLOCK__GFX_SQC_INDEX_START,
325 /* SQC range 0*/
326 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
327 TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
328 TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
329 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
330 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
331 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
332 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
333 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
334 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
335 TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
336 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
337 /* SQC range 1*/
338 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
339 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
340 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
341 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
342 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
343 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
344 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
345 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
346 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
347 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
348 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
349 TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
350 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
351 /* SQC range 2*/
352 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
353 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
354 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
355 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
356 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
357 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
358 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
359 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
360 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
361 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
362 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
363 TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
364 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
365 TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
366 /* TA*/
367 TA_RAS_BLOCK__GFX_TA_INDEX_START,
368 TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
369 TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
370 TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
371 TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
372 TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
373 TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
374 /* TCA*/
375 TA_RAS_BLOCK__GFX_TCA_INDEX_START,
376 TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
377 TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
378 TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
379 /* TCC (5 sub-ranges)*/
380 TA_RAS_BLOCK__GFX_TCC_INDEX_START,
381 /* TCC range 0*/
382 TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
383 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
384 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
385 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
386 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
387 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
388 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
389 TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
390 TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
391 TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
392 /* TCC range 1*/
393 TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
394 TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
395 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
396 TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
397 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
398 /* TCC range 2*/
399 TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
400 TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
401 TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
402 TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
403 TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
404 TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
405 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
406 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
407 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
408 TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
409 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
410 /* TCC range 3*/
411 TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
412 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
413 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
414 TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
415 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
416 /* TCC range 4*/
417 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
418 TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
419 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
420 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
421 TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
422 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
423 TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
424 /* TCI*/
425 TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
426 /* TCP*/
427 TA_RAS_BLOCK__GFX_TCP_INDEX_START,
428 TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
429 TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
430 TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
431 TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
432 TA_RAS_BLOCK__GFX_TCP_DB_RAM,
433 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
434 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
435 TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
436 /* TD*/
437 TA_RAS_BLOCK__GFX_TD_INDEX_START,
438 TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
439 TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
440 TA_RAS_BLOCK__GFX_TD_CS_FIFO,
441 TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
442 /* EA (3 sub-ranges)*/
443 TA_RAS_BLOCK__GFX_EA_INDEX_START,
444 /* EA range 0*/
445 TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
446 TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
447 TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
448 TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
449 TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
450 TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
451 TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
452 TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
453 TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
454 TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
455 /* EA range 1*/
456 TA_RAS_BLOCK__GFX_EA_INDEX1_START,
457 TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
458 TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
459 TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
460 TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
461 TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
462 TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
463 TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
464 TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
465 /* EA range 2*/
466 TA_RAS_BLOCK__GFX_EA_INDEX2_START,
467 TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
468 TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
469 TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
470 TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
471 TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
472 TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
473 /* UTC VM L2 bank*/
474 TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
475 /* UTC VM walker*/
476 TA_RAS_BLOCK__UTC_VML2_WALKER,
477 /* UTC ATC L2 2MB cache*/
478 TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
479 /* UTC ATC L2 4KB cache*/
480 TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
481 TA_RAS_BLOCK__GFX_MAX
482 };
483
484 struct ras_gfx_subblock {
485 unsigned char *name;
486 int ta_subblock;
487 int hw_supported_error_type;
488 int sw_supported_error_type;
489 };
490
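/*
 * Build one ras_gfx_subblocks[] entry: flags a..d are packed into the
 * hardware-supported error-type mask and e..h into the software-supported
 * mask (bit meanings follow the RAS TA interface).
 */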
491 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
492 [AMDGPU_RAS_BLOCK__##subblock] = { \
493 #subblock, \
494 TA_RAS_BLOCK__##subblock, \
495 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
496 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \
497 }
498
499 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
500 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
501 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
502 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
503 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
504 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
505 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
506 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
507 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
508 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
509 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
510 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
511 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
512 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
513 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
514 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
515 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
516 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
517 0),
518 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
519 0),
520 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
521 AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
522 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
523 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
524 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
525 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
526 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
527 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
528 0, 0),
529 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
530 0),
531 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
532 0, 0),
533 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
534 0),
535 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
536 0, 0),
537 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
538 0),
539 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
540 1),
541 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
542 0, 0, 0),
543 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
544 0),
545 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
546 0),
547 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
548 0),
549 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
550 0),
551 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
552 0),
553 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
554 0, 0),
555 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
556 0),
557 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
558 0),
559 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
560 0, 0, 0),
561 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
562 0),
563 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
564 0),
565 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
566 0),
567 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
568 0),
569 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
570 0),
571 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
572 0, 0),
573 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
574 0),
575 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
576 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
577 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
578 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
579 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
580 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
581 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
582 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
583 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
584 1),
585 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
586 1),
587 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
588 1),
589 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
590 0),
591 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
592 0),
593 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
594 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
595 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
596 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
597 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
598 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
599 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
600 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
601 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
602 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
603 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
604 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
605 0),
606 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
607 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
608 0),
609 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
610 0, 0),
611 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
612 0),
613 AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
614 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
615 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
616 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
617 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
618 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
619 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
620 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
621 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
622 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
623 AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
624 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
625 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
626 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
627 AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
628 AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
629 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
630 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
631 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
632 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
633 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
634 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
635 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
636 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
637 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
638 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
639 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
640 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
641 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
642 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
643 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
644 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
645 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
646 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
647 };
648
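/*
 * The "golden" register settings below are mask/value pairs programmed at
 * init time to put each GC 9.x variant into its validated default
 * configuration.
 */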
649 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
650 {
651 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
652 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
653 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
654 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
655 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
656 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
657 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
658 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
659 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
660 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
661 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
662 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
663 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
664 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
665 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
666 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
667 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
668 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
669 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
670 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
671 };
672
673 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
674 {
675 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
676 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
677 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
678 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
679 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
680 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
681 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
682 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
683 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
684 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
685 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
686 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
687 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
688 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
689 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
690 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
691 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
692 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
693 };
694
695 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
696 {
697 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
698 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
699 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
700 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
701 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
702 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
703 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
704 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
705 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
706 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
707 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
708 };
709
710 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
711 {
712 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
713 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
714 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
715 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
716 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
717 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
718 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
719 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
720 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
721 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
722 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
723 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
724 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
725 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
726 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
727 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
728 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
729 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
730 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
731 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
732 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
733 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
734 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
735 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
736 };
737
738 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
739 {
740 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
741 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
742 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
743 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
744 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
745 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
746 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
747 };
748
749 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
750 {
751 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
752 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
753 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
754 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
755 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
756 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
757 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
758 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
759 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
760 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
761 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
762 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
763 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
764 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
765 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
766 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
767 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
768 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
769 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
770 };
771
772 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
773 {
774 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
775 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
776 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
777 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
778 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
779 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
780 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
781 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
782 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
783 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
784 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
785 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
786 };
787
788 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
789 {
790 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
791 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
792 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
793 };
794
795 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
796 {
797 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
798 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
799 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
800 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
801 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
802 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
803 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
804 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
805 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
806 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
807 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
808 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
809 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
810 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
811 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
812 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
813 };
814
815 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
816 {
817 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
818 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
819 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
820 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
821 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
822 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
823 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
824 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
825 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
826 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
827 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
828 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
829 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
830 };
831
832 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
833 {
834 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
835 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
836 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
837 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
838 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
839 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
840 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
841 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
842 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
843 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
844 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
845 };
846
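/* Registers whose access is routed through the RLCG interface (e.g. under SR-IOV). */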
847 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
848 {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
849 {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
850 };
851
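/*
 * Offsets of the RLC SRM index control ADDR/DATA register instances,
 * expressed relative to instance 0.
 */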
852 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
853 {
854 mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
855 mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
856 mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
857 mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
858 mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
859 mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
860 mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
861 mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
862 };
863
864 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
865 {
866 mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
867 mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
868 mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
869 mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
870 mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
871 mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
872 mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
873 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
874 };
875
876 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
877 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
878 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
879 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
880
881 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
882 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
883 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
884 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
885 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
886 struct amdgpu_cu_info *cu_info);
887 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
888 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
889 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
890 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
891 void *ras_error_status);
892 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
893 void *inject_if, uint32_t instance_mask);
894 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
895 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
896 unsigned int vmid);
897 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
898 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
899
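/*
 * Emit PACKET3_SET_RESOURCES on the KIQ ring: hands the compute queue mask
 * to the CP and supplies the cleaner shader address used when scrubbing
 * shader state.
 */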
900 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
901 uint64_t queue_mask)
902 {
903 struct amdgpu_device *adev = kiq_ring->adev;
904 u64 shader_mc_addr;
905
906 /* Cleaner shader MC address */
907 shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
908
909 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
910 amdgpu_ring_write(kiq_ring,
911 PACKET3_SET_RESOURCES_VMID_MASK(0) |
912 /* vmid_mask:0, queue_type:0 (KIQ) */
913 PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
914 amdgpu_ring_write(kiq_ring,
915 lower_32_bits(queue_mask)); /* queue mask lo */
916 amdgpu_ring_write(kiq_ring,
917 upper_32_bits(queue_mask)); /* queue mask hi */
918 amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
919 amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
920 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
921 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
922 }
923
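/* Emit PACKET3_MAP_QUEUES on the KIQ ring to map a ring's hardware queue from its MQD. */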
924 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
925 struct amdgpu_ring *ring)
926 {
927 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
928 uint64_t wptr_addr = ring->wptr_gpu_addr;
929 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
930
931 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
932 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
933 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
934 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
935 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
936 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
937 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
938 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
939 /*queue_type: normal compute queue */
940 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
941 /* alloc format: all_on_one_pipe */
942 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
943 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
944 /* num_queues: must be 1 */
945 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
946 amdgpu_ring_write(kiq_ring,
947 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
948 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
949 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
950 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
951 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
952 }
953
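/*
 * Emit PACKET3_UNMAP_QUEUES on the KIQ ring; for PREEMPT_QUEUES_NO_UNMAP the
 * current wptr is passed back so the queue can later be restarted from it.
 */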
954 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
955 struct amdgpu_ring *ring,
956 enum amdgpu_unmap_queues_action action,
957 u64 gpu_addr, u64 seq)
958 {
959 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
960
961 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
962 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
963 PACKET3_UNMAP_QUEUES_ACTION(action) |
964 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
965 PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
966 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
967 amdgpu_ring_write(kiq_ring,
968 PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
969
970 if (action == PREEMPT_QUEUES_NO_UNMAP) {
971 amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
972 amdgpu_ring_write(kiq_ring, 0);
973 amdgpu_ring_write(kiq_ring, 0);
974
975 } else {
976 amdgpu_ring_write(kiq_ring, 0);
977 amdgpu_ring_write(kiq_ring, 0);
978 amdgpu_ring_write(kiq_ring, 0);
979 }
980 }
981
982 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
983 struct amdgpu_ring *ring,
984 u64 addr,
985 u64 seq)
986 {
987 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
988
989 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
990 amdgpu_ring_write(kiq_ring,
991 PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
992 PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
993 PACKET3_QUERY_STATUS_COMMAND(2));
994 /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
995 amdgpu_ring_write(kiq_ring,
996 PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
997 PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
998 amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
999 amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
1000 amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
1001 amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
1002 }
1003
1004 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
1005 uint16_t pasid, uint32_t flush_type,
1006 bool all_hub)
1007 {
1008 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
1009 amdgpu_ring_write(kiq_ring,
1010 PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
1011 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
1012 PACKET3_INVALIDATE_TLBS_PASID(pasid) |
1013 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
1014 }
1015
1016
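/*
 * Forcibly quiesce a hung hardware queue: select it via SRBM, request a
 * dequeue and an SPI queue reset, then poll CP_HQD_ACTIVE until it clears.
 * Only compute queues are handled here.
 */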
1017 static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
1018 uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
1019 uint32_t xcc_id, uint32_t vmid)
1020 {
1021 struct amdgpu_device *adev = kiq_ring->adev;
1022 unsigned i;
1023
1024 /* enter safe mode */
1025 amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
1026 mutex_lock(&adev->srbm_mutex);
1027 soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0);
1028
1029 if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
1030 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
1031 WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
1032 /* wait till the dequeue takes effect */
1033 for (i = 0; i < adev->usec_timeout; i++) {
1034 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
1035 break;
1036 udelay(1);
1037 }
1038 if (i >= adev->usec_timeout)
1039 dev_err(adev->dev, "fail to wait on hqd deactive\n");
1040 } else {
1041 dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
1042 }
1043
1044 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
1045 mutex_unlock(&adev->srbm_mutex);
1046 /* exit safe mode */
1047 amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
1048 }
1049
1050 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
1051 .kiq_set_resources = gfx_v9_0_kiq_set_resources,
1052 .kiq_map_queues = gfx_v9_0_kiq_map_queues,
1053 .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
1054 .kiq_query_status = gfx_v9_0_kiq_query_status,
1055 .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
1056 .kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue,
1057 .set_resources_size = 8,
1058 .map_queues_size = 7,
1059 .unmap_queues_size = 6,
1060 .query_status_size = 7,
1061 .invalidate_tlbs_size = 2,
1062 };
1063
1064 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
1065 {
1066 adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
1067 }
1068
1069 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
1070 {
1071 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1072 case IP_VERSION(9, 0, 1):
1073 soc15_program_register_sequence(adev,
1074 golden_settings_gc_9_0,
1075 ARRAY_SIZE(golden_settings_gc_9_0));
1076 soc15_program_register_sequence(adev,
1077 golden_settings_gc_9_0_vg10,
1078 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
1079 break;
1080 case IP_VERSION(9, 2, 1):
1081 soc15_program_register_sequence(adev,
1082 golden_settings_gc_9_2_1,
1083 ARRAY_SIZE(golden_settings_gc_9_2_1));
1084 soc15_program_register_sequence(adev,
1085 golden_settings_gc_9_2_1_vg12,
1086 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
1087 break;
1088 case IP_VERSION(9, 4, 0):
1089 soc15_program_register_sequence(adev,
1090 golden_settings_gc_9_0,
1091 ARRAY_SIZE(golden_settings_gc_9_0));
1092 soc15_program_register_sequence(adev,
1093 golden_settings_gc_9_0_vg20,
1094 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
1095 break;
1096 case IP_VERSION(9, 4, 1):
1097 soc15_program_register_sequence(adev,
1098 golden_settings_gc_9_4_1_arct,
1099 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
1100 break;
1101 case IP_VERSION(9, 2, 2):
1102 case IP_VERSION(9, 1, 0):
1103 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
1104 ARRAY_SIZE(golden_settings_gc_9_1));
1105 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1106 soc15_program_register_sequence(adev,
1107 golden_settings_gc_9_1_rv2,
1108 ARRAY_SIZE(golden_settings_gc_9_1_rv2));
1109 else
1110 soc15_program_register_sequence(adev,
1111 golden_settings_gc_9_1_rv1,
1112 ARRAY_SIZE(golden_settings_gc_9_1_rv1));
1113 break;
1114 case IP_VERSION(9, 3, 0):
1115 soc15_program_register_sequence(adev,
1116 golden_settings_gc_9_1_rn,
1117 ARRAY_SIZE(golden_settings_gc_9_1_rn));
1118 return; /* Renoir does not need the common golden settings */
1119 case IP_VERSION(9, 4, 2):
1120 gfx_v9_4_2_init_golden_registers(adev,
1121 adev->smuio.funcs->get_die_id(adev));
1122 break;
1123 default:
1124 break;
1125 }
1126
1127 if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1128 (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
1129 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1130 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1131 }
1132
1133 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1134 bool wc, uint32_t reg, uint32_t val)
1135 {
1136 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1137 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1138 WRITE_DATA_DST_SEL(0) |
1139 (wc ? WR_CONFIRM : 0));
1140 amdgpu_ring_write(ring, reg);
1141 amdgpu_ring_write(ring, 0);
1142 amdgpu_ring_write(ring, val);
1143 }
1144
1145 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1146 int mem_space, int opt, uint32_t addr0,
1147 uint32_t addr1, uint32_t ref, uint32_t mask,
1148 uint32_t inv)
1149 {
1150 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1151 amdgpu_ring_write(ring,
1152 /* memory (1) or register (0) */
1153 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1154 WAIT_REG_MEM_OPERATION(opt) | /* wait */
1155 WAIT_REG_MEM_FUNCTION(3) | /* equal */
1156 WAIT_REG_MEM_ENGINE(eng_sel)));
1157
1158 if (mem_space)
1159 BUG_ON(addr0 & 0x3); /* Dword align */
1160 amdgpu_ring_write(ring, addr0);
1161 amdgpu_ring_write(ring, addr1);
1162 amdgpu_ring_write(ring, ref);
1163 amdgpu_ring_write(ring, mask);
1164 amdgpu_ring_write(ring, inv); /* poll interval */
1165 }
1166
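/*
 * Simple ring test: write 0xDEADBEEF to SCRATCH_REG0 through the ring and
 * poll the register until the value appears or the timeout expires.
 */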
1167 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1168 {
1169 struct amdgpu_device *adev = ring->adev;
1170 uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1171 uint32_t tmp = 0;
1172 unsigned i;
1173 int r;
1174
1175 WREG32(scratch, 0xCAFEDEAD);
1176 r = amdgpu_ring_alloc(ring, 3);
1177 if (r)
1178 return r;
1179
1180 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1181 amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1182 amdgpu_ring_write(ring, 0xDEADBEEF);
1183 amdgpu_ring_commit(ring);
1184
1185 for (i = 0; i < adev->usec_timeout; i++) {
1186 tmp = RREG32(scratch);
1187 if (tmp == 0xDEADBEEF)
1188 break;
1189 udelay(1);
1190 }
1191
1192 if (i >= adev->usec_timeout)
1193 r = -ETIMEDOUT;
1194 return r;
1195 }
1196
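/*
 * IB sanity test: reserve a writeback slot, submit a small IB that
 * WRITE_DATAs 0xDEADBEEF to that GPU address, wait on the fence, and
 * verify the value reached memory.
 */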
1197 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1198 {
1199 struct amdgpu_device *adev = ring->adev;
1200 struct amdgpu_ib ib;
1201 struct dma_fence *f = NULL;
1202
1203 unsigned index;
1204 uint64_t gpu_addr;
1205 uint32_t tmp;
1206 long r;
1207
1208 r = amdgpu_device_wb_get(adev, &index);
1209 if (r)
1210 return r;
1211
1212 gpu_addr = adev->wb.gpu_addr + (index * 4);
1213 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1214 memset(&ib, 0, sizeof(ib));
1215
1216 r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
1217 if (r)
1218 goto err1;
1219
1220 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1221 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1222 ib.ptr[2] = lower_32_bits(gpu_addr);
1223 ib.ptr[3] = upper_32_bits(gpu_addr);
1224 ib.ptr[4] = 0xDEADBEEF;
1225 ib.length_dw = 5;
1226
1227 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1228 if (r)
1229 goto err2;
1230
1231 r = dma_fence_wait_timeout(f, false, timeout);
1232 if (r == 0) {
1233 r = -ETIMEDOUT;
1234 goto err2;
1235 } else if (r < 0) {
1236 goto err2;
1237 }
1238
1239 tmp = adev->wb.wb[index];
1240 if (tmp == 0xDEADBEEF)
1241 r = 0;
1242 else
1243 r = -EINVAL;
1244
1245 err2:
1246 amdgpu_ib_free(adev, &ib, NULL);
1247 dma_fence_put(f);
1248 err1:
1249 amdgpu_device_wb_free(adev, index);
1250 return r;
1251 }
1252
1253
1254 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1255 {
1256 amdgpu_ucode_release(&adev->gfx.pfp_fw);
1257 amdgpu_ucode_release(&adev->gfx.me_fw);
1258 amdgpu_ucode_release(&adev->gfx.ce_fw);
1259 amdgpu_ucode_release(&adev->gfx.rlc_fw);
1260 amdgpu_ucode_release(&adev->gfx.mec_fw);
1261 amdgpu_ucode_release(&adev->gfx.mec2_fw);
1262
1263 kfree(adev->gfx.rlc.register_list_format);
1264 }
1265
1266 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1267 {
1268 adev->gfx.me_fw_write_wait = false;
1269 adev->gfx.mec_fw_write_wait = false;
1270
1271 if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1272 (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) &&
1273 ((adev->gfx.mec_fw_version < 0x000001a5) ||
1274 (adev->gfx.mec_feature_version < 46) ||
1275 (adev->gfx.pfp_fw_version < 0x000000b7) ||
1276 (adev->gfx.pfp_feature_version < 46)))
1277 DRM_WARN_ONCE("CP firmware version too old, please update!");
1278
1279 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1280 case IP_VERSION(9, 0, 1):
1281 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1282 (adev->gfx.me_feature_version >= 42) &&
1283 (adev->gfx.pfp_fw_version >= 0x000000b1) &&
1284 (adev->gfx.pfp_feature_version >= 42))
1285 adev->gfx.me_fw_write_wait = true;
1286
1287 if ((adev->gfx.mec_fw_version >= 0x00000193) &&
1288 (adev->gfx.mec_feature_version >= 42))
1289 adev->gfx.mec_fw_write_wait = true;
1290 break;
1291 case IP_VERSION(9, 2, 1):
1292 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1293 (adev->gfx.me_feature_version >= 44) &&
1294 (adev->gfx.pfp_fw_version >= 0x000000b2) &&
1295 (adev->gfx.pfp_feature_version >= 44))
1296 adev->gfx.me_fw_write_wait = true;
1297
1298 if ((adev->gfx.mec_fw_version >= 0x00000196) &&
1299 (adev->gfx.mec_feature_version >= 44))
1300 adev->gfx.mec_fw_write_wait = true;
1301 break;
1302 case IP_VERSION(9, 4, 0):
1303 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1304 (adev->gfx.me_feature_version >= 44) &&
1305 (adev->gfx.pfp_fw_version >= 0x000000b2) &&
1306 (adev->gfx.pfp_feature_version >= 44))
1307 adev->gfx.me_fw_write_wait = true;
1308
1309 if ((adev->gfx.mec_fw_version >= 0x00000197) &&
1310 (adev->gfx.mec_feature_version >= 44))
1311 adev->gfx.mec_fw_write_wait = true;
1312 break;
1313 case IP_VERSION(9, 1, 0):
1314 case IP_VERSION(9, 2, 2):
1315 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1316 (adev->gfx.me_feature_version >= 42) &&
1317 (adev->gfx.pfp_fw_version >= 0x000000b1) &&
1318 (adev->gfx.pfp_feature_version >= 42))
1319 adev->gfx.me_fw_write_wait = true;
1320
1321 if ((adev->gfx.mec_fw_version >= 0x00000192) &&
1322 (adev->gfx.mec_feature_version >= 42))
1323 adev->gfx.mec_fw_write_wait = true;
1324 break;
1325 default:
1326 adev->gfx.me_fw_write_wait = true;
1327 adev->gfx.mec_fw_write_wait = true;
1328 break;
1329 }
1330 }
1331
1332 struct amdgpu_gfxoff_quirk {
1333 u16 chip_vendor;
1334 u16 chip_device;
1335 u16 subsys_vendor;
1336 u16 subsys_device;
1337 u8 revision;
1338 };
1339
1340 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1341 /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1342 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1343 /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1344 { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1345 /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1346 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1347 /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1348 { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1349 /* https://bbs.openkylin.top/t/topic/171497 */
1350 { 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 },
1351 /* HP 705G4 DM with R5 2400G */
1352 { 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 },
1353 { 0, 0, 0, 0, 0 },
1354 };
1355
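/*
 * Returns true when the device matches an entry in the quirk list above
 * (vendor/device/subsystem IDs and revision), i.e. GFXOFF is known to be
 * unstable on that part and should be disabled.
 */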
1356 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1357 {
1358 const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1359
1360 while (p && p->chip_device != 0) {
1361 if (pdev->vendor == p->chip_vendor &&
1362 pdev->device == p->chip_device &&
1363 pdev->subsystem_vendor == p->subsys_vendor &&
1364 pdev->subsystem_device == p->subsys_device &&
1365 pdev->revision == p->revision) {
1366 return true;
1367 }
1368 ++p;
1369 }
1370 return false;
1371 }
1372
1373 static bool is_raven_kicker(struct amdgpu_device *adev)
1374 {
1375 if (adev->pm.fw_version >= 0x41e2b)
1376 return true;
1377 else
1378 return false;
1379 }
1380
1381 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1382 {
1383 if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) &&
1384 (adev->gfx.me_fw_version >= 0x000000a5) &&
1385 (adev->gfx.me_feature_version >= 52))
1386 return true;
1387 else
1388 return false;
1389 }
1390
1391 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1392 {
1393 if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1394 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1395
1396 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1397 case IP_VERSION(9, 0, 1):
1398 case IP_VERSION(9, 2, 1):
1399 case IP_VERSION(9, 4, 0):
1400 break;
1401 case IP_VERSION(9, 2, 2):
1402 case IP_VERSION(9, 1, 0):
1403 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1404 (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1405 ((!is_raven_kicker(adev) &&
1406 adev->gfx.rlc_fw_version < 531) ||
1407 (adev->gfx.rlc_feature_version < 1) ||
1408 !adev->gfx.rlc.is_rlc_v2_1))
1409 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1410
1411 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1412 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1413 AMD_PG_SUPPORT_CP |
1414 AMD_PG_SUPPORT_RLC_SMU_HS;
1415 break;
1416 case IP_VERSION(9, 3, 0):
1417 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1418 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1419 AMD_PG_SUPPORT_CP |
1420 AMD_PG_SUPPORT_RLC_SMU_HS;
1421 break;
1422 default:
1423 break;
1424 }
1425 }
1426
1427 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1428 char *chip_name)
1429 {
1430 int err;
1431
1432 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
1433 "amdgpu/%s_pfp.bin", chip_name);
1434 if (err)
1435 goto out;
1436 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1437
1438 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
1439 "amdgpu/%s_me.bin", chip_name);
1440 if (err)
1441 goto out;
1442 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1443
1444 err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
1445 "amdgpu/%s_ce.bin", chip_name);
1446 if (err)
1447 goto out;
1448 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1449
1450 out:
1451 if (err) {
1452 amdgpu_ucode_release(&adev->gfx.pfp_fw);
1453 amdgpu_ucode_release(&adev->gfx.me_fw);
1454 amdgpu_ucode_release(&adev->gfx.ce_fw);
1455 }
1456 return err;
1457 }
1458
1459 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1460 char *chip_name)
1461 {
1462 int err;
1463 const struct rlc_firmware_header_v2_0 *rlc_hdr;
1464 uint16_t version_major;
1465 uint16_t version_minor;
1466 uint32_t smu_version;
1467
1468 /*
1469 * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1470 * instead of picasso_rlc.bin.
1471 * Detection:
1472 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1473 * or revision >= 0xD8 && revision <= 0xDF
1474 * otherwise it is PCO FP5
1475 */
1476 if (!strcmp(chip_name, "picasso") &&
1477 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1478 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1479 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1480 "amdgpu/%s_rlc_am4.bin", chip_name);
1481 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1482 (smu_version >= 0x41e2b))
1483 /*
1484 * SMC is loaded by the SBIOS on APUs, so the SMU version can be read directly.
1485 */
1486 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1487 "amdgpu/%s_kicker_rlc.bin", chip_name);
1488 else
1489 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1490 "amdgpu/%s_rlc.bin", chip_name);
1491 if (err)
1492 goto out;
1493
1494 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1495 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1496 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1497 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1498 out:
1499 if (err)
1500 amdgpu_ucode_release(&adev->gfx.rlc_fw);
1501
1502 return err;
1503 }
1504
1505 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1506 {
1507 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
1508 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
1509 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0))
1510 return false;
1511
1512 return true;
1513 }
1514
1515 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1516 char *chip_name)
1517 {
1518 int err;
1519
1520 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1521 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1522 "amdgpu/%s_sjt_mec.bin", chip_name);
1523 else
1524 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1525 "amdgpu/%s_mec.bin", chip_name);
1526 if (err)
1527 goto out;
1528
1529 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1530 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1531
1532 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1533 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1534 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1535 "amdgpu/%s_sjt_mec2.bin", chip_name);
1536 else
1537 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1538 "amdgpu/%s_mec2.bin", chip_name);
1539 if (!err) {
1540 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1541 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1542 } else {
1543 err = 0;
1544 amdgpu_ucode_release(&adev->gfx.mec2_fw);
1545 }
1546 } else {
1547 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1548 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1549 }
1550
1551 gfx_v9_0_check_if_need_gfxoff(adev);
1552 gfx_v9_0_check_fw_write_wait(adev);
1553
1554 out:
1555 if (err)
1556 amdgpu_ucode_release(&adev->gfx.mec_fw);
1557 return err;
1558 }
1559
1560 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1561 {
1562 char ucode_prefix[30];
1563 int r;
1564
1565 DRM_DEBUG("\n");
1566 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1567
1568 /* No CPG in Arcturus */
1569 if (adev->gfx.num_gfx_rings) {
1570 r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1571 if (r)
1572 return r;
1573 }
1574
1575 r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1576 if (r)
1577 return r;
1578
1579 r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1580 if (r)
1581 return r;
1582
1583 return r;
1584 }
1585
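/*
 * Count the dwords needed for the clear-state buffer: preamble begin/end,
 * context control, one SET_CONTEXT_REG header pair plus payload per
 * SECT_CONTEXT extent, and the trailing CLEAR_STATE packet.
 */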
1586 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1587 {
1588 u32 count = 0;
1589 const struct cs_section_def *sect = NULL;
1590 const struct cs_extent_def *ext = NULL;
1591
1592 /* begin clear state */
1593 count += 2;
1594 /* context control state */
1595 count += 3;
1596
1597 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1598 for (ext = sect->section; ext->extent != NULL; ++ext) {
1599 if (sect->id == SECT_CONTEXT)
1600 count += 2 + ext->reg_count;
1601 else
1602 return 0;
1603 }
1604 }
1605
1606 /* end clear state */
1607 count += 2;
1608 /* clear state */
1609 count += 2;
1610
1611 return count;
1612 }
1613
1614 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1615 volatile u32 *buffer)
1616 {
1617 u32 count = 0, i;
1618 const struct cs_section_def *sect = NULL;
1619 const struct cs_extent_def *ext = NULL;
1620
1621 if (adev->gfx.rlc.cs_data == NULL)
1622 return;
1623 if (buffer == NULL)
1624 return;
1625
1626 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1627 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1628
1629 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1630 buffer[count++] = cpu_to_le32(0x80000000);
1631 buffer[count++] = cpu_to_le32(0x80000000);
1632
1633 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1634 for (ext = sect->section; ext->extent != NULL; ++ext) {
1635 if (sect->id == SECT_CONTEXT) {
1636 buffer[count++] =
1637 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1638 buffer[count++] = cpu_to_le32(ext->reg_index -
1639 PACKET3_SET_CONTEXT_REG_START);
1640 for (i = 0; i < ext->reg_count; i++)
1641 buffer[count++] = cpu_to_le32(ext->extent[i]);
1642 } else {
1643 return;
1644 }
1645 }
1646 }
1647
1648 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1649 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1650
1651 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1652 buffer[count++] = cpu_to_le32(0);
1653 }
1654
1655 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1656 {
1657 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1658 uint32_t pg_always_on_cu_num = 2;
1659 uint32_t always_on_cu_num;
1660 uint32_t i, j, k;
1661 uint32_t mask, cu_bitmap, counter;
1662
1663 if (adev->flags & AMD_IS_APU)
1664 always_on_cu_num = 4;
1665 else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1))
1666 always_on_cu_num = 8;
1667 else
1668 always_on_cu_num = 12;
1669
1670 mutex_lock(&adev->grbm_idx_mutex);
1671 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1672 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1673 mask = 1;
1674 cu_bitmap = 0;
1675 counter = 0;
1676 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
1677
1678 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1679 if (cu_info->bitmap[0][i][j] & mask) {
1680 if (counter == pg_always_on_cu_num)
1681 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1682 if (counter < always_on_cu_num)
1683 cu_bitmap |= mask;
1684 else
1685 break;
1686 counter++;
1687 }
1688 mask <<= 1;
1689 }
1690
1691 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1692 cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1693 }
1694 }
1695 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1696 mutex_unlock(&adev->grbm_idx_mutex);
1697 }
1698
1699 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1700 {
1701 uint32_t data;
1702
1703 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1704 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1705 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1706 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1707 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1708
1709 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1710 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1711
1712 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1713 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1714
1715 mutex_lock(&adev->grbm_idx_mutex);
1716 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1717 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1718 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1719
1720 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1721 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1722 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1723 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1724 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1725
1726 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1727 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1728 data &= 0x0000FFFF;
1729 data |= 0x00C00000;
1730 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1731
1732 /*
1733 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1734 * programmed in gfx_v9_0_init_always_on_cu_mask()
1735 */
1736
1737 /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
1738 * but is used here for RLC_LB_CNTL configuration */
1739 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1740 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1741 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1742 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1743 mutex_unlock(&adev->grbm_idx_mutex);
1744
1745 gfx_v9_0_init_always_on_cu_mask(adev);
1746 }
1747
1748 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1749 {
1750 uint32_t data;
1751
1752 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1753 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1754 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1755 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1756 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1757
1758 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1759 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1760
1761 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1762 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1763
1764 mutex_lock(&adev->grbm_idx_mutex);
1765 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1766 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1767 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1768
1769 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1770 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1771 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1772 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1773 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1774
1775 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1776 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1777 data &= 0x0000FFFF;
1778 data |= 0x00C00000;
1779 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1780
1781 /*
1782 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1783 * programmed in gfx_v9_0_init_always_on_cu_mask()
1784 */
1785
1786 /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
1787 * but is used here for RLC_LB_CNTL configuration */
1788 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1789 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1790 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1791 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1792 mutex_unlock(&adev->grbm_idx_mutex);
1793
1794 gfx_v9_0_init_always_on_cu_mask(adev);
1795 }
1796
1797 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1798 {
1799 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1800 }
1801
1802 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1803 {
1804 if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1805 return 5;
1806 else
1807 return 4;
1808 }
1809
1810 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1811 {
1812 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1813
1814 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
1815 reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1816 reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1817 reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1818 reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1819 reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1820 reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1821 reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1822 adev->gfx.rlc.rlcg_reg_access_supported = true;
1823 }
1824
1825 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1826 {
1827 const struct cs_section_def *cs_data;
1828 int r;
1829
1830 adev->gfx.rlc.cs_data = gfx9_cs_data;
1831
1832 cs_data = adev->gfx.rlc.cs_data;
1833
1834 if (cs_data) {
1835 /* init clear state block */
1836 r = amdgpu_gfx_rlc_init_csb(adev);
1837 if (r)
1838 return r;
1839 }
1840
1841 if (adev->flags & AMD_IS_APU) {
1842 /* TODO: double check the cp_table_size for RV */
1843 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1844 r = amdgpu_gfx_rlc_init_cpt(adev);
1845 if (r)
1846 return r;
1847 }
1848
1849 return 0;
1850 }
1851
1852 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1853 {
1854 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1855 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1856 }
1857
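/*
 * Allocate the MEC HPD EOP buffer sized for the acquired compute rings and
 * copy the MEC microcode into a GTT buffer object for the CP to fetch.
 */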
1858 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1859 {
1860 int r;
1861 u32 *hpd;
1862 const __le32 *fw_data;
1863 unsigned fw_size;
1864 u32 *fw;
1865 size_t mec_hpd_size;
1866
1867 const struct gfx_firmware_header_v1_0 *mec_hdr;
1868
1869 bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1870
1871 /* take ownership of the relevant compute queues */
1872 amdgpu_gfx_compute_queue_acquire(adev);
1873 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1874 if (mec_hpd_size) {
1875 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1876 AMDGPU_GEM_DOMAIN_VRAM |
1877 AMDGPU_GEM_DOMAIN_GTT,
1878 &adev->gfx.mec.hpd_eop_obj,
1879 &adev->gfx.mec.hpd_eop_gpu_addr,
1880 (void **)&hpd);
1881 if (r) {
1882 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1883 gfx_v9_0_mec_fini(adev);
1884 return r;
1885 }
1886
1887 memset(hpd, 0, mec_hpd_size);
1888
1889 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1890 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1891 }
1892
1893 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1894
1895 fw_data = (const __le32 *)
1896 (adev->gfx.mec_fw->data +
1897 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1898 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1899
1900 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1901 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1902 &adev->gfx.mec.mec_fw_obj,
1903 &adev->gfx.mec.mec_fw_gpu_addr,
1904 (void **)&fw);
1905 if (r) {
1906 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1907 gfx_v9_0_mec_fini(adev);
1908 return r;
1909 }
1910
1911 memcpy(fw, fw_data, fw_size);
1912
1913 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1914 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1915
1916 return 0;
1917 }
1918
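/* Indexed SQ read: select wave/SIMD/register via SQ_IND_INDEX, then read SQ_IND_DATA. */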
1919 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1920 {
1921 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1922 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1923 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1924 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1925 (SQ_IND_INDEX__FORCE_READ_MASK));
1926 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1927 }
1928
1929 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1930 uint32_t wave, uint32_t thread,
1931 uint32_t regno, uint32_t num, uint32_t *out)
1932 {
1933 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1934 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1935 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1936 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1937 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1938 (SQ_IND_INDEX__FORCE_READ_MASK) |
1939 (SQ_IND_INDEX__AUTO_INCR_MASK));
1940 while (num--)
1941 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1942 }
1943
1944 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1945 {
1946 /* type 1 wave data */
1947 dst[(*no_fields)++] = 1;
1948 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1949 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1950 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1951 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1952 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1953 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1954 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1955 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1956 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1957 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1958 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1959 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1960 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1961 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1962 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1963 }
1964
1965 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1966 uint32_t wave, uint32_t start,
1967 uint32_t size, uint32_t *dst)
1968 {
1969 wave_read_regs(
1970 adev, simd, wave, 0,
1971 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1972 }
1973
1974 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1975 uint32_t wave, uint32_t thread,
1976 uint32_t start, uint32_t size,
1977 uint32_t *dst)
1978 {
1979 wave_read_regs(
1980 adev, simd, wave, thread,
1981 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1982 }
1983
1984 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1985 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1986 {
1987 soc15_grbm_select(adev, me, pipe, q, vm, 0);
1988 }
1989
1990 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1991 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1992 .select_se_sh = &gfx_v9_0_select_se_sh,
1993 .read_wave_data = &gfx_v9_0_read_wave_data,
1994 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1995 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1996 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1997 };
1998
1999 const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
2000 .ras_error_inject = &gfx_v9_0_ras_error_inject,
2001 .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2002 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2003 };
2004
2005 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
2006 .ras_block = {
2007 .hw_ops = &gfx_v9_0_ras_ops,
2008 },
2009 };
2010
2011 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2012 {
2013 u32 gb_addr_config;
2014 int err;
2015
2016 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2017 case IP_VERSION(9, 0, 1):
2018 adev->gfx.config.max_hw_contexts = 8;
2019 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2020 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2021 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2022 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2023 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2024 break;
2025 case IP_VERSION(9, 2, 1):
2026 adev->gfx.config.max_hw_contexts = 8;
2027 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2028 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2029 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2030 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2031 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2032 DRM_INFO("fix gfx.config for vega12\n");
2033 break;
2034 case IP_VERSION(9, 4, 0):
2035 adev->gfx.ras = &gfx_v9_0_ras;
2036 adev->gfx.config.max_hw_contexts = 8;
2037 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2038 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2039 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2040 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2041 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2042 gb_addr_config &= ~0xf3e777ff;
2043 gb_addr_config |= 0x22014042;
2044 /* check vbios table if gpu info is not available */
2045 err = amdgpu_atomfirmware_get_gfx_info(adev);
2046 if (err)
2047 return err;
2048 break;
2049 case IP_VERSION(9, 2, 2):
2050 case IP_VERSION(9, 1, 0):
2051 adev->gfx.config.max_hw_contexts = 8;
2052 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2053 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2054 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2055 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2056 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2057 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2058 else
2059 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2060 break;
2061 case IP_VERSION(9, 4, 1):
2062 adev->gfx.ras = &gfx_v9_4_ras;
2063 adev->gfx.config.max_hw_contexts = 8;
2064 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2065 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2066 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2067 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2068 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2069 gb_addr_config &= ~0xf3e777ff;
2070 gb_addr_config |= 0x22014042;
2071 break;
2072 case IP_VERSION(9, 3, 0):
2073 adev->gfx.config.max_hw_contexts = 8;
2074 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2075 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2076 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2077 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2078 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2079 gb_addr_config &= ~0xf3e777ff;
2080 gb_addr_config |= 0x22010042;
2081 break;
2082 case IP_VERSION(9, 4, 2):
2083 adev->gfx.ras = &gfx_v9_4_2_ras;
2084 adev->gfx.config.max_hw_contexts = 8;
2085 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2086 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2087 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2088 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2089 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2090 gb_addr_config &= ~0xf3e777ff;
2091 gb_addr_config |= 0x22014042;
2092 /* check vbios table if gpu info is not available */
2093 err = amdgpu_atomfirmware_get_gfx_info(adev);
2094 if (err)
2095 return err;
2096 break;
2097 default:
2098 BUG();
2099 break;
2100 }
2101
2102 adev->gfx.config.gb_addr_config = gb_addr_config;
2103
2104 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2105 REG_GET_FIELD(
2106 adev->gfx.config.gb_addr_config,
2107 GB_ADDR_CONFIG,
2108 NUM_PIPES);
2109
2110 adev->gfx.config.max_tile_pipes =
2111 adev->gfx.config.gb_addr_config_fields.num_pipes;
2112
2113 adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2114 REG_GET_FIELD(
2115 adev->gfx.config.gb_addr_config,
2116 GB_ADDR_CONFIG,
2117 NUM_BANKS);
2118 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2119 REG_GET_FIELD(
2120 adev->gfx.config.gb_addr_config,
2121 GB_ADDR_CONFIG,
2122 MAX_COMPRESSED_FRAGS);
2123 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2124 REG_GET_FIELD(
2125 adev->gfx.config.gb_addr_config,
2126 GB_ADDR_CONFIG,
2127 NUM_RB_PER_SE);
2128 adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2129 REG_GET_FIELD(
2130 adev->gfx.config.gb_addr_config,
2131 GB_ADDR_CONFIG,
2132 NUM_SHADER_ENGINES);
2133 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2134 REG_GET_FIELD(
2135 adev->gfx.config.gb_addr_config,
2136 GB_ADDR_CONFIG,
2137 PIPE_INTERLEAVE_SIZE));
2138
2139 return 0;
2140 }
2141
2142 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2143 int mec, int pipe, int queue)
2144 {
2145 unsigned irq_type;
2146 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2147 unsigned int hw_prio;
2148
2149 ring = &adev->gfx.compute_ring[ring_id];
2150
2151 /* mec0 is me1 */
2152 ring->me = mec + 1;
2153 ring->pipe = pipe;
2154 ring->queue = queue;
2155
2156 ring->ring_obj = NULL;
2157 ring->use_doorbell = true;
2158 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2159 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2160 + (ring_id * GFX9_MEC_HPD_SIZE);
2161 ring->vm_hub = AMDGPU_GFXHUB(0);
2162 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2163
2164 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2165 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2166 + ring->pipe;
2167 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2168 AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2169 /* type-2 packets are deprecated on MEC, use type-3 instead */
2170 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2171 hw_prio, NULL);
2172 }
2173
2174 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev)
2175 {
2176 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
2177 uint32_t *ptr;
2178 uint32_t inst;
2179
2180 ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
2181 if (!ptr) {
2182 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
2183 adev->gfx.ip_dump_core = NULL;
2184 } else {
2185 adev->gfx.ip_dump_core = ptr;
2186 }
2187
2188 /* Allocate memory for compute queue registers for all the instances */
2189 reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
2190 inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
2191 adev->gfx.mec.num_queue_per_pipe;
2192
2193 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
2194 if (!ptr) {
2195 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
2196 adev->gfx.ip_dump_compute_queues = NULL;
2197 } else {
2198 adev->gfx.ip_dump_compute_queues = ptr;
2199 }
2200 }
2201
2202 static int gfx_v9_0_sw_init(void *handle)
2203 {
2204 int i, j, k, r, ring_id;
2205 int xcc_id = 0;
2206 struct amdgpu_ring *ring;
2207 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2208 unsigned int hw_prio;
2209
2210 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2211 case IP_VERSION(9, 0, 1):
2212 case IP_VERSION(9, 2, 1):
2213 case IP_VERSION(9, 4, 0):
2214 case IP_VERSION(9, 2, 2):
2215 case IP_VERSION(9, 1, 0):
2216 case IP_VERSION(9, 4, 1):
2217 case IP_VERSION(9, 3, 0):
2218 case IP_VERSION(9, 4, 2):
2219 adev->gfx.mec.num_mec = 2;
2220 break;
2221 default:
2222 adev->gfx.mec.num_mec = 1;
2223 break;
2224 }
2225
2226 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2227 case IP_VERSION(9, 4, 2):
2228 adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex;
2229 adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex);
2230 if (adev->gfx.mec_fw_version >= 88) {
2231 adev->gfx.enable_cleaner_shader = true;
2232 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
2233 if (r) {
2234 adev->gfx.enable_cleaner_shader = false;
2235 dev_err(adev->dev, "Failed to initialize cleaner shader\n");
2236 }
2237 }
2238 break;
2239 default:
2240 adev->gfx.enable_cleaner_shader = false;
2241 break;
2242 }
2243
2244 adev->gfx.mec.num_pipe_per_mec = 4;
2245 adev->gfx.mec.num_queue_per_pipe = 8;
2246
2247 /* EOP Event */
2248 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2249 if (r)
2250 return r;
2251
2252 /* Bad opcode Event */
2253 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
2254 GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR,
2255 &adev->gfx.bad_op_irq);
2256 if (r)
2257 return r;
2258
2259 /* Privileged reg */
2260 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2261 &adev->gfx.priv_reg_irq);
2262 if (r)
2263 return r;
2264
2265 /* Privileged inst */
2266 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2267 &adev->gfx.priv_inst_irq);
2268 if (r)
2269 return r;
2270
2271 /* ECC error */
2272 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2273 &adev->gfx.cp_ecc_error_irq);
2274 if (r)
2275 return r;
2276
2277 /* FUE error */
2278 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2279 &adev->gfx.cp_ecc_error_irq);
2280 if (r)
2281 return r;
2282
2283 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2284
2285 if (adev->gfx.rlc.funcs) {
2286 if (adev->gfx.rlc.funcs->init) {
2287 r = adev->gfx.rlc.funcs->init(adev);
2288 if (r) {
2289 dev_err(adev->dev, "Failed to init rlc BOs!\n");
2290 return r;
2291 }
2292 }
2293 }
2294
2295 r = gfx_v9_0_mec_init(adev);
2296 if (r) {
2297 DRM_ERROR("Failed to init MEC BOs!\n");
2298 return r;
2299 }
2300
2301 /* set up the gfx ring */
2302 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2303 ring = &adev->gfx.gfx_ring[i];
2304 ring->ring_obj = NULL;
2305 if (!i)
2306 sprintf(ring->name, "gfx");
2307 else
2308 sprintf(ring->name, "gfx_%d", i);
2309 ring->use_doorbell = true;
2310 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2311
2312 /* disable scheduler on the real ring */
2313 ring->no_scheduler = adev->gfx.mcbp;
2314 ring->vm_hub = AMDGPU_GFXHUB(0);
2315 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2316 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2317 AMDGPU_RING_PRIO_DEFAULT, NULL);
2318 if (r)
2319 return r;
2320 }
2321
2322 /* set up the software rings */
2323 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2324 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2325 ring = &adev->gfx.sw_gfx_ring[i];
2326 ring->ring_obj = NULL;
2327 sprintf(ring->name, amdgpu_sw_ring_name(i));
2328 ring->use_doorbell = true;
2329 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2330 ring->is_sw_ring = true;
2331 hw_prio = amdgpu_sw_ring_priority(i);
2332 ring->vm_hub = AMDGPU_GFXHUB(0);
2333 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2334 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2335 NULL);
2336 if (r)
2337 return r;
2338 ring->wptr = 0;
2339 }
2340
2341 /* init the muxer and add software rings */
2342 r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2343 GFX9_NUM_SW_GFX_RINGS);
2344 if (r) {
2345 DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2346 return r;
2347 }
2348 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2349 r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2350 &adev->gfx.sw_gfx_ring[i]);
2351 if (r) {
2352 DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2353 return r;
2354 }
2355 }
2356 }
2357
2358 /* set up the compute queues - allocate horizontally across pipes */
2359 ring_id = 0;
2360 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2361 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2362 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2363 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2364 k, j))
2365 continue;
2366
2367 r = gfx_v9_0_compute_ring_init(adev,
2368 ring_id,
2369 i, k, j);
2370 if (r)
2371 return r;
2372
2373 ring_id++;
2374 }
2375 }
2376 }
2377
2378 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
2379 if (r) {
2380 DRM_ERROR("Failed to init KIQ BOs!\n");
2381 return r;
2382 }
2383
2384 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
2385 if (r)
2386 return r;
2387
2388 /* create MQD for all compute queues as well as the KIQ for the SRIOV case */
2389 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
2390 if (r)
2391 return r;
2392
2393 adev->gfx.ce_ram_size = 0x8000;
2394
2395 r = gfx_v9_0_gpu_early_init(adev);
2396 if (r)
2397 return r;
2398
2399 if (amdgpu_gfx_ras_sw_init(adev)) {
2400 dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
2401 return -EINVAL;
2402 }
2403
2404 gfx_v9_0_alloc_ip_dump(adev);
2405
2406 r = amdgpu_gfx_sysfs_isolation_shader_init(adev);
2407 if (r)
2408 return r;
2409
2410 return 0;
2411 }
2412
2413
2414 static int gfx_v9_0_sw_fini(void *handle)
2415 {
2416 int i;
2417 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2418
2419 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2420 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2421 amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2422 amdgpu_ring_mux_fini(&adev->gfx.muxer);
2423 }
2424
2425 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2426 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2427 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2428 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2429
2430 amdgpu_gfx_mqd_sw_fini(adev, 0);
2431 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2432 amdgpu_gfx_kiq_fini(adev, 0);
2433
2434 amdgpu_gfx_cleaner_shader_sw_fini(adev);
2435
2436 gfx_v9_0_mec_fini(adev);
2437 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2438 &adev->gfx.rlc.clear_state_gpu_addr,
2439 (void **)&adev->gfx.rlc.cs_ptr);
2440 if (adev->flags & AMD_IS_APU) {
2441 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2442 &adev->gfx.rlc.cp_table_gpu_addr,
2443 (void **)&adev->gfx.rlc.cp_table_ptr);
2444 }
2445 gfx_v9_0_free_microcode(adev);
2446
2447 amdgpu_gfx_sysfs_isolation_shader_fini(adev);
2448
2449 kfree(adev->gfx.ip_dump_core);
2450 kfree(adev->gfx.ip_dump_compute_queues);
2451
2452 return 0;
2453 }
2454
2455
2456 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2457 {
2458 /* TODO */
2459 }
2460
2461 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2462 u32 instance, int xcc_id)
2463 {
2464 u32 data;
2465
2466 if (instance == 0xffffffff)
2467 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2468 else
2469 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2470
2471 if (se_num == 0xffffffff)
2472 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2473 else
2474 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2475
2476 if (sh_num == 0xffffffff)
2477 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2478 else
2479 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2480
2481 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2482 }
2483
2484 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2485 {
2486 u32 data, mask;
2487
2488 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2489 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2490
2491 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2492 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2493
2494 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2495 adev->gfx.config.max_sh_per_se);
2496
2497 return (~data) & mask;
2498 }
2499
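/*
 * Walk every SE/SH, collect the active render-backend bitmap per shader
 * array, and cache the combined mask and RB count in adev->gfx.config.
 */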
2500 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2501 {
2502 int i, j;
2503 u32 data;
2504 u32 active_rbs = 0;
2505 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2506 adev->gfx.config.max_sh_per_se;
2507
2508 mutex_lock(&adev->grbm_idx_mutex);
2509 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2510 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2511 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2512 data = gfx_v9_0_get_rb_active_bitmap(adev);
2513 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2514 rb_bitmap_width_per_sh);
2515 }
2516 }
2517 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2518 mutex_unlock(&adev->grbm_idx_mutex);
2519
2520 adev->gfx.config.backend_enable_mask = active_rbs;
2521 adev->gfx.config.num_rbs = hweight32(active_rbs);
2522 }
2523
2524 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
2525 uint32_t first_vmid,
2526 uint32_t last_vmid)
2527 {
2528 uint32_t data;
2529 uint32_t trap_config_vmid_mask = 0;
2530 int i;
2531
2532 /* Calculate trap config vmid mask */
2533 for (i = first_vmid; i < last_vmid; i++)
2534 trap_config_vmid_mask |= (1 << i);
2535
2536 data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
2537 VMID_SEL, trap_config_vmid_mask);
2538 data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
2539 TRAP_EN, 1);
2540 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
2541 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
2542
2543 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
2544 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
2545 }
2546
2547 #define DEFAULT_SH_MEM_BASES (0x6000)
2548 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2549 {
2550 int i;
2551 uint32_t sh_mem_config;
2552 uint32_t sh_mem_bases;
2553
2554 /*
2555 * Configure apertures:
2556 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
2557 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
2558 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
2559 */
2560 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2561
2562 sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2563 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2564 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2565
2566 mutex_lock(&adev->srbm_mutex);
2567 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2568 soc15_grbm_select(adev, 0, 0, 0, i, 0);
2569 /* CP and shaders */
2570 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2571 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2572 }
2573 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2574 mutex_unlock(&adev->srbm_mutex);
2575
2576 /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2577 access. These should be enabled by FW for target VMIDs. */
2578 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2579 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2580 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2581 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2582 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2583 }
2584 }
2585
2586 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2587 {
2588 int vmid;
2589
2590 /*
2591 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2592 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2593 * the driver can enable them for graphics. VMID0 should maintain
2594 * access so that HWS firmware can save/restore entries.
2595 */
2596 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2597 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2598 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2599 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2600 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2601 }
2602 }
2603
2604 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2605 {
2606 uint32_t tmp;
2607
2608 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2609 case IP_VERSION(9, 4, 1):
2610 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2611 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
2612 !READ_ONCE(adev->barrier_has_auto_waitcnt));
2613 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2614 break;
2615 default:
2616 break;
2617 }
2618 }
2619
2620 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2621 {
2622 u32 tmp;
2623 int i;
2624
2625 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2626
2627 gfx_v9_0_tiling_mode_table_init(adev);
2628
2629 if (adev->gfx.num_gfx_rings)
2630 gfx_v9_0_setup_rb(adev);
2631 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2632 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2633
2634 /* XXX SH_MEM regs */
2635 /* where to put LDS, scratch, GPUVM in FSA64 space */
2636 mutex_lock(&adev->srbm_mutex);
2637 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2638 soc15_grbm_select(adev, 0, 0, 0, i, 0);
2639 /* CP and shaders */
2640 if (i == 0) {
2641 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2642 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2643 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2644 !!adev->gmc.noretry);
2645 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2646 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2647 } else {
2648 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2649 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2650 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2651 !!adev->gmc.noretry);
2652 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2653 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2654 (adev->gmc.private_aperture_start >> 48));
2655 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2656 (adev->gmc.shared_aperture_start >> 48));
2657 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2658 }
2659 }
2660 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2661
2662 mutex_unlock(&adev->srbm_mutex);
2663
2664 gfx_v9_0_init_compute_vmid(adev);
2665 gfx_v9_0_init_gds_vmid(adev);
2666 gfx_v9_0_init_sq_config(adev);
2667 }
2668
2669 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2670 {
2671 u32 i, j, k;
2672 u32 mask;
2673
2674 mutex_lock(&adev->grbm_idx_mutex);
2675 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2676 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2677 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2678 for (k = 0; k < adev->usec_timeout; k++) {
2679 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2680 break;
2681 udelay(1);
2682 }
2683 if (k == adev->usec_timeout) {
2684 amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2685 0xffffffff, 0xffffffff, 0);
2686 mutex_unlock(&adev->grbm_idx_mutex);
2687 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2688 i, j);
2689 return;
2690 }
2691 }
2692 }
2693 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2694 mutex_unlock(&adev->grbm_idx_mutex);
2695
2696 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2697 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2698 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2699 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2700 for (k = 0; k < adev->usec_timeout; k++) {
2701 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2702 break;
2703 udelay(1);
2704 }
2705 }
2706
2707 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2708 bool enable)
2709 {
2710 u32 tmp;
2711
2712 /* These interrupts should be enabled to drive DS clock */
2713
2714 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2715
2716 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2717 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2718 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2719 if (adev->gfx.num_gfx_rings)
2720 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2721
2722 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2723 }
2724
2725 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2726 {
2727 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2728 /* csib */
2729 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2730 adev->gfx.rlc.clear_state_gpu_addr >> 32);
2731 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2732 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2733 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2734 adev->gfx.rlc.clear_state_size);
2735 }
2736
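/*
 * Scan the RLC register_list_format blob: record the start offset of each
 * indirect block and collect the unique indirect register addresses used,
 * so the save/restore list below can be programmed.
 */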
2737 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2738 int indirect_offset,
2739 int list_size,
2740 int *unique_indirect_regs,
2741 int unique_indirect_reg_count,
2742 int *indirect_start_offsets,
2743 int *indirect_start_offsets_count,
2744 int max_start_offsets_count)
2745 {
2746 int idx;
2747
2748 for (; indirect_offset < list_size; indirect_offset++) {
2749 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2750 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2751 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2752
2753 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2754 indirect_offset += 2;
2755
2756 /* look for the matching index */
2757 for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2758 if (unique_indirect_regs[idx] ==
2759 register_list_format[indirect_offset] ||
2760 !unique_indirect_regs[idx])
2761 break;
2762 }
2763
2764 BUG_ON(idx >= unique_indirect_reg_count);
2765
2766 if (!unique_indirect_regs[idx])
2767 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2768
2769 indirect_offset++;
2770 }
2771 }
2772 }
2773
2774 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2775 {
2776 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2777 int unique_indirect_reg_count = 0;
2778
2779 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2780 int indirect_start_offsets_count = 0;
2781
2782 int list_size = 0;
2783 int i = 0, j = 0;
2784 u32 tmp = 0;
2785
2786 u32 *register_list_format =
2787 kmemdup(adev->gfx.rlc.register_list_format,
2788 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2789 if (!register_list_format)
2790 return -ENOMEM;
2791
2792 /* setup unique_indirect_regs array and indirect_start_offsets array */
2793 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2794 gfx_v9_1_parse_ind_reg_list(register_list_format,
2795 adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2796 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2797 unique_indirect_regs,
2798 unique_indirect_reg_count,
2799 indirect_start_offsets,
2800 &indirect_start_offsets_count,
2801 ARRAY_SIZE(indirect_start_offsets));
2802
2803 /* enable auto inc in case it is disabled */
2804 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2805 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2806 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2807
2808 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2809 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2810 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2811 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2812 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2813 adev->gfx.rlc.register_restore[i]);
2814
2815 /* load indirect register */
2816 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2817 adev->gfx.rlc.reg_list_format_start);
2818
2819 /* direct register portion */
2820 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2821 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2822 register_list_format[i]);
2823
2824 /* indirect register portion */
2825 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2826 if (register_list_format[i] == 0xFFFFFFFF) {
2827 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2828 continue;
2829 }
2830
2831 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2832 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2833
2834 for (j = 0; j < unique_indirect_reg_count; j++) {
2835 if (register_list_format[i] == unique_indirect_regs[j]) {
2836 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2837 break;
2838 }
2839 }
2840
2841 BUG_ON(j >= unique_indirect_reg_count);
2842
2843 i++;
2844 }
2845
2846 /* set save/restore list size */
2847 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2848 list_size = list_size >> 1;
2849 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2850 adev->gfx.rlc.reg_restore_list_size);
2851 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2852
2853 /* write the starting offsets to RLC scratch ram */
2854 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2855 adev->gfx.rlc.starting_offsets_start);
2856 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2857 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2858 indirect_start_offsets[i]);
2859
2860 /* load unique indirect regs */
2861 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2862 if (unique_indirect_regs[i] != 0) {
2863 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2864 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2865 unique_indirect_regs[i] & 0x3FFFF);
2866
2867 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2868 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2869 unique_indirect_regs[i] >> 20);
2870 }
2871 }
2872
2873 kfree(register_list_format);
2874 return 0;
2875 }
2876
2877 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2878 {
2879 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2880 }
2881
2882 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2883 bool enable)
2884 {
2885 uint32_t data = 0;
2886 uint32_t default_data = 0;
2887
2888 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2889 if (enable) {
2890 /* enable GFXIP control over CGPG */
2891 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2892 if (default_data != data)
2893 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2894
2895 /* update status */
2896 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2897 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2898 if (default_data != data)
2899 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2900 } else {
2901 /* restore GFXIP control over GCPG */
2902 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2903 if (default_data != data)
2904 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2905 }
2906 }
2907
2908 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2909 {
2910 uint32_t data = 0;
2911
2912 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2913 AMD_PG_SUPPORT_GFX_SMG |
2914 AMD_PG_SUPPORT_GFX_DMG)) {
2915 /* init IDLE_POLL_COUNT = 60 */
2916 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2917 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2918 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2919 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2920
2921 /* init RLC PG Delay */
2922 data = 0;
2923 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2924 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2925 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2926 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2927 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2928
2929 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2930 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2931 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2932 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2933
2934 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2935 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2936 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2937 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2938
2939 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2940 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2941
2942 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2943 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2944 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2945 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
2946 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2947 }
2948 }
2949
2950 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2951 bool enable)
2952 {
2953 uint32_t data = 0;
2954 uint32_t default_data = 0;
2955
2956 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2957 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2958 SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2959 enable ? 1 : 0);
2960 if (default_data != data)
2961 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2962 }
2963
2964 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2965 bool enable)
2966 {
2967 uint32_t data = 0;
2968 uint32_t default_data = 0;
2969
2970 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2971 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2972 SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2973 enable ? 1 : 0);
2974 if (default_data != data)
2975 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2976 }
2977
2978 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2979 bool enable)
2980 {
2981 uint32_t data = 0;
2982 uint32_t default_data = 0;
2983
2984 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2985 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2986 CP_PG_DISABLE,
2987 enable ? 0 : 1);
2988 if (default_data != data)
2989 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2990 }
2991
2992 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2993 bool enable)
2994 {
2995 uint32_t data, default_data;
2996
2997 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2998 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2999 GFX_POWER_GATING_ENABLE,
3000 enable ? 1 : 0);
3001 if (default_data != data)
3002 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3003 }
3004
3005 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
3006 bool enable)
3007 {
3008 uint32_t data, default_data;
3009
3010 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3011 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3012 GFX_PIPELINE_PG_ENABLE,
3013 enable ? 1 : 0);
3014 if (default_data != data)
3015 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3016
3017 if (!enable)
3018 /* read any GFX register to wake up GFX */
3019 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
3020 }
3021
3022 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3023 bool enable)
3024 {
3025 uint32_t data, default_data;
3026
3027 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3028 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3029 STATIC_PER_CU_PG_ENABLE,
3030 enable ? 1 : 0);
3031 if (default_data != data)
3032 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3033 }
3034
3035 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3036 bool enable)
3037 {
3038 uint32_t data, default_data;
3039
3040 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3041 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3042 DYN_PER_CU_PG_ENABLE,
3043 enable ? 1 : 0);
3044 if (default_data != data)
3045 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3046 }
3047
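/*
 * Power-gating init: set up the clear-state buffer, load the RLC
 * save/restore list on parts that need it (RLC v2_1, required for gfxoff),
 * and program the RLC jump table and the gfx power-gating registers when
 * any PG feature is enabled.
 */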
3048 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3049 {
3050 gfx_v9_0_init_csb(adev);
3051
3052 /*
3053 * The RLC save/restore list is available since RLC v2_1,
3054 * and it is needed by the gfxoff feature.
3055 */
3056 if (adev->gfx.rlc.is_rlc_v2_1) {
3057 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
3058 IP_VERSION(9, 2, 1) ||
3059 (adev->apu_flags & AMD_APU_IS_RAVEN2))
3060 gfx_v9_1_init_rlc_save_restore_list(adev);
3061 gfx_v9_0_enable_save_restore_machine(adev);
3062 }
3063
3064 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3065 AMD_PG_SUPPORT_GFX_SMG |
3066 AMD_PG_SUPPORT_GFX_DMG |
3067 AMD_PG_SUPPORT_CP |
3068 AMD_PG_SUPPORT_GDS |
3069 AMD_PG_SUPPORT_RLC_SMU_HS)) {
3070 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3071 adev->gfx.rlc.cp_table_gpu_addr >> 8);
3072 gfx_v9_0_init_gfx_power_gating(adev);
3073 }
3074 }
3075
3076 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3077 {
3078 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3079 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3080 gfx_v9_0_wait_for_rlc_serdes(adev);
3081 }
3082
3083 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3084 {
3085 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3086 udelay(50);
3087 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3088 udelay(50);
3089 }
3090
3091 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3092 {
3093 #ifdef AMDGPU_RLC_DEBUG_RETRY
3094 u32 rlc_ucode_ver;
3095 #endif
3096
3097 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3098 udelay(50);
3099
3100 /* APUs (e.g. carrizo) enable the CP interrupt only after the CP is initialized */
3101 if (!(adev->flags & AMD_IS_APU)) {
3102 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3103 udelay(50);
3104 }
3105
3106 #ifdef AMDGPU_RLC_DEBUG_RETRY
3107 /* RLC_GPM_GENERAL_6 : RLC Ucode version */
3108 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3109 if (rlc_ucode_ver == 0x108) {
3110 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3111 rlc_ucode_ver, adev->gfx.rlc_fw_version);
3112 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3113 * default is 0x9C4 to create a 100us interval */
3114 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3115 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3116 * to disable the page fault retry interrupts, default is
3117 * 0x100 (256) */
3118 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3119 }
3120 #endif
3121 }
3122
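/* Legacy (non-PSP) RLC microcode load through RLC_GPM_UCODE_ADDR/DATA. */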
3123 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3124 {
3125 const struct rlc_firmware_header_v2_0 *hdr;
3126 const __le32 *fw_data;
3127 unsigned i, fw_size;
3128
3129 if (!adev->gfx.rlc_fw)
3130 return -EINVAL;
3131
3132 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3133 amdgpu_ucode_print_rlc_hdr(&hdr->header);
3134
3135 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3136 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3137 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3138
3139 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3140 RLCG_UCODE_LOADING_START_ADDRESS);
3141 for (i = 0; i < fw_size; i++)
3142 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3143 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3144
3145 return 0;
3146 }
3147
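/*
 * RLC bring-up: stop the RLC, disable CGCG/CGLS, (re)initialize power
 * gating, load the RLC microcode when PSP is not doing the loading, apply
 * the per-ASIC LBPW setting and finally start the RLC.  SR-IOV guests only
 * need the CSB re-initialized.
 */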
3148 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3149 {
3150 int r;
3151
3152 if (amdgpu_sriov_vf(adev)) {
3153 gfx_v9_0_init_csb(adev);
3154 return 0;
3155 }
3156
3157 adev->gfx.rlc.funcs->stop(adev);
3158
3159 /* disable CG */
3160 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3161
3162 gfx_v9_0_init_pg(adev);
3163
3164 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3165 /* legacy rlc firmware loading */
3166 r = gfx_v9_0_rlc_load_microcode(adev);
3167 if (r)
3168 return r;
3169 }
3170
3171 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3172 case IP_VERSION(9, 2, 2):
3173 case IP_VERSION(9, 1, 0):
3174 gfx_v9_0_init_lbpw(adev);
3175 if (amdgpu_lbpw == 0)
3176 gfx_v9_0_enable_lbpw(adev, false);
3177 else
3178 gfx_v9_0_enable_lbpw(adev, true);
3179 break;
3180 case IP_VERSION(9, 4, 0):
3181 gfx_v9_4_init_lbpw(adev);
3182 if (amdgpu_lbpw > 0)
3183 gfx_v9_0_enable_lbpw(adev, true);
3184 else
3185 gfx_v9_0_enable_lbpw(adev, false);
3186 break;
3187 default:
3188 break;
3189 }
3190
3191 gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
3192
3193 adev->gfx.rlc.funcs->start(adev);
3194
3195 return 0;
3196 }
3197
3198 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3199 {
3200 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3201
3202 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3203 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3204 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3205 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3206 udelay(50);
3207 }
3208
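/* Legacy (non-PSP) load of the PFP, CE and ME microcode into CP ucode RAM. */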
3209 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3210 {
3211 const struct gfx_firmware_header_v1_0 *pfp_hdr;
3212 const struct gfx_firmware_header_v1_0 *ce_hdr;
3213 const struct gfx_firmware_header_v1_0 *me_hdr;
3214 const __le32 *fw_data;
3215 unsigned i, fw_size;
3216
3217 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3218 return -EINVAL;
3219
3220 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3221 adev->gfx.pfp_fw->data;
3222 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3223 adev->gfx.ce_fw->data;
3224 me_hdr = (const struct gfx_firmware_header_v1_0 *)
3225 adev->gfx.me_fw->data;
3226
3227 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3228 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3229 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3230
3231 gfx_v9_0_cp_gfx_enable(adev, false);
3232
3233 /* PFP */
3234 fw_data = (const __le32 *)
3235 (adev->gfx.pfp_fw->data +
3236 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3237 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3238 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3239 for (i = 0; i < fw_size; i++)
3240 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3241 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3242
3243 /* CE */
3244 fw_data = (const __le32 *)
3245 (adev->gfx.ce_fw->data +
3246 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3247 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3248 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3249 for (i = 0; i < fw_size; i++)
3250 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3251 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3252
3253 /* ME */
3254 fw_data = (const __le32 *)
3255 (adev->gfx.me_fw->data +
3256 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3257 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3258 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3259 for (i = 0; i < fw_size; i++)
3260 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3261 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3262
3263 return 0;
3264 }
3265
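/*
 * Initialize the CP and submit the clear-state (CSB) preamble on the gfx
 * ring.  On gfx9 APUs resuming from S3 with an incomplete suspend, the CSB
 * resubmit is skipped as a workaround (see the quirk below).
 */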
3266 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3267 {
3268 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3269 const struct cs_section_def *sect = NULL;
3270 const struct cs_extent_def *ext = NULL;
3271 int r, i, tmp;
3272
3273 /* init the CP */
3274 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3275 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3276
3277 gfx_v9_0_cp_gfx_enable(adev, true);
3278
3279 /* Limit this quirk to the gfx9 APU series; the gfx10/gfx11 APUs have
3280 * been confirmed not to need this update.
3281 */
3282 if (adev->flags & AMD_IS_APU &&
3283 adev->in_s3 && !adev->suspend_complete) {
3284 DRM_INFO(" Will skip the CSB packet resubmit\n");
3285 return 0;
3286 }
3287 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3288 if (r) {
3289 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3290 return r;
3291 }
3292
3293 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3294 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3295
3296 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3297 amdgpu_ring_write(ring, 0x80000000);
3298 amdgpu_ring_write(ring, 0x80000000);
3299
3300 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3301 for (ext = sect->section; ext->extent != NULL; ++ext) {
3302 if (sect->id == SECT_CONTEXT) {
3303 amdgpu_ring_write(ring,
3304 PACKET3(PACKET3_SET_CONTEXT_REG,
3305 ext->reg_count));
3306 amdgpu_ring_write(ring,
3307 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3308 for (i = 0; i < ext->reg_count; i++)
3309 amdgpu_ring_write(ring, ext->extent[i]);
3310 }
3311 }
3312 }
3313
3314 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3315 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3316
3317 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3318 amdgpu_ring_write(ring, 0);
3319
3320 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3321 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3322 amdgpu_ring_write(ring, 0x8000);
3323 amdgpu_ring_write(ring, 0x8000);
3324
3325 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3326 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3327 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3328 amdgpu_ring_write(ring, tmp);
3329 amdgpu_ring_write(ring, 0);
3330
3331 amdgpu_ring_commit(ring);
3332
3333 return 0;
3334 }
3335
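/*
 * Program ring buffer 0 for the gfx ring: size, rptr/wptr write-back
 * addresses, ring base and doorbell range, then kick off the CP via
 * gfx_v9_0_cp_gfx_start().
 */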
3336 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3337 {
3338 struct amdgpu_ring *ring;
3339 u32 tmp;
3340 u32 rb_bufsz;
3341 u64 rb_addr, rptr_addr, wptr_gpu_addr;
3342
3343 /* Set the write pointer delay */
3344 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3345
3346 /* set the RB to use vmid 0 */
3347 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3348
3349 /* Set ring buffer size */
3350 ring = &adev->gfx.gfx_ring[0];
3351 rb_bufsz = order_base_2(ring->ring_size / 8);
3352 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3353 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3354 #ifdef __BIG_ENDIAN
3355 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3356 #endif
3357 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3358
3359 /* Initialize the ring buffer's write pointers */
3360 ring->wptr = 0;
3361 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3362 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3363
3364 /* set the wb address whether it's enabled or not */
3365 rptr_addr = ring->rptr_gpu_addr;
3366 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3367 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3368
3369 wptr_gpu_addr = ring->wptr_gpu_addr;
3370 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3371 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3372
3373 mdelay(1);
3374 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3375
3376 rb_addr = ring->gpu_addr >> 8;
3377 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3378 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3379
3380 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3381 if (ring->use_doorbell) {
3382 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3383 DOORBELL_OFFSET, ring->doorbell_index);
3384 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3385 DOORBELL_EN, 1);
3386 } else {
3387 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3388 }
3389 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3390
3391 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3392 DOORBELL_RANGE_LOWER, ring->doorbell_index);
3393 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3394
3395 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3396 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3397
3398
3399 /* start the ring */
3400 gfx_v9_0_cp_gfx_start(adev);
3401
3402 return 0;
3403 }
3404
3405 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3406 {
3407 if (enable) {
3408 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3409 } else {
3410 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3411 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3412 adev->gfx.kiq[0].ring.sched.ready = false;
3413 }
3414 udelay(50);
3415 }
3416
3417 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3418 {
3419 const struct gfx_firmware_header_v1_0 *mec_hdr;
3420 const __le32 *fw_data;
3421 unsigned i;
3422 u32 tmp;
3423
3424 if (!adev->gfx.mec_fw)
3425 return -EINVAL;
3426
3427 gfx_v9_0_cp_compute_enable(adev, false);
3428
3429 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3430 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3431
3432 fw_data = (const __le32 *)
3433 (adev->gfx.mec_fw->data +
3434 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3435 tmp = 0;
3436 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3437 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3438 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3439
3440 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3441 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3442 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3443 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3444
3445 /* MEC1 */
3446 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3447 mec_hdr->jt_offset);
3448 for (i = 0; i < mec_hdr->jt_size; i++)
3449 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3450 le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3451
3452 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3453 adev->gfx.mec_fw_version);
3454 /* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3455
3456 return 0;
3457 }
3458
3459 /* KIQ functions */
3460 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3461 {
3462 uint32_t tmp;
3463 struct amdgpu_device *adev = ring->adev;
3464
3465 /* tell the RLC which queue is the KIQ */
3466 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3467 tmp &= 0xffffff00;
3468 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3469 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3470 tmp |= 0x80;
3471 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3472 }
3473
3474 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3475 {
3476 struct amdgpu_device *adev = ring->adev;
3477
3478 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3479 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3480 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3481 mqd->cp_hqd_queue_priority =
3482 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3483 }
3484 }
3485 }
3486
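/*
 * Fill the memory queue descriptor (MQD) for a compute/KIQ ring.  The MQD
 * mirrors the CP_HQD_* registers (EOP buffer, doorbell, queue base,
 * rptr/wptr write-back, priority) and is consumed by the CP when the queue
 * is mapped or programmed.
 */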
3487 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3488 {
3489 struct amdgpu_device *adev = ring->adev;
3490 struct v9_mqd *mqd = ring->mqd_ptr;
3491 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3492 uint32_t tmp;
3493
3494 mqd->header = 0xC0310800;
3495 mqd->compute_pipelinestat_enable = 0x00000001;
3496 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3497 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3498 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3499 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3500 mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3501 mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3502 mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3503 mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3504 mqd->compute_misc_reserved = 0x00000003;
3505
3506 mqd->dynamic_cu_mask_addr_lo =
3507 lower_32_bits(ring->mqd_gpu_addr
3508 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3509 mqd->dynamic_cu_mask_addr_hi =
3510 upper_32_bits(ring->mqd_gpu_addr
3511 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3512
3513 eop_base_addr = ring->eop_gpu_addr >> 8;
3514 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3515 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3516
3517 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3518 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3519 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3520 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3521
3522 mqd->cp_hqd_eop_control = tmp;
3523
3524 /* enable doorbell? */
3525 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3526
3527 if (ring->use_doorbell) {
3528 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3529 DOORBELL_OFFSET, ring->doorbell_index);
3530 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3531 DOORBELL_EN, 1);
3532 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3533 DOORBELL_SOURCE, 0);
3534 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3535 DOORBELL_HIT, 0);
3536 } else {
3537 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3538 DOORBELL_EN, 0);
3539 }
3540
3541 mqd->cp_hqd_pq_doorbell_control = tmp;
3542
3543 /* disable the queue if it's active */
3544 ring->wptr = 0;
3545 mqd->cp_hqd_dequeue_request = 0;
3546 mqd->cp_hqd_pq_rptr = 0;
3547 mqd->cp_hqd_pq_wptr_lo = 0;
3548 mqd->cp_hqd_pq_wptr_hi = 0;
3549
3550 /* set the pointer to the MQD */
3551 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3552 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3553
3554 /* set MQD vmid to 0 */
3555 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3556 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3557 mqd->cp_mqd_control = tmp;
3558
3559 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3560 hqd_gpu_addr = ring->gpu_addr >> 8;
3561 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3562 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3563
3564 /* set up the HQD, this is similar to CP_RB0_CNTL */
3565 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3566 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3567 (order_base_2(ring->ring_size / 4) - 1));
3568 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3569 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3570 #ifdef __BIG_ENDIAN
3571 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3572 #endif
3573 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3574 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3575 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3576 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3577 mqd->cp_hqd_pq_control = tmp;
3578
3579 /* set the wb address whether it's enabled or not */
3580 wb_gpu_addr = ring->rptr_gpu_addr;
3581 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3582 mqd->cp_hqd_pq_rptr_report_addr_hi =
3583 upper_32_bits(wb_gpu_addr) & 0xffff;
3584
3585 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3586 wb_gpu_addr = ring->wptr_gpu_addr;
3587 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3588 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3589
3590 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3591 ring->wptr = 0;
3592 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3593
3594 /* set the vmid for the queue */
3595 mqd->cp_hqd_vmid = 0;
3596
3597 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3598 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3599 mqd->cp_hqd_persistent_state = tmp;
3600
3601 /* set MIN_IB_AVAIL_SIZE */
3602 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3603 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3604 mqd->cp_hqd_ib_control = tmp;
3605
3606 /* set static priority for a queue/ring */
3607 gfx_v9_0_mqd_set_priority(ring, mqd);
3608 mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3609
3610 /* The map_queues packet doesn't need to activate the queue,
3611 * so only the KIQ needs this field set.
3612 */
3613 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3614 mqd->cp_hqd_active = 1;
3615
3616 return 0;
3617 }
3618
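/*
 * Program the HQD registers directly from the MQD.  Only the KIQ itself is
 * initialized this way; regular compute queues are mapped by the KIQ via
 * map_queues packets.
 */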
3619 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3620 {
3621 struct amdgpu_device *adev = ring->adev;
3622 struct v9_mqd *mqd = ring->mqd_ptr;
3623 int j;
3624
3625 /* disable wptr polling */
3626 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3627
3628 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3629 mqd->cp_hqd_eop_base_addr_lo);
3630 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3631 mqd->cp_hqd_eop_base_addr_hi);
3632
3633 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3634 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3635 mqd->cp_hqd_eop_control);
3636
3637 /* enable doorbell? */
3638 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3639 mqd->cp_hqd_pq_doorbell_control);
3640
3641 /* disable the queue if it's active */
3642 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3643 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3644 for (j = 0; j < adev->usec_timeout; j++) {
3645 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3646 break;
3647 udelay(1);
3648 }
3649 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3650 mqd->cp_hqd_dequeue_request);
3651 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3652 mqd->cp_hqd_pq_rptr);
3653 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3654 mqd->cp_hqd_pq_wptr_lo);
3655 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3656 mqd->cp_hqd_pq_wptr_hi);
3657 }
3658
3659 /* set the pointer to the MQD */
3660 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3661 mqd->cp_mqd_base_addr_lo);
3662 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3663 mqd->cp_mqd_base_addr_hi);
3664
3665 /* set MQD vmid to 0 */
3666 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3667 mqd->cp_mqd_control);
3668
3669 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3670 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3671 mqd->cp_hqd_pq_base_lo);
3672 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3673 mqd->cp_hqd_pq_base_hi);
3674
3675 /* set up the HQD, this is similar to CP_RB0_CNTL */
3676 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3677 mqd->cp_hqd_pq_control);
3678
3679 /* set the wb address whether it's enabled or not */
3680 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3681 mqd->cp_hqd_pq_rptr_report_addr_lo);
3682 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3683 mqd->cp_hqd_pq_rptr_report_addr_hi);
3684
3685 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3686 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3687 mqd->cp_hqd_pq_wptr_poll_addr_lo);
3688 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3689 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3690
3691 /* enable the doorbell if requested */
3692 if (ring->use_doorbell) {
3693 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3694 (adev->doorbell_index.kiq * 2) << 2);
3695 /* If the GC has entered CGPG, ringing a doorbell beyond the first
3696 * page does not wake it up. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3697 * work around this issue; this change has to be kept in sync with
3698 * the firmware update.
3699 */
3700 if (check_if_enlarge_doorbell_range(adev))
3701 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3702 (adev->doorbell.size - 4));
3703 else
3704 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3705 (adev->doorbell_index.userqueue_end * 2) << 2);
3706 }
3707
3708 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3709 mqd->cp_hqd_pq_doorbell_control);
3710
3711 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3712 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3713 mqd->cp_hqd_pq_wptr_lo);
3714 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3715 mqd->cp_hqd_pq_wptr_hi);
3716
3717 /* set the vmid for the queue */
3718 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3719
3720 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3721 mqd->cp_hqd_persistent_state);
3722
3723 /* activate the queue */
3724 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3725 mqd->cp_hqd_active);
3726
3727 if (ring->use_doorbell)
3728 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3729
3730 return 0;
3731 }
3732
3733 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3734 {
3735 struct amdgpu_device *adev = ring->adev;
3736 int j;
3737
3738 /* disable the queue if it's active */
3739 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3740
3741 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3742
3743 for (j = 0; j < adev->usec_timeout; j++) {
3744 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3745 break;
3746 udelay(1);
3747 }
3748
3749 if (j == adev->usec_timeout) {
3750 DRM_DEBUG("KIQ dequeue request failed.\n");
3751
3752 /* Manual disable if dequeue request times out */
3753 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3754 }
3755
3756 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3757 0);
3758 }
3759
3760 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3761 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3762 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3763 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3764 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3765 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3766 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3767 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3768
3769 return 0;
3770 }
3771
3772 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3773 {
3774 struct amdgpu_device *adev = ring->adev;
3775 struct v9_mqd *mqd = ring->mqd_ptr;
3776 struct v9_mqd *tmp_mqd;
3777
3778 gfx_v9_0_kiq_setting(ring);
3779
3780 /* The GPU could be in a bad state during probe: the driver may trigger
3781 * a reset after loading the SMU, in which case the MQD was never
3782 * initialized and needs to be re-initialized by the driver.
3783 * Check mqd->cp_hqd_pq_control since this value should not be 0.
3784 */
3785 tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
3786 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3787 /* for the GPU_RESET case, reset the MQD to a clean state */
3788 if (adev->gfx.kiq[0].mqd_backup)
3789 memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
3790
3791 /* reset ring buffer */
3792 ring->wptr = 0;
3793 amdgpu_ring_clear_ring(ring);
3794
3795 mutex_lock(&adev->srbm_mutex);
3796 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3797 gfx_v9_0_kiq_init_register(ring);
3798 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3799 mutex_unlock(&adev->srbm_mutex);
3800 } else {
3801 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3802 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3803 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3804 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3805 amdgpu_ring_clear_ring(ring);
3806 mutex_lock(&adev->srbm_mutex);
3807 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3808 gfx_v9_0_mqd_init(ring);
3809 gfx_v9_0_kiq_init_register(ring);
3810 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3811 mutex_unlock(&adev->srbm_mutex);
3812
3813 if (adev->gfx.kiq[0].mqd_backup)
3814 memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
3815 }
3816
3817 return 0;
3818 }
3819
3820 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
3821 {
3822 struct amdgpu_device *adev = ring->adev;
3823 struct v9_mqd *mqd = ring->mqd_ptr;
3824 int mqd_idx = ring - &adev->gfx.compute_ring[0];
3825 struct v9_mqd *tmp_mqd;
3826
3827 /* As with the KIQ init above, the driver needs to re-init the MQD if
3828 * mqd->cp_hqd_pq_control was never initialized.
3829 */
3830 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3831
3832 if (!restore && (!tmp_mqd->cp_hqd_pq_control ||
3833 (!amdgpu_in_reset(adev) && !adev->in_suspend))) {
3834 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3835 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3836 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3837 mutex_lock(&adev->srbm_mutex);
3838 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3839 gfx_v9_0_mqd_init(ring);
3840 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3841 mutex_unlock(&adev->srbm_mutex);
3842
3843 if (adev->gfx.mec.mqd_backup[mqd_idx])
3844 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3845 } else {
3846 /* restore MQD to a clean status */
3847 if (adev->gfx.mec.mqd_backup[mqd_idx])
3848 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3849 /* reset ring buffer */
3850 ring->wptr = 0;
3851 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3852 amdgpu_ring_clear_ring(ring);
3853 }
3854
3855 return 0;
3856 }
3857
3858 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3859 {
3860 struct amdgpu_ring *ring;
3861 int r;
3862
3863 ring = &adev->gfx.kiq[0].ring;
3864
3865 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3866 if (unlikely(r != 0))
3867 return r;
3868
3869 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3870 if (unlikely(r != 0)) {
3871 amdgpu_bo_unreserve(ring->mqd_obj);
3872 return r;
3873 }
3874
3875 gfx_v9_0_kiq_init_queue(ring);
3876 amdgpu_bo_kunmap(ring->mqd_obj);
3877 ring->mqd_ptr = NULL;
3878 amdgpu_bo_unreserve(ring->mqd_obj);
3879 return 0;
3880 }
3881
3882 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3883 {
3884 struct amdgpu_ring *ring = NULL;
3885 int r = 0, i;
3886
3887 gfx_v9_0_cp_compute_enable(adev, true);
3888
3889 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3890 ring = &adev->gfx.compute_ring[i];
3891
3892 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3893 if (unlikely(r != 0))
3894 goto done;
3895 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3896 if (!r) {
3897 r = gfx_v9_0_kcq_init_queue(ring, false);
3898 amdgpu_bo_kunmap(ring->mqd_obj);
3899 ring->mqd_ptr = NULL;
3900 }
3901 amdgpu_bo_unreserve(ring->mqd_obj);
3902 if (r)
3903 goto done;
3904 }
3905
3906 r = amdgpu_gfx_enable_kcq(adev, 0);
3907 done:
3908 return r;
3909 }
3910
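/*
 * Full CP bring-up; the ordering matters: load microcode (legacy path
 * only), resume the KIQ first so it can map queues, then the gfx ring,
 * then the user compute queues, and finally run the ring tests.
 */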
3911 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3912 {
3913 int r, i;
3914 struct amdgpu_ring *ring;
3915
3916 if (!(adev->flags & AMD_IS_APU))
3917 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3918
3919 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3920 if (adev->gfx.num_gfx_rings) {
3921 /* legacy firmware loading */
3922 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3923 if (r)
3924 return r;
3925 }
3926
3927 r = gfx_v9_0_cp_compute_load_microcode(adev);
3928 if (r)
3929 return r;
3930 }
3931
3932 r = gfx_v9_0_kiq_resume(adev);
3933 if (r)
3934 return r;
3935
3936 if (adev->gfx.num_gfx_rings) {
3937 r = gfx_v9_0_cp_gfx_resume(adev);
3938 if (r)
3939 return r;
3940 }
3941
3942 r = gfx_v9_0_kcq_resume(adev);
3943 if (r)
3944 return r;
3945
3946 if (adev->gfx.num_gfx_rings) {
3947 ring = &adev->gfx.gfx_ring[0];
3948 r = amdgpu_ring_test_helper(ring);
3949 if (r)
3950 return r;
3951 }
3952
3953 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3954 ring = &adev->gfx.compute_ring[i];
3955 amdgpu_ring_test_helper(ring);
3956 }
3957
3958 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3959
3960 return 0;
3961 }
3962
3963 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3964 {
3965 u32 tmp;
3966
3967 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) &&
3968 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
3969 return;
3970
3971 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3972 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3973 adev->df.hash_status.hash_64k);
3974 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3975 adev->df.hash_status.hash_2m);
3976 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3977 adev->df.hash_status.hash_1g);
3978 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3979 }
3980
3981 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3982 {
3983 if (adev->gfx.num_gfx_rings)
3984 gfx_v9_0_cp_gfx_enable(adev, enable);
3985 gfx_v9_0_cp_compute_enable(adev, enable);
3986 }
3987
3988 static int gfx_v9_0_hw_init(void *handle)
3989 {
3990 int r;
3991 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3992
3993 amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
3994 adev->gfx.cleaner_shader_ptr);
3995
3996 if (!amdgpu_sriov_vf(adev))
3997 gfx_v9_0_init_golden_registers(adev);
3998
3999 gfx_v9_0_constants_init(adev);
4000
4001 gfx_v9_0_init_tcp_config(adev);
4002
4003 r = adev->gfx.rlc.funcs->resume(adev);
4004 if (r)
4005 return r;
4006
4007 r = gfx_v9_0_cp_resume(adev);
4008 if (r)
4009 return r;
4010
4011 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4012 gfx_v9_4_2_set_power_brake_sequence(adev);
4013
4014 return r;
4015 }
4016
4017 static int gfx_v9_0_hw_fini(void *handle)
4018 {
4019 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4020
4021 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4022 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4023 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4024 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4025 amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
4026
4027 /* DF freeze and KCQ disable will fail if a RAS interrupt has been triggered */
4028 if (!amdgpu_ras_intr_triggered())
4029 /* disable KCQ to avoid CPC touch memory not valid anymore */
4030 amdgpu_gfx_disable_kcq(adev, 0);
4031
4032 if (amdgpu_sriov_vf(adev)) {
4033 gfx_v9_0_cp_gfx_enable(adev, false);
4034 /* must disable polling for SRIOV when the hw is finished, otherwise
4035 * the CPC engine may keep fetching the WB address, which is no
4036 * longer valid after the sw is finished, and trigger a DMAR read
4037 * error on the hypervisor side.
4038 */
4039 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4040 return 0;
4041 }
4042
4043 /* Use the deinitialize sequence from CAIL when unbinding the device
4044 * from the driver, otherwise the KIQ hangs when binding back
4045 */
4046 if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4047 mutex_lock(&adev->srbm_mutex);
4048 soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
4049 adev->gfx.kiq[0].ring.pipe,
4050 adev->gfx.kiq[0].ring.queue, 0, 0);
4051 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
4052 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
4053 mutex_unlock(&adev->srbm_mutex);
4054 }
4055
4056 gfx_v9_0_cp_enable(adev, false);
4057
4058 /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
4059 if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
4060 (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) {
4061 dev_dbg(adev->dev, "Skipping RLC halt\n");
4062 return 0;
4063 }
4064
4065 adev->gfx.rlc.funcs->stop(adev);
4066 return 0;
4067 }
4068
4069 static int gfx_v9_0_suspend(void *handle)
4070 {
4071 return gfx_v9_0_hw_fini(handle);
4072 }
4073
4074 static int gfx_v9_0_resume(void *handle)
4075 {
4076 return gfx_v9_0_hw_init(handle);
4077 }
4078
4079 static bool gfx_v9_0_is_idle(void *handle)
4080 {
4081 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4082
4083 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4084 GRBM_STATUS, GUI_ACTIVE))
4085 return false;
4086 else
4087 return true;
4088 }
4089
4090 static int gfx_v9_0_wait_for_idle(void *handle)
4091 {
4092 unsigned i;
4093 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4094
4095 for (i = 0; i < adev->usec_timeout; i++) {
4096 if (gfx_v9_0_is_idle(handle))
4097 return 0;
4098 udelay(1);
4099 }
4100 return -ETIMEDOUT;
4101 }
4102
4103 static int gfx_v9_0_soft_reset(void *handle)
4104 {
4105 u32 grbm_soft_reset = 0;
4106 u32 tmp;
4107 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4108
4109 /* GRBM_STATUS */
4110 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4111 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4112 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4113 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4114 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4115 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4116 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4117 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4118 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4119 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4120 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4121 }
4122
4123 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4124 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4125 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4126 }
4127
4128 /* GRBM_STATUS2 */
4129 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4130 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4131 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4132 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4133
4134
4135 if (grbm_soft_reset) {
4136 /* stop the rlc */
4137 adev->gfx.rlc.funcs->stop(adev);
4138
4139 if (adev->gfx.num_gfx_rings)
4140 /* Disable GFX parsing/prefetching */
4141 gfx_v9_0_cp_gfx_enable(adev, false);
4142
4143 /* Disable MEC parsing/prefetching */
4144 gfx_v9_0_cp_compute_enable(adev, false);
4145
4146 if (grbm_soft_reset) {
4147 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4148 tmp |= grbm_soft_reset;
4149 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4150 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4151 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4152
4153 udelay(50);
4154
4155 tmp &= ~grbm_soft_reset;
4156 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4157 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4158 }
4159
4160 /* Wait a little for things to settle down */
4161 udelay(50);
4162 }
4163 return 0;
4164 }
4165
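/*
 * Read the GPU clock counter through the KIQ: a COPY_DATA packet copies the
 * 64-bit counter into a write-back slot, and the result is polled via a
 * fence.  Used where a direct MMIO read isn't reliable (e.g. SR-IOV
 * runtime on Vega10).
 */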
4166 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4167 {
4168 signed long r, cnt = 0;
4169 unsigned long flags;
4170 uint32_t seq, reg_val_offs = 0;
4171 uint64_t value = 0;
4172 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
4173 struct amdgpu_ring *ring = &kiq->ring;
4174
4175 BUG_ON(!ring->funcs->emit_rreg);
4176
4177 spin_lock_irqsave(&kiq->ring_lock, flags);
4178 if (amdgpu_device_wb_get(adev, ®_val_offs)) {
4179 pr_err("critical bug! too many kiq readers\n");
4180 goto failed_unlock;
4181 }
4182 amdgpu_ring_alloc(ring, 32);
4183 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4184 amdgpu_ring_write(ring, 9 | /* src: register*/
4185 (5 << 8) | /* dst: memory */
4186 (1 << 16) | /* count sel */
4187 (1 << 20)); /* write confirm */
4188 amdgpu_ring_write(ring, 0);
4189 amdgpu_ring_write(ring, 0);
4190 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4191 reg_val_offs * 4));
4192 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4193 reg_val_offs * 4));
4194 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4195 if (r)
4196 goto failed_undo;
4197
4198 amdgpu_ring_commit(ring);
4199 spin_unlock_irqrestore(&kiq->ring_lock, flags);
4200
4201 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4202
4203 /* don't wait any longer in the gpu reset case, because doing so could
4204 * block the gpu_recover() routine forever: e.g. this virt_kiq_rreg is
4205 * triggered from TTM, and ttm_bo_lock_delayed_workqueue() would never
4206 * return if we kept waiting in virt_kiq_rreg, which would hang
4207 * gpu_recover().
4208 *
4209 * also don't wait any longer when called from IRQ context
4210 */
4211 if (r < 1 && (amdgpu_in_reset(adev)))
4212 goto failed_kiq_read;
4213
4214 might_sleep();
4215 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4216 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4217 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4218 }
4219
4220 if (cnt > MAX_KIQ_REG_TRY)
4221 goto failed_kiq_read;
4222
4223 mb();
4224 value = (uint64_t)adev->wb.wb[reg_val_offs] |
4225 (uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4226 amdgpu_device_wb_free(adev, reg_val_offs);
4227 return value;
4228
4229 failed_undo:
4230 amdgpu_ring_undo(ring);
4231 failed_unlock:
4232 spin_unlock_irqrestore(&kiq->ring_lock, flags);
4233 failed_kiq_read:
4234 if (reg_val_offs)
4235 amdgpu_device_wb_free(adev, reg_val_offs);
4236 pr_err("failed to read gpu clock\n");
4237 return ~0;
4238 }
4239
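/*
 * 64-bit GPU clock read.  Renoir reads the SMUIO golden TSC with a
 * hi/lo/hi sequence to guard against the low word wrapping between reads;
 * other parts latch and read the RLC clock counter (via the KIQ on SR-IOV
 * Vega10).
 */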
4240 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4241 {
4242 uint64_t clock, clock_lo, clock_hi, hi_check;
4243
4244 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
4245 case IP_VERSION(9, 3, 0):
4246 preempt_disable();
4247 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4248 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4249 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4250 /* The SMUIO TSC clock runs at 100 MHz, so the 32-bit low word
4251 * carries over roughly every 42 seconds.
4252 */
4253 if (hi_check != clock_hi) {
4254 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4255 clock_hi = hi_check;
4256 }
4257 preempt_enable();
4258 clock = clock_lo | (clock_hi << 32ULL);
4259 break;
4260 default:
4261 amdgpu_gfx_off_ctrl(adev, false);
4262 mutex_lock(&adev->gfx.gpu_clock_mutex);
4263 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
4264 IP_VERSION(9, 0, 1) &&
4265 amdgpu_sriov_runtime(adev)) {
4266 clock = gfx_v9_0_kiq_read_clock(adev);
4267 } else {
4268 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4269 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4270 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4271 }
4272 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4273 amdgpu_gfx_off_ctrl(adev, true);
4274 break;
4275 }
4276 return clock;
4277 }
4278
4279 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4280 uint32_t vmid,
4281 uint32_t gds_base, uint32_t gds_size,
4282 uint32_t gws_base, uint32_t gws_size,
4283 uint32_t oa_base, uint32_t oa_size)
4284 {
4285 struct amdgpu_device *adev = ring->adev;
4286
4287 /* GDS Base */
4288 gfx_v9_0_write_data_to_reg(ring, 0, false,
4289 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4290 gds_base);
4291
4292 /* GDS Size */
4293 gfx_v9_0_write_data_to_reg(ring, 0, false,
4294 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4295 gds_size);
4296
4297 /* GWS */
4298 gfx_v9_0_write_data_to_reg(ring, 0, false,
4299 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4300 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4301
4302 /* OA */
4303 gfx_v9_0_write_data_to_reg(ring, 0, false,
4304 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4305 (1 << (oa_size + oa_base)) - (1 << oa_base));
4306 }
4307
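/*
 * Hand-assembled GFX9 compute shaders used to initialize all VGPRs/SGPRs,
 * presumably so the register files hold known values (e.g. for the EDC/ECC
 * GPR workarounds); the raw dwords below are the encoded ISA.
 */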
4308 static const u32 vgpr_init_compute_shader[] =
4309 {
4310 0xb07c0000, 0xbe8000ff,
4311 0x000000f8, 0xbf110800,
4312 0x7e000280, 0x7e020280,
4313 0x7e040280, 0x7e060280,
4314 0x7e080280, 0x7e0a0280,
4315 0x7e0c0280, 0x7e0e0280,
4316 0x80808800, 0xbe803200,
4317 0xbf84fff5, 0xbf9c0000,
4318 0xd28c0001, 0x0001007f,
4319 0xd28d0001, 0x0002027e,
4320 0x10020288, 0xb8810904,
4321 0xb7814000, 0xd1196a01,
4322 0x00000301, 0xbe800087,
4323 0xbefc00c1, 0xd89c4000,
4324 0x00020201, 0xd89cc080,
4325 0x00040401, 0x320202ff,
4326 0x00000800, 0x80808100,
4327 0xbf84fff8, 0x7e020280,
4328 0xbf810000, 0x00000000,
4329 };
4330
4331 static const u32 sgpr_init_compute_shader[] =
4332 {
4333 0xb07c0000, 0xbe8000ff,
4334 0x0000005f, 0xbee50080,
4335 0xbe812c65, 0xbe822c65,
4336 0xbe832c65, 0xbe842c65,
4337 0xbe852c65, 0xb77c0005,
4338 0x80808500, 0xbf84fff8,
4339 0xbe800080, 0xbf810000,
4340 };
4341
4342 static const u32 vgpr_init_compute_shader_arcturus[] = {
4343 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4344 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4345 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4346 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4347 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4348 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4349 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4350 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4351 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4352 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4353 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4354 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4355 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4356 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4357 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4358 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4359 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4360 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4361 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4362 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4363 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4364 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4365 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4366 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4367 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4368 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4369 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4370 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4371 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4372 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4373 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4374 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4375 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4376 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4377 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4378 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4379 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4380 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4381 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4382 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4383 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4384 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4385 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4386 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4387 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4388 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4389 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4390 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4391 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4392 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4393 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4394 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4395 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4396 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4397 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4398 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4399 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4400 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4401 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4402 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4403 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4404 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4405 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4406 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4407 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4408 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4409 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4410 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4411 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4412 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4413 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4414 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4415 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4416 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4417 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4418 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4419 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4420 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4421 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4422 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4423 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4424 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4425 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4426 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4427 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4428 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4429 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4430 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4431 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4432 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4433 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4434 0xbf84fff8, 0xbf810000,
4435 };
4436
4437 /* When the register arrays below are changed, please update gpr_reg_size
4438 and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds()
4439 to cover all gfx9 ASICs */
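/*
 * Rough sanity note (derived from the code, not authoritative): the
 * gpr_reg_size used in gfx_v9_0_do_edc_gpr_workarounds() is
 * max_shader_engines + 6, which matches the six fixed entries
 * (RESOURCE_LIMITS, NUM_THREAD_X/Y/Z, PGM_RSRC1/2) plus one
 * COMPUTE_STATIC_THREAD_MGMT_SEn entry per shader engine in the
 * arrays below.
 */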
4440 static const struct soc15_reg_entry vgpr_init_regs[] = {
4441 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4442 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4443 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4444 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4445 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4446 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4447 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4448 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4449 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4450 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4451 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4452 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4453 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4454 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4455 };
4456
4457 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4458 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4459 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4460 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4461 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4462 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4463 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4464 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4465 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4466 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4467 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4468 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4469 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4470 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4471 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4472 };
4473
4474 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4475 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4476 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4477 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4478 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4479 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4480 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4481 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4482 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4483 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4484 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4485 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4486 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4487 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4488 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4489 };
4490
4491 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4492 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4493 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4494 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4495 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4496 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4497 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4498 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4499 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4500 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4501 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4502 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4503 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4504 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4505 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4506 };
4507
4508 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4509 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4510 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4511 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4512 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4513 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4514 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4515 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4516 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4517 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4518 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4519 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4520 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4521 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4522 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4523 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4524 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4525 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4526 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4527 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4528 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4529 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4530 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4531 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4532 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4533 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4534 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4535 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4536 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4537 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4538 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4539 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4540 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4541 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4542 };
4543
4544 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4545 {
4546 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4547 int i, r;
4548
4549 /* only support when RAS is enabled */
4550 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4551 return 0;
4552
4553 r = amdgpu_ring_alloc(ring, 7);
4554 if (r) {
4555 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4556 ring->name, r);
4557 return r;
4558 }
4559
4560 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4561 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4562
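	/*
	 * The DMA_DATA packet below is intended to sweep the whole GDS:
	 * DST_SEL(1) targets GDS, the source is the inline data dword
	 * (zero here), and the byte count covers gds_size, so every GDS
	 * location is written once (interpretation of the packet fields,
	 * not an authoritative statement).
	 */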
4563 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4564 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4565 PACKET3_DMA_DATA_DST_SEL(1) |
4566 PACKET3_DMA_DATA_SRC_SEL(2) |
4567 PACKET3_DMA_DATA_ENGINE(0)));
4568 amdgpu_ring_write(ring, 0);
4569 amdgpu_ring_write(ring, 0);
4570 amdgpu_ring_write(ring, 0);
4571 amdgpu_ring_write(ring, 0);
4572 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4573 adev->gds.gds_size);
4574
4575 amdgpu_ring_commit(ring);
4576
4577 for (i = 0; i < adev->usec_timeout; i++) {
4578 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4579 break;
4580 udelay(1);
4581 }
4582
4583 if (i >= adev->usec_timeout)
4584 r = -ETIMEDOUT;
4585
4586 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4587
4588 return r;
4589 }
4590
4591 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4592 {
4593 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4594 struct amdgpu_ib ib;
4595 struct dma_fence *f = NULL;
4596 int r, i;
4597 unsigned total_size, vgpr_offset, sgpr_offset;
4598 u64 gpu_addr;
4599
4600 int compute_dim_x = adev->gfx.config.max_shader_engines *
4601 adev->gfx.config.max_cu_per_sh *
4602 adev->gfx.config.max_sh_per_se;
4603 int sgpr_work_group_size = 5;
4604 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4605 int vgpr_init_shader_size;
4606 const u32 *vgpr_init_shader_ptr;
4607 const struct soc15_reg_entry *vgpr_init_regs_ptr;
4608
4609 /* only support when RAS is enabled */
4610 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4611 return 0;
4612
4613 /* bail if the compute ring is not ready */
4614 if (!ring->sched.ready)
4615 return 0;
4616
4617 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
4618 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4619 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4620 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4621 } else {
4622 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4623 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4624 vgpr_init_regs_ptr = vgpr_init_regs;
4625 }
4626
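	/*
	 * Per-dispatch dword budget: 3 dwords per SET_SH_REG register write,
	 * 4 for the COMPUTE_PGM_LO/HI write, 5 for DISPATCH_DIRECT and 2 for
	 * the EVENT_WRITE flush, multiplied by 4 to get bytes.
	 */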
4627 total_size =
4628 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4629 total_size +=
4630 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4631 total_size +=
4632 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4633 total_size = ALIGN(total_size, 256);
4634 vgpr_offset = total_size;
4635 total_size += ALIGN(vgpr_init_shader_size, 256);
4636 sgpr_offset = total_size;
4637 total_size += sizeof(sgpr_init_compute_shader);
4638
4639 /* allocate an indirect buffer to put the commands in */
4640 memset(&ib, 0, sizeof(ib));
4641 r = amdgpu_ib_get(adev, NULL, total_size,
4642 AMDGPU_IB_POOL_DIRECT, &ib);
4643 if (r) {
4644 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4645 return r;
4646 }
4647
4648 /* load the compute shaders */
4649 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4650 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4651
4652 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4653 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4654
4655 /* init the ib length to 0 */
4656 ib.length_dw = 0;
4657
4658 /* VGPR */
4659 /* write the register state for the compute dispatch */
4660 for (i = 0; i < gpr_reg_size; i++) {
4661 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4662 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4663 - PACKET3_SET_SH_REG_START;
4664 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4665 }
4666 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4667 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4668 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4669 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4670 - PACKET3_SET_SH_REG_START;
4671 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4672 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4673
4674 /* write dispatch packet */
4675 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4676 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4677 ib.ptr[ib.length_dw++] = 1; /* y */
4678 ib.ptr[ib.length_dw++] = 1; /* z */
4679 ib.ptr[ib.length_dw++] =
4680 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4681
4682 /* write CS partial flush packet */
4683 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4684 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4685
4686 /* SGPR1 */
4687 /* write the register state for the compute dispatch */
4688 for (i = 0; i < gpr_reg_size; i++) {
4689 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4690 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4691 - PACKET3_SET_SH_REG_START;
4692 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4693 }
4694 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4695 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4696 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4697 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4698 - PACKET3_SET_SH_REG_START;
4699 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4700 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4701
4702 /* write dispatch packet */
4703 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4704 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4705 ib.ptr[ib.length_dw++] = 1; /* y */
4706 ib.ptr[ib.length_dw++] = 1; /* z */
4707 ib.ptr[ib.length_dw++] =
4708 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4709
4710 /* write CS partial flush packet */
4711 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4712 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4713
4714 /* SGPR2 */
4715 /* write the register state for the compute dispatch */
4716 for (i = 0; i < gpr_reg_size; i++) {
4717 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4718 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4719 - PACKET3_SET_SH_REG_START;
4720 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4721 }
4722 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4723 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4724 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4725 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4726 - PACKET3_SET_SH_REG_START;
4727 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4728 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4729
4730 /* write dispatch packet */
4731 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4732 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4733 ib.ptr[ib.length_dw++] = 1; /* y */
4734 ib.ptr[ib.length_dw++] = 1; /* z */
4735 ib.ptr[ib.length_dw++] =
4736 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4737
4738 /* write CS partial flush packet */
4739 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4740 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4741
4742 /* schedule the ib on the ring */
4743 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4744 if (r) {
4745 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4746 goto fail;
4747 }
4748
4749 /* wait for the GPU to finish processing the IB */
4750 r = dma_fence_wait(f, false);
4751 if (r) {
4752 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4753 goto fail;
4754 }
4755
4756 fail:
4757 amdgpu_ib_free(adev, &ib, NULL);
4758 dma_fence_put(f);
4759
4760 return r;
4761 }
4762
4763 static int gfx_v9_0_early_init(void *handle)
4764 {
4765 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4766
4767 adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4768
4769 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
4770 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4771 adev->gfx.num_gfx_rings = 0;
4772 else
4773 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4774 adev->gfx.xcc_mask = 1;
4775 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4776 AMDGPU_MAX_COMPUTE_RINGS);
4777 gfx_v9_0_set_kiq_pm4_funcs(adev);
4778 gfx_v9_0_set_ring_funcs(adev);
4779 gfx_v9_0_set_irq_funcs(adev);
4780 gfx_v9_0_set_gds_init(adev);
4781 gfx_v9_0_set_rlc_funcs(adev);
4782
4783 /* init rlcg reg access ctrl */
4784 gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4785
4786 return gfx_v9_0_init_microcode(adev);
4787 }
4788
4789 static int gfx_v9_0_ecc_late_init(void *handle)
4790 {
4791 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4792 int r;
4793
4794 /*
4795 * Temporary workaround: on several cards the CP firmware fails to
4796 * update the read pointer while CPDMA is writing the GDS clearing
4797 * operation during the suspend/resume sequence, so limit this
4798 * operation to the cold boot sequence.
4799 */
4800 if ((!adev->in_suspend) &&
4801 (adev->gds.gds_size)) {
4802 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4803 if (r)
4804 return r;
4805 }
4806
4807 /* requires IBs so do in late init after IB pool is initialized */
4808 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4809 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4810 else
4811 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4812
4813 if (r)
4814 return r;
4815
4816 if (adev->gfx.ras &&
4817 adev->gfx.ras->enable_watchdog_timer)
4818 adev->gfx.ras->enable_watchdog_timer(adev);
4819
4820 return 0;
4821 }
4822
4823 static int gfx_v9_0_late_init(void *handle)
4824 {
4825 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4826 int r;
4827
4828 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4829 if (r)
4830 return r;
4831
4832 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4833 if (r)
4834 return r;
4835
4836 r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
4837 if (r)
4838 return r;
4839
4840 r = gfx_v9_0_ecc_late_init(handle);
4841 if (r)
4842 return r;
4843
4844 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4845 gfx_v9_4_2_debug_trap_config_init(adev,
4846 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4847 else
4848 gfx_v9_0_debug_trap_config_init(adev,
4849 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4850
4851 return 0;
4852 }
4853
4854 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4855 {
4856 uint32_t rlc_setting;
4857
4858 /* if RLC is not enabled, do nothing */
4859 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4860 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4861 return false;
4862
4863 return true;
4864 }
4865
4866 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4867 {
4868 uint32_t data;
4869 unsigned i;
4870
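	/*
	 * Safe-mode handshake: request entry by writing CMD | MESSAGE to
	 * RLC_SAFE_MODE, then poll until the CMD bit reads back as zero,
	 * presumably the RLC firmware's acknowledgement.
	 */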
4871 data = RLC_SAFE_MODE__CMD_MASK;
4872 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4873 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4874
4875 /* wait for RLC_SAFE_MODE */
4876 for (i = 0; i < adev->usec_timeout; i++) {
4877 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4878 break;
4879 udelay(1);
4880 }
4881 }
4882
4883 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4884 {
4885 uint32_t data;
4886
4887 data = RLC_SAFE_MODE__CMD_MASK;
4888 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4889 }
4890
4891 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4892 bool enable)
4893 {
4894 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4895
4896 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4897 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4898 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4899 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4900 } else {
4901 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4902 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4903 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4904 }
4905
4906 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4907 }
4908
4909 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4910 bool enable)
4911 {
4912 /* TODO: double check whether we need to perform this under safe mode */
4913 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4914
4915 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4916 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4917 else
4918 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4919
4920 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4921 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4922 else
4923 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4924
4925 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4926 }
4927
4928 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4929 bool enable)
4930 {
4931 uint32_t data, def;
4932
4933 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4934
4935 /* It is disabled by HW by default */
4936 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4937 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4938 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4939
4940 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4941 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4942
4943 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4944 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4945 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4946
4947 /* only for Vega10 & Raven1 */
4948 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4949
4950 if (def != data)
4951 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4952
4953 /* MGLS is a global flag to control all MGLS in GFX */
4954 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4955 /* 2 - RLC memory Light sleep */
4956 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4957 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4958 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4959 if (def != data)
4960 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4961 }
4962 /* 3 - CP memory Light sleep */
4963 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4964 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4965 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4966 if (def != data)
4967 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4968 }
4969 }
4970 } else {
4971 /* 1 - MGCG_OVERRIDE */
4972 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4973
4974 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4975 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4976
4977 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4978 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4979 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4980 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4981
4982 if (def != data)
4983 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4984
4985 /* 2 - disable MGLS in RLC */
4986 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4987 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4988 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4989 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4990 }
4991
4992 /* 3 - disable MGLS in CP */
4993 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4994 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4995 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4996 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4997 }
4998 }
4999
5000 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5001 }
5002
5003 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
5004 bool enable)
5005 {
5006 uint32_t data, def;
5007
5008 if (!adev->gfx.num_gfx_rings)
5009 return;
5010
5011 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5012
5013 /* Enable 3D CGCG/CGLS */
5014 if (enable) {
5015 /* write cmd to clear cgcg/cgls ov */
5016 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5017 /* unset CGCG override */
5018 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5019 /* update CGCG and CGLS override bits */
5020 if (def != data)
5021 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5022
5023 /* enable 3Dcgcg FSM(0x0000363f) */
5024 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5025
5026 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5027 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5028 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5029 else
5030 data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
5031
5032 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5033 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5034 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5035 if (def != data)
5036 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5037
5038 /* set IDLE_POLL_COUNT(0x00900100) */
5039 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5040 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5041 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5042 if (def != data)
5043 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5044 } else {
5045 /* Disable CGCG/CGLS */
5046 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5047 /* disable cgcg, cgls should be disabled */
5048 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5049 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5050 /* disable cgcg and cgls in FSM */
5051 if (def != data)
5052 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5053 }
5054
5055 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5056 }
5057
5058 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5059 bool enable)
5060 {
5061 uint32_t def, data;
5062
5063 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5064
5065 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5066 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5067 /* unset CGCG override */
5068 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5069 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5070 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5071 else
5072 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5073 /* update CGCG and CGLS override bits */
5074 if (def != data)
5075 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5076
5077 /* enable cgcg FSM(0x0000363F) */
5078 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5079
5080 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
5081 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5082 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5083 else
5084 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5085 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5086 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5087 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5088 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5089 if (def != data)
5090 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5091
5092 /* set IDLE_POLL_COUNT(0x00900100) */
5093 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5094 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5095 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5096 if (def != data)
5097 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5098 } else {
5099 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5100 /* reset CGCG/CGLS bits */
5101 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5102 /* disable cgcg and cgls in FSM */
5103 if (def != data)
5104 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5105 }
5106
5107 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5108 }
5109
5110 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5111 bool enable)
5112 {
5113 if (enable) {
5114 /* CGCG/CGLS should be enabled after MGCG/MGLS
5115 * === MGCG + MGLS ===
5116 */
5117 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5118 /* === CGCG /CGLS for GFX 3D Only === */
5119 gfx_v9_0_update_3d_clock_gating(adev, enable);
5120 /* === CGCG + CGLS === */
5121 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5122 } else {
5123 /* CGCG/CGLS should be disabled before MGCG/MGLS
5124 * === CGCG + CGLS ===
5125 */
5126 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5127 /* === CGCG /CGLS for GFX 3D Only === */
5128 gfx_v9_0_update_3d_clock_gating(adev, enable);
5129 /* === MGCG + MGLS === */
5130 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5131 }
5132 return 0;
5133 }
5134
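/*
 * Select which VMID the RLC streaming performance monitor (SPM) samples.
 * Under SR-IOV with a single VF the register is accessed with the NO_KIQ
 * helpers, bypassing the KIQ register access path.
 */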
5135 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
5136 unsigned int vmid)
5137 {
5138 u32 reg, data;
5139
5140 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5141 if (amdgpu_sriov_is_pp_one_vf(adev))
5142 data = RREG32_NO_KIQ(reg);
5143 else
5144 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5145
5146 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5147 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5148
5149 if (amdgpu_sriov_is_pp_one_vf(adev))
5150 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5151 else
5152 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5153 }
5154
5155 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
5156 {
5157 amdgpu_gfx_off_ctrl(adev, false);
5158
5159 gfx_v9_0_update_spm_vmid_internal(adev, vmid);
5160
5161 amdgpu_gfx_off_ctrl(adev, true);
5162 }
5163
5164 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5165 uint32_t offset,
5166 struct soc15_reg_rlcg *entries, int arr_size)
5167 {
5168 int i;
5169 uint32_t reg;
5170
5171 if (!entries)
5172 return false;
5173
5174 for (i = 0; i < arr_size; i++) {
5175 const struct soc15_reg_rlcg *entry;
5176
5177 entry = &entries[i];
5178 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5179 if (offset == reg)
5180 return true;
5181 }
5182
5183 return false;
5184 }
5185
5186 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5187 {
5188 return gfx_v9_0_check_rlcg_range(adev, offset,
5189 (void *)rlcg_access_gc_9_0,
5190 ARRAY_SIZE(rlcg_access_gc_9_0));
5191 }
5192
5193 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5194 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5195 .set_safe_mode = gfx_v9_0_set_safe_mode,
5196 .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5197 .init = gfx_v9_0_rlc_init,
5198 .get_csb_size = gfx_v9_0_get_csb_size,
5199 .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5200 .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5201 .resume = gfx_v9_0_rlc_resume,
5202 .stop = gfx_v9_0_rlc_stop,
5203 .reset = gfx_v9_0_rlc_reset,
5204 .start = gfx_v9_0_rlc_start,
5205 .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5206 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5207 };
5208
5209 static int gfx_v9_0_set_powergating_state(void *handle,
5210 enum amd_powergating_state state)
5211 {
5212 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5213 bool enable = (state == AMD_PG_STATE_GATE);
5214
5215 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5216 case IP_VERSION(9, 2, 2):
5217 case IP_VERSION(9, 1, 0):
5218 case IP_VERSION(9, 3, 0):
5219 if (!enable)
5220 amdgpu_gfx_off_ctrl(adev, false);
5221
5222 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5223 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5224 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5225 } else {
5226 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5227 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5228 }
5229
5230 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5231 gfx_v9_0_enable_cp_power_gating(adev, true);
5232 else
5233 gfx_v9_0_enable_cp_power_gating(adev, false);
5234
5235 /* update gfx cgpg state */
5236 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5237
5238 /* update mgcg state */
5239 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5240
5241 if (enable)
5242 amdgpu_gfx_off_ctrl(adev, true);
5243 break;
5244 case IP_VERSION(9, 2, 1):
5245 amdgpu_gfx_off_ctrl(adev, enable);
5246 break;
5247 default:
5248 break;
5249 }
5250
5251 return 0;
5252 }
5253
5254 static int gfx_v9_0_set_clockgating_state(void *handle,
5255 enum amd_clockgating_state state)
5256 {
5257 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5258
5259 if (amdgpu_sriov_vf(adev))
5260 return 0;
5261
5262 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5263 case IP_VERSION(9, 0, 1):
5264 case IP_VERSION(9, 2, 1):
5265 case IP_VERSION(9, 4, 0):
5266 case IP_VERSION(9, 2, 2):
5267 case IP_VERSION(9, 1, 0):
5268 case IP_VERSION(9, 4, 1):
5269 case IP_VERSION(9, 3, 0):
5270 case IP_VERSION(9, 4, 2):
5271 gfx_v9_0_update_gfx_clock_gating(adev,
5272 state == AMD_CG_STATE_GATE);
5273 break;
5274 default:
5275 break;
5276 }
5277 return 0;
5278 }
5279
5280 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5281 {
5282 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5283 int data;
5284
5285 if (amdgpu_sriov_vf(adev))
5286 *flags = 0;
5287
5288 /* AMD_CG_SUPPORT_GFX_MGCG */
5289 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5290 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5291 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5292
5293 /* AMD_CG_SUPPORT_GFX_CGCG */
5294 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5295 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5296 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5297
5298 /* AMD_CG_SUPPORT_GFX_CGLS */
5299 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5300 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5301
5302 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5303 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5304 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5305 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5306
5307 /* AMD_CG_SUPPORT_GFX_CP_LS */
5308 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5309 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5310 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5311
5312 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) {
5313 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5314 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5315 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5316 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5317
5318 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5319 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5320 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5321 }
5322 }
5323
5324 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5325 {
5326 return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr */
5327 }
5328
5329 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5330 {
5331 struct amdgpu_device *adev = ring->adev;
5332 u64 wptr;
5333
5334 /* XXX check if swapping is necessary on BE */
5335 if (ring->use_doorbell) {
5336 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5337 } else {
5338 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5339 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5340 }
5341
5342 return wptr;
5343 }
5344
5345 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5346 {
5347 struct amdgpu_device *adev = ring->adev;
5348
5349 if (ring->use_doorbell) {
5350 /* XXX check if swapping is necessary on BE */
5351 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5352 WDOORBELL64(ring->doorbell_index, ring->wptr);
5353 } else {
5354 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5355 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5356 }
5357 }
5358
5359 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5360 {
5361 struct amdgpu_device *adev = ring->adev;
5362 u32 ref_and_mask, reg_mem_engine;
5363 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5364
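	/*
	 * Pick the HDP flush done bit for this queue: compute rings use the
	 * per-pipe CP2/CP6 ranges depending on which ME they run on, while
	 * the gfx ring uses CP0 and waits on the PFP engine.
	 */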
5365 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5366 switch (ring->me) {
5367 case 1:
5368 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5369 break;
5370 case 2:
5371 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5372 break;
5373 default:
5374 return;
5375 }
5376 reg_mem_engine = 0;
5377 } else {
5378 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5379 reg_mem_engine = 1; /* pfp */
5380 }
5381
5382 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5383 adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5384 adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5385 ref_and_mask, ref_and_mask, 0x20);
5386 }
5387
5388 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5389 struct amdgpu_job *job,
5390 struct amdgpu_ib *ib,
5391 uint32_t flags)
5392 {
5393 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5394 u32 header, control = 0;
5395
5396 if (ib->flags & AMDGPU_IB_FLAG_CE)
5397 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5398 else
5399 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5400
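	/* IB length in dwords in the low bits, VMID shifted into bits 24 and up */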
5401 control |= ib->length_dw | (vmid << 24);
5402
5403 if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5404 control |= INDIRECT_BUFFER_PRE_ENB(1);
5405
5406 if (flags & AMDGPU_IB_PREEMPTED)
5407 control |= INDIRECT_BUFFER_PRE_RESUME(1);
5408
5409 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5410 gfx_v9_0_ring_emit_de_meta(ring,
5411 (!amdgpu_sriov_vf(ring->adev) &&
5412 flags & AMDGPU_IB_PREEMPTED) ?
5413 true : false,
5414 job->gds_size > 0 && job->gds_base != 0);
5415 }
5416
5417 amdgpu_ring_write(ring, header);
5418 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5419 amdgpu_ring_write(ring,
5420 #ifdef __BIG_ENDIAN
5421 (2 << 0) |
5422 #endif
5423 lower_32_bits(ib->gpu_addr));
5424 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5425 amdgpu_ring_ib_on_emit_cntl(ring);
5426 amdgpu_ring_write(ring, control);
5427 }
5428
5429 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
5430 unsigned offset)
5431 {
5432 u32 control = ring->ring[offset];
5433
5434 control |= INDIRECT_BUFFER_PRE_RESUME(1);
5435 ring->ring[offset] = control;
5436 }
5437
5438 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
5439 unsigned offset)
5440 {
5441 struct amdgpu_device *adev = ring->adev;
5442 void *ce_payload_cpu_addr;
5443 uint64_t payload_offset, payload_size;
5444
5445 payload_size = sizeof(struct v9_ce_ib_state);
5446
5447 if (ring->is_mes_queue) {
5448 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5449 gfx[0].gfx_meta_data) +
5450 offsetof(struct v9_gfx_meta_data, ce_payload);
5451 ce_payload_cpu_addr =
5452 amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5453 } else {
5454 payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5455 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5456 }
5457
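	/*
	 * Copy the CE payload into the ring, splitting the copy when it
	 * would wrap past the end of the ring buffer.
	 */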
5458 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5459 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
5460 } else {
5461 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
5462 (ring->buf_mask + 1 - offset) << 2);
5463 payload_size -= (ring->buf_mask + 1 - offset) << 2;
5464 memcpy((void *)&ring->ring[0],
5465 ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5466 payload_size);
5467 }
5468 }
5469
5470 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
5471 unsigned offset)
5472 {
5473 struct amdgpu_device *adev = ring->adev;
5474 void *de_payload_cpu_addr;
5475 uint64_t payload_offset, payload_size;
5476
5477 payload_size = sizeof(struct v9_de_ib_state);
5478
5479 if (ring->is_mes_queue) {
5480 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5481 gfx[0].gfx_meta_data) +
5482 offsetof(struct v9_gfx_meta_data, de_payload);
5483 de_payload_cpu_addr =
5484 amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5485 } else {
5486 payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
5487 de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5488 }
5489
5490 ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
5491 IB_COMPLETION_STATUS_PREEMPTED;
5492
5493 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5494 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
5495 } else {
5496 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
5497 (ring->buf_mask + 1 - offset) << 2);
5498 payload_size -= (ring->buf_mask + 1 - offset) << 2;
5499 memcpy((void *)&ring->ring[0],
5500 de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5501 payload_size);
5502 }
5503 }
5504
5505 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5506 struct amdgpu_job *job,
5507 struct amdgpu_ib *ib,
5508 uint32_t flags)
5509 {
5510 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5511 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5512
5513 /* Currently, there is a high likelihood of a wave ID mismatch
5514 * between ME and GDS, leading to a hw deadlock, because ME generates
5515 * different wave IDs than the GDS expects. This situation happens
5516 * randomly when at least 5 compute pipes use GDS ordered append.
5517 * The wave IDs generated by ME are also wrong after suspend/resume.
5518 * Those are probably bugs somewhere else in the kernel driver.
5519 *
5520 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5521 * GDS to 0 for this ring (me/pipe).
5522 */
5523 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5524 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5525 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5526 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5527 }
5528
5529 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5530 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5531 amdgpu_ring_write(ring,
5532 #ifdef __BIG_ENDIAN
5533 (2 << 0) |
5534 #endif
5535 lower_32_bits(ib->gpu_addr));
5536 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5537 amdgpu_ring_write(ring, control);
5538 }
5539
5540 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5541 u64 seq, unsigned flags)
5542 {
5543 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5544 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5545 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5546 bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5547 uint32_t dw2 = 0;
5548
5549 /* RELEASE_MEM - flush caches, send int */
5550 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5551
5552 if (writeback) {
5553 dw2 = EOP_TC_NC_ACTION_EN;
5554 } else {
5555 dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5556 EOP_TC_MD_ACTION_EN;
5557 }
5558 dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5559 EVENT_INDEX(5);
5560 if (exec)
5561 dw2 |= EOP_EXEC;
5562
5563 amdgpu_ring_write(ring, dw2);
5564 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5565
5566 /*
5567 * the address should be Qword aligned for a 64bit write, or Dword
5568 * aligned if only the low 32 bits of data are sent (data high is discarded)
5569 */
5570 if (write64bit)
5571 BUG_ON(addr & 0x7);
5572 else
5573 BUG_ON(addr & 0x3);
5574 amdgpu_ring_write(ring, lower_32_bits(addr));
5575 amdgpu_ring_write(ring, upper_32_bits(addr));
5576 amdgpu_ring_write(ring, lower_32_bits(seq));
5577 amdgpu_ring_write(ring, upper_32_bits(seq));
5578 amdgpu_ring_write(ring, 0);
5579 }
5580
5581 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5582 {
5583 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5584 uint32_t seq = ring->fence_drv.sync_seq;
5585 uint64_t addr = ring->fence_drv.gpu_addr;
5586
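	/*
	 * Wait on the fence memory until the last synced sequence number has
	 * signalled, stalling the PFP for gfx rings or the ME for compute.
	 */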
5587 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5588 lower_32_bits(addr), upper_32_bits(addr),
5589 seq, 0xffffffff, 4);
5590 }
5591
5592 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5593 unsigned vmid, uint64_t pd_addr)
5594 {
5595 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5596
5597 /* compute doesn't have PFP */
5598 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5599 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5600 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5601 amdgpu_ring_write(ring, 0x0);
5602 }
5603 }
5604
5605 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5606 {
5607 return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5608 }
5609
5610 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5611 {
5612 u64 wptr;
5613
5614 /* XXX check if swapping is necessary on BE */
5615 if (ring->use_doorbell)
5616 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5617 else
5618 BUG();
5619 return wptr;
5620 }
5621
5622 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5623 {
5624 struct amdgpu_device *adev = ring->adev;
5625
5626 /* XXX check if swapping is necessary on BE */
5627 if (ring->use_doorbell) {
5628 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5629 WDOORBELL64(ring->doorbell_index, ring->wptr);
5630 } else {
5631 BUG(); /* only DOORBELL method supported on gfx9 now */
5632 }
5633 }
5634
5635 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5636 u64 seq, unsigned int flags)
5637 {
5638 struct amdgpu_device *adev = ring->adev;
5639
5640 /* we only allocate 32bit for each seq wb address */
5641 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5642
5643 /* write fence seq to the "addr" */
5644 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5645 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5646 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5647 amdgpu_ring_write(ring, lower_32_bits(addr));
5648 amdgpu_ring_write(ring, upper_32_bits(addr));
5649 amdgpu_ring_write(ring, lower_32_bits(seq));
5650
5651 if (flags & AMDGPU_FENCE_FLAG_INT) {
5652 /* set register to trigger INT */
5653 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5654 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5655 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5656 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5657 amdgpu_ring_write(ring, 0);
5658 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5659 }
5660 }
5661
5662 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5663 {
5664 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5665 amdgpu_ring_write(ring, 0);
5666 }
5667
5668 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5669 {
5670 struct amdgpu_device *adev = ring->adev;
5671 struct v9_ce_ib_state ce_payload = {0};
5672 uint64_t offset, ce_payload_gpu_addr;
5673 void *ce_payload_cpu_addr;
5674 int cnt;
5675
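	/*
	 * The WRITE_DATA body is 3 dwords (control + 64-bit destination) plus
	 * the payload; the PM4 count field is the body size minus one, hence
	 * the "+ 4 - 2" below (assumes the usual PM4 count convention).
	 */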
5676 cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5677
5678 if (ring->is_mes_queue) {
5679 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5680 gfx[0].gfx_meta_data) +
5681 offsetof(struct v9_gfx_meta_data, ce_payload);
5682 ce_payload_gpu_addr =
5683 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5684 ce_payload_cpu_addr =
5685 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5686 } else {
5687 offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5688 ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5689 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5690 }
5691
5692 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5693 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5694 WRITE_DATA_DST_SEL(8) |
5695 WR_CONFIRM) |
5696 WRITE_DATA_CACHE_POLICY(0));
5697 amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5698 amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5699
5700 amdgpu_ring_ib_on_emit_ce(ring);
5701
5702 if (resume)
5703 amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5704 sizeof(ce_payload) >> 2);
5705 else
5706 amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5707 sizeof(ce_payload) >> 2);
5708 }
5709
5710 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
5711 {
5712 int i, r = 0;
5713 struct amdgpu_device *adev = ring->adev;
5714 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5715 struct amdgpu_ring *kiq_ring = &kiq->ring;
5716 unsigned long flags;
5717
5718 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5719 return -EINVAL;
5720
5721 spin_lock_irqsave(&kiq->ring_lock, flags);
5722
5723 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5724 spin_unlock_irqrestore(&kiq->ring_lock, flags);
5725 return -ENOMEM;
5726 }
5727
5728 /* assert preemption condition */
5729 amdgpu_ring_set_preempt_cond_exec(ring, false);
5730
5731 ring->trail_seq += 1;
5732 amdgpu_ring_alloc(ring, 13);
5733 gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
5734 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
5735
5736 /* assert IB preemption, emit the trailing fence */
5737 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5738 ring->trail_fence_gpu_addr,
5739 ring->trail_seq);
5740
5741 amdgpu_ring_commit(kiq_ring);
5742 spin_unlock_irqrestore(&kiq->ring_lock, flags);
5743
5744 /* poll the trailing fence */
5745 for (i = 0; i < adev->usec_timeout; i++) {
5746 if (ring->trail_seq ==
5747 le32_to_cpu(*ring->trail_fence_cpu_addr))
5748 break;
5749 udelay(1);
5750 }
5751
5752 if (i >= adev->usec_timeout) {
5753 r = -EINVAL;
5754 DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
5755 }
5756
5757 /* reset CP_VMID_PREEMPT after the trailing fence */
5758 amdgpu_ring_emit_wreg(ring,
5759 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
5760 0x0);
5761 amdgpu_ring_commit(ring);
5762
5763 /* deassert preemption condition */
5764 amdgpu_ring_set_preempt_cond_exec(ring, true);
5765 return r;
5766 }
5767
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
5769 {
5770 struct amdgpu_device *adev = ring->adev;
5771 struct v9_de_ib_state de_payload = {0};
5772 uint64_t offset, gds_addr, de_payload_gpu_addr;
5773 void *de_payload_cpu_addr;
5774 int cnt;
5775
5776 if (ring->is_mes_queue) {
5777 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5778 gfx[0].gfx_meta_data) +
5779 offsetof(struct v9_gfx_meta_data, de_payload);
5780 de_payload_gpu_addr =
5781 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5782 de_payload_cpu_addr =
5783 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5784
5785 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5786 gfx[0].gds_backup) +
5787 offsetof(struct v9_gfx_meta_data, de_payload);
5788 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5789 } else {
5790 offset = offsetof(struct v9_gfx_meta_data, de_payload);
5791 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5792 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5793
5794 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5795 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5796 PAGE_SIZE);
5797 }
5798
5799 if (usegds) {
5800 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5801 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5802 }
5803
5804 cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5805 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5806 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5807 WRITE_DATA_DST_SEL(8) |
5808 WR_CONFIRM) |
5809 WRITE_DATA_CACHE_POLICY(0));
5810 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5811 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5812
5813 amdgpu_ring_ib_on_emit_de(ring);
5814 if (resume)
5815 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5816 sizeof(de_payload) >> 2);
5817 else
5818 amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5819 sizeof(de_payload) >> 2);
5820 }
5821
static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
					  bool secure)
{
	uint32_t v = secure ? FRAME_TMZ : 0;

	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
}
5830
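/* Emit CONTEXT_CONTROL: re-emit the CE metadata first, then select which
 * state blocks (global/uconfig, per-context state, SH regs, CE RAM) the CP
 * should load.
 */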
static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5832 {
5833 uint32_t dw2 = 0;
5834
5835 gfx_v9_0_ring_emit_ce_meta(ring,
5836 (!amdgpu_sriov_vf(ring->adev) &&
5837 flags & AMDGPU_IB_PREEMPTED) ? true : false);
5838
5839 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5840 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5841 /* set load_global_config & load_global_uconfig */
5842 dw2 |= 0x8001;
5843 /* set load_cs_sh_regs */
5844 dw2 |= 0x01000000;
5845 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5846 dw2 |= 0x10002;
5847
5848 /* set load_ce_ram if preamble presented */
5849 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5850 dw2 |= 0x10000000;
5851 } else {
		/* still load_ce_ram if the preamble is presented for the
		 * first time, even though no context switch happens.
		 */
5855 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5856 dw2 |= 0x10000000;
5857 }
5858
5859 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5860 amdgpu_ring_write(ring, dw2);
5861 amdgpu_ring_write(ring, 0);
5862 }
5863
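/* Emit a COND_EXEC packet and return the ring offset of its dword count so
 * the caller can patch in the real value later.
 */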
static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
						  uint64_t addr)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	/* discard following DWs if *cond_exec_gpu_addr==0 */
	amdgpu_ring_write(ring, 0);
	ret = ring->wptr & ring->buf_mask;
	/* patch dummy value later */
	amdgpu_ring_write(ring, 0);
	return ret;
}
5878
static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t reg_val_offs)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register */
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
}
5895
static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
5898 {
5899 uint32_t cmd = 0;
5900
5901 switch (ring->funcs->type) {
5902 case AMDGPU_RING_TYPE_GFX:
5903 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5904 break;
5905 case AMDGPU_RING_TYPE_KIQ:
5906 cmd = (1 << 16); /* no inc addr */
5907 break;
5908 default:
5909 cmd = WR_CONFIRM;
5910 break;
5911 }
5912 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5913 amdgpu_ring_write(ring, cmd);
5914 amdgpu_ring_write(ring, reg);
5915 amdgpu_ring_write(ring, 0);
5916 amdgpu_ring_write(ring, val);
5917 }
5918
static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					uint32_t val, uint32_t mask)
{
	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}
5924
static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
						  uint32_t reg0, uint32_t reg1,
						  uint32_t ref, uint32_t mask)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	struct amdgpu_device *adev = ring->adev;
	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
			      adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;

	if (fw_version_ok)
		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
				      ref, mask, 0x20);
	else
		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
							   ref, mask);
}
5941
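/* Soft recovery: program SQ_CMD to kill the waves belonging to the hung VMID
 * instead of doing a full ring reset; the RLC is put in safe mode around the
 * register write.
 */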
static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t value = 0;

	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
5955
static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       TIME_STAMP_INT_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
	default:
		break;
	}
}
5970
static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						      int me, int pipe,
						      enum amdgpu_interrupt_state state)
5974 {
5975 u32 mec_int_cntl, mec_int_cntl_reg;
5976
5977 /*
5978 * amdgpu controls only the first MEC. That's why this function only
5979 * handles the setting of interrupts for this specific MEC. All other
5980 * pipes' interrupts are set by amdkfd.
5981 */
5982
5983 if (me == 1) {
5984 switch (pipe) {
5985 case 0:
5986 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5987 break;
5988 case 1:
5989 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5990 break;
5991 case 2:
5992 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5993 break;
5994 case 3:
5995 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5996 break;
5997 default:
5998 DRM_DEBUG("invalid pipe %d\n", pipe);
5999 return;
6000 }
6001 } else {
6002 DRM_DEBUG("invalid me %d\n", me);
6003 return;
6004 }
6005
6006 switch (state) {
6007 case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6009 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6010 TIME_STAMP_INT_ENABLE, 0);
6011 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6012 break;
6013 case AMDGPU_IRQ_STATE_ENABLE:
6014 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6015 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6016 TIME_STAMP_INT_ENABLE, 1);
6017 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6018 break;
6019 default:
6020 break;
6021 }
6022 }
6023
static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev,
				     int me, int pipe)
6026 {
6027 /*
6028 * amdgpu controls only the first MEC. That's why this function only
6029 * handles the setting of interrupts for this specific MEC. All other
6030 * pipes' interrupts are set by amdkfd.
6031 */
6032 if (me != 1)
6033 return 0;
6034
6035 switch (pipe) {
6036 case 0:
6037 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
6038 case 1:
6039 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
6040 case 2:
6041 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
6042 case 3:
6043 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
6044 default:
6045 return 0;
6046 }
6047 }
6048
static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
6053 {
6054 u32 cp_int_cntl_reg, cp_int_cntl;
6055 int i, j;
6056
6057 switch (state) {
6058 case AMDGPU_IRQ_STATE_DISABLE:
6059 case AMDGPU_IRQ_STATE_ENABLE:
6060 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6061 PRIV_REG_INT_ENABLE,
6062 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6063 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6064 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6065 /* MECs start at 1 */
6066 cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
6067
6068 if (cp_int_cntl_reg) {
6069 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6070 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6071 PRIV_REG_INT_ENABLE,
6072 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6073 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6074 }
6075 }
6076 }
6077 break;
6078 default:
6079 break;
6080 }
6081
6082 return 0;
6083 }
6084
static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev,
					   struct amdgpu_irq_src *source,
					   unsigned type,
					   enum amdgpu_interrupt_state state)
6089 {
6090 u32 cp_int_cntl_reg, cp_int_cntl;
6091 int i, j;
6092
6093 switch (state) {
6094 case AMDGPU_IRQ_STATE_DISABLE:
6095 case AMDGPU_IRQ_STATE_ENABLE:
6096 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6097 OPCODE_ERROR_INT_ENABLE,
6098 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6099 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6100 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6101 /* MECs start at 1 */
6102 cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
6103
6104 if (cp_int_cntl_reg) {
6105 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6106 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6107 OPCODE_ERROR_INT_ENABLE,
6108 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6109 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6110 }
6111 }
6112 }
6113 break;
6114 default:
6115 break;
6116 }
6117
6118 return 0;
6119 }
6120
static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
6125 {
6126 switch (state) {
6127 case AMDGPU_IRQ_STATE_DISABLE:
6128 case AMDGPU_IRQ_STATE_ENABLE:
6129 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6130 PRIV_INSTR_INT_ENABLE,
6131 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6132 break;
6133 default:
6134 break;
6135 }
6136
6137 return 0;
6138 }
6139
#define ENABLE_ECC_ON_ME_PIPE(me, pipe) \
	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL, \
		       CP_ECC_ERROR_INT_ENABLE, 1)

#define DISABLE_ECC_ON_ME_PIPE(me, pipe) \
	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL, \
		       CP_ECC_ERROR_INT_ENABLE, 0)
6147
static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
					   struct amdgpu_irq_src *source,
					   unsigned type,
					   enum amdgpu_interrupt_state state)
6152 {
6153 switch (state) {
6154 case AMDGPU_IRQ_STATE_DISABLE:
6155 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6156 CP_ECC_ERROR_INT_ENABLE, 0);
6157 DISABLE_ECC_ON_ME_PIPE(1, 0);
6158 DISABLE_ECC_ON_ME_PIPE(1, 1);
6159 DISABLE_ECC_ON_ME_PIPE(1, 2);
6160 DISABLE_ECC_ON_ME_PIPE(1, 3);
6161 break;
6162
6163 case AMDGPU_IRQ_STATE_ENABLE:
6164 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6165 CP_ECC_ERROR_INT_ENABLE, 1);
6166 ENABLE_ECC_ON_ME_PIPE(1, 0);
6167 ENABLE_ECC_ON_ME_PIPE(1, 1);
6168 ENABLE_ECC_ON_ME_PIPE(1, 2);
6169 ENABLE_ECC_ON_ME_PIPE(1, 3);
6170 break;
6171 default:
6172 break;
6173 }
6174
6175 return 0;
6176 }
6177
static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
6183 {
6184 switch (type) {
6185 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6186 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
6187 break;
6188 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6189 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6190 break;
6191 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6192 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6193 break;
6194 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6195 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6196 break;
6197 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6198 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6199 break;
6200 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6201 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6202 break;
6203 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6204 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6205 break;
6206 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6207 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6208 break;
6209 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6210 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6211 break;
6212 default:
6213 break;
6214 }
6215 return 0;
6216 }
6217
static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
6221 {
6222 int i;
6223 u8 me_id, pipe_id, queue_id;
6224 struct amdgpu_ring *ring;
6225
6226 DRM_DEBUG("IH: CP EOP\n");
6227 me_id = (entry->ring_id & 0x0c) >> 2;
6228 pipe_id = (entry->ring_id & 0x03) >> 0;
6229 queue_id = (entry->ring_id & 0x70) >> 4;
6230
6231 switch (me_id) {
6232 case 0:
6233 if (adev->gfx.num_gfx_rings) {
6234 if (!adev->gfx.mcbp) {
6235 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6236 } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
				/* Fence signals are handled on the software rings */
6238 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
6239 amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
6240 }
6241 }
6242 break;
6243 case 1:
6244 case 2:
6245 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6246 ring = &adev->gfx.compute_ring[i];
6247 /* Per-queue interrupt is supported for MEC starting from VI.
6248 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6249 */
6250 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6251 amdgpu_fence_process(ring);
6252 }
6253 break;
6254 }
6255 return 0;
6256 }
6257
static void gfx_v9_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
6260 {
6261 u8 me_id, pipe_id, queue_id;
6262 struct amdgpu_ring *ring;
6263 int i;
6264
6265 me_id = (entry->ring_id & 0x0c) >> 2;
6266 pipe_id = (entry->ring_id & 0x03) >> 0;
6267 queue_id = (entry->ring_id & 0x70) >> 4;
6268
6269 switch (me_id) {
6270 case 0:
6271 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6272 break;
6273 case 1:
6274 case 2:
6275 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6276 ring = &adev->gfx.compute_ring[i];
6277 if (ring->me == me_id && ring->pipe == pipe_id &&
6278 ring->queue == queue_id)
6279 drm_sched_fault(&ring->sched);
6280 }
6281 break;
6282 }
6283 }
6284
static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}
6293
static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev,
			       struct amdgpu_irq_src *source,
			       struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal opcode in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}
6302
static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}
6311
6312
6313 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
6314 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
6315 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
6316 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
6317 },
6318 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
6319 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
6320 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
6321 },
6322 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6323 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
6324 0, 0
6325 },
6326 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6327 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6328 0, 0
6329 },
6330 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6331 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6332 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6333 },
6334 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6335 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6336 0, 0
6337 },
6338 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6339 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6340 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6341 },
6342 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6343 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6344 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6345 },
6346 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6347 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6348 0, 0
6349 },
6350 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6351 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6352 0, 0
6353 },
6354 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6355 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6356 0, 0
6357 },
6358 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6359 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6360 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6361 },
6362 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6363 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6364 0, 0
6365 },
6366 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6367 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6368 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6369 },
6370 { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6371 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6372 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6373 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6374 },
6375 { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6376 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6377 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6378 0, 0
6379 },
6380 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6381 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6382 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6383 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6384 },
6385 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6386 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6387 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6388 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6389 },
6390 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6391 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6392 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6393 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6394 },
6395 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6396 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6397 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6398 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6399 },
6400 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6401 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6402 0, 0
6403 },
6404 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6405 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6406 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6407 },
6408 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6409 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6410 0, 0
6411 },
6412 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6413 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6414 0, 0
6415 },
6416 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6417 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6418 0, 0
6419 },
6420 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6421 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6422 0, 0
6423 },
6424 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6425 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6426 0, 0
6427 },
6428 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6429 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6430 0, 0
6431 },
6432 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6433 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6434 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6435 },
6436 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6437 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6438 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6439 },
6440 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6441 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6442 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6443 },
6444 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6445 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6446 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6447 },
6448 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6449 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6450 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6451 },
6452 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6453 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6454 0, 0
6455 },
6456 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6457 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6458 0, 0
6459 },
6460 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6461 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6462 0, 0
6463 },
6464 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6465 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6466 0, 0
6467 },
6468 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6469 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6470 0, 0
6471 },
6472 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6473 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6474 0, 0
6475 },
6476 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6477 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6478 0, 0
6479 },
6480 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6481 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6482 0, 0
6483 },
6484 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6485 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6486 0, 0
6487 },
6488 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6489 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6490 0, 0
6491 },
6492 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6493 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6494 0, 0
6495 },
6496 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6497 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6498 0, 0
6499 },
6500 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6501 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6502 0, 0
6503 },
6504 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6505 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6506 0, 0
6507 },
6508 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6509 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6510 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6511 },
6512 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6513 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6514 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6515 },
6516 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6517 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6518 0, 0
6519 },
6520 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6521 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6522 0, 0
6523 },
6524 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6525 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6526 0, 0
6527 },
6528 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6529 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6530 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6531 },
6532 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6533 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6534 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6535 },
6536 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6537 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6538 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6539 },
6540 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6541 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6542 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6543 },
6544 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6545 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6546 0, 0
6547 },
6548 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6549 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6550 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6551 },
6552 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6553 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6554 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6555 },
6556 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6557 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6558 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6559 },
6560 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6561 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6562 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6563 },
6564 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6565 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6566 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6567 },
6568 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6569 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6570 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6571 },
6572 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6573 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6574 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6575 },
6576 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6577 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6578 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6579 },
6580 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6581 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6582 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6583 },
6584 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6585 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6586 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6587 },
6588 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6589 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6590 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6591 },
6592 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6593 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6594 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6595 },
6596 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6597 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6598 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6599 },
6600 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6601 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6602 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6603 },
6604 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6605 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6606 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6607 },
6608 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6609 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6610 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6611 },
6612 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6613 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6614 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6615 },
6616 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6617 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6618 0, 0
6619 },
6620 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6621 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6622 0, 0
6623 },
6624 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6625 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6626 0, 0
6627 },
6628 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6629 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6630 0, 0
6631 },
6632 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6633 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6634 0, 0
6635 },
6636 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6637 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6638 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6639 },
6640 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6641 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6642 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6643 },
6644 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6645 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6646 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6647 },
6648 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6649 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6650 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6651 },
6652 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6653 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6654 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6655 },
6656 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6657 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6658 0, 0
6659 },
6660 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6661 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6662 0, 0
6663 },
6664 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6665 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6666 0, 0
6667 },
6668 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6669 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6670 0, 0
6671 },
6672 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6673 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6674 0, 0
6675 },
6676 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6677 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6678 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6679 },
6680 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6681 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6682 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6683 },
6684 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6685 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6686 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6687 },
6688 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6689 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6690 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6691 },
6692 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6693 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6694 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6695 },
6696 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6697 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6698 0, 0
6699 },
6700 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6701 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6702 0, 0
6703 },
6704 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6705 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6706 0, 0
6707 },
6708 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6709 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6710 0, 0
6711 },
6712 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6713 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6714 0, 0
6715 },
6716 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6717 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6718 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6719 },
6720 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6721 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6722 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6723 },
6724 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6725 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6726 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6727 },
6728 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6729 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6730 0, 0
6731 },
6732 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6733 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6734 0, 0
6735 },
6736 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6737 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6738 0, 0
6739 },
6740 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6741 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6742 0, 0
6743 },
6744 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6745 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6746 0, 0
6747 },
6748 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6749 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6750 0, 0
6751 }
6752 };
6753
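/* Inject a RAS error into a GFX sub-block through the PSP RAS TA, after
 * validating that both the hardware and the driver support the requested
 * error type for that sub-block.
 */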
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if, uint32_t instance_mask)
6756 {
6757 struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6758 int ret;
6759 struct ta_ras_trigger_error_input block_info = { 0 };
6760
6761 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6762 return -EINVAL;
6763
6764 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6765 return -EINVAL;
6766
6767 if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6768 return -EPERM;
6769
6770 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6771 info->head.type)) {
6772 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6773 ras_gfx_subblocks[info->head.sub_block_index].name,
6774 info->head.type);
6775 return -EPERM;
6776 }
6777
6778 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6779 info->head.type)) {
6780 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6781 ras_gfx_subblocks[info->head.sub_block_index].name,
6782 info->head.type);
6783 return -EPERM;
6784 }
6785
6786 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6787 block_info.sub_block_index =
6788 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6789 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6790 block_info.address = info->address;
6791 block_info.value = info->value;
6792
6793 mutex_lock(&adev->grbm_idx_mutex);
6794 ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
6795 mutex_unlock(&adev->grbm_idx_mutex);
6796
6797 return ret;
6798 }
6799
6800 static const char * const vml2_mems[] = {
6801 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6802 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6803 "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6804 "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6805 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6806 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6807 "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6808 "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6809 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6810 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6811 "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6812 "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6813 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6814 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6815 "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6816 "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6817 };
6818
6819 static const char * const vml2_walker_mems[] = {
6820 "UTC_VML2_CACHE_PDE0_MEM0",
6821 "UTC_VML2_CACHE_PDE0_MEM1",
6822 "UTC_VML2_CACHE_PDE1_MEM0",
6823 "UTC_VML2_CACHE_PDE1_MEM1",
6824 "UTC_VML2_CACHE_PDE2_MEM0",
6825 "UTC_VML2_CACHE_PDE2_MEM1",
6826 "UTC_VML2_RDIF_LOG_FIFO",
6827 };
6828
6829 static const char * const atc_l2_cache_2m_mems[] = {
6830 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6831 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6832 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6833 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6834 };
6835
static const char * const atc_l2_cache_4k_mems[] = {
6837 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6838 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6839 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6840 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6841 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6842 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6843 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6844 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6845 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6846 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6847 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6848 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6849 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6850 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6851 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6852 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6853 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6854 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6855 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6856 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6857 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6858 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6859 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6860 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6861 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6862 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6863 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6864 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6865 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6866 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6867 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6868 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6869 };
6870
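/* Query correctable (SEC) and uncorrectable (DED) error counts in the UTC
 * memories (VML2, VML2 walker, ATC L2 2M/4K caches) and accumulate them into
 * err_data; the counters are cleared before the scan and the index registers
 * restored afterwards.
 */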
static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
					 struct ras_err_data *err_data)
6873 {
6874 uint32_t i, data;
6875 uint32_t sec_count, ded_count;
6876
6877 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6878 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6879 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6880 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6881 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6882 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6883 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6884 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6885
6886 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6887 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6888 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6889
6890 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6891 if (sec_count) {
6892 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6893 "SEC %d\n", i, vml2_mems[i], sec_count);
6894 err_data->ce_count += sec_count;
6895 }
6896
6897 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6898 if (ded_count) {
6899 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6900 "DED %d\n", i, vml2_mems[i], ded_count);
6901 err_data->ue_count += ded_count;
6902 }
6903 }
6904
6905 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6906 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6907 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6908
6909 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6910 SEC_COUNT);
6911 if (sec_count) {
6912 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6913 "SEC %d\n", i, vml2_walker_mems[i], sec_count);
6914 err_data->ce_count += sec_count;
6915 }
6916
6917 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6918 DED_COUNT);
6919 if (ded_count) {
6920 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6921 "DED %d\n", i, vml2_walker_mems[i], ded_count);
6922 err_data->ue_count += ded_count;
6923 }
6924 }
6925
6926 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6927 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6928 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6929
6930 sec_count = (data & 0x00006000L) >> 0xd;
6931 if (sec_count) {
6932 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6933 "SEC %d\n", i, atc_l2_cache_2m_mems[i],
6934 sec_count);
6935 err_data->ce_count += sec_count;
6936 }
6937 }
6938
6939 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6940 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6941 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6942
6943 sec_count = (data & 0x00006000L) >> 0xd;
6944 if (sec_count) {
6945 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6946 "SEC %d\n", i, atc_l2_cache_4k_mems[i],
6947 sec_count);
6948 err_data->ce_count += sec_count;
6949 }
6950
6951 ded_count = (data & 0x00018000L) >> 0xf;
6952 if (ded_count) {
6953 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6954 "DED %d\n", i, atc_l2_cache_4k_mems[i],
6955 ded_count);
6956 err_data->ue_count += ded_count;
6957 }
6958 }
6959
6960 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6961 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6962 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6963 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6964
6965 return 0;
6966 }
6967
static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
				    const struct soc15_reg_entry *reg,
				    uint32_t se_id, uint32_t inst_id, uint32_t value,
				    uint32_t *sec_count, uint32_t *ded_count)
6972 {
6973 uint32_t i;
6974 uint32_t sec_cnt, ded_cnt;
6975
6976 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
		    gfx_v9_0_ras_fields[i].seg != reg->seg ||
		    gfx_v9_0_ras_fields[i].inst != reg->inst)
			continue;
6981
6982 sec_cnt = (value &
6983 gfx_v9_0_ras_fields[i].sec_count_mask) >>
6984 gfx_v9_0_ras_fields[i].sec_count_shift;
6985 if (sec_cnt) {
6986 dev_info(adev->dev, "GFX SubBlock %s, "
6987 "Instance[%d][%d], SEC %d\n",
6988 gfx_v9_0_ras_fields[i].name,
6989 se_id, inst_id,
6990 sec_cnt);
6991 *sec_count += sec_cnt;
6992 }
6993
6994 ded_cnt = (value &
6995 gfx_v9_0_ras_fields[i].ded_count_mask) >>
6996 gfx_v9_0_ras_fields[i].ded_count_shift;
6997 if (ded_cnt) {
6998 dev_info(adev->dev, "GFX SubBlock %s, "
6999 "Instance[%d][%d], DED %d\n",
7000 gfx_v9_0_ras_fields[i].name,
7001 se_id, inst_id,
7002 ded_cnt);
7003 *ded_count += ded_cnt;
7004 }
7005 }
7006
7007 return 0;
7008 }
7009
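/* Clear all GFX EDC/ECC error counters by reading them back (read-to-clear)
 * across every SE/instance, then reset the UTC indexed counters as well.
 */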
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
7011 {
7012 int i, j, k;
7013
7014 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
7015 return;
7016
7017 /* read back registers to clear the counters */
7018 mutex_lock(&adev->grbm_idx_mutex);
7019 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
7020 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
7021 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
7022 amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
7023 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
7024 }
7025 }
7026 }
7027 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
7028 mutex_unlock(&adev->grbm_idx_mutex);
7029
7030 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
7031 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
7032 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
7033 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
7034 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
7035 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
7036 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
7037 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
7038
7039 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
7040 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
7041 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
7042 }
7043
7044 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
7045 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
7046 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
7047 }
7048
7049 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
7050 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
7051 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
7052 }
7053
7054 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
7055 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
7056 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
7057 }
7058
7059 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
7060 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
7061 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
7062 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
7063 }
7064
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status)
7067 {
7068 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
7069 uint32_t sec_count = 0, ded_count = 0;
7070 uint32_t i, j, k;
7071 uint32_t reg_value;
7072
7073 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
7074 return;
7075
7076 err_data->ue_count = 0;
7077 err_data->ce_count = 0;
7078
7079 mutex_lock(&adev->grbm_idx_mutex);
7080
7081 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
7082 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
7083 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
7084 amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
7085 reg_value =
7086 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
7087 if (reg_value)
7088 gfx_v9_0_ras_error_count(adev,
7089 &gfx_v9_0_edc_counter_regs[i],
7090 j, k, reg_value,
7091 &sec_count, &ded_count);
7092 }
7093 }
7094 }
7095
7096 err_data->ce_count += sec_count;
7097 err_data->ue_count += ded_count;
7098
7099 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7100 mutex_unlock(&adev->grbm_idx_mutex);
7101
7102 gfx_v9_0_query_utc_edc_status(adev, err_data);
7103 }
7104
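/* Emit an ACQUIRE_MEM packet that invalidates the shader I$/K$, TCL1 and TC
 * caches and writes back the TC over the full address range.
 */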
static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
{
	const unsigned int cp_coher_cntl =
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);

	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff);    /* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0xffffff);      /* CP_COHER_SIZE_HI */
	amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A);    /* POLL_INTERVAL */
}
7123
static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
					uint32_t pipe, bool enable)
7126 {
7127 struct amdgpu_device *adev = ring->adev;
7128 uint32_t val;
7129 uint32_t wcl_cs_reg;
7130
	/* the mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are all the same */
	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
7133
7134 switch (pipe) {
7135 case 0:
7136 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
7137 break;
7138 case 1:
7139 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
7140 break;
7141 case 2:
7142 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
7143 break;
7144 case 3:
7145 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
7146 break;
7147 default:
7148 DRM_DEBUG("invalid pipe %d\n", pipe);
7149 return;
7150 }
7151
7152 amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
}

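/* Throttle gfx and the other compute pipes while a high-priority compute job
 * runs: shrink SPI_WCL_PIPE_PERCENT_GFX and the per-pipe CS limits, and
 * restore the defaults when the job completes.
 */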
static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
7156 {
7157 struct amdgpu_device *adev = ring->adev;
7158 uint32_t val;
7159 int i;
7160
7161
	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register that limits
	 * the number of gfx waves. Setting the low 5 bits (0x1f) ensures gfx
	 * only gets around 25% of the GPU resources.
	 */
7166 val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
7167 amdgpu_ring_emit_wreg(ring,
7168 SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
7169 val);
7170
7171 /* Restrict waves for normal/low priority compute queues as well
7172 * to get best QoS for high priority compute jobs.
7173 *
7174 * amdgpu controls only 1st ME(0-3 CS pipes).
7175 */
7176 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
7177 if (i != ring->pipe)
7178 gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
7179
7180 }
7181 }
7182
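/* Pad the ring with num_nop NOP dwords, using one variable-length PACKET3_NOP
 * header to cover most of them instead of emitting one NOP packet per dword.
 */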
static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
{
	int i;

	/* Header itself is a NOP packet */
	if (num_nop == 1) {
		amdgpu_ring_write(ring, ring->funcs->nop);
		return;
	}

	/* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time */
	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));

	/* Header is at index 0, followed by num_nop - 1 NOP packets */
	for (i = 1; i < num_nop; i++)
		amdgpu_ring_write(ring, ring->funcs->nop);
}
7200
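/* Reset a hung kernel gfx queue: request a per-VMID reset through the KIQ,
 * then re-emit the last fence on the gfx ring, wait for CP_VMID_RESET to
 * clear, and test the ring again.
 */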
static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
7202 {
7203 struct amdgpu_device *adev = ring->adev;
7204 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
7205 struct amdgpu_ring *kiq_ring = &kiq->ring;
7206 unsigned long flags;
7207 u32 tmp;
7208 int r;
7209
7210 if (amdgpu_sriov_vf(adev))
7211 return -EINVAL;
7212
7213 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
7214 return -EINVAL;
7215
7216 spin_lock_irqsave(&kiq->ring_lock, flags);
7217
7218 if (amdgpu_ring_alloc(kiq_ring, 5)) {
7219 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7220 return -ENOMEM;
7221 }
7222
7223 tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
7224 gfx_v9_0_ring_emit_wreg(kiq_ring,
7225 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp);
7226 amdgpu_ring_commit(kiq_ring);
7227
7228 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7229
7230 r = amdgpu_ring_test_ring(kiq_ring);
7231 if (r)
7232 return r;
7233
7234 if (amdgpu_ring_alloc(ring, 7 + 7 + 5))
7235 return -ENOMEM;
7236 gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
7237 ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC);
7238 gfx_v9_0_ring_emit_reg_wait(ring,
7239 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff);
7240 gfx_v9_0_ring_emit_wreg(ring,
7241 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0);
7242
7243 return amdgpu_ring_test_ring(ring);
7244 }
7245
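/* Reset a hung kernel compute queue: unmap it through the KIQ, wait for the
 * HQD to go idle, reinitialize the MQD, then remap the queue and test it.
 */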
static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
			      unsigned int vmid)
7248 {
7249 struct amdgpu_device *adev = ring->adev;
7250 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
7251 struct amdgpu_ring *kiq_ring = &kiq->ring;
7252 unsigned long flags;
7253 int i, r;
7254
7255 if (amdgpu_sriov_vf(adev))
7256 return -EINVAL;
7257
7258 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
7259 return -EINVAL;
7260
7261 spin_lock_irqsave(&kiq->ring_lock, flags);
7262
7263 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
7264 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7265 return -ENOMEM;
7266 }
7267
7268 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
7269 0, 0);
7270 amdgpu_ring_commit(kiq_ring);
7271
7272 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7273
7274 r = amdgpu_ring_test_ring(kiq_ring);
7275 if (r)
7276 return r;
7277
	/* make sure dequeue is complete */
7279 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
7280 mutex_lock(&adev->srbm_mutex);
7281 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
7282 for (i = 0; i < adev->usec_timeout; i++) {
7283 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
7284 break;
7285 udelay(1);
7286 }
7287 if (i >= adev->usec_timeout)
7288 r = -ETIMEDOUT;
7289 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7290 mutex_unlock(&adev->srbm_mutex);
7291 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
7292 if (r) {
7293 dev_err(adev->dev, "failed to wait for HQD to deactivate\n");
7294 return r;
7295 }
7296
7297 r = amdgpu_bo_reserve(ring->mqd_obj, false);
7298 if (unlikely(r != 0)) {
7299 dev_err(adev->dev, "failed to reserve mqd_obj\n");
7300 return r;
7301 }
7302 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
7303 if (!r) {
7304 r = gfx_v9_0_kcq_init_queue(ring, true);
7305 amdgpu_bo_kunmap(ring->mqd_obj);
7306 ring->mqd_ptr = NULL;
7307 }
7308 amdgpu_bo_unreserve(ring->mqd_obj);
7309 if (r) {
7310 dev_err(adev->dev, "failed to reinit the kcq MQD\n");
7311 return r;
7312 }
7313 spin_lock_irqsave(&kiq->ring_lock, flags);
7314 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
7315 if (r) {
7316 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7317 return -ENOMEM;
7318 }
7319 kiq->pmf->kiq_map_queues(kiq_ring, ring);
7320 amdgpu_ring_commit(kiq_ring);
7321 r = amdgpu_ring_test_ring(kiq_ring);
7322 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7323 if (r) {
7324 DRM_ERROR("failed to remap queue\n");
7325 return r;
7326 }
7327 return amdgpu_ring_test_ring(ring);
7328 }
7329
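/* Print the register state captured by gfx_v9_ip_dump(): first the core
 * GC registers, then the CP registers for every compute queue instance.
 */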
7330 static void gfx_v9_ip_print(void *handle, struct drm_printer *p)
7331 {
7332 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7333 uint32_t i, j, k, reg, index = 0;
7334 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7335
7336 if (!adev->gfx.ip_dump_core)
7337 return;
7338
7339 for (i = 0; i < reg_count; i++)
7340 drm_printf(p, "%-50s \t 0x%08x\n",
7341 gc_reg_list_9[i].reg_name,
7342 adev->gfx.ip_dump_core[i]);
7343
7344 /* print compute queue registers for all instances */
7345 if (!adev->gfx.ip_dump_compute_queues)
7346 return;
7347
7348 reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7349 drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
7350 adev->gfx.mec.num_mec,
7351 adev->gfx.mec.num_pipe_per_mec,
7352 adev->gfx.mec.num_queue_per_pipe);
7353
7354 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7355 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7356 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7357 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
7358 for (reg = 0; reg < reg_count; reg++) {
7359 drm_printf(p, "%-50s \t 0x%08x\n",
7360 gc_cp_reg_list_9[reg].reg_name,
7361 adev->gfx.ip_dump_compute_queues[index + reg]);
7362 }
7363 index += reg_count;
7364 }
7365 }
7366 }
7367
7368 }
7369
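/* Capture the core GC registers and the per-queue CP registers for later
 * printing.  GFXOFF is temporarily disabled so the register reads are
 * valid, and GRBM is steered to each mec/pipe/queue for the per-queue
 * registers.
 */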
7370 static void gfx_v9_ip_dump(void *handle)
7371 {
7372 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7373 uint32_t i, j, k, reg, index = 0;
7374 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7375
7376 if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings)
7377 return;
7378
7379 amdgpu_gfx_off_ctrl(adev, false);
7380 for (i = 0; i < reg_count; i++)
7381 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i]));
7382 amdgpu_gfx_off_ctrl(adev, true);
7383
7384 /* dump compute queue registers for all instances */
7385 if (!adev->gfx.ip_dump_compute_queues)
7386 return;
7387
7388 reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7389 amdgpu_gfx_off_ctrl(adev, false);
7390 mutex_lock(&adev->srbm_mutex);
7391 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7392 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7393 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7394 /* ME0 is for GFX so start from 1 for CP */
7395 soc15_grbm_select(adev, 1 + i, j, k, 0, 0);
7396
7397 for (reg = 0; reg < reg_count; reg++) {
7398 adev->gfx.ip_dump_compute_queues[index + reg] =
7399 RREG32(SOC15_REG_ENTRY_OFFSET(
7400 gc_cp_reg_list_9[reg]));
7401 }
7402 index += reg_count;
7403 }
7404 }
7405 }
7406 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7407 mutex_unlock(&adev->srbm_mutex);
7408 amdgpu_gfx_off_ctrl(adev, true);
7409
7410 }
7411
7412 static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
7413 {
7414 /* Emit the cleaner shader */
7415 amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
7416 amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
7417 }
7418
7419 static void gfx_v9_0_ring_begin_use_compute(struct amdgpu_ring *ring)
7420 {
7421 struct amdgpu_device *adev = ring->adev;
7422
7423 amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
7424
7425 /* Raven and PCO APUs seem to have stability issues
7426 * with compute and gfxoff and gfx pg. Disable gfx pg during
7427 * submission and allow again afterwards.
7428 */
7429 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
7430 gfx_v9_0_set_powergating_state(adev, AMD_PG_STATE_UNGATE);
7431 }
7432
7433 static void gfx_v9_0_ring_end_use_compute(struct amdgpu_ring *ring)
7434 {
7435 struct amdgpu_device *adev = ring->adev;
7436
7437 /* Raven and PCO APUs seem to have stability issues
7438 * with compute and gfxoff and gfx pg. Disable gfx pg during
7439 * submission and allow again afterwards.
7440 */
7441 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
7442 gfx_v9_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
7443
7444 amdgpu_gfx_enforce_isolation_ring_end_use(ring);
7445 }
7446
7447 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
7448 .name = "gfx_v9_0",
7449 .early_init = gfx_v9_0_early_init,
7450 .late_init = gfx_v9_0_late_init,
7451 .sw_init = gfx_v9_0_sw_init,
7452 .sw_fini = gfx_v9_0_sw_fini,
7453 .hw_init = gfx_v9_0_hw_init,
7454 .hw_fini = gfx_v9_0_hw_fini,
7455 .suspend = gfx_v9_0_suspend,
7456 .resume = gfx_v9_0_resume,
7457 .is_idle = gfx_v9_0_is_idle,
7458 .wait_for_idle = gfx_v9_0_wait_for_idle,
7459 .soft_reset = gfx_v9_0_soft_reset,
7460 .set_clockgating_state = gfx_v9_0_set_clockgating_state,
7461 .set_powergating_state = gfx_v9_0_set_powergating_state,
7462 .get_clockgating_state = gfx_v9_0_get_clockgating_state,
7463 .dump_ip_state = gfx_v9_ip_dump,
7464 .print_ip_state = gfx_v9_ip_print,
7465 };
7466
7467 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
7468 .type = AMDGPU_RING_TYPE_GFX,
7469 .align_mask = 0xff,
7470 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7471 .support_64bit_ptrs = true,
7472 .secure_submission_supported = true,
7473 .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
7474 .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
7475 .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
7476 .emit_frame_size = /* totally 242 maximum if 16 IBs */
7477 5 + /* COND_EXEC */
7478 7 + /* PIPELINE_SYNC */
7479 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7480 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7481 2 + /* VM_FLUSH */
7482 8 + /* FENCE for VM_FLUSH */
7483 20 + /* GDS switch */
7484 4 + /* double SWITCH_BUFFER,
7485 * the first COND_EXEC jumps to the place just
7486 * prior to this double SWITCH_BUFFER */
7487 5 + /* COND_EXEC */
7488 7 + /* HDP_flush */
7489 4 + /* VGT_flush */
7490 14 + /* CE_META */
7491 31 + /* DE_META */
7492 3 + /* CNTX_CTRL */
7493 5 + /* HDP_INVL */
7494 8 + 8 + /* FENCE x2 */
7495 2 + /* SWITCH_BUFFER */
7496 7 + /* gfx_v9_0_emit_mem_sync */
7497 2, /* gfx_v9_0_ring_emit_cleaner_shader */
7498 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
7499 .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7500 .emit_fence = gfx_v9_0_ring_emit_fence,
7501 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7502 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7503 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7504 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7505 .test_ring = gfx_v9_0_ring_test_ring,
7506 .insert_nop = gfx_v9_ring_insert_nop,
7507 .pad_ib = amdgpu_ring_generic_pad_ib,
7508 .emit_switch_buffer = gfx_v9_ring_emit_sb,
7509 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7510 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7511 .preempt_ib = gfx_v9_0_ring_preempt_ib,
7512 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7513 .emit_wreg = gfx_v9_0_ring_emit_wreg,
7514 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7515 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7516 .soft_recovery = gfx_v9_0_ring_soft_recovery,
7517 .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7518 .reset = gfx_v9_0_reset_kgq,
7519 .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7520 .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7521 .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7522 };
7523
7524 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
7525 .type = AMDGPU_RING_TYPE_GFX,
7526 .align_mask = 0xff,
7527 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7528 .support_64bit_ptrs = true,
7529 .secure_submission_supported = true,
7530 .get_rptr = amdgpu_sw_ring_get_rptr_gfx,
7531 .get_wptr = amdgpu_sw_ring_get_wptr_gfx,
7532 .set_wptr = amdgpu_sw_ring_set_wptr_gfx,
7533 .emit_frame_size = /* totally 242 maximum if 16 IBs */
7534 5 + /* COND_EXEC */
7535 7 + /* PIPELINE_SYNC */
7536 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7537 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7538 2 + /* VM_FLUSH */
7539 8 + /* FENCE for VM_FLUSH */
7540 20 + /* GDS switch */
7541 4 + /* double SWITCH_BUFFER,
7542 * the first COND_EXEC jumps to the place just
7543 * prior to this double SWITCH_BUFFER
7544 */
7545 5 + /* COND_EXEC */
7546 7 + /* HDP_flush */
7547 4 + /* VGT_flush */
7548 14 + /* CE_META */
7549 31 + /* DE_META */
7550 3 + /* CNTX_CTRL */
7551 5 + /* HDP_INVL */
7552 8 + 8 + /* FENCE x2 */
7553 2 + /* SWITCH_BUFFER */
7554 7 + /* gfx_v9_0_emit_mem_sync */
7555 2, /* gfx_v9_0_ring_emit_cleaner_shader */
7556 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
7557 .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7558 .emit_fence = gfx_v9_0_ring_emit_fence,
7559 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7560 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7561 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7562 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7563 .test_ring = gfx_v9_0_ring_test_ring,
7564 .test_ib = gfx_v9_0_ring_test_ib,
7565 .insert_nop = gfx_v9_ring_insert_nop,
7566 .pad_ib = amdgpu_ring_generic_pad_ib,
7567 .emit_switch_buffer = gfx_v9_ring_emit_sb,
7568 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7569 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7570 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7571 .emit_wreg = gfx_v9_0_ring_emit_wreg,
7572 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7573 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7574 .soft_recovery = gfx_v9_0_ring_soft_recovery,
7575 .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7576 .patch_cntl = gfx_v9_0_ring_patch_cntl,
7577 .patch_de = gfx_v9_0_ring_patch_de_meta,
7578 .patch_ce = gfx_v9_0_ring_patch_ce_meta,
7579 .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7580 .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7581 .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7582 };
7583
7584 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
7585 .type = AMDGPU_RING_TYPE_COMPUTE,
7586 .align_mask = 0xff,
7587 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7588 .support_64bit_ptrs = true,
7589 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
7590 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
7591 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
7592 .emit_frame_size =
7593 20 + /* gfx_v9_0_ring_emit_gds_switch */
7594 7 + /* gfx_v9_0_ring_emit_hdp_flush */
7595 5 + /* hdp invalidate */
7596 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7597 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7598 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7599 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7600 7 + /* gfx_v9_0_emit_mem_sync */
7601 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
7602 15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
7603 2, /* gfx_v9_0_ring_emit_cleaner_shader */
7604 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7605 .emit_ib = gfx_v9_0_ring_emit_ib_compute,
7606 .emit_fence = gfx_v9_0_ring_emit_fence,
7607 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7608 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7609 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7610 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7611 .test_ring = gfx_v9_0_ring_test_ring,
7612 .test_ib = gfx_v9_0_ring_test_ib,
7613 .insert_nop = gfx_v9_ring_insert_nop,
7614 .pad_ib = amdgpu_ring_generic_pad_ib,
7615 .emit_wreg = gfx_v9_0_ring_emit_wreg,
7616 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7617 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7618 .soft_recovery = gfx_v9_0_ring_soft_recovery,
7619 .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7620 .emit_wave_limit = gfx_v9_0_emit_wave_limit,
7621 .reset = gfx_v9_0_reset_kcq,
7622 .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7623 .begin_use = gfx_v9_0_ring_begin_use_compute,
7624 .end_use = gfx_v9_0_ring_end_use_compute,
7625 };
7626
7627 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7628 .type = AMDGPU_RING_TYPE_KIQ,
7629 .align_mask = 0xff,
7630 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7631 .support_64bit_ptrs = true,
7632 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
7633 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
7634 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
7635 .emit_frame_size =
7636 20 + /* gfx_v9_0_ring_emit_gds_switch */
7637 7 + /* gfx_v9_0_ring_emit_hdp_flush */
7638 5 + /* hdp invalidate */
7639 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7640 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7641 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7642 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7643 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7644 .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7645 .test_ring = gfx_v9_0_ring_test_ring,
7646 .insert_nop = amdgpu_ring_insert_nop,
7647 .pad_ib = amdgpu_ring_generic_pad_ib,
7648 .emit_rreg = gfx_v9_0_ring_emit_rreg,
7649 .emit_wreg = gfx_v9_0_ring_emit_wreg,
7650 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7651 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7652 };
7653
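/* Hook up the ring function tables for the KIQ, gfx, software gfx
 * (mid-command-buffer preemption) and compute rings.
 */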
7654 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7655 {
7656 int i;
7657
7658 adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7659
7660 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7661 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7662
7663 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
7664 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
7665 adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
7666 }
7667
7668 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7669 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7670 }
7671
7672 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7673 .set = gfx_v9_0_set_eop_interrupt_state,
7674 .process = gfx_v9_0_eop_irq,
7675 };
7676
7677 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7678 .set = gfx_v9_0_set_priv_reg_fault_state,
7679 .process = gfx_v9_0_priv_reg_irq,
7680 };
7681
7682 static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = {
7683 .set = gfx_v9_0_set_bad_op_fault_state,
7684 .process = gfx_v9_0_bad_op_irq,
7685 };
7686
7687 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7688 .set = gfx_v9_0_set_priv_inst_fault_state,
7689 .process = gfx_v9_0_priv_inst_irq,
7690 };
7691
7692 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7693 .set = gfx_v9_0_set_cp_ecc_error_state,
7694 .process = amdgpu_gfx_cp_ecc_error_irq,
7695 };
7696
7697
7698 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7699 {
7700 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7701 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7702
7703 adev->gfx.priv_reg_irq.num_types = 1;
7704 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7705
7706 adev->gfx.bad_op_irq.num_types = 1;
7707 adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs;
7708
7709 adev->gfx.priv_inst_irq.num_types = 1;
7710 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7711
7712 adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
7713 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7714 }
7715
7716 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7717 {
7718 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7719 case IP_VERSION(9, 0, 1):
7720 case IP_VERSION(9, 2, 1):
7721 case IP_VERSION(9, 4, 0):
7722 case IP_VERSION(9, 2, 2):
7723 case IP_VERSION(9, 1, 0):
7724 case IP_VERSION(9, 4, 1):
7725 case IP_VERSION(9, 3, 0):
7726 case IP_VERSION(9, 4, 2):
7727 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7728 break;
7729 default:
7730 break;
7731 }
7732 }
7733
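/* Initialize the per-ASIC GDS size, max compute wave id, and the GWS and
 * OA sizes used by the rest of the driver.
 */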
7734 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7735 {
7736 /* init asic gds info */
7737 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7738 case IP_VERSION(9, 0, 1):
7739 case IP_VERSION(9, 2, 1):
7740 case IP_VERSION(9, 4, 0):
7741 adev->gds.gds_size = 0x10000;
7742 break;
7743 case IP_VERSION(9, 2, 2):
7744 case IP_VERSION(9, 1, 0):
7745 case IP_VERSION(9, 4, 1):
7746 adev->gds.gds_size = 0x1000;
7747 break;
7748 case IP_VERSION(9, 4, 2):
7749 /* Aldebaran removed all of the internal GDS memory;
7750 * only GWS opcodes, such as barrier and semaphore,
7751 * are supported in the kernel. */
7752 adev->gds.gds_size = 0;
7753 break;
7754 default:
7755 adev->gds.gds_size = 0x10000;
7756 break;
7757 }
7758
7759 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7760 case IP_VERSION(9, 0, 1):
7761 case IP_VERSION(9, 4, 0):
7762 adev->gds.gds_compute_max_wave_id = 0x7ff;
7763 break;
7764 case IP_VERSION(9, 2, 1):
7765 adev->gds.gds_compute_max_wave_id = 0x27f;
7766 break;
7767 case IP_VERSION(9, 2, 2):
7768 case IP_VERSION(9, 1, 0):
7769 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7770 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7771 else
7772 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7773 break;
7774 case IP_VERSION(9, 4, 1):
7775 adev->gds.gds_compute_max_wave_id = 0xfff;
7776 break;
7777 case IP_VERSION(9, 4, 2):
7778 /* deprecated for Aldebaran, no usage at all */
7779 adev->gds.gds_compute_max_wave_id = 0;
7780 break;
7781 default:
7782 /* this really depends on the chip */
7783 adev->gds.gds_compute_max_wave_id = 0x7ff;
7784 break;
7785 }
7786
7787 adev->gds.gws_size = 64;
7788 adev->gds.oa_size = 16;
7789 }
7790
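/* Mark the CUs in @bitmap as inactive for the currently selected SE/SH by
 * writing GC_USER_SHADER_ARRAY_CONFIG.
 */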
7791 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7792 u32 bitmap)
7793 {
7794 u32 data;
7795
7796 if (!bitmap)
7797 return;
7798
7799 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7800 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7801
7802 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7803 }
7804
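/* Return a bitmask of the active CUs in the currently selected SE/SH,
 * combining the fuse (CC) and user (GC_USER) inactive-CU configuration.
 */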
7805 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7806 {
7807 u32 data, mask;
7808
7809 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7810 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7811
7812 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7813 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7814
7815 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7816
7817 return (~data) & mask;
7818 }
7819
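/* Walk every SE/SH, apply the user CU disable masks and fill @cu_info
 * with the active CU bitmaps, the always-on CU mask and the total number
 * of active CUs.
 */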
7820 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7821 struct amdgpu_cu_info *cu_info)
7822 {
7823 int i, j, k, counter, active_cu_number = 0;
7824 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7825 unsigned disable_masks[4 * 4];
7826
7827 if (!adev || !cu_info)
7828 return -EINVAL;
7829
7830 /*
7831 * 16 comes from the 4x4 bitmap array size, which covers all gfx9 ASICs
7832 */
7833 if (adev->gfx.config.max_shader_engines *
7834 adev->gfx.config.max_sh_per_se > 16)
7835 return -EINVAL;
7836
7837 amdgpu_gfx_parse_disable_cu(disable_masks,
7838 adev->gfx.config.max_shader_engines,
7839 adev->gfx.config.max_sh_per_se);
7840
7841 mutex_lock(&adev->grbm_idx_mutex);
7842 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7843 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7844 mask = 1;
7845 ao_bitmap = 0;
7846 counter = 0;
7847 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
7848 gfx_v9_0_set_user_cu_inactive_bitmap(
7849 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7850 bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7851
7852 /*
7853 * The bitmap (and ao_cu_bitmap) in the cu_info structure is a
7854 * 4x4 array, which is suitable for Vega ASICs with their
7855 * 4*2 SE/SH layout.
7856 * Arcturus, however, changed the SE/SH layout to 8*1.
7857 * To minimize the impact, we keep it compatible with the
7858 * current bitmap array as below:
7859 * SE4,SH0 --> bitmap[0][1]
7860 * SE5,SH0 --> bitmap[1][1]
7861 * SE6,SH0 --> bitmap[2][1]
7862 * SE7,SH0 --> bitmap[3][1]
7863 */
7864 cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
7865
7866 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7867 if (bitmap & mask) {
7868 if (counter < adev->gfx.config.max_cu_per_sh)
7869 ao_bitmap |= mask;
7870 counter++;
7871 }
7872 mask <<= 1;
7873 }
7874 active_cu_number += counter;
7875 if (i < 2 && j < 2)
7876 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7877 cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7878 }
7879 }
7880 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7881 mutex_unlock(&adev->grbm_idx_mutex);
7882
7883 cu_info->number = active_cu_number;
7884 cu_info->ao_cu_mask = ao_cu_mask;
7885 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7886
7887 return 0;
7888 }
7889
7890 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7891 {
7892 .type = AMD_IP_BLOCK_TYPE_GFX,
7893 .major = 9,
7894 .minor = 0,
7895 .rev = 0,
7896 .funcs = &gfx_v9_0_ip_funcs,
7897 };
7898