1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23 #include <linux/firmware.h>
24 #include <drm/drmP.h>
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vi_structs.h"
29 #include "vid.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
34
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
37
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
40
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50
51 #include "smu/smu_7_1_3_d.h"
52
/* GFX8 exposes a single GFX ring; compute rings are managed separately. */
#define GFX8_NUM_GFX_RINGS     1
/* Bytes reserved per MEC pipe for the hardware queue descriptor (HPD). */
#define GFX8_MEC_HPD_SIZE 2048

/* Per-ASIC golden values programmed into mmGB_ADDR_CONFIG. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Field builders for GB_TILE_MODE* registers. */
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
/* Field builders for GB_MACROTILE_MODE* registers. */
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-domain override bits within mmRLC_CGTT_MGCG_OVERRIDE. */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK		0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK		0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK		0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK		0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK		0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK		0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address*/
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

/* Number of entries in the RLC direct-register-list firmware format. */
#define RLC_FormatDirectRegListLength        14
93
94 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
100
101 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
113
114 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
119
120 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
126
127 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
133
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140
141 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
142 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
143 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
144 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
145 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
146 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
147
/*
 * Per-VMID GDS register offsets, indexed by VMID (0-15):
 * {memory base, memory size, GWS allocation, OA allocation}.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
167
/*
 * Golden register tables for Tonga. Each table is a flat list of
 * {register offset, AND mask, OR value} triples consumed by
 * amdgpu_program_register_sequence(): reg = (reg & ~mask) | value.
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Common GRBM/SPI/raster config for Tonga. */
static const u32 tonga_golden_common_all[] =
{
	/* 0xe0000000 = broadcast writes to all SEs/SHs/instances */
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

/* MGCG/CGCG clock-gating init sequence for Tonga (CGTT + per-CU CGTS). */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU clock-gating schedule, CU0..CU7 */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
278
/*
 * Golden register tables for Polaris11/12 and Polaris10.
 * {register offset, AND mask, OR value} triples, applied via
 * amdgpu_program_register_sequence().
 */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Common GRBM/SPI config for Polaris11/12. */
static const u32 polaris11_golden_common_all[] =
{
	/* 0xe0000000 = broadcast writes to all SEs/SHs/instances */
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Common GRBM/SPI/raster config for Polaris10. */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
342
/*
 * Golden register tables for Fiji.
 * {register offset, AND mask, OR value} triples, applied via
 * amdgpu_program_register_sequence().
 */
static const u32 fiji_golden_common_all[] =
{
	/* 0xe0000000 = broadcast writes to all SEs/SHs/instances */
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* MGCG/CGCG clock-gating init sequence for Fiji (no per-CU CGTS entries). */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
410
/*
 * Golden register tables for Iceland (Topaz).
 * {register offset, AND mask, OR value} triples, applied via
 * amdgpu_program_register_sequence().
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

/* Common GRBM/SPI/raster config for Iceland. */
static const u32 iceland_golden_common_all[] =
{
	/* 0xe0000000 = broadcast writes to all SEs/SHs/instances */
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

/* MGCG/CGCG clock-gating init sequence for Iceland (per-CU entries CU0..CU5). */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
510
/*
 * Golden register tables for Carrizo (cz).
 * {register offset, AND mask, OR value} triples, applied via
 * amdgpu_program_register_sequence().
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

/* Common GRBM/SPI/raster config for Carrizo. */
static const u32 cz_golden_common_all[] =
{
	/* 0xe0000000 = broadcast writes to all SEs/SHs/instances */
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

/* MGCG/CGCG clock-gating init sequence for Carrizo (per-CU entries CU0..CU7). */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
617
/*
 * Golden register tables for Stoney.
 * {register offset, AND mask, OR value} triples, applied via
 * amdgpu_program_register_sequence().
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

/* Common GRBM/SPI/raster config for Stoney. */
static const u32 stoney_golden_common_all[] =
{
	/* 0xe0000000 = broadcast writes to all SEs/SHs/instances */
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

/* Short MGCG/CGCG init sequence for Stoney. */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
652
653 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
654 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
655 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
656 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
657 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
658 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
659 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
660 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
661
gfx_v8_0_init_golden_registers(struct amdgpu_device * adev)662 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
663 {
664 switch (adev->asic_type) {
665 case CHIP_TOPAZ:
666 amdgpu_program_register_sequence(adev,
667 iceland_mgcg_cgcg_init,
668 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
669 amdgpu_program_register_sequence(adev,
670 golden_settings_iceland_a11,
671 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
672 amdgpu_program_register_sequence(adev,
673 iceland_golden_common_all,
674 (const u32)ARRAY_SIZE(iceland_golden_common_all));
675 break;
676 case CHIP_FIJI:
677 amdgpu_program_register_sequence(adev,
678 fiji_mgcg_cgcg_init,
679 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
680 amdgpu_program_register_sequence(adev,
681 golden_settings_fiji_a10,
682 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
683 amdgpu_program_register_sequence(adev,
684 fiji_golden_common_all,
685 (const u32)ARRAY_SIZE(fiji_golden_common_all));
686 break;
687
688 case CHIP_TONGA:
689 amdgpu_program_register_sequence(adev,
690 tonga_mgcg_cgcg_init,
691 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
692 amdgpu_program_register_sequence(adev,
693 golden_settings_tonga_a11,
694 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
695 amdgpu_program_register_sequence(adev,
696 tonga_golden_common_all,
697 (const u32)ARRAY_SIZE(tonga_golden_common_all));
698 break;
699 case CHIP_POLARIS11:
700 case CHIP_POLARIS12:
701 amdgpu_program_register_sequence(adev,
702 golden_settings_polaris11_a11,
703 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
704 amdgpu_program_register_sequence(adev,
705 polaris11_golden_common_all,
706 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
707 break;
708 case CHIP_POLARIS10:
709 amdgpu_program_register_sequence(adev,
710 golden_settings_polaris10_a11,
711 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
712 amdgpu_program_register_sequence(adev,
713 polaris10_golden_common_all,
714 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
715 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
716 if (adev->pdev->revision == 0xc7 &&
717 ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
718 (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
719 (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
720 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
721 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
722 }
723 break;
724 case CHIP_CARRIZO:
725 amdgpu_program_register_sequence(adev,
726 cz_mgcg_cgcg_init,
727 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
728 amdgpu_program_register_sequence(adev,
729 cz_golden_settings_a11,
730 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
731 amdgpu_program_register_sequence(adev,
732 cz_golden_common_all,
733 (const u32)ARRAY_SIZE(cz_golden_common_all));
734 break;
735 case CHIP_STONEY:
736 amdgpu_program_register_sequence(adev,
737 stoney_mgcg_cgcg_init,
738 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
739 amdgpu_program_register_sequence(adev,
740 stoney_golden_settings_a11,
741 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
742 amdgpu_program_register_sequence(adev,
743 stoney_golden_common_all,
744 (const u32)ARRAY_SIZE(stoney_golden_common_all));
745 break;
746 default:
747 break;
748 }
749 }
750
gfx_v8_0_scratch_init(struct amdgpu_device * adev)751 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
752 {
753 adev->gfx.scratch.num_reg = 8;
754 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
755 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
756 }
757
gfx_v8_0_ring_test_ring(struct amdgpu_ring * ring)758 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
759 {
760 struct amdgpu_device *adev = ring->adev;
761 uint32_t scratch;
762 uint32_t tmp = 0;
763 unsigned i;
764 int r;
765
766 r = amdgpu_gfx_scratch_get(adev, &scratch);
767 if (r) {
768 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
769 return r;
770 }
771 WREG32(scratch, 0xCAFEDEAD);
772 r = amdgpu_ring_alloc(ring, 3);
773 if (r) {
774 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
775 ring->idx, r);
776 amdgpu_gfx_scratch_free(adev, scratch);
777 return r;
778 }
779 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
780 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
781 amdgpu_ring_write(ring, 0xDEADBEEF);
782 amdgpu_ring_commit(ring);
783
784 for (i = 0; i < adev->usec_timeout; i++) {
785 tmp = RREG32(scratch);
786 if (tmp == 0xDEADBEEF)
787 break;
788 DRM_UDELAY(1);
789 }
790 if (i < adev->usec_timeout) {
791 DRM_INFO("ring test on %d succeeded in %d usecs\n",
792 ring->idx, i);
793 } else {
794 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
795 ring->idx, scratch, tmp);
796 r = -EINVAL;
797 }
798 amdgpu_gfx_scratch_free(adev, scratch);
799 return r;
800 }
801
gfx_v8_0_ring_test_ib(struct amdgpu_ring * ring,long timeout)802 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
803 {
804 struct amdgpu_device *adev = ring->adev;
805 struct amdgpu_ib ib;
806 struct dma_fence *f = NULL;
807 uint32_t scratch;
808 uint32_t tmp = 0;
809 long r;
810
811 r = amdgpu_gfx_scratch_get(adev, &scratch);
812 if (r) {
813 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
814 return r;
815 }
816 WREG32(scratch, 0xCAFEDEAD);
817 memset(&ib, 0, sizeof(ib));
818 r = amdgpu_ib_get(adev, NULL, 256, &ib);
819 if (r) {
820 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
821 goto err1;
822 }
823 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
824 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
825 ib.ptr[2] = 0xDEADBEEF;
826 ib.length_dw = 3;
827
828 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
829 if (r)
830 goto err2;
831
832 r = dma_fence_wait_timeout(f, false, timeout);
833 if (r == 0) {
834 DRM_ERROR("amdgpu: IB test timed out.\n");
835 r = -ETIMEDOUT;
836 goto err2;
837 } else if (r < 0) {
838 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
839 goto err2;
840 }
841 tmp = RREG32(scratch);
842 if (tmp == 0xDEADBEEF) {
843 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
844 r = 0;
845 } else {
846 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
847 scratch, tmp);
848 r = -EINVAL;
849 }
850 err2:
851 amdgpu_ib_free(adev, &ib, NULL);
852 dma_fence_put(f);
853 err1:
854 amdgpu_gfx_scratch_free(adev, scratch);
855 return r;
856 }
857
858
gfx_v8_0_free_microcode(struct amdgpu_device * adev)859 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
860 {
861 release_firmware(adev->gfx.pfp_fw);
862 adev->gfx.pfp_fw = NULL;
863 release_firmware(adev->gfx.me_fw);
864 adev->gfx.me_fw = NULL;
865 release_firmware(adev->gfx.ce_fw);
866 adev->gfx.ce_fw = NULL;
867 release_firmware(adev->gfx.rlc_fw);
868 adev->gfx.rlc_fw = NULL;
869 release_firmware(adev->gfx.mec_fw);
870 adev->gfx.mec_fw = NULL;
871 if ((adev->asic_type != CHIP_STONEY) &&
872 (adev->asic_type != CHIP_TOPAZ))
873 release_firmware(adev->gfx.mec2_fw);
874 adev->gfx.mec2_fw = NULL;
875
876 kfree(adev->gfx.rlc.register_list_format);
877 }
878
gfx_v8_0_init_microcode(struct amdgpu_device * adev)879 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
880 {
881 const char *chip_name;
882 char fw_name[30];
883 int err;
884 struct amdgpu_firmware_info *info = NULL;
885 const struct common_firmware_header *header = NULL;
886 const struct gfx_firmware_header_v1_0 *cp_hdr;
887 const struct rlc_firmware_header_v2_0 *rlc_hdr;
888 unsigned int *tmp = NULL, i;
889
890 DRM_DEBUG("\n");
891
892 switch (adev->asic_type) {
893 case CHIP_TOPAZ:
894 chip_name = "topaz";
895 break;
896 case CHIP_TONGA:
897 chip_name = "tonga";
898 break;
899 case CHIP_CARRIZO:
900 chip_name = "carrizo";
901 break;
902 case CHIP_FIJI:
903 chip_name = "fiji";
904 break;
905 case CHIP_POLARIS11:
906 chip_name = "polaris11";
907 break;
908 case CHIP_POLARIS10:
909 chip_name = "polaris10";
910 break;
911 case CHIP_POLARIS12:
912 chip_name = "polaris12";
913 break;
914 case CHIP_STONEY:
915 chip_name = "stoney";
916 break;
917 default:
918 BUG();
919 }
920
921 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
922 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
923 if (err)
924 goto out;
925 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
926 if (err)
927 goto out;
928 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
929 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
930 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
931
932 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
933 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
934 if (err)
935 goto out;
936 err = amdgpu_ucode_validate(adev->gfx.me_fw);
937 if (err)
938 goto out;
939 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
940 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
941
942 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
943
944 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
945 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
946 if (err)
947 goto out;
948 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
949 if (err)
950 goto out;
951 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
952 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
953 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
954
955 /*
956 * Support for MCBP/Virtualization in combination with chained IBs is
957 * formal released on feature version #46
958 */
959 if (adev->gfx.ce_feature_version >= 46 &&
960 adev->gfx.pfp_feature_version >= 46) {
961 adev->virt.chained_ib_support = true;
962 DRM_INFO("Chained IB support enabled!\n");
963 } else
964 adev->virt.chained_ib_support = false;
965
966 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
967 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
968 if (err)
969 goto out;
970 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
971 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
972 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
973 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
974
975 adev->gfx.rlc.save_and_restore_offset =
976 le32_to_cpu(rlc_hdr->save_and_restore_offset);
977 adev->gfx.rlc.clear_state_descriptor_offset =
978 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
979 adev->gfx.rlc.avail_scratch_ram_locations =
980 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
981 adev->gfx.rlc.reg_restore_list_size =
982 le32_to_cpu(rlc_hdr->reg_restore_list_size);
983 adev->gfx.rlc.reg_list_format_start =
984 le32_to_cpu(rlc_hdr->reg_list_format_start);
985 adev->gfx.rlc.reg_list_format_separate_start =
986 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
987 adev->gfx.rlc.starting_offsets_start =
988 le32_to_cpu(rlc_hdr->starting_offsets_start);
989 adev->gfx.rlc.reg_list_format_size_bytes =
990 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
991 adev->gfx.rlc.reg_list_size_bytes =
992 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
993
994 adev->gfx.rlc.register_list_format =
995 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
996 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
997
998 if (!adev->gfx.rlc.register_list_format) {
999 err = -ENOMEM;
1000 goto out;
1001 }
1002
1003 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1004 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1005 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1006 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1007
1008 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1009
1010 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1011 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1012 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1013 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1014
1015 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1016 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1017 if (err)
1018 goto out;
1019 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1020 if (err)
1021 goto out;
1022 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1023 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1024 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1025
1026 if ((adev->asic_type != CHIP_STONEY) &&
1027 (adev->asic_type != CHIP_TOPAZ)) {
1028 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1029 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1030 if (!err) {
1031 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1032 if (err)
1033 goto out;
1034 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1035 adev->gfx.mec2_fw->data;
1036 adev->gfx.mec2_fw_version =
1037 le32_to_cpu(cp_hdr->header.ucode_version);
1038 adev->gfx.mec2_feature_version =
1039 le32_to_cpu(cp_hdr->ucode_feature_version);
1040 } else {
1041 err = 0;
1042 adev->gfx.mec2_fw = NULL;
1043 }
1044 }
1045
1046 if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1047 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1048 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1049 info->fw = adev->gfx.pfp_fw;
1050 header = (const struct common_firmware_header *)info->fw->data;
1051 adev->firmware.fw_size +=
1052 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1053
1054 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1055 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1056 info->fw = adev->gfx.me_fw;
1057 header = (const struct common_firmware_header *)info->fw->data;
1058 adev->firmware.fw_size +=
1059 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1060
1061 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1062 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1063 info->fw = adev->gfx.ce_fw;
1064 header = (const struct common_firmware_header *)info->fw->data;
1065 adev->firmware.fw_size +=
1066 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1067
1068 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1069 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1070 info->fw = adev->gfx.rlc_fw;
1071 header = (const struct common_firmware_header *)info->fw->data;
1072 adev->firmware.fw_size +=
1073 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1074
1075 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1076 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1077 info->fw = adev->gfx.mec_fw;
1078 header = (const struct common_firmware_header *)info->fw->data;
1079 adev->firmware.fw_size +=
1080 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1081
1082 /* we need account JT in */
1083 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1084 adev->firmware.fw_size +=
1085 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1086
1087 if (amdgpu_sriov_vf(adev)) {
1088 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1089 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1090 info->fw = adev->gfx.mec_fw;
1091 adev->firmware.fw_size +=
1092 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1093 }
1094
1095 if (adev->gfx.mec2_fw) {
1096 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1097 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1098 info->fw = adev->gfx.mec2_fw;
1099 header = (const struct common_firmware_header *)info->fw->data;
1100 adev->firmware.fw_size +=
1101 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1102 }
1103
1104 }
1105
1106 out:
1107 if (err) {
1108 dev_err(adev->dev,
1109 "gfx8: Failed to load firmware \"%s\"\n",
1110 fw_name);
1111 release_firmware(adev->gfx.pfp_fw);
1112 adev->gfx.pfp_fw = NULL;
1113 release_firmware(adev->gfx.me_fw);
1114 adev->gfx.me_fw = NULL;
1115 release_firmware(adev->gfx.ce_fw);
1116 adev->gfx.ce_fw = NULL;
1117 release_firmware(adev->gfx.rlc_fw);
1118 adev->gfx.rlc_fw = NULL;
1119 release_firmware(adev->gfx.mec_fw);
1120 adev->gfx.mec_fw = NULL;
1121 release_firmware(adev->gfx.mec2_fw);
1122 adev->gfx.mec2_fw = NULL;
1123 }
1124 return err;
1125 }
1126
gfx_v8_0_get_csb_buffer(struct amdgpu_device * adev,volatile u32 * buffer)1127 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1128 volatile u32 *buffer)
1129 {
1130 u32 count = 0, i;
1131 const struct cs_section_def *sect = NULL;
1132 const struct cs_extent_def *ext = NULL;
1133
1134 if (adev->gfx.rlc.cs_data == NULL)
1135 return;
1136 if (buffer == NULL)
1137 return;
1138
1139 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1140 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1141
1142 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1143 buffer[count++] = cpu_to_le32(0x80000000);
1144 buffer[count++] = cpu_to_le32(0x80000000);
1145
1146 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1147 for (ext = sect->section; ext->extent != NULL; ++ext) {
1148 if (sect->id == SECT_CONTEXT) {
1149 buffer[count++] =
1150 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1151 buffer[count++] = cpu_to_le32(ext->reg_index -
1152 PACKET3_SET_CONTEXT_REG_START);
1153 for (i = 0; i < ext->reg_count; i++)
1154 buffer[count++] = cpu_to_le32(ext->extent[i]);
1155 } else {
1156 return;
1157 }
1158 }
1159 }
1160
1161 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1162 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1163 PACKET3_SET_CONTEXT_REG_START);
1164 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1165 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1166
1167 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1168 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1169
1170 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1171 buffer[count++] = cpu_to_le32(0);
1172 }
1173
cz_init_cp_jump_table(struct amdgpu_device * adev)1174 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1175 {
1176 const __le32 *fw_data;
1177 volatile u32 *dst_ptr;
1178 int me, i, max_me = 4;
1179 u32 bo_offset = 0;
1180 u32 table_offset, table_size;
1181
1182 if (adev->asic_type == CHIP_CARRIZO)
1183 max_me = 5;
1184
1185 /* write the cp table buffer */
1186 dst_ptr = adev->gfx.rlc.cp_table_ptr;
1187 for (me = 0; me < max_me; me++) {
1188 if (me == 0) {
1189 const struct gfx_firmware_header_v1_0 *hdr =
1190 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1191 fw_data = (const __le32 *)
1192 (adev->gfx.ce_fw->data +
1193 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1194 table_offset = le32_to_cpu(hdr->jt_offset);
1195 table_size = le32_to_cpu(hdr->jt_size);
1196 } else if (me == 1) {
1197 const struct gfx_firmware_header_v1_0 *hdr =
1198 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1199 fw_data = (const __le32 *)
1200 (adev->gfx.pfp_fw->data +
1201 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1202 table_offset = le32_to_cpu(hdr->jt_offset);
1203 table_size = le32_to_cpu(hdr->jt_size);
1204 } else if (me == 2) {
1205 const struct gfx_firmware_header_v1_0 *hdr =
1206 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1207 fw_data = (const __le32 *)
1208 (adev->gfx.me_fw->data +
1209 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1210 table_offset = le32_to_cpu(hdr->jt_offset);
1211 table_size = le32_to_cpu(hdr->jt_size);
1212 } else if (me == 3) {
1213 const struct gfx_firmware_header_v1_0 *hdr =
1214 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1215 fw_data = (const __le32 *)
1216 (adev->gfx.mec_fw->data +
1217 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1218 table_offset = le32_to_cpu(hdr->jt_offset);
1219 table_size = le32_to_cpu(hdr->jt_size);
1220 } else if (me == 4) {
1221 const struct gfx_firmware_header_v1_0 *hdr =
1222 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1223 fw_data = (const __le32 *)
1224 (adev->gfx.mec2_fw->data +
1225 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1226 table_offset = le32_to_cpu(hdr->jt_offset);
1227 table_size = le32_to_cpu(hdr->jt_size);
1228 }
1229
1230 for (i = 0; i < table_size; i ++) {
1231 dst_ptr[bo_offset + i] =
1232 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1233 }
1234
1235 bo_offset += table_size;
1236 }
1237 }
1238
gfx_v8_0_rlc_fini(struct amdgpu_device * adev)1239 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1240 {
1241 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
1242 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
1243 }
1244
gfx_v8_0_rlc_init(struct amdgpu_device * adev)1245 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1246 {
1247 volatile u32 *dst_ptr;
1248 u32 dws;
1249 const struct cs_section_def *cs_data;
1250 int r;
1251
1252 adev->gfx.rlc.cs_data = vi_cs_data;
1253
1254 cs_data = adev->gfx.rlc.cs_data;
1255
1256 if (cs_data) {
1257 /* clear state block */
1258 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1259
1260 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1261 AMDGPU_GEM_DOMAIN_VRAM,
1262 &adev->gfx.rlc.clear_state_obj,
1263 &adev->gfx.rlc.clear_state_gpu_addr,
1264 (void **)&adev->gfx.rlc.cs_ptr);
1265 if (r) {
1266 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1267 gfx_v8_0_rlc_fini(adev);
1268 return r;
1269 }
1270
1271 /* set up the cs buffer */
1272 dst_ptr = adev->gfx.rlc.cs_ptr;
1273 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1274 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1275 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1276 }
1277
1278 if ((adev->asic_type == CHIP_CARRIZO) ||
1279 (adev->asic_type == CHIP_STONEY)) {
1280 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1281 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1282 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1283 &adev->gfx.rlc.cp_table_obj,
1284 &adev->gfx.rlc.cp_table_gpu_addr,
1285 (void **)&adev->gfx.rlc.cp_table_ptr);
1286 if (r) {
1287 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1288 return r;
1289 }
1290
1291 cz_init_cp_jump_table(adev);
1292
1293 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1294 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1295 }
1296
1297 return 0;
1298 }
1299
gfx_v8_0_mec_fini(struct amdgpu_device * adev)1300 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1301 {
1302 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1303 }
1304
gfx_v8_0_mec_init(struct amdgpu_device * adev)1305 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1306 {
1307 int r;
1308 u32 *hpd;
1309 size_t mec_hpd_size;
1310
1311 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1312
1313 /* take ownership of the relevant compute queues */
1314 amdgpu_gfx_compute_queue_acquire(adev);
1315
1316 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1317
1318 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1319 AMDGPU_GEM_DOMAIN_GTT,
1320 &adev->gfx.mec.hpd_eop_obj,
1321 &adev->gfx.mec.hpd_eop_gpu_addr,
1322 (void **)&hpd);
1323 if (r) {
1324 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1325 return r;
1326 }
1327
1328 memset(hpd, 0, mec_hpd_size);
1329
1330 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1331 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1332
1333 return 0;
1334 }
1335
/* Raw GCN machine code for the VGPR-init compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds(); copied verbatim into the workaround
 * IB at vgpr_offset.  Presumably a run of v_mov_b32 writes followed by a
 * barrier/endpgm pair - treat as opaque firmware-provided data.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1372
/* Raw GCN machine code for the SGPR-init compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds(); copied verbatim into the workaround
 * IB at sgpr_offset and dispatched twice (SGPR1/SGPR2 passes).  Treat as
 * opaque firmware-provided data.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1397
/* (register, value) pairs programmed via SET_SH_REG before the VGPR-init
 * dispatch in gfx_v8_0_do_edc_gpr_workarounds(); consumed two entries at
 * a time (see the i += 2 loop there).
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1418
/* (register, value) pairs for the first SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); differs from sgpr2_init_regs only
 * in the SE0 thread-management mask (0x0f vs 0xf0).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1439
/* (register, value) pairs for the second SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); covers the other half of the SE0
 * thread-management mask (0xf0) relative to sgpr1_init_regs.
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1460
/* EDC (SEC/DED) error-counter registers; read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the accumulated counts.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1489
gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device * adev)1490 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1491 {
1492 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1493 struct amdgpu_ib ib;
1494 struct dma_fence *f = NULL;
1495 int r, i;
1496 u32 tmp;
1497 unsigned total_size, vgpr_offset, sgpr_offset;
1498 u64 gpu_addr;
1499
1500 /* only supported on CZ */
1501 if (adev->asic_type != CHIP_CARRIZO)
1502 return 0;
1503
1504 /* bail if the compute ring is not ready */
1505 if (!ring->ready)
1506 return 0;
1507
1508 tmp = RREG32(mmGB_EDC_MODE);
1509 WREG32(mmGB_EDC_MODE, 0);
1510
1511 total_size =
1512 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1513 total_size +=
1514 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1515 total_size +=
1516 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1517 total_size = ALIGN(total_size, 256);
1518 vgpr_offset = total_size;
1519 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1520 sgpr_offset = total_size;
1521 total_size += sizeof(sgpr_init_compute_shader);
1522
1523 /* allocate an indirect buffer to put the commands in */
1524 memset(&ib, 0, sizeof(ib));
1525 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1526 if (r) {
1527 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1528 return r;
1529 }
1530
1531 /* load the compute shaders */
1532 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1533 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1534
1535 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1536 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1537
1538 /* init the ib length to 0 */
1539 ib.length_dw = 0;
1540
1541 /* VGPR */
1542 /* write the register state for the compute dispatch */
1543 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1544 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1545 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1546 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1547 }
1548 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1549 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1550 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1551 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1552 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1553 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1554
1555 /* write dispatch packet */
1556 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1557 ib.ptr[ib.length_dw++] = 8; /* x */
1558 ib.ptr[ib.length_dw++] = 1; /* y */
1559 ib.ptr[ib.length_dw++] = 1; /* z */
1560 ib.ptr[ib.length_dw++] =
1561 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1562
1563 /* write CS partial flush packet */
1564 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1565 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1566
1567 /* SGPR1 */
1568 /* write the register state for the compute dispatch */
1569 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1570 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1571 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1572 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1573 }
1574 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1575 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1576 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1577 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1578 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1579 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1580
1581 /* write dispatch packet */
1582 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1583 ib.ptr[ib.length_dw++] = 8; /* x */
1584 ib.ptr[ib.length_dw++] = 1; /* y */
1585 ib.ptr[ib.length_dw++] = 1; /* z */
1586 ib.ptr[ib.length_dw++] =
1587 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1588
1589 /* write CS partial flush packet */
1590 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1591 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1592
1593 /* SGPR2 */
1594 /* write the register state for the compute dispatch */
1595 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1596 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1597 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1598 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1599 }
1600 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1601 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1602 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1603 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1604 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1605 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1606
1607 /* write dispatch packet */
1608 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1609 ib.ptr[ib.length_dw++] = 8; /* x */
1610 ib.ptr[ib.length_dw++] = 1; /* y */
1611 ib.ptr[ib.length_dw++] = 1; /* z */
1612 ib.ptr[ib.length_dw++] =
1613 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1614
1615 /* write CS partial flush packet */
1616 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1617 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1618
1619 /* shedule the ib on the ring */
1620 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1621 if (r) {
1622 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1623 goto fail;
1624 }
1625
1626 /* wait for the GPU to finish processing the IB */
1627 r = dma_fence_wait(f, false);
1628 if (r) {
1629 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1630 goto fail;
1631 }
1632
1633 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1634 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1635 WREG32(mmGB_EDC_MODE, tmp);
1636
1637 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1638 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1639 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1640
1641
1642 /* read back registers to clear the counters */
1643 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1644 RREG32(sec_ded_counter_registers[i]);
1645
1646 fail:
1647 amdgpu_ib_free(adev, &ib, NULL);
1648 dma_fence_put(f);
1649
1650 return r;
1651 }
1652
gfx_v8_0_gpu_early_init(struct amdgpu_device * adev)1653 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1654 {
1655 u32 gb_addr_config;
1656 u32 mc_shared_chmap, mc_arb_ramcfg;
1657 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1658 u32 tmp;
1659 int ret;
1660
1661 switch (adev->asic_type) {
1662 case CHIP_TOPAZ:
1663 adev->gfx.config.max_shader_engines = 1;
1664 adev->gfx.config.max_tile_pipes = 2;
1665 adev->gfx.config.max_cu_per_sh = 6;
1666 adev->gfx.config.max_sh_per_se = 1;
1667 adev->gfx.config.max_backends_per_se = 2;
1668 adev->gfx.config.max_texture_channel_caches = 2;
1669 adev->gfx.config.max_gprs = 256;
1670 adev->gfx.config.max_gs_threads = 32;
1671 adev->gfx.config.max_hw_contexts = 8;
1672
1673 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1674 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1675 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1676 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1677 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1678 break;
1679 case CHIP_FIJI:
1680 adev->gfx.config.max_shader_engines = 4;
1681 adev->gfx.config.max_tile_pipes = 16;
1682 adev->gfx.config.max_cu_per_sh = 16;
1683 adev->gfx.config.max_sh_per_se = 1;
1684 adev->gfx.config.max_backends_per_se = 4;
1685 adev->gfx.config.max_texture_channel_caches = 16;
1686 adev->gfx.config.max_gprs = 256;
1687 adev->gfx.config.max_gs_threads = 32;
1688 adev->gfx.config.max_hw_contexts = 8;
1689
1690 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1691 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1692 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1693 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1694 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1695 break;
1696 case CHIP_POLARIS11:
1697 case CHIP_POLARIS12:
1698 ret = amdgpu_atombios_get_gfx_info(adev);
1699 if (ret)
1700 return ret;
1701 adev->gfx.config.max_gprs = 256;
1702 adev->gfx.config.max_gs_threads = 32;
1703 adev->gfx.config.max_hw_contexts = 8;
1704
1705 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1706 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1707 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1708 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1709 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1710 break;
1711 case CHIP_POLARIS10:
1712 ret = amdgpu_atombios_get_gfx_info(adev);
1713 if (ret)
1714 return ret;
1715 adev->gfx.config.max_gprs = 256;
1716 adev->gfx.config.max_gs_threads = 32;
1717 adev->gfx.config.max_hw_contexts = 8;
1718
1719 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1720 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1721 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1722 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1723 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1724 break;
1725 case CHIP_TONGA:
1726 adev->gfx.config.max_shader_engines = 4;
1727 adev->gfx.config.max_tile_pipes = 8;
1728 adev->gfx.config.max_cu_per_sh = 8;
1729 adev->gfx.config.max_sh_per_se = 1;
1730 adev->gfx.config.max_backends_per_se = 2;
1731 adev->gfx.config.max_texture_channel_caches = 8;
1732 adev->gfx.config.max_gprs = 256;
1733 adev->gfx.config.max_gs_threads = 32;
1734 adev->gfx.config.max_hw_contexts = 8;
1735
1736 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1737 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1738 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1739 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1740 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1741 break;
1742 case CHIP_CARRIZO:
1743 adev->gfx.config.max_shader_engines = 1;
1744 adev->gfx.config.max_tile_pipes = 2;
1745 adev->gfx.config.max_sh_per_se = 1;
1746 adev->gfx.config.max_backends_per_se = 2;
1747 adev->gfx.config.max_cu_per_sh = 8;
1748 adev->gfx.config.max_texture_channel_caches = 2;
1749 adev->gfx.config.max_gprs = 256;
1750 adev->gfx.config.max_gs_threads = 32;
1751 adev->gfx.config.max_hw_contexts = 8;
1752
1753 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1754 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1755 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1756 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1757 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1758 break;
1759 case CHIP_STONEY:
1760 adev->gfx.config.max_shader_engines = 1;
1761 adev->gfx.config.max_tile_pipes = 2;
1762 adev->gfx.config.max_sh_per_se = 1;
1763 adev->gfx.config.max_backends_per_se = 1;
1764 adev->gfx.config.max_cu_per_sh = 3;
1765 adev->gfx.config.max_texture_channel_caches = 2;
1766 adev->gfx.config.max_gprs = 256;
1767 adev->gfx.config.max_gs_threads = 16;
1768 adev->gfx.config.max_hw_contexts = 8;
1769
1770 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1771 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1772 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1773 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1774 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1775 break;
1776 default:
1777 adev->gfx.config.max_shader_engines = 2;
1778 adev->gfx.config.max_tile_pipes = 4;
1779 adev->gfx.config.max_cu_per_sh = 2;
1780 adev->gfx.config.max_sh_per_se = 1;
1781 adev->gfx.config.max_backends_per_se = 2;
1782 adev->gfx.config.max_texture_channel_caches = 4;
1783 adev->gfx.config.max_gprs = 256;
1784 adev->gfx.config.max_gs_threads = 32;
1785 adev->gfx.config.max_hw_contexts = 8;
1786
1787 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1788 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1789 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1790 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1791 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1792 break;
1793 }
1794
1795 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1796 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1797 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1798
1799 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1800 adev->gfx.config.mem_max_burst_length_bytes = 256;
1801 if (adev->flags & AMD_IS_APU) {
1802 /* Get memory bank mapping mode. */
1803 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1804 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1805 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1806
1807 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1808 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1809 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1810
1811 /* Validate settings in case only one DIMM installed. */
1812 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1813 dimm00_addr_map = 0;
1814 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1815 dimm01_addr_map = 0;
1816 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1817 dimm10_addr_map = 0;
1818 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1819 dimm11_addr_map = 0;
1820
1821 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1822 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1823 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1824 adev->gfx.config.mem_row_size_in_kb = 2;
1825 else
1826 adev->gfx.config.mem_row_size_in_kb = 1;
1827 } else {
1828 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1829 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1830 if (adev->gfx.config.mem_row_size_in_kb > 4)
1831 adev->gfx.config.mem_row_size_in_kb = 4;
1832 }
1833
1834 adev->gfx.config.shader_engine_tile_size = 32;
1835 adev->gfx.config.num_gpus = 1;
1836 adev->gfx.config.multi_gpu_tile_size = 64;
1837
1838 /* fix up row size */
1839 switch (adev->gfx.config.mem_row_size_in_kb) {
1840 case 1:
1841 default:
1842 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1843 break;
1844 case 2:
1845 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1846 break;
1847 case 4:
1848 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1849 break;
1850 }
1851 adev->gfx.config.gb_addr_config = gb_addr_config;
1852
1853 return 0;
1854 }
1855
gfx_v8_0_compute_ring_init(struct amdgpu_device * adev,int ring_id,int mec,int pipe,int queue)1856 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1857 int mec, int pipe, int queue)
1858 {
1859 int r;
1860 unsigned irq_type;
1861 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1862
1863 ring = &adev->gfx.compute_ring[ring_id];
1864
1865 /* mec0 is me1 */
1866 ring->me = mec + 1;
1867 ring->pipe = pipe;
1868 ring->queue = queue;
1869
1870 ring->ring_obj = NULL;
1871 ring->use_doorbell = true;
1872 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1873 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1874 + (ring_id * GFX8_MEC_HPD_SIZE);
1875 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1876
1877 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1878 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1879 + ring->pipe;
1880
1881 /* type-2 packets are deprecated on MEC, use type-3 instead */
1882 r = amdgpu_ring_init(adev, ring, 1024,
1883 &adev->gfx.eop_irq, irq_type);
1884 if (r)
1885 return r;
1886
1887
1888 return 0;
1889 }
1890
/**
 * gfx_v8_0_sw_init - software-side initialization for the gfx v8 IP block
 * @handle: amdgpu device pointer (passed as void * per the amd_ip_funcs
 *          callback contract)
 *
 * Registers the gfx interrupt sources, loads microcode, allocates the RLC,
 * MEC, KIQ and MQD buffer objects, creates the gfx and compute rings, and
 * reserves the GDS/GWS/OA partitions.  The call order matters: rings depend
 * on the MEC EOP buffer, and the KIQ/MQD setup depends on the rings.
 *
 * Returns 0 on success or a negative error code; on failure the matching
 * sw_fini callback is responsible for tearing down whatever was set up.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* number of compute microengines depends on the ASIC */
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_TONGA:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_POLARIS10:
	case CHIP_CARRIZO:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* MEC init allocates the EOP buffer the compute rings point into */
	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				/* skip queues not assigned to amdgpu (e.g. KFD's share) */
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2041
gfx_v8_0_sw_fini(void * handle)2042 static int gfx_v8_0_sw_fini(void *handle)
2043 {
2044 int i;
2045 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2046
2047 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2048 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2049 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2050
2051 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2052 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2053 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2054 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2055
2056 amdgpu_gfx_compute_mqd_sw_fini(adev);
2057 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2058 amdgpu_gfx_kiq_fini(adev);
2059
2060 gfx_v8_0_mec_fini(adev);
2061 gfx_v8_0_rlc_fini(adev);
2062 gfx_v8_0_free_microcode(adev);
2063
2064 return 0;
2065 }
2066
gfx_v8_0_tiling_mode_table_init(struct amdgpu_device * adev)2067 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2068 {
2069 uint32_t *modearray, *mod2array;
2070 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2071 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2072 u32 reg_offset;
2073
2074 modearray = adev->gfx.config.tile_mode_array;
2075 mod2array = adev->gfx.config.macrotile_mode_array;
2076
2077 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2078 modearray[reg_offset] = 0;
2079
2080 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2081 mod2array[reg_offset] = 0;
2082
2083 switch (adev->asic_type) {
2084 case CHIP_TOPAZ:
2085 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2086 PIPE_CONFIG(ADDR_SURF_P2) |
2087 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2088 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2089 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2090 PIPE_CONFIG(ADDR_SURF_P2) |
2091 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2092 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2093 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2094 PIPE_CONFIG(ADDR_SURF_P2) |
2095 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2096 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2097 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2098 PIPE_CONFIG(ADDR_SURF_P2) |
2099 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2100 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2101 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2102 PIPE_CONFIG(ADDR_SURF_P2) |
2103 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2104 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2105 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2106 PIPE_CONFIG(ADDR_SURF_P2) |
2107 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2108 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2109 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2110 PIPE_CONFIG(ADDR_SURF_P2) |
2111 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2112 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2113 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2114 PIPE_CONFIG(ADDR_SURF_P2));
2115 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2116 PIPE_CONFIG(ADDR_SURF_P2) |
2117 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2118 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2119 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2120 PIPE_CONFIG(ADDR_SURF_P2) |
2121 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2122 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2123 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2124 PIPE_CONFIG(ADDR_SURF_P2) |
2125 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2126 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2127 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2128 PIPE_CONFIG(ADDR_SURF_P2) |
2129 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2130 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2131 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2132 PIPE_CONFIG(ADDR_SURF_P2) |
2133 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2134 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2135 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2136 PIPE_CONFIG(ADDR_SURF_P2) |
2137 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2138 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2139 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2140 PIPE_CONFIG(ADDR_SURF_P2) |
2141 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2142 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2143 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2144 PIPE_CONFIG(ADDR_SURF_P2) |
2145 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2146 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2147 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2148 PIPE_CONFIG(ADDR_SURF_P2) |
2149 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2150 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2151 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2152 PIPE_CONFIG(ADDR_SURF_P2) |
2153 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2154 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2155 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2156 PIPE_CONFIG(ADDR_SURF_P2) |
2157 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2158 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2159 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2160 PIPE_CONFIG(ADDR_SURF_P2) |
2161 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2162 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2163 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2164 PIPE_CONFIG(ADDR_SURF_P2) |
2165 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2166 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2167 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2168 PIPE_CONFIG(ADDR_SURF_P2) |
2169 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2170 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2171 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2172 PIPE_CONFIG(ADDR_SURF_P2) |
2173 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2174 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2175 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2176 PIPE_CONFIG(ADDR_SURF_P2) |
2177 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2178 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2179 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2180 PIPE_CONFIG(ADDR_SURF_P2) |
2181 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2182 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2183 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2184 PIPE_CONFIG(ADDR_SURF_P2) |
2185 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2186 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2187
2188 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2189 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2190 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2191 NUM_BANKS(ADDR_SURF_8_BANK));
2192 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2193 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2194 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2195 NUM_BANKS(ADDR_SURF_8_BANK));
2196 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2197 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2198 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2199 NUM_BANKS(ADDR_SURF_8_BANK));
2200 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2201 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2202 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2203 NUM_BANKS(ADDR_SURF_8_BANK));
2204 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2205 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2206 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2207 NUM_BANKS(ADDR_SURF_8_BANK));
2208 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2209 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2210 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2211 NUM_BANKS(ADDR_SURF_8_BANK));
2212 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2213 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2214 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2215 NUM_BANKS(ADDR_SURF_8_BANK));
2216 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2217 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2218 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2219 NUM_BANKS(ADDR_SURF_16_BANK));
2220 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2221 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2222 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2223 NUM_BANKS(ADDR_SURF_16_BANK));
2224 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2225 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2226 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2227 NUM_BANKS(ADDR_SURF_16_BANK));
2228 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2229 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2230 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2231 NUM_BANKS(ADDR_SURF_16_BANK));
2232 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2233 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2234 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2235 NUM_BANKS(ADDR_SURF_16_BANK));
2236 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2237 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2238 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2239 NUM_BANKS(ADDR_SURF_16_BANK));
2240 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2241 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2242 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2243 NUM_BANKS(ADDR_SURF_8_BANK));
2244
2245 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2246 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2247 reg_offset != 23)
2248 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2249
2250 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2251 if (reg_offset != 7)
2252 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2253
2254 break;
2255 case CHIP_FIJI:
2256 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2257 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2258 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2259 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2260 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2261 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2262 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2263 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2264 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2265 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2266 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2267 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2268 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2269 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2270 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2271 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2272 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2274 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2275 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2276 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2277 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2278 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2279 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2280 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2281 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2282 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2283 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2284 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2285 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2286 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2287 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2288 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2289 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2290 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2291 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2294 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2298 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2299 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2302 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2303 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2304 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2306 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2307 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2310 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2311 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2314 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2315 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2318 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2319 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2322 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2324 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2326 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2327 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2330 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2331 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2333 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2334 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2335 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2337 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2338 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2339 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2340 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2341 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2342 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2343 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2344 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2345 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2346 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2347 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2348 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2349 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2350 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2351 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2352 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2353 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2354 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2355 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2356 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2357 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2358 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2359 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2361 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2362 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2363 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2365 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2366 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2367 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2369 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2370 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2371 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2373 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2374 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2375 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2376 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2377 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2378
2379 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2382 NUM_BANKS(ADDR_SURF_8_BANK));
2383 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2386 NUM_BANKS(ADDR_SURF_8_BANK));
2387 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2388 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2389 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2390 NUM_BANKS(ADDR_SURF_8_BANK));
2391 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2394 NUM_BANKS(ADDR_SURF_8_BANK));
2395 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2396 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2397 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2398 NUM_BANKS(ADDR_SURF_8_BANK));
2399 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2400 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2401 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2402 NUM_BANKS(ADDR_SURF_8_BANK));
2403 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2406 NUM_BANKS(ADDR_SURF_8_BANK));
2407 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2408 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2409 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2410 NUM_BANKS(ADDR_SURF_8_BANK));
2411 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2412 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2413 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2414 NUM_BANKS(ADDR_SURF_8_BANK));
2415 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2417 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2418 NUM_BANKS(ADDR_SURF_8_BANK));
2419 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2420 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2421 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2422 NUM_BANKS(ADDR_SURF_8_BANK));
2423 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2424 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2425 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2426 NUM_BANKS(ADDR_SURF_8_BANK));
2427 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2428 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2429 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2430 NUM_BANKS(ADDR_SURF_8_BANK));
2431 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2432 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2433 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2434 NUM_BANKS(ADDR_SURF_4_BANK));
2435
2436 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2437 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2438
2439 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2440 if (reg_offset != 7)
2441 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2442
2443 break;
	case CHIP_TONGA:
		/*
		 * Tonga tiling configuration.
		 *
		 * modearray[] holds the GB_TILE_MODEn register values (array
		 * mode, pipe config, tile split and micro tile mode) and
		 * mod2array[] holds the GB_MACROTILE_MODEn values (bank
		 * width/height, macro tile aspect and bank count).  Both
		 * tables are flushed to the hardware by the WREG32 loops at
		 * the end of this case.
		 */
		/* modes 0-7: depth micro tiling, tile split 64B..2KB */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* mode 8: linear aligned (no tile split / micro tile mode) */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		/* modes 9-12: display micro tiling */
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* modes 13-18: thin micro tiling */
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/* modes 19-26: thick/xthick arrays, thick micro tiling
		 * (mode 24 keeps thin micro tiling) */
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/* modes 27-30: rotated micro tiling */
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macrotile (bank) parameters.  Entry 7 is deliberately left
		 * unset here and is skipped by the write loop below.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Flush both tables to the hardware. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* GB_MACROTILE_MODE7 is never initialized above, so skip it. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/*
		 * Polaris11/12 tiling configuration.
		 *
		 * Same table layout as the other VI parts, but every tile
		 * mode uses the 4-pipe ADDR_SURF_P4_16x16 pipe config (these
		 * are the smaller Polaris variants).  modearray[] feeds
		 * GB_TILE_MODEn, mod2array[] feeds GB_MACROTILE_MODEn; both
		 * are written out by the loops at the end of this case.
		 */
		/* modes 0-7: depth micro tiling, tile split 64B..2KB */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* mode 8: linear aligned (no tile split / micro tile mode) */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		/* modes 9-12: display micro tiling */
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* modes 13-18: thin micro tiling */
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/* modes 19-26: thick/xthick arrays, thick micro tiling
		 * (mode 24 keeps thin micro tiling) */
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/* modes 27-30: rotated micro tiling */
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macrotile (bank) parameters.  Entry 7 is deliberately left
		 * unset here and is skipped by the write loop below.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		/* entries 8/9 are the only ones with bank width 2 */
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Flush both tables to the hardware. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* GB_MACROTILE_MODE7 is never initialized above, so skip it. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		/*
		 * Polaris10 tiling configuration.
		 *
		 * Uses the 8-pipe ADDR_SURF_P8_32x32_16x16 pipe config for
		 * most modes, with P4_16x16 fallbacks for the PRT variants at
		 * indices 7, 12, 17, 23 and 30.  modearray[] feeds
		 * GB_TILE_MODEn, mod2array[] feeds GB_MACROTILE_MODEn; both
		 * are written out by the loops at the end of this case.
		 */
		/* modes 0-7: depth micro tiling, tile split 64B..2KB */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* mode 8: linear aligned (no tile split / micro tile mode) */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		/* modes 9-12: display micro tiling */
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* modes 13-18: thin micro tiling */
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/* modes 19-26: thick/xthick arrays, thick micro tiling
		 * (mode 24 keeps thin micro tiling) */
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/* modes 27-30: rotated micro tiling */
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macrotile (bank) parameters.  Entry 7 is deliberately left
		 * unset here and is skipped by the write loop below.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Flush both tables to the hardware. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* GB_MACROTILE_MODE7 is never initialized above, so skip it. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
3038 case CHIP_STONEY:
3039 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3040 PIPE_CONFIG(ADDR_SURF_P2) |
3041 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3042 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3043 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3044 PIPE_CONFIG(ADDR_SURF_P2) |
3045 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3046 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3047 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3048 PIPE_CONFIG(ADDR_SURF_P2) |
3049 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3050 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3051 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3052 PIPE_CONFIG(ADDR_SURF_P2) |
3053 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3054 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3055 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3056 PIPE_CONFIG(ADDR_SURF_P2) |
3057 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3058 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3059 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3060 PIPE_CONFIG(ADDR_SURF_P2) |
3061 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3062 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3063 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3064 PIPE_CONFIG(ADDR_SURF_P2) |
3065 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3066 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3067 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3068 PIPE_CONFIG(ADDR_SURF_P2));
3069 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3070 PIPE_CONFIG(ADDR_SURF_P2) |
3071 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3072 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3073 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3074 PIPE_CONFIG(ADDR_SURF_P2) |
3075 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3077 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3078 PIPE_CONFIG(ADDR_SURF_P2) |
3079 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3080 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3081 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3082 PIPE_CONFIG(ADDR_SURF_P2) |
3083 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3084 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3085 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3086 PIPE_CONFIG(ADDR_SURF_P2) |
3087 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3088 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3089 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3090 PIPE_CONFIG(ADDR_SURF_P2) |
3091 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3093 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3094 PIPE_CONFIG(ADDR_SURF_P2) |
3095 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3096 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3097 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3098 PIPE_CONFIG(ADDR_SURF_P2) |
3099 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3100 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3101 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3102 PIPE_CONFIG(ADDR_SURF_P2) |
3103 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3104 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3105 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3106 PIPE_CONFIG(ADDR_SURF_P2) |
3107 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3108 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3109 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3110 PIPE_CONFIG(ADDR_SURF_P2) |
3111 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3112 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3113 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3114 PIPE_CONFIG(ADDR_SURF_P2) |
3115 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3116 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3117 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3118 PIPE_CONFIG(ADDR_SURF_P2) |
3119 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3120 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3121 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3122 PIPE_CONFIG(ADDR_SURF_P2) |
3123 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3124 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3125 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3126 PIPE_CONFIG(ADDR_SURF_P2) |
3127 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3128 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3129 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3130 PIPE_CONFIG(ADDR_SURF_P2) |
3131 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3132 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3133 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3134 PIPE_CONFIG(ADDR_SURF_P2) |
3135 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3136 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3137 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3138 PIPE_CONFIG(ADDR_SURF_P2) |
3139 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3140 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3141
3142 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3143 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3144 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3145 NUM_BANKS(ADDR_SURF_8_BANK));
3146 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3147 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3148 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3149 NUM_BANKS(ADDR_SURF_8_BANK));
3150 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3151 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3152 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3153 NUM_BANKS(ADDR_SURF_8_BANK));
3154 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3155 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3156 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3157 NUM_BANKS(ADDR_SURF_8_BANK));
3158 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3159 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3160 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3161 NUM_BANKS(ADDR_SURF_8_BANK));
3162 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3163 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3164 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3165 NUM_BANKS(ADDR_SURF_8_BANK));
3166 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3167 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3168 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3169 NUM_BANKS(ADDR_SURF_8_BANK));
3170 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3171 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3172 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3173 NUM_BANKS(ADDR_SURF_16_BANK));
3174 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3175 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3176 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3177 NUM_BANKS(ADDR_SURF_16_BANK));
3178 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3179 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3180 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3181 NUM_BANKS(ADDR_SURF_16_BANK));
3182 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3183 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3184 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3185 NUM_BANKS(ADDR_SURF_16_BANK));
3186 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3187 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3188 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3189 NUM_BANKS(ADDR_SURF_16_BANK));
3190 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3191 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3192 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3193 NUM_BANKS(ADDR_SURF_16_BANK));
3194 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3195 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3196 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3197 NUM_BANKS(ADDR_SURF_8_BANK));
3198
3199 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3200 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3201 reg_offset != 23)
3202 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3203
3204 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3205 if (reg_offset != 7)
3206 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3207
3208 break;
3209 default:
3210 dev_warn(adev->dev,
3211 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3212 adev->asic_type);
3213
3214 case CHIP_CARRIZO:
3215 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3216 PIPE_CONFIG(ADDR_SURF_P2) |
3217 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3218 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3219 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3220 PIPE_CONFIG(ADDR_SURF_P2) |
3221 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3222 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3223 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3224 PIPE_CONFIG(ADDR_SURF_P2) |
3225 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3226 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3227 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3228 PIPE_CONFIG(ADDR_SURF_P2) |
3229 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3230 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3231 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3232 PIPE_CONFIG(ADDR_SURF_P2) |
3233 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3234 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3235 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3236 PIPE_CONFIG(ADDR_SURF_P2) |
3237 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3238 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3239 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3240 PIPE_CONFIG(ADDR_SURF_P2) |
3241 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3242 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3243 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3244 PIPE_CONFIG(ADDR_SURF_P2));
3245 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3246 PIPE_CONFIG(ADDR_SURF_P2) |
3247 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3248 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3249 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3250 PIPE_CONFIG(ADDR_SURF_P2) |
3251 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3252 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3253 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3254 PIPE_CONFIG(ADDR_SURF_P2) |
3255 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3256 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3257 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3258 PIPE_CONFIG(ADDR_SURF_P2) |
3259 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3260 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3261 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3262 PIPE_CONFIG(ADDR_SURF_P2) |
3263 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3264 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3265 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3266 PIPE_CONFIG(ADDR_SURF_P2) |
3267 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3268 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3269 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3270 PIPE_CONFIG(ADDR_SURF_P2) |
3271 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3272 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3273 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3274 PIPE_CONFIG(ADDR_SURF_P2) |
3275 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3276 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3277 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3278 PIPE_CONFIG(ADDR_SURF_P2) |
3279 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3280 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3281 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3282 PIPE_CONFIG(ADDR_SURF_P2) |
3283 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3284 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3285 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3286 PIPE_CONFIG(ADDR_SURF_P2) |
3287 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3288 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3289 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3290 PIPE_CONFIG(ADDR_SURF_P2) |
3291 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3292 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3293 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3294 PIPE_CONFIG(ADDR_SURF_P2) |
3295 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3296 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3297 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3298 PIPE_CONFIG(ADDR_SURF_P2) |
3299 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3300 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3301 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3302 PIPE_CONFIG(ADDR_SURF_P2) |
3303 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3304 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3305 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3306 PIPE_CONFIG(ADDR_SURF_P2) |
3307 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3308 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3309 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3310 PIPE_CONFIG(ADDR_SURF_P2) |
3311 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3312 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3313 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3314 PIPE_CONFIG(ADDR_SURF_P2) |
3315 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3317
3318 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3319 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3320 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3321 NUM_BANKS(ADDR_SURF_8_BANK));
3322 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3324 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3325 NUM_BANKS(ADDR_SURF_8_BANK));
3326 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3327 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3328 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3329 NUM_BANKS(ADDR_SURF_8_BANK));
3330 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3331 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3332 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3333 NUM_BANKS(ADDR_SURF_8_BANK));
3334 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3335 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3336 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3337 NUM_BANKS(ADDR_SURF_8_BANK));
3338 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3339 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3340 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3341 NUM_BANKS(ADDR_SURF_8_BANK));
3342 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3343 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3344 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3345 NUM_BANKS(ADDR_SURF_8_BANK));
3346 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3347 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3348 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3349 NUM_BANKS(ADDR_SURF_16_BANK));
3350 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3351 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3352 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3353 NUM_BANKS(ADDR_SURF_16_BANK));
3354 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3355 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3356 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3357 NUM_BANKS(ADDR_SURF_16_BANK));
3358 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3359 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3360 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3361 NUM_BANKS(ADDR_SURF_16_BANK));
3362 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3363 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3364 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3365 NUM_BANKS(ADDR_SURF_16_BANK));
3366 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3367 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3368 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3369 NUM_BANKS(ADDR_SURF_16_BANK));
3370 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3371 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3372 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3373 NUM_BANKS(ADDR_SURF_8_BANK));
3374
3375 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3376 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3377 reg_offset != 23)
3378 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3379
3380 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3381 if (reg_offset != 7)
3382 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3383
3384 break;
3385 }
3386 }
3387
static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
				  u32 se_num, u32 sh_num, u32 instance)
{
	u32 grbm_idx = 0;

	/* 0xffffffff in any field selects broadcast instead of one index */
	grbm_idx = (instance == 0xffffffff) ?
		REG_SET_FIELD(grbm_idx, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) :
		REG_SET_FIELD(grbm_idx, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);

	grbm_idx = (se_num == 0xffffffff) ?
		REG_SET_FIELD(grbm_idx, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1) :
		REG_SET_FIELD(grbm_idx, GRBM_GFX_INDEX, SE_INDEX, se_num);

	grbm_idx = (sh_num == 0xffffffff) ?
		REG_SET_FIELD(grbm_idx, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1) :
		REG_SET_FIELD(grbm_idx, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32(mmGRBM_GFX_INDEX, grbm_idx);
}
3410
gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device * adev)3411 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3412 {
3413 u32 data, mask;
3414
3415 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3416 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3417
3418 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3419
3420 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3421 adev->gfx.config.max_sh_per_se);
3422
3423 return (~data) & mask;
3424 }
3425
3426 static void
gfx_v8_0_raster_config(struct amdgpu_device * adev,u32 * rconf,u32 * rconf1)3427 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3428 {
3429 switch (adev->asic_type) {
3430 case CHIP_FIJI:
3431 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3432 RB_XSEL2(1) | PKR_MAP(2) |
3433 PKR_XSEL(1) | PKR_YSEL(1) |
3434 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3435 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3436 SE_PAIR_YSEL(2);
3437 break;
3438 case CHIP_TONGA:
3439 case CHIP_POLARIS10:
3440 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3441 SE_XSEL(1) | SE_YSEL(1);
3442 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3443 SE_PAIR_YSEL(2);
3444 break;
3445 case CHIP_TOPAZ:
3446 case CHIP_CARRIZO:
3447 *rconf |= RB_MAP_PKR0(2);
3448 *rconf1 |= 0x0;
3449 break;
3450 case CHIP_POLARIS11:
3451 case CHIP_POLARIS12:
3452 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3453 SE_XSEL(1) | SE_YSEL(1);
3454 *rconf1 |= 0x0;
3455 break;
3456 case CHIP_STONEY:
3457 *rconf |= 0x0;
3458 *rconf1 |= 0x0;
3459 break;
3460 default:
3461 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3462 break;
3463 }
3464 }
3465
/*
 * gfx_v8_0_write_harvested_raster_configs - program per-SE raster configs
 *
 * @adev: amdgpu_device pointer
 * @raster_config: baseline PA_SC_RASTER_CONFIG value for a fully
 *	enabled part
 * @raster_config_1: baseline PA_SC_RASTER_CONFIG_1 value
 * @rb_mask: bitmap of the render backends (RBs) that are actually active
 * @num_rb: number of RB pipes being considered
 *
 * When some RBs are harvested, the SE/PKR/RB mapping fields must be
 * rewritten per shader engine so work is only routed to backends that
 * exist; each adjusted value is written via GRBM_GFX_INDEX selection.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	/* clamp to at least 1 so the divisions below are safe */
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* slice the active-RB bitmap into one mask per SE (up to 4 SEs) */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* if one SE pair is entirely empty, steer SE_PAIR_MAP at the other */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* idx is the even index of this SE's pair */
		int idx = (se / 2) * 2;

		/* remap SE_MAP if this SE or its pair partner is empty */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		/* with more than 2 RBs per SE, remap an empty packer */
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			/* remap the RB pair inside packer 0 */
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				/* remap the RB pair inside packer 1 */
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3574
/*
 * gfx_v8_0_setup_rb - discover active render backends and program rasterizer
 *
 * @adev: amdgpu_device pointer
 *
 * Walks every SE/SH pair to build the active-RB bitmap, writes either the
 * default raster config (no harvesting visible) or the harvested variant,
 * then caches the per-SE/SH register values for userspace queries.
 * Serialized against other GRBM_GFX_INDEX users via grbm_idx_mutex.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	/* number of bitmap bits contributed by each SH */
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
				     adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			/* place this SH's bits at its slot in the global map */
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* back to broadcast */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		/* no harvesting to compensate for: program defaults */
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3631
3632 /**
3633 * gfx_v8_0_init_compute_vmid - gart enable
3634 *
3635 * @adev: amdgpu_device pointer
3636 *
3637 * Initialize compute vmid sh_mem registers
3638 *
3639 */
3640 #define DEFAULT_SH_MEM_BASES (0x6000)
3641 #define FIRST_COMPUTE_VMID (8)
3642 #define LAST_COMPUTE_VMID (16)
gfx_v8_0_init_compute_vmid(struct amdgpu_device * adev)3643 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3644 {
3645 int i;
3646 uint32_t sh_mem_config;
3647 uint32_t sh_mem_bases;
3648
3649 /*
3650 * Configure apertures:
3651 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3652 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3653 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3654 */
3655 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3656
3657 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3658 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3659 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3660 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3661 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3662 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3663
3664 mutex_lock(&adev->srbm_mutex);
3665 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3666 vi_srbm_select(adev, 0, 0, 0, i);
3667 /* CP and shaders */
3668 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3669 WREG32(mmSH_MEM_APE1_BASE, 1);
3670 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3671 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3672 }
3673 vi_srbm_select(adev, 0, 0, 0, 0);
3674 mutex_unlock(&adev->srbm_mutex);
3675 }
3676
gfx_v8_0_config_init(struct amdgpu_device * adev)3677 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3678 {
3679 switch (adev->asic_type) {
3680 default:
3681 adev->gfx.config.double_offchip_lds_buf = 1;
3682 break;
3683 case CHIP_CARRIZO:
3684 case CHIP_STONEY:
3685 adev->gfx.config.double_offchip_lds_buf = 0;
3686 break;
3687 }
3688 }
3689
/*
 * gfx_v8_0_gpu_init - one-time GFX8 hardware initialization
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the address/tiling configuration, discovers RBs and CUs,
 * sets up per-VMID SH_MEM apertures, initializes the compute VMIDs and
 * broadcasts the PA_SC FIFO sizes and SPI arbitration priorities to all
 * shader engines.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
					  SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: uncached default type, zero aperture base */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* other VMIDs: non-coherent default, shared aperture */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3771
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for the RLC serdes masters to go idle
 *
 * @adev: amdgpu_device pointer
 *
 * Polls the per-CU master busy register for every SE/SH, then the non-CU
 * (SE/GC/TC0/TC1) master busy bits; each poll loop gives up after
 * adev->usec_timeout microseconds.  Timeouts are not reported.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast selection before dropping the lock */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3801
static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 on = enable ? 1 : 0;
	u32 cntl = RREG32(mmCP_INT_CNTL_RING0);

	/* toggle the four gfx-idle related interrupt sources together */
	cntl = REG_SET_FIELD(cntl, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, on);
	cntl = REG_SET_FIELD(cntl, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, on);
	cntl = REG_SET_FIELD(cntl, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, on);
	cntl = REG_SET_FIELD(cntl, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, on);

	WREG32(mmCP_INT_CNTL_RING0, cntl);
}
3814
gfx_v8_0_init_csb(struct amdgpu_device * adev)3815 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3816 {
3817 /* csib */
3818 WREG32(mmRLC_CSIB_ADDR_HI,
3819 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3820 WREG32(mmRLC_CSIB_ADDR_LO,
3821 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3822 WREG32(mmRLC_CSIB_LENGTH,
3823 adev->gfx.rlc.clear_state_size);
3824 }
3825
gfx_v8_0_parse_ind_reg_list(int * register_list_format,int ind_offset,int list_size,int * unique_indices,int * indices_count,int max_indices,int * ind_start_offsets,int * offset_count,int max_offset)3826 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3827 int ind_offset,
3828 int list_size,
3829 int *unique_indices,
3830 int *indices_count,
3831 int max_indices,
3832 int *ind_start_offsets,
3833 int *offset_count,
3834 int max_offset)
3835 {
3836 int indices;
3837 bool new_entry = true;
3838
3839 for (; ind_offset < list_size; ind_offset++) {
3840
3841 if (new_entry) {
3842 new_entry = false;
3843 ind_start_offsets[*offset_count] = ind_offset;
3844 *offset_count = *offset_count + 1;
3845 BUG_ON(*offset_count >= max_offset);
3846 }
3847
3848 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3849 new_entry = true;
3850 continue;
3851 }
3852
3853 ind_offset += 2;
3854
3855 /* look for the matching indice */
3856 for (indices = 0;
3857 indices < *indices_count;
3858 indices++) {
3859 if (unique_indices[indices] ==
3860 register_list_format[ind_offset])
3861 break;
3862 }
3863
3864 if (indices >= *indices_count) {
3865 unique_indices[*indices_count] =
3866 register_list_format[ind_offset];
3867 indices = *indices_count;
3868 *indices_count = *indices_count + 1;
3869 BUG_ON(*indices_count >= max_indices);
3870 }
3871
3872 register_list_format[ind_offset] = indices;
3873 }
3874 }
3875
gfx_v8_0_init_save_restore_list(struct amdgpu_device * adev)3876 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3877 {
3878 int i, temp, data;
3879 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3880 int indices_count = 0;
3881 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3882 int offset_count = 0;
3883
3884 int list_size;
3885 unsigned int *register_list_format =
3886 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3887 if (!register_list_format)
3888 return -ENOMEM;
3889 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3890 adev->gfx.rlc.reg_list_format_size_bytes);
3891
3892 gfx_v8_0_parse_ind_reg_list(register_list_format,
3893 RLC_FormatDirectRegListLength,
3894 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3895 unique_indices,
3896 &indices_count,
3897 sizeof(unique_indices) / sizeof(int),
3898 indirect_start_offsets,
3899 &offset_count,
3900 sizeof(indirect_start_offsets)/sizeof(int));
3901
3902 /* save and restore list */
3903 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3904
3905 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3906 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3907 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3908
3909 /* indirect list */
3910 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3911 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3912 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3913
3914 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3915 list_size = list_size >> 1;
3916 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3917 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3918
3919 /* starting offsets starts */
3920 WREG32(mmRLC_GPM_SCRATCH_ADDR,
3921 adev->gfx.rlc.starting_offsets_start);
3922 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3923 WREG32(mmRLC_GPM_SCRATCH_DATA,
3924 indirect_start_offsets[i]);
3925
3926 /* unique indices */
3927 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3928 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3929 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3930 if (unique_indices[i] != 0) {
3931 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3932 WREG32(data + i, unique_indices[i] >> 20);
3933 }
3934 }
3935 kfree(register_list_format);
3936
3937 return 0;
3938 }
3939
/* Turn on the RLC save/restore machine (SRM) so it can use the
 * register lists programmed by gfx_v8_0_init_save_restore_list().
 */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
3944
/* Program the power-gating delay/threshold registers.  The magic values
 * appear to be the recommended settings for these parts — taken as-is
 * from the original bring-up code; confirm against the register spec
 * before changing.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	/* power up/down, command-propagate and memory-sleep delays */
	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
3961
/* Enable/disable SMU clock slow-down while the GFX block powers up. */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
3967
/* Enable/disable SMU clock slow-down while the GFX block powers down. */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
3973
/* Enable/disable CP power gating.  Note the hardware field has inverted
 * sense (CP_PG_DISABLE), so enable==true writes 0.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
3978
/* Per-ASIC power-gating initialization: set up the clear-state buffer,
 * the RLC save/restore list and machine, and the PG timing parameters.
 * Only the APUs (Carrizo/Stoney) additionally need the RLC jump table
 * and the always-on CU mask programmed.
 */
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v8_0_init_power_gating(adev);
		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
	} else if ((adev->asic_type == CHIP_POLARIS11) ||
		   (adev->asic_type == CHIP_POLARIS12)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		gfx_v8_0_init_power_gating(adev);
	}

}
3998
/* Halt the RLC F32 core, mask GUI idle interrupts and wait for the
 * RLC serdes traffic to drain before callers touch RLC state.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4006
/* Pulse the GRBM soft reset line for the RLC; 50us settle time after
 * both assert and de-assert.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4015
/* Start the RLC F32 core.  On dGPUs the GUI idle interrupt is enabled
 * here; APUs (e.g. carrizo) enable it only after the CP is initialized.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4026
/* Legacy (non-SMU) RLC microcode load: stream the firmware image dword
 * by dword into the RLC GPM ucode window, then write the firmware
 * version to the ADDR register to finish the upload.
 *
 * Returns 0 on success, -EINVAL if no RLC firmware was fetched.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* payload starts at ucode_array_offset_bytes; size is in bytes */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
4050
/* Bring the RLC back up: stop it, disable clockgating and powergating,
 * soft-reset it, re-initialize PG state, (re)load its microcode if the
 * powerplay/SMU path is not handling firmware, and finally restart it.
 *
 * Returns 0 on success or a negative error code from microcode loading.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	/* polaris also has a 3D CGCG/CGLS control; clear its enable bits */
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the fw; just verify it finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4095
/* Halt or un-halt the three CP gfx micro-engines (ME, PFP, CE) via
 * CP_ME_CNTL.  When halting, also mark all gfx rings not ready so no
 * further submissions are attempted.
 */
static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	udelay(50);
}
4115
/* Legacy (non-SMU) CP gfx microcode load.  Halts the gfx CP, then
 * streams the PFP, CE and ME firmware images into their respective
 * ucode windows, finishing each upload by writing the fw version to
 * the address register.
 *
 * Returns 0 on success, -EINVAL if any of the three images is missing.
 */
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	/* engines must be halted while their ucode is replaced */
	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
4172
gfx_v8_0_get_csb_size(struct amdgpu_device * adev)4173 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4174 {
4175 u32 count = 0;
4176 const struct cs_section_def *sect = NULL;
4177 const struct cs_extent_def *ext = NULL;
4178
4179 /* begin clear state */
4180 count += 2;
4181 /* context control state */
4182 count += 3;
4183
4184 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4185 for (ext = sect->section; ext->extent != NULL; ++ext) {
4186 if (sect->id == SECT_CONTEXT)
4187 count += 2 + ext->reg_count;
4188 else
4189 return 0;
4190 }
4191 }
4192 /* pa_sc_raster_config/pa_sc_raster_config1 */
4193 count += 4;
4194 /* end clear state */
4195 count += 2;
4196 /* clear state */
4197 count += 2;
4198
4199 return count;
4200 }
4201
/* Initialize the CP and submit the clear-state PM4 stream on gfx ring 0:
 * preamble/context-control, the SET_CONTEXT_REG packets from vi_cs_data,
 * per-ASIC pa_sc_raster_config values, CLEAR_STATE, and the CE partition
 * bases.  The allocation size must match gfx_v8_0_get_csb_size() (+4 for
 * the SET_BASE packet).
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* golden context register values from the clear-state tables */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC raster configuration (RB/SE layout dependent) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/* Configure the CP gfx (CPG) doorbell for the given ring: program the
 * doorbell offset/enable in CP_RB_DOORBELL_CONTROL and, on dGPUs, the
 * doorbell aperture range.  Iceland (Topaz) has no gfx doorbells at all.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* the doorbell range registers only exist on dGPUs */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4327
/* Bring up gfx ring 0: program ring buffer size/control, reset the
 * read/write pointers (RPTR_WR_ENA is held during the pointer reset),
 * set the rptr/wptr writeback addresses, program the ring base, set up
 * the doorbell, submit the clear-state stream and ring-test the ring.
 *
 * Returns 0 on success or the ring test error; on failure the ring is
 * marked not ready.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	/* drop RB_RPTR_WR_ENA to latch the reset pointers */
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is a 256-byte-aligned GPU address */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4385
/* Halt or un-halt the two compute micro-engines (MEC1/MEC2).  When
 * halting, mark all compute rings and the KIQ ring not ready.
 */
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
		adev->gfx.kiq.ring.ready = false;
	}
	udelay(50);
}
4400
/* Legacy (non-SMU) MEC microcode load.  Halts the compute engines and
 * uploads the MEC1 image; MEC2 is only loaded when a separate MEC2
 * image exists (otherwise MEC2 runs the MEC1 microcode).
 *
 * Returns 0 on success, -EINVAL if the MEC firmware is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4446
4447 /* KIQ functions */
/* Tell the RLC which me/pipe/queue is the KIQ.  The queue selection is
 * written first, then written again with the activate bit (0x80) set —
 * the two-step write is intentional.
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4461
/* Use the KIQ to map all kernel compute queues (KCQs): submit a
 * SET_RESOURCES packet with the queue mask followed by one MAP_QUEUES
 * packet per compute ring, then poll a scratch register write appended
 * to the stream to confirm the KIQ processed it.
 *
 * Returns 0 on success, -EINVAL on scratch-poll timeout, or an error
 * from scratch allocation / ring locking.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint32_t scratch, tmp = 0;
	uint64_t queue_mask = 0;
	int r, i;

	/* build the mask of all usable MEC queues */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	/* 8 dwords per MAP_QUEUES + 7 for SET_RESOURCES + 3 for the
	 * scratch write + 1 spare */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
4547
/* Deactivate the currently selected HQD (caller must have selected the
 * queue via srbm): if active, issue a dequeue request of type @req and
 * poll until the HQD goes inactive, then clear the dequeue request and
 * the PQ read/write pointers.
 *
 * Returns 0 on success, -ETIMEDOUT if the HQD never deactivated.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4568
/* Fill in the memory queue descriptor (MQD) image for a compute/KIQ
 * ring: static thread management masks, EOP buffer, doorbell control,
 * MQD/HQD base addresses, PQ control, writeback addresses, MTYPE
 * settings, and snapshots of the remaining HQD registers.  The MQD is
 * later written to the hardware by gfx_v8_0_mqd_commit() (or consumed
 * directly by the KIQ via MAP_QUEUES).
 *
 * Caller must hold srbm_mutex with the target me/pipe/queue selected.
 * Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* enable all SEs/CUs for this queue */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* dGPUs: point the MQD at its own dynamic CU mask storage */
	if (!(adev->flags & AMD_IS_APU)) {
		mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
					     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
		mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
					     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	}
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: snapshot current hardware values */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4713
/* Write a prepared MQD image into the HQD registers of the currently
 * selected queue.  The MQD fields mirror the register file starting at
 * mmCP_MQD_BASE_ADDR, so register writes are simple array lookups.
 * CP_HQD_ACTIVE is written last, which activates the queue.
 *
 * Caller must hold srbm_mutex with the target me/pipe/queue selected.
 * Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4750
/* Initialize the KIQ's queue.  On a GPU reset the MQD is restored from
 * the CPU-side backup and recommitted; otherwise a fresh MQD is built,
 * committed, and backed up for future resets.  The KIQ MQD backup lives
 * at index AMDGPU_MAX_COMPUTE_RINGS (one past the KCQ backups).
 *
 * Always returns 0.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->gfx.in_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a CPU copy so a GPU reset can restore it */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
4789
/* Initialize a kernel compute queue's MQD.  Unlike the KIQ the MQD is
 * not committed to registers here — the KIQ maps the queue later via
 * MAP_QUEUES.  First init builds and backs up a fresh MQD; a GPU reset
 * restores the backup and resets the ring; a plain resume just clears
 * the ring buffer.
 *
 * Always returns 0.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a CPU copy so a GPU reset can restore it */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4820
/* Program the MEC doorbell aperture (KIQ..MEC ring 7) on ASICs newer
 * than Tonga, then enable doorbell processing in the CP.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4830
/* Resume the compute side via the KIQ: enable the MEC, initialize the
 * KIQ queue, initialize every KCQ's MQD, program the MEC doorbell
 * range, ask the KIQ to map the KCQs, then ring-test the KIQ and all
 * KCQs.  Each MQD BO is reserved and kmapped only for the duration of
 * its init.
 *
 * Returns 0 on success or the first error encountered (BO reserve/
 * kmap, queue init, KCQ enable, or KIQ ring test).
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* Test KIQ */
	ring = &adev->gfx.kiq.ring;
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	/* Test KCQs; a KCQ failure only marks that ring not ready */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}
4898
gfx_v8_0_cp_resume(struct amdgpu_device * adev)4899 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4900 {
4901 int r;
4902
4903 if (!(adev->flags & AMD_IS_APU))
4904 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4905
4906 if (!adev->pp_enabled) {
4907 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
4908 /* legacy firmware loading */
4909 r = gfx_v8_0_cp_gfx_load_microcode(adev);
4910 if (r)
4911 return r;
4912
4913 r = gfx_v8_0_cp_compute_load_microcode(adev);
4914 if (r)
4915 return r;
4916 } else {
4917 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4918 AMDGPU_UCODE_ID_CP_CE);
4919 if (r)
4920 return -EINVAL;
4921
4922 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4923 AMDGPU_UCODE_ID_CP_PFP);
4924 if (r)
4925 return -EINVAL;
4926
4927 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4928 AMDGPU_UCODE_ID_CP_ME);
4929 if (r)
4930 return -EINVAL;
4931
4932 if (adev->asic_type == CHIP_TOPAZ) {
4933 r = gfx_v8_0_cp_compute_load_microcode(adev);
4934 if (r)
4935 return r;
4936 } else {
4937 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4938 AMDGPU_UCODE_ID_CP_MEC1);
4939 if (r)
4940 return -EINVAL;
4941 }
4942 }
4943 }
4944
4945 r = gfx_v8_0_cp_gfx_resume(adev);
4946 if (r)
4947 return r;
4948
4949 r = gfx_v8_0_kiq_resume(adev);
4950 if (r)
4951 return r;
4952
4953 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4954
4955 return 0;
4956 }
4957
/* Enable or disable both CP front ends (GFX and compute) together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4963
/*
 * IP-block hw_init hook: program golden registers, initialize the GPU core,
 * then bring up the RLC followed by the command processors.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	/* CP resume depends on the RLC being up */
	return gfx_v8_0_cp_resume(adev);
}
4980
/*
 * IP-block hw_fini hook: release GFX interrupts, then stop the CP and RLC
 * and ungate GFX power gating.  SRIOV VFs must not touch the hardware, so
 * they bail out after dropping the interrupts.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);

	/* leave GFX ungated so the next init starts from a known PG state */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
4999
/* Suspend hook: flag the suspend so queue init paths skip full MQD rebuilds,
 * then tear down the hardware state. */
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	adev->gfx.in_suspend = true;
	return gfx_v8_0_hw_fini(adev);
}
5006
/* Resume hook: re-init the hardware, then clear the suspend flag regardless
 * of the init result so later paths don't stay in resume mode. */
static int gfx_v8_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gfx_v8_0_hw_init(adev);
	adev->gfx.in_suspend = false;
	return r;
}
5016
gfx_v8_0_is_idle(void * handle)5017 static bool gfx_v8_0_is_idle(void *handle)
5018 {
5019 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5020
5021 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5022 return false;
5023 else
5024 return true;
5025 }
5026
gfx_v8_0_wait_for_idle(void * handle)5027 static int gfx_v8_0_wait_for_idle(void *handle)
5028 {
5029 unsigned i;
5030 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5031
5032 for (i = 0; i < adev->usec_timeout; i++) {
5033 if (gfx_v8_0_is_idle(handle))
5034 return 0;
5035
5036 udelay(1);
5037 }
5038 return -ETIMEDOUT;
5039 }
5040
/*
 * Derive the GRBM/SRBM soft-reset masks needed to recover a hung GFX block.
 * The masks are cached in adev->gfx.{grbm,srbm}_soft_reset for the
 * pre/soft/post reset stages.  Returns true when any reset bit is needed.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS: any busy GFX pipeline stage implies CP+GFX+GRBM reset */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: RLC and the CP fetcher/compute/gfx engines */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS: pending GRBM requests or busy semaphore block */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5102
/*
 * Quiesce the GFX block ahead of a soft reset: stop the RLC, halt the GFX
 * CP if its reset bits are set, and deactivate every compute HQD before
 * halting the MEC.  Uses the masks cached by gfx_v8_0_check_soft_reset().
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	/* nothing to do when no reset bits were flagged */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			/* deactivate this queue's HQD under SRBM selection;
			 * second arg is presumably a retry/poll count —
			 * NOTE(review): confirm against gfx_v8_0_deactivate_hqd() */
			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}
5144
/*
 * Execute the soft reset using the cached GRBM/SRBM masks: stall/clear the
 * GFX memory controller interface, pulse the reset bits (set, settle,
 * clear), then release the stall.  Each WREG32 of a *_SOFT_RESET register
 * is followed by a readback to post the write before the delay.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall and clear the GFX side of GMCON while resetting */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GMCON stall/clear */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5206
/*
 * Bring the GFX block back after a soft reset: resume the GFX CP if it was
 * reset, re-deactivate and resume the compute queues (via KIQ), and restart
 * the RLC.  Mirrors gfx_v8_0_pre_soft_reset() using the same cached masks.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			/* make sure each HQD is inactive before re-init */
			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
5244
5245 /**
5246 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5247 *
5248 * @adev: amdgpu_device pointer
5249 *
5250 * Fetches a GPU clock counter snapshot.
5251 * Returns the 64 bit clock counter snapshot.
5252 */
gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device * adev)5253 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5254 {
5255 uint64_t clock;
5256
5257 mutex_lock(&adev->gfx.gpu_clock_mutex);
5258 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5259 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5260 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5261 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5262 return clock;
5263 }
5264
/* Emit one WRITE_DATA packet storing @value at register offset @reg. */
static void gfx_v8_0_write_gds_reg(struct amdgpu_ring *ring,
				   uint32_t reg, uint32_t value)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, value);
}

/*
 * Emit the per-VMID GDS/GWS/OA register updates for a GDS switch.
 * Each value is written with an identical WRITE_DATA packet.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base >>= AMDGPU_GDS_SHIFT;
	gds_size >>= AMDGPU_GDS_SHIFT;
	gws_base >>= AMDGPU_GWS_SHIFT;
	gws_size >>= AMDGPU_GWS_SHIFT;
	oa_base >>= AMDGPU_OA_SHIFT;
	oa_size >>= AMDGPU_OA_SHIFT;

	/* GDS base and size */
	gfx_v8_0_write_gds_reg(ring, amdgpu_gds_reg_offset[vmid].mem_base,
			       gds_base);
	gfx_v8_0_write_gds_reg(ring, amdgpu_gds_reg_offset[vmid].mem_size,
			       gds_size);
	/* GWS: size and base packed into one register */
	gfx_v8_0_write_gds_reg(ring, amdgpu_gds_reg_offset[vmid].gws,
			       gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
	/* OA: contiguous run of oa_size bits starting at bit oa_base */
	gfx_v8_0_write_gds_reg(ring, amdgpu_gds_reg_offset[vmid].oa,
			       (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5312
/* Read one indirect SQ register for the given SIMD/wave. */
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	uint32_t sel;

	sel = wave << SQ_IND_INDEX__WAVE_ID__SHIFT;
	sel |= simd << SQ_IND_INDEX__SIMD_ID__SHIFT;
	sel |= address << SQ_IND_INDEX__INDEX__SHIFT;
	sel |= SQ_IND_INDEX__FORCE_READ_MASK;

	WREG32(mmSQ_IND_INDEX, sel);
	return RREG32(mmSQ_IND_DATA);
}
5322
/* Read @num consecutive indirect SQ registers (auto-increment) into @out. */
static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	uint32_t sel, i;

	sel = (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
	      (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
	      (regno << SQ_IND_INDEX__INDEX__SHIFT) |
	      (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
	      SQ_IND_INDEX__FORCE_READ_MASK |
	      SQ_IND_INDEX__AUTO_INCR_MASK;
	WREG32(mmSQ_IND_INDEX, sel);

	for (i = 0; i < num; i++)
		out[i] = RREG32(mmSQ_IND_DATA);
}
5337
/* Dump a type-0 wave snapshot into @dst, advancing *no_fields per entry. */
static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* indirect SQ registers making up the dump, in output order */
	static const uint32_t wave_regs[] = {
		ixSQ_WAVE_STATUS,
		ixSQ_WAVE_PC_LO,
		ixSQ_WAVE_PC_HI,
		ixSQ_WAVE_EXEC_LO,
		ixSQ_WAVE_EXEC_HI,
		ixSQ_WAVE_HW_ID,
		ixSQ_WAVE_INST_DW0,
		ixSQ_WAVE_INST_DW1,
		ixSQ_WAVE_GPR_ALLOC,
		ixSQ_WAVE_LDS_ALLOC,
		ixSQ_WAVE_TRAPSTS,
		ixSQ_WAVE_IB_STS,
		ixSQ_WAVE_TBA_LO,
		ixSQ_WAVE_TBA_HI,
		ixSQ_WAVE_TMA_LO,
		ixSQ_WAVE_TMA_HI,
		ixSQ_WAVE_IB_DBG0,
		ixSQ_WAVE_M0,
	};
	unsigned int i;

	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	for (i = 0; i < ARRAY_SIZE(wave_regs); i++)
		dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, wave_regs[i]);
}
5361
/* Read @size SGPRs starting at @start for the given SIMD/wave into @dst. */
static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	/* SGPRs live at SQIND_WAVE_SGPRS_OFFSET in the thread-0 indirect space */
	wave_read_regs(adev, simd, wave, 0,
		       SQIND_WAVE_SGPRS_OFFSET + start, size, dst);
}
5370
5371
/* GFX8 hooks exposed to common amdgpu code: clock-counter snapshot,
 * SE/SH selection, and wave/SGPR debug dumps. */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
5378
/*
 * IP-block early_init hook: set ring counts and install the GFX8 function
 * tables (gfx, ring, irq, gds, rlc) before any hardware is touched.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5393
/*
 * IP-block late_init hook: enable the privileged register/instruction
 * interrupts, run the EDC GPR workarounds (which need the IB pool, hence
 * late init), and gate GFX power gating.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5417
/*
 * Toggle static per-CU medium-grain power gating.  Polaris11/12 additionally
 * notify the SMU through the powerplay powergating interface.
 */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	bool notify_smu = (adev->asic_type == CHIP_POLARIS11) ||
			  (adev->asic_type == CHIP_POLARIS12);

	if (notify_smu) {
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
	}

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5431
/* Toggle dynamic per-CU medium-grain power gating. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5437
/* Toggle quick medium-grain power gating (Polaris11-class parts). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5443
/* Toggle GFX coarse-grain power gating (Carrizo/Stoney). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5449
/* Toggle GFX pipeline power gating (Carrizo/Stoney). */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5459
/*
 * Apply Carrizo/Stoney coarse-grain + pipeline power gating.  Pipeline PG is
 * only enabled when CG PG is on and the platform advertises support for it.
 */
static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	bool gate = enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG);

	cz_enable_gfx_cg_power_gating(adev, gate);

	if (gate) {
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			cz_enable_gfx_pipeline_power_gating(adev, true);
	} else {
		cz_enable_gfx_pipeline_power_gating(adev, false);
	}
}
5472
/*
 * IP-block set_powergating_state hook.  Applies the per-ASIC power-gating
 * configuration inside RLC safe mode (when any of the affected PG features
 * is supported).  SRIOV VFs never touch PG state.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	/* PG reprogramming must happen with the RLC in safe mode */
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		adev->gfx.rlc.funcs->enter_safe_mode(adev);
	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SCK slow-down follows RLC/SMU handshake support */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		/* other ASICs have no runtime-configurable GFX PG here */
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		adev->gfx.rlc.funcs->exit_safe_mode(adev);
	return 0;
}
5542
/*
 * Report the currently active GFX clock-gating features by decoding the
 * relevant RLC/CGTS/CP registers into AMD_CG_SUPPORT_* bits in *flags.
 *
 * NOTE(review): for SRIOV VFs *flags is zeroed, but the register reads
 * below still execute and may OR bits back in — confirm that is intended.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGLG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5584
/*
 * Broadcast a BPM serdes command (@cmd) for register @reg_addr to every
 * SE/SH/CU via RLC_SERDES_WR_CTRL.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data, clear_mask;

	/* address all SEs/SHs/CUs */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	/* fields rewritten below; Stoney keeps its BPM_DATA/REG_ADDR bits */
	clear_mask = RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
		     RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
		     RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
		     RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
		     RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
		     RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
		     RLC_SERDES_WR_CTRL__POWER_UP_MASK |
		     RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
		     RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK;
	if (adev->asic_type != CHIP_STONEY)
		clear_mask |= RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			      RLC_SERDES_WR_CTRL__REG_ADDR_MASK;

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	data &= ~clear_mask;
	data |= RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		(cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		(reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		(0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT);

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5625
/* Hand-rolled encoding of the RLC safe-mode request message
 * (REQ/MESSAGE fields of RLC_GPR_REG2). */
#define MSG_ENTER_RLC_SAFE_MODE 1
#define MSG_EXIT_RLC_SAFE_MODE 0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5632
/*
 * Ask the RLC to enter safe mode and wait for it to take effect.
 * No-op when the RLC F32 core is disabled or when neither CGCG nor MGCG
 * is enabled (safe mode is only needed while clock gating is active).
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* issue the enter-safe-mode command (CMD + MESSAGE=1) */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX clocks and power to report on */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to acknowledge the command (CMD clears) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5666
/*
 * Ask the RLC to leave safe mode (MESSAGE=0) and wait for the command
 * acknowledge.  Mirrors iceland_enter_rlc_safe_mode(); no-op when the RLC
 * F32 core is disabled.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* CMD set with MESSAGE cleared requests safe-mode exit */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to acknowledge (CMD clears) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5691
/* RLC safe-mode entry/exit hooks installed by gfx_v8_0_set_rlc_funcs(). */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5696
/*
 * Enable or disable medium-grain clock gating (MGCG) plus its light-sleep
 * companions (RLC/CP memory LS, CGTS tree-shade).  The numbered steps are
 * a fixed hardware programming sequence and must run in order, inside RLC
 * safe mode, with serdes idle waits between register updates.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: clear overrides so MGCG engages
		 * (APUs keep the GRBM override bit set) */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
					RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
					RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
					RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
					RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
					RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5800
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS
 * @adev: amdgpu device pointer
 * @enable: true to enable coarse grain clock gating, false to disable
 *
 * Clears or sets the CGCG/CGLS override bits in RLC_CGTT_MGCG_OVERRIDE,
 * sends the matching BPM SERDES commands and updates RLC_CGCG_CGLS_CTRL.
 * Runs with the RLC in safe mode; GUI idle interrupts are re-enabled on
 * both paths so powergating keeps working.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear cgcg override in RLC_CGTT_MGCG_OVERRIDE */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			/* also clear the cgls override */
			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
/*
 * gfx_v8_0_update_gfx_clock_gating - toggle MGCG and CGCG together
 * @adev: amdgpu device pointer
 * @enable: true to enable GFX clock gating, false to disable
 *
 * Ordering matters: coarse grain (CGCG/CGLS) must be enabled after and
 * disabled before medium grain (MGCG/MGLS/TS).  Always returns 0.
 */
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* === MGCG + MGLS + TS(CG/LS) first, then CGCG + CGLS === */
		gfx_v8_0_update_medium_grain_clock_gating(adev, true);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, true);
	} else {
		/* === CGCG + CGLS first, then MGCG + MGLS + TS(CG/LS) === */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, false);
		gfx_v8_0_update_medium_grain_clock_gating(adev, false);
	}
	return 0;
}
5911
gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device * adev,enum amd_clockgating_state state)5912 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5913 enum amd_clockgating_state state)
5914 {
5915 uint32_t msg_id, pp_state = 0;
5916 uint32_t pp_support_state = 0;
5917 void *pp_handle = adev->powerplay.pp_handle;
5918
5919 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5920 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5921 pp_support_state = PP_STATE_SUPPORT_LS;
5922 pp_state = PP_STATE_LS;
5923 }
5924 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5925 pp_support_state |= PP_STATE_SUPPORT_CG;
5926 pp_state |= PP_STATE_CG;
5927 }
5928 if (state == AMD_CG_STATE_UNGATE)
5929 pp_state = 0;
5930
5931 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5932 PP_BLOCK_GFX_CG,
5933 pp_support_state,
5934 pp_state);
5935 amd_set_clockgating_by_smu(pp_handle, msg_id);
5936 }
5937
5938 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5939 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5940 pp_support_state = PP_STATE_SUPPORT_LS;
5941 pp_state = PP_STATE_LS;
5942 }
5943
5944 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5945 pp_support_state |= PP_STATE_SUPPORT_CG;
5946 pp_state |= PP_STATE_CG;
5947 }
5948
5949 if (state == AMD_CG_STATE_UNGATE)
5950 pp_state = 0;
5951
5952 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5953 PP_BLOCK_GFX_MG,
5954 pp_support_state,
5955 pp_state);
5956 amd_set_clockgating_by_smu(pp_handle, msg_id);
5957 }
5958
5959 return 0;
5960 }
5961
gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device * adev,enum amd_clockgating_state state)5962 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5963 enum amd_clockgating_state state)
5964 {
5965
5966 uint32_t msg_id, pp_state = 0;
5967 uint32_t pp_support_state = 0;
5968 void *pp_handle = adev->powerplay.pp_handle;
5969
5970 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5971 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5972 pp_support_state = PP_STATE_SUPPORT_LS;
5973 pp_state = PP_STATE_LS;
5974 }
5975 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5976 pp_support_state |= PP_STATE_SUPPORT_CG;
5977 pp_state |= PP_STATE_CG;
5978 }
5979 if (state == AMD_CG_STATE_UNGATE)
5980 pp_state = 0;
5981
5982 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5983 PP_BLOCK_GFX_CG,
5984 pp_support_state,
5985 pp_state);
5986 amd_set_clockgating_by_smu(pp_handle, msg_id);
5987 }
5988
5989 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5990 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5991 pp_support_state = PP_STATE_SUPPORT_LS;
5992 pp_state = PP_STATE_LS;
5993 }
5994 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5995 pp_support_state |= PP_STATE_SUPPORT_CG;
5996 pp_state |= PP_STATE_CG;
5997 }
5998 if (state == AMD_CG_STATE_UNGATE)
5999 pp_state = 0;
6000
6001 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6002 PP_BLOCK_GFX_3D,
6003 pp_support_state,
6004 pp_state);
6005 amd_set_clockgating_by_smu(pp_handle, msg_id);
6006 }
6007
6008 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6009 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6010 pp_support_state = PP_STATE_SUPPORT_LS;
6011 pp_state = PP_STATE_LS;
6012 }
6013
6014 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6015 pp_support_state |= PP_STATE_SUPPORT_CG;
6016 pp_state |= PP_STATE_CG;
6017 }
6018
6019 if (state == AMD_CG_STATE_UNGATE)
6020 pp_state = 0;
6021
6022 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6023 PP_BLOCK_GFX_MG,
6024 pp_support_state,
6025 pp_state);
6026 amd_set_clockgating_by_smu(pp_handle, msg_id);
6027 }
6028
6029 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6030 pp_support_state = PP_STATE_SUPPORT_LS;
6031
6032 if (state == AMD_CG_STATE_UNGATE)
6033 pp_state = 0;
6034 else
6035 pp_state = PP_STATE_LS;
6036
6037 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6038 PP_BLOCK_GFX_RLC,
6039 pp_support_state,
6040 pp_state);
6041 amd_set_clockgating_by_smu(pp_handle, msg_id);
6042 }
6043
6044 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6045 pp_support_state = PP_STATE_SUPPORT_LS;
6046
6047 if (state == AMD_CG_STATE_UNGATE)
6048 pp_state = 0;
6049 else
6050 pp_state = PP_STATE_LS;
6051 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6052 PP_BLOCK_GFX_CP,
6053 pp_support_state,
6054 pp_state);
6055 amd_set_clockgating_by_smu(pp_handle, msg_id);
6056 }
6057
6058 return 0;
6059 }
6060
gfx_v8_0_set_clockgating_state(void * handle,enum amd_clockgating_state state)6061 static int gfx_v8_0_set_clockgating_state(void *handle,
6062 enum amd_clockgating_state state)
6063 {
6064 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6065
6066 if (amdgpu_sriov_vf(adev))
6067 return 0;
6068
6069 switch (adev->asic_type) {
6070 case CHIP_FIJI:
6071 case CHIP_CARRIZO:
6072 case CHIP_STONEY:
6073 gfx_v8_0_update_gfx_clock_gating(adev,
6074 state == AMD_CG_STATE_GATE);
6075 break;
6076 case CHIP_TONGA:
6077 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6078 break;
6079 case CHIP_POLARIS10:
6080 case CHIP_POLARIS11:
6081 case CHIP_POLARIS12:
6082 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6083 break;
6084 default:
6085 break;
6086 }
6087 return 0;
6088 }
6089
gfx_v8_0_ring_get_rptr(struct amdgpu_ring * ring)6090 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6091 {
6092 return ring->adev->wb.wb[ring->rptr_offs];
6093 }
6094
gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring * ring)6095 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6096 {
6097 struct amdgpu_device *adev = ring->adev;
6098
6099 if (ring->use_doorbell)
6100 /* XXX check if swapping is necessary on BE */
6101 return ring->adev->wb.wb[ring->wptr_offs];
6102 else
6103 return RREG32(mmCP_RB0_WPTR);
6104 }
6105
gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring * ring)6106 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6107 {
6108 struct amdgpu_device *adev = ring->adev;
6109
6110 if (ring->use_doorbell) {
6111 /* XXX check if swapping is necessary on BE */
6112 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6113 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6114 } else {
6115 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6116 (void)RREG32(mmCP_RB0_WPTR);
6117 }
6118 }
6119
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit a PM4 HDP flush on @ring
 *
 * Emits a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE until the per-engine bit (selected by me/pipe for
 * compute/KIQ rings, CP0 for gfx) reads back.  Returns silently for an
 * unexpected compute ring->me.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		/* pick the CPn done bit matching this MEC pipe */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);	/* value written to REQ */
	amdgpu_ring_write(ring, ref_and_mask);	/* mask polled on DONE */
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6152
/*
 * gfx_v8_0_ring_emit_vgt_flush - emit VS_PARTIAL_FLUSH + VGT_FLUSH events
 *
 * Two EVENT_WRITE packets: drain the VS stage first, then flush the VGT.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6163
6164
/*
 * gfx_v8_0_ring_emit_hdp_invalidate - invalidate the HDP cache from the CP
 *
 * Emits a confirmed WRITE_DATA of 1 to HDP_DEBUG0, which on VI triggers
 * an HDP cache invalidate.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | /* ME engine */
				 WRITE_DATA_DST_SEL(0) |    /* register */
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);	/* no register offset */
	amdgpu_ring_write(ring, 1);	/* value */

}
6176
/*
 * gfx_v8_0_ring_emit_ib_gfx - schedule an indirect buffer on the gfx ring
 * @ring: gfx ring
 * @ib: indirect buffer to execute
 * @vm_id: VMID the IB runs under (encoded in bits 24+ of the control word)
 * @ctx_switch: unused here
 *
 * Emits INDIRECT_BUFFER_CONST for CE IBs and INDIRECT_BUFFER for DE IBs.
 * Under SR-IOV, preemptible DE IBs are marked PRE_ENB and preceded by DE
 * meta data.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* byte-swap the IB on BE hosts */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));	/* dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6206
/*
 * gfx_v8_0_ring_emit_ib_compute - schedule an indirect buffer on a
 * compute ring
 * @ring: compute ring
 * @ib: indirect buffer to execute
 * @vm_id: VMID the IB runs under (encoded in bits 24+ of the control word)
 * @ctx_switch: unused here
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
					  (2 << 0) |	/* byte-swap the IB on BE hosts */
#endif
					  (ib->gpu_addr & 0xFFFFFFFC));	/* dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6222
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the gfx ring
 * @ring: gfx ring
 * @addr: GPU address the sequence number is written to
 * @seq: sequence number value
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit write,
 *	   AMDGPU_FENCE_FLAG_INT requests an interrupt on completion
 *
 * Uses EVENT_WRITE_EOP so the TC/TCL1 caches are flushed before the
 * sequence number lands in memory.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);	/* dword aligned */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6243
/*
 * gfx_v8_0_ring_emit_pipeline_sync - wait for this ring's last fence
 *
 * Emits a WAIT_REG_MEM that polls the ring's fence address until it
 * equals the latest emitted sequence number, stalling the PFP (gfx
 * rings) or ME (compute rings) until prior work completes.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);	/* dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);		/* reference value */
	amdgpu_ring_write(ring, 0xffffffff);	/* compare mask */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6260
/*
 * gfx_v8_0_ring_emit_vm_flush - flush the TLB for a VMID from the CP
 * @ring: ring to emit on
 * @vm_id: VMID whose page table base is updated and whose TLB is flushed
 * @pd_addr: new page directory base address
 *
 * Writes the per-VMID page table base register (contexts 0-7 and 8-15
 * live in two separate register banks), requests the invalidate, polls
 * VM_INVALIDATE_REQUEST until it reads back as 0, and finally syncs PFP
 * to ME on gfx rings.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);	/* base is page-frame number */

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6307
gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring * ring)6308 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6309 {
6310 return ring->adev->wb.wb[ring->wptr_offs];
6311 }
6312
gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring * ring)6313 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6314 {
6315 struct amdgpu_device *adev = ring->adev;
6316
6317 /* XXX check if swapping is necessary on BE */
6318 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6319 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6320 }
6321
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring
 * @ring: compute ring
 * @addr: GPU address the sequence number is written to
 * @seq: sequence number value
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit write,
 *	   AMDGPU_FENCE_FLAG_INT requests an interrupt on completion
 *
 * Compute uses RELEASE_MEM instead of EVENT_WRITE_EOP; cache flush
 * actions and event type are the same as on the gfx ring.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);	/* dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6342
/*
 * gfx_v8_0_ring_emit_fence_kiq - emit a fence on the KIQ ring
 * @ring: KIQ ring
 * @addr: GPU address the 32-bit sequence number is written to
 * @seq: sequence number value (only the low 32 bits are stored)
 * @flags: AMDGPU_FENCE_FLAG_INT triggers a GENERIC2 interrupt;
 *	   AMDGPU_FENCE_FLAG_64BIT is not supported (BUG)
 *
 * Writes the fence with a plain WRITE_DATA to memory and, when asked,
 * pokes CPC_INT_STATUS to raise the interrupt.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6367
/* Emit a SWITCH_BUFFER packet (flips the CE/DE buffer set). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6373
/*
 * gfx_v8_ring_emit_cntxcntl - emit a CONTEXT_CONTROL packet
 * @ring: gfx ring
 * @flags: AMDGPU_HAVE_CTX_SWITCH / AMDGPU_PREAMBLE_IB_PRESENT* bits
 *
 * Builds the CONTEXT_CONTROL load-enable word (dw2) from the submission
 * flags; on a context switch it also flushes the VGT first.  Under
 * SR-IOV the CE meta data is emitted up front.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6406
/*
 * gfx_v8_0_ring_emit_init_cond_exec - open a COND_EXEC region
 *
 * Emits a COND_EXEC packet that skips the following DWs when the value
 * at ring->cond_exe_gpu_addr is 0.  The skip count is emitted as a
 * dummy (0x55aa55aa) and must later be patched via
 * gfx_v8_0_ring_emit_patch_cond_exec().
 *
 * Returns the ring offset of the dummy DW, for the patch call.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6419
/*
 * gfx_v8_0_ring_emit_patch_cond_exec - patch a COND_EXEC skip count
 * @ring: ring the COND_EXEC was emitted on
 * @offset: ring offset of the dummy DW, as returned by
 *	    gfx_v8_0_ring_emit_init_cond_exec()
 *
 * Replaces the 0x55aa55aa placeholder with the number of DWs between it
 * and the current wptr, accounting for ring-buffer wraparound.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	/* last DW written before the current wptr */
	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wptr wrapped past the end of the ring since the COND_EXEC */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6433
/*
 * gfx_v8_0_ring_emit_rreg - read a register via the CP
 * @ring: ring to emit on
 * @reg: register offset to read
 *
 * Emits a COPY_DATA packet that copies the register value into the
 * writeback slot at adev->virt.reg_val_offs, where the caller can pick
 * it up (used for register access under SR-IOV).
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6449
/*
 * gfx_v8_0_ring_emit_wreg - write a register via the CP
 * @ring: ring to emit on
 * @reg: register offset to write
 * @val: value to write
 */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
6459
gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device * adev,enum amdgpu_interrupt_state state)6460 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6461 enum amdgpu_interrupt_state state)
6462 {
6463 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6464 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6465 }
6466
gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device * adev,int me,int pipe,enum amdgpu_interrupt_state state)6467 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6468 int me, int pipe,
6469 enum amdgpu_interrupt_state state)
6470 {
6471 u32 mec_int_cntl, mec_int_cntl_reg;
6472
6473 /*
6474 * amdgpu controls only the first MEC. That's why this function only
6475 * handles the setting of interrupts for this specific MEC. All other
6476 * pipes' interrupts are set by amdkfd.
6477 */
6478
6479 if (me == 1) {
6480 switch (pipe) {
6481 case 0:
6482 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6483 break;
6484 case 1:
6485 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6486 break;
6487 case 2:
6488 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6489 break;
6490 case 3:
6491 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6492 break;
6493 default:
6494 DRM_DEBUG("invalid pipe %d\n", pipe);
6495 return;
6496 }
6497 } else {
6498 DRM_DEBUG("invalid me %d\n", me);
6499 return;
6500 }
6501
6502 switch (state) {
6503 case AMDGPU_IRQ_STATE_DISABLE:
6504 mec_int_cntl = RREG32(mec_int_cntl_reg);
6505 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6506 WREG32(mec_int_cntl_reg, mec_int_cntl);
6507 break;
6508 case AMDGPU_IRQ_STATE_ENABLE:
6509 mec_int_cntl = RREG32(mec_int_cntl_reg);
6510 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6511 WREG32(mec_int_cntl_reg, mec_int_cntl);
6512 break;
6513 default:
6514 break;
6515 }
6516 }
6517
gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device * adev,struct amdgpu_irq_src * source,unsigned type,enum amdgpu_interrupt_state state)6518 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6519 struct amdgpu_irq_src *source,
6520 unsigned type,
6521 enum amdgpu_interrupt_state state)
6522 {
6523 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6524 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6525
6526 return 0;
6527 }
6528
gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device * adev,struct amdgpu_irq_src * source,unsigned type,enum amdgpu_interrupt_state state)6529 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6530 struct amdgpu_irq_src *source,
6531 unsigned type,
6532 enum amdgpu_interrupt_state state)
6533 {
6534 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6535 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6536
6537 return 0;
6538 }
6539
gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device * adev,struct amdgpu_irq_src * src,unsigned type,enum amdgpu_interrupt_state state)6540 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6541 struct amdgpu_irq_src *src,
6542 unsigned type,
6543 enum amdgpu_interrupt_state state)
6544 {
6545 switch (type) {
6546 case AMDGPU_CP_IRQ_GFX_EOP:
6547 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6548 break;
6549 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6550 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6551 break;
6552 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6553 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6554 break;
6555 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6556 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6557 break;
6558 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6559 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6560 break;
6561 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6562 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6563 break;
6564 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6565 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6566 break;
6567 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6568 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6569 break;
6570 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6571 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6572 break;
6573 default:
6574 break;
6575 }
6576 return 0;
6577 }
6578
/*
 * gfx_v8_0_eop_irq - CP EOP interrupt handler
 *
 * Decodes me/pipe/queue from the IV ring_id and processes fences on the
 * gfx ring (me 0) or the matching compute ring (me 1/2).  Returns 0.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	/* ring_id layout: [6:4] queue, [3:2] me, [1:0] pipe */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6610
/*
 * gfx_v8_0_priv_reg_irq - privileged register fault handler
 *
 * A command stream touched a privileged register: log it and schedule a
 * GPU reset.  Returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6619
/*
 * gfx_v8_0_priv_inst_irq - privileged instruction fault handler
 *
 * A command stream executed a privileged instruction: log it and
 * schedule a GPU reset.  Returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6628
/*
 * gfx_v8_0_kiq_set_interrupt_state - gate the KIQ GENERIC2 interrupt
 *
 * Enables/disables GENERIC2 both globally (CPC_INT_CNTL) and in the
 * per-pipe INT_CNTL register of the pipe the KIQ ring lives on.  Only
 * AMDGPU_CP_KIQ_IRQ_DRIVER0 is supported.  Returns 0.
 */
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}
6657
gfx_v8_0_kiq_irq(struct amdgpu_device * adev,struct amdgpu_irq_src * source,struct amdgpu_iv_entry * entry)6658 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6659 struct amdgpu_irq_src *source,
6660 struct amdgpu_iv_entry *entry)
6661 {
6662 u8 me_id, pipe_id, queue_id;
6663 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6664
6665 me_id = (entry->ring_id & 0x0c) >> 2;
6666 pipe_id = (entry->ring_id & 0x03) >> 0;
6667 queue_id = (entry->ring_id & 0x70) >> 4;
6668 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6669 me_id, pipe_id, queue_id);
6670
6671 amdgpu_fence_process(ring);
6672 return 0;
6673 }
6674
/* IP-block level lifecycle/power callbacks shared by all GFX v8 variants. */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6695
/* Ring callbacks for the graphics (GFX) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	/* worst-case dword budget per frame; keep in sync with the emit
	 * helpers below if any of them change their packet sizes */
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
		       prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};
6739
/* Ring callbacks for the compute (MEC) rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	/* worst-case dword budget per frame for a compute submission */
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6768
/* Ring callbacks for the kernel interface queue (KIQ).  Unlike the
 * compute rings, KIQ adds register read/write emission (emit_rreg/emit_wreg)
 * and uses its own fence emission. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6794
gfx_v8_0_set_ring_funcs(struct amdgpu_device * adev)6795 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6796 {
6797 int i;
6798
6799 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6800
6801 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6802 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6803
6804 for (i = 0; i < adev->gfx.num_compute_rings; i++)
6805 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6806 }
6807
/* End-of-pipe (fence) interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

/* Privileged register access fault source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

/* Privileged instruction fault source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

/* KIQ GENERIC2 interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};
6827
/* Register the GFX interrupt sources (EOP, priv-reg/inst faults, KIQ)
 * with their handler tables and type counts. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}
6842
/* Install the RLC callback table (the Iceland table is used here for all
 * chips routed through this function). */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
6847
gfx_v8_0_set_gds_init(struct amdgpu_device * adev)6848 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6849 {
6850 /* init asci gds info */
6851 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6852 adev->gds.gws.total_size = 64;
6853 adev->gds.oa.total_size = 16;
6854
6855 if (adev->gds.mem.total_size == 64 * 1024) {
6856 adev->gds.mem.gfx_partition_size = 4096;
6857 adev->gds.mem.cs_partition_size = 4096;
6858
6859 adev->gds.gws.gfx_partition_size = 4;
6860 adev->gds.gws.cs_partition_size = 4;
6861
6862 adev->gds.oa.gfx_partition_size = 4;
6863 adev->gds.oa.cs_partition_size = 1;
6864 } else {
6865 adev->gds.mem.gfx_partition_size = 1024;
6866 adev->gds.mem.cs_partition_size = 1024;
6867
6868 adev->gds.gws.gfx_partition_size = 16;
6869 adev->gds.gws.cs_partition_size = 16;
6870
6871 adev->gds.oa.gfx_partition_size = 4;
6872 adev->gds.oa.cs_partition_size = 4;
6873 }
6874 }
6875
/**
 * gfx_v8_0_set_user_cu_inactive_bitmap - apply a user-requested CU disable mask
 * @adev: amdgpu device pointer
 * @bitmap: per-SH bitmap of compute units to mark inactive (0 = no-op)
 *
 * Writes the mask into the INACTIVE_CUS field of
 * GC_USER_SHADER_ARRAY_CONFIG for the currently selected SE/SH.
 */
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 inactive_cus;

	if (bitmap == 0)
		return;

	inactive_cus = (bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT) &
		       GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, inactive_cus);
}
6889
gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device * adev)6890 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6891 {
6892 u32 data, mask;
6893
6894 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6895 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6896
6897 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6898
6899 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6900 }
6901
/**
 * gfx_v8_0_get_cu_info - gather compute-unit topology into adev->gfx.cu_info
 *
 * Walks every shader engine / shader array, applies any user CU disable
 * masks, records the active-CU bitmap per SE/SH, counts the total number
 * of active CUs and builds the "always on" (AO) CU mask.  The GRBM
 * per-instance register reads require selecting each SE/SH in turn, so
 * the whole walk runs under grbm_idx_mutex and restores broadcast mode
 * at the end.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs cap the number of always-on CUs per SH at 2 */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	/* parse the amdgpu.disable_cu module option into per-SE/SH masks */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* select this SE/SH so the register reads below are
			 * per-instance rather than broadcast */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; the first ao_cu_num of them are
			 * marked always-on */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* ao_cu_mask packs 8 bits per SH, 16 per SE */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* restore broadcast mode before releasing the GRBM index lock */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
6952
/* GFX IP block descriptor for v8.0 parts. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
6961
/* GFX IP block descriptor for v8.1 parts (same callbacks as v8.0). */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
6970
gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring * ring)6971 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
6972 {
6973 uint64_t ce_payload_addr;
6974 int cnt_ce;
6975 static union {
6976 struct vi_ce_ib_state regular;
6977 struct vi_ce_ib_state_chained_ib chained;
6978 } ce_payload = {};
6979
6980 if (ring->adev->virt.chained_ib_support) {
6981 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
6982 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
6983 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
6984 } else {
6985 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
6986 offsetof(struct vi_gfx_meta_data, ce_payload);
6987 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
6988 }
6989
6990 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
6991 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
6992 WRITE_DATA_DST_SEL(8) |
6993 WR_CONFIRM) |
6994 WRITE_DATA_CACHE_POLICY(0));
6995 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
6996 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
6997 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
6998 }
6999
gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring * ring)7000 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7001 {
7002 uint64_t de_payload_addr, gds_addr, csa_addr;
7003 int cnt_de;
7004 static union {
7005 struct vi_de_ib_state regular;
7006 struct vi_de_ib_state_chained_ib chained;
7007 } de_payload = {};
7008
7009 csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
7010 gds_addr = csa_addr + 4096;
7011 if (ring->adev->virt.chained_ib_support) {
7012 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7013 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7014 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7015 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7016 } else {
7017 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7018 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7019 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7020 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7021 }
7022
7023 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7024 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7025 WRITE_DATA_DST_SEL(8) |
7026 WR_CONFIRM) |
7027 WRITE_DATA_CACHE_POLICY(0));
7028 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7029 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7030 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7031 }
7032