1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/firmware.h>
25 #include <linux/pci.h>
26 
27 #include <drm/drm_cache.h>
28 
29 #include "amdgpu.h"
30 #include "gmc_v9_0.h"
31 #include "amdgpu_atomfirmware.h"
32 #include "amdgpu_gem.h"
33 
34 #include "hdp/hdp_4_0_offset.h"
35 #include "hdp/hdp_4_0_sh_mask.h"
36 #include "gc/gc_9_0_sh_mask.h"
37 #include "dce/dce_12_0_offset.h"
38 #include "dce/dce_12_0_sh_mask.h"
39 #include "vega10_enum.h"
40 #include "mmhub/mmhub_1_0_offset.h"
41 #include "athub/athub_1_0_sh_mask.h"
42 #include "athub/athub_1_0_offset.h"
43 #include "oss/osssys_4_0_offset.h"
44 
45 #include "soc15.h"
46 #include "soc15d.h"
47 #include "soc15_common.h"
48 #include "umc/umc_6_0_sh_mask.h"
49 
50 #include "gfxhub_v1_0.h"
51 #include "mmhub_v1_0.h"
52 #include "athub_v1_0.h"
53 #include "gfxhub_v1_1.h"
54 #include "mmhub_v9_4.h"
55 #include "umc_v6_1.h"
56 #include "umc_v6_0.h"
57 
58 #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
59 
60 #include "amdgpu_ras.h"
61 #include "amdgpu_xgmi.h"
62 
63 /* add these here since we already include dce12 headers and these are for DCN */
64 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION                                                          0x055d
65 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX                                                 2
66 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT                                        0x0
67 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT                                       0x10
68 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK                                          0x00003FFFL
69 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK                                         0x3FFF0000L
70 #define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0                                                                  0x049d
71 #define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX                                                         2
72 
73 
74 static const char *gfxhub_client_ids[] = {
75 	"CB",
76 	"DB",
77 	"IA",
78 	"WD",
79 	"CPF",
80 	"CPC",
81 	"CPG",
82 	"RLC",
83 	"TCP",
84 	"SQC (inst)",
85 	"SQC (data)",
86 	"SQG",
87 	"PA",
88 };
89 
90 static const char *mmhub_client_ids_raven[][2] = {
91 	[0][0] = "MP1",
92 	[1][0] = "MP0",
93 	[2][0] = "VCN",
94 	[3][0] = "VCNU",
95 	[4][0] = "HDP",
96 	[5][0] = "DCE",
97 	[13][0] = "UTCL2",
98 	[19][0] = "TLS",
99 	[26][0] = "OSS",
100 	[27][0] = "SDMA0",
101 	[0][1] = "MP1",
102 	[1][1] = "MP0",
103 	[2][1] = "VCN",
104 	[3][1] = "VCNU",
105 	[4][1] = "HDP",
106 	[5][1] = "XDP",
107 	[6][1] = "DBGU0",
108 	[7][1] = "DCE",
109 	[8][1] = "DCEDWB0",
110 	[9][1] = "DCEDWB1",
111 	[26][1] = "OSS",
112 	[27][1] = "SDMA0",
113 };
114 
115 static const char *mmhub_client_ids_renoir[][2] = {
116 	[0][0] = "MP1",
117 	[1][0] = "MP0",
118 	[2][0] = "HDP",
119 	[4][0] = "DCEDMC",
120 	[5][0] = "DCEVGA",
121 	[13][0] = "UTCL2",
122 	[19][0] = "TLS",
123 	[26][0] = "OSS",
124 	[27][0] = "SDMA0",
125 	[28][0] = "VCN",
126 	[29][0] = "VCNU",
127 	[30][0] = "JPEG",
128 	[0][1] = "MP1",
129 	[1][1] = "MP0",
130 	[2][1] = "HDP",
131 	[3][1] = "XDP",
132 	[6][1] = "DBGU0",
133 	[7][1] = "DCEDMC",
134 	[8][1] = "DCEVGA",
135 	[9][1] = "DCEDWB",
136 	[26][1] = "OSS",
137 	[27][1] = "SDMA0",
138 	[28][1] = "VCN",
139 	[29][1] = "VCNU",
140 	[30][1] = "JPEG",
141 };
142 
143 static const char *mmhub_client_ids_vega10[][2] = {
144 	[0][0] = "MP0",
145 	[1][0] = "UVD",
146 	[2][0] = "UVDU",
147 	[3][0] = "HDP",
148 	[13][0] = "UTCL2",
149 	[14][0] = "OSS",
150 	[15][0] = "SDMA1",
151 	[32+0][0] = "VCE0",
152 	[32+1][0] = "VCE0U",
153 	[32+2][0] = "XDMA",
154 	[32+3][0] = "DCE",
155 	[32+4][0] = "MP1",
156 	[32+14][0] = "SDMA0",
157 	[0][1] = "MP0",
158 	[1][1] = "UVD",
159 	[2][1] = "UVDU",
160 	[3][1] = "DBGU0",
161 	[4][1] = "HDP",
162 	[5][1] = "XDP",
163 	[14][1] = "OSS",
164 	[15][1] = "SDMA0",
165 	[32+0][1] = "VCE0",
166 	[32+1][1] = "VCE0U",
167 	[32+2][1] = "XDMA",
168 	[32+3][1] = "DCE",
169 	[32+4][1] = "DCEDWB",
170 	[32+5][1] = "MP1",
171 	[32+6][1] = "DBGU1",
172 	[32+14][1] = "SDMA1",
173 };
174 
175 static const char *mmhub_client_ids_vega12[][2] = {
176 	[0][0] = "MP0",
177 	[1][0] = "VCE0",
178 	[2][0] = "VCE0U",
179 	[3][0] = "HDP",
180 	[13][0] = "UTCL2",
181 	[14][0] = "OSS",
182 	[15][0] = "SDMA1",
183 	[32+0][0] = "DCE",
184 	[32+1][0] = "XDMA",
185 	[32+2][0] = "UVD",
186 	[32+3][0] = "UVDU",
187 	[32+4][0] = "MP1",
188 	[32+15][0] = "SDMA0",
189 	[0][1] = "MP0",
190 	[1][1] = "VCE0",
191 	[2][1] = "VCE0U",
192 	[3][1] = "DBGU0",
193 	[4][1] = "HDP",
194 	[5][1] = "XDP",
195 	[14][1] = "OSS",
196 	[15][1] = "SDMA0",
197 	[32+0][1] = "DCE",
198 	[32+1][1] = "DCEDWB",
199 	[32+2][1] = "XDMA",
200 	[32+3][1] = "UVD",
201 	[32+4][1] = "UVDU",
202 	[32+5][1] = "MP1",
203 	[32+6][1] = "DBGU1",
204 	[32+15][1] = "SDMA1",
205 };
206 
207 static const char *mmhub_client_ids_vega20[][2] = {
208 	[0][0] = "XDMA",
209 	[1][0] = "DCE",
210 	[2][0] = "VCE0",
211 	[3][0] = "VCE0U",
212 	[4][0] = "UVD",
213 	[5][0] = "UVD1U",
214 	[13][0] = "OSS",
215 	[14][0] = "HDP",
216 	[15][0] = "SDMA0",
217 	[32+0][0] = "UVD",
218 	[32+1][0] = "UVDU",
219 	[32+2][0] = "MP1",
220 	[32+3][0] = "MP0",
221 	[32+12][0] = "UTCL2",
222 	[32+14][0] = "SDMA1",
223 	[0][1] = "XDMA",
224 	[1][1] = "DCE",
225 	[2][1] = "DCEDWB",
226 	[3][1] = "VCE0",
227 	[4][1] = "VCE0U",
228 	[5][1] = "UVD1",
229 	[6][1] = "UVD1U",
230 	[7][1] = "DBGU0",
231 	[8][1] = "XDP",
232 	[13][1] = "OSS",
233 	[14][1] = "HDP",
234 	[15][1] = "SDMA0",
235 	[32+0][1] = "UVD",
236 	[32+1][1] = "UVDU",
237 	[32+2][1] = "DBGU1",
238 	[32+3][1] = "MP1",
239 	[32+4][1] = "MP0",
240 	[32+14][1] = "SDMA1",
241 };
242 
243 static const char *mmhub_client_ids_arcturus[][2] = {
244 	[2][0] = "MP1",
245 	[3][0] = "MP0",
246 	[10][0] = "UTCL2",
247 	[13][0] = "OSS",
248 	[14][0] = "HDP",
249 	[15][0] = "SDMA0",
250 	[32+15][0] = "SDMA1",
251 	[64+15][0] = "SDMA2",
252 	[96+15][0] = "SDMA3",
253 	[128+15][0] = "SDMA4",
254 	[160+11][0] = "JPEG",
255 	[160+12][0] = "VCN",
256 	[160+13][0] = "VCNU",
257 	[160+15][0] = "SDMA5",
258 	[192+10][0] = "UTCL2",
259 	[192+11][0] = "JPEG1",
260 	[192+12][0] = "VCN1",
261 	[192+13][0] = "VCN1U",
262 	[192+15][0] = "SDMA6",
263 	[224+15][0] = "SDMA7",
264 	[0][1] = "DBGU1",
265 	[1][1] = "XDP",
266 	[2][1] = "MP1",
267 	[3][1] = "MP0",
268 	[13][1] = "OSS",
269 	[14][1] = "HDP",
270 	[15][1] = "SDMA0",
271 	[32+15][1] = "SDMA1",
272 	[64+15][1] = "SDMA2",
273 	[96+15][1] = "SDMA3",
274 	[128+15][1] = "SDMA4",
275 	[160+11][1] = "JPEG",
276 	[160+12][1] = "VCN",
277 	[160+13][1] = "VCNU",
278 	[160+15][1] = "SDMA5",
279 	[192+11][1] = "JPEG1",
280 	[192+12][1] = "VCN1",
281 	[192+13][1] = "VCN1U",
282 	[192+15][1] = "SDMA6",
283 	[224+15][1] = "SDMA7",
284 };
285 
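/*
 * Each triple below is consumed by amdgpu_device_program_register_sequence()
 * from gmc_v9_0_hw_init() further down, presumably as a {register offset,
 * AND mask, OR value} entry: the masked low 28 bits of the HDP registers
 * 0xf64-0xf6e are cleared to 0.
 */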
286 static const u32 golden_settings_vega10_hdp[] =
287 {
288 	0xf64, 0x0fffffff, 0x00000000,
289 	0xf65, 0x0fffffff, 0x00000000,
290 	0xf66, 0x0fffffff, 0x00000000,
291 	0xf67, 0x0fffffff, 0x00000000,
292 	0xf68, 0x0fffffff, 0x00000000,
293 	0xf6a, 0x0fffffff, 0x00000000,
294 	0xf6b, 0x0fffffff, 0x00000000,
295 	0xf6c, 0x0fffffff, 0x00000000,
296 	0xf6d, 0x0fffffff, 0x00000000,
297 	0xf6e, 0x0fffffff, 0x00000000,
298 };
299 
300 static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] =
301 {
302 	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa),
303 	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565)
304 };
305 
306 static const struct soc15_reg_golden golden_settings_athub_1_0_0[] =
307 {
308 	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800),
309 	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008)
310 };
311 
312 static const uint32_t ecc_umc_mcumc_ctrl_addrs[] = {
313 	(0x000143c0 + 0x00000000),
314 	(0x000143c0 + 0x00000800),
315 	(0x000143c0 + 0x00001000),
316 	(0x000143c0 + 0x00001800),
317 	(0x000543c0 + 0x00000000),
318 	(0x000543c0 + 0x00000800),
319 	(0x000543c0 + 0x00001000),
320 	(0x000543c0 + 0x00001800),
321 	(0x000943c0 + 0x00000000),
322 	(0x000943c0 + 0x00000800),
323 	(0x000943c0 + 0x00001000),
324 	(0x000943c0 + 0x00001800),
325 	(0x000d43c0 + 0x00000000),
326 	(0x000d43c0 + 0x00000800),
327 	(0x000d43c0 + 0x00001000),
328 	(0x000d43c0 + 0x00001800),
329 	(0x001143c0 + 0x00000000),
330 	(0x001143c0 + 0x00000800),
331 	(0x001143c0 + 0x00001000),
332 	(0x001143c0 + 0x00001800),
333 	(0x001543c0 + 0x00000000),
334 	(0x001543c0 + 0x00000800),
335 	(0x001543c0 + 0x00001000),
336 	(0x001543c0 + 0x00001800),
337 	(0x001943c0 + 0x00000000),
338 	(0x001943c0 + 0x00000800),
339 	(0x001943c0 + 0x00001000),
340 	(0x001943c0 + 0x00001800),
341 	(0x001d43c0 + 0x00000000),
342 	(0x001d43c0 + 0x00000800),
343 	(0x001d43c0 + 0x00001000),
344 	(0x001d43c0 + 0x00001800),
345 };
346 
347 static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {
348 	(0x000143e0 + 0x00000000),
349 	(0x000143e0 + 0x00000800),
350 	(0x000143e0 + 0x00001000),
351 	(0x000143e0 + 0x00001800),
352 	(0x000543e0 + 0x00000000),
353 	(0x000543e0 + 0x00000800),
354 	(0x000543e0 + 0x00001000),
355 	(0x000543e0 + 0x00001800),
356 	(0x000943e0 + 0x00000000),
357 	(0x000943e0 + 0x00000800),
358 	(0x000943e0 + 0x00001000),
359 	(0x000943e0 + 0x00001800),
360 	(0x000d43e0 + 0x00000000),
361 	(0x000d43e0 + 0x00000800),
362 	(0x000d43e0 + 0x00001000),
363 	(0x000d43e0 + 0x00001800),
364 	(0x001143e0 + 0x00000000),
365 	(0x001143e0 + 0x00000800),
366 	(0x001143e0 + 0x00001000),
367 	(0x001143e0 + 0x00001800),
368 	(0x001543e0 + 0x00000000),
369 	(0x001543e0 + 0x00000800),
370 	(0x001543e0 + 0x00001000),
371 	(0x001543e0 + 0x00001800),
372 	(0x001943e0 + 0x00000000),
373 	(0x001943e0 + 0x00000800),
374 	(0x001943e0 + 0x00001000),
375 	(0x001943e0 + 0x00001800),
376 	(0x001d43e0 + 0x00000000),
377 	(0x001d43e0 + 0x00000800),
378 	(0x001d43e0 + 0x00001000),
379 	(0x001d43e0 + 0x00001800),
380 };
381 
382 static const uint32_t ecc_umc_mcumc_status_addrs[] = {
383 	(0x000143c2 + 0x00000000),
384 	(0x000143c2 + 0x00000800),
385 	(0x000143c2 + 0x00001000),
386 	(0x000143c2 + 0x00001800),
387 	(0x000543c2 + 0x00000000),
388 	(0x000543c2 + 0x00000800),
389 	(0x000543c2 + 0x00001000),
390 	(0x000543c2 + 0x00001800),
391 	(0x000943c2 + 0x00000000),
392 	(0x000943c2 + 0x00000800),
393 	(0x000943c2 + 0x00001000),
394 	(0x000943c2 + 0x00001800),
395 	(0x000d43c2 + 0x00000000),
396 	(0x000d43c2 + 0x00000800),
397 	(0x000d43c2 + 0x00001000),
398 	(0x000d43c2 + 0x00001800),
399 	(0x001143c2 + 0x00000000),
400 	(0x001143c2 + 0x00000800),
401 	(0x001143c2 + 0x00001000),
402 	(0x001143c2 + 0x00001800),
403 	(0x001543c2 + 0x00000000),
404 	(0x001543c2 + 0x00000800),
405 	(0x001543c2 + 0x00001000),
406 	(0x001543c2 + 0x00001800),
407 	(0x001943c2 + 0x00000000),
408 	(0x001943c2 + 0x00000800),
409 	(0x001943c2 + 0x00001000),
410 	(0x001943c2 + 0x00001800),
411 	(0x001d43c2 + 0x00000000),
412 	(0x001d43c2 + 0x00000800),
413 	(0x001d43c2 + 0x00001000),
414 	(0x001d43c2 + 0x00001800),
415 };
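
/*
 * The three register lists above share the same layout: what looks like one
 * base register per UMC instance (0x000143xx, 0x000543xx, ... stepping by
 * 0x00040000 across eight instances) with four per-channel copies at a 0x800
 * stride, 32 entries in total. The control registers sit at offset ...43c0,
 * the masks at ...43e0 and the status registers at ...43c2.
 */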
416 
417 static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
418 		struct amdgpu_irq_src *src,
419 		unsigned type,
420 		enum amdgpu_interrupt_state state)
421 {
422 	u32 bits, i, tmp, reg;
423 
424 	/* Devices newer than VEGA10/12 shall have these programming
425 	 * sequences performed by PSP BL */
426 	if (adev->asic_type >= CHIP_VEGA20)
427 		return 0;
428 
429 	bits = 0x7f;
430 
431 	switch (state) {
432 	case AMDGPU_IRQ_STATE_DISABLE:
433 		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
434 			reg = ecc_umc_mcumc_ctrl_addrs[i];
435 			tmp = RREG32(reg);
436 			tmp &= ~bits;
437 			WREG32(reg, tmp);
438 		}
439 		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
440 			reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
441 			tmp = RREG32(reg);
442 			tmp &= ~bits;
443 			WREG32(reg, tmp);
444 		}
445 		break;
446 	case AMDGPU_IRQ_STATE_ENABLE:
447 		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
448 			reg = ecc_umc_mcumc_ctrl_addrs[i];
449 			tmp = RREG32(reg);
450 			tmp |= bits;
451 			WREG32(reg, tmp);
452 		}
453 		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
454 			reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
455 			tmp = RREG32(reg);
456 			tmp |= bits;
457 			WREG32(reg, tmp);
458 		}
459 		break;
460 	default:
461 		break;
462 	}
463 
464 	return 0;
465 }
466 
467 static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
468 					struct amdgpu_irq_src *src,
469 					unsigned type,
470 					enum amdgpu_interrupt_state state)
471 {
472 	struct amdgpu_vmhub *hub;
473 	u32 tmp, reg, bits, i, j;
474 
475 	bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
476 		VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
477 		VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
478 		VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
479 		VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
480 		VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
481 		VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
482 
483 	switch (state) {
484 	case AMDGPU_IRQ_STATE_DISABLE:
485 		for (j = 0; j < adev->num_vmhubs; j++) {
486 			hub = &adev->vmhub[j];
487 			for (i = 0; i < 16; i++) {
488 				reg = hub->vm_context0_cntl + i;
489 				tmp = RREG32(reg);
490 				tmp &= ~bits;
491 				WREG32(reg, tmp);
492 			}
493 		}
494 		break;
495 	case AMDGPU_IRQ_STATE_ENABLE:
496 		for (j = 0; j < adev->num_vmhubs; j++) {
497 			hub = &adev->vmhub[j];
498 			for (i = 0; i < 16; i++) {
499 				reg = hub->vm_context0_cntl + i;
500 				tmp = RREG32(reg);
501 				tmp |= bits;
502 				WREG32(reg, tmp);
503 			}
504 		}
505 	default:
506 		break;
507 	}
508 
509 	return 0;
510 }
511 
512 static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
513 				struct amdgpu_irq_src *source,
514 				struct amdgpu_iv_entry *entry)
515 {
516 	struct amdgpu_vmhub *hub;
517 	bool retry_fault = !!(entry->src_data[1] & 0x80);
518 	uint32_t status = 0, cid = 0, rw = 0;
519 	u64 addr;
520 	char hub_name[10];
521 	const char *mmhub_cid;
522 
523 	addr = (u64)entry->src_data[0] << 12;
524 	addr |= ((u64)entry->src_data[1] & 0xf) << 44;
525 
526 	if (retry_fault && amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
527 						    entry->timestamp))
528 		return 1; /* This also prevents sending it to KFD */
529 
530 	if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
531 		snprintf(hub_name, sizeof(hub_name), "mmhub0");
532 		hub = &adev->vmhub[AMDGPU_MMHUB_0];
533 	} else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
534 		snprintf(hub_name, sizeof(hub_name), "mmhub1");
535 		hub = &adev->vmhub[AMDGPU_MMHUB_1];
536 	} else {
537 		snprintf(hub_name, sizeof(hub_name), "gfxhub0");
538 		hub = &adev->vmhub[AMDGPU_GFXHUB_0];
539 	}
540 
541 	/* If it's the first fault for this address, process it normally */
542 	if (retry_fault && !in_interrupt() &&
543 	    amdgpu_vm_handle_fault(adev, entry->pasid, addr))
544 		return 1; /* This also prevents sending it to KFD */
545 
546 	if (!amdgpu_sriov_vf(adev)) {
547 		/*
548 		 * Issue a dummy read to wait for the status register to
549 		 * be updated to avoid reading an incorrect value due to
550 		 * the new fast GRBM interface.
551 		 */
552 		if (entry->vmid_src == AMDGPU_GFXHUB_0)
553 			RREG32(hub->vm_l2_pro_fault_status);
554 
555 		status = RREG32(hub->vm_l2_pro_fault_status);
556 		cid = REG_GET_FIELD(status,
557 				    VM_L2_PROTECTION_FAULT_STATUS, CID);
558 		rw = REG_GET_FIELD(status,
559 				   VM_L2_PROTECTION_FAULT_STATUS, RW);
560 		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
561 	}
562 
563 	if (printk_ratelimit()) {
564 		struct amdgpu_task_info task_info;
565 
566 		memset(&task_info, 0, sizeof(struct amdgpu_task_info));
567 		amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
568 
569 		dev_err(adev->dev,
570 			"[%s] %s page fault (src_id:%u ring:%u vmid:%u "
571 			"pasid:%u, for process %s pid %d thread %s pid %d)\n",
572 			hub_name, retry_fault ? "retry" : "no-retry",
573 			entry->src_id, entry->ring_id, entry->vmid,
574 			entry->pasid, task_info.process_name, task_info.tgid,
575 			task_info.task_name, task_info.pid);
576 		dev_err(adev->dev, "  in page starting at address 0x%016llx from client %d\n",
577 			addr, entry->client_id);
578 		if (!amdgpu_sriov_vf(adev)) {
579 			dev_err(adev->dev,
580 				"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
581 				status);
582 			if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) {
583 				dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
584 					cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid],
585 					cid);
586 			} else {
587 				switch (adev->asic_type) {
588 				case CHIP_VEGA10:
589 					mmhub_cid = mmhub_client_ids_vega10[cid][rw];
590 					break;
591 				case CHIP_VEGA12:
592 					mmhub_cid = mmhub_client_ids_vega12[cid][rw];
593 					break;
594 				case CHIP_VEGA20:
595 					mmhub_cid = mmhub_client_ids_vega20[cid][rw];
596 					break;
597 				case CHIP_ARCTURUS:
598 					mmhub_cid = mmhub_client_ids_arcturus[cid][rw];
599 					break;
600 				case CHIP_RAVEN:
601 					mmhub_cid = mmhub_client_ids_raven[cid][rw];
602 					break;
603 				case CHIP_RENOIR:
604 					mmhub_cid = mmhub_client_ids_renoir[cid][rw];
605 					break;
606 				default:
607 					mmhub_cid = NULL;
608 					break;
609 				}
610 				dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
611 					mmhub_cid ? mmhub_cid : "unknown", cid);
612 			}
613 			dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
614 				REG_GET_FIELD(status,
615 				VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
616 			dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
617 				REG_GET_FIELD(status,
618 				VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
619 			dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
620 				REG_GET_FIELD(status,
621 				VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
622 			dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
623 				REG_GET_FIELD(status,
624 				VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
625 			dev_err(adev->dev, "\t RW: 0x%x\n", rw);
626 		}
627 	}
628 
629 	return 0;
630 }
631 
632 static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
633 	.set = gmc_v9_0_vm_fault_interrupt_state,
634 	.process = gmc_v9_0_process_interrupt,
635 };
636 
637 
638 static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = {
639 	.set = gmc_v9_0_ecc_interrupt_state,
640 	.process = amdgpu_umc_process_ecc_irq,
641 };
642 
643 static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
644 {
645 	adev->gmc.vm_fault.num_types = 1;
646 	adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
647 
648 	if (!amdgpu_sriov_vf(adev)) {
649 		adev->gmc.ecc_irq.num_types = 1;
650 		adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
651 	}
652 }
653 
654 static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
655 					uint32_t flush_type)
656 {
657 	u32 req = 0;
658 
659 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
660 			    PER_VMID_INVALIDATE_REQ, 1 << vmid);
661 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
662 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
663 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
664 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
665 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
666 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
667 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
668 			    CLEAR_PROTECTION_FAULT_STATUS_ADDR,	0);
669 
670 	return req;
671 }
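
/*
 * For example, a light flush of VMID 3 (flush_type 0) produces a request with
 * PER_VMID_INVALIDATE_REQ = 0x8 (1 << 3), FLUSH_TYPE = 0, all of the
 * INVALIDATE_L2_PTES/PDE0/PDE1/PDE2 and INVALIDATE_L1_PTES bits set and
 * CLEAR_PROTECTION_FAULT_STATUS_ADDR left clear; the flush paths below write
 * this value to the hub's VM_INVALIDATE_ENG0_REQ register for the engine in
 * use.
 */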
672 
673 /**
674  * gmc_v9_0_use_invalidate_semaphore - decide whether to use the invalidation semaphore
675  *
676  * @adev: amdgpu_device pointer
677  * @vmhub: vmhub type
678  *
679  */
680 static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
681 				       uint32_t vmhub)
682 {
683 	return ((vmhub == AMDGPU_MMHUB_0 ||
684 		 vmhub == AMDGPU_MMHUB_1) &&
685 		(!amdgpu_sriov_vf(adev)) &&
686 		(!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) &&
687 		   (adev->apu_flags & AMD_APU_IS_PICASSO))));
688 }
689 
690 static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
691 					uint8_t vmid, uint16_t *p_pasid)
692 {
693 	uint32_t value;
694 
695 	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
696 		     + vmid);
697 	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
698 
699 	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
700 }
701 
702 /*
703  * GART
704  * VMID 0 is the physical GPU addresses as used by the kernel.
705  * VMIDs 1-15 are used for userspace clients and are handled
706  * by the amdgpu vm/hsa code.
707  */
708 
709 /**
710  * gmc_v9_0_flush_gpu_tlb - tlb flush with a certain type
711  *
712  * @adev: amdgpu_device pointer
713  * @vmid: vm instance to flush
714  * @flush_type: the flush type
715  *
716  * Flush the TLB for the requested page table using a certain flush type.
717  */
718 static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
719 					uint32_t vmhub, uint32_t flush_type)
720 {
721 	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
722 	const unsigned eng = 17;
723 	u32 j, inv_req, inv_req2, tmp;
724 	struct amdgpu_vmhub *hub;
725 
726 	BUG_ON(vmhub >= adev->num_vmhubs);
727 
728 	hub = &adev->vmhub[vmhub];
729 	if (adev->gmc.xgmi.num_physical_nodes &&
730 	    adev->asic_type == CHIP_VEGA20) {
731 		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
732 		 * heavy-weight TLB flush (type 2), which flushes
733 		 * both. Due to a race condition with concurrent
734 		 * memory accesses using the same TLB cache line, we
735 		 * still need a second TLB flush after this.
736 		 */
737 		inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
738 		inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
739 	} else {
740 		inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
741 		inv_req2 = 0;
742 	}
743 
744 	/* This is necessary for a HW workaround under SRIOV as well
745 	 * as GFXOFF under bare metal
746 	 */
747 	if (adev->gfx.kiq.ring.sched.ready &&
748 	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
749 	    down_read_trylock(&adev->reset_sem)) {
750 		uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
751 		uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
752 
753 		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
754 						   1 << vmid);
755 		up_read(&adev->reset_sem);
756 		return;
757 	}
758 
759 	spin_lock(&adev->gmc.invalidate_lock);
760 
761 	/*
762 	 * The GPUVM invalidate acknowledge state may be lost across a
763 	 * power-gating off cycle. Acquire the semaphore before the
764 	 * invalidation and release it afterwards to avoid entering the
765 	 * power-gated state, which works around the issue.
766 	 */
767 
768 	/* TODO: Debugging of the semaphore for GFXHUB still needs to continue. */
769 	if (use_semaphore) {
770 		for (j = 0; j < adev->usec_timeout; j++) {
771 			/* a read return value of 1 means the semaphore was acquired */
772 			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
773 					    hub->eng_distance * eng);
774 			if (tmp & 0x1)
775 				break;
776 			udelay(1);
777 		}
778 
779 		if (j >= adev->usec_timeout)
780 			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
781 	}
782 
783 	do {
784 		WREG32_NO_KIQ(hub->vm_inv_eng0_req +
785 			      hub->eng_distance * eng, inv_req);
786 
787 		/*
788 		 * Issue a dummy read to wait for the ACK register to
789 		 * be cleared to avoid a false ACK due to the new fast
790 		 * GRBM interface.
791 		 */
792 		if (vmhub == AMDGPU_GFXHUB_0)
793 			RREG32_NO_KIQ(hub->vm_inv_eng0_req +
794 				      hub->eng_distance * eng);
795 
796 		for (j = 0; j < adev->usec_timeout; j++) {
797 			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
798 					    hub->eng_distance * eng);
799 			if (tmp & (1 << vmid))
800 				break;
801 			udelay(1);
802 		}
803 
804 		inv_req = inv_req2;
805 		inv_req2 = 0;
806 	} while (inv_req);
807 
808 	/* TODO: Debugging of the semaphore for GFXHUB still needs to continue. */
809 	if (use_semaphore)
810 		/*
811 		 * Release the semaphore after the invalidation;
812 		 * writing 0 releases it.
813 		 */
814 		WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
815 			      hub->eng_distance * eng, 0);
816 
817 	spin_unlock(&adev->gmc.invalidate_lock);
818 
819 	if (j < adev->usec_timeout)
820 		return;
821 
822 	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
823 }
824 
825 /**
826  * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
827  *
828  * @adev: amdgpu_device pointer
829  * @pasid: pasid to be flushed
830  *
831  * Flush the TLB for the requested pasid.
832  */
833 static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
834 					uint16_t pasid, uint32_t flush_type,
835 					bool all_hub)
836 {
837 	int vmid, i;
838 	signed long r;
839 	uint32_t seq;
840 	uint16_t queried_pasid;
841 	bool ret;
842 	u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
843 	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
844 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
845 
846 	if (amdgpu_in_reset(adev))
847 		return -EIO;
848 
849 	if (ring->sched.ready && down_read_trylock(&adev->reset_sem)) {
850 		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
851 		 * heavy-weight TLB flush (type 2), which flushes
852 		 * both. Due to a race condition with concurrent
853 		 * memory accesses using the same TLB cache line, we
854 		 * still need a second TLB flush after this.
855 		 */
856 		bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
857 				       adev->asic_type == CHIP_VEGA20);
858 		/* 2 dwords flush + 8 dwords fence */
859 		unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
860 
861 		if (vega20_xgmi_wa)
862 			ndw += kiq->pmf->invalidate_tlbs_size;
863 
864 		spin_lock(&adev->gfx.kiq.ring_lock);
865 		/* 2 dwords flush + 8 dwords fence */
866 		amdgpu_ring_alloc(ring, ndw);
867 		if (vega20_xgmi_wa)
868 			kiq->pmf->kiq_invalidate_tlbs(ring,
869 						      pasid, 2, all_hub);
870 		kiq->pmf->kiq_invalidate_tlbs(ring,
871 					pasid, flush_type, all_hub);
872 		r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
873 		if (r) {
874 			amdgpu_ring_undo(ring);
875 			spin_unlock(&adev->gfx.kiq.ring_lock);
876 			up_read(&adev->reset_sem);
877 			return -ETIME;
878 		}
879 
880 		amdgpu_ring_commit(ring);
881 		spin_unlock(&adev->gfx.kiq.ring_lock);
882 		r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
883 		if (r < 1) {
884 			dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
885 			up_read(&adev->reset_sem);
886 			return -ETIME;
887 		}
888 		up_read(&adev->reset_sem);
889 		return 0;
890 	}
891 
892 	for (vmid = 1; vmid < 16; vmid++) {
893 
894 		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
895 				&queried_pasid);
896 		if (ret && queried_pasid == pasid) {
897 			if (all_hub) {
898 				for (i = 0; i < adev->num_vmhubs; i++)
899 					gmc_v9_0_flush_gpu_tlb(adev, vmid,
900 							i, flush_type);
901 			} else {
902 				gmc_v9_0_flush_gpu_tlb(adev, vmid,
903 						AMDGPU_GFXHUB_0, flush_type);
904 			}
905 			break;
906 		}
907 	}
908 
909 	return 0;
910 
911 }
912 
913 static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
914 					    unsigned vmid, uint64_t pd_addr)
915 {
916 	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
917 	struct amdgpu_device *adev = ring->adev;
918 	struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
919 	uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
920 	unsigned eng = ring->vm_inv_eng;
921 
922 	/*
923 	 * The GPUVM invalidate acknowledge state may be lost across a
924 	 * power-gating off cycle. Acquire the semaphore before the
925 	 * invalidation and release it afterwards to avoid entering the
926 	 * power-gated state, which works around the issue.
927 	 */
928 
929 	/* TODO: Debugging of the semaphore for GFXHUB still needs to continue. */
930 	if (use_semaphore)
931 		/* a read return value of 1 means the semaphore was acquired */
932 		amdgpu_ring_emit_reg_wait(ring,
933 					  hub->vm_inv_eng0_sem +
934 					  hub->eng_distance * eng, 0x1, 0x1);
935 
936 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
937 			      (hub->ctx_addr_distance * vmid),
938 			      lower_32_bits(pd_addr));
939 
940 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
941 			      (hub->ctx_addr_distance * vmid),
942 			      upper_32_bits(pd_addr));
943 
944 	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
945 					    hub->eng_distance * eng,
946 					    hub->vm_inv_eng0_ack +
947 					    hub->eng_distance * eng,
948 					    req, 1 << vmid);
949 
950 	/* TODO: Debugging of the semaphore for GFXHUB still needs to continue. */
951 	if (use_semaphore)
952 		/*
953 		 * Release the semaphore after the invalidation;
954 		 * writing 0 releases it.
955 		 */
956 		amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
957 				      hub->eng_distance * eng, 0);
958 
959 	return pd_addr;
960 }
961 
962 static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
963 					unsigned pasid)
964 {
965 	struct amdgpu_device *adev = ring->adev;
966 	uint32_t reg;
967 
968 	/* Do nothing because there's no lut register for mmhub1. */
969 	if (ring->funcs->vmhub == AMDGPU_MMHUB_1)
970 		return;
971 
972 	if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
973 		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
974 	else
975 		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
976 
977 	amdgpu_ring_emit_wreg(ring, reg, pasid);
978 }
979 
980 /*
981  * PTE format on VEGA 10:
982  * 63:59 reserved
983  * 58:57 mtype
984  * 56 F
985  * 55 L
986  * 54 P
987  * 53 SW
988  * 52 T
989  * 50:48 reserved
990  * 47:12 4k physical page base address
991  * 11:7 fragment
992  * 6 write
993  * 5 read
994  * 4 exe
995  * 3 Z
996  * 2 snooped
997  * 1 system
998  * 0 valid
999  *
1000  * PDE format on VEGA 10:
1001  * 63:59 block fragment size
1002  * 58:55 reserved
1003  * 54 P
1004  * 53:48 reserved
1005  * 47:6 physical base address of PD or PTE
1006  * 5:3 reserved
1007  * 2 C
1008  * 1 system
1009  * 0 valid
1010  */
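
/*
 * Minimal sketch (not used by the driver) of how a VEGA 10 PTE could be
 * assembled by hand from the layout above, assuming a 4K-aligned physical
 * address of a readable/writable system page; the driver builds PTEs through
 * the AMDGPU_PTE_* helpers and gmc_v9_0_map_mtype() below instead:
 *
 *	u64 pte = 0;
 *	pte |= paddr & 0x0000FFFFFFFFF000ULL;	bits 47:12, 4k page base address
 *	pte |= 1ULL << 0;			bit 0, valid
 *	pte |= 1ULL << 1;			bit 1, system
 *	pte |= 1ULL << 5;			bit 5, read
 *	pte |= 1ULL << 6;			bit 6, write
 *	pte |= (u64)mtype << 57;		bits 58:57, mtype
 */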
1011 
1012 static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
1013 
1014 {
1015 	switch (flags) {
1016 	case AMDGPU_VM_MTYPE_DEFAULT:
1017 		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
1018 	case AMDGPU_VM_MTYPE_NC:
1019 		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
1020 	case AMDGPU_VM_MTYPE_WC:
1021 		return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
1022 	case AMDGPU_VM_MTYPE_RW:
1023 		return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
1024 	case AMDGPU_VM_MTYPE_CC:
1025 		return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
1026 	case AMDGPU_VM_MTYPE_UC:
1027 		return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
1028 	default:
1029 		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
1030 	}
1031 }
1032 
1033 static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
1034 				uint64_t *addr, uint64_t *flags)
1035 {
1036 	if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
1037 		*addr = adev->vm_manager.vram_base_offset + *addr -
1038 			adev->gmc.vram_start;
1039 	BUG_ON(*addr & 0xFFFF00000000003FULL);
1040 
1041 	if (!adev->gmc.translate_further)
1042 		return;
1043 
1044 	if (level == AMDGPU_VM_PDB1) {
1045 		/* Set the block fragment size */
1046 		if (!(*flags & AMDGPU_PDE_PTE))
1047 			*flags |= AMDGPU_PDE_BFS(0x9);
1048 
1049 	} else if (level == AMDGPU_VM_PDB0) {
1050 		if (*flags & AMDGPU_PDE_PTE)
1051 			*flags &= ~AMDGPU_PDE_PTE;
1052 		else
1053 			*flags |= AMDGPU_PTE_TF;
1054 	}
1055 }
1056 
1057 static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
1058 				struct amdgpu_bo_va_mapping *mapping,
1059 				uint64_t *flags)
1060 {
1061 	*flags &= ~AMDGPU_PTE_EXECUTABLE;
1062 	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
1063 
1064 	*flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
1065 	*flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;
1066 
1067 	if (mapping->flags & AMDGPU_PTE_PRT) {
1068 		*flags |= AMDGPU_PTE_PRT;
1069 		*flags &= ~AMDGPU_PTE_VALID;
1070 	}
1071 
1072 	if (adev->asic_type == CHIP_ARCTURUS &&
1073 	    !(*flags & AMDGPU_PTE_SYSTEM) &&
1074 	    mapping->bo_va->is_xgmi)
1075 		*flags |= AMDGPU_PTE_SNOOPED;
1076 }
1077 
1078 static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
1079 {
1080 	u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
1081 	unsigned size;
1082 
1083 	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
1084 		size = AMDGPU_VBIOS_VGA_ALLOCATION;
1085 	} else {
1086 		u32 viewport;
1087 
1088 		switch (adev->asic_type) {
1089 		case CHIP_RAVEN:
1090 		case CHIP_RENOIR:
1091 			viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
1092 			size = (REG_GET_FIELD(viewport,
1093 					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
1094 				REG_GET_FIELD(viewport,
1095 					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
1096 				4);
1097 			break;
1098 		case CHIP_VEGA10:
1099 		case CHIP_VEGA12:
1100 		case CHIP_VEGA20:
1101 		default:
1102 			viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
1103 			size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
1104 				REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
1105 				4);
1106 			break;
1107 		}
1108 	}
1109 
1110 	return size;
1111 }
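
/*
 * For example, with a 1920x1080 primary viewport the non-VGA path above
 * reserves 1920 * 1080 * 4 = 8294400 bytes (roughly 8 MB) of VBIOS
 * framebuffer, while the VGA path always reserves AMDGPU_VBIOS_VGA_ALLOCATION.
 */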
1112 
1113 static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
1114 	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
1115 	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
1116 	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
1117 	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
1118 	.map_mtype = gmc_v9_0_map_mtype,
1119 	.get_vm_pde = gmc_v9_0_get_vm_pde,
1120 	.get_vm_pte = gmc_v9_0_get_vm_pte,
1121 	.get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
1122 };
1123 
1124 static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
1125 {
1126 	adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
1127 }
1128 
1129 static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
1130 {
1131 	switch (adev->asic_type) {
1132 	case CHIP_VEGA10:
1133 		adev->umc.funcs = &umc_v6_0_funcs;
1134 		break;
1135 	case CHIP_VEGA20:
1136 		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
1137 		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
1138 		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
1139 		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
1140 		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
1141 		adev->umc.funcs = &umc_v6_1_funcs;
1142 		break;
1143 	case CHIP_ARCTURUS:
1144 		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
1145 		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
1146 		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
1147 		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
1148 		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
1149 		adev->umc.funcs = &umc_v6_1_funcs;
1150 		break;
1151 	default:
1152 		break;
1153 	}
1154 }
1155 
1156 static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
1157 {
1158 	switch (adev->asic_type) {
1159 	case CHIP_ARCTURUS:
1160 		adev->mmhub.funcs = &mmhub_v9_4_funcs;
1161 		break;
1162 	default:
1163 		adev->mmhub.funcs = &mmhub_v1_0_funcs;
1164 		break;
1165 	}
1166 }
1167 
1168 static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
1169 {
1170 	switch (adev->asic_type) {
1171 	case CHIP_ARCTURUS:
1172 	case CHIP_VEGA20:
1173 		adev->gfxhub.funcs = &gfxhub_v1_1_funcs;
1174 		break;
1175 	default:
1176 		adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
1177 		break;
1178 	}
1179 }
1180 
1181 static int gmc_v9_0_early_init(void *handle)
1182 {
1183 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1184 
1185 	gmc_v9_0_set_gmc_funcs(adev);
1186 	gmc_v9_0_set_irq_funcs(adev);
1187 	gmc_v9_0_set_umc_funcs(adev);
1188 	gmc_v9_0_set_mmhub_funcs(adev);
1189 	gmc_v9_0_set_gfxhub_funcs(adev);
1190 
1191 	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
1192 	adev->gmc.shared_aperture_end =
1193 		adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
1194 	adev->gmc.private_aperture_start = 0x1000000000000000ULL;
1195 	adev->gmc.private_aperture_end =
1196 		adev->gmc.private_aperture_start + (4ULL << 30) - 1;
1197 
1198 	return 0;
1199 }
1200 
1201 static int gmc_v9_0_late_init(void *handle)
1202 {
1203 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1204 	int r;
1205 
1206 	amdgpu_bo_late_init(adev);
1207 
1208 	r = amdgpu_gmc_allocate_vm_inv_eng(adev);
1209 	if (r)
1210 		return r;
1211 
1212 	/*
1213 	 * Work around a performance drop issue when the VBIOS enables
1214 	 * partial writes but disables HBM ECC for Vega10.
1215 	 */
1216 	if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) {
1217 		if (!(adev->ras_features & (1 << AMDGPU_RAS_BLOCK__UMC))) {
1218 			if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
1219 				adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
1220 		}
1221 	}
1222 
1223 	if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
1224 		adev->mmhub.funcs->reset_ras_error_count(adev);
1225 
1226 	r = amdgpu_gmc_ras_late_init(adev);
1227 	if (r)
1228 		return r;
1229 
1230 	return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
1231 }
1232 
1233 static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
1234 					struct amdgpu_gmc *mc)
1235 {
1236 	u64 base = 0;
1237 
1238 	if (!amdgpu_sriov_vf(adev))
1239 		base = adev->mmhub.funcs->get_fb_location(adev);
1240 
1241 	/* add the xgmi offset of the physical node */
1242 	base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
1243 	amdgpu_gmc_vram_location(adev, mc, base);
1244 	amdgpu_gmc_gart_location(adev, mc);
1245 	amdgpu_gmc_agp_location(adev, mc);
1246 	/* base offset of vram pages */
1247 	adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
1248 
1249 	/* XXX: add the xgmi offset of the physical node? */
1250 	adev->vm_manager.vram_base_offset +=
1251 		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
1252 }
1253 
1254 /**
1255  * gmc_v9_0_mc_init - initialize the memory controller driver params
1256  *
1257  * @adev: amdgpu_device pointer
1258  *
1259  * Look up the amount of vram, vram width, and decide how to place
1260  * vram and gart within the GPU's physical address space.
1261  * Returns 0 for success.
1262  */
1263 static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
1264 {
1265 	int r;
1266 
1267 	/* get_memsize() returns the size in MB; convert to bytes */
1268 	adev->gmc.mc_vram_size =
1269 		adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
1270 	adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
1271 
1272 	if (!(adev->flags & AMD_IS_APU)) {
1273 		r = amdgpu_device_resize_fb_bar(adev);
1274 		if (r)
1275 			return r;
1276 	}
1277 	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
1278 	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
1279 
1280 #ifdef CONFIG_X86_64
1281 	if (adev->flags & AMD_IS_APU) {
1282 		adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev);
1283 		adev->gmc.aper_size = adev->gmc.real_vram_size;
1284 	}
1285 #endif
1286 	/* In case the PCI BAR is larger than the actual amount of vram */
1287 	adev->gmc.visible_vram_size = adev->gmc.aper_size;
1288 	if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
1289 		adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
1290 
1291 	/* set the gart size */
1292 	if (amdgpu_gart_size == -1) {
1293 		switch (adev->asic_type) {
1294 		case CHIP_VEGA10:  /* all engines support GPUVM */
1295 		case CHIP_VEGA12:  /* all engines support GPUVM */
1296 		case CHIP_VEGA20:
1297 		case CHIP_ARCTURUS:
1298 		default:
1299 			adev->gmc.gart_size = 512ULL << 20;
1300 			break;
1301 		case CHIP_RAVEN:   /* DCE SG support */
1302 		case CHIP_RENOIR:
1303 			adev->gmc.gart_size = 1024ULL << 20;
1304 			break;
1305 		}
1306 	} else {
1307 		adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
1308 	}
1309 
1310 	gmc_v9_0_vram_gtt_location(adev, &adev->gmc);
1311 
1312 	return 0;
1313 }
1314 
1315 static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
1316 {
1317 	int r;
1318 
1319 	if (adev->gart.bo) {
1320 		WARN(1, "VEGA10 PCIE GART already initialized\n");
1321 		return 0;
1322 	}
1323 	/* Initialize common gart structure */
1324 	r = amdgpu_gart_init(adev);
1325 	if (r)
1326 		return r;
1327 	adev->gart.table_size = adev->gart.num_gpu_pages * 8;
1328 	adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
1329 				 AMDGPU_PTE_EXECUTABLE;
1330 	return amdgpu_gart_table_vram_alloc(adev);
1331 }
1332 
1333 /**
1334  * gmc_v9_0_save_registers - saves regs
1335  *
1336  * @adev: amdgpu_device pointer
1337  *
1338  * This saves the values of registers that may need to be
1339  * restored upon resume.
1340  */
1341 static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
1342 {
1343 	if (adev->asic_type == CHIP_RAVEN)
1344 		adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
1345 }
1346 
1347 static int gmc_v9_0_sw_init(void *handle)
1348 {
1349 	int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
1350 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1351 
1352 	adev->gfxhub.funcs->init(adev);
1353 
1354 	adev->mmhub.funcs->init(adev);
1355 
1356 	spin_lock_init(&adev->gmc.invalidate_lock);
1357 
1358 	r = amdgpu_atomfirmware_get_vram_info(adev,
1359 		&vram_width, &vram_type, &vram_vendor);
1360 	if (amdgpu_sriov_vf(adev))
1361 		/* For Vega10 SR-IOV, vram_width can't be read from ATOM as on RAVEN,
1362 		 * and the DF related registers are not readable; hardcoding seems to be
1363 		 * the only way to set the correct vram_width
1364 		 */
1365 		adev->gmc.vram_width = 2048;
1366 	else if (amdgpu_emu_mode != 1)
1367 		adev->gmc.vram_width = vram_width;
1368 
1369 	if (!adev->gmc.vram_width) {
1370 		int chansize, numchan;
1371 
1372 		/* hbm memory channel size */
1373 		if (adev->flags & AMD_IS_APU)
1374 			chansize = 64;
1375 		else
1376 			chansize = 128;
1377 
1378 		numchan = adev->df.funcs->get_hbm_channel_number(adev);
1379 		adev->gmc.vram_width = numchan * chansize;
1380 	}
1381 
1382 	adev->gmc.vram_type = vram_type;
1383 	adev->gmc.vram_vendor = vram_vendor;
1384 	switch (adev->asic_type) {
1385 	case CHIP_RAVEN:
1386 		adev->num_vmhubs = 2;
1387 
1388 		if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
1389 			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
1390 		} else {
1391 			/* vm_size is 128TB + 512GB for legacy 3-level page support */
1392 			amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
1393 			adev->gmc.translate_further =
1394 				adev->vm_manager.num_level > 1;
1395 		}
1396 		break;
1397 	case CHIP_VEGA10:
1398 	case CHIP_VEGA12:
1399 	case CHIP_VEGA20:
1400 	case CHIP_RENOIR:
1401 		adev->num_vmhubs = 2;
1402 
1403 
1404 		/*
1405 		 * To support 4-level page tables, the vm size is
1406 		 * 256TB (48 bit), the maximum size of Vega10, with a
1407 		 * block size of 512 (9 bit).
1408 		 */
1409 		/* sriov restrict max_pfn below AMDGPU_GMC_HOLE */
1410 		if (amdgpu_sriov_vf(adev))
1411 			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
1412 		else
1413 			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
1414 		break;
1415 	case CHIP_ARCTURUS:
1416 		adev->num_vmhubs = 3;
1417 
1418 		/* Keep the vm size same with Vega20 */
1419 		amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
1420 		break;
1421 	default:
1422 		break;
1423 	}
1424 
1425 	/* This interrupt is VMC page fault.*/
1426 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
1427 				&adev->gmc.vm_fault);
1428 	if (r)
1429 		return r;
1430 
1431 	if (adev->asic_type == CHIP_ARCTURUS) {
1432 		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
1433 					&adev->gmc.vm_fault);
1434 		if (r)
1435 			return r;
1436 	}
1437 
1438 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
1439 				&adev->gmc.vm_fault);
1440 
1441 	if (r)
1442 		return r;
1443 
1444 	if (!amdgpu_sriov_vf(adev)) {
1445 		/* interrupt sent to DF. */
1446 		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
1447 				      &adev->gmc.ecc_irq);
1448 		if (r)
1449 			return r;
1450 	}
1451 
1452 	/* Set the internal MC address mask
1453 	 * This is the max address of the GPU's
1454 	 * internal address space.
1455 	 */
1456 	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
1457 
1458 	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
1459 	if (r) {
1460 		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
1461 		return r;
1462 	}
1463 	adev->need_swiotlb = drm_need_swiotlb(44);
1464 
1465 	if (adev->gmc.xgmi.supported) {
1466 		r = adev->gfxhub.funcs->get_xgmi_info(adev);
1467 		if (r)
1468 			return r;
1469 	}
1470 
1471 	r = gmc_v9_0_mc_init(adev);
1472 	if (r)
1473 		return r;
1474 
1475 	amdgpu_gmc_get_vbios_allocations(adev);
1476 
1477 	/* Memory manager */
1478 	r = amdgpu_bo_init(adev);
1479 	if (r)
1480 		return r;
1481 
1482 	r = gmc_v9_0_gart_init(adev);
1483 	if (r)
1484 		return r;
1485 
1486 	/*
1487 	 * number of VMs
1488 	 * VMID 0 is reserved for System
1489 	 * amdgpu graphics/compute will use VMIDs 1..n-1
1490 	 * amdkfd will use VMIDs n..15
1491 	 *
1492 	 * The first KFD VMID is 8 for GPUs with graphics, 3 for
1493 	 * compute-only GPUs. On compute-only GPUs that leaves 2 VMIDs
1494 	 * for video processing.
1495 	 */
1496 	adev->vm_manager.first_kfd_vmid =
1497 		adev->asic_type == CHIP_ARCTURUS ? 3 : 8;
1498 
1499 	amdgpu_vm_manager_init(adev);
1500 
1501 	gmc_v9_0_save_registers(adev);
1502 
1503 	return 0;
1504 }
1505 
1506 static int gmc_v9_0_sw_fini(void *handle)
1507 {
1508 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1509 
1510 	amdgpu_gmc_ras_fini(adev);
1511 	amdgpu_gem_force_release(adev);
1512 	amdgpu_vm_manager_fini(adev);
1513 	amdgpu_gart_table_vram_free(adev);
1514 	amdgpu_bo_fini(adev);
1515 	amdgpu_gart_fini(adev);
1516 
1517 	return 0;
1518 }
1519 
1520 static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
1521 {
1522 
1523 	switch (adev->asic_type) {
1524 	case CHIP_VEGA10:
1525 		if (amdgpu_sriov_vf(adev))
1526 			break;
1527 		fallthrough;
1528 	case CHIP_VEGA20:
1529 		soc15_program_register_sequence(adev,
1530 						golden_settings_mmhub_1_0_0,
1531 						ARRAY_SIZE(golden_settings_mmhub_1_0_0));
1532 		soc15_program_register_sequence(adev,
1533 						golden_settings_athub_1_0_0,
1534 						ARRAY_SIZE(golden_settings_athub_1_0_0));
1535 		break;
1536 	case CHIP_VEGA12:
1537 		break;
1538 	case CHIP_RAVEN:
1539 		/* TODO for renoir */
1540 		soc15_program_register_sequence(adev,
1541 						golden_settings_athub_1_0_0,
1542 						ARRAY_SIZE(golden_settings_athub_1_0_0));
1543 		break;
1544 	default:
1545 		break;
1546 	}
1547 }
1548 
1549 /**
1550  * gmc_v9_0_restore_registers - restores regs
1551  *
1552  * @adev: amdgpu_device pointer
1553  *
1554  * This restores register values, saved at suspend.
1555  */
1556 void gmc_v9_0_restore_registers(struct amdgpu_device *adev)
1557 {
1558 	if (adev->asic_type == CHIP_RAVEN) {
1559 		WREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register);
1560 		WARN_ON(adev->gmc.sdpif_register !=
1561 			RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0));
1562 	}
1563 }
1564 
1565 /**
1566  * gmc_v9_0_gart_enable - gart enable
1567  *
1568  * @adev: amdgpu_device pointer
1569  */
1570 static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
1571 {
1572 	int r;
1573 
1574 	if (adev->gart.bo == NULL) {
1575 		dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
1576 		return -EINVAL;
1577 	}
1578 	r = amdgpu_gart_table_vram_pin(adev);
1579 	if (r)
1580 		return r;
1581 
1582 	r = adev->gfxhub.funcs->gart_enable(adev);
1583 	if (r)
1584 		return r;
1585 
1586 	r = adev->mmhub.funcs->gart_enable(adev);
1587 	if (r)
1588 		return r;
1589 
1590 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
1591 		 (unsigned)(adev->gmc.gart_size >> 20),
1592 		 (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
1593 	adev->gart.ready = true;
1594 	return 0;
1595 }
1596 
1597 static int gmc_v9_0_hw_init(void *handle)
1598 {
1599 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1600 	bool value;
1601 	int r, i;
1602 	u32 tmp;
1603 
1604 	/* The sequence of these two function calls matters.*/
1605 	gmc_v9_0_init_golden_registers(adev);
1606 
1607 	if (adev->mode_info.num_crtc) {
1608 		if (adev->asic_type != CHIP_ARCTURUS) {
1609 			/* Lockout access through VGA aperture*/
1610 			WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
1611 
1612 			/* disable VGA render */
1613 			WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
1614 		}
1615 	}
1616 
1617 	amdgpu_device_program_register_sequence(adev,
1618 						golden_settings_vega10_hdp,
1619 						ARRAY_SIZE(golden_settings_vega10_hdp));
1620 
1621 	if (adev->mmhub.funcs->update_power_gating)
1622 		adev->mmhub.funcs->update_power_gating(adev, true);
1623 
1624 	switch (adev->asic_type) {
1625 	case CHIP_ARCTURUS:
1626 		WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1);
1627 		break;
1628 	default:
1629 		break;
1630 	}
1631 
1632 	WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
1633 
1634 	tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
1635 	WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
1636 
1637 	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
1638 	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
1639 
1640 	/* After HDP is initialized, flush HDP.*/
1641 	adev->nbio.funcs->hdp_flush(adev, NULL);
1642 
1643 	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
1644 		value = false;
1645 	else
1646 		value = true;
1647 
1648 	if (!amdgpu_sriov_vf(adev)) {
1649 		adev->gfxhub.funcs->set_fault_enable_default(adev, value);
1650 		adev->mmhub.funcs->set_fault_enable_default(adev, value);
1651 	}
1652 	for (i = 0; i < adev->num_vmhubs; ++i)
1653 		gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);
1654 
1655 	if (adev->umc.funcs && adev->umc.funcs->init_registers)
1656 		adev->umc.funcs->init_registers(adev);
1657 
1658 	r = gmc_v9_0_gart_enable(adev);
1659 
1660 	return r;
1661 }
1662 
1663 /**
1664  * gmc_v9_0_gart_disable - gart disable
1665  *
1666  * @adev: amdgpu_device pointer
1667  *
1668  * This disables all VM page tables.
1669  */
1670 static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
1671 {
1672 	adev->gfxhub.funcs->gart_disable(adev);
1673 	adev->mmhub.funcs->gart_disable(adev);
1674 	amdgpu_gart_table_vram_unpin(adev);
1675 }
1676 
1677 static int gmc_v9_0_hw_fini(void *handle)
1678 {
1679 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1680 
1681 	gmc_v9_0_gart_disable(adev);
1682 
1683 	if (amdgpu_sriov_vf(adev)) {
1684 		/* full access mode, so don't touch any GMC register */
1685 		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
1686 		return 0;
1687 	}
1688 
1689 	amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
1690 	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
1691 
1692 	return 0;
1693 }
1694 
1695 static int gmc_v9_0_suspend(void *handle)
1696 {
1697 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1698 
1699 	return gmc_v9_0_hw_fini(adev);
1700 }
1701 
1702 static int gmc_v9_0_resume(void *handle)
1703 {
1704 	int r;
1705 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1706 
1707 	r = gmc_v9_0_hw_init(adev);
1708 	if (r)
1709 		return r;
1710 
1711 	amdgpu_vmid_reset_all(adev);
1712 
1713 	return 0;
1714 }
1715 
1716 static bool gmc_v9_0_is_idle(void *handle)
1717 {
1718 	/* MC is always ready in GMC v9.*/
1719 	return true;
1720 }
1721 
1722 static int gmc_v9_0_wait_for_idle(void *handle)
1723 {
1724 	/* There is no need to wait for MC idle in GMC v9.*/
1725 	return 0;
1726 }
1727 
1728 static int gmc_v9_0_soft_reset(void *handle)
1729 {
1730 	/* XXX for emulation.*/
1731 	return 0;
1732 }
1733 
1734 static int gmc_v9_0_set_clockgating_state(void *handle,
1735 					enum amd_clockgating_state state)
1736 {
1737 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1738 
1739 	adev->mmhub.funcs->set_clockgating(adev, state);
1740 
1741 	athub_v1_0_set_clockgating(adev, state);
1742 
1743 	return 0;
1744 }
1745 
1746 static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
1747 {
1748 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1749 
1750 	adev->mmhub.funcs->get_clockgating(adev, flags);
1751 
1752 	athub_v1_0_get_clockgating(adev, flags);
1753 }
1754 
1755 static int gmc_v9_0_set_powergating_state(void *handle,
1756 					enum amd_powergating_state state)
1757 {
1758 	return 0;
1759 }
1760 
1761 const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
1762 	.name = "gmc_v9_0",
1763 	.early_init = gmc_v9_0_early_init,
1764 	.late_init = gmc_v9_0_late_init,
1765 	.sw_init = gmc_v9_0_sw_init,
1766 	.sw_fini = gmc_v9_0_sw_fini,
1767 	.hw_init = gmc_v9_0_hw_init,
1768 	.hw_fini = gmc_v9_0_hw_fini,
1769 	.suspend = gmc_v9_0_suspend,
1770 	.resume = gmc_v9_0_resume,
1771 	.is_idle = gmc_v9_0_is_idle,
1772 	.wait_for_idle = gmc_v9_0_wait_for_idle,
1773 	.soft_reset = gmc_v9_0_soft_reset,
1774 	.set_clockgating_state = gmc_v9_0_set_clockgating_state,
1775 	.set_powergating_state = gmc_v9_0_set_powergating_state,
1776 	.get_clockgating_state = gmc_v9_0_get_clockgating_state,
1777 };
1778 
1779 const struct amdgpu_ip_block_version gmc_v9_0_ip_block =
1780 {
1781 	.type = AMD_IP_BLOCK_TYPE_GMC,
1782 	.major = 9,
1783 	.minor = 0,
1784 	.rev = 0,
1785 	.funcs = &gmc_v9_0_ip_funcs,
1786 };
1787