/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/io-64-nonatomic-lo-hi.h>

#include "amdgpu.h"
#include "amdgpu_gmc.h"
#include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"

/**
 * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
 *
 * @bo: the BO to get the PDE for
 * @level: the level in the PD hierarchy
 * @addr: resulting addr
 * @flags: resulting flags
 *
 * Get the address and flags to be used for a PDE (Page Directory Entry).
 */
void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
			       uint64_t *addr, uint64_t *flags)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct ttm_dma_tt *ttm;

	switch (bo->tbo.mem.mem_type) {
	case TTM_PL_TT:
		ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
		*addr = ttm->dma_address[0];
		break;
	case TTM_PL_VRAM:
		*addr = amdgpu_bo_gpu_offset(bo);
		break;
	default:
		*addr = 0;
		break;
	}
	*flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, &bo->tbo.mem);
	amdgpu_gmc_get_vm_pde(adev, level, addr, flags);
}

/**
 * amdgpu_gmc_pd_addr - return the address of the root directory
 * @bo: the BO of the root page directory
 *
 * Returns:
 * The address to be programmed as the root page directory, including the
 * PDE flags on ASICs that need them.
 */
uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	uint64_t pd_addr;

	/* TODO: move that into ASIC specific code */
	if (adev->asic_type >= CHIP_VEGA10) {
		uint64_t flags = AMDGPU_PTE_VALID;

		amdgpu_gmc_get_pde_for_bo(bo, -1, &pd_addr, &flags);
		pd_addr |= flags;
	} else {
		pd_addr = amdgpu_bo_gpu_offset(bo);
	}
	return pd_addr;
}

/**
 * amdgpu_gmc_set_pte_pde - update the page tables using CPU
 *
 * @adev: amdgpu_device pointer
 * @cpu_pt_addr: cpu address of the page table
 * @gpu_page_idx: entry in the page table to update
 * @addr: dst addr to write into pte/pde
 * @flags: access flags
 *
 * Update the page tables using CPU.
 */
int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
			   uint32_t gpu_page_idx, uint64_t addr,
			   uint64_t flags)
{
	void __iomem *ptr = (void *)cpu_pt_addr;
	uint64_t value;

	/*
	 * The following is for PTE only. GART does not have PDEs.
	 */
	value = addr & 0x0000FFFFFFFFF000ULL;
	value |= flags;
	writeq(value, ptr + (gpu_page_idx * 8));
	return 0;
}

/**
 * amdgpu_gmc_agp_addr - return the address in the AGP address space
 *
 * @bo: TTM BO which needs the address, must be in GTT domain
 *
 * Tries to figure out how to access the BO through the AGP aperture. Returns
 * AMDGPU_BO_INVALID_OFFSET if that is not possible.
 */
uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct ttm_dma_tt *ttm;

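	/* Only single page BOs with a non-cached mapping can be reached
	 * through the AGP aperture.
	 */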
	if (bo->num_pages != 1 || bo->ttm->caching_state == tt_cached)
		return AMDGPU_BO_INVALID_OFFSET;

	ttm = container_of(bo->ttm, struct ttm_dma_tt, ttm);
	if (ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size)
		return AMDGPU_BO_INVALID_OFFSET;

	return adev->gmc.agp_start + ttm->dma_address[0];
}

/**
 * amdgpu_gmc_vram_location - try to find VRAM location
 *
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 * @base: base address at which to put VRAM
 *
 * Function will try to place VRAM at the base address provided as a
 * parameter.
 */
void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
			      u64 base)
{
	uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;

	mc->vram_start = base;
	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
	if (limit && limit < mc->real_vram_size)
		mc->real_vram_size = limit;

	if (mc->xgmi.num_physical_nodes == 0) {
		mc->fb_start = mc->vram_start;
		mc->fb_end = mc->vram_end;
	}
	dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
		 mc->mc_vram_size >> 20, mc->vram_start,
		 mc->vram_end, mc->real_vram_size >> 20);
}

/**
 * amdgpu_gmc_gart_location - try to find GART location
 *
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 *
 * Function will try to place GART before or after VRAM.
 *
 * If the GART size is bigger than the space left, then we adjust the GART
 * size. Thus this function will never fail.
 */
void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{
	const uint64_t four_gb = 0x100000000ULL;
	u64 size_af, size_bf;
	/* To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START */
	u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);

	mc->gart_size += adev->pm.smu_prv_buffer_size;

	/* VCE doesn't like it when BOs cross a 4GB segment, so align
	 * the GART base on a 4GB boundary as well.
	 */
	size_bf = mc->fb_start;
	size_af = max_mc_address + 1 - ALIGN(mc->fb_end + 1, four_gb);

	if (mc->gart_size > max(size_bf, size_af)) {
		dev_warn(adev->dev, "limiting GART\n");
		mc->gart_size = max(size_bf, size_af);
	}

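	/* Put the GART in the smaller of the two free areas that can still
	 * hold it, so the larger area stays available.
	 */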
	if ((size_bf >= mc->gart_size && size_bf < size_af) ||
	    (size_af < mc->gart_size))
		mc->gart_start = 0;
	else
		mc->gart_start = max_mc_address - mc->gart_size + 1;

	mc->gart_start &= ~(four_gb - 1);
	mc->gart_end = mc->gart_start + mc->gart_size - 1;
	dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
		 mc->gart_size >> 20, mc->gart_start, mc->gart_end);
}

/**
 * amdgpu_gmc_agp_location - try to find AGP location
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 *
 * Function will try to find a place for the AGP BAR in the MC address
 * space.
 *
 * The AGP BAR will be assigned the largest available hole in the address
 * space. Should be called after VRAM and GART locations are set up.
 */
void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{
	const uint64_t sixteen_gb = 1ULL << 34;
	const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
	u64 size_af, size_bf;

	if (amdgpu_sriov_vf(adev)) {
		mc->agp_start = 0xffffffffffff;
		mc->agp_end = 0x0;
		mc->agp_size = 0;

		return;
	}

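	/* Compute the 16GB aligned free space below (size_bf) and above
	 * (size_af) the FB, taking the GART placement into account.
	 */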
	if (mc->fb_start > mc->gart_start) {
		size_bf = (mc->fb_start & sixteen_gb_mask) -
			ALIGN(mc->gart_end + 1, sixteen_gb);
		size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb);
	} else {
		size_bf = mc->fb_start & sixteen_gb_mask;
		size_af = (mc->gart_start & sixteen_gb_mask) -
			ALIGN(mc->fb_end + 1, sixteen_gb);
	}

	if (size_bf > size_af) {
		mc->agp_start = (mc->fb_start - size_bf) & sixteen_gb_mask;
		mc->agp_size = size_bf;
	} else {
		mc->agp_start = ALIGN(mc->fb_end + 1, sixteen_gb);
		mc->agp_size = size_af;
	}

	mc->agp_end = mc->agp_start + mc->agp_size - 1;
	dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n",
		 mc->agp_size >> 20, mc->agp_start, mc->agp_end);
}

/**
 * amdgpu_gmc_filter_faults - filter VM faults
 *
 * @adev: amdgpu device structure
 * @addr: address of the VM fault
 * @pasid: PASID of the process causing the fault
 * @timestamp: timestamp of the fault
 *
 * Returns:
 * True if the fault was filtered and should not be processed further.
 * False if the fault is a new one and needs to be handled.
 */
bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
			      uint16_t pasid, uint64_t timestamp)
{
	struct amdgpu_gmc *gmc = &adev->gmc;

	uint64_t stamp, key = addr << 4 | pasid;
	struct amdgpu_gmc_fault *fault;
	uint32_t hash;

	/* If we don't have space left in the ring buffer return immediately */
	stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) -
		AMDGPU_GMC_FAULT_TIMEOUT;
	if (gmc->fault_ring[gmc->last_fault].timestamp >= stamp)
		return true;

	/* Try to find the fault in the hash */
	hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
	fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
	while (fault->timestamp >= stamp) {
		uint64_t tmp;

		if (fault->key == key)
			return true;

		tmp = fault->timestamp;
		fault = &gmc->fault_ring[fault->next];

		/* Check if the entry was reused */
		if (fault->timestamp >= tmp)
			break;
	}

	/* Add the fault to the ring */
	fault = &gmc->fault_ring[gmc->last_fault];
	fault->key = key;
	fault->timestamp = timestamp;

	/* And update the hash */
	fault->next = gmc->fault_hash[hash].idx;
	gmc->fault_hash[hash].idx = gmc->last_fault++;
	return false;
}

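/**
 * amdgpu_gmc_ras_late_init - late RAS init for the GMC related blocks
 *
 * @adev: amdgpu_device pointer
 *
 * Run the RAS late init callbacks of the UMC and MMHUB blocks, if they are
 * provided, and finally the XGMI RAS late init.
 *
 * Returns:
 * 0 on success, negative error code on failure.
 */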
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->umc.funcs && adev->umc.funcs->ras_late_init) {
		r = adev->umc.funcs->ras_late_init(adev);
		if (r)
			return r;
	}

	if (adev->mmhub.funcs && adev->mmhub.funcs->ras_late_init) {
		r = adev->mmhub.funcs->ras_late_init(adev);
		if (r)
			return r;
	}

	return amdgpu_xgmi_ras_late_init(adev);
}

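/**
 * amdgpu_gmc_ras_fini - tear down the GMC related RAS handling
 *
 * @adev: amdgpu_device pointer
 *
 * Clean up the RAS state of the UMC, MMHUB and XGMI blocks.
 */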
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
{
	amdgpu_umc_ras_fini(adev);
	amdgpu_mmhub_ras_fini(adev);
	amdgpu_xgmi_ras_fini(adev);
}

/*
 * The latest engine allocation on gfx9/10 is:
 * Engine 2, 3: firmware
 * Engine 0, 1, 4~16: amdgpu ring,
 * subject to change when ring number changes
 * Engine 17: Gart flushes
 */
#define GFXHUB_FREE_VM_INV_ENGS_BITMAP		0x1FFF3
#define MMHUB_FREE_VM_INV_ENGS_BITMAP		0x1FFF3

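/**
 * amdgpu_gmc_allocate_vm_inv_eng - allocate VM invalidation engines
 *
 * @adev: amdgpu_device pointer
 *
 * Assign each ring a free VM invalidation engine on its VM hub, based on
 * the free engine bitmaps above.
 *
 * Returns:
 * 0 on success, -EINVAL if a ring could not get an invalidation engine.
 */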
int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
		{GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
		 GFXHUB_FREE_VM_INV_ENGS_BITMAP};
	unsigned i;
	unsigned vmhub, inv_eng;

	for (i = 0; i < adev->num_rings; ++i) {
		ring = adev->rings[i];
		vmhub = ring->funcs->vmhub;

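		/* the MES ring does not get a VM invalidation engine */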
		if (ring == &adev->mes.ring)
			continue;

		inv_eng = ffs(vm_inv_engs[vmhub]);
		if (!inv_eng) {
			dev_err(adev->dev, "no VM inv eng for ring %s\n",
				ring->name);
			return -EINVAL;
		}

		ring->vm_inv_eng = inv_eng - 1;
		vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);

		dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
			 ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
	}

	return 0;
}

/**
 * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
 * @adev: amdgpu_device pointer
 *
 * Check and set if the device @adev supports Trusted Memory
 * Zones (TMZ).
 */
void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_RAVEN:
	case CHIP_RENOIR:
	case CHIP_NAVI10:
	case CHIP_NAVI14:
	case CHIP_NAVI12:
		/* Don't enable it by default yet. */
		if (amdgpu_tmz < 1) {
			adev->gmc.tmz_enabled = false;
			dev_info(adev->dev,
				 "Trusted Memory Zone (TMZ) feature disabled as experimental (default)\n");
		} else {
			adev->gmc.tmz_enabled = true;
			dev_info(adev->dev,
				 "Trusted Memory Zone (TMZ) feature enabled as experimental (cmd line)\n");
		}
		break;
	default:
		adev->gmc.tmz_enabled = false;
		dev_warn(adev->dev,
			 "Trusted Memory Zone (TMZ) feature not supported\n");
		break;
	}
}

/**
 * amdgpu_gmc_noretry_set -- set per asic noretry defaults
 * @adev: amdgpu_device pointer
 *
 * Set a per asic default for the no-retry parameter.
 */
void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
{
	struct amdgpu_gmc *gmc = &adev->gmc;

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		/* Raven currently has issues with noretry
		 * regardless of what we decide for other
		 * asics, we should leave raven with
		 * noretry = 0 until we root cause the
		 * issues.
		 */
		if (amdgpu_noretry == -1)
			gmc->noretry = 0;
		else
			gmc->noretry = amdgpu_noretry;
		break;
	default:
		/* default this to 0 for now, but we may want
		 * to change this in the future for certain
		 * GPUs as it can increase performance in
		 * certain cases.
		 */
		if (amdgpu_noretry == -1)
			gmc->noretry = 0;
		else
			gmc->noretry = amdgpu_noretry;
		break;
	}
}

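/**
 * amdgpu_gmc_set_vm_fault_masks - set or clear the VM fault control bits
 *
 * @adev: amdgpu_device pointer
 * @hub_type: the VM hub to program
 * @enable: true to set the VM fault bits, false to clear them
 *
 * Set or clear the VM fault control bits in the CNTL registers of all 16 VM
 * contexts of the given hub.
 */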
void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
				   bool enable)
{
	struct amdgpu_vmhub *hub;
	u32 tmp, reg, i;

	hub = &adev->vmhub[hub_type];
	for (i = 0; i < 16; i++) {
		reg = hub->vm_context0_cntl + hub->ctx_distance * i;

		tmp = RREG32(reg);
		if (enable)
			tmp |= hub->vm_cntx_cntl_vm_fault;
		else
			tmp &= ~hub->vm_cntx_cntl_vm_fault;

		WREG32(reg, tmp);
	}
}

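/**
 * amdgpu_gmc_get_vbios_allocations - get the VBIOS/pre-OS framebuffer size
 *
 * @adev: amdgpu_device pointer
 *
 * Determine how much VRAM the VBIOS/pre-OS console is using and how much of
 * it has to stay reserved as stolen memory.
 */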
void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
{
	unsigned size;

	/*
	 * TODO:
	 * Currently there is a bug where some memory client outside
	 * of the driver writes to first 8M of VRAM on S3 resume,
	 * this overrides GART which by default gets placed in first 8M and
	 * causes VM_FAULTS once GTT is accessed.
	 * Keep the stolen memory reservation until this is solved.
	 */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_RAVEN:
	case CHIP_RENOIR:
		adev->mman.keep_stolen_vga_memory = true;
		break;
	default:
		adev->mman.keep_stolen_vga_memory = false;
		break;
	}

	if (!amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) {
		size = 0;
	} else {
		size = amdgpu_gmc_get_vbios_fb_size(adev);

		if (adev->mman.keep_stolen_vga_memory)
			size = max(size, (unsigned)AMDGPU_VBIOS_VGA_ALLOCATION);
	}

	/* set to 0 if the pre-OS buffer uses up most of vram */
	if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
		size = 0;

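	/* Reserve at most AMDGPU_VBIOS_VGA_ALLOCATION as the VGA stolen area,
	 * anything beyond that becomes the extended stolen reservation.
	 */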
	if (size > AMDGPU_VBIOS_VGA_ALLOCATION) {
		adev->mman.stolen_vga_size = AMDGPU_VBIOS_VGA_ALLOCATION;
		adev->mman.stolen_extended_size = size - adev->mman.stolen_vga_size;
	} else {
		adev->mman.stolen_vga_size = size;
		adev->mman.stolen_extended_size = 0;
	}
}