1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 */
25
26 #include <linux/firmware.h>
27 #include <linux/pm_runtime.h>
28
29 #include "amdgpu.h"
30 #include "amdgpu_gfx.h"
31 #include "amdgpu_rlc.h"
32 #include "amdgpu_ras.h"
33 #include "amdgpu_reset.h"
34 #include "amdgpu_xcp.h"
35 #include "amdgpu_xgmi.h"
36
/* Delay, in jiffies (100 ms), applied before the gfx off feature is enabled. */
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)

/* Delay value of zero, i.e. no delay at all. */
#define GFX_OFF_NO_DELAY 0
41
/*
 * GPU GFX IP block helper functions.
 */
45
amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device * adev,int mec,int pipe,int queue)46 int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
47 int pipe, int queue)
48 {
49 int bit = 0;
50
51 bit += mec * adev->gfx.mec.num_pipe_per_mec
52 * adev->gfx.mec.num_queue_per_pipe;
53 bit += pipe * adev->gfx.mec.num_queue_per_pipe;
54 bit += queue;
55
56 return bit;
57 }
58
amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device * adev,int bit,int * mec,int * pipe,int * queue)59 void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
60 int *mec, int *pipe, int *queue)
61 {
62 *queue = bit % adev->gfx.mec.num_queue_per_pipe;
63 *pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
64 % adev->gfx.mec.num_pipe_per_mec;
65 *mec = (bit / adev->gfx.mec.num_queue_per_pipe)
66 / adev->gfx.mec.num_pipe_per_mec;
67
68 }
69
amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device * adev,int xcc_id,int mec,int pipe,int queue)70 bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
71 int xcc_id, int mec, int pipe, int queue)
72 {
73 return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
74 adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
75 }
76
amdgpu_gfx_me_queue_to_bit(struct amdgpu_device * adev,int me,int pipe,int queue)77 int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
78 int me, int pipe, int queue)
79 {
80 int bit = 0;
81
82 bit += me * adev->gfx.me.num_pipe_per_me
83 * adev->gfx.me.num_queue_per_pipe;
84 bit += pipe * adev->gfx.me.num_queue_per_pipe;
85 bit += queue;
86
87 return bit;
88 }
89
amdgpu_gfx_bit_to_me_queue(struct amdgpu_device * adev,int bit,int * me,int * pipe,int * queue)90 void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
91 int *me, int *pipe, int *queue)
92 {
93 *queue = bit % adev->gfx.me.num_queue_per_pipe;
94 *pipe = (bit / adev->gfx.me.num_queue_per_pipe)
95 % adev->gfx.me.num_pipe_per_me;
96 *me = (bit / adev->gfx.me.num_queue_per_pipe)
97 / adev->gfx.me.num_pipe_per_me;
98 }
99
amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device * adev,int me,int pipe,int queue)100 bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
101 int me, int pipe, int queue)
102 {
103 return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
104 adev->gfx.me.queue_bitmap);
105 }
106
107 /**
108 * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
109 *
110 * @mask: array in which the per-shader array disable masks will be stored
111 * @max_se: number of SEs
112 * @max_sh: number of SHs
113 *
114 * The bitmask of CUs to be disabled in the shader array determined by se and
115 * sh is stored in mask[se * max_sh + sh].
116 */
amdgpu_gfx_parse_disable_cu(unsigned int * mask,unsigned int max_se,unsigned int max_sh)117 void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh)
118 {
119 unsigned int se, sh, cu;
120 const char *p;
121
122 memset(mask, 0, sizeof(*mask) * max_se * max_sh);
123
124 if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
125 return;
126
127 p = amdgpu_disable_cu;
128 for (;;) {
129 char *next;
130 int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
131
132 if (ret < 3) {
133 DRM_ERROR("amdgpu: could not parse disable_cu\n");
134 return;
135 }
136
137 if (se < max_se && sh < max_sh && cu < 16) {
138 DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
139 mask[se * max_sh + sh] |= 1u << cu;
140 } else {
141 DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
142 se, sh, cu);
143 }
144
145 next = strchr(p, ',');
146 if (!next)
147 break;
148 p = next + 1;
149 }
150 }
151
amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device * adev)152 static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
153 {
154 return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
155 }
156
amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device * adev)157 static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
158 {
159 if (amdgpu_compute_multipipe != -1) {
160 DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
161 amdgpu_compute_multipipe);
162 return amdgpu_compute_multipipe == 1;
163 }
164
165 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
166 return true;
167
168 /* FIXME: spreading the queues across pipes causes perf regressions
169 * on POLARIS11 compute workloads */
170 if (adev->asic_type == CHIP_POLARIS11)
171 return false;
172
173 return adev->gfx.mec.num_mec > 1;
174 }
175
amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device * adev,struct amdgpu_ring * ring)176 bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
177 struct amdgpu_ring *ring)
178 {
179 int queue = ring->queue;
180 int pipe = ring->pipe;
181
182 /* Policy: use pipe1 queue0 as high priority graphics queue if we
183 * have more than one gfx pipe.
184 */
185 if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
186 adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
187 int me = ring->me;
188 int bit;
189
190 bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
191 if (ring == &adev->gfx.gfx_ring[bit])
192 return true;
193 }
194
195 return false;
196 }
197
amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device * adev,struct amdgpu_ring * ring)198 bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
199 struct amdgpu_ring *ring)
200 {
201 /* Policy: use 1st queue as high priority compute queue if we
202 * have more than one compute queue.
203 */
204 if (adev->gfx.num_compute_rings > 1 &&
205 ring == &adev->gfx.compute_ring[0])
206 return true;
207
208 return false;
209 }
210
amdgpu_gfx_compute_queue_acquire(struct amdgpu_device * adev)211 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
212 {
213 int i, j, queue, pipe;
214 bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
215 int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
216 adev->gfx.mec.num_queue_per_pipe,
217 adev->gfx.num_compute_rings);
218 int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
219
220 if (multipipe_policy) {
221 /* policy: make queues evenly cross all pipes on MEC1 only
222 * for multiple xcc, just use the original policy for simplicity */
223 for (j = 0; j < num_xcc; j++) {
224 for (i = 0; i < max_queues_per_mec; i++) {
225 pipe = i % adev->gfx.mec.num_pipe_per_mec;
226 queue = (i / adev->gfx.mec.num_pipe_per_mec) %
227 adev->gfx.mec.num_queue_per_pipe;
228
229 set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
230 adev->gfx.mec_bitmap[j].queue_bitmap);
231 }
232 }
233 } else {
234 /* policy: amdgpu owns all queues in the given pipe */
235 for (j = 0; j < num_xcc; j++) {
236 for (i = 0; i < max_queues_per_mec; ++i)
237 set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
238 }
239 }
240
241 for (j = 0; j < num_xcc; j++) {
242 dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
243 bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
244 }
245 }
246
amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device * adev)247 void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
248 {
249 int i, queue, pipe;
250 bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
251 int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
252 adev->gfx.me.num_queue_per_pipe;
253
254 if (multipipe_policy) {
255 /* policy: amdgpu owns the first queue per pipe at this stage
256 * will extend to mulitple queues per pipe later */
257 for (i = 0; i < max_queues_per_me; i++) {
258 pipe = i % adev->gfx.me.num_pipe_per_me;
259 queue = (i / adev->gfx.me.num_pipe_per_me) %
260 adev->gfx.me.num_queue_per_pipe;
261
262 set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
263 adev->gfx.me.queue_bitmap);
264 }
265 } else {
266 for (i = 0; i < max_queues_per_me; ++i)
267 set_bit(i, adev->gfx.me.queue_bitmap);
268 }
269
270 /* update the number of active graphics rings */
271 adev->gfx.num_gfx_rings =
272 bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
273 }
274
amdgpu_gfx_kiq_acquire(struct amdgpu_device * adev,struct amdgpu_ring * ring,int xcc_id)275 static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
276 struct amdgpu_ring *ring, int xcc_id)
277 {
278 int queue_bit;
279 int mec, pipe, queue;
280
281 queue_bit = adev->gfx.mec.num_mec
282 * adev->gfx.mec.num_pipe_per_mec
283 * adev->gfx.mec.num_queue_per_pipe;
284
285 while (--queue_bit >= 0) {
286 if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
287 continue;
288
289 amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
290
291 /*
292 * 1. Using pipes 2/3 from MEC 2 seems cause problems.
293 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
294 * only can be issued on queue 0.
295 */
296 if ((mec == 1 && pipe > 1) || queue != 0)
297 continue;
298
299 ring->me = mec + 1;
300 ring->pipe = pipe;
301 ring->queue = queue;
302
303 return 0;
304 }
305
306 dev_err(adev->dev, "Failed to find a queue for KIQ\n");
307 return -EINVAL;
308 }
309
amdgpu_gfx_kiq_init_ring(struct amdgpu_device * adev,int xcc_id)310 int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id)
311 {
312 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
313 struct amdgpu_irq_src *irq = &kiq->irq;
314 struct amdgpu_ring *ring = &kiq->ring;
315 int r = 0;
316
317 spin_lock_init(&kiq->ring_lock);
318
319 ring->adev = NULL;
320 ring->ring_obj = NULL;
321 ring->use_doorbell = true;
322 ring->xcc_id = xcc_id;
323 ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
324 ring->doorbell_index =
325 (adev->doorbell_index.kiq +
326 xcc_id * adev->doorbell_index.xcc_doorbell_range)
327 << 1;
328
329 r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
330 if (r)
331 return r;
332
333 ring->eop_gpu_addr = kiq->eop_gpu_addr;
334 ring->no_scheduler = true;
335 snprintf(ring->name, sizeof(ring->name), "kiq_%hhu.%hhu.%hhu.%hhu",
336 (unsigned char)xcc_id, (unsigned char)ring->me,
337 (unsigned char)ring->pipe, (unsigned char)ring->queue);
338 r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
339 AMDGPU_RING_PRIO_DEFAULT, NULL);
340 if (r)
341 dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
342
343 return r;
344 }
345
/* Tear down a KIQ ring; this simply forwards to amdgpu_ring_fini(). */
void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
	amdgpu_ring_fini(ring);
}
350
amdgpu_gfx_kiq_fini(struct amdgpu_device * adev,int xcc_id)351 void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
352 {
353 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
354
355 amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
356 }
357
amdgpu_gfx_kiq_init(struct amdgpu_device * adev,unsigned int hpd_size,int xcc_id)358 int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
359 unsigned int hpd_size, int xcc_id)
360 {
361 int r;
362 u32 *hpd;
363 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
364
365 r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
366 AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
367 &kiq->eop_gpu_addr, (void **)&hpd);
368 if (r) {
369 dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
370 return r;
371 }
372
373 memset(hpd, 0, hpd_size);
374
375 r = amdgpu_bo_reserve(kiq->eop_obj, true);
376 if (unlikely(r != 0))
377 dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
378 amdgpu_bo_kunmap(kiq->eop_obj);
379 amdgpu_bo_unreserve(kiq->eop_obj);
380
381 return 0;
382 }
383
384 /* create MQD for each compute/gfx queue */
amdgpu_gfx_mqd_sw_init(struct amdgpu_device * adev,unsigned int mqd_size,int xcc_id)385 int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
386 unsigned int mqd_size, int xcc_id)
387 {
388 int r, i, j;
389 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
390 struct amdgpu_ring *ring = &kiq->ring;
391 u32 domain = AMDGPU_GEM_DOMAIN_GTT;
392
393 #if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
394 /* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
395 if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
396 domain |= AMDGPU_GEM_DOMAIN_VRAM;
397 #endif
398
399 /* create MQD for KIQ */
400 if (!adev->enable_mes_kiq && !ring->mqd_obj) {
401 /* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
402 * otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
403 * deallocated and gart_unbind, to strict diverage we decide to use VRAM domain for
404 * KIQ MQD no matter SRIOV or Bare-metal
405 */
406 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
407 AMDGPU_GEM_DOMAIN_VRAM |
408 AMDGPU_GEM_DOMAIN_GTT,
409 &ring->mqd_obj,
410 &ring->mqd_gpu_addr,
411 &ring->mqd_ptr);
412 if (r) {
413 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
414 return r;
415 }
416
417 /* prepare MQD backup */
418 kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
419 if (!kiq->mqd_backup) {
420 dev_warn(adev->dev,
421 "no memory to create MQD backup for ring %s\n", ring->name);
422 return -ENOMEM;
423 }
424 }
425
426 if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
427 /* create MQD for each KGQ */
428 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
429 ring = &adev->gfx.gfx_ring[i];
430 if (!ring->mqd_obj) {
431 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
432 domain, &ring->mqd_obj,
433 &ring->mqd_gpu_addr, &ring->mqd_ptr);
434 if (r) {
435 dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
436 return r;
437 }
438
439 ring->mqd_size = mqd_size;
440 /* prepare MQD backup */
441 adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
442 if (!adev->gfx.me.mqd_backup[i]) {
443 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
444 return -ENOMEM;
445 }
446 }
447 }
448 }
449
450 /* create MQD for each KCQ */
451 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
452 j = i + xcc_id * adev->gfx.num_compute_rings;
453 ring = &adev->gfx.compute_ring[j];
454 if (!ring->mqd_obj) {
455 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
456 domain, &ring->mqd_obj,
457 &ring->mqd_gpu_addr, &ring->mqd_ptr);
458 if (r) {
459 dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
460 return r;
461 }
462
463 ring->mqd_size = mqd_size;
464 /* prepare MQD backup */
465 adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL);
466 if (!adev->gfx.mec.mqd_backup[j]) {
467 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
468 return -ENOMEM;
469 }
470 }
471 }
472
473 return 0;
474 }
475
amdgpu_gfx_mqd_sw_fini(struct amdgpu_device * adev,int xcc_id)476 void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
477 {
478 struct amdgpu_ring *ring = NULL;
479 int i, j;
480 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
481
482 if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
483 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
484 ring = &adev->gfx.gfx_ring[i];
485 kfree(adev->gfx.me.mqd_backup[i]);
486 amdgpu_bo_free_kernel(&ring->mqd_obj,
487 &ring->mqd_gpu_addr,
488 &ring->mqd_ptr);
489 }
490 }
491
492 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
493 j = i + xcc_id * adev->gfx.num_compute_rings;
494 ring = &adev->gfx.compute_ring[j];
495 kfree(adev->gfx.mec.mqd_backup[j]);
496 amdgpu_bo_free_kernel(&ring->mqd_obj,
497 &ring->mqd_gpu_addr,
498 &ring->mqd_ptr);
499 }
500
501 ring = &kiq->ring;
502 kfree(kiq->mqd_backup);
503 amdgpu_bo_free_kernel(&ring->mqd_obj,
504 &ring->mqd_gpu_addr,
505 &ring->mqd_ptr);
506 }
507
amdgpu_gfx_disable_kcq(struct amdgpu_device * adev,int xcc_id)508 int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
509 {
510 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
511 struct amdgpu_ring *kiq_ring = &kiq->ring;
512 int i, r = 0;
513 int j;
514
515 if (adev->enable_mes) {
516 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
517 j = i + xcc_id * adev->gfx.num_compute_rings;
518 amdgpu_mes_unmap_legacy_queue(adev,
519 &adev->gfx.compute_ring[j],
520 RESET_QUEUES, 0, 0);
521 }
522 return 0;
523 }
524
525 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
526 return -EINVAL;
527
528 if (!kiq_ring->sched.ready || adev->job_hang)
529 return 0;
530 /**
531 * This is workaround: only skip kiq_ring test
532 * during ras recovery in suspend stage for gfx9.4.3
533 */
534 if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
535 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
536 amdgpu_ras_in_recovery(adev))
537 return 0;
538
539 spin_lock(&kiq->ring_lock);
540 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
541 adev->gfx.num_compute_rings)) {
542 spin_unlock(&kiq->ring_lock);
543 return -ENOMEM;
544 }
545
546 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
547 j = i + xcc_id * adev->gfx.num_compute_rings;
548 kiq->pmf->kiq_unmap_queues(kiq_ring,
549 &adev->gfx.compute_ring[j],
550 RESET_QUEUES, 0, 0);
551 }
552 /* Submit unmap queue packet */
553 amdgpu_ring_commit(kiq_ring);
554 /*
555 * Ring test will do a basic scratch register change check. Just run
556 * this to ensure that unmap queues that is submitted before got
557 * processed successfully before returning.
558 */
559 r = amdgpu_ring_test_helper(kiq_ring);
560
561 spin_unlock(&kiq->ring_lock);
562
563 return r;
564 }
565
amdgpu_gfx_disable_kgq(struct amdgpu_device * adev,int xcc_id)566 int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
567 {
568 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
569 struct amdgpu_ring *kiq_ring = &kiq->ring;
570 int i, r = 0;
571 int j;
572
573 if (adev->enable_mes) {
574 if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
575 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
576 j = i + xcc_id * adev->gfx.num_gfx_rings;
577 amdgpu_mes_unmap_legacy_queue(adev,
578 &adev->gfx.gfx_ring[j],
579 PREEMPT_QUEUES, 0, 0);
580 }
581 }
582 return 0;
583 }
584
585 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
586 return -EINVAL;
587
588 if (!adev->gfx.kiq[0].ring.sched.ready || adev->job_hang)
589 return 0;
590
591 if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
592 spin_lock(&kiq->ring_lock);
593 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
594 adev->gfx.num_gfx_rings)) {
595 spin_unlock(&kiq->ring_lock);
596 return -ENOMEM;
597 }
598
599 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
600 j = i + xcc_id * adev->gfx.num_gfx_rings;
601 kiq->pmf->kiq_unmap_queues(kiq_ring,
602 &adev->gfx.gfx_ring[j],
603 PREEMPT_QUEUES, 0, 0);
604 }
605 /* Submit unmap queue packet */
606 amdgpu_ring_commit(kiq_ring);
607
608 /*
609 * Ring test will do a basic scratch register change check.
610 * Just run this to ensure that unmap queues that is submitted
611 * before got processed successfully before returning.
612 */
613 r = amdgpu_ring_test_helper(kiq_ring);
614 spin_unlock(&kiq->ring_lock);
615 }
616
617 return r;
618 }
619
/*
 * Convert a bit index of the driver's MEC queue bitmap into the bit index
 * used for the set-resources queue mask, which is laid out with a fixed
 * 4 pipes per MEC and 8 queues per pipe.
 */
int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
					      int queue_bit)
{
	int mec, pipe, queue;

	amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

	return mec * 4 * 8 + pipe * 8 + queue;
}
632
amdgpu_gfx_mes_enable_kcq(struct amdgpu_device * adev,int xcc_id)633 static int amdgpu_gfx_mes_enable_kcq(struct amdgpu_device *adev, int xcc_id)
634 {
635 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
636 struct amdgpu_ring *kiq_ring = &kiq->ring;
637 uint64_t queue_mask = ~0ULL;
638 int r, i, j;
639
640 amdgpu_device_flush_hdp(adev, NULL);
641
642 if (!adev->enable_uni_mes) {
643 spin_lock(&kiq->ring_lock);
644 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->set_resources_size);
645 if (r) {
646 dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
647 spin_unlock(&kiq->ring_lock);
648 return r;
649 }
650
651 kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
652 r = amdgpu_ring_test_helper(kiq_ring);
653 spin_unlock(&kiq->ring_lock);
654 if (r)
655 dev_err(adev->dev, "KIQ failed to set resources\n");
656 }
657
658 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
659 j = i + xcc_id * adev->gfx.num_compute_rings;
660 r = amdgpu_mes_map_legacy_queue(adev,
661 &adev->gfx.compute_ring[j]);
662 if (r) {
663 dev_err(adev->dev, "failed to map compute queue\n");
664 return r;
665 }
666 }
667
668 return 0;
669 }
670
amdgpu_gfx_enable_kcq(struct amdgpu_device * adev,int xcc_id)671 int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
672 {
673 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
674 struct amdgpu_ring *kiq_ring = &kiq->ring;
675 uint64_t queue_mask = 0;
676 int r, i, j;
677
678 if (adev->mes.enable_legacy_queue_map)
679 return amdgpu_gfx_mes_enable_kcq(adev, xcc_id);
680
681 if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
682 return -EINVAL;
683
684 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
685 if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
686 continue;
687
688 /* This situation may be hit in the future if a new HW
689 * generation exposes more than 64 queues. If so, the
690 * definition of queue_mask needs updating */
691 if (WARN_ON(i > (sizeof(queue_mask)*8))) {
692 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
693 break;
694 }
695
696 queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
697 }
698
699 amdgpu_device_flush_hdp(adev, NULL);
700
701 DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
702 kiq_ring->queue);
703
704 spin_lock(&kiq->ring_lock);
705 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
706 adev->gfx.num_compute_rings +
707 kiq->pmf->set_resources_size);
708 if (r) {
709 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
710 spin_unlock(&kiq->ring_lock);
711 return r;
712 }
713
714 kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
715 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
716 j = i + xcc_id * adev->gfx.num_compute_rings;
717 kiq->pmf->kiq_map_queues(kiq_ring,
718 &adev->gfx.compute_ring[j]);
719 }
720 /* Submit map queue packet */
721 amdgpu_ring_commit(kiq_ring);
722 /*
723 * Ring test will do a basic scratch register change check. Just run
724 * this to ensure that map queues that is submitted before got
725 * processed successfully before returning.
726 */
727 r = amdgpu_ring_test_helper(kiq_ring);
728 spin_unlock(&kiq->ring_lock);
729 if (r)
730 DRM_ERROR("KCQ enable failed\n");
731
732 return r;
733 }
734
amdgpu_gfx_enable_kgq(struct amdgpu_device * adev,int xcc_id)735 int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
736 {
737 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
738 struct amdgpu_ring *kiq_ring = &kiq->ring;
739 int r, i, j;
740
741 if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
742 return -EINVAL;
743
744 amdgpu_device_flush_hdp(adev, NULL);
745
746 if (adev->mes.enable_legacy_queue_map) {
747 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
748 j = i + xcc_id * adev->gfx.num_gfx_rings;
749 r = amdgpu_mes_map_legacy_queue(adev,
750 &adev->gfx.gfx_ring[j]);
751 if (r) {
752 DRM_ERROR("failed to map gfx queue\n");
753 return r;
754 }
755 }
756
757 return 0;
758 }
759
760 spin_lock(&kiq->ring_lock);
761 /* No need to map kcq on the slave */
762 if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
763 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
764 adev->gfx.num_gfx_rings);
765 if (r) {
766 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
767 spin_unlock(&kiq->ring_lock);
768 return r;
769 }
770
771 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
772 j = i + xcc_id * adev->gfx.num_gfx_rings;
773 kiq->pmf->kiq_map_queues(kiq_ring,
774 &adev->gfx.gfx_ring[j]);
775 }
776 }
777 /* Submit map queue packet */
778 amdgpu_ring_commit(kiq_ring);
779 /*
780 * Ring test will do a basic scratch register change check. Just run
781 * this to ensure that map queues that is submitted before got
782 * processed successfully before returning.
783 */
784 r = amdgpu_ring_test_helper(kiq_ring);
785 spin_unlock(&kiq->ring_lock);
786 if (r)
787 DRM_ERROR("KGQ enable failed\n");
788
789 return r;
790 }
791
792 /* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
793 *
794 * @adev: amdgpu_device pointer
795 * @bool enable true: enable gfx off feature, false: disable gfx off feature
796 *
797 * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled.
798 * 2. other client can send request to disable gfx off feature, the request should be honored.
799 * 3. other client can cancel their request of disable gfx off feature
800 * 4. other client should not send request to enable gfx off feature before disable gfx off feature.
801 */
802
amdgpu_gfx_off_ctrl(struct amdgpu_device * adev,bool enable)803 void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
804 {
805 unsigned long delay = GFX_OFF_DELAY_ENABLE;
806
807 if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
808 return;
809
810 mutex_lock(&adev->gfx.gfx_off_mutex);
811
812 if (enable) {
813 /* If the count is already 0, it means there's an imbalance bug somewhere.
814 * Note that the bug may be in a different caller than the one which triggers the
815 * WARN_ON_ONCE.
816 */
817 if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
818 goto unlock;
819
820 adev->gfx.gfx_off_req_count--;
821
822 if (adev->gfx.gfx_off_req_count == 0 &&
823 !adev->gfx.gfx_off_state) {
824 /* If going to s2idle, no need to wait */
825 if (adev->in_s0ix) {
826 if (!amdgpu_dpm_set_powergating_by_smu(adev,
827 AMD_IP_BLOCK_TYPE_GFX, true))
828 adev->gfx.gfx_off_state = true;
829 } else {
830 schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
831 delay);
832 }
833 }
834 } else {
835 if (adev->gfx.gfx_off_req_count == 0) {
836 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
837
838 if (adev->gfx.gfx_off_state &&
839 !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
840 adev->gfx.gfx_off_state = false;
841
842 if (adev->gfx.funcs->init_spm_golden) {
843 dev_dbg(adev->dev,
844 "GFXOFF is disabled, re-init SPM golden settings\n");
845 amdgpu_gfx_init_spm_golden(adev);
846 }
847 }
848 }
849
850 adev->gfx.gfx_off_req_count++;
851 }
852
853 unlock:
854 mutex_unlock(&adev->gfx.gfx_off_mutex);
855 }
856
/*
 * Forward @value to amdgpu_dpm_set_residency_gfxoff() while holding
 * gfx_off_mutex, and return its result.
 */
int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
{
	int ret;

	mutex_lock(&adev->gfx.gfx_off_mutex);
	ret = amdgpu_dpm_set_residency_gfxoff(adev, value);
	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return ret;
}
869
/*
 * Call amdgpu_dpm_get_residency_gfxoff() with @value while holding
 * gfx_off_mutex, and return its result.
 */
int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value)
{
	int ret;

	mutex_lock(&adev->gfx.gfx_off_mutex);
	ret = amdgpu_dpm_get_residency_gfxoff(adev, value);
	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return ret;
}
882
/*
 * Call amdgpu_dpm_get_entrycount_gfxoff() with @value while holding
 * gfx_off_mutex, and return its result.
 */
int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value)
{
	int ret;

	mutex_lock(&adev->gfx.gfx_off_mutex);
	ret = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return ret;
}
895
/*
 * Call amdgpu_dpm_get_status_gfxoff() with @value while holding
 * gfx_off_mutex, and return its result.
 */
int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
{
	int ret;

	mutex_lock(&adev->gfx.gfx_off_mutex);
	ret = amdgpu_dpm_get_status_gfxoff(adev, value);
	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return ret;
}
909
amdgpu_gfx_ras_late_init(struct amdgpu_device * adev,struct ras_common_if * ras_block)910 int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
911 {
912 int r;
913
914 if (amdgpu_ras_is_supported(adev, ras_block->block)) {
915 if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
916 r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
917 if (r)
918 return r;
919 }
920
921 r = amdgpu_ras_block_late_init(adev, ras_block);
922 if (r)
923 return r;
924
925 if (adev->gfx.cp_ecc_error_irq.funcs) {
926 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
927 if (r)
928 goto late_fini;
929 }
930 } else {
931 amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
932 }
933
934 return 0;
935 late_fini:
936 amdgpu_ras_block_late_fini(adev, ras_block);
937 return r;
938 }
939
amdgpu_gfx_ras_sw_init(struct amdgpu_device * adev)940 int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
941 {
942 int err = 0;
943 struct amdgpu_gfx_ras *ras = NULL;
944
945 /* adev->gfx.ras is NULL, which means gfx does not
946 * support ras function, then do nothing here.
947 */
948 if (!adev->gfx.ras)
949 return 0;
950
951 ras = adev->gfx.ras;
952
953 err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
954 if (err) {
955 dev_err(adev->dev, "Failed to register gfx ras block!\n");
956 return err;
957 }
958
959 strcpy(ras->ras_block.ras_comm.name, "gfx");
960 ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
961 ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
962 adev->gfx.ras_if = &ras->ras_block.ras_comm;
963
964 /* If not define special ras_late_init function, use gfx default ras_late_init */
965 if (!ras->ras_block.ras_late_init)
966 ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
967
968 /* If not defined special ras_cb function, use default ras_cb */
969 if (!ras->ras_block.ras_cb)
970 ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
971
972 return 0;
973 }
974
amdgpu_gfx_poison_consumption_handler(struct amdgpu_device * adev,struct amdgpu_iv_entry * entry)975 int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
976 struct amdgpu_iv_entry *entry)
977 {
978 if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler)
979 return adev->gfx.ras->poison_consumption_handler(adev, entry);
980
981 return 0;
982 }
983
amdgpu_gfx_process_ras_data_cb(struct amdgpu_device * adev,void * err_data,struct amdgpu_iv_entry * entry)984 int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
985 void *err_data,
986 struct amdgpu_iv_entry *entry)
987 {
988 /* TODO ue will trigger an interrupt.
989 *
990 * When “Full RAS” is enabled, the per-IP interrupt sources should
991 * be disabled and the driver should only look for the aggregated
992 * interrupt via sync flood
993 */
994 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
995 kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
996 if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
997 adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
998 adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
999 amdgpu_ras_reset_gpu(adev);
1000 }
1001 return AMDGPU_RAS_SUCCESS;
1002 }
1003
amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device * adev,struct amdgpu_irq_src * source,struct amdgpu_iv_entry * entry)1004 int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
1005 struct amdgpu_irq_src *source,
1006 struct amdgpu_iv_entry *entry)
1007 {
1008 struct ras_common_if *ras_if = adev->gfx.ras_if;
1009 struct ras_dispatch_if ih_data = {
1010 .entry = entry,
1011 };
1012
1013 if (!ras_if)
1014 return 0;
1015
1016 ih_data.head = *ras_if;
1017
1018 DRM_ERROR("CP ECC ERROR IRQ\n");
1019 amdgpu_ras_interrupt_dispatch(adev, &ih_data);
1020 return 0;
1021 }
1022
amdgpu_gfx_ras_error_func(struct amdgpu_device * adev,void * ras_error_status,void (* func)(struct amdgpu_device * adev,void * ras_error_status,int xcc_id))1023 void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
1024 void *ras_error_status,
1025 void (*func)(struct amdgpu_device *adev, void *ras_error_status,
1026 int xcc_id))
1027 {
1028 int i;
1029 int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
1030 uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
1031 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
1032
1033 if (err_data) {
1034 err_data->ue_count = 0;
1035 err_data->ce_count = 0;
1036 }
1037
1038 for_each_inst(i, xcc_mask)
1039 func(adev, ras_error_status, i);
1040 }
1041
/**
 * amdgpu_kiq_rreg - read a register through the KIQ ring
 * @adev: amdgpu_device pointer
 * @reg: register offset to read
 * @xcc_id: index of the KIQ instance to use
 *
 * When the MES ring is ready the read is delegated to amdgpu_mes_rreg().
 * Otherwise a register-read packet plus a polling fence are emitted on the
 * KIQ ring and the result is fetched from a writeback slot once the fence
 * signals.
 *
 * Returns the register value, 0 when hardware access is being skipped, or ~0
 * when the read could not be issued or timed out.
 */
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq, reg_val_offs = 0, value = 0;
	/*
	 * Track the writeback allocation explicitly instead of inferring it
	 * from a non-zero offset.
	 * NOTE(review): assumes offset 0 can be a valid slot — confirm against
	 * amdgpu_device_wb_get().
	 */
	bool wb_allocated = false;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *ring = &kiq->ring;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (adev->mes.ring[0].sched.ready)
		return amdgpu_mes_rreg(adev, reg);

	BUG_ON(!ring->funcs->emit_rreg);

	spin_lock_irqsave(&kiq->ring_lock, flags);
	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
		pr_err("critical bug! too many kiq readers\n");
		goto failed_unlock;
	}
	wb_allocated = true;

	r = amdgpu_ring_alloc(ring, 32);
	if (r)
		goto failed_unlock;

	amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* don't wait anymore for gpu reset case because this way may
	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
	 * never return if we keep waiting in virt_kiq_rreg, which cause
	 * gpu_recover() hang there.
	 *
	 * also don't wait anymore for IRQ context
	 */
	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
		goto failed_kiq_read;

	might_sleep();
	/* cnt ends up at MAX_KIQ_REG_TRY + 1 only when every retry timed out. */
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_read;

	/* Order the fence observation before reading the writeback slot. */
	mb();
	value = adev->wb.wb[reg_val_offs];
	amdgpu_device_wb_free(adev, reg_val_offs);
	return value;

failed_undo:
	amdgpu_ring_undo(ring);
failed_unlock:
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_read:
	if (wb_allocated)
		amdgpu_device_wb_free(adev, reg_val_offs);
	dev_err(adev->dev, "failed to read reg:%x\n", reg);
	return ~0;
}
1112
/**
 * amdgpu_kiq_wreg - write a register through the KIQ ring
 * @adev: amdgpu_device pointer
 * @reg: register offset to write
 * @v: value to write
 * @xcc_id: index of the KIQ instance to use
 *
 * When the MES ring is ready the write is delegated to amdgpu_mes_wreg().
 * Otherwise a register-write packet plus a polling fence are emitted on the
 * KIQ ring and the fence is waited for. Failures are only logged.
 */
void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *ring = &kiq->ring;
	unsigned long irq_flags;
	signed long ret, retries = 0;
	uint32_t fence_seq;

	BUG_ON(!ring->funcs->emit_wreg);

	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (adev->mes.ring[0].sched.ready) {
		amdgpu_mes_wreg(adev, reg, v);
		return;
	}

	spin_lock_irqsave(&kiq->ring_lock, irq_flags);

	ret = amdgpu_ring_alloc(ring, 32);
	if (ret)
		goto err_unlock;

	amdgpu_ring_emit_wreg(ring, reg, v);

	ret = amdgpu_fence_emit_polling(ring, &fence_seq, MAX_KIQ_REG_WAIT);
	if (ret)
		goto err_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, irq_flags);

	ret = amdgpu_fence_wait_polling(ring, fence_seq, MAX_KIQ_REG_WAIT);

	/*
	 * Give up right away during a GPU reset: continuing to wait here could
	 * block gpu_recover() forever (e.g. when this is reached from TTM and
	 * ttm_bo_lock_delayed_workqueue() never returns because we keep
	 * waiting).
	 *
	 * Interrupt context must not keep waiting either.
	 */
	if (ret < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
		goto err_report;

	might_sleep();
	/* retries exceeds MAX_KIQ_REG_TRY only when every attempt timed out. */
	while (ret < 1 && retries++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		ret = amdgpu_fence_wait_polling(ring, fence_seq, MAX_KIQ_REG_WAIT);
	}

	if (retries <= MAX_KIQ_REG_TRY)
		return;

	goto err_report;

err_undo:
	amdgpu_ring_undo(ring);
err_unlock:
	spin_unlock_irqrestore(&kiq->ring_lock, irq_flags);
err_report:
	dev_err(adev->dev, "failed to write reg:%x\n", reg);
}
1176
amdgpu_gfx_get_num_kcq(struct amdgpu_device * adev)1177 int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
1178 {
1179 if (amdgpu_num_kcq == -1) {
1180 return 8;
1181 } else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
1182 dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
1183 return 8;
1184 }
1185 return amdgpu_num_kcq;
1186 }
1187
/**
 * amdgpu_gfx_cp_init_microcode - record CP firmware info for one ucode id
 * @adev: amdgpu_device pointer
 * @ucode_id: AMDGPU_UCODE_ID_CP_* identifier to process
 *
 * Picks the firmware image that backs @ucode_id, records its version and
 * feature version in adev->gfx where the id carries them, and works out the
 * size of the piece the id stands for. With PSP firmware loading the image is
 * also registered in adev->firmware.ucode[] and its page-aligned size is added
 * to adev->firmware.fw_size. Unknown ids are logged and ignored.
 */
void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
		uint32_t ucode_id)
{
	const struct gfx_firmware_header_v1_0 *hdr_v1;
	const struct gfx_firmware_header_v2_0 *hdr_v2;
	struct amdgpu_firmware_info *fw_info = NULL;
	const struct firmware *fw;
	unsigned int size;

	switch (ucode_id) {
	/* PFP */
	case AMDGPU_UCODE_ID_CP_PFP:
		fw = adev->gfx.pfp_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		adev->gfx.pfp_fw_version =
			le32_to_cpu(hdr_v1->header.ucode_version);
		adev->gfx.pfp_feature_version =
			le32_to_cpu(hdr_v1->ucode_feature_version);
		size = le32_to_cpu(hdr_v1->header.ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_PFP:
		fw = adev->gfx.pfp_fw;
		hdr_v2 = (const struct gfx_firmware_header_v2_0 *)fw->data;
		adev->gfx.pfp_fw_version =
			le32_to_cpu(hdr_v2->header.ucode_version);
		adev->gfx.pfp_feature_version =
			le32_to_cpu(hdr_v2->ucode_feature_version);
		size = le32_to_cpu(hdr_v2->ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
		/* Stack ids only contribute the data section size. */
		fw = adev->gfx.pfp_fw;
		hdr_v2 = (const struct gfx_firmware_header_v2_0 *)fw->data;
		size = le32_to_cpu(hdr_v2->data_size_bytes);
		break;

	/* ME */
	case AMDGPU_UCODE_ID_CP_ME:
		fw = adev->gfx.me_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		adev->gfx.me_fw_version =
			le32_to_cpu(hdr_v1->header.ucode_version);
		adev->gfx.me_feature_version =
			le32_to_cpu(hdr_v1->ucode_feature_version);
		size = le32_to_cpu(hdr_v1->header.ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_ME:
		fw = adev->gfx.me_fw;
		hdr_v2 = (const struct gfx_firmware_header_v2_0 *)fw->data;
		adev->gfx.me_fw_version =
			le32_to_cpu(hdr_v2->header.ucode_version);
		adev->gfx.me_feature_version =
			le32_to_cpu(hdr_v2->ucode_feature_version);
		size = le32_to_cpu(hdr_v2->ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
		fw = adev->gfx.me_fw;
		hdr_v2 = (const struct gfx_firmware_header_v2_0 *)fw->data;
		size = le32_to_cpu(hdr_v2->data_size_bytes);
		break;

	/* CE */
	case AMDGPU_UCODE_ID_CP_CE:
		fw = adev->gfx.ce_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		adev->gfx.ce_fw_version =
			le32_to_cpu(hdr_v1->header.ucode_version);
		adev->gfx.ce_feature_version =
			le32_to_cpu(hdr_v1->ucode_feature_version);
		size = le32_to_cpu(hdr_v1->header.ucode_size_bytes);
		break;

	/* MEC1: the jump table (jt_size dwords) is accounted for separately. */
	case AMDGPU_UCODE_ID_CP_MEC1:
		fw = adev->gfx.mec_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		adev->gfx.mec_fw_version =
			le32_to_cpu(hdr_v1->header.ucode_version);
		adev->gfx.mec_feature_version =
			le32_to_cpu(hdr_v1->ucode_feature_version);
		size = le32_to_cpu(hdr_v1->header.ucode_size_bytes) -
			le32_to_cpu(hdr_v1->jt_size) * 4;
		break;
	case AMDGPU_UCODE_ID_CP_MEC1_JT:
		fw = adev->gfx.mec_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		size = le32_to_cpu(hdr_v1->jt_size) * 4;
		break;

	/* MEC2: same split between microcode and jump table as MEC1. */
	case AMDGPU_UCODE_ID_CP_MEC2:
		fw = adev->gfx.mec2_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(hdr_v1->header.ucode_version);
		adev->gfx.mec2_feature_version =
			le32_to_cpu(hdr_v1->ucode_feature_version);
		size = le32_to_cpu(hdr_v1->header.ucode_size_bytes) -
			le32_to_cpu(hdr_v1->jt_size) * 4;
		break;
	case AMDGPU_UCODE_ID_CP_MEC2_JT:
		fw = adev->gfx.mec2_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		size = le32_to_cpu(hdr_v1->jt_size) * 4;
		break;

	/* RS64 MEC */
	case AMDGPU_UCODE_ID_CP_RS64_MEC:
		fw = adev->gfx.mec_fw;
		hdr_v2 = (const struct gfx_firmware_header_v2_0 *)fw->data;
		adev->gfx.mec_fw_version =
			le32_to_cpu(hdr_v2->header.ucode_version);
		adev->gfx.mec_feature_version =
			le32_to_cpu(hdr_v2->ucode_feature_version);
		size = le32_to_cpu(hdr_v2->ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
		fw = adev->gfx.mec_fw;
		hdr_v2 = (const struct gfx_firmware_header_v2_0 *)fw->data;
		size = le32_to_cpu(hdr_v2->data_size_bytes);
		break;

	default:
		dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id);
		return;
	}

	/* Only the PSP load path keeps a per-id firmware table. */
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		return;

	fw_info = &adev->firmware.ucode[ucode_id];
	fw_info->ucode_id = ucode_id;
	fw_info->fw = fw;
	adev->firmware.fw_size += ALIGN(size, PAGE_SIZE);
}
1327
amdgpu_gfx_is_master_xcc(struct amdgpu_device * adev,int xcc_id)1328 bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
1329 {
1330 return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
1331 adev->gfx.num_xcc_per_xcp : 1));
1332 }
1333
amdgpu_gfx_get_current_compute_partition(struct device * dev,struct device_attribute * addr,char * buf)1334 static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
1335 struct device_attribute *addr,
1336 char *buf)
1337 {
1338 struct drm_device *ddev = dev_get_drvdata(dev);
1339 struct amdgpu_device *adev = drm_to_adev(ddev);
1340 int mode;
1341
1342 mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
1343 AMDGPU_XCP_FL_NONE);
1344
1345 return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
1346 }
1347
amdgpu_gfx_set_compute_partition(struct device * dev,struct device_attribute * addr,const char * buf,size_t count)1348 static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
1349 struct device_attribute *addr,
1350 const char *buf, size_t count)
1351 {
1352 struct drm_device *ddev = dev_get_drvdata(dev);
1353 struct amdgpu_device *adev = drm_to_adev(ddev);
1354 enum amdgpu_gfx_partition mode;
1355 int ret = 0, num_xcc;
1356
1357 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1358 if (num_xcc % 2 != 0)
1359 return -EINVAL;
1360
1361 if (!strncasecmp("SPX", buf, strlen("SPX"))) {
1362 mode = AMDGPU_SPX_PARTITION_MODE;
1363 } else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
1364 /*
1365 * DPX mode needs AIDs to be in multiple of 2.
1366 * Each AID connects 2 XCCs.
1367 */
1368 if (num_xcc%4)
1369 return -EINVAL;
1370 mode = AMDGPU_DPX_PARTITION_MODE;
1371 } else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
1372 if (num_xcc != 6)
1373 return -EINVAL;
1374 mode = AMDGPU_TPX_PARTITION_MODE;
1375 } else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
1376 if (num_xcc != 8)
1377 return -EINVAL;
1378 mode = AMDGPU_QPX_PARTITION_MODE;
1379 } else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
1380 mode = AMDGPU_CPX_PARTITION_MODE;
1381 } else {
1382 return -EINVAL;
1383 }
1384
1385 ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
1386
1387 if (ret)
1388 return ret;
1389
1390 return count;
1391 }
1392
amdgpu_gfx_get_available_compute_partition(struct device * dev,struct device_attribute * addr,char * buf)1393 static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
1394 struct device_attribute *addr,
1395 char *buf)
1396 {
1397 struct drm_device *ddev = dev_get_drvdata(dev);
1398 struct amdgpu_device *adev = drm_to_adev(ddev);
1399 char *supported_partition;
1400
1401 /* TBD */
1402 switch (NUM_XCC(adev->gfx.xcc_mask)) {
1403 case 8:
1404 supported_partition = "SPX, DPX, QPX, CPX";
1405 break;
1406 case 6:
1407 supported_partition = "SPX, TPX, CPX";
1408 break;
1409 case 4:
1410 supported_partition = "SPX, DPX, CPX";
1411 break;
1412 /* this seems only existing in emulation phase */
1413 case 2:
1414 supported_partition = "SPX, CPX";
1415 break;
1416 default:
1417 supported_partition = "Not supported";
1418 break;
1419 }
1420
1421 return sysfs_emit(buf, "%s\n", supported_partition);
1422 }
1423
amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring * ring)1424 static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
1425 {
1426 struct amdgpu_device *adev = ring->adev;
1427 struct drm_gpu_scheduler *sched = &ring->sched;
1428 struct drm_sched_entity entity;
1429 static atomic_t counter;
1430 struct dma_fence *f;
1431 struct amdgpu_job *job;
1432 struct amdgpu_ib *ib;
1433 void *owner;
1434 int i, r;
1435
1436 /* Initialize the scheduler entity */
1437 r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
1438 &sched, 1, NULL);
1439 if (r) {
1440 dev_err(adev->dev, "Failed setting up GFX kernel entity.\n");
1441 goto err;
1442 }
1443
1444 /*
1445 * Use some unique dummy value as the owner to make sure we execute
1446 * the cleaner shader on each submission. The value just need to change
1447 * for each submission and is otherwise meaningless.
1448 */
1449 owner = (void *)(unsigned long)atomic_inc_return(&counter);
1450
1451 r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner,
1452 64, 0, &job);
1453 if (r)
1454 goto err;
1455
1456 job->enforce_isolation = true;
1457
1458 ib = &job->ibs[0];
1459 for (i = 0; i <= ring->funcs->align_mask; ++i)
1460 ib->ptr[i] = ring->funcs->nop;
1461 ib->length_dw = ring->funcs->align_mask + 1;
1462
1463 f = amdgpu_job_submit(job);
1464
1465 r = dma_fence_wait(f, false);
1466 if (r)
1467 goto err;
1468
1469 dma_fence_put(f);
1470
1471 /* Clean up the scheduler entity */
1472 drm_sched_entity_destroy(&entity);
1473 return 0;
1474
1475 err:
1476 return r;
1477 }
1478
amdgpu_gfx_run_cleaner_shader(struct amdgpu_device * adev,int xcp_id)1479 static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id)
1480 {
1481 int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1482 struct amdgpu_ring *ring;
1483 int num_xcc_to_clear;
1484 int i, r, xcc_id;
1485
1486 if (adev->gfx.num_xcc_per_xcp)
1487 num_xcc_to_clear = adev->gfx.num_xcc_per_xcp;
1488 else
1489 num_xcc_to_clear = 1;
1490
1491 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
1492 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1493 ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
1494 if ((ring->xcp_id == xcp_id) && ring->sched.ready) {
1495 r = amdgpu_gfx_run_cleaner_shader_job(ring);
1496 if (r)
1497 return r;
1498 num_xcc_to_clear--;
1499 break;
1500 }
1501 }
1502 }
1503
1504 if (num_xcc_to_clear)
1505 return -ENOENT;
1506
1507 return 0;
1508 }
1509
amdgpu_gfx_set_run_cleaner_shader(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)1510 static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
1511 struct device_attribute *attr,
1512 const char *buf,
1513 size_t count)
1514 {
1515 struct drm_device *ddev = dev_get_drvdata(dev);
1516 struct amdgpu_device *adev = drm_to_adev(ddev);
1517 int ret;
1518 long value;
1519
1520 if (amdgpu_in_reset(adev))
1521 return -EPERM;
1522 if (adev->in_suspend && !adev->in_runpm)
1523 return -EPERM;
1524
1525 ret = kstrtol(buf, 0, &value);
1526
1527 if (ret)
1528 return -EINVAL;
1529
1530 if (value < 0)
1531 return -EINVAL;
1532
1533 if (adev->xcp_mgr) {
1534 if (value >= adev->xcp_mgr->num_xcps)
1535 return -EINVAL;
1536 } else {
1537 if (value > 1)
1538 return -EINVAL;
1539 }
1540
1541 ret = pm_runtime_get_sync(ddev->dev);
1542 if (ret < 0) {
1543 pm_runtime_put_autosuspend(ddev->dev);
1544 return ret;
1545 }
1546
1547 ret = amdgpu_gfx_run_cleaner_shader(adev, value);
1548
1549 pm_runtime_mark_last_busy(ddev->dev);
1550 pm_runtime_put_autosuspend(ddev->dev);
1551
1552 if (ret)
1553 return ret;
1554
1555 return count;
1556 }
1557
amdgpu_gfx_get_enforce_isolation(struct device * dev,struct device_attribute * attr,char * buf)1558 static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
1559 struct device_attribute *attr,
1560 char *buf)
1561 {
1562 struct drm_device *ddev = dev_get_drvdata(dev);
1563 struct amdgpu_device *adev = drm_to_adev(ddev);
1564 int i;
1565 ssize_t size = 0;
1566
1567 if (adev->xcp_mgr) {
1568 for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
1569 size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]);
1570 if (i < (adev->xcp_mgr->num_xcps - 1))
1571 size += sysfs_emit_at(buf, size, " ");
1572 }
1573 buf[size++] = '\n';
1574 } else {
1575 size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]);
1576 }
1577
1578 return size;
1579 }
1580
amdgpu_gfx_set_enforce_isolation(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)1581 static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
1582 struct device_attribute *attr,
1583 const char *buf, size_t count)
1584 {
1585 struct drm_device *ddev = dev_get_drvdata(dev);
1586 struct amdgpu_device *adev = drm_to_adev(ddev);
1587 long partition_values[MAX_XCP] = {0};
1588 int ret, i, num_partitions;
1589 const char *input_buf = buf;
1590
1591 for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
1592 ret = sscanf(input_buf, "%ld", &partition_values[i]);
1593 if (ret <= 0)
1594 break;
1595
1596 /* Move the pointer to the next value in the string */
1597 input_buf = strchr(input_buf, ' ');
1598 if (input_buf) {
1599 input_buf++;
1600 } else {
1601 i++;
1602 break;
1603 }
1604 }
1605 num_partitions = i;
1606
1607 if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps)
1608 return -EINVAL;
1609
1610 if (!adev->xcp_mgr && num_partitions != 1)
1611 return -EINVAL;
1612
1613 for (i = 0; i < num_partitions; i++) {
1614 if (partition_values[i] != 0 && partition_values[i] != 1)
1615 return -EINVAL;
1616 }
1617
1618 mutex_lock(&adev->enforce_isolation_mutex);
1619
1620 for (i = 0; i < num_partitions; i++) {
1621 if (adev->enforce_isolation[i] && !partition_values[i]) {
1622 /* Going from enabled to disabled */
1623 amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i));
1624 } else if (!adev->enforce_isolation[i] && partition_values[i]) {
1625 /* Going from disabled to enabled */
1626 amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
1627 }
1628 adev->enforce_isolation[i] = partition_values[i];
1629 }
1630
1631 mutex_unlock(&adev->enforce_isolation_mutex);
1632
1633 return count;
1634 }
1635
1636 static DEVICE_ATTR(run_cleaner_shader, 0200,
1637 NULL, amdgpu_gfx_set_run_cleaner_shader);
1638
1639 static DEVICE_ATTR(enforce_isolation, 0644,
1640 amdgpu_gfx_get_enforce_isolation,
1641 amdgpu_gfx_set_enforce_isolation);
1642
1643 static DEVICE_ATTR(current_compute_partition, 0644,
1644 amdgpu_gfx_get_current_compute_partition,
1645 amdgpu_gfx_set_compute_partition);
1646
1647 static DEVICE_ATTR(available_compute_partition, 0444,
1648 amdgpu_gfx_get_available_compute_partition, NULL);
1649
amdgpu_gfx_sysfs_init(struct amdgpu_device * adev)1650 int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
1651 {
1652 int r;
1653
1654 r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
1655 if (r)
1656 return r;
1657
1658 r = device_create_file(adev->dev, &dev_attr_available_compute_partition);
1659
1660 return r;
1661 }
1662
amdgpu_gfx_sysfs_fini(struct amdgpu_device * adev)1663 void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
1664 {
1665 device_remove_file(adev->dev, &dev_attr_current_compute_partition);
1666 device_remove_file(adev->dev, &dev_attr_available_compute_partition);
1667 }
1668
amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device * adev)1669 int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
1670 {
1671 int r;
1672
1673 r = device_create_file(adev->dev, &dev_attr_enforce_isolation);
1674 if (r)
1675 return r;
1676
1677 r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader);
1678 if (r)
1679 return r;
1680
1681 return 0;
1682 }
1683
amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device * adev)1684 void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
1685 {
1686 device_remove_file(adev->dev, &dev_attr_enforce_isolation);
1687 device_remove_file(adev->dev, &dev_attr_run_cleaner_shader);
1688 }
1689
amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device * adev,unsigned int cleaner_shader_size)1690 int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
1691 unsigned int cleaner_shader_size)
1692 {
1693 if (!adev->gfx.enable_cleaner_shader)
1694 return -EOPNOTSUPP;
1695
1696 return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE,
1697 AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT,
1698 &adev->gfx.cleaner_shader_obj,
1699 &adev->gfx.cleaner_shader_gpu_addr,
1700 (void **)&adev->gfx.cleaner_shader_cpu_ptr);
1701 }
1702
amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device * adev)1703 void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev)
1704 {
1705 if (!adev->gfx.enable_cleaner_shader)
1706 return;
1707
1708 amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj,
1709 &adev->gfx.cleaner_shader_gpu_addr,
1710 (void **)&adev->gfx.cleaner_shader_cpu_ptr);
1711 }
1712
amdgpu_gfx_cleaner_shader_init(struct amdgpu_device * adev,unsigned int cleaner_shader_size,const void * cleaner_shader_ptr)1713 void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
1714 unsigned int cleaner_shader_size,
1715 const void *cleaner_shader_ptr)
1716 {
1717 if (!adev->gfx.enable_cleaner_shader)
1718 return;
1719
1720 if (adev->gfx.cleaner_shader_cpu_ptr && cleaner_shader_ptr)
1721 memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr,
1722 cleaner_shader_size);
1723 }
1724
1725 /**
1726 * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver)
1727 * @adev: amdgpu_device pointer
1728 * @idx: Index of the scheduler to control
1729 * @enable: Whether to enable or disable the KFD scheduler
1730 *
1731 * This function is used to control the KFD (Kernel Fusion Driver) scheduler
1732 * from the KGD. It is part of the cleaner shader feature. This function plays
1733 * a key role in enforcing process isolation on the GPU.
1734 *
1735 * The function uses a reference count mechanism (kfd_sch_req_count) to keep
1736 * track of the number of requests to enable the KFD scheduler. When a request
1737 * to enable the KFD scheduler is made, the reference count is decremented.
1738 * When the reference count reaches zero, a delayed work is scheduled to
1739 * enforce isolation after a delay of GFX_SLICE_PERIOD.
1740 *
1741 * When a request to disable the KFD scheduler is made, the function first
1742 * checks if the reference count is zero. If it is, it cancels the delayed work
1743 * for enforcing isolation and checks if the KFD scheduler is active. If the
1744 * KFD scheduler is active, it sends a request to stop the KFD scheduler and
1745 * sets the KFD scheduler state to inactive. Then, it increments the reference
1746 * count.
1747 *
1748 * The function is synchronized using the kfd_sch_mutex to ensure that the KFD
1749 * scheduler state and reference count are updated atomically.
1750 *
1751 * Note: If the reference count is already zero when a request to enable the
1752 * KFD scheduler is made, it means there's an imbalance bug somewhere. The
1753 * function triggers a warning in this case.
1754 */
amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device * adev,u32 idx,bool enable)1755 static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx,
1756 bool enable)
1757 {
1758 mutex_lock(&adev->gfx.kfd_sch_mutex);
1759
1760 if (enable) {
1761 /* If the count is already 0, it means there's an imbalance bug somewhere.
1762 * Note that the bug may be in a different caller than the one which triggers the
1763 * WARN_ON_ONCE.
1764 */
1765 if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) {
1766 dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n");
1767 goto unlock;
1768 }
1769
1770 adev->gfx.kfd_sch_req_count[idx]--;
1771
1772 if (adev->gfx.kfd_sch_req_count[idx] == 0 &&
1773 adev->gfx.kfd_sch_inactive[idx]) {
1774 schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
1775 GFX_SLICE_PERIOD);
1776 }
1777 } else {
1778 if (adev->gfx.kfd_sch_req_count[idx] == 0) {
1779 cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work);
1780 if (!adev->gfx.kfd_sch_inactive[idx]) {
1781 amdgpu_amdkfd_stop_sched(adev, idx);
1782 adev->gfx.kfd_sch_inactive[idx] = true;
1783 }
1784 }
1785
1786 adev->gfx.kfd_sch_req_count[idx]++;
1787 }
1788
1789 unlock:
1790 mutex_unlock(&adev->gfx.kfd_sch_mutex);
1791 }
1792
1793 /**
1794 * amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation
1795 *
1796 * @work: work_struct.
1797 *
1798 * This function is the work handler for enforcing shader isolation on AMD GPUs.
1799 * It counts the number of emitted fences for each GFX and compute ring. If there
1800 * are any fences, it schedules the `enforce_isolation_work` to be run after a
1801 * delay of `GFX_SLICE_PERIOD`. If there are no fences, it signals the Kernel Fusion
1802 * Driver (KFD) to resume the runqueue. The function is synchronized using the
1803 * `enforce_isolation_mutex`.
1804 */
amdgpu_gfx_enforce_isolation_handler(struct work_struct * work)1805 void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
1806 {
1807 struct amdgpu_isolation_work *isolation_work =
1808 container_of(work, struct amdgpu_isolation_work, work.work);
1809 struct amdgpu_device *adev = isolation_work->adev;
1810 u32 i, idx, fences = 0;
1811
1812 if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION)
1813 idx = 0;
1814 else
1815 idx = isolation_work->xcp_id;
1816
1817 if (idx >= MAX_XCP)
1818 return;
1819
1820 mutex_lock(&adev->enforce_isolation_mutex);
1821 for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) {
1822 if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id)
1823 fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
1824 }
1825 for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) {
1826 if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id)
1827 fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
1828 }
1829 if (fences) {
1830 schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
1831 GFX_SLICE_PERIOD);
1832 } else {
1833 /* Tell KFD to resume the runqueue */
1834 if (adev->kfd.init_complete) {
1835 WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]);
1836 WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]);
1837 amdgpu_amdkfd_start_sched(adev, idx);
1838 adev->gfx.kfd_sch_inactive[idx] = false;
1839 }
1840 }
1841 mutex_unlock(&adev->enforce_isolation_mutex);
1842 }
1843
amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring * ring)1844 void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
1845 {
1846 struct amdgpu_device *adev = ring->adev;
1847 u32 idx;
1848 bool sched_work = false;
1849
1850 if (!adev->gfx.enable_cleaner_shader)
1851 return;
1852
1853 if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
1854 idx = 0;
1855 else
1856 idx = ring->xcp_id;
1857
1858 if (idx >= MAX_XCP)
1859 return;
1860
1861 mutex_lock(&adev->enforce_isolation_mutex);
1862 if (adev->enforce_isolation[idx]) {
1863 if (adev->kfd.init_complete)
1864 sched_work = true;
1865 }
1866 mutex_unlock(&adev->enforce_isolation_mutex);
1867
1868 if (sched_work)
1869 amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
1870 }
1871
amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring * ring)1872 void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
1873 {
1874 struct amdgpu_device *adev = ring->adev;
1875 u32 idx;
1876 bool sched_work = false;
1877
1878 if (!adev->gfx.enable_cleaner_shader)
1879 return;
1880
1881 if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
1882 idx = 0;
1883 else
1884 idx = ring->xcp_id;
1885
1886 if (idx >= MAX_XCP)
1887 return;
1888
1889 mutex_lock(&adev->enforce_isolation_mutex);
1890 if (adev->enforce_isolation[idx]) {
1891 if (adev->kfd.init_complete)
1892 sched_work = true;
1893 }
1894 mutex_unlock(&adev->enforce_isolation_mutex);
1895
1896 if (sched_work)
1897 amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
1898 }
1899