• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  */
25 
26 #include <linux/firmware.h>
27 #include <linux/pm_runtime.h>
28 
29 #include "amdgpu.h"
30 #include "amdgpu_gfx.h"
31 #include "amdgpu_rlc.h"
32 #include "amdgpu_ras.h"
33 #include "amdgpu_reset.h"
34 #include "amdgpu_xcp.h"
35 #include "amdgpu_xgmi.h"
36 
37 /* delay 0.1 second to enable gfx off feature */
38 #define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)
39 
40 #define GFX_OFF_NO_DELAY 0
41 
/*
 * GPU GFX IP block helper functions.
 */
45 
amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device * adev,int mec,int pipe,int queue)46 int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
47 				int pipe, int queue)
48 {
49 	int bit = 0;
50 
51 	bit += mec * adev->gfx.mec.num_pipe_per_mec
52 		* adev->gfx.mec.num_queue_per_pipe;
53 	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
54 	bit += queue;
55 
56 	return bit;
57 }
58 
amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device * adev,int bit,int * mec,int * pipe,int * queue)59 void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
60 				 int *mec, int *pipe, int *queue)
61 {
62 	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
63 	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
64 		% adev->gfx.mec.num_pipe_per_mec;
65 	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
66 	       / adev->gfx.mec.num_pipe_per_mec;
67 
68 }
69 
amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device * adev,int xcc_id,int mec,int pipe,int queue)70 bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
71 				     int xcc_id, int mec, int pipe, int queue)
72 {
73 	return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
74 			adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
75 }
76 
amdgpu_gfx_me_queue_to_bit(struct amdgpu_device * adev,int me,int pipe,int queue)77 int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
78 			       int me, int pipe, int queue)
79 {
80 	int bit = 0;
81 
82 	bit += me * adev->gfx.me.num_pipe_per_me
83 		* adev->gfx.me.num_queue_per_pipe;
84 	bit += pipe * adev->gfx.me.num_queue_per_pipe;
85 	bit += queue;
86 
87 	return bit;
88 }
89 
amdgpu_gfx_bit_to_me_queue(struct amdgpu_device * adev,int bit,int * me,int * pipe,int * queue)90 void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
91 				int *me, int *pipe, int *queue)
92 {
93 	*queue = bit % adev->gfx.me.num_queue_per_pipe;
94 	*pipe = (bit / adev->gfx.me.num_queue_per_pipe)
95 		% adev->gfx.me.num_pipe_per_me;
96 	*me = (bit / adev->gfx.me.num_queue_per_pipe)
97 		/ adev->gfx.me.num_pipe_per_me;
98 }
99 
amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device * adev,int me,int pipe,int queue)100 bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
101 				    int me, int pipe, int queue)
102 {
103 	return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
104 			adev->gfx.me.queue_bitmap);
105 }
106 
107 /**
108  * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
109  *
110  * @mask: array in which the per-shader array disable masks will be stored
111  * @max_se: number of SEs
112  * @max_sh: number of SHs
113  *
114  * The bitmask of CUs to be disabled in the shader array determined by se and
115  * sh is stored in mask[se * max_sh + sh].
116  */
amdgpu_gfx_parse_disable_cu(unsigned int * mask,unsigned int max_se,unsigned int max_sh)117 void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh)
118 {
119 	unsigned int se, sh, cu;
120 	const char *p;
121 
122 	memset(mask, 0, sizeof(*mask) * max_se * max_sh);
123 
124 	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
125 		return;
126 
127 	p = amdgpu_disable_cu;
128 	for (;;) {
129 		char *next;
130 		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
131 
132 		if (ret < 3) {
133 			DRM_ERROR("amdgpu: could not parse disable_cu\n");
134 			return;
135 		}
136 
137 		if (se < max_se && sh < max_sh && cu < 16) {
138 			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
139 			mask[se * max_sh + sh] |= 1u << cu;
140 		} else {
141 			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
142 				  se, sh, cu);
143 		}
144 
145 		next = strchr(p, ',');
146 		if (!next)
147 			break;
148 		p = next + 1;
149 	}
150 }
151 
amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device * adev)152 static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
153 {
154 	return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
155 }
156 
amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device * adev)157 static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
158 {
159 	if (amdgpu_compute_multipipe != -1) {
160 		DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
161 			 amdgpu_compute_multipipe);
162 		return amdgpu_compute_multipipe == 1;
163 	}
164 
165 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
166 		return true;
167 
168 	/* FIXME: spreading the queues across pipes causes perf regressions
169 	 * on POLARIS11 compute workloads */
170 	if (adev->asic_type == CHIP_POLARIS11)
171 		return false;
172 
173 	return adev->gfx.mec.num_mec > 1;
174 }
175 
amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device * adev,struct amdgpu_ring * ring)176 bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
177 						struct amdgpu_ring *ring)
178 {
179 	int queue = ring->queue;
180 	int pipe = ring->pipe;
181 
182 	/* Policy: use pipe1 queue0 as high priority graphics queue if we
183 	 * have more than one gfx pipe.
184 	 */
185 	if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
186 	    adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
187 		int me = ring->me;
188 		int bit;
189 
190 		bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
191 		if (ring == &adev->gfx.gfx_ring[bit])
192 			return true;
193 	}
194 
195 	return false;
196 }
197 
amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device * adev,struct amdgpu_ring * ring)198 bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
199 					       struct amdgpu_ring *ring)
200 {
201 	/* Policy: use 1st queue as high priority compute queue if we
202 	 * have more than one compute queue.
203 	 */
204 	if (adev->gfx.num_compute_rings > 1 &&
205 	    ring == &adev->gfx.compute_ring[0])
206 		return true;
207 
208 	return false;
209 }
210 
amdgpu_gfx_compute_queue_acquire(struct amdgpu_device * adev)211 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
212 {
213 	int i, j, queue, pipe;
214 	bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
215 	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
216 				     adev->gfx.mec.num_queue_per_pipe,
217 				     adev->gfx.num_compute_rings);
218 	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
219 
220 	if (multipipe_policy) {
221 		/* policy: make queues evenly cross all pipes on MEC1 only
222 		 * for multiple xcc, just use the original policy for simplicity */
223 		for (j = 0; j < num_xcc; j++) {
224 			for (i = 0; i < max_queues_per_mec; i++) {
225 				pipe = i % adev->gfx.mec.num_pipe_per_mec;
226 				queue = (i / adev->gfx.mec.num_pipe_per_mec) %
227 					 adev->gfx.mec.num_queue_per_pipe;
228 
229 				set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
230 					adev->gfx.mec_bitmap[j].queue_bitmap);
231 			}
232 		}
233 	} else {
234 		/* policy: amdgpu owns all queues in the given pipe */
235 		for (j = 0; j < num_xcc; j++) {
236 			for (i = 0; i < max_queues_per_mec; ++i)
237 				set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
238 		}
239 	}
240 
241 	for (j = 0; j < num_xcc; j++) {
242 		dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
243 			bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
244 	}
245 }
246 
amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device * adev)247 void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
248 {
249 	int i, queue, pipe;
250 	bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
251 	int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
252 					adev->gfx.me.num_queue_per_pipe;
253 
254 	if (multipipe_policy) {
255 		/* policy: amdgpu owns the first queue per pipe at this stage
256 		 * will extend to mulitple queues per pipe later */
257 		for (i = 0; i < max_queues_per_me; i++) {
258 			pipe = i % adev->gfx.me.num_pipe_per_me;
259 			queue = (i / adev->gfx.me.num_pipe_per_me) %
260 				adev->gfx.me.num_queue_per_pipe;
261 
262 			set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
263 				adev->gfx.me.queue_bitmap);
264 		}
265 	} else {
266 		for (i = 0; i < max_queues_per_me; ++i)
267 			set_bit(i, adev->gfx.me.queue_bitmap);
268 	}
269 
270 	/* update the number of active graphics rings */
271 	adev->gfx.num_gfx_rings =
272 		bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
273 }
274 
amdgpu_gfx_kiq_acquire(struct amdgpu_device * adev,struct amdgpu_ring * ring,int xcc_id)275 static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
276 				  struct amdgpu_ring *ring, int xcc_id)
277 {
278 	int queue_bit;
279 	int mec, pipe, queue;
280 
281 	queue_bit = adev->gfx.mec.num_mec
282 		    * adev->gfx.mec.num_pipe_per_mec
283 		    * adev->gfx.mec.num_queue_per_pipe;
284 
285 	while (--queue_bit >= 0) {
286 		if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
287 			continue;
288 
289 		amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
290 
291 		/*
292 		 * 1. Using pipes 2/3 from MEC 2 seems cause problems.
293 		 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
294 		 * only can be issued on queue 0.
295 		 */
296 		if ((mec == 1 && pipe > 1) || queue != 0)
297 			continue;
298 
299 		ring->me = mec + 1;
300 		ring->pipe = pipe;
301 		ring->queue = queue;
302 
303 		return 0;
304 	}
305 
306 	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
307 	return -EINVAL;
308 }
309 
amdgpu_gfx_kiq_init_ring(struct amdgpu_device * adev,int xcc_id)310 int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id)
311 {
312 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
313 	struct amdgpu_irq_src *irq = &kiq->irq;
314 	struct amdgpu_ring *ring = &kiq->ring;
315 	int r = 0;
316 
317 	spin_lock_init(&kiq->ring_lock);
318 
319 	ring->adev = NULL;
320 	ring->ring_obj = NULL;
321 	ring->use_doorbell = true;
322 	ring->xcc_id = xcc_id;
323 	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
324 	ring->doorbell_index =
325 		(adev->doorbell_index.kiq +
326 		 xcc_id * adev->doorbell_index.xcc_doorbell_range)
327 		<< 1;
328 
329 	r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
330 	if (r)
331 		return r;
332 
333 	ring->eop_gpu_addr = kiq->eop_gpu_addr;
334 	ring->no_scheduler = true;
335 	snprintf(ring->name, sizeof(ring->name), "kiq_%hhu.%hhu.%hhu.%hhu",
336 		 (unsigned char)xcc_id, (unsigned char)ring->me,
337 		 (unsigned char)ring->pipe, (unsigned char)ring->queue);
338 	r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
339 			     AMDGPU_RING_PRIO_DEFAULT, NULL);
340 	if (r)
341 		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
342 
343 	return r;
344 }
345 
/**
 * amdgpu_gfx_kiq_free_ring - Tear down a KIQ ring
 *
 * @ring: KIQ ring to finalize
 *
 * Thin wrapper around amdgpu_ring_fini().
 */
void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
	amdgpu_ring_fini(ring);
}
350 
amdgpu_gfx_kiq_fini(struct amdgpu_device * adev,int xcc_id)351 void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
352 {
353 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
354 
355 	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
356 }
357 
amdgpu_gfx_kiq_init(struct amdgpu_device * adev,unsigned int hpd_size,int xcc_id)358 int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
359 			unsigned int hpd_size, int xcc_id)
360 {
361 	int r;
362 	u32 *hpd;
363 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
364 
365 	r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
366 				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
367 				    &kiq->eop_gpu_addr, (void **)&hpd);
368 	if (r) {
369 		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
370 		return r;
371 	}
372 
373 	memset(hpd, 0, hpd_size);
374 
375 	r = amdgpu_bo_reserve(kiq->eop_obj, true);
376 	if (unlikely(r != 0))
377 		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
378 	amdgpu_bo_kunmap(kiq->eop_obj);
379 	amdgpu_bo_unreserve(kiq->eop_obj);
380 
381 	return 0;
382 }
383 
384 /* create MQD for each compute/gfx queue */
amdgpu_gfx_mqd_sw_init(struct amdgpu_device * adev,unsigned int mqd_size,int xcc_id)385 int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
386 			   unsigned int mqd_size, int xcc_id)
387 {
388 	int r, i, j;
389 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
390 	struct amdgpu_ring *ring = &kiq->ring;
391 	u32 domain = AMDGPU_GEM_DOMAIN_GTT;
392 
393 #if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
394 	/* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
395 	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
396 		domain |= AMDGPU_GEM_DOMAIN_VRAM;
397 #endif
398 
399 	/* create MQD for KIQ */
400 	if (!adev->enable_mes_kiq && !ring->mqd_obj) {
401 		/* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
402 		 * otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
403 		 * deallocated and gart_unbind, to strict diverage we decide to use VRAM domain for
404 		 * KIQ MQD no matter SRIOV or Bare-metal
405 		 */
406 		r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
407 					    AMDGPU_GEM_DOMAIN_VRAM |
408 					    AMDGPU_GEM_DOMAIN_GTT,
409 					    &ring->mqd_obj,
410 					    &ring->mqd_gpu_addr,
411 					    &ring->mqd_ptr);
412 		if (r) {
413 			dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
414 			return r;
415 		}
416 
417 		/* prepare MQD backup */
418 		kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
419 		if (!kiq->mqd_backup) {
420 			dev_warn(adev->dev,
421 				 "no memory to create MQD backup for ring %s\n", ring->name);
422 			return -ENOMEM;
423 		}
424 	}
425 
426 	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
427 		/* create MQD for each KGQ */
428 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
429 			ring = &adev->gfx.gfx_ring[i];
430 			if (!ring->mqd_obj) {
431 				r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
432 							    domain, &ring->mqd_obj,
433 							    &ring->mqd_gpu_addr, &ring->mqd_ptr);
434 				if (r) {
435 					dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
436 					return r;
437 				}
438 
439 				ring->mqd_size = mqd_size;
440 				/* prepare MQD backup */
441 				adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
442 				if (!adev->gfx.me.mqd_backup[i]) {
443 					dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
444 					return -ENOMEM;
445 				}
446 			}
447 		}
448 	}
449 
450 	/* create MQD for each KCQ */
451 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
452 		j = i + xcc_id * adev->gfx.num_compute_rings;
453 		ring = &adev->gfx.compute_ring[j];
454 		if (!ring->mqd_obj) {
455 			r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
456 						    domain, &ring->mqd_obj,
457 						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
458 			if (r) {
459 				dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
460 				return r;
461 			}
462 
463 			ring->mqd_size = mqd_size;
464 			/* prepare MQD backup */
465 			adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL);
466 			if (!adev->gfx.mec.mqd_backup[j]) {
467 				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
468 				return -ENOMEM;
469 			}
470 		}
471 	}
472 
473 	return 0;
474 }
475 
amdgpu_gfx_mqd_sw_fini(struct amdgpu_device * adev,int xcc_id)476 void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
477 {
478 	struct amdgpu_ring *ring = NULL;
479 	int i, j;
480 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
481 
482 	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
483 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
484 			ring = &adev->gfx.gfx_ring[i];
485 			kfree(adev->gfx.me.mqd_backup[i]);
486 			amdgpu_bo_free_kernel(&ring->mqd_obj,
487 					      &ring->mqd_gpu_addr,
488 					      &ring->mqd_ptr);
489 		}
490 	}
491 
492 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
493 		j = i + xcc_id * adev->gfx.num_compute_rings;
494 		ring = &adev->gfx.compute_ring[j];
495 		kfree(adev->gfx.mec.mqd_backup[j]);
496 		amdgpu_bo_free_kernel(&ring->mqd_obj,
497 				      &ring->mqd_gpu_addr,
498 				      &ring->mqd_ptr);
499 	}
500 
501 	ring = &kiq->ring;
502 	kfree(kiq->mqd_backup);
503 	amdgpu_bo_free_kernel(&ring->mqd_obj,
504 			      &ring->mqd_gpu_addr,
505 			      &ring->mqd_ptr);
506 }
507 
amdgpu_gfx_disable_kcq(struct amdgpu_device * adev,int xcc_id)508 int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
509 {
510 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
511 	struct amdgpu_ring *kiq_ring = &kiq->ring;
512 	int i, r = 0;
513 	int j;
514 
515 	if (adev->enable_mes) {
516 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
517 			j = i + xcc_id * adev->gfx.num_compute_rings;
518 			amdgpu_mes_unmap_legacy_queue(adev,
519 						   &adev->gfx.compute_ring[j],
520 						   RESET_QUEUES, 0, 0);
521 		}
522 		return 0;
523 	}
524 
525 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
526 		return -EINVAL;
527 
528 	if (!kiq_ring->sched.ready || adev->job_hang)
529 		return 0;
530 	/**
531 	 * This is workaround: only skip kiq_ring test
532 	 * during ras recovery in suspend stage for gfx9.4.3
533 	 */
534 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
535 	     amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
536 	    amdgpu_ras_in_recovery(adev))
537 		return 0;
538 
539 	spin_lock(&kiq->ring_lock);
540 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
541 					adev->gfx.num_compute_rings)) {
542 		spin_unlock(&kiq->ring_lock);
543 		return -ENOMEM;
544 	}
545 
546 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
547 		j = i + xcc_id * adev->gfx.num_compute_rings;
548 		kiq->pmf->kiq_unmap_queues(kiq_ring,
549 					   &adev->gfx.compute_ring[j],
550 					   RESET_QUEUES, 0, 0);
551 	}
552 	/* Submit unmap queue packet */
553 	amdgpu_ring_commit(kiq_ring);
554 	/*
555 	 * Ring test will do a basic scratch register change check. Just run
556 	 * this to ensure that unmap queues that is submitted before got
557 	 * processed successfully before returning.
558 	 */
559 	r = amdgpu_ring_test_helper(kiq_ring);
560 
561 	spin_unlock(&kiq->ring_lock);
562 
563 	return r;
564 }
565 
amdgpu_gfx_disable_kgq(struct amdgpu_device * adev,int xcc_id)566 int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
567 {
568 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
569 	struct amdgpu_ring *kiq_ring = &kiq->ring;
570 	int i, r = 0;
571 	int j;
572 
573 	if (adev->enable_mes) {
574 		if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
575 			for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
576 				j = i + xcc_id * adev->gfx.num_gfx_rings;
577 				amdgpu_mes_unmap_legacy_queue(adev,
578 						      &adev->gfx.gfx_ring[j],
579 						      PREEMPT_QUEUES, 0, 0);
580 			}
581 		}
582 		return 0;
583 	}
584 
585 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
586 		return -EINVAL;
587 
588 	if (!adev->gfx.kiq[0].ring.sched.ready || adev->job_hang)
589 		return 0;
590 
591 	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
592 		spin_lock(&kiq->ring_lock);
593 		if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
594 						adev->gfx.num_gfx_rings)) {
595 			spin_unlock(&kiq->ring_lock);
596 			return -ENOMEM;
597 		}
598 
599 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
600 			j = i + xcc_id * adev->gfx.num_gfx_rings;
601 			kiq->pmf->kiq_unmap_queues(kiq_ring,
602 						   &adev->gfx.gfx_ring[j],
603 						   PREEMPT_QUEUES, 0, 0);
604 		}
605 		/* Submit unmap queue packet */
606 		amdgpu_ring_commit(kiq_ring);
607 
608 		/*
609 		 * Ring test will do a basic scratch register change check.
610 		 * Just run this to ensure that unmap queues that is submitted
611 		 * before got processed successfully before returning.
612 		 */
613 		r = amdgpu_ring_test_helper(kiq_ring);
614 		spin_unlock(&kiq->ring_lock);
615 	}
616 
617 	return r;
618 }
619 
/**
 * amdgpu_queue_mask_bit_to_set_resource_bit - Map a queue bit to a set-resources bit
 *
 * @adev: amdgpu_device pointer
 * @queue_bit: linear queue index as used in the MEC queue bitmap
 *
 * Decomposes @queue_bit into (mec, pipe, queue) and re-encodes it with a
 * fixed layout of 4 pipes per MEC and 8 queues per pipe:
 * mec * 4 * 8 + pipe * 8 + queue.
 */
int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
					int queue_bit)
{
	int mec, pipe, queue;

	amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

	return mec * 4 * 8 + pipe * 8 + queue;
}
632 
amdgpu_gfx_mes_enable_kcq(struct amdgpu_device * adev,int xcc_id)633 static int amdgpu_gfx_mes_enable_kcq(struct amdgpu_device *adev, int xcc_id)
634 {
635 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
636 	struct amdgpu_ring *kiq_ring = &kiq->ring;
637 	uint64_t queue_mask = ~0ULL;
638 	int r, i, j;
639 
640 	amdgpu_device_flush_hdp(adev, NULL);
641 
642 	if (!adev->enable_uni_mes) {
643 		spin_lock(&kiq->ring_lock);
644 		r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->set_resources_size);
645 		if (r) {
646 			dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
647 			spin_unlock(&kiq->ring_lock);
648 			return r;
649 		}
650 
651 		kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
652 		r = amdgpu_ring_test_helper(kiq_ring);
653 		spin_unlock(&kiq->ring_lock);
654 		if (r)
655 			dev_err(adev->dev, "KIQ failed to set resources\n");
656 	}
657 
658 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
659 		j = i + xcc_id * adev->gfx.num_compute_rings;
660 		r = amdgpu_mes_map_legacy_queue(adev,
661 						&adev->gfx.compute_ring[j]);
662 		if (r) {
663 			dev_err(adev->dev, "failed to map compute queue\n");
664 			return r;
665 		}
666 	}
667 
668 	return 0;
669 }
670 
amdgpu_gfx_enable_kcq(struct amdgpu_device * adev,int xcc_id)671 int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
672 {
673 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
674 	struct amdgpu_ring *kiq_ring = &kiq->ring;
675 	uint64_t queue_mask = 0;
676 	int r, i, j;
677 
678 	if (adev->mes.enable_legacy_queue_map)
679 		return amdgpu_gfx_mes_enable_kcq(adev, xcc_id);
680 
681 	if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
682 		return -EINVAL;
683 
684 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
685 		if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
686 			continue;
687 
688 		/* This situation may be hit in the future if a new HW
689 		 * generation exposes more than 64 queues. If so, the
690 		 * definition of queue_mask needs updating */
691 		if (WARN_ON(i > (sizeof(queue_mask)*8))) {
692 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
693 			break;
694 		}
695 
696 		queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
697 	}
698 
699 	amdgpu_device_flush_hdp(adev, NULL);
700 
701 	DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
702 		 kiq_ring->queue);
703 
704 	spin_lock(&kiq->ring_lock);
705 	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
706 					adev->gfx.num_compute_rings +
707 					kiq->pmf->set_resources_size);
708 	if (r) {
709 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
710 		spin_unlock(&kiq->ring_lock);
711 		return r;
712 	}
713 
714 	kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
715 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
716 		j = i + xcc_id * adev->gfx.num_compute_rings;
717 		kiq->pmf->kiq_map_queues(kiq_ring,
718 					 &adev->gfx.compute_ring[j]);
719 	}
720 	/* Submit map queue packet */
721 	amdgpu_ring_commit(kiq_ring);
722 	/*
723 	 * Ring test will do a basic scratch register change check. Just run
724 	 * this to ensure that map queues that is submitted before got
725 	 * processed successfully before returning.
726 	 */
727 	r = amdgpu_ring_test_helper(kiq_ring);
728 	spin_unlock(&kiq->ring_lock);
729 	if (r)
730 		DRM_ERROR("KCQ enable failed\n");
731 
732 	return r;
733 }
734 
amdgpu_gfx_enable_kgq(struct amdgpu_device * adev,int xcc_id)735 int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
736 {
737 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
738 	struct amdgpu_ring *kiq_ring = &kiq->ring;
739 	int r, i, j;
740 
741 	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
742 		return -EINVAL;
743 
744 	amdgpu_device_flush_hdp(adev, NULL);
745 
746 	if (adev->mes.enable_legacy_queue_map) {
747 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
748 			j = i + xcc_id * adev->gfx.num_gfx_rings;
749 			r = amdgpu_mes_map_legacy_queue(adev,
750 							&adev->gfx.gfx_ring[j]);
751 			if (r) {
752 				DRM_ERROR("failed to map gfx queue\n");
753 				return r;
754 			}
755 		}
756 
757 		return 0;
758 	}
759 
760 	spin_lock(&kiq->ring_lock);
761 	/* No need to map kcq on the slave */
762 	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
763 		r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
764 						adev->gfx.num_gfx_rings);
765 		if (r) {
766 			DRM_ERROR("Failed to lock KIQ (%d).\n", r);
767 			spin_unlock(&kiq->ring_lock);
768 			return r;
769 		}
770 
771 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
772 			j = i + xcc_id * adev->gfx.num_gfx_rings;
773 			kiq->pmf->kiq_map_queues(kiq_ring,
774 						 &adev->gfx.gfx_ring[j]);
775 		}
776 	}
777 	/* Submit map queue packet */
778 	amdgpu_ring_commit(kiq_ring);
779 	/*
780 	 * Ring test will do a basic scratch register change check. Just run
781 	 * this to ensure that map queues that is submitted before got
782 	 * processed successfully before returning.
783 	 */
784 	r = amdgpu_ring_test_helper(kiq_ring);
785 	spin_unlock(&kiq->ring_lock);
786 	if (r)
787 		DRM_ERROR("KGQ enable failed\n");
788 
789 	return r;
790 }
791 
792 /* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
793  *
794  * @adev: amdgpu_device pointer
795  * @bool enable true: enable gfx off feature, false: disable gfx off feature
796  *
797  * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled.
798  * 2. other client can send request to disable gfx off feature, the request should be honored.
799  * 3. other client can cancel their request of disable gfx off feature
800  * 4. other client should not send request to enable gfx off feature before disable gfx off feature.
801  */
802 
amdgpu_gfx_off_ctrl(struct amdgpu_device * adev,bool enable)803 void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
804 {
805 	unsigned long delay = GFX_OFF_DELAY_ENABLE;
806 
807 	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
808 		return;
809 
810 	mutex_lock(&adev->gfx.gfx_off_mutex);
811 
812 	if (enable) {
813 		/* If the count is already 0, it means there's an imbalance bug somewhere.
814 		 * Note that the bug may be in a different caller than the one which triggers the
815 		 * WARN_ON_ONCE.
816 		 */
817 		if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
818 			goto unlock;
819 
820 		adev->gfx.gfx_off_req_count--;
821 
822 		if (adev->gfx.gfx_off_req_count == 0 &&
823 		    !adev->gfx.gfx_off_state) {
824 			/* If going to s2idle, no need to wait */
825 			if (adev->in_s0ix) {
826 				if (!amdgpu_dpm_set_powergating_by_smu(adev,
827 						AMD_IP_BLOCK_TYPE_GFX, true))
828 					adev->gfx.gfx_off_state = true;
829 			} else {
830 				schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
831 					      delay);
832 			}
833 		}
834 	} else {
835 		if (adev->gfx.gfx_off_req_count == 0) {
836 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
837 
838 			if (adev->gfx.gfx_off_state &&
839 			    !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
840 				adev->gfx.gfx_off_state = false;
841 
842 				if (adev->gfx.funcs->init_spm_golden) {
843 					dev_dbg(adev->dev,
844 						"GFXOFF is disabled, re-init SPM golden settings\n");
845 					amdgpu_gfx_init_spm_golden(adev);
846 				}
847 			}
848 		}
849 
850 		adev->gfx.gfx_off_req_count++;
851 	}
852 
853 unlock:
854 	mutex_unlock(&adev->gfx.gfx_off_mutex);
855 }
856 
/**
 * amdgpu_set_gfx_off_residency - Forward a GFXOFF residency setting to DPM
 *
 * @adev: amdgpu_device pointer
 * @value: value passed to amdgpu_dpm_set_residency_gfxoff()
 *
 * The call is made with gfx_off_mutex held. Returns the DPM call's result.
 */
int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
{
	int ret;

	mutex_lock(&adev->gfx.gfx_off_mutex);
	ret = amdgpu_dpm_set_residency_gfxoff(adev, value);
	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return ret;
}
869 
/**
 * amdgpu_get_gfx_off_residency - Query the GFXOFF residency from DPM
 *
 * @adev: amdgpu_device pointer
 * @value: output passed to amdgpu_dpm_get_residency_gfxoff()
 *
 * The call is made with gfx_off_mutex held. Returns the DPM call's result.
 */
int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value)
{
	int ret;

	mutex_lock(&adev->gfx.gfx_off_mutex);
	ret = amdgpu_dpm_get_residency_gfxoff(adev, value);
	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return ret;
}
882 
/**
 * amdgpu_get_gfx_off_entrycount - Query the GFXOFF entry count from DPM
 *
 * @adev: amdgpu_device pointer
 * @value: output passed to amdgpu_dpm_get_entrycount_gfxoff()
 *
 * The call is made with gfx_off_mutex held. Returns the DPM call's result.
 */
int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value)
{
	int ret;

	mutex_lock(&adev->gfx.gfx_off_mutex);
	ret = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return ret;
}
895 
/**
 * amdgpu_get_gfx_off_status - Query the GFXOFF status from DPM
 *
 * @adev: amdgpu_device pointer
 * @value: output passed to amdgpu_dpm_get_status_gfxoff()
 *
 * The call is made with gfx_off_mutex held. Returns the DPM call's result.
 */
int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
{
	int ret;

	mutex_lock(&adev->gfx.gfx_off_mutex);
	ret = amdgpu_dpm_get_status_gfxoff(adev, value);
	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return ret;
}
909 
amdgpu_gfx_ras_late_init(struct amdgpu_device * adev,struct ras_common_if * ras_block)910 int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
911 {
912 	int r;
913 
914 	if (amdgpu_ras_is_supported(adev, ras_block->block)) {
915 		if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
916 			r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
917 			if (r)
918 				return r;
919 		}
920 
921 		r = amdgpu_ras_block_late_init(adev, ras_block);
922 		if (r)
923 			return r;
924 
925 		if (adev->gfx.cp_ecc_error_irq.funcs) {
926 			r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
927 			if (r)
928 				goto late_fini;
929 		}
930 	} else {
931 		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
932 	}
933 
934 	return 0;
935 late_fini:
936 	amdgpu_ras_block_late_fini(adev, ras_block);
937 	return r;
938 }
939 
amdgpu_gfx_ras_sw_init(struct amdgpu_device * adev)940 int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
941 {
942 	int err = 0;
943 	struct amdgpu_gfx_ras *ras = NULL;
944 
945 	/* adev->gfx.ras is NULL, which means gfx does not
946 	 * support ras function, then do nothing here.
947 	 */
948 	if (!adev->gfx.ras)
949 		return 0;
950 
951 	ras = adev->gfx.ras;
952 
953 	err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
954 	if (err) {
955 		dev_err(adev->dev, "Failed to register gfx ras block!\n");
956 		return err;
957 	}
958 
959 	strcpy(ras->ras_block.ras_comm.name, "gfx");
960 	ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
961 	ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
962 	adev->gfx.ras_if = &ras->ras_block.ras_comm;
963 
964 	/* If not define special ras_late_init function, use gfx default ras_late_init */
965 	if (!ras->ras_block.ras_late_init)
966 		ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
967 
968 	/* If not defined special ras_cb function, use default ras_cb */
969 	if (!ras->ras_block.ras_cb)
970 		ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
971 
972 	return 0;
973 }
974 
amdgpu_gfx_poison_consumption_handler(struct amdgpu_device * adev,struct amdgpu_iv_entry * entry)975 int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
976 						struct amdgpu_iv_entry *entry)
977 {
978 	if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler)
979 		return adev->gfx.ras->poison_consumption_handler(adev, entry);
980 
981 	return 0;
982 }
983 
amdgpu_gfx_process_ras_data_cb(struct amdgpu_device * adev,void * err_data,struct amdgpu_iv_entry * entry)984 int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
985 		void *err_data,
986 		struct amdgpu_iv_entry *entry)
987 {
988 	/* TODO ue will trigger an interrupt.
989 	 *
990 	 * When “Full RAS” is enabled, the per-IP interrupt sources should
991 	 * be disabled and the driver should only look for the aggregated
992 	 * interrupt via sync flood
993 	 */
994 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
995 		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
996 		if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
997 		    adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
998 			adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
999 		amdgpu_ras_reset_gpu(adev);
1000 	}
1001 	return AMDGPU_RAS_SUCCESS;
1002 }
1003 
amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device * adev,struct amdgpu_irq_src * source,struct amdgpu_iv_entry * entry)1004 int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
1005 				  struct amdgpu_irq_src *source,
1006 				  struct amdgpu_iv_entry *entry)
1007 {
1008 	struct ras_common_if *ras_if = adev->gfx.ras_if;
1009 	struct ras_dispatch_if ih_data = {
1010 		.entry = entry,
1011 	};
1012 
1013 	if (!ras_if)
1014 		return 0;
1015 
1016 	ih_data.head = *ras_if;
1017 
1018 	DRM_ERROR("CP ECC ERROR IRQ\n");
1019 	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
1020 	return 0;
1021 }
1022 
amdgpu_gfx_ras_error_func(struct amdgpu_device * adev,void * ras_error_status,void (* func)(struct amdgpu_device * adev,void * ras_error_status,int xcc_id))1023 void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
1024 		void *ras_error_status,
1025 		void (*func)(struct amdgpu_device *adev, void *ras_error_status,
1026 				int xcc_id))
1027 {
1028 	int i;
1029 	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
1030 	uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
1031 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
1032 
1033 	if (err_data) {
1034 		err_data->ue_count = 0;
1035 		err_data->ce_count = 0;
1036 	}
1037 
1038 	for_each_inst(i, xcc_mask)
1039 		func(adev, ras_error_status, i);
1040 }
1041 
/**
 * amdgpu_kiq_rreg - read a register through the KIQ ring
 * @adev: amdgpu device
 * @reg: register offset to read
 * @xcc_id: index into adev->gfx.kiq[] selecting the KIQ to use
 *
 * When MES ring 0 is ready the read is delegated to amdgpu_mes_rreg().
 * Otherwise a read packet plus a polling fence are emitted on the KIQ ring
 * and the result is fetched from a writeback slot once the fence signals.
 *
 * Returns the register value, 0 when hardware access is being skipped, or
 * ~0 on failure.
 */
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq, reg_val_offs = 0, value = 0;
	/*
	 * Track the writeback allocation explicitly instead of testing
	 * reg_val_offs for non-zero: a slot whose offset happens to be 0
	 * would otherwise never be released on the failure path.
	 */
	bool wb_allocated = false;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *ring = &kiq->ring;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (adev->mes.ring[0].sched.ready)
		return amdgpu_mes_rreg(adev, reg);

	BUG_ON(!ring->funcs->emit_rreg);

	spin_lock_irqsave(&kiq->ring_lock, flags);
	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
		pr_err("critical bug! too many kiq readers\n");
		goto failed_unlock;
	}
	wb_allocated = true;

	r = amdgpu_ring_alloc(ring, 32);
	if (r)
		goto failed_unlock;

	amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/*
	 * Do not keep waiting during a GPU reset: that could block
	 * gpu_recover() forever, e.g. when this read is triggered from TTM
	 * and ttm_bo_lock_delayed_workqueue() never returns because we are
	 * still waiting here.
	 *
	 * Do not keep waiting in IRQ context either.
	 */
	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
		goto failed_kiq_read;

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	/* cnt only exceeds the limit when every retry timed out. */
	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_read;

	mb();
	value = adev->wb.wb[reg_val_offs];
	amdgpu_device_wb_free(adev, reg_val_offs);
	return value;

failed_undo:
	amdgpu_ring_undo(ring);
failed_unlock:
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_read:
	if (wb_allocated)
		amdgpu_device_wb_free(adev, reg_val_offs);
	dev_err(adev->dev, "failed to read reg:%x\n", reg);
	return ~0;
}
1112 
/**
 * amdgpu_kiq_wreg - write a register through the KIQ ring
 * @adev: amdgpu device
 * @reg: register offset to write
 * @v: value to write
 * @xcc_id: index into adev->gfx.kiq[] selecting the KIQ to use
 *
 * When MES ring 0 is ready the write is delegated to amdgpu_mes_wreg().
 * Otherwise a write packet plus a polling fence are emitted on the KIQ ring
 * and the fence is waited for.  Failures are only logged.
 */
void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *ring = &kiq->ring;

	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (adev->mes.ring[0].sched.ready) {
		amdgpu_mes_wreg(adev, reg, v);
		return;
	}

	/*
	 * Only check the KIQ ring callbacks once we know the KIQ ring is
	 * actually going to be used, matching amdgpu_kiq_rreg().
	 */
	BUG_ON(!ring->funcs->emit_wreg);

	spin_lock_irqsave(&kiq->ring_lock, flags);
	r = amdgpu_ring_alloc(ring, 32);
	if (r)
		goto failed_unlock;

	amdgpu_ring_emit_wreg(ring, reg, v);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/*
	 * Do not keep waiting during a GPU reset: that could block
	 * gpu_recover() forever, e.g. when this access is triggered from TTM
	 * and ttm_bo_lock_delayed_workqueue() never returns because we are
	 * still waiting here.
	 *
	 * Do not keep waiting in IRQ context either.
	 */
	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
		goto failed_kiq_write;

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	/* cnt only exceeds the limit when every retry timed out. */
	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_write;

	return;

failed_undo:
	amdgpu_ring_undo(ring);
failed_unlock:
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_write:
	dev_err(adev->dev, "failed to write reg:%x\n", reg);
}
1176 
amdgpu_gfx_get_num_kcq(struct amdgpu_device * adev)1177 int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
1178 {
1179 	if (amdgpu_num_kcq == -1) {
1180 		return 8;
1181 	} else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
1182 		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
1183 		return 8;
1184 	}
1185 	return amdgpu_num_kcq;
1186 }
1187 
/**
 * amdgpu_gfx_cp_init_microcode - record version/size info for a CP ucode
 * @adev: amdgpu device
 * @ucode_id: AMDGPU_UCODE_ID_CP_* identifier of the firmware piece
 *
 * Picks the firmware image matching @ucode_id, stores the ucode and feature
 * versions in adev->gfx for the ids that carry them, and works out the size
 * of that piece from the v1.0 or v2.0 GFX firmware header.  When firmware is
 * loaded through PSP, the image is registered in adev->firmware.ucode[] and
 * the page-aligned size is added to adev->firmware.fw_size.
 *
 * An unknown @ucode_id is logged and otherwise ignored.
 */
void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
				  uint32_t ucode_id)
{
	const struct gfx_firmware_header_v1_0 *hdr_v1;
	const struct gfx_firmware_header_v2_0 *hdr_v2;
	const struct firmware *fw;
	unsigned int size;

	switch (ucode_id) {
	/* PFP */
	case AMDGPU_UCODE_ID_CP_PFP:
		fw = adev->gfx.pfp_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		adev->gfx.pfp_fw_version =
			le32_to_cpu(hdr_v1->header.ucode_version);
		adev->gfx.pfp_feature_version =
			le32_to_cpu(hdr_v1->ucode_feature_version);
		size = le32_to_cpu(hdr_v1->header.ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_PFP:
		fw = adev->gfx.pfp_fw;
		hdr_v2 = (const struct gfx_firmware_header_v2_0 *)fw->data;
		adev->gfx.pfp_fw_version =
			le32_to_cpu(hdr_v2->header.ucode_version);
		adev->gfx.pfp_feature_version =
			le32_to_cpu(hdr_v2->ucode_feature_version);
		size = le32_to_cpu(hdr_v2->ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
		fw = adev->gfx.pfp_fw;
		hdr_v2 = (const struct gfx_firmware_header_v2_0 *)fw->data;
		size = le32_to_cpu(hdr_v2->data_size_bytes);
		break;

	/* ME */
	case AMDGPU_UCODE_ID_CP_ME:
		fw = adev->gfx.me_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		adev->gfx.me_fw_version =
			le32_to_cpu(hdr_v1->header.ucode_version);
		adev->gfx.me_feature_version =
			le32_to_cpu(hdr_v1->ucode_feature_version);
		size = le32_to_cpu(hdr_v1->header.ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_ME:
		fw = adev->gfx.me_fw;
		hdr_v2 = (const struct gfx_firmware_header_v2_0 *)fw->data;
		adev->gfx.me_fw_version =
			le32_to_cpu(hdr_v2->header.ucode_version);
		adev->gfx.me_feature_version =
			le32_to_cpu(hdr_v2->ucode_feature_version);
		size = le32_to_cpu(hdr_v2->ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
		fw = adev->gfx.me_fw;
		hdr_v2 = (const struct gfx_firmware_header_v2_0 *)fw->data;
		size = le32_to_cpu(hdr_v2->data_size_bytes);
		break;

	/* CE */
	case AMDGPU_UCODE_ID_CP_CE:
		fw = adev->gfx.ce_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		adev->gfx.ce_fw_version =
			le32_to_cpu(hdr_v1->header.ucode_version);
		adev->gfx.ce_feature_version =
			le32_to_cpu(hdr_v1->ucode_feature_version);
		size = le32_to_cpu(hdr_v1->header.ucode_size_bytes);
		break;

	/* MEC1: the jump table (jt_size dwords) is accounted separately */
	case AMDGPU_UCODE_ID_CP_MEC1:
		fw = adev->gfx.mec_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		adev->gfx.mec_fw_version =
			le32_to_cpu(hdr_v1->header.ucode_version);
		adev->gfx.mec_feature_version =
			le32_to_cpu(hdr_v1->ucode_feature_version);
		size = le32_to_cpu(hdr_v1->header.ucode_size_bytes) -
		       le32_to_cpu(hdr_v1->jt_size) * 4;
		break;
	case AMDGPU_UCODE_ID_CP_MEC1_JT:
		fw = adev->gfx.mec_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		size = le32_to_cpu(hdr_v1->jt_size) * 4;
		break;

	/* MEC2: same split between ucode and jump table as MEC1 */
	case AMDGPU_UCODE_ID_CP_MEC2:
		fw = adev->gfx.mec2_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(hdr_v1->header.ucode_version);
		adev->gfx.mec2_feature_version =
			le32_to_cpu(hdr_v1->ucode_feature_version);
		size = le32_to_cpu(hdr_v1->header.ucode_size_bytes) -
		       le32_to_cpu(hdr_v1->jt_size) * 4;
		break;
	case AMDGPU_UCODE_ID_CP_MEC2_JT:
		fw = adev->gfx.mec2_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		size = le32_to_cpu(hdr_v1->jt_size) * 4;
		break;

	/* RS64 MEC */
	case AMDGPU_UCODE_ID_CP_RS64_MEC:
		fw = adev->gfx.mec_fw;
		hdr_v2 = (const struct gfx_firmware_header_v2_0 *)fw->data;
		adev->gfx.mec_fw_version =
			le32_to_cpu(hdr_v2->header.ucode_version);
		adev->gfx.mec_feature_version =
			le32_to_cpu(hdr_v2->ucode_feature_version);
		size = le32_to_cpu(hdr_v2->ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
		fw = adev->gfx.mec_fw;
		hdr_v2 = (const struct gfx_firmware_header_v2_0 *)fw->data;
		size = le32_to_cpu(hdr_v2->data_size_bytes);
		break;

	default:
		dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id);
		return;
	}

	/* Only PSP loading needs the image registered and sized. */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		struct amdgpu_firmware_info *entry =
			&adev->firmware.ucode[ucode_id];

		entry->ucode_id = ucode_id;
		entry->fw = fw;
		adev->firmware.fw_size += ALIGN(size, PAGE_SIZE);
	}
}
1327 
amdgpu_gfx_is_master_xcc(struct amdgpu_device * adev,int xcc_id)1328 bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
1329 {
1330 	return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
1331 			adev->gfx.num_xcc_per_xcp : 1));
1332 }
1333 
amdgpu_gfx_get_current_compute_partition(struct device * dev,struct device_attribute * addr,char * buf)1334 static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
1335 						struct device_attribute *addr,
1336 						char *buf)
1337 {
1338 	struct drm_device *ddev = dev_get_drvdata(dev);
1339 	struct amdgpu_device *adev = drm_to_adev(ddev);
1340 	int mode;
1341 
1342 	mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
1343 					       AMDGPU_XCP_FL_NONE);
1344 
1345 	return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
1346 }
1347 
amdgpu_gfx_set_compute_partition(struct device * dev,struct device_attribute * addr,const char * buf,size_t count)1348 static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
1349 						struct device_attribute *addr,
1350 						const char *buf, size_t count)
1351 {
1352 	struct drm_device *ddev = dev_get_drvdata(dev);
1353 	struct amdgpu_device *adev = drm_to_adev(ddev);
1354 	enum amdgpu_gfx_partition mode;
1355 	int ret = 0, num_xcc;
1356 
1357 	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1358 	if (num_xcc % 2 != 0)
1359 		return -EINVAL;
1360 
1361 	if (!strncasecmp("SPX", buf, strlen("SPX"))) {
1362 		mode = AMDGPU_SPX_PARTITION_MODE;
1363 	} else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
1364 		/*
1365 		 * DPX mode needs AIDs to be in multiple of 2.
1366 		 * Each AID connects 2 XCCs.
1367 		 */
1368 		if (num_xcc%4)
1369 			return -EINVAL;
1370 		mode = AMDGPU_DPX_PARTITION_MODE;
1371 	} else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
1372 		if (num_xcc != 6)
1373 			return -EINVAL;
1374 		mode = AMDGPU_TPX_PARTITION_MODE;
1375 	} else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
1376 		if (num_xcc != 8)
1377 			return -EINVAL;
1378 		mode = AMDGPU_QPX_PARTITION_MODE;
1379 	} else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
1380 		mode = AMDGPU_CPX_PARTITION_MODE;
1381 	} else {
1382 		return -EINVAL;
1383 	}
1384 
1385 	ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
1386 
1387 	if (ret)
1388 		return ret;
1389 
1390 	return count;
1391 }
1392 
amdgpu_gfx_get_available_compute_partition(struct device * dev,struct device_attribute * addr,char * buf)1393 static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
1394 						struct device_attribute *addr,
1395 						char *buf)
1396 {
1397 	struct drm_device *ddev = dev_get_drvdata(dev);
1398 	struct amdgpu_device *adev = drm_to_adev(ddev);
1399 	char *supported_partition;
1400 
1401 	/* TBD */
1402 	switch (NUM_XCC(adev->gfx.xcc_mask)) {
1403 	case 8:
1404 		supported_partition = "SPX, DPX, QPX, CPX";
1405 		break;
1406 	case 6:
1407 		supported_partition = "SPX, TPX, CPX";
1408 		break;
1409 	case 4:
1410 		supported_partition = "SPX, DPX, CPX";
1411 		break;
1412 	/* this seems only existing in emulation phase */
1413 	case 2:
1414 		supported_partition = "SPX, CPX";
1415 		break;
1416 	default:
1417 		supported_partition = "Not supported";
1418 		break;
1419 	}
1420 
1421 	return sysfs_emit(buf, "%s\n", supported_partition);
1422 }
1423 
amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring * ring)1424 static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
1425 {
1426 	struct amdgpu_device *adev = ring->adev;
1427 	struct drm_gpu_scheduler *sched = &ring->sched;
1428 	struct drm_sched_entity entity;
1429 	static atomic_t counter;
1430 	struct dma_fence *f;
1431 	struct amdgpu_job *job;
1432 	struct amdgpu_ib *ib;
1433 	void *owner;
1434 	int i, r;
1435 
1436 	/* Initialize the scheduler entity */
1437 	r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
1438 				  &sched, 1, NULL);
1439 	if (r) {
1440 		dev_err(adev->dev, "Failed setting up GFX kernel entity.\n");
1441 		goto err;
1442 	}
1443 
1444 	/*
1445 	 * Use some unique dummy value as the owner to make sure we execute
1446 	 * the cleaner shader on each submission. The value just need to change
1447 	 * for each submission and is otherwise meaningless.
1448 	 */
1449 	owner = (void *)(unsigned long)atomic_inc_return(&counter);
1450 
1451 	r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner,
1452 				     64, 0, &job);
1453 	if (r)
1454 		goto err;
1455 
1456 	job->enforce_isolation = true;
1457 
1458 	ib = &job->ibs[0];
1459 	for (i = 0; i <= ring->funcs->align_mask; ++i)
1460 		ib->ptr[i] = ring->funcs->nop;
1461 	ib->length_dw = ring->funcs->align_mask + 1;
1462 
1463 	f = amdgpu_job_submit(job);
1464 
1465 	r = dma_fence_wait(f, false);
1466 	if (r)
1467 		goto err;
1468 
1469 	dma_fence_put(f);
1470 
1471 	/* Clean up the scheduler entity */
1472 	drm_sched_entity_destroy(&entity);
1473 	return 0;
1474 
1475 err:
1476 	return r;
1477 }
1478 
amdgpu_gfx_run_cleaner_shader(struct amdgpu_device * adev,int xcp_id)1479 static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id)
1480 {
1481 	int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1482 	struct amdgpu_ring *ring;
1483 	int num_xcc_to_clear;
1484 	int i, r, xcc_id;
1485 
1486 	if (adev->gfx.num_xcc_per_xcp)
1487 		num_xcc_to_clear = adev->gfx.num_xcc_per_xcp;
1488 	else
1489 		num_xcc_to_clear = 1;
1490 
1491 	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
1492 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1493 			ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
1494 			if ((ring->xcp_id == xcp_id) && ring->sched.ready) {
1495 				r = amdgpu_gfx_run_cleaner_shader_job(ring);
1496 				if (r)
1497 					return r;
1498 				num_xcc_to_clear--;
1499 				break;
1500 			}
1501 		}
1502 	}
1503 
1504 	if (num_xcc_to_clear)
1505 		return -ENOENT;
1506 
1507 	return 0;
1508 }
1509 
amdgpu_gfx_set_run_cleaner_shader(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)1510 static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
1511 						 struct device_attribute *attr,
1512 						 const char *buf,
1513 						 size_t count)
1514 {
1515 	struct drm_device *ddev = dev_get_drvdata(dev);
1516 	struct amdgpu_device *adev = drm_to_adev(ddev);
1517 	int ret;
1518 	long value;
1519 
1520 	if (amdgpu_in_reset(adev))
1521 		return -EPERM;
1522 	if (adev->in_suspend && !adev->in_runpm)
1523 		return -EPERM;
1524 
1525 	ret = kstrtol(buf, 0, &value);
1526 
1527 	if (ret)
1528 		return -EINVAL;
1529 
1530 	if (value < 0)
1531 		return -EINVAL;
1532 
1533 	if (adev->xcp_mgr) {
1534 		if (value >= adev->xcp_mgr->num_xcps)
1535 			return -EINVAL;
1536 	} else {
1537 		if (value > 1)
1538 			return -EINVAL;
1539 	}
1540 
1541 	ret = pm_runtime_get_sync(ddev->dev);
1542 	if (ret < 0) {
1543 		pm_runtime_put_autosuspend(ddev->dev);
1544 		return ret;
1545 	}
1546 
1547 	ret = amdgpu_gfx_run_cleaner_shader(adev, value);
1548 
1549 	pm_runtime_mark_last_busy(ddev->dev);
1550 	pm_runtime_put_autosuspend(ddev->dev);
1551 
1552 	if (ret)
1553 		return ret;
1554 
1555 	return count;
1556 }
1557 
amdgpu_gfx_get_enforce_isolation(struct device * dev,struct device_attribute * attr,char * buf)1558 static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
1559 						struct device_attribute *attr,
1560 						char *buf)
1561 {
1562 	struct drm_device *ddev = dev_get_drvdata(dev);
1563 	struct amdgpu_device *adev = drm_to_adev(ddev);
1564 	int i;
1565 	ssize_t size = 0;
1566 
1567 	if (adev->xcp_mgr) {
1568 		for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
1569 			size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]);
1570 			if (i < (adev->xcp_mgr->num_xcps - 1))
1571 				size += sysfs_emit_at(buf, size, " ");
1572 		}
1573 		buf[size++] = '\n';
1574 	} else {
1575 		size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]);
1576 	}
1577 
1578 	return size;
1579 }
1580 
amdgpu_gfx_set_enforce_isolation(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)1581 static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
1582 						struct device_attribute *attr,
1583 						const char *buf, size_t count)
1584 {
1585 	struct drm_device *ddev = dev_get_drvdata(dev);
1586 	struct amdgpu_device *adev = drm_to_adev(ddev);
1587 	long partition_values[MAX_XCP] = {0};
1588 	int ret, i, num_partitions;
1589 	const char *input_buf = buf;
1590 
1591 	for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
1592 		ret = sscanf(input_buf, "%ld", &partition_values[i]);
1593 		if (ret <= 0)
1594 			break;
1595 
1596 		/* Move the pointer to the next value in the string */
1597 		input_buf = strchr(input_buf, ' ');
1598 		if (input_buf) {
1599 			input_buf++;
1600 		} else {
1601 			i++;
1602 			break;
1603 		}
1604 	}
1605 	num_partitions = i;
1606 
1607 	if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps)
1608 		return -EINVAL;
1609 
1610 	if (!adev->xcp_mgr && num_partitions != 1)
1611 		return -EINVAL;
1612 
1613 	for (i = 0; i < num_partitions; i++) {
1614 		if (partition_values[i] != 0 && partition_values[i] != 1)
1615 			return -EINVAL;
1616 	}
1617 
1618 	mutex_lock(&adev->enforce_isolation_mutex);
1619 
1620 	for (i = 0; i < num_partitions; i++) {
1621 		if (adev->enforce_isolation[i] && !partition_values[i]) {
1622 			/* Going from enabled to disabled */
1623 			amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i));
1624 		} else if (!adev->enforce_isolation[i] && partition_values[i]) {
1625 			/* Going from disabled to enabled */
1626 			amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
1627 		}
1628 		adev->enforce_isolation[i] = partition_values[i];
1629 	}
1630 
1631 	mutex_unlock(&adev->enforce_isolation_mutex);
1632 
1633 	return count;
1634 }
1635 
/* Write-only (0200): writing a partition id runs the cleaner shader on it. */
static DEVICE_ATTR(run_cleaner_shader, 0200,
		   NULL, amdgpu_gfx_set_run_cleaner_shader);

/* Read/write (0644): per-partition enforce-isolation settings. */
static DEVICE_ATTR(enforce_isolation, 0644,
		   amdgpu_gfx_get_enforce_isolation,
		   amdgpu_gfx_set_enforce_isolation);

/* Read/write (0644): query or switch the current compute partition mode. */
static DEVICE_ATTR(current_compute_partition, 0644,
		   amdgpu_gfx_get_current_compute_partition,
		   amdgpu_gfx_set_compute_partition);

/* Read-only (0444): list of partition modes available on this device. */
static DEVICE_ATTR(available_compute_partition, 0444,
		   amdgpu_gfx_get_available_compute_partition, NULL);
1649 
amdgpu_gfx_sysfs_init(struct amdgpu_device * adev)1650 int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
1651 {
1652 	int r;
1653 
1654 	r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
1655 	if (r)
1656 		return r;
1657 
1658 	r = device_create_file(adev->dev, &dev_attr_available_compute_partition);
1659 
1660 	return r;
1661 }
1662 
amdgpu_gfx_sysfs_fini(struct amdgpu_device * adev)1663 void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
1664 {
1665 	device_remove_file(adev->dev, &dev_attr_current_compute_partition);
1666 	device_remove_file(adev->dev, &dev_attr_available_compute_partition);
1667 }
1668 
amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device * adev)1669 int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
1670 {
1671 	int r;
1672 
1673 	r = device_create_file(adev->dev, &dev_attr_enforce_isolation);
1674 	if (r)
1675 		return r;
1676 
1677 	r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader);
1678 	if (r)
1679 		return r;
1680 
1681 	return 0;
1682 }
1683 
amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device * adev)1684 void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
1685 {
1686 	device_remove_file(adev->dev, &dev_attr_enforce_isolation);
1687 	device_remove_file(adev->dev, &dev_attr_run_cleaner_shader);
1688 }
1689 
amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device * adev,unsigned int cleaner_shader_size)1690 int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
1691 				      unsigned int cleaner_shader_size)
1692 {
1693 	if (!adev->gfx.enable_cleaner_shader)
1694 		return -EOPNOTSUPP;
1695 
1696 	return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE,
1697 				       AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT,
1698 				       &adev->gfx.cleaner_shader_obj,
1699 				       &adev->gfx.cleaner_shader_gpu_addr,
1700 				       (void **)&adev->gfx.cleaner_shader_cpu_ptr);
1701 }
1702 
amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device * adev)1703 void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev)
1704 {
1705 	if (!adev->gfx.enable_cleaner_shader)
1706 		return;
1707 
1708 	amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj,
1709 			      &adev->gfx.cleaner_shader_gpu_addr,
1710 			      (void **)&adev->gfx.cleaner_shader_cpu_ptr);
1711 }
1712 
amdgpu_gfx_cleaner_shader_init(struct amdgpu_device * adev,unsigned int cleaner_shader_size,const void * cleaner_shader_ptr)1713 void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
1714 				    unsigned int cleaner_shader_size,
1715 				    const void *cleaner_shader_ptr)
1716 {
1717 	if (!adev->gfx.enable_cleaner_shader)
1718 		return;
1719 
1720 	if (adev->gfx.cleaner_shader_cpu_ptr && cleaner_shader_ptr)
1721 		memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr,
1722 			    cleaner_shader_size);
1723 }
1724 
1725 /**
1726  * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver)
1727  * @adev: amdgpu_device pointer
1728  * @idx: Index of the scheduler to control
1729  * @enable: Whether to enable or disable the KFD scheduler
1730  *
1731  * This function is used to control the KFD (Kernel Fusion Driver) scheduler
1732  * from the KGD. It is part of the cleaner shader feature. This function plays
1733  * a key role in enforcing process isolation on the GPU.
1734  *
1735  * The function uses a reference count mechanism (kfd_sch_req_count) to keep
1736  * track of the number of requests to enable the KFD scheduler. When a request
1737  * to enable the KFD scheduler is made, the reference count is decremented.
1738  * When the reference count reaches zero, a delayed work is scheduled to
1739  * enforce isolation after a delay of GFX_SLICE_PERIOD.
1740  *
1741  * When a request to disable the KFD scheduler is made, the function first
1742  * checks if the reference count is zero. If it is, it cancels the delayed work
1743  * for enforcing isolation and checks if the KFD scheduler is active. If the
1744  * KFD scheduler is active, it sends a request to stop the KFD scheduler and
1745  * sets the KFD scheduler state to inactive. Then, it increments the reference
1746  * count.
1747  *
1748  * The function is synchronized using the kfd_sch_mutex to ensure that the KFD
1749  * scheduler state and reference count are updated atomically.
1750  *
1751  * Note: If the reference count is already zero when a request to enable the
1752  * KFD scheduler is made, it means there's an imbalance bug somewhere. The
1753  * function triggers a warning in this case.
1754  */
amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device * adev,u32 idx,bool enable)1755 static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx,
1756 				    bool enable)
1757 {
1758 	mutex_lock(&adev->gfx.kfd_sch_mutex);
1759 
1760 	if (enable) {
1761 		/* If the count is already 0, it means there's an imbalance bug somewhere.
1762 		 * Note that the bug may be in a different caller than the one which triggers the
1763 		 * WARN_ON_ONCE.
1764 		 */
1765 		if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) {
1766 			dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n");
1767 			goto unlock;
1768 		}
1769 
1770 		adev->gfx.kfd_sch_req_count[idx]--;
1771 
1772 		if (adev->gfx.kfd_sch_req_count[idx] == 0 &&
1773 		    adev->gfx.kfd_sch_inactive[idx]) {
1774 			schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
1775 					      GFX_SLICE_PERIOD);
1776 		}
1777 	} else {
1778 		if (adev->gfx.kfd_sch_req_count[idx] == 0) {
1779 			cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work);
1780 			if (!adev->gfx.kfd_sch_inactive[idx]) {
1781 				amdgpu_amdkfd_stop_sched(adev, idx);
1782 				adev->gfx.kfd_sch_inactive[idx] = true;
1783 			}
1784 		}
1785 
1786 		adev->gfx.kfd_sch_req_count[idx]++;
1787 	}
1788 
1789 unlock:
1790 	mutex_unlock(&adev->gfx.kfd_sch_mutex);
1791 }
1792 
1793 /**
1794  * amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation
1795  *
1796  * @work: work_struct.
1797  *
1798  * This function is the work handler for enforcing shader isolation on AMD GPUs.
1799  * It counts the number of emitted fences for each GFX and compute ring. If there
1800  * are any fences, it schedules the `enforce_isolation_work` to be run after a
1801  * delay of `GFX_SLICE_PERIOD`. If there are no fences, it signals the Kernel Fusion
1802  * Driver (KFD) to resume the runqueue. The function is synchronized using the
1803  * `enforce_isolation_mutex`.
1804  */
amdgpu_gfx_enforce_isolation_handler(struct work_struct * work)1805 void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
1806 {
1807 	struct amdgpu_isolation_work *isolation_work =
1808 		container_of(work, struct amdgpu_isolation_work, work.work);
1809 	struct amdgpu_device *adev = isolation_work->adev;
1810 	u32 i, idx, fences = 0;
1811 
1812 	if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION)
1813 		idx = 0;
1814 	else
1815 		idx = isolation_work->xcp_id;
1816 
1817 	if (idx >= MAX_XCP)
1818 		return;
1819 
1820 	mutex_lock(&adev->enforce_isolation_mutex);
1821 	for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) {
1822 		if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id)
1823 			fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
1824 	}
1825 	for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) {
1826 		if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id)
1827 			fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
1828 	}
1829 	if (fences) {
1830 		schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
1831 				      GFX_SLICE_PERIOD);
1832 	} else {
1833 		/* Tell KFD to resume the runqueue */
1834 		if (adev->kfd.init_complete) {
1835 			WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]);
1836 			WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]);
1837 				amdgpu_amdkfd_start_sched(adev, idx);
1838 				adev->gfx.kfd_sch_inactive[idx] = false;
1839 		}
1840 	}
1841 	mutex_unlock(&adev->enforce_isolation_mutex);
1842 }
1843 
amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring * ring)1844 void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
1845 {
1846 	struct amdgpu_device *adev = ring->adev;
1847 	u32 idx;
1848 	bool sched_work = false;
1849 
1850 	if (!adev->gfx.enable_cleaner_shader)
1851 		return;
1852 
1853 	if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
1854 		idx = 0;
1855 	else
1856 		idx = ring->xcp_id;
1857 
1858 	if (idx >= MAX_XCP)
1859 		return;
1860 
1861 	mutex_lock(&adev->enforce_isolation_mutex);
1862 	if (adev->enforce_isolation[idx]) {
1863 		if (adev->kfd.init_complete)
1864 			sched_work = true;
1865 	}
1866 	mutex_unlock(&adev->enforce_isolation_mutex);
1867 
1868 	if (sched_work)
1869 		amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
1870 }
1871 
amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring * ring)1872 void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
1873 {
1874 	struct amdgpu_device *adev = ring->adev;
1875 	u32 idx;
1876 	bool sched_work = false;
1877 
1878 	if (!adev->gfx.enable_cleaner_shader)
1879 		return;
1880 
1881 	if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
1882 		idx = 0;
1883 	else
1884 		idx = ring->xcp_id;
1885 
1886 	if (idx >= MAX_XCP)
1887 		return;
1888 
1889 	mutex_lock(&adev->enforce_isolation_mutex);
1890 	if (adev->enforce_isolation[idx]) {
1891 		if (adev->kfd.init_complete)
1892 			sched_work = true;
1893 	}
1894 	mutex_unlock(&adev->enforce_isolation_mutex);
1895 
1896 	if (sched_work)
1897 		amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
1898 }
1899