1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include "xe_guc_submit.h"
7 
8 #include <linux/bitfield.h>
9 #include <linux/bitmap.h>
10 #include <linux/circ_buf.h>
11 #include <linux/delay.h>
12 #include <linux/dma-fence-array.h>
13 #include <linux/math64.h>
14 
15 #include <drm/drm_managed.h>
16 
17 #include "abi/guc_actions_abi.h"
18 #include "abi/guc_klvs_abi.h"
19 #include "regs/xe_lrc_layout.h"
20 #include "xe_assert.h"
21 #include "xe_devcoredump.h"
22 #include "xe_device.h"
23 #include "xe_exec_queue.h"
24 #include "xe_force_wake.h"
25 #include "xe_gpu_scheduler.h"
26 #include "xe_gt.h"
27 #include "xe_gt_clock.h"
28 #include "xe_gt_printk.h"
29 #include "xe_guc.h"
30 #include "xe_guc_ct.h"
31 #include "xe_guc_exec_queue_types.h"
32 #include "xe_guc_id_mgr.h"
33 #include "xe_guc_submit_types.h"
34 #include "xe_hw_engine.h"
35 #include "xe_hw_fence.h"
36 #include "xe_lrc.h"
37 #include "xe_macros.h"
38 #include "xe_map.h"
39 #include "xe_mocs.h"
40 #include "xe_pm.h"
41 #include "xe_ring_ops_types.h"
42 #include "xe_sched_job.h"
43 #include "xe_trace.h"
44 #include "xe_vm.h"
45 
46 static struct xe_guc *
47 exec_queue_to_guc(struct xe_exec_queue *q)
48 {
49 	return &q->gt->uc.guc;
50 }
51 
52 /*
53  * Helpers for engine state, using an atomic as some of the bits can transition
54  * at the same time (e.g. a suspend can be happening at the same time as a schedule
55  * engine done being processed).
56  */
57 #define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
58 #define EXEC_QUEUE_STATE_ENABLED		(1 << 1)
59 #define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
60 #define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
61 #define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
62 #define EXEC_QUEUE_STATE_SUSPENDED		(1 << 5)
63 #define EXEC_QUEUE_STATE_RESET			(1 << 6)
64 #define EXEC_QUEUE_STATE_KILLED			(1 << 7)
65 #define EXEC_QUEUE_STATE_WEDGED			(1 << 8)
66 #define EXEC_QUEUE_STATE_BANNED			(1 << 9)
67 #define EXEC_QUEUE_STATE_CHECK_TIMEOUT		(1 << 10)
68 #define EXEC_QUEUE_STATE_EXTRA_REF		(1 << 11)
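
/*
 * All of the above bits live in the per-queue atomic q->guc->state; several
 * can be set at once, and each gets a small test/set/clear helper below that
 * uses atomic_read()/atomic_or()/atomic_and() to manipulate it.
 */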
69 
70 static bool exec_queue_registered(struct xe_exec_queue *q)
71 {
72 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
73 }
74 
75 static void set_exec_queue_registered(struct xe_exec_queue *q)
76 {
77 	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
78 }
79 
80 static void clear_exec_queue_registered(struct xe_exec_queue *q)
81 {
82 	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
83 }
84 
85 static bool exec_queue_enabled(struct xe_exec_queue *q)
86 {
87 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED;
88 }
89 
90 static void set_exec_queue_enabled(struct xe_exec_queue *q)
91 {
92 	atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
93 }
94 
95 static void clear_exec_queue_enabled(struct xe_exec_queue *q)
96 {
97 	atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
98 }
99 
100 static bool exec_queue_pending_enable(struct xe_exec_queue *q)
101 {
102 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
103 }
104 
105 static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
106 {
107 	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
108 }
109 
110 static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
111 {
112 	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
113 }
114 
115 static bool exec_queue_pending_disable(struct xe_exec_queue *q)
116 {
117 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
118 }
119 
120 static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
121 {
122 	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
123 }
124 
125 static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
126 {
127 	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
128 }
129 
130 static bool exec_queue_destroyed(struct xe_exec_queue *q)
131 {
132 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
133 }
134 
135 static void set_exec_queue_destroyed(struct xe_exec_queue *q)
136 {
137 	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
138 }
139 
140 static bool exec_queue_banned(struct xe_exec_queue *q)
141 {
142 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED;
143 }
144 
145 static void set_exec_queue_banned(struct xe_exec_queue *q)
146 {
147 	atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state);
148 }
149 
150 static bool exec_queue_suspended(struct xe_exec_queue *q)
151 {
152 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED;
153 }
154 
155 static void set_exec_queue_suspended(struct xe_exec_queue *q)
156 {
157 	atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
158 }
159 
160 static void clear_exec_queue_suspended(struct xe_exec_queue *q)
161 {
162 	atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
163 }
164 
165 static bool exec_queue_reset(struct xe_exec_queue *q)
166 {
167 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
168 }
169 
170 static void set_exec_queue_reset(struct xe_exec_queue *q)
171 {
172 	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
173 }
174 
175 static bool exec_queue_killed(struct xe_exec_queue *q)
176 {
177 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED;
178 }
179 
180 static void set_exec_queue_killed(struct xe_exec_queue *q)
181 {
182 	atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state);
183 }
184 
185 static bool exec_queue_wedged(struct xe_exec_queue *q)
186 {
187 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
188 }
189 
190 static void set_exec_queue_wedged(struct xe_exec_queue *q)
191 {
192 	atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
193 }
194 
195 static bool exec_queue_check_timeout(struct xe_exec_queue *q)
196 {
197 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_CHECK_TIMEOUT;
198 }
199 
200 static void set_exec_queue_check_timeout(struct xe_exec_queue *q)
201 {
202 	atomic_or(EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state);
203 }
204 
205 static void clear_exec_queue_check_timeout(struct xe_exec_queue *q)
206 {
207 	atomic_and(~EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state);
208 }
209 
210 static bool exec_queue_extra_ref(struct xe_exec_queue *q)
211 {
212 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_EXTRA_REF;
213 }
214 
215 static void set_exec_queue_extra_ref(struct xe_exec_queue *q)
216 {
217 	atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state);
218 }
219 
220 static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
221 {
222 	return (atomic_read(&q->guc->state) &
223 		(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED |
224 		 EXEC_QUEUE_STATE_BANNED));
225 }
226 
227 static void guc_submit_fini(struct drm_device *drm, void *arg)
228 {
229 	struct xe_guc *guc = arg;
230 	struct xe_device *xe = guc_to_xe(guc);
231 	struct xe_gt *gt = guc_to_gt(guc);
232 	int ret;
233 
234 	ret = wait_event_timeout(guc->submission_state.fini_wq,
235 				 xa_empty(&guc->submission_state.exec_queue_lookup),
236 				 HZ * 5);
237 
238 	drain_workqueue(xe->destroy_wq);
239 
240 	xe_gt_assert(gt, ret);
241 
242 	xa_destroy(&guc->submission_state.exec_queue_lookup);
243 }
244 
245 static void guc_submit_wedged_fini(void *arg)
246 {
247 	struct xe_guc *guc = arg;
248 	struct xe_exec_queue *q;
249 	unsigned long index;
250 
251 	mutex_lock(&guc->submission_state.lock);
252 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
253 		if (exec_queue_wedged(q)) {
254 			mutex_unlock(&guc->submission_state.lock);
255 			xe_exec_queue_put(q);
256 			mutex_lock(&guc->submission_state.lock);
257 		}
258 	}
259 	mutex_unlock(&guc->submission_state.lock);
260 }
261 
262 static const struct xe_exec_queue_ops guc_exec_queue_ops;
263 
264 static void primelockdep(struct xe_guc *guc)
265 {
266 	if (!IS_ENABLED(CONFIG_LOCKDEP))
267 		return;
268 
269 	fs_reclaim_acquire(GFP_KERNEL);
270 
271 	mutex_lock(&guc->submission_state.lock);
272 	mutex_unlock(&guc->submission_state.lock);
273 
274 	fs_reclaim_release(GFP_KERNEL);
275 }
276 
277 /**
278  * xe_guc_submit_init() - Initialize GuC submission.
279  * @guc: the &xe_guc to initialize
280  * @num_ids: number of GuC context IDs to use
281  *
282  * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all
283  * GuC context IDs supported by the GuC firmware should be used for submission.
284  *
285  * Only VF drivers have to provide an explicit number of GuC context IDs
286  * that they can use for submission.
287  *
288  * Return: 0 on success or a negative error code on failure.
289  */
290 int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
291 {
292 	struct xe_device *xe = guc_to_xe(guc);
293 	struct xe_gt *gt = guc_to_gt(guc);
294 	int err;
295 
296 	err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
297 	if (err)
298 		return err;
299 
300 	err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids);
301 	if (err)
302 		return err;
303 
304 	gt->exec_queue_ops = &guc_exec_queue_ops;
305 
306 	xa_init(&guc->submission_state.exec_queue_lookup);
307 
308 	init_waitqueue_head(&guc->submission_state.fini_wq);
309 
310 	primelockdep(guc);
311 
312 	guc->submission_state.initialized = true;
313 
314 	return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
315 }
316 
317 static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
318 {
319 	int i;
320 
321 	lockdep_assert_held(&guc->submission_state.lock);
322 
323 	for (i = 0; i < xa_count; ++i)
324 		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);
325 
326 	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
327 				     q->guc->id, q->width);
328 
329 	if (xa_empty(&guc->submission_state.exec_queue_lookup))
330 		wake_up(&guc->submission_state.fini_wq);
331 }
332 
333 static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
334 {
335 	int ret;
336 	int i;
337 
338 	/*
339 	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
340 	 * worst case the user gets -ENOMEM on engine create and has to try again.
341 	 *
342 	 * FIXME: Have caller pre-alloc or post-alloc with GFP_KERNEL to prevent
343 	 * failure.
344 	 */
345 	lockdep_assert_held(&guc->submission_state.lock);
346 
347 	ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
348 					   q->width);
349 	if (ret < 0)
350 		return ret;
351 
352 	q->guc->id = ret;
353 
354 	for (i = 0; i < q->width; ++i) {
355 		ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
356 				      q->guc->id + i, q, GFP_NOWAIT));
357 		if (ret)
358 			goto err_release;
359 	}
360 
361 	return 0;
362 
363 err_release:
364 	__release_guc_id(guc, q, i);
365 
366 	return ret;
367 }
368 
369 static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
370 {
371 	mutex_lock(&guc->submission_state.lock);
372 	__release_guc_id(guc, q, q->width);
373 	mutex_unlock(&guc->submission_state.lock);
374 }
375 
376 struct exec_queue_policy {
377 	u32 count;
378 	struct guc_update_exec_queue_policy h2g;
379 };
380 
381 static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
382 {
383 	size_t bytes = sizeof(policy->h2g.header) +
384 		       (sizeof(policy->h2g.klv[0]) * policy->count);
385 
386 	return bytes / sizeof(u32);
387 }
388 
389 static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
390 					      u16 guc_id)
391 {
392 	policy->h2g.header.action =
393 		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
394 	policy->h2g.header.guc_id = guc_id;
395 	policy->count = 0;
396 }
397 
398 #define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
399 static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
400 					   u32 data) \
401 { \
402 	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
403 \
404 	policy->h2g.klv[policy->count].kl = \
405 		FIELD_PREP(GUC_KLV_0_KEY, \
406 			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
407 		FIELD_PREP(GUC_KLV_0_LEN, 1); \
408 	policy->h2g.klv[policy->count].value = data; \
409 	policy->count++; \
410 }
411 
412 MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
413 MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
414 MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
415 #undef MAKE_EXEC_QUEUE_POLICY_ADD
416 
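/*
 * Translation from xe exec queue priority levels to the GuC client priority
 * values programmed through the SCHEDULING_PRIORITY KLV in init_policies().
 */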
417 static const int xe_exec_queue_prio_to_guc[] = {
418 	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
419 	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
420 	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
421 	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
422 };
423 
424 static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
425 {
426 	struct exec_queue_policy policy;
427 	struct xe_device *xe = guc_to_xe(guc);
428 	enum xe_exec_queue_priority prio = q->sched_props.priority;
429 	u32 timeslice_us = q->sched_props.timeslice_us;
430 	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
431 
432 	xe_assert(xe, exec_queue_registered(q));
433 
434 	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
435 	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
436 	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
437 	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
438 
439 	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
440 		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
441 }
442 
443 static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
444 {
445 	struct exec_queue_policy policy;
446 
447 	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
448 	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);
449 
450 	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
451 		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
452 }
453 
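/*
 * Helpers to read/write fields of the struct guc_submit_parallel_scratch area
 * backing the parallel (multi-LRC) submission work queue, accessed through the
 * iosys_map returned by xe_lrc_parallel_map().
 */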
454 #define parallel_read(xe_, map_, field_) \
455 	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
456 			field_)
457 #define parallel_write(xe_, map_, field_, val_) \
458 	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
459 			field_, val_)
460 
461 static void __register_mlrc_exec_queue(struct xe_guc *guc,
462 				       struct xe_exec_queue *q,
463 				       struct guc_ctxt_registration_info *info)
464 {
465 #define MAX_MLRC_REG_SIZE      (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
466 	struct xe_device *xe = guc_to_xe(guc);
467 	u32 action[MAX_MLRC_REG_SIZE];
468 	int len = 0;
469 	int i;
470 
471 	xe_assert(xe, xe_exec_queue_is_parallel(q));
472 
473 	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
474 	action[len++] = info->flags;
475 	action[len++] = info->context_idx;
476 	action[len++] = info->engine_class;
477 	action[len++] = info->engine_submit_mask;
478 	action[len++] = info->wq_desc_lo;
479 	action[len++] = info->wq_desc_hi;
480 	action[len++] = info->wq_base_lo;
481 	action[len++] = info->wq_base_hi;
482 	action[len++] = info->wq_size;
483 	action[len++] = q->width;
484 	action[len++] = info->hwlrca_lo;
485 	action[len++] = info->hwlrca_hi;
486 
487 	for (i = 1; i < q->width; ++i) {
488 		struct xe_lrc *lrc = q->lrc[i];
489 
490 		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
491 		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
492 	}
493 
494 	xe_assert(xe, len <= MAX_MLRC_REG_SIZE);
495 #undef MAX_MLRC_REG_SIZE
496 
497 	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
498 }
499 
500 static void __register_exec_queue(struct xe_guc *guc,
501 				  struct guc_ctxt_registration_info *info)
502 {
503 	u32 action[] = {
504 		XE_GUC_ACTION_REGISTER_CONTEXT,
505 		info->flags,
506 		info->context_idx,
507 		info->engine_class,
508 		info->engine_submit_mask,
509 		info->wq_desc_lo,
510 		info->wq_desc_hi,
511 		info->wq_base_lo,
512 		info->wq_base_hi,
513 		info->wq_size,
514 		info->hwlrca_lo,
515 		info->hwlrca_hi,
516 	};
517 
518 	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
519 }
520 
521 static void register_exec_queue(struct xe_exec_queue *q)
522 {
523 	struct xe_guc *guc = exec_queue_to_guc(q);
524 	struct xe_device *xe = guc_to_xe(guc);
525 	struct xe_lrc *lrc = q->lrc[0];
526 	struct guc_ctxt_registration_info info;
527 
528 	xe_assert(xe, !exec_queue_registered(q));
529 
530 	memset(&info, 0, sizeof(info));
531 	info.context_idx = q->guc->id;
532 	info.engine_class = xe_engine_class_to_guc_class(q->class);
533 	info.engine_submit_mask = q->logical_mask;
534 	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
535 	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
536 	info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
537 
538 	if (xe_exec_queue_is_parallel(q)) {
539 		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
540 		struct iosys_map map = xe_lrc_parallel_map(lrc);
541 
542 		info.wq_desc_lo = lower_32_bits(ggtt_addr +
543 			offsetof(struct guc_submit_parallel_scratch, wq_desc));
544 		info.wq_desc_hi = upper_32_bits(ggtt_addr +
545 			offsetof(struct guc_submit_parallel_scratch, wq_desc));
546 		info.wq_base_lo = lower_32_bits(ggtt_addr +
547 			offsetof(struct guc_submit_parallel_scratch, wq[0]));
548 		info.wq_base_hi = upper_32_bits(ggtt_addr +
549 			offsetof(struct guc_submit_parallel_scratch, wq[0]));
550 		info.wq_size = WQ_SIZE;
551 
552 		q->guc->wqi_head = 0;
553 		q->guc->wqi_tail = 0;
554 		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
555 		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
556 	}
557 
558 	/*
559 	 * We must keep a reference for LR engines if the engine is registered with
560 	 * the GuC, as jobs signal immediately and we can't destroy an engine while
561 	 * the GuC has a reference to it.
562 	 */
563 	if (xe_exec_queue_is_lr(q))
564 		xe_exec_queue_get(q);
565 
566 	set_exec_queue_registered(q);
567 	trace_xe_exec_queue_register(q);
568 	if (xe_exec_queue_is_parallel(q))
569 		__register_mlrc_exec_queue(guc, q, &info);
570 	else
571 		__register_exec_queue(guc, &info);
572 	init_policies(guc, q);
573 }
574 
575 static u32 wq_space_until_wrap(struct xe_exec_queue *q)
576 {
577 	return (WQ_SIZE - q->guc->wqi_tail);
578 }
579 
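/*
 * Wait until the parallel work queue has enough free space for a new item:
 * re-sample the head from the scratch descriptor and back off exponentially
 * (1 ms up to 1024 ms) before giving up and triggering a GT reset.
 */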
580 static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
581 {
582 	struct xe_guc *guc = exec_queue_to_guc(q);
583 	struct xe_device *xe = guc_to_xe(guc);
584 	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
585 	unsigned int sleep_period_ms = 1;
586 
587 #define AVAILABLE_SPACE \
588 	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
589 	if (wqi_size > AVAILABLE_SPACE) {
590 try_again:
591 		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
592 		if (wqi_size > AVAILABLE_SPACE) {
593 			if (sleep_period_ms == 1024) {
594 				xe_gt_reset_async(q->gt);
595 				return -ENODEV;
596 			}
597 
598 			msleep(sleep_period_ms);
599 			sleep_period_ms <<= 1;
600 			goto try_again;
601 		}
602 	}
603 #undef AVAILABLE_SPACE
604 
605 	return 0;
606 }
607 
608 static int wq_noop_append(struct xe_exec_queue *q)
609 {
610 	struct xe_guc *guc = exec_queue_to_guc(q);
611 	struct xe_device *xe = guc_to_xe(guc);
612 	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
613 	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;
614 
615 	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
616 		return -ENODEV;
617 
618 	xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw));
619 
620 	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
621 		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
622 		       FIELD_PREP(WQ_LEN_MASK, len_dw));
623 	q->guc->wqi_tail = 0;
624 
625 	return 0;
626 }
627 
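/*
 * Append a multi-LRC work queue item. Layout (one u32 each): header
 * (type/length), LRC 0 descriptor, guc_id + LRC 0 ring tail, a zero dword,
 * then the ring tail of every remaining LRC in the queue.
 */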
628 static void wq_item_append(struct xe_exec_queue *q)
629 {
630 	struct xe_guc *guc = exec_queue_to_guc(q);
631 	struct xe_device *xe = guc_to_xe(guc);
632 	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
633 #define WQ_HEADER_SIZE	4	/* Includes 1 LRC address too */
634 	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
635 	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
636 	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
637 	int i = 0, j;
638 
639 	if (wqi_size > wq_space_until_wrap(q)) {
640 		if (wq_noop_append(q))
641 			return;
642 	}
643 	if (wq_wait_for_space(q, wqi_size))
644 		return;
645 
646 	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
647 		FIELD_PREP(WQ_LEN_MASK, len_dw);
648 	wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
649 	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
650 		FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
651 	wqi[i++] = 0;
652 	for (j = 1; j < q->width; ++j) {
653 		struct xe_lrc *lrc = q->lrc[j];
654 
655 		wqi[i++] = lrc->ring.tail / sizeof(u64);
656 	}
657 
658 	xe_assert(xe, i == wqi_size / sizeof(u32));
659 
660 	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
661 				      wq[q->guc->wqi_tail / sizeof(u32)]));
662 	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
663 	q->guc->wqi_tail += wqi_size;
664 	xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE);
665 
666 	xe_device_wmb(xe);
667 
668 	map = xe_lrc_parallel_map(q->lrc[0]);
669 	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
670 }
671 
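/*
 * Sentinel stored in q->guc->resume_time while a scheduling enable is in
 * flight; the suspend path below waits for resume_time != RESUME_PENDING,
 * which is presumably updated once the GuC acknowledges the enable.
 */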
672 #define RESUME_PENDING	~0x0ull
673 static void submit_exec_queue(struct xe_exec_queue *q)
674 {
675 	struct xe_guc *guc = exec_queue_to_guc(q);
676 	struct xe_device *xe = guc_to_xe(guc);
677 	struct xe_lrc *lrc = q->lrc[0];
678 	u32 action[3];
679 	u32 g2h_len = 0;
680 	u32 num_g2h = 0;
681 	int len = 0;
682 	bool extra_submit = false;
683 
684 	xe_assert(xe, exec_queue_registered(q));
685 
686 	if (xe_exec_queue_is_parallel(q))
687 		wq_item_append(q);
688 	else
689 		xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
690 
691 	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
692 		return;
693 
694 	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
695 		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
696 		action[len++] = q->guc->id;
697 		action[len++] = GUC_CONTEXT_ENABLE;
698 		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
699 		num_g2h = 1;
700 		if (xe_exec_queue_is_parallel(q))
701 			extra_submit = true;
702 
703 		q->guc->resume_time = RESUME_PENDING;
704 		set_exec_queue_pending_enable(q);
705 		set_exec_queue_enabled(q);
706 		trace_xe_exec_queue_scheduling_enable(q);
707 	} else {
708 		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
709 		action[len++] = q->guc->id;
710 		trace_xe_exec_queue_submit(q);
711 	}
712 
713 	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);
714 
715 	if (extra_submit) {
716 		len = 0;
717 		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
718 		action[len++] = q->guc->id;
719 		trace_xe_exec_queue_submit(q);
720 
721 		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
722 	}
723 }
724 
725 static struct dma_fence *
726 guc_exec_queue_run_job(struct drm_sched_job *drm_job)
727 {
728 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
729 	struct xe_exec_queue *q = job->q;
730 	struct xe_guc *guc = exec_queue_to_guc(q);
731 	struct xe_device *xe = guc_to_xe(guc);
732 	bool lr = xe_exec_queue_is_lr(q);
733 
734 	xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
735 		  exec_queue_banned(q) || exec_queue_suspended(q));
736 
737 	trace_xe_sched_job_run(job);
738 
739 	if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) {
740 		if (!exec_queue_registered(q))
741 			register_exec_queue(q);
742 		if (!lr)	/* LR jobs are emitted in the exec IOCTL */
743 			q->ring_ops->emit_job(job);
744 		submit_exec_queue(q);
745 	}
746 
747 	if (lr) {
748 		xe_sched_job_set_error(job, -EOPNOTSUPP);
749 		return NULL;
750 	} else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) {
751 		return job->fence;
752 	} else {
753 		return dma_fence_get(job->fence);
754 	}
755 }
756 
757 static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
758 {
759 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
760 
761 	trace_xe_sched_job_free(job);
762 	xe_sched_job_put(job);
763 }
764 
765 static int guc_read_stopped(struct xe_guc *guc)
766 {
767 	return atomic_read(&guc->submission_state.stopped);
768 }
769 
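/*
 * Declares a local u32 action[] holding a SCHED_CONTEXT_MODE_SET H2G request
 * (GUC_CONTEXT_ENABLE or GUC_CONTEXT_DISABLE) for the given exec queue.
 */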
770 #define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)			\
771 	u32 action[] = {						\
772 		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
773 		q->guc->id,						\
774 		GUC_CONTEXT_##enable_disable,				\
775 	}
776 
777 static void disable_scheduling_deregister(struct xe_guc *guc,
778 					  struct xe_exec_queue *q)
779 {
780 	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
781 	struct xe_device *xe = guc_to_xe(guc);
782 	int ret;
783 
784 	set_min_preemption_timeout(guc, q);
785 	smp_rmb();
786 	ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) ||
787 				 guc_read_stopped(guc), HZ * 5);
788 	if (!ret) {
789 		struct xe_gpu_scheduler *sched = &q->guc->sched;
790 
791 		drm_warn(&xe->drm, "Pending enable failed to respond");
792 		xe_sched_submission_start(sched);
793 		xe_gt_reset_async(q->gt);
794 		xe_sched_tdr_queue_imm(sched);
795 		return;
796 	}
797 
798 	clear_exec_queue_enabled(q);
799 	set_exec_queue_pending_disable(q);
800 	set_exec_queue_destroyed(q);
801 	trace_xe_exec_queue_scheduling_disable(q);
802 
803 	/*
804 	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
805 	 * handler and we are not allowed to reserve G2H space in handlers.
806 	 */
807 	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
808 		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
809 		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
810 }
811 
812 static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
813 {
814 	struct xe_guc *guc = exec_queue_to_guc(q);
815 	struct xe_device *xe = guc_to_xe(guc);
816 
817 	/* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
818 	wake_up_all(&xe->ufence_wq);
819 
820 	if (xe_exec_queue_is_lr(q))
821 		queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
822 	else
823 		xe_sched_tdr_queue_imm(&q->guc->sched);
824 }
825 
826 /**
827  * xe_guc_submit_wedge() - Wedge GuC submission
828  * @guc: the GuC object
829  *
830  * Save the state of exec queues registered with the GuC by taking a ref to each queue.
831  * Register a DRMM handler to drop refs upon driver unload.
832  */
833 void xe_guc_submit_wedge(struct xe_guc *guc)
834 {
835 	struct xe_device *xe = guc_to_xe(guc);
836 	struct xe_exec_queue *q;
837 	unsigned long index;
838 	int err;
839 
840 	xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);
841 
842 	/*
843 	 * If device is being wedged even before submission_state is
844 	 * initialized, there's nothing to do here.
845 	 */
846 	if (!guc->submission_state.initialized)
847 		return;
848 
849 	err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
850 				       guc_submit_wedged_fini, guc);
851 	if (err) {
852 		drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2; device is wedged anyway.\n");
853 		return;
854 	}
855 
856 	mutex_lock(&guc->submission_state.lock);
857 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
858 		if (xe_exec_queue_get_unless_zero(q))
859 			set_exec_queue_wedged(q);
860 	mutex_unlock(&guc->submission_state.lock);
861 }
862 
863 static bool guc_submit_hint_wedged(struct xe_guc *guc)
864 {
865 	struct xe_device *xe = guc_to_xe(guc);
866 
867 	if (xe->wedged.mode != 2)
868 		return false;
869 
870 	if (xe_device_wedged(xe))
871 		return true;
872 
873 	xe_device_declare_wedged(xe);
874 
875 	return true;
876 }
877 
878 static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
879 {
880 	struct xe_guc_exec_queue *ge =
881 		container_of(w, struct xe_guc_exec_queue, lr_tdr);
882 	struct xe_exec_queue *q = ge->q;
883 	struct xe_guc *guc = exec_queue_to_guc(q);
884 	struct xe_device *xe = guc_to_xe(guc);
885 	struct xe_gpu_scheduler *sched = &ge->sched;
886 	bool wedged;
887 
888 	xe_assert(xe, xe_exec_queue_is_lr(q));
889 	trace_xe_exec_queue_lr_cleanup(q);
890 
891 	wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
892 
893 	/* Kill the run_job / process_msg entry points */
894 	xe_sched_submission_stop(sched);
895 
896 	/*
897 	 * Engine state now mostly stable, disable scheduling / deregister if
898 	 * needed. This cleanup routine might be called multiple times, where
899 	 * the actual async engine deregister drops the final engine ref.
900 	 * Calling disable_scheduling_deregister will mark the engine as
901 	 * destroyed and fire off the CT requests to disable scheduling /
902 	 * deregister, which we only want to do once. We also don't want to mark
903 	 * the engine as pending_disable again as this may race with the
904 	 * xe_guc_deregister_done_handler() which treats it as an unexpected
905 	 * state.
906 	 */
907 	if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
908 		struct xe_guc *guc = exec_queue_to_guc(q);
909 		int ret;
910 
911 		set_exec_queue_banned(q);
912 		disable_scheduling_deregister(guc, q);
913 
914 		/*
915 		 * Must wait for scheduling to be disabled before signalling
916 		 * any fences; if the GT is broken the GT reset code should signal us.
917 		 */
918 		ret = wait_event_timeout(guc->ct.wq,
919 					 !exec_queue_pending_disable(q) ||
920 					 guc_read_stopped(guc), HZ * 5);
921 		if (!ret) {
922 			drm_warn(&xe->drm, "Schedule disable failed to respond");
923 			xe_sched_submission_start(sched);
924 			xe_gt_reset_async(q->gt);
925 			return;
926 		}
927 	}
928 
929 	xe_sched_submission_start(sched);
930 }
931 
932 #define ADJUST_FIVE_PERCENT(__t)	mul_u64_u32_div(__t, 105, 100)
933 
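/*
 * Estimate how long the job has actually been running by diffing the LRC
 * context timestamp against the job timestamp recorded in the LRC (handling a
 * single 32-bit counter wrap), then compare it, adjusted by 5% for GuC
 * scheduling latency, against the configured job timeout.
 */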
934 static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
935 {
936 	struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
937 	u32 ctx_timestamp, ctx_job_timestamp;
938 	u32 timeout_ms = q->sched_props.job_timeout_ms;
939 	u32 diff;
940 	u64 running_time_ms;
941 
942 	if (!xe_sched_job_started(job)) {
943 		xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
944 			   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
945 			   q->guc->id);
946 
947 		return xe_sched_invalidate_job(job, 2);
948 	}
949 
950 	ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
951 	ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
952 
953 	/*
954 	 * Counter wraps at ~223s at the usual 19.2MHz, so be paranoid and catch
955 	 * possible overflows with a high timeout.
956 	 */
957 	xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC);
958 
959 	if (ctx_timestamp < ctx_job_timestamp)
960 		diff = ctx_timestamp + U32_MAX - ctx_job_timestamp;
961 	else
962 		diff = ctx_timestamp - ctx_job_timestamp;
963 
964 	/*
965 	 * Ensure timeout is within 5% to account for an GuC scheduling latency
966 	 */
967 	running_time_ms =
968 		ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff));
969 
970 	xe_gt_dbg(gt,
971 		  "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x",
972 		  xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
973 		  q->guc->id, running_time_ms, timeout_ms, diff);
974 
975 	return running_time_ms >= timeout_ms;
976 }
977 
978 static void enable_scheduling(struct xe_exec_queue *q)
979 {
980 	MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
981 	struct xe_guc *guc = exec_queue_to_guc(q);
982 	int ret;
983 
984 	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
985 	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
986 	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
987 	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
988 
989 	set_exec_queue_pending_enable(q);
990 	set_exec_queue_enabled(q);
991 	trace_xe_exec_queue_scheduling_enable(q);
992 
993 	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
994 		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
995 
996 	ret = wait_event_timeout(guc->ct.wq,
997 				 !exec_queue_pending_enable(q) ||
998 				 guc_read_stopped(guc), HZ * 5);
999 	if (!ret || guc_read_stopped(guc)) {
1000 		xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
1001 		set_exec_queue_banned(q);
1002 		xe_gt_reset_async(q->gt);
1003 		xe_sched_tdr_queue_imm(&q->guc->sched);
1004 	}
1005 }
1006 
1007 static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
1008 {
1009 	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
1010 	struct xe_guc *guc = exec_queue_to_guc(q);
1011 
1012 	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
1013 	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
1014 	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
1015 
1016 	if (immediate)
1017 		set_min_preemption_timeout(guc, q);
1018 	clear_exec_queue_enabled(q);
1019 	set_exec_queue_pending_disable(q);
1020 	trace_xe_exec_queue_scheduling_disable(q);
1021 
1022 	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
1023 		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
1024 }
1025 
1026 static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
1027 {
1028 	u32 action[] = {
1029 		XE_GUC_ACTION_DEREGISTER_CONTEXT,
1030 		q->guc->id,
1031 	};
1032 
1033 	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
1034 	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
1035 	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
1036 	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
1037 
1038 	set_exec_queue_destroyed(q);
1039 	trace_xe_exec_queue_deregister(q);
1040 
1041 	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
1042 		       G2H_LEN_DW_DEREGISTER_CONTEXT, 1);
1043 }
1044 
1045 static enum drm_gpu_sched_stat
1046 guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
1047 {
1048 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
1049 	struct xe_sched_job *tmp_job;
1050 	struct xe_exec_queue *q = job->q;
1051 	struct xe_gpu_scheduler *sched = &q->guc->sched;
1052 	struct xe_guc *guc = exec_queue_to_guc(q);
1053 	const char *process_name = "no process";
1054 	int err = -ETIME;
1055 	pid_t pid = -1;
1056 	int i = 0;
1057 	bool wedged, skip_timeout_check;
1058 
1059 	/*
1060 	 * TDR has fired before the free job worker. Common if the exec queue is
1061 	 * closed immediately after the last fence signals. Add the job back to the
1062 	 * pending list so it can be freed and kick the scheduler to ensure the
1063 	 * free job is not lost.
1064 	 */
1065 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
1066 		xe_sched_add_pending_job(sched, job);
1067 		xe_sched_submission_start(sched);
1068 
1069 		return DRM_GPU_SCHED_STAT_NOMINAL;
1070 	}
1071 
1072 	/* Kill the run_job entry point */
1073 	xe_sched_submission_stop(sched);
1074 
1075 	/* Must check all state after stopping scheduler */
1076 	skip_timeout_check = exec_queue_reset(q) ||
1077 		exec_queue_killed_or_banned_or_wedged(q) ||
1078 		exec_queue_destroyed(q);
1079 
1080 	/*
1081 	 * XXX: Sampling timeout doesn't work in wedged mode as we have to
1082 	 * modify scheduling state to read timestamp. We could read the
1083 	 * timestamp from a register to accumulate current running time but this
1084 	 * doesn't work for SRIOV. For now assuming timeouts in wedged mode are
1085 	 * genuine timeouts.
1086 	 */
1087 	wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
1088 
1089 	/* Engine state now stable, disable scheduling to check timestamp */
1090 	if (!wedged && exec_queue_registered(q)) {
1091 		int ret;
1092 
1093 		if (exec_queue_reset(q))
1094 			err = -EIO;
1095 
1096 		if (!exec_queue_destroyed(q)) {
1097 			/*
1098 			 * Wait for any pending G2H to flush out before
1099 			 * modifying state
1100 			 */
1101 			ret = wait_event_timeout(guc->ct.wq,
1102 						 !exec_queue_pending_enable(q) ||
1103 						 guc_read_stopped(guc), HZ * 5);
1104 			if (!ret || guc_read_stopped(guc))
1105 				goto trigger_reset;
1106 
1107 			/*
1108 			 * Flag communicates to G2H handler that schedule
1109 			 * disable originated from a timeout check. The G2H then
1110 			 * avoids triggering cleanup or deregistering the exec
1111 			 * queue.
1112 			 */
1113 			set_exec_queue_check_timeout(q);
1114 			disable_scheduling(q, skip_timeout_check);
1115 		}
1116 
1117 		/*
1118 		 * Must wait for scheduling to be disabled before signalling
1119 		 * any fences; if the GT is broken the GT reset code should signal us.
1120 		 *
1121 		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
1122 		 * error) messages which can cause the schedule disable to get
1123 		 * lost. If this occurs, trigger a GT reset to recover.
1124 		 */
1125 		smp_rmb();
1126 		ret = wait_event_timeout(guc->ct.wq,
1127 					 !exec_queue_pending_disable(q) ||
1128 					 guc_read_stopped(guc), HZ * 5);
1129 		if (!ret || guc_read_stopped(guc)) {
1130 trigger_reset:
1131 			if (!ret)
1132 				xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond");
1133 			set_exec_queue_extra_ref(q);
1134 			xe_exec_queue_get(q);	/* GT reset owns this */
1135 			set_exec_queue_banned(q);
1136 			xe_gt_reset_async(q->gt);
1137 			xe_sched_tdr_queue_imm(sched);
1138 			goto rearm;
1139 		}
1140 	}
1141 
1142 	/*
1143 	 * Check if job is actually timed out, if so restart job execution and TDR
1144 	 */
1145 	if (!wedged && !skip_timeout_check && !check_timeout(q, job) &&
1146 	    !exec_queue_reset(q) && exec_queue_registered(q)) {
1147 		clear_exec_queue_check_timeout(q);
1148 		goto sched_enable;
1149 	}
1150 
1151 	if (q->vm && q->vm->xef) {
1152 		process_name = q->vm->xef->process_name;
1153 		pid = q->vm->xef->pid;
1154 	}
1155 	xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
1156 		     xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
1157 		     q->guc->id, q->flags, process_name, pid);
1158 
1159 	trace_xe_sched_job_timedout(job);
1160 
1161 	if (!exec_queue_killed(q))
1162 		xe_devcoredump(job);
1163 
1164 	/*
1165 	 * Kernel jobs should never fail, nor should VM jobs; if they do,
1166 	 * something has gone wrong and the GT needs a reset.
1167 	 */
1168 	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
1169 		   "Kernel-submitted job timed out\n");
1170 	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
1171 		   "VM job timed out on non-killed execqueue\n");
1172 	if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
1173 			(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
1174 		if (!xe_sched_invalidate_job(job, 2)) {
1175 			clear_exec_queue_check_timeout(q);
1176 			xe_gt_reset_async(q->gt);
1177 			goto rearm;
1178 		}
1179 	}
1180 
1181 	/* Finish cleaning up exec queue via deregister */
1182 	set_exec_queue_banned(q);
1183 	if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
1184 		set_exec_queue_extra_ref(q);
1185 		xe_exec_queue_get(q);
1186 		__deregister_exec_queue(guc, q);
1187 	}
1188 
1189 	/* Stop fence signaling */
1190 	xe_hw_fence_irq_stop(q->fence_irq);
1191 
1192 	/*
1193 	 * Fence state now stable, stop / start scheduler which cleans up any
1194 	 * fences that are complete
1195 	 */
1196 	xe_sched_add_pending_job(sched, job);
1197 	xe_sched_submission_start(sched);
1198 
1199 	xe_guc_exec_queue_trigger_cleanup(q);
1200 
1201 	/* Mark all outstanding jobs as bad, thus completing them */
1202 	spin_lock(&sched->base.job_list_lock);
1203 	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
1204 		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
1205 	spin_unlock(&sched->base.job_list_lock);
1206 
1207 	/* Start fence signaling */
1208 	xe_hw_fence_irq_start(q->fence_irq);
1209 
1210 	return DRM_GPU_SCHED_STAT_NOMINAL;
1211 
1212 sched_enable:
1213 	enable_scheduling(q);
1214 rearm:
1215 	/*
1216 	 * XXX: Ideally want to adjust timeout based on current execution time,
1217 	 * but there is currently no easy way to do this in the DRM scheduler. With
1218 	 * some thought, do this in a follow up.
1219 	 */
1220 	xe_sched_add_pending_job(sched, job);
1221 	xe_sched_submission_start(sched);
1222 
1223 	return DRM_GPU_SCHED_STAT_NOMINAL;
1224 }
1225 
1226 static void __guc_exec_queue_fini_async(struct work_struct *w)
1227 {
1228 	struct xe_guc_exec_queue *ge =
1229 		container_of(w, struct xe_guc_exec_queue, fini_async);
1230 	struct xe_exec_queue *q = ge->q;
1231 	struct xe_guc *guc = exec_queue_to_guc(q);
1232 
1233 	xe_pm_runtime_get(guc_to_xe(guc));
1234 	trace_xe_exec_queue_destroy(q);
1235 
1236 	release_guc_id(guc, q);
1237 	if (xe_exec_queue_is_lr(q))
1238 		cancel_work_sync(&ge->lr_tdr);
1239 	/* Confirm no work left behind accessing device structures */
1240 	cancel_delayed_work_sync(&ge->sched.base.work_tdr);
1241 	xe_sched_entity_fini(&ge->entity);
1242 	xe_sched_fini(&ge->sched);
1243 
1244 	/*
1245 	 * RCU free due to the sched being exported via DRM scheduler fences
1246 	 * (timeline name).
1247 	 */
1248 	kfree_rcu(ge, rcu);
1249 	xe_exec_queue_fini(q);
1250 	xe_pm_runtime_put(guc_to_xe(guc));
1251 }
1252 
1253 static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
1254 {
1255 	struct xe_guc *guc = exec_queue_to_guc(q);
1256 	struct xe_device *xe = guc_to_xe(guc);
1257 
1258 	INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
1259 
1260 	/* We must block on kernel engines so slabs are empty on driver unload */
1261 	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
1262 		__guc_exec_queue_fini_async(&q->guc->fini_async);
1263 	else
1264 		queue_work(xe->destroy_wq, &q->guc->fini_async);
1265 }
1266 
1267 static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
1268 {
1269 	/*
1270 	 * Might be done from within the GPU scheduler, need to do async as we
1271 	 * fini the scheduler when the engine is fini'd, the scheduler can't
1272 	 * complete fini within itself (circular dependency). Async resolves
1273 	 * this and we don't really care when everything is fini'd, just that it
1274 	 * is.
1275 	 */
1276 	guc_exec_queue_fini_async(q);
1277 }
1278 
1279 static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
1280 {
1281 	struct xe_exec_queue *q = msg->private_data;
1282 	struct xe_guc *guc = exec_queue_to_guc(q);
1283 	struct xe_device *xe = guc_to_xe(guc);
1284 
1285 	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
1286 	trace_xe_exec_queue_cleanup_entity(q);
1287 
1288 	if (exec_queue_registered(q))
1289 		disable_scheduling_deregister(guc, q);
1290 	else
1291 		__guc_exec_queue_fini(guc, q);
1292 }
1293 
1294 static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
1295 {
1296 	return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q);
1297 }
1298 
1299 static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
1300 {
1301 	struct xe_exec_queue *q = msg->private_data;
1302 	struct xe_guc *guc = exec_queue_to_guc(q);
1303 
1304 	if (guc_exec_queue_allowed_to_change_state(q))
1305 		init_policies(guc, q);
1306 	kfree(msg);
1307 }
1308 
1309 static void __suspend_fence_signal(struct xe_exec_queue *q)
1310 {
1311 	if (!q->guc->suspend_pending)
1312 		return;
1313 
1314 	WRITE_ONCE(q->guc->suspend_pending, false);
1315 	wake_up(&q->guc->suspend_wait);
1316 }
1317 
1318 static void suspend_fence_signal(struct xe_exec_queue *q)
1319 {
1320 	struct xe_guc *guc = exec_queue_to_guc(q);
1321 	struct xe_device *xe = guc_to_xe(guc);
1322 
1323 	xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) ||
1324 		  guc_read_stopped(guc));
1325 	xe_assert(xe, q->guc->suspend_pending);
1326 
1327 	__suspend_fence_signal(q);
1328 }
1329 
1330 static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
1331 {
1332 	struct xe_exec_queue *q = msg->private_data;
1333 	struct xe_guc *guc = exec_queue_to_guc(q);
1334 
1335 	if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
1336 	    exec_queue_enabled(q)) {
1337 		wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING ||
1338 			   guc_read_stopped(guc));
1339 
1340 		if (!guc_read_stopped(guc)) {
1341 			s64 since_resume_ms =
1342 				ktime_ms_delta(ktime_get(),
1343 					       q->guc->resume_time);
1344 			s64 wait_ms = q->vm->preempt.min_run_period_ms -
1345 				since_resume_ms;
1346 
1347 			if (wait_ms > 0 && q->guc->resume_time)
1348 				msleep(wait_ms);
1349 
1350 			set_exec_queue_suspended(q);
1351 			disable_scheduling(q, false);
1352 		}
1353 	} else if (q->guc->suspend_pending) {
1354 		set_exec_queue_suspended(q);
1355 		suspend_fence_signal(q);
1356 	}
1357 }
1358 
1359 static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
1360 {
1361 	struct xe_exec_queue *q = msg->private_data;
1362 
1363 	if (guc_exec_queue_allowed_to_change_state(q)) {
1364 		clear_exec_queue_suspended(q);
1365 		if (!exec_queue_enabled(q)) {
1366 			q->guc->resume_time = RESUME_PENDING;
1367 			enable_scheduling(q);
1368 		}
1369 	} else {
1370 		clear_exec_queue_suspended(q);
1371 	}
1372 }
1373 
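/*
 * Opcodes for xe_sched_msg sent to the GPU scheduler; MSG_LOCKED is OR'd in by
 * guc_exec_queue_try_add_msg() to indicate the scheduler message lock is
 * already held when the message is added.
 */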
1374 #define CLEANUP		1	/* Non-zero values to catch uninitialized msg */
1375 #define SET_SCHED_PROPS	2
1376 #define SUSPEND		3
1377 #define RESUME		4
1378 #define OPCODE_MASK	0xf
1379 #define MSG_LOCKED	BIT(8)
1380 
1381 static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
1382 {
1383 	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data));
1384 
1385 	trace_xe_sched_msg_recv(msg);
1386 
1387 	switch (msg->opcode) {
1388 	case CLEANUP:
1389 		__guc_exec_queue_process_msg_cleanup(msg);
1390 		break;
1391 	case SET_SCHED_PROPS:
1392 		__guc_exec_queue_process_msg_set_sched_props(msg);
1393 		break;
1394 	case SUSPEND:
1395 		__guc_exec_queue_process_msg_suspend(msg);
1396 		break;
1397 	case RESUME:
1398 		__guc_exec_queue_process_msg_resume(msg);
1399 		break;
1400 	default:
1401 		XE_WARN_ON("Unknown message type");
1402 	}
1403 
1404 	xe_pm_runtime_put(xe);
1405 }
1406 
1407 static const struct drm_sched_backend_ops drm_sched_ops = {
1408 	.run_job = guc_exec_queue_run_job,
1409 	.free_job = guc_exec_queue_free_job,
1410 	.timedout_job = guc_exec_queue_timedout_job,
1411 };
1412 
1413 static const struct xe_sched_backend_ops xe_sched_ops = {
1414 	.process_msg = guc_exec_queue_process_msg,
1415 };
1416 
1417 static int guc_exec_queue_init(struct xe_exec_queue *q)
1418 {
1419 	struct xe_gpu_scheduler *sched;
1420 	struct xe_guc *guc = exec_queue_to_guc(q);
1421 	struct xe_device *xe = guc_to_xe(guc);
1422 	struct xe_guc_exec_queue *ge;
1423 	long timeout;
1424 	int err, i;
1425 
1426 	xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc)));
1427 
1428 	ge = kzalloc(sizeof(*ge), GFP_KERNEL);
1429 	if (!ge)
1430 		return -ENOMEM;
1431 
1432 	q->guc = ge;
1433 	ge->q = q;
1434 	init_rcu_head(&ge->rcu);
1435 	init_waitqueue_head(&ge->suspend_wait);
1436 
1437 	for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
1438 		INIT_LIST_HEAD(&ge->static_msgs[i].link);
1439 
1440 	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
1441 		  msecs_to_jiffies(q->sched_props.job_timeout_ms);
1442 	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
1443 			    NULL, q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64,
1444 			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
1445 			    q->name, gt_to_xe(q->gt)->drm.dev);
1446 	if (err)
1447 		goto err_free;
1448 
1449 	sched = &ge->sched;
1450 	err = xe_sched_entity_init(&ge->entity, sched);
1451 	if (err)
1452 		goto err_sched;
1453 
1454 	if (xe_exec_queue_is_lr(q))
1455 		INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup);
1456 
1457 	mutex_lock(&guc->submission_state.lock);
1458 
1459 	err = alloc_guc_id(guc, q);
1460 	if (err)
1461 		goto err_entity;
1462 
1463 	q->entity = &ge->entity;
1464 
1465 	if (guc_read_stopped(guc))
1466 		xe_sched_stop(sched);
1467 
1468 	mutex_unlock(&guc->submission_state.lock);
1469 
1470 	xe_exec_queue_assign_name(q, q->guc->id);
1471 
1472 	trace_xe_exec_queue_create(q);
1473 
1474 	return 0;
1475 
1476 err_entity:
1477 	mutex_unlock(&guc->submission_state.lock);
1478 	xe_sched_entity_fini(&ge->entity);
1479 err_sched:
1480 	xe_sched_fini(&ge->sched);
1481 err_free:
1482 	kfree(ge);
1483 
1484 	return err;
1485 }
1486 
1487 static void guc_exec_queue_kill(struct xe_exec_queue *q)
1488 {
1489 	trace_xe_exec_queue_kill(q);
1490 	set_exec_queue_killed(q);
1491 	__suspend_fence_signal(q);
1492 	xe_guc_exec_queue_trigger_cleanup(q);
1493 }
1494 
1495 static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
1496 				   u32 opcode)
1497 {
1498 	xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));
1499 
1500 	INIT_LIST_HEAD(&msg->link);
1501 	msg->opcode = opcode & OPCODE_MASK;
1502 	msg->private_data = q;
1503 
1504 	trace_xe_sched_msg_add(msg);
1505 	if (opcode & MSG_LOCKED)
1506 		xe_sched_add_msg_locked(&q->guc->sched, msg);
1507 	else
1508 		xe_sched_add_msg(&q->guc->sched, msg);
1509 }
1510 
1511 static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
1512 				       struct xe_sched_msg *msg,
1513 				       u32 opcode)
1514 {
1515 	if (!list_empty(&msg->link))
1516 		return false;
1517 
1518 	guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED);
1519 
1520 	return true;
1521 }
1522 
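/*
 * Indices into q->guc->static_msgs: cleanup/suspend/resume reuse these
 * preallocated messages so they never need to allocate, unlike the
 * set-property paths below which kmalloc() a message.
 */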
1523 #define STATIC_MSG_CLEANUP	0
1524 #define STATIC_MSG_SUSPEND	1
1525 #define STATIC_MSG_RESUME	2
1526 static void guc_exec_queue_fini(struct xe_exec_queue *q)
1527 {
1528 	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
1529 
1530 	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
1531 		guc_exec_queue_add_msg(q, msg, CLEANUP);
1532 	else
1533 		__guc_exec_queue_fini(exec_queue_to_guc(q), q);
1534 }
1535 
1536 static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
1537 				       enum xe_exec_queue_priority priority)
1538 {
1539 	struct xe_sched_msg *msg;
1540 
1541 	if (q->sched_props.priority == priority ||
1542 	    exec_queue_killed_or_banned_or_wedged(q))
1543 		return 0;
1544 
1545 	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1546 	if (!msg)
1547 		return -ENOMEM;
1548 
1549 	q->sched_props.priority = priority;
1550 	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
1551 
1552 	return 0;
1553 }
1554 
1555 static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
1556 {
1557 	struct xe_sched_msg *msg;
1558 
1559 	if (q->sched_props.timeslice_us == timeslice_us ||
1560 	    exec_queue_killed_or_banned_or_wedged(q))
1561 		return 0;
1562 
1563 	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1564 	if (!msg)
1565 		return -ENOMEM;
1566 
1567 	q->sched_props.timeslice_us = timeslice_us;
1568 	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
1569 
1570 	return 0;
1571 }
1572 
1573 static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
1574 					      u32 preempt_timeout_us)
1575 {
1576 	struct xe_sched_msg *msg;
1577 
1578 	if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
1579 	    exec_queue_killed_or_banned_or_wedged(q))
1580 		return 0;
1581 
1582 	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1583 	if (!msg)
1584 		return -ENOMEM;
1585 
1586 	q->sched_props.preempt_timeout_us = preempt_timeout_us;
1587 	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
1588 
1589 	return 0;
1590 }
1591 
1592 static int guc_exec_queue_suspend(struct xe_exec_queue *q)
1593 {
1594 	struct xe_gpu_scheduler *sched = &q->guc->sched;
1595 	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
1596 
1597 	if (exec_queue_killed_or_banned_or_wedged(q))
1598 		return -EINVAL;
1599 
1600 	xe_sched_msg_lock(sched);
1601 	if (guc_exec_queue_try_add_msg(q, msg, SUSPEND))
1602 		q->guc->suspend_pending = true;
1603 	xe_sched_msg_unlock(sched);
1604 
1605 	return 0;
1606 }
1607 
1608 static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
1609 {
1610 	struct xe_guc *guc = exec_queue_to_guc(q);
1611 	int ret;
1612 
1613 	/*
1614 	 * Likely don't need to check exec_queue_killed() as we clear
1615 	 * suspend_pending upon kill, but to be paranoid about races in which
1616 	 * suspend_pending is set after kill, also check kill here.
1617 	 */
1618 	ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
1619 					       !READ_ONCE(q->guc->suspend_pending) ||
1620 					       exec_queue_killed(q) ||
1621 					       guc_read_stopped(guc),
1622 					       HZ * 5);
1623 
1624 	if (!ret) {
1625 		xe_gt_warn(guc_to_gt(guc),
1626 			   "Suspend fence, guc_id=%d, failed to respond",
1627 			   q->guc->id);
1628 		/* XXX: Trigger GT reset? */
1629 		return -ETIME;
1630 	}
1631 
1632 	return ret < 0 ? ret : 0;
1633 }
1634 
1635 static void guc_exec_queue_resume(struct xe_exec_queue *q)
1636 {
1637 	struct xe_gpu_scheduler *sched = &q->guc->sched;
1638 	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
1639 	struct xe_guc *guc = exec_queue_to_guc(q);
1640 	struct xe_device *xe = guc_to_xe(guc);
1641 
1642 	xe_assert(xe, !q->guc->suspend_pending);
1643 
1644 	xe_sched_msg_lock(sched);
1645 	guc_exec_queue_try_add_msg(q, msg, RESUME);
1646 	xe_sched_msg_unlock(sched);
1647 }
1648 
1649 static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
1650 {
1651 	return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q);
1652 }
1653 
1654 /*
1655  * All of these functions are an abstraction layer which other parts of XE can
1656  * use to trap into the GuC backend. All of these functions, aside from init,
1657  * really shouldn't do much other than trap into the DRM scheduler which
1658  * synchronizes these operations.
1659  */
1660 static const struct xe_exec_queue_ops guc_exec_queue_ops = {
1661 	.init = guc_exec_queue_init,
1662 	.kill = guc_exec_queue_kill,
1663 	.fini = guc_exec_queue_fini,
1664 	.set_priority = guc_exec_queue_set_priority,
1665 	.set_timeslice = guc_exec_queue_set_timeslice,
1666 	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
1667 	.suspend = guc_exec_queue_suspend,
1668 	.suspend_wait = guc_exec_queue_suspend_wait,
1669 	.resume = guc_exec_queue_resume,
1670 	.reset_status = guc_exec_queue_reset_status,
1671 };
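
/*
 * Illustrative sketch (not part of the driver): how other parts of XE are
 * expected to reach this backend through the ops table above. This assumes
 * q->ops points at guc_exec_queue_ops and that XE_EXEC_QUEUE_PRIORITY_HIGH is
 * one of the enum xe_exec_queue_priority levels; the exact wiring lives in
 * xe_exec_queue.c and is not shown here.
 */
static int __maybe_unused example_bump_priority(struct xe_exec_queue *q)
{
	/* Queues a SET_SCHED_PROPS message to the DRM scheduler via the GuC backend */
	return q->ops->set_priority(q, XE_EXEC_QUEUE_PRIORITY_HIGH);
}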
1672 
1673 static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
1674 {
1675 	struct xe_gpu_scheduler *sched = &q->guc->sched;
1676 
1677 	/* Stop scheduling + flush any DRM scheduler operations */
1678 	xe_sched_submission_stop(sched);
1679 
1680 	/* Clean up lost G2H + reset engine state */
1681 	if (exec_queue_registered(q)) {
1682 		if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
1683 			xe_exec_queue_put(q);
1684 		else if (exec_queue_destroyed(q))
1685 			__guc_exec_queue_fini(guc, q);
1686 	}
1687 	if (q->guc->suspend_pending) {
1688 		set_exec_queue_suspended(q);
1689 		suspend_fence_signal(q);
1690 	}
1691 	atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED |
1692 		   EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED |
1693 		   EXEC_QUEUE_STATE_SUSPENDED,
1694 		   &q->guc->state);
1695 	q->guc->resume_time = 0;
1696 	trace_xe_exec_queue_stop(q);
1697 
1698 	/*
1699 	 * Ban any engine (aside from kernel engines and engines used for VM
1700 	 * ops) that has a started but not completed job, or whose job has gone
1701 	 * through a GT reset more than twice.
1702 	 */
1703 	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
1704 		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
1705 		bool ban = false;
1706 
1707 		if (job) {
1708 			if ((xe_sched_job_started(job) &&
1709 			    !xe_sched_job_completed(job)) ||
1710 			    xe_sched_invalidate_job(job, 2)) {
1711 				trace_xe_sched_job_ban(job);
1712 				ban = true;
1713 			}
1714 		} else if (xe_exec_queue_is_lr(q) &&
1715 			   (xe_lrc_ring_head(q->lrc[0]) != xe_lrc_ring_tail(q->lrc[0]))) {
1716 			ban = true;
1717 		}
1718 
1719 		if (ban) {
1720 			set_exec_queue_banned(q);
1721 			xe_guc_exec_queue_trigger_cleanup(q);
1722 		}
1723 	}
1724 }
1725 
1726 int xe_guc_submit_reset_prepare(struct xe_guc *guc)
1727 {
1728 	int ret;
1729 
1730 	if (!guc->submission_state.initialized)
1731 		return 0;
1732 
1733 	/*
1734 	 * Using an atomic here rather than submission_state.lock as this
1735 	 * function can be called while holding the CT lock (engine reset
1736 	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
1737 	 * An atomic is not ideal, but it works to guard against a concurrent
1738 	 * reset and to release any TDRs waiting on guc->submission_state.stopped.
1739 	 */
1740 	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
1741 	smp_wmb();
1742 	wake_up_all(&guc->ct.wq);
1743 
1744 	return ret;
1745 }
1746 
1747 void xe_guc_submit_reset_wait(struct xe_guc *guc)
1748 {
1749 	wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
1750 		   !guc_read_stopped(guc));
1751 }
1752 
1753 void xe_guc_submit_stop(struct xe_guc *guc)
1754 {
1755 	struct xe_exec_queue *q;
1756 	unsigned long index;
1757 	struct xe_device *xe = guc_to_xe(guc);
1758 
1759 	xe_assert(xe, guc_read_stopped(guc) == 1);
1760 
1761 	mutex_lock(&guc->submission_state.lock);
1762 
1763 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
1764 		/* Prevent redundant attempts to stop parallel queues */
1765 		if (q->guc->id != index)
1766 			continue;
1767 
1768 		guc_exec_queue_stop(guc, q);
1769 	}
1770 
1771 	mutex_unlock(&guc->submission_state.lock);
1772 
1773 	/*
1774 	 * No one can enter the backend at this point, aside from new engine
1775 	 * creation which is protected by guc->submission_state.lock.
1776 	 */
1777 
1778 }
1779 
1780 static void guc_exec_queue_start(struct xe_exec_queue *q)
1781 {
1782 	struct xe_gpu_scheduler *sched = &q->guc->sched;
1783 
1784 	if (!exec_queue_killed_or_banned_or_wedged(q)) {
1785 		int i;
1786 
1787 		trace_xe_exec_queue_resubmit(q);
1788 		for (i = 0; i < q->width; ++i)
1789 			xe_lrc_set_ring_head(q->lrc[i], q->lrc[i]->ring.tail);
1790 		xe_sched_resubmit_jobs(sched);
1791 	}
1792 
1793 	xe_sched_submission_start(sched);
1794 	xe_sched_submission_resume_tdr(sched);
1795 }
1796 
1797 int xe_guc_submit_start(struct xe_guc *guc)
1798 {
1799 	struct xe_exec_queue *q;
1800 	unsigned long index;
1801 	struct xe_device *xe = guc_to_xe(guc);
1802 
1803 	xe_assert(xe, guc_read_stopped(guc) == 1);
1804 
1805 	mutex_lock(&guc->submission_state.lock);
1806 	atomic_dec(&guc->submission_state.stopped);
1807 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
1808 		/* Prevent redundant attempts to start parallel queues */
1809 		if (q->guc->id != index)
1810 			continue;
1811 
1812 		guc_exec_queue_start(q);
1813 	}
1814 	mutex_unlock(&guc->submission_state.lock);
1815 
1816 	wake_up_all(&guc->ct.wq);
1817 
1818 	return 0;
1819 }
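
/*
 * Illustrative sketch (not part of the driver): the expected ordering of the
 * stop/start entry points around a GT reset, inferred from the functions
 * above. The real sequencing lives in the GT reset path and interleaves
 * additional steps (GuC reload, HW re-init) between stop and start.
 */
static void __maybe_unused example_gt_reset_flow(struct xe_guc *guc)
{
	/* Mark submission as stopped so nothing new enters the backend */
	xe_guc_submit_reset_prepare(guc);

	/* Park every exec queue's scheduler and clean up lost G2H state */
	xe_guc_submit_stop(guc);

	/* ... GT reset and GuC reload would happen here ... */

	/* Resubmit pending jobs and restart the schedulers */
	xe_guc_submit_start(guc);
}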
1820 
1821 static struct xe_exec_queue *
1822 g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
1823 {
1824 	struct xe_device *xe = guc_to_xe(guc);
1825 	struct xe_exec_queue *q;
1826 
1827 	if (unlikely(guc_id >= GUC_ID_MAX)) {
1828 		drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
1829 		return NULL;
1830 	}
1831 
1832 	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
1833 	if (unlikely(!q)) {
1834 		drm_err(&xe->drm, "No engine present for guc_id %u", guc_id);
1835 		return NULL;
1836 	}
1837 
1838 	xe_assert(xe, guc_id >= q->guc->id);
1839 	xe_assert(xe, guc_id < (q->guc->id + q->width));
1840 
1841 	return q;
1842 }
1843 
1844 static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
1845 {
1846 	u32 action[] = {
1847 		XE_GUC_ACTION_DEREGISTER_CONTEXT,
1848 		q->guc->id,
1849 	};
1850 
1851 	xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q));
1852 	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
1853 	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
1854 	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
1855 
1856 	trace_xe_exec_queue_deregister(q);
1857 
1858 	xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
1859 }
1860 
1861 static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
1862 			      u32 runnable_state)
1863 {
1864 	trace_xe_exec_queue_scheduling_done(q);
1865 
1866 	if (runnable_state == 1) {
1867 		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q));
1868 
1869 		q->guc->resume_time = ktime_get();
1870 		clear_exec_queue_pending_enable(q);
1871 		smp_wmb();
1872 		wake_up_all(&guc->ct.wq);
1873 	} else {
1874 		bool check_timeout = exec_queue_check_timeout(q);
1875 
1876 		xe_gt_assert(guc_to_gt(guc), runnable_state == 0);
1877 		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q));
1878 
1879 		if (q->guc->suspend_pending) {
1880 			suspend_fence_signal(q);
1881 			clear_exec_queue_pending_disable(q);
1882 		} else {
1883 			if (exec_queue_banned(q) || check_timeout) {
1884 				smp_wmb();
1885 				wake_up_all(&guc->ct.wq);
1886 			}
1887 			if (!check_timeout && exec_queue_destroyed(q)) {
1888 				/*
1889 				 * Make sure to clear the pending_disable only
1890 				 * after sampling the destroyed state. We want
1891 				 * to ensure we don't trigger the unregister too
1892 				 * early for an operation that only intends to
1893 				 * disable scheduling. The caller doing the
1894 				 * destroy must wait for an ongoing
1895 				 * pending_disable before marking as destroyed.
1896 				 */
1897 				clear_exec_queue_pending_disable(q);
1898 				deregister_exec_queue(guc, q);
1899 			} else {
1900 				clear_exec_queue_pending_disable(q);
1901 			}
1902 		}
1903 	}
1904 }
1905 
1906 int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
1907 {
1908 	struct xe_device *xe = guc_to_xe(guc);
1909 	struct xe_exec_queue *q;
1910 	u32 guc_id = msg[0];
1911 	u32 runnable_state = msg[1];
1912 
1913 	if (unlikely(len < 2)) {
1914 		drm_err(&xe->drm, "Invalid length %u", len);
1915 		return -EPROTO;
1916 	}
1917 
1918 	q = g2h_exec_queue_lookup(guc, guc_id);
1919 	if (unlikely(!q))
1920 		return -EPROTO;
1921 
1922 	if (unlikely(!exec_queue_pending_enable(q) &&
1923 		     !exec_queue_pending_disable(q))) {
1924 		xe_gt_err(guc_to_gt(guc),
1925 			  "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u",
1926 			  atomic_read(&q->guc->state), q->guc->id,
1927 			  runnable_state);
1928 		return -EPROTO;
1929 	}
1930 
1931 	handle_sched_done(guc, q, runnable_state);
1932 
1933 	return 0;
1934 }
1935 
1936 static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
1937 {
1938 	trace_xe_exec_queue_deregister_done(q);
1939 
1940 	clear_exec_queue_registered(q);
1941 
1942 	if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
1943 		xe_exec_queue_put(q);
1944 	else
1945 		__guc_exec_queue_fini(guc, q);
1946 }
1947 
1948 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
1949 {
1950 	struct xe_device *xe = guc_to_xe(guc);
1951 	struct xe_exec_queue *q;
1952 	u32 guc_id = msg[0];
1953 
1954 	if (unlikely(len < 1)) {
1955 		drm_err(&xe->drm, "Invalid length %u", len);
1956 		return -EPROTO;
1957 	}
1958 
1959 	q = g2h_exec_queue_lookup(guc, guc_id);
1960 	if (unlikely(!q))
1961 		return -EPROTO;
1962 
1963 	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
1964 	    exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
1965 		xe_gt_err(guc_to_gt(guc),
1966 			  "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d",
1967 			  atomic_read(&q->guc->state), q->guc->id);
1968 		return -EPROTO;
1969 	}
1970 
1971 	handle_deregister_done(guc, q);
1972 
1973 	return 0;
1974 }
1975 
1976 int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
1977 {
1978 	struct xe_gt *gt = guc_to_gt(guc);
1979 	struct xe_device *xe = guc_to_xe(guc);
1980 	struct xe_exec_queue *q;
1981 	u32 guc_id = msg[0];
1982 
1983 	if (unlikely(len < 1)) {
1984 		drm_err(&xe->drm, "Invalid length %u", len);
1985 		return -EPROTO;
1986 	}
1987 
1988 	q = g2h_exec_queue_lookup(guc, guc_id);
1989 	if (unlikely(!q))
1990 		return -EPROTO;
1991 
1992 	xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d",
1993 		   xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
1994 
1995 	/* FIXME: Do error capture, most likely async */
1996 
1997 	trace_xe_exec_queue_reset(q);
1998 
1999 	/*
2000 	 * A banned engine is a NOP at this point (came from
2001 	 * guc_exec_queue_timedout_job). Otherwise, kick the DRM scheduler to
2002 	 * cancel jobs by setting the job timeout to the minimum value, which
2003 	 * triggers guc_exec_queue_timedout_job.
2004 	 */
2005 	set_exec_queue_reset(q);
2006 	if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
2007 		xe_guc_exec_queue_trigger_cleanup(q);
2008 
2009 	return 0;
2010 }
2011 
2012 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
2013 					       u32 len)
2014 {
2015 	struct xe_gt *gt = guc_to_gt(guc);
2016 	struct xe_device *xe = guc_to_xe(guc);
2017 	struct xe_exec_queue *q;
2018 	u32 guc_id = msg[0];
2019 
2020 	if (unlikely(len < 1)) {
2021 		drm_err(&xe->drm, "Invalid length %u", len);
2022 		return -EPROTO;
2023 	}
2024 
2025 	q = g2h_exec_queue_lookup(guc, guc_id);
2026 	if (unlikely(!q))
2027 		return -EPROTO;
2028 
2029 	xe_gt_dbg(gt, "Engine memory cat error: engine_class=%s, logical_mask: 0x%x, guc_id=%d",
2030 		  xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
2031 
2032 	trace_xe_exec_queue_memory_cat_error(q);
2033 
2034 	/* Treat the same as engine reset */
2035 	set_exec_queue_reset(q);
2036 	if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
2037 		xe_guc_exec_queue_trigger_cleanup(q);
2038 
2039 	return 0;
2040 }
2041 
2042 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
2043 {
2044 	struct xe_device *xe = guc_to_xe(guc);
2045 	u8 guc_class, instance;
2046 	u32 reason;
2047 
2048 	if (unlikely(len != 3)) {
2049 		drm_err(&xe->drm, "Invalid length %u", len);
2050 		return -EPROTO;
2051 	}
2052 
2053 	guc_class = msg[0];
2054 	instance = msg[1];
2055 	reason = msg[2];
2056 
2057 	/* Unexpected failure of a hardware feature, log an actual error */
2058 	drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X",
2059 		guc_class, instance, reason);
2060 
2061 	xe_gt_reset_async(guc_to_gt(guc));
2062 
2063 	return 0;
2064 }
2065 
2066 static void
2067 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
2068 				   struct xe_guc_submit_exec_queue_snapshot *snapshot)
2069 {
2070 	struct xe_guc *guc = exec_queue_to_guc(q);
2071 	struct xe_device *xe = guc_to_xe(guc);
2072 	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
2073 	int i;
2074 
2075 	snapshot->guc.wqi_head = q->guc->wqi_head;
2076 	snapshot->guc.wqi_tail = q->guc->wqi_tail;
2077 	snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
2078 	snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
2079 	snapshot->parallel.wq_desc.status = parallel_read(xe, map,
2080 							  wq_desc.wq_status);
2081 
2082 	if (snapshot->parallel.wq_desc.head !=
2083 	    snapshot->parallel.wq_desc.tail) {
2084 		for (i = snapshot->parallel.wq_desc.head;
2085 		     i != snapshot->parallel.wq_desc.tail;
2086 		     i = (i + sizeof(u32)) % WQ_SIZE)
2087 			snapshot->parallel.wq[i / sizeof(u32)] =
2088 				parallel_read(xe, map, wq[i / sizeof(u32)]);
2089 	}
2090 }
2091 
2092 static void
2093 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
2094 				 struct drm_printer *p)
2095 {
2096 	int i;
2097 
2098 	drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
2099 		   snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head);
2100 	drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
2101 		   snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail);
2102 	drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status);
2103 
2104 	if (snapshot->parallel.wq_desc.head !=
2105 	    snapshot->parallel.wq_desc.tail) {
2106 		for (i = snapshot->parallel.wq_desc.head;
2107 		     i != snapshot->parallel.wq_desc.tail;
2108 		     i = (i + sizeof(u32)) % WQ_SIZE)
2109 			drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
2110 				   snapshot->parallel.wq[i / sizeof(u32)]);
2111 	}
2112 }
2113 
2114 /**
2115  * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
2116  * @q: faulty exec queue
2117  *
2118  * This can be printed out at a later stage, for example during dev_coredump
2119  * analysis.
2120  *
2121  * Returns: a GuC Submit Engine snapshot object that must be freed by the
2122  * caller, using `xe_guc_exec_queue_snapshot_free`.
2123  */
2124 struct xe_guc_submit_exec_queue_snapshot *
2125 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
2126 {
2127 	struct xe_gpu_scheduler *sched = &q->guc->sched;
2128 	struct xe_guc_submit_exec_queue_snapshot *snapshot;
2129 	int i;
2130 
2131 	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
2132 
2133 	if (!snapshot)
2134 		return NULL;
2135 
2136 	snapshot->guc.id = q->guc->id;
2137 	memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
2138 	snapshot->class = q->class;
2139 	snapshot->logical_mask = q->logical_mask;
2140 	snapshot->width = q->width;
2141 	snapshot->refcount = kref_read(&q->refcount);
2142 	snapshot->sched_timeout = sched->base.timeout;
2143 	snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
2144 	snapshot->sched_props.preempt_timeout_us =
2145 		q->sched_props.preempt_timeout_us;
2146 
2147 	snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *),
2148 				      GFP_ATOMIC);
2149 
2150 	if (snapshot->lrc) {
2151 		for (i = 0; i < q->width; ++i) {
2152 			struct xe_lrc *lrc = q->lrc[i];
2153 
2154 			snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc);
2155 		}
2156 	}
2157 
2158 	snapshot->schedule_state = atomic_read(&q->guc->state);
2159 	snapshot->exec_queue_flags = q->flags;
2160 
2161 	snapshot->parallel_execution = xe_exec_queue_is_parallel(q);
2162 	if (snapshot->parallel_execution)
2163 		guc_exec_queue_wq_snapshot_capture(q, snapshot);
2164 
2165 	spin_lock(&sched->base.job_list_lock);
2166 	snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list);
2167 	snapshot->pending_list = kmalloc_array(snapshot->pending_list_size,
2168 					       sizeof(struct pending_list_snapshot),
2169 					       GFP_ATOMIC);
2170 
2171 	if (snapshot->pending_list) {
2172 		struct xe_sched_job *job_iter;
2173 
2174 		i = 0;
2175 		list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) {
2176 			snapshot->pending_list[i].seqno =
2177 				xe_sched_job_seqno(job_iter);
2178 			snapshot->pending_list[i].fence =
2179 				dma_fence_is_signaled(job_iter->fence) ? 1 : 0;
2180 			snapshot->pending_list[i].finished =
2181 				dma_fence_is_signaled(&job_iter->drm.s_fence->finished)
2182 				? 1 : 0;
2183 			i++;
2184 		}
2185 	}
2186 
2187 	spin_unlock(&sched->base.job_list_lock);
2188 
2189 	return snapshot;
2190 }
2191 
2192 /**
2193  * xe_guc_exec_queue_snapshot_capture_delayed - Take the delayed part of a snapshot of the GuC Engine.
2194  * @snapshot: Previously captured snapshot of the job.
2195  *
2196  * This captures some data that requires taking locks, so it cannot be done in the signaling path.
2197  */
2198 void
2199 xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot)
2200 {
2201 	int i;
2202 
2203 	if (!snapshot || !snapshot->lrc)
2204 		return;
2205 
2206 	for (i = 0; i < snapshot->width; ++i)
2207 		xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]);
2208 }
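
/*
 * Illustrative sketch (not part of the driver): the intended life cycle of a
 * snapshot as described in the kernel-doc above, i.e. capture in the (possibly
 * atomic) signaling path, finish the lock-taking part later from process
 * context, then print and free. The devcoredump code follows this pattern;
 * the drm_printer passed in here is only an example.
 */
static void __maybe_unused example_snapshot_flow(struct xe_exec_queue *q,
						 struct drm_printer *p)
{
	struct xe_guc_submit_exec_queue_snapshot *snapshot;

	/* Safe from the signaling path: allocations use GFP_ATOMIC */
	snapshot = xe_guc_exec_queue_snapshot_capture(q);
	if (!snapshot)
		return;

	/* Later, from process context: grab the data that needs locks */
	xe_guc_exec_queue_snapshot_capture_delayed(snapshot);

	xe_guc_exec_queue_snapshot_print(snapshot, p);
	xe_guc_exec_queue_snapshot_free(snapshot);
}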
2209 
2210 /**
2211  * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
2212  * @snapshot: GuC Submit Engine snapshot object.
2213  * @p: drm_printer where it will be printed out.
2214  *
2215  * This function prints out a given GuC Submit Engine snapshot object.
2216  */
2217 void
2218 xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
2219 				 struct drm_printer *p)
2220 {
2221 	int i;
2222 
2223 	if (!snapshot)
2224 		return;
2225 
2226 	drm_printf(p, "GuC ID: %d\n", snapshot->guc.id);
2227 	drm_printf(p, "\tName: %s\n", snapshot->name);
2228 	drm_printf(p, "\tClass: %d\n", snapshot->class);
2229 	drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
2230 	drm_printf(p, "\tWidth: %d\n", snapshot->width);
2231 	drm_printf(p, "\tRef: %d\n", snapshot->refcount);
2232 	drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
2233 	drm_printf(p, "\tTimeslice: %u (us)\n",
2234 		   snapshot->sched_props.timeslice_us);
2235 	drm_printf(p, "\tPreempt timeout: %u (us)\n",
2236 		   snapshot->sched_props.preempt_timeout_us);
2237 
2238 	for (i = 0; snapshot->lrc && i < snapshot->width; ++i)
2239 		xe_lrc_snapshot_print(snapshot->lrc[i], p);
2240 
2241 	drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
2242 	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);
2243 
2244 	if (snapshot->parallel_execution)
2245 		guc_exec_queue_wq_snapshot_print(snapshot, p);
2246 
2247 	for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
2248 	     i++)
2249 		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
2250 			   snapshot->pending_list[i].seqno,
2251 			   snapshot->pending_list[i].fence,
2252 			   snapshot->pending_list[i].finished);
2253 }
2254 
2255 /**
2256  * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
2257  * snapshot.
2258  * @snapshot: GuC Submit Engine snapshot object.
2259  *
2260  * This function frees all the memory that was allocated at capture
2261  * time.
2262  */
2263 void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
2264 {
2265 	int i;
2266 
2267 	if (!snapshot)
2268 		return;
2269 
2270 	if (snapshot->lrc) {
2271 		for (i = 0; i < snapshot->width; i++)
2272 			xe_lrc_snapshot_free(snapshot->lrc[i]);
2273 		kfree(snapshot->lrc);
2274 	}
2275 	kfree(snapshot->pending_list);
2276 	kfree(snapshot);
2277 }
2278 
2279 static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
2280 {
2281 	struct xe_guc_submit_exec_queue_snapshot *snapshot;
2282 
2283 	snapshot = xe_guc_exec_queue_snapshot_capture(q);
2284 	xe_guc_exec_queue_snapshot_print(snapshot, p);
2285 	xe_guc_exec_queue_snapshot_free(snapshot);
2286 }
2287 
2288 /**
2289  * xe_guc_submit_print - GuC Submit Print.
2290  * @guc: GuC.
2291  * @p: drm_printer where it will be printed out.
2292  *
2293  * This function captures and prints snapshots of **all** GuC Engines.
2294  */
2295 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
2296 {
2297 	struct xe_exec_queue *q;
2298 	unsigned long index;
2299 
2300 	if (!xe_device_uc_enabled(guc_to_xe(guc)))
2301 		return;
2302 
2303 	mutex_lock(&guc->submission_state.lock);
2304 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
2305 		guc_exec_queue_print(q, p);
2306 	mutex_unlock(&guc->submission_state.lock);
2307 }
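
/*
 * Illustrative sketch (not part of the driver): dumping every GuC engine
 * snapshot to the kernel log through a drm_info_printer. A debugfs dump would
 * typically use a drm_seq_file_printer instead, but any drm_printer works.
 */
static void __maybe_unused example_dump_all_queues(struct xe_guc *guc)
{
	struct drm_printer p = drm_info_printer(guc_to_xe(guc)->drm.dev);

	xe_guc_submit_print(guc, &p);
}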
2308