// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/dma-fence-array.h>
#include <linux/math64.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_klvs_abi.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
#include "xe_vm.h"

static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * schedule engine done being processed).
 */
#define EXEC_QUEUE_STATE_REGISTERED (1 << 0)
#define EXEC_QUEUE_STATE_ENABLED (1 << 1)
#define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2)
#define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3)
#define EXEC_QUEUE_STATE_DESTROYED (1 << 4)
#define EXEC_QUEUE_STATE_SUSPENDED (1 << 5)
#define EXEC_QUEUE_STATE_RESET (1 << 6)
#define EXEC_QUEUE_STATE_KILLED (1 << 7)
#define EXEC_QUEUE_STATE_WEDGED (1 << 8)
#define EXEC_QUEUE_STATE_BANNED (1 << 9)
#define EXEC_QUEUE_STATE_CHECK_TIMEOUT (1 << 10)
#define EXEC_QUEUE_STATE_EXTRA_REF (1 << 11)

static bool exec_queue_registered(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
}

static void set_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static void clear_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static bool exec_queue_enabled(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED;
}

static void set_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static void clear_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static bool exec_queue_pending_enable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
}

static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static bool exec_queue_pending_disable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
}

static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static bool exec_queue_destroyed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
}

static void set_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static bool exec_queue_banned(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED;
}

static void set_exec_queue_banned(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state);
}

static bool exec_queue_suspended(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED;
}

static void set_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static void clear_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static bool exec_queue_reset(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
}

static void set_exec_queue_reset(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
}

static bool exec_queue_killed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED;
}

static void set_exec_queue_killed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state);
}

static bool exec_queue_wedged(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
}

static void set_exec_queue_wedged(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
}

static bool exec_queue_check_timeout(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_CHECK_TIMEOUT;
}

static void set_exec_queue_check_timeout(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state);
}

static void clear_exec_queue_check_timeout(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state);
}

static bool exec_queue_extra_ref(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_EXTRA_REF;
}

static void set_exec_queue_extra_ref(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state);
}

static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
{
	return (atomic_read(&q->guc->state) &
		(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED |
		 EXEC_QUEUE_STATE_BANNED));
}
226
static void guc_submit_fini(struct drm_device *drm, void *arg)
228 {
229 struct xe_guc *guc = arg;
230 struct xe_device *xe = guc_to_xe(guc);
231 struct xe_gt *gt = guc_to_gt(guc);
232 int ret;
233
234 ret = wait_event_timeout(guc->submission_state.fini_wq,
235 xa_empty(&guc->submission_state.exec_queue_lookup),
236 HZ * 5);
237
238 drain_workqueue(xe->destroy_wq);
239
240 xe_gt_assert(gt, ret);
241
242 xa_destroy(&guc->submission_state.exec_queue_lookup);
243 }
244
static void guc_submit_wedged_fini(void *arg)
246 {
247 struct xe_guc *guc = arg;
248 struct xe_exec_queue *q;
249 unsigned long index;
250
251 mutex_lock(&guc->submission_state.lock);
252 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
253 if (exec_queue_wedged(q)) {
254 mutex_unlock(&guc->submission_state.lock);
255 xe_exec_queue_put(q);
256 mutex_lock(&guc->submission_state.lock);
257 }
258 }
259 mutex_unlock(&guc->submission_state.lock);
260 }
261
262 static const struct xe_exec_queue_ops guc_exec_queue_ops;
263
static void primelockdep(struct xe_guc *guc)
265 {
266 if (!IS_ENABLED(CONFIG_LOCKDEP))
267 return;
268
269 fs_reclaim_acquire(GFP_KERNEL);
270
271 mutex_lock(&guc->submission_state.lock);
272 mutex_unlock(&guc->submission_state.lock);
273
274 fs_reclaim_release(GFP_KERNEL);
275 }
276
277 /**
278 * xe_guc_submit_init() - Initialize GuC submission.
279 * @guc: the &xe_guc to initialize
280 * @num_ids: number of GuC context IDs to use
281 *
282 * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all
283 * GuC context IDs supported by the GuC firmware should be used for submission.
284 *
285 * Only VF drivers will have to provide explicit number of GuC context IDs
286 * that they can use for submission.
287 *
288 * Return: 0 on success or a negative error code on failure.
289 */
int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
291 {
292 struct xe_device *xe = guc_to_xe(guc);
293 struct xe_gt *gt = guc_to_gt(guc);
294 int err;
295
296 err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
297 if (err)
298 return err;
299
300 err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids);
301 if (err)
302 return err;
303
304 gt->exec_queue_ops = &guc_exec_queue_ops;
305
306 xa_init(&guc->submission_state.exec_queue_lookup);
307
308 init_waitqueue_head(&guc->submission_state.fini_wq);
309
310 primelockdep(guc);
311
312 guc->submission_state.initialized = true;
313
314 return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
315 }
316
static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
318 {
319 int i;
320
321 lockdep_assert_held(&guc->submission_state.lock);
322
323 for (i = 0; i < xa_count; ++i)
324 xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);
325
326 xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
327 q->guc->id, q->width);
328
329 if (xa_empty(&guc->submission_state.exec_queue_lookup))
330 wake_up(&guc->submission_state.fini_wq);
331 }
332
static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
334 {
335 int ret;
336 int i;
337
	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path;
	 * worst case the user gets -ENOMEM on exec queue create and has to try
	 * again.
	 *
	 * FIXME: Have the caller pre-alloc or post-alloc with GFP_KERNEL to
	 * prevent failure.
	 */
345 lockdep_assert_held(&guc->submission_state.lock);
346
347 ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
348 q->width);
349 if (ret < 0)
350 return ret;
351
352 q->guc->id = ret;
353
354 for (i = 0; i < q->width; ++i) {
355 ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
356 q->guc->id + i, q, GFP_NOWAIT));
357 if (ret)
358 goto err_release;
359 }
360
361 return 0;
362
363 err_release:
364 __release_guc_id(guc, q, i);
365
366 return ret;
367 }
368
static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
370 {
371 mutex_lock(&guc->submission_state.lock);
372 __release_guc_id(guc, q, q->width);
373 mutex_unlock(&guc->submission_state.lock);
374 }
375
376 struct exec_queue_policy {
377 u32 count;
378 struct guc_update_exec_queue_policy h2g;
379 };
380
static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
382 {
383 size_t bytes = sizeof(policy->h2g.header) +
384 (sizeof(policy->h2g.klv[0]) * policy->count);
385
386 return bytes / sizeof(u32);
387 }
388
static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
390 u16 guc_id)
391 {
392 policy->h2g.header.action =
393 XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
394 policy->h2g.header.guc_id = guc_id;
395 policy->count = 0;
396 }
397
398 #define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
399 static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
400 u32 data) \
401 { \
402 XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
403 \
404 policy->h2g.klv[policy->count].kl = \
405 FIELD_PREP(GUC_KLV_0_KEY, \
406 GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
407 FIELD_PREP(GUC_KLV_0_LEN, 1); \
408 policy->h2g.klv[policy->count].value = data; \
409 policy->count++; \
410 }
411
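/*
 * Each invocation below generates a __guc_exec_queue_policy_add_<name>()
 * helper that appends one KLV (key/length/value) entry to the
 * UPDATE_CONTEXT_POLICIES H2G message being built in struct exec_queue_policy.
 */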
412 MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
413 MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
414 MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
415 #undef MAKE_EXEC_QUEUE_POLICY_ADD
416
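/* Map xe exec queue priority levels onto the GuC client priority levels */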
417 static const int xe_exec_queue_prio_to_guc[] = {
418 [XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
419 [XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
420 [XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
421 [XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
422 };
423
static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
425 {
426 struct exec_queue_policy policy;
427 struct xe_device *xe = guc_to_xe(guc);
428 enum xe_exec_queue_priority prio = q->sched_props.priority;
429 u32 timeslice_us = q->sched_props.timeslice_us;
430 u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
431
432 xe_assert(xe, exec_queue_registered(q));
433
434 __guc_exec_queue_policy_start_klv(&policy, q->guc->id);
435 __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
436 __guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
437 __guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
438
439 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
440 __guc_exec_queue_policy_action_size(&policy), 0, 0);
441 }
442
static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
444 {
445 struct exec_queue_policy policy;
446
447 __guc_exec_queue_policy_start_klv(&policy, q->guc->id);
448 __guc_exec_queue_policy_add_preemption_timeout(&policy, 1);
449
450 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
451 __guc_exec_queue_policy_action_size(&policy), 0, 0);
452 }
453
454 #define parallel_read(xe_, map_, field_) \
455 xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
456 field_)
457 #define parallel_write(xe_, map_, field_, val_) \
458 xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
459 field_, val_)
460
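/*
 * Register a parallel (multi-LRC) exec queue with the GuC. The variable-length
 * REGISTER_CONTEXT_MULTI_LRC action carries the workqueue descriptor/base plus
 * one LRC descriptor per LRC in the queue, hence the open-coded action buffer.
 */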
static void __register_mlrc_exec_queue(struct xe_guc *guc,
462 struct xe_exec_queue *q,
463 struct guc_ctxt_registration_info *info)
464 {
465 #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
466 struct xe_device *xe = guc_to_xe(guc);
467 u32 action[MAX_MLRC_REG_SIZE];
468 int len = 0;
469 int i;
470
471 xe_assert(xe, xe_exec_queue_is_parallel(q));
472
473 action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
474 action[len++] = info->flags;
475 action[len++] = info->context_idx;
476 action[len++] = info->engine_class;
477 action[len++] = info->engine_submit_mask;
478 action[len++] = info->wq_desc_lo;
479 action[len++] = info->wq_desc_hi;
480 action[len++] = info->wq_base_lo;
481 action[len++] = info->wq_base_hi;
482 action[len++] = info->wq_size;
483 action[len++] = q->width;
484 action[len++] = info->hwlrca_lo;
485 action[len++] = info->hwlrca_hi;
486
487 for (i = 1; i < q->width; ++i) {
488 struct xe_lrc *lrc = q->lrc[i];
489
490 action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
491 action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
492 }
493
494 xe_assert(xe, len <= MAX_MLRC_REG_SIZE);
495 #undef MAX_MLRC_REG_SIZE
496
497 xe_guc_ct_send(&guc->ct, action, len, 0, 0);
498 }
499
static void __register_exec_queue(struct xe_guc *guc,
501 struct guc_ctxt_registration_info *info)
502 {
503 u32 action[] = {
504 XE_GUC_ACTION_REGISTER_CONTEXT,
505 info->flags,
506 info->context_idx,
507 info->engine_class,
508 info->engine_submit_mask,
509 info->wq_desc_lo,
510 info->wq_desc_hi,
511 info->wq_base_lo,
512 info->wq_base_hi,
513 info->wq_size,
514 info->hwlrca_lo,
515 info->hwlrca_hi,
516 };
517
518 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
519 }
520
static void register_exec_queue(struct xe_exec_queue *q)
522 {
523 struct xe_guc *guc = exec_queue_to_guc(q);
524 struct xe_device *xe = guc_to_xe(guc);
525 struct xe_lrc *lrc = q->lrc[0];
526 struct guc_ctxt_registration_info info;
527
528 xe_assert(xe, !exec_queue_registered(q));
529
530 memset(&info, 0, sizeof(info));
531 info.context_idx = q->guc->id;
532 info.engine_class = xe_engine_class_to_guc_class(q->class);
533 info.engine_submit_mask = q->logical_mask;
534 info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
535 info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
536 info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
537
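	/*
	 * Parallel queues additionally point the GuC at the workqueue
	 * descriptor and ring stored in the parallel scratch page of LRC 0.
	 */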
538 if (xe_exec_queue_is_parallel(q)) {
539 u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
540 struct iosys_map map = xe_lrc_parallel_map(lrc);
541
542 info.wq_desc_lo = lower_32_bits(ggtt_addr +
543 offsetof(struct guc_submit_parallel_scratch, wq_desc));
544 info.wq_desc_hi = upper_32_bits(ggtt_addr +
545 offsetof(struct guc_submit_parallel_scratch, wq_desc));
546 info.wq_base_lo = lower_32_bits(ggtt_addr +
547 offsetof(struct guc_submit_parallel_scratch, wq[0]));
548 info.wq_base_hi = upper_32_bits(ggtt_addr +
549 offsetof(struct guc_submit_parallel_scratch, wq[0]));
550 info.wq_size = WQ_SIZE;
551
552 q->guc->wqi_head = 0;
553 q->guc->wqi_tail = 0;
554 xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
555 parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
556 }
557
	/*
	 * We must keep a reference for LR engines while they are registered
	 * with the GuC: their jobs signal immediately, and an engine can't be
	 * destroyed while the GuC still holds a reference to it.
	 */
563 if (xe_exec_queue_is_lr(q))
564 xe_exec_queue_get(q);
565
566 set_exec_queue_registered(q);
567 trace_xe_exec_queue_register(q);
568 if (xe_exec_queue_is_parallel(q))
569 __register_mlrc_exec_queue(guc, q, &info);
570 else
571 __register_exec_queue(guc, &info);
572 init_policies(guc, q);
573 }
574
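/*
 * The parallel submission workqueue (WQ) is a ring shared with the GuC: the
 * driver advances wqi_tail as it writes items and reads back the head that the
 * GuC publishes in the workqueue descriptor. When an item would straddle the
 * end of the ring, a NOOP entry pads the remaining space so the item can start
 * again at offset zero.
 */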
static u32 wq_space_until_wrap(struct xe_exec_queue *q)
576 {
577 return (WQ_SIZE - q->guc->wqi_tail);
578 }
579
static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
581 {
582 struct xe_guc *guc = exec_queue_to_guc(q);
583 struct xe_device *xe = guc_to_xe(guc);
584 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
585 unsigned int sleep_period_ms = 1;
586
587 #define AVAILABLE_SPACE \
588 CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
589 if (wqi_size > AVAILABLE_SPACE) {
590 try_again:
591 q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
592 if (wqi_size > AVAILABLE_SPACE) {
593 if (sleep_period_ms == 1024) {
594 xe_gt_reset_async(q->gt);
595 return -ENODEV;
596 }
597
598 msleep(sleep_period_ms);
599 sleep_period_ms <<= 1;
600 goto try_again;
601 }
602 }
603 #undef AVAILABLE_SPACE
604
605 return 0;
606 }
607
static int wq_noop_append(struct xe_exec_queue *q)
609 {
610 struct xe_guc *guc = exec_queue_to_guc(q);
611 struct xe_device *xe = guc_to_xe(guc);
612 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
613 u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;
614
615 if (wq_wait_for_space(q, wq_space_until_wrap(q)))
616 return -ENODEV;
617
618 xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw));
619
620 parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
621 FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
622 FIELD_PREP(WQ_LEN_MASK, len_dw));
623 q->guc->wqi_tail = 0;
624
625 return 0;
626 }
627
static void wq_item_append(struct xe_exec_queue *q)
629 {
630 struct xe_guc *guc = exec_queue_to_guc(q);
631 struct xe_device *xe = guc_to_xe(guc);
632 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
633 #define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */
634 u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
635 u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
636 u32 len_dw = (wqi_size / sizeof(u32)) - 1;
637 int i = 0, j;
638
639 if (wqi_size > wq_space_until_wrap(q)) {
640 if (wq_noop_append(q))
641 return;
642 }
643 if (wq_wait_for_space(q, wqi_size))
644 return;
645
646 wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
647 FIELD_PREP(WQ_LEN_MASK, len_dw);
648 wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
649 wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
650 FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
651 wqi[i++] = 0;
652 for (j = 1; j < q->width; ++j) {
653 struct xe_lrc *lrc = q->lrc[j];
654
655 wqi[i++] = lrc->ring.tail / sizeof(u64);
656 }
657
658 xe_assert(xe, i == wqi_size / sizeof(u32));
659
660 iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
661 wq[q->guc->wqi_tail / sizeof(u32)]));
662 xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
663 q->guc->wqi_tail += wqi_size;
664 xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE);
665
666 xe_device_wmb(xe);
667
668 map = xe_lrc_parallel_map(q->lrc[0]);
669 parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
670 }
671
672 #define RESUME_PENDING ~0x0ull
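/*
 * resume_time is set to RESUME_PENDING while a scheduling enable is in flight
 * with the GuC; waiters (e.g. the suspend message handler) block until it is
 * updated to a real timestamp or the GuC is stopped.
 */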
static void submit_exec_queue(struct xe_exec_queue *q)
674 {
675 struct xe_guc *guc = exec_queue_to_guc(q);
676 struct xe_device *xe = guc_to_xe(guc);
677 struct xe_lrc *lrc = q->lrc[0];
678 u32 action[3];
679 u32 g2h_len = 0;
680 u32 num_g2h = 0;
681 int len = 0;
682 bool extra_submit = false;
683
684 xe_assert(xe, exec_queue_registered(q));
685
686 if (xe_exec_queue_is_parallel(q))
687 wq_item_append(q);
688 else
689 xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
690
691 if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
692 return;
693
694 if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
695 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
696 action[len++] = q->guc->id;
697 action[len++] = GUC_CONTEXT_ENABLE;
698 g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
699 num_g2h = 1;
700 if (xe_exec_queue_is_parallel(q))
701 extra_submit = true;
702
703 q->guc->resume_time = RESUME_PENDING;
704 set_exec_queue_pending_enable(q);
705 set_exec_queue_enabled(q);
706 trace_xe_exec_queue_scheduling_enable(q);
707 } else {
708 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
709 action[len++] = q->guc->id;
710 trace_xe_exec_queue_submit(q);
711 }
712
713 xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);
714
715 if (extra_submit) {
716 len = 0;
717 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
718 action[len++] = q->guc->id;
719 trace_xe_exec_queue_submit(q);
720
721 xe_guc_ct_send(&guc->ct, action, len, 0, 0);
722 }
723 }
724
static struct dma_fence *
guc_exec_queue_run_job(struct drm_sched_job *drm_job)
727 {
728 struct xe_sched_job *job = to_xe_sched_job(drm_job);
729 struct xe_exec_queue *q = job->q;
730 struct xe_guc *guc = exec_queue_to_guc(q);
731 struct xe_device *xe = guc_to_xe(guc);
732 bool lr = xe_exec_queue_is_lr(q);
733
734 xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
735 exec_queue_banned(q) || exec_queue_suspended(q));
736
737 trace_xe_sched_job_run(job);
738
739 if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) {
740 if (!exec_queue_registered(q))
741 register_exec_queue(q);
742 if (!lr) /* LR jobs are emitted in the exec IOCTL */
743 q->ring_ops->emit_job(job);
744 submit_exec_queue(q);
745 }
746
747 if (lr) {
748 xe_sched_job_set_error(job, -EOPNOTSUPP);
749 return NULL;
750 } else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) {
751 return job->fence;
752 } else {
753 return dma_fence_get(job->fence);
754 }
755 }
756
static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
758 {
759 struct xe_sched_job *job = to_xe_sched_job(drm_job);
760
761 trace_xe_sched_job_free(job);
762 xe_sched_job_put(job);
763 }
764
static int guc_read_stopped(struct xe_guc *guc)
766 {
767 return atomic_read(&guc->submission_state.stopped);
768 }
769
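/*
 * Declare the 3-dword SCHED_CONTEXT_MODE_SET H2G action used to enable or
 * disable scheduling for an exec queue.
 */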
770 #define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \
771 u32 action[] = { \
772 XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \
773 q->guc->id, \
774 GUC_CONTEXT_##enable_disable, \
775 }
776
static void disable_scheduling_deregister(struct xe_guc *guc,
778 struct xe_exec_queue *q)
779 {
780 MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
781 struct xe_device *xe = guc_to_xe(guc);
782 int ret;
783
784 set_min_preemption_timeout(guc, q);
785 smp_rmb();
786 ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) ||
787 guc_read_stopped(guc), HZ * 5);
788 if (!ret) {
789 struct xe_gpu_scheduler *sched = &q->guc->sched;
790
791 drm_warn(&xe->drm, "Pending enable failed to respond");
792 xe_sched_submission_start(sched);
793 xe_gt_reset_async(q->gt);
794 xe_sched_tdr_queue_imm(sched);
795 return;
796 }
797
798 clear_exec_queue_enabled(q);
799 set_exec_queue_pending_disable(q);
800 set_exec_queue_destroyed(q);
801 trace_xe_exec_queue_scheduling_disable(q);
802
	/*
	 * Reserve space for both G2Hs here as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserve G2H space in handlers.
	 */
807 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
808 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
809 G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
810 }
811
static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
813 {
814 struct xe_guc *guc = exec_queue_to_guc(q);
815 struct xe_device *xe = guc_to_xe(guc);
816
	/* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
818 wake_up_all(&xe->ufence_wq);
819
820 if (xe_exec_queue_is_lr(q))
821 queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
822 else
823 xe_sched_tdr_queue_imm(&q->guc->sched);
824 }
825
/**
 * xe_guc_submit_wedge() - Wedge GuC submission
 * @guc: the GuC object
 *
 * Save the state of every exec queue registered with the GuC by taking a ref
 * to each queue. Register a cleanup handler to drop the refs on driver unload.
 */
void xe_guc_submit_wedge(struct xe_guc *guc)
834 {
835 struct xe_device *xe = guc_to_xe(guc);
836 struct xe_exec_queue *q;
837 unsigned long index;
838 int err;
839
840 xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);
841
842 /*
843 * If device is being wedged even before submission_state is
844 * initialized, there's nothing to do here.
845 */
846 if (!guc->submission_state.initialized)
847 return;
848
849 err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
850 guc_submit_wedged_fini, guc);
851 if (err) {
		drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2; device is wedged regardless\n");
853 return;
854 }
855
856 mutex_lock(&guc->submission_state.lock);
857 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
858 if (xe_exec_queue_get_unless_zero(q))
859 set_exec_queue_wedged(q);
860 mutex_unlock(&guc->submission_state.lock);
861 }
862
static bool guc_submit_hint_wedged(struct xe_guc *guc)
864 {
865 struct xe_device *xe = guc_to_xe(guc);
866
867 if (xe->wedged.mode != 2)
868 return false;
869
870 if (xe_device_wedged(xe))
871 return true;
872
873 xe_device_declare_wedged(xe);
874
875 return true;
876 }
877
static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
879 {
880 struct xe_guc_exec_queue *ge =
881 container_of(w, struct xe_guc_exec_queue, lr_tdr);
882 struct xe_exec_queue *q = ge->q;
883 struct xe_guc *guc = exec_queue_to_guc(q);
884 struct xe_device *xe = guc_to_xe(guc);
885 struct xe_gpu_scheduler *sched = &ge->sched;
886 bool wedged;
887
888 xe_assert(xe, xe_exec_queue_is_lr(q));
889 trace_xe_exec_queue_lr_cleanup(q);
890
891 wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
892
893 /* Kill the run_job / process_msg entry points */
894 xe_sched_submission_stop(sched);
895
896 /*
897 * Engine state now mostly stable, disable scheduling / deregister if
898 * needed. This cleanup routine might be called multiple times, where
899 * the actual async engine deregister drops the final engine ref.
900 * Calling disable_scheduling_deregister will mark the engine as
901 * destroyed and fire off the CT requests to disable scheduling /
902 * deregister, which we only want to do once. We also don't want to mark
903 * the engine as pending_disable again as this may race with the
904 * xe_guc_deregister_done_handler() which treats it as an unexpected
905 * state.
906 */
907 if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
908 struct xe_guc *guc = exec_queue_to_guc(q);
909 int ret;
910
911 set_exec_queue_banned(q);
912 disable_scheduling_deregister(guc, q);
913
		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken, the GT reset code should
		 * signal us.
		 */
918 ret = wait_event_timeout(guc->ct.wq,
919 !exec_queue_pending_disable(q) ||
920 guc_read_stopped(guc), HZ * 5);
921 if (!ret) {
922 drm_warn(&xe->drm, "Schedule disable failed to respond");
923 xe_sched_submission_start(sched);
924 xe_gt_reset_async(q->gt);
925 return;
926 }
927 }
928
929 xe_sched_submission_start(sched);
930 }
931
932 #define ADJUST_FIVE_PERCENT(__t) mul_u64_u32_div(__t, 105, 100)
933
static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
935 {
936 struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
937 u32 ctx_timestamp, ctx_job_timestamp;
938 u32 timeout_ms = q->sched_props.job_timeout_ms;
939 u32 diff;
940 u64 running_time_ms;
941
942 if (!xe_sched_job_started(job)) {
943 xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
944 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
945 q->guc->id);
946
947 return xe_sched_invalidate_job(job, 2);
948 }
949
950 ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
951 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
952
	/*
	 * The counter wraps at ~223s at the usual 19.2 MHz, so be paranoid and
	 * catch possible overflows with a high timeout.
	 */
957 xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC);
958
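	/* Handle the 32-bit context timestamp wrapping between job start and now */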
959 if (ctx_timestamp < ctx_job_timestamp)
960 diff = ctx_timestamp + U32_MAX - ctx_job_timestamp;
961 else
962 diff = ctx_timestamp - ctx_job_timestamp;
963
	/*
	 * Pad the measured running time by 5% to account for GuC scheduling
	 * latency
	 */
967 running_time_ms =
968 ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff));
969
970 xe_gt_dbg(gt,
971 "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x",
972 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
973 q->guc->id, running_time_ms, timeout_ms, diff);
974
975 return running_time_ms >= timeout_ms;
976 }
977
static void enable_scheduling(struct xe_exec_queue *q)
979 {
980 MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
981 struct xe_guc *guc = exec_queue_to_guc(q);
982 int ret;
983
984 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
985 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
986 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
987 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
988
989 set_exec_queue_pending_enable(q);
990 set_exec_queue_enabled(q);
991 trace_xe_exec_queue_scheduling_enable(q);
992
993 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
994 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
995
996 ret = wait_event_timeout(guc->ct.wq,
997 !exec_queue_pending_enable(q) ||
998 guc_read_stopped(guc), HZ * 5);
999 if (!ret || guc_read_stopped(guc)) {
1000 xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
1001 set_exec_queue_banned(q);
1002 xe_gt_reset_async(q->gt);
1003 xe_sched_tdr_queue_imm(&q->guc->sched);
1004 }
1005 }
1006
static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
1008 {
1009 MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
1010 struct xe_guc *guc = exec_queue_to_guc(q);
1011
1012 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
1013 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
1014 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
1015
1016 if (immediate)
1017 set_min_preemption_timeout(guc, q);
1018 clear_exec_queue_enabled(q);
1019 set_exec_queue_pending_disable(q);
1020 trace_xe_exec_queue_scheduling_disable(q);
1021
1022 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
1023 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
1024 }
1025
static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
1027 {
1028 u32 action[] = {
1029 XE_GUC_ACTION_DEREGISTER_CONTEXT,
1030 q->guc->id,
1031 };
1032
1033 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
1034 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
1035 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
1036 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
1037
1038 set_exec_queue_destroyed(q);
1039 trace_xe_exec_queue_deregister(q);
1040
1041 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
1042 G2H_LEN_DW_DEREGISTER_CONTEXT, 1);
1043 }
1044
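/*
 * TDR entry point. Roughly: stop the scheduler, disable scheduling on the GuC
 * side so the context timestamps are stable, check whether the job has really
 * exceeded its timeout, and then either re-enable scheduling (false positive)
 * or ban the queue, signal its fences and trigger cleanup / a GT reset.
 */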
static enum drm_gpu_sched_stat
guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
1047 {
1048 struct xe_sched_job *job = to_xe_sched_job(drm_job);
1049 struct xe_sched_job *tmp_job;
1050 struct xe_exec_queue *q = job->q;
1051 struct xe_gpu_scheduler *sched = &q->guc->sched;
1052 struct xe_guc *guc = exec_queue_to_guc(q);
1053 const char *process_name = "no process";
1054 int err = -ETIME;
1055 pid_t pid = -1;
1056 int i = 0;
1057 bool wedged, skip_timeout_check;
1058
	/*
	 * The TDR has fired before the free-job worker. This is common if the
	 * exec queue is closed immediately after the last fence signals. Add
	 * the job back to the pending list so it can be freed, and kick the
	 * scheduler to ensure the free job is not lost.
	 */
1065 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
1066 xe_sched_add_pending_job(sched, job);
1067 xe_sched_submission_start(sched);
1068
1069 return DRM_GPU_SCHED_STAT_NOMINAL;
1070 }
1071
1072 /* Kill the run_job entry point */
1073 xe_sched_submission_stop(sched);
1074
1075 /* Must check all state after stopping scheduler */
1076 skip_timeout_check = exec_queue_reset(q) ||
1077 exec_queue_killed_or_banned_or_wedged(q) ||
1078 exec_queue_destroyed(q);
1079
1080 /*
1081 * XXX: Sampling timeout doesn't work in wedged mode as we have to
1082 * modify scheduling state to read timestamp. We could read the
1083 * timestamp from a register to accumulate current running time but this
1084 * doesn't work for SRIOV. For now assuming timeouts in wedged mode are
1085 * genuine timeouts.
1086 */
1087 wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
1088
1089 /* Engine state now stable, disable scheduling to check timestamp */
1090 if (!wedged && exec_queue_registered(q)) {
1091 int ret;
1092
1093 if (exec_queue_reset(q))
1094 err = -EIO;
1095
1096 if (!exec_queue_destroyed(q)) {
1097 /*
1098 * Wait for any pending G2H to flush out before
1099 * modifying state
1100 */
1101 ret = wait_event_timeout(guc->ct.wq,
1102 !exec_queue_pending_enable(q) ||
1103 guc_read_stopped(guc), HZ * 5);
1104 if (!ret || guc_read_stopped(guc))
1105 goto trigger_reset;
1106
			/*
			 * This flag communicates to the G2H handler that the
			 * schedule disable originated from a timeout check.
			 * The G2H handler then avoids triggering cleanup or
			 * deregistering the exec queue.
			 */
1113 set_exec_queue_check_timeout(q);
1114 disable_scheduling(q, skip_timeout_check);
1115 }
1116
		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken, the GT reset code should
		 * signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
1125 smp_rmb();
1126 ret = wait_event_timeout(guc->ct.wq,
1127 !exec_queue_pending_disable(q) ||
1128 guc_read_stopped(guc), HZ * 5);
1129 if (!ret || guc_read_stopped(guc)) {
1130 trigger_reset:
1131 if (!ret)
1132 xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond");
1133 set_exec_queue_extra_ref(q);
1134 xe_exec_queue_get(q); /* GT reset owns this */
1135 set_exec_queue_banned(q);
1136 xe_gt_reset_async(q->gt);
1137 xe_sched_tdr_queue_imm(sched);
1138 goto rearm;
1139 }
1140 }
1141
1142 /*
1143 * Check if job is actually timed out, if so restart job execution and TDR
1144 */
1145 if (!wedged && !skip_timeout_check && !check_timeout(q, job) &&
1146 !exec_queue_reset(q) && exec_queue_registered(q)) {
1147 clear_exec_queue_check_timeout(q);
1148 goto sched_enable;
1149 }
1150
1151 if (q->vm && q->vm->xef) {
1152 process_name = q->vm->xef->process_name;
1153 pid = q->vm->xef->pid;
1154 }
1155 xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
1156 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
1157 q->guc->id, q->flags, process_name, pid);
1158
1159 trace_xe_sched_job_timedout(job);
1160
1161 if (!exec_queue_killed(q))
1162 xe_devcoredump(job);
1163
	/*
	 * Kernel jobs should never fail, nor should VM jobs; if they do,
	 * something has gone wrong and the GT needs a reset.
	 */
1168 xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
1169 "Kernel-submitted job timed out\n");
1170 xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
1171 "VM job timed out on non-killed execqueue\n");
1172 if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
1173 (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
1174 if (!xe_sched_invalidate_job(job, 2)) {
1175 clear_exec_queue_check_timeout(q);
1176 xe_gt_reset_async(q->gt);
1177 goto rearm;
1178 }
1179 }
1180
1181 /* Finish cleaning up exec queue via deregister */
1182 set_exec_queue_banned(q);
1183 if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
1184 set_exec_queue_extra_ref(q);
1185 xe_exec_queue_get(q);
1186 __deregister_exec_queue(guc, q);
1187 }
1188
1189 /* Stop fence signaling */
1190 xe_hw_fence_irq_stop(q->fence_irq);
1191
1192 /*
1193 * Fence state now stable, stop / start scheduler which cleans up any
1194 * fences that are complete
1195 */
1196 xe_sched_add_pending_job(sched, job);
1197 xe_sched_submission_start(sched);
1198
1199 xe_guc_exec_queue_trigger_cleanup(q);
1200
1201 /* Mark all outstanding jobs as bad, thus completing them */
1202 spin_lock(&sched->base.job_list_lock);
1203 list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
1204 xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
1205 spin_unlock(&sched->base.job_list_lock);
1206
1207 /* Start fence signaling */
1208 xe_hw_fence_irq_start(q->fence_irq);
1209
1210 return DRM_GPU_SCHED_STAT_NOMINAL;
1211
1212 sched_enable:
1213 enable_scheduling(q);
1214 rearm:
	/*
	 * XXX: Ideally we would adjust the timeout based on the current
	 * execution time, but there is currently no easy way to do so in the
	 * DRM scheduler. With some thought, do this in a follow up.
	 */
1220 xe_sched_add_pending_job(sched, job);
1221 xe_sched_submission_start(sched);
1222
1223 return DRM_GPU_SCHED_STAT_NOMINAL;
1224 }
1225
static void __guc_exec_queue_fini_async(struct work_struct *w)
1227 {
1228 struct xe_guc_exec_queue *ge =
1229 container_of(w, struct xe_guc_exec_queue, fini_async);
1230 struct xe_exec_queue *q = ge->q;
1231 struct xe_guc *guc = exec_queue_to_guc(q);
1232
1233 xe_pm_runtime_get(guc_to_xe(guc));
1234 trace_xe_exec_queue_destroy(q);
1235
1236 release_guc_id(guc, q);
1237 if (xe_exec_queue_is_lr(q))
1238 cancel_work_sync(&ge->lr_tdr);
1239 /* Confirm no work left behind accessing device structures */
1240 cancel_delayed_work_sync(&ge->sched.base.work_tdr);
1241 xe_sched_entity_fini(&ge->entity);
1242 xe_sched_fini(&ge->sched);
1243
	/*
	 * Free via RCU as the sched is exported via DRM scheduler fences
	 * (timeline name).
	 */
1248 kfree_rcu(ge, rcu);
1249 xe_exec_queue_fini(q);
1250 xe_pm_runtime_put(guc_to_xe(guc));
1251 }
1252
static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
1254 {
1255 struct xe_guc *guc = exec_queue_to_guc(q);
1256 struct xe_device *xe = guc_to_xe(guc);
1257
1258 INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
1259
1260 /* We must block on kernel engines so slabs are empty on driver unload */
1261 if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
1262 __guc_exec_queue_fini_async(&q->guc->fini_async);
1263 else
1264 queue_work(xe->destroy_wq, &q->guc->fini_async);
1265 }
1266
static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
1268 {
	/*
	 * This might be called from within the GPU scheduler, so the fini must
	 * be asynchronous: we fini the scheduler when the engine is fini'd, and
	 * the scheduler can't complete its own fini from within itself
	 * (circular dependency). Going async resolves this; we don't really
	 * care when everything is fini'd, just that it is.
	 */
1276 guc_exec_queue_fini_async(q);
1277 }
1278
static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
1280 {
1281 struct xe_exec_queue *q = msg->private_data;
1282 struct xe_guc *guc = exec_queue_to_guc(q);
1283 struct xe_device *xe = guc_to_xe(guc);
1284
1285 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
1286 trace_xe_exec_queue_cleanup_entity(q);
1287
1288 if (exec_queue_registered(q))
1289 disable_scheduling_deregister(guc, q);
1290 else
1291 __guc_exec_queue_fini(guc, q);
1292 }
1293
static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
1295 {
1296 return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q);
1297 }
1298
static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
1300 {
1301 struct xe_exec_queue *q = msg->private_data;
1302 struct xe_guc *guc = exec_queue_to_guc(q);
1303
1304 if (guc_exec_queue_allowed_to_change_state(q))
1305 init_policies(guc, q);
1306 kfree(msg);
1307 }
1308
static void __suspend_fence_signal(struct xe_exec_queue *q)
1310 {
1311 if (!q->guc->suspend_pending)
1312 return;
1313
1314 WRITE_ONCE(q->guc->suspend_pending, false);
1315 wake_up(&q->guc->suspend_wait);
1316 }
1317
static void suspend_fence_signal(struct xe_exec_queue *q)
1319 {
1320 struct xe_guc *guc = exec_queue_to_guc(q);
1321 struct xe_device *xe = guc_to_xe(guc);
1322
1323 xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) ||
1324 guc_read_stopped(guc));
1325 xe_assert(xe, q->guc->suspend_pending);
1326
1327 __suspend_fence_signal(q);
1328 }
1329
static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
1331 {
1332 struct xe_exec_queue *q = msg->private_data;
1333 struct xe_guc *guc = exec_queue_to_guc(q);
1334
1335 if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
1336 exec_queue_enabled(q)) {
1337 wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING ||
1338 guc_read_stopped(guc));
1339
1340 if (!guc_read_stopped(guc)) {
1341 s64 since_resume_ms =
1342 ktime_ms_delta(ktime_get(),
1343 q->guc->resume_time);
1344 s64 wait_ms = q->vm->preempt.min_run_period_ms -
1345 since_resume_ms;
1346
1347 if (wait_ms > 0 && q->guc->resume_time)
1348 msleep(wait_ms);
1349
1350 set_exec_queue_suspended(q);
1351 disable_scheduling(q, false);
1352 }
1353 } else if (q->guc->suspend_pending) {
1354 set_exec_queue_suspended(q);
1355 suspend_fence_signal(q);
1356 }
1357 }
1358
static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
1360 {
1361 struct xe_exec_queue *q = msg->private_data;
1362
1363 if (guc_exec_queue_allowed_to_change_state(q)) {
1364 clear_exec_queue_suspended(q);
1365 if (!exec_queue_enabled(q)) {
1366 q->guc->resume_time = RESUME_PENDING;
1367 enable_scheduling(q);
1368 }
1369 } else {
1370 clear_exec_queue_suspended(q);
1371 }
1372 }
1373
1374 #define CLEANUP 1 /* Non-zero values to catch uninitialized msg */
1375 #define SET_SCHED_PROPS 2
1376 #define SUSPEND 3
1377 #define RESUME 4
1378 #define OPCODE_MASK 0xf
1379 #define MSG_LOCKED BIT(8)
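/*
 * Messages are queued to the xe_gpu_scheduler and handled in
 * guc_exec_queue_process_msg(). MSG_LOCKED is or'd into the opcode when the
 * caller already holds the scheduler message lock.
 */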
1380
static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
1382 {
1383 struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data));
1384
1385 trace_xe_sched_msg_recv(msg);
1386
1387 switch (msg->opcode) {
1388 case CLEANUP:
1389 __guc_exec_queue_process_msg_cleanup(msg);
1390 break;
1391 case SET_SCHED_PROPS:
1392 __guc_exec_queue_process_msg_set_sched_props(msg);
1393 break;
1394 case SUSPEND:
1395 __guc_exec_queue_process_msg_suspend(msg);
1396 break;
1397 case RESUME:
1398 __guc_exec_queue_process_msg_resume(msg);
1399 break;
1400 default:
1401 XE_WARN_ON("Unknown message type");
1402 }
1403
1404 xe_pm_runtime_put(xe);
1405 }
1406
1407 static const struct drm_sched_backend_ops drm_sched_ops = {
1408 .run_job = guc_exec_queue_run_job,
1409 .free_job = guc_exec_queue_free_job,
1410 .timedout_job = guc_exec_queue_timedout_job,
1411 };
1412
1413 static const struct xe_sched_backend_ops xe_sched_ops = {
1414 .process_msg = guc_exec_queue_process_msg,
1415 };
1416
static int guc_exec_queue_init(struct xe_exec_queue *q)
1418 {
1419 struct xe_gpu_scheduler *sched;
1420 struct xe_guc *guc = exec_queue_to_guc(q);
1421 struct xe_device *xe = guc_to_xe(guc);
1422 struct xe_guc_exec_queue *ge;
1423 long timeout;
1424 int err, i;
1425
1426 xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc)));
1427
1428 ge = kzalloc(sizeof(*ge), GFP_KERNEL);
1429 if (!ge)
1430 return -ENOMEM;
1431
1432 q->guc = ge;
1433 ge->q = q;
1434 init_rcu_head(&ge->rcu);
1435 init_waitqueue_head(&ge->suspend_wait);
1436
1437 for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
1438 INIT_LIST_HEAD(&ge->static_msgs[i].link);
1439
1440 timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
1441 msecs_to_jiffies(q->sched_props.job_timeout_ms);
1442 err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
1443 NULL, q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64,
1444 timeout, guc_to_gt(guc)->ordered_wq, NULL,
1445 q->name, gt_to_xe(q->gt)->drm.dev);
1446 if (err)
1447 goto err_free;
1448
1449 sched = &ge->sched;
1450 err = xe_sched_entity_init(&ge->entity, sched);
1451 if (err)
1452 goto err_sched;
1453
1454 if (xe_exec_queue_is_lr(q))
1455 INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup);
1456
1457 mutex_lock(&guc->submission_state.lock);
1458
1459 err = alloc_guc_id(guc, q);
1460 if (err)
1461 goto err_entity;
1462
1463 q->entity = &ge->entity;
1464
1465 if (guc_read_stopped(guc))
1466 xe_sched_stop(sched);
1467
1468 mutex_unlock(&guc->submission_state.lock);
1469
1470 xe_exec_queue_assign_name(q, q->guc->id);
1471
1472 trace_xe_exec_queue_create(q);
1473
1474 return 0;
1475
1476 err_entity:
1477 mutex_unlock(&guc->submission_state.lock);
1478 xe_sched_entity_fini(&ge->entity);
1479 err_sched:
1480 xe_sched_fini(&ge->sched);
1481 err_free:
1482 kfree(ge);
1483
1484 return err;
1485 }
1486
static void guc_exec_queue_kill(struct xe_exec_queue *q)
1488 {
1489 trace_xe_exec_queue_kill(q);
1490 set_exec_queue_killed(q);
1491 __suspend_fence_signal(q);
1492 xe_guc_exec_queue_trigger_cleanup(q);
1493 }
1494
static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
1496 u32 opcode)
1497 {
1498 xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));
1499
1500 INIT_LIST_HEAD(&msg->link);
1501 msg->opcode = opcode & OPCODE_MASK;
1502 msg->private_data = q;
1503
1504 trace_xe_sched_msg_add(msg);
1505 if (opcode & MSG_LOCKED)
1506 xe_sched_add_msg_locked(&q->guc->sched, msg);
1507 else
1508 xe_sched_add_msg(&q->guc->sched, msg);
1509 }
1510
static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
1512 struct xe_sched_msg *msg,
1513 u32 opcode)
1514 {
1515 if (!list_empty(&msg->link))
1516 return false;
1517
1518 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED);
1519
1520 return true;
1521 }
1522
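/*
 * Indices into the pre-allocated static_msgs array in struct
 * xe_guc_exec_queue, so cleanup, suspend and resume do not need to allocate
 * memory.
 */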
1523 #define STATIC_MSG_CLEANUP 0
1524 #define STATIC_MSG_SUSPEND 1
1525 #define STATIC_MSG_RESUME 2
static void guc_exec_queue_fini(struct xe_exec_queue *q)
1527 {
1528 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
1529
1530 if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
1531 guc_exec_queue_add_msg(q, msg, CLEANUP);
1532 else
1533 __guc_exec_queue_fini(exec_queue_to_guc(q), q);
1534 }
1535
static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
1537 enum xe_exec_queue_priority priority)
1538 {
1539 struct xe_sched_msg *msg;
1540
1541 if (q->sched_props.priority == priority ||
1542 exec_queue_killed_or_banned_or_wedged(q))
1543 return 0;
1544
1545 msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1546 if (!msg)
1547 return -ENOMEM;
1548
1549 q->sched_props.priority = priority;
1550 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
1551
1552 return 0;
1553 }
1554
static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
1556 {
1557 struct xe_sched_msg *msg;
1558
1559 if (q->sched_props.timeslice_us == timeslice_us ||
1560 exec_queue_killed_or_banned_or_wedged(q))
1561 return 0;
1562
1563 msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1564 if (!msg)
1565 return -ENOMEM;
1566
1567 q->sched_props.timeslice_us = timeslice_us;
1568 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
1569
1570 return 0;
1571 }
1572
static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
1574 u32 preempt_timeout_us)
1575 {
1576 struct xe_sched_msg *msg;
1577
1578 if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
1579 exec_queue_killed_or_banned_or_wedged(q))
1580 return 0;
1581
1582 msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1583 if (!msg)
1584 return -ENOMEM;
1585
1586 q->sched_props.preempt_timeout_us = preempt_timeout_us;
1587 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
1588
1589 return 0;
1590 }
1591
static int guc_exec_queue_suspend(struct xe_exec_queue *q)
1593 {
1594 struct xe_gpu_scheduler *sched = &q->guc->sched;
1595 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
1596
1597 if (exec_queue_killed_or_banned_or_wedged(q))
1598 return -EINVAL;
1599
1600 xe_sched_msg_lock(sched);
1601 if (guc_exec_queue_try_add_msg(q, msg, SUSPEND))
1602 q->guc->suspend_pending = true;
1603 xe_sched_msg_unlock(sched);
1604
1605 return 0;
1606 }
1607
static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
1609 {
1610 struct xe_guc *guc = exec_queue_to_guc(q);
1611 int ret;
1612
	/*
	 * We likely don't need to check exec_queue_killed() as we clear
	 * suspend_pending upon kill, but to be paranoid about races in which
	 * suspend_pending is set after the kill, also check for kill here.
	 */
1618 ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
1619 !READ_ONCE(q->guc->suspend_pending) ||
1620 exec_queue_killed(q) ||
1621 guc_read_stopped(guc),
1622 HZ * 5);
1623
1624 if (!ret) {
1625 xe_gt_warn(guc_to_gt(guc),
1626 "Suspend fence, guc_id=%d, failed to respond",
1627 q->guc->id);
1628 /* XXX: Trigger GT reset? */
1629 return -ETIME;
1630 }
1631
1632 return ret < 0 ? ret : 0;
1633 }
1634
static void guc_exec_queue_resume(struct xe_exec_queue *q)
1636 {
1637 struct xe_gpu_scheduler *sched = &q->guc->sched;
1638 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
1639 struct xe_guc *guc = exec_queue_to_guc(q);
1640 struct xe_device *xe = guc_to_xe(guc);
1641
1642 xe_assert(xe, !q->guc->suspend_pending);
1643
1644 xe_sched_msg_lock(sched);
1645 guc_exec_queue_try_add_msg(q, msg, RESUME);
1646 xe_sched_msg_unlock(sched);
1647 }
1648
static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
1650 {
1651 return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q);
1652 }
1653
1654 /*
1655 * All of these functions are an abstraction layer which other parts of XE can
1656 * use to trap into the GuC backend. All of these functions, aside from init,
1657 * really shouldn't do much other than trap into the DRM scheduler which
1658 * synchronizes these operations.
1659 */
1660 static const struct xe_exec_queue_ops guc_exec_queue_ops = {
1661 .init = guc_exec_queue_init,
1662 .kill = guc_exec_queue_kill,
1663 .fini = guc_exec_queue_fini,
1664 .set_priority = guc_exec_queue_set_priority,
1665 .set_timeslice = guc_exec_queue_set_timeslice,
1666 .set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
1667 .suspend = guc_exec_queue_suspend,
1668 .suspend_wait = guc_exec_queue_suspend_wait,
1669 .resume = guc_exec_queue_resume,
1670 .reset_status = guc_exec_queue_reset_status,
1671 };
1672
static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
1674 {
1675 struct xe_gpu_scheduler *sched = &q->guc->sched;
1676
1677 /* Stop scheduling + flush any DRM scheduler operations */
1678 xe_sched_submission_stop(sched);
1679
1680 /* Clean up lost G2H + reset engine state */
1681 if (exec_queue_registered(q)) {
1682 if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
1683 xe_exec_queue_put(q);
1684 else if (exec_queue_destroyed(q))
1685 __guc_exec_queue_fini(guc, q);
1686 }
1687 if (q->guc->suspend_pending) {
1688 set_exec_queue_suspended(q);
1689 suspend_fence_signal(q);
1690 }
1691 atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED |
1692 EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED |
1693 EXEC_QUEUE_STATE_SUSPENDED,
1694 &q->guc->state);
1695 q->guc->resume_time = 0;
1696 trace_xe_exec_queue_stop(q);
1697
1698 /*
1699 * Ban any engine (aside from kernel and engines used for VM ops) that has
1700 * a started but not yet completed job, or whose jobs have gone through a
1701 * GT reset more than twice.
1702 */
1703 if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
1704 struct xe_sched_job *job = xe_sched_first_pending_job(sched);
1705 bool ban = false;
1706
1707 if (job) {
1708 if ((xe_sched_job_started(job) &&
1709 !xe_sched_job_completed(job)) ||
1710 xe_sched_invalidate_job(job, 2)) {
1711 trace_xe_sched_job_ban(job);
1712 ban = true;
1713 }
1714 } else if (xe_exec_queue_is_lr(q) &&
1715 (xe_lrc_ring_head(q->lrc[0]) != xe_lrc_ring_tail(q->lrc[0]))) {
1716 ban = true;
1717 }
1718
1719 if (ban) {
1720 set_exec_queue_banned(q);
1721 xe_guc_exec_queue_trigger_cleanup(q);
1722 }
1723 }
1724 }
1725
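/*
 * Mark GuC submission as stopped ahead of a reset and wake any waiters;
 * returns the previous stopped value so callers can tell whether a stop was
 * already in flight.
 */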
1726 int xe_guc_submit_reset_prepare(struct xe_guc *guc)
1727 {
1728 int ret;
1729
1730 if (!guc->submission_state.initialized)
1731 return 0;
1732
1733 /*
1734 * Using an atomic here rather than submission_state.lock as this
1735 * function can be called while holding the CT lock (engine reset
1736 * failure). submission_state.lock needs the CT lock to resubmit jobs.
1737 * An atomic is not ideal, but it works to prevent concurrent resets
1738 * and to release any TDRs waiting on guc->submission_state.stopped.
1739 */
1740 ret = atomic_fetch_or(1, &guc->submission_state.stopped);
1741 smp_wmb();
1742 wake_up_all(&guc->ct.wq);
1743
1744 return ret;
1745 }
1746
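/*
 * Wait until GuC submission is restarted (stopped flag cleared) or the device
 * is declared wedged.
 */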
1747 void xe_guc_submit_reset_wait(struct xe_guc *guc)
1748 {
1749 wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
1750 !guc_read_stopped(guc));
1751 }
1752
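/* Stop every registered exec queue; expects the GuC stopped flag to already be set. */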
1753 void xe_guc_submit_stop(struct xe_guc *guc)
1754 {
1755 struct xe_exec_queue *q;
1756 unsigned long index;
1757 struct xe_device *xe = guc_to_xe(guc);
1758
1759 xe_assert(xe, guc_read_stopped(guc) == 1);
1760
1761 mutex_lock(&guc->submission_state.lock);
1762
1763 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
1764 /* Prevent redundant attempts to stop parallel queues */
1765 if (q->guc->id != index)
1766 continue;
1767
1768 guc_exec_queue_stop(guc, q);
1769 }
1770
1771 mutex_unlock(&guc->submission_state.lock);
1772
1773 /*
1774 * No one can enter the backend at this point, aside from new engine
1775 * creation which is protected by guc->submission_state.lock.
1776 */
1777
1778 }
1779
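/*
 * Restart a single exec queue after a GuC reset: reset the LRC ring heads to
 * the current tails, resubmit pending jobs, and restart the scheduler and TDR.
 */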
1780 static void guc_exec_queue_start(struct xe_exec_queue *q)
1781 {
1782 struct xe_gpu_scheduler *sched = &q->guc->sched;
1783
1784 if (!exec_queue_killed_or_banned_or_wedged(q)) {
1785 int i;
1786
1787 trace_xe_exec_queue_resubmit(q);
1788 for (i = 0; i < q->width; ++i)
1789 xe_lrc_set_ring_head(q->lrc[i], q->lrc[i]->ring.tail);
1790 xe_sched_resubmit_jobs(sched);
1791 }
1792
1793 xe_sched_submission_start(sched);
1794 xe_sched_submission_resume_tdr(sched);
1795 }
1796
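/* Clear the stopped state and restart every registered exec queue after a reset. */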
1797 int xe_guc_submit_start(struct xe_guc *guc)
1798 {
1799 struct xe_exec_queue *q;
1800 unsigned long index;
1801 struct xe_device *xe = guc_to_xe(guc);
1802
1803 xe_assert(xe, guc_read_stopped(guc) == 1);
1804
1805 mutex_lock(&guc->submission_state.lock);
1806 atomic_dec(&guc->submission_state.stopped);
1807 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
1808 /* Prevent redundant attempts to start parallel queues */
1809 if (q->guc->id != index)
1810 continue;
1811
1812 guc_exec_queue_start(q);
1813 }
1814 mutex_unlock(&guc->submission_state.lock);
1815
1816 wake_up_all(&guc->ct.wq);
1817
1818 return 0;
1819 }
1820
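/*
 * Resolve the guc_id from a G2H message to its exec queue, validating the ID
 * range; returns NULL (and logs an error) on an invalid or unknown ID.
 */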
1821 static struct xe_exec_queue *
1822 g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
1823 {
1824 struct xe_device *xe = guc_to_xe(guc);
1825 struct xe_exec_queue *q;
1826
1827 if (unlikely(guc_id >= GUC_ID_MAX)) {
1828 drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
1829 return NULL;
1830 }
1831
1832 q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
1833 if (unlikely(!q)) {
1834 drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id);
1835 return NULL;
1836 }
1837
1838 xe_assert(xe, guc_id >= q->guc->id);
1839 xe_assert(xe, guc_id < (q->guc->id + q->width));
1840
1841 return q;
1842 }
1843
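/*
 * Send a DEREGISTER_CONTEXT H2G for a destroyed, registered queue with no
 * schedule enable/disable pending.
 */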
1844 static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
1845 {
1846 u32 action[] = {
1847 XE_GUC_ACTION_DEREGISTER_CONTEXT,
1848 q->guc->id,
1849 };
1850
1851 xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q));
1852 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
1853 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
1854 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
1855
1856 trace_xe_exec_queue_deregister(q);
1857
1858 xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
1859 }
1860
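/*
 * Process a SCHED_DONE G2H: runnable_state == 1 acknowledges a schedule
 * enable, runnable_state == 0 acknowledges a disable (suspend, ban/timeout
 * check or teardown).
 */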
1861 static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
1862 u32 runnable_state)
1863 {
1864 trace_xe_exec_queue_scheduling_done(q);
1865
1866 if (runnable_state == 1) {
1867 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q));
1868
1869 q->guc->resume_time = ktime_get();
1870 clear_exec_queue_pending_enable(q);
1871 smp_wmb();
1872 wake_up_all(&guc->ct.wq);
1873 } else {
1874 bool check_timeout = exec_queue_check_timeout(q);
1875
1876 xe_gt_assert(guc_to_gt(guc), runnable_state == 0);
1877 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q));
1878
1879 if (q->guc->suspend_pending) {
1880 suspend_fence_signal(q);
1881 clear_exec_queue_pending_disable(q);
1882 } else {
1883 if (exec_queue_banned(q) || check_timeout) {
1884 smp_wmb();
1885 wake_up_all(&guc->ct.wq);
1886 }
1887 if (!check_timeout && exec_queue_destroyed(q)) {
1888 /*
1889 * Make sure to clear the pending_disable only
1890 * after sampling the destroyed state. We want
1891 * to ensure we don't trigger the unregister too
1892 * early with something intending to only
1893 * disable scheduling. The caller doing the
1894 * destroy must wait for an ongoing
1895 * pending_disable before marking as destroyed.
1896 */
1897 clear_exec_queue_pending_disable(q);
1898 deregister_exec_queue(guc, q);
1899 } else {
1900 clear_exec_queue_pending_disable(q);
1901 }
1902 }
1903 }
1904 }
1905
1906 int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
1907 {
1908 struct xe_device *xe = guc_to_xe(guc);
1909 struct xe_exec_queue *q;
1910 u32 guc_id = msg[0];
1911 u32 runnable_state = msg[1];
1912
1913 if (unlikely(len < 2)) {
1914 drm_err(&xe->drm, "Invalid length %u", len);
1915 return -EPROTO;
1916 }
1917
1918 q = g2h_exec_queue_lookup(guc, guc_id);
1919 if (unlikely(!q))
1920 return -EPROTO;
1921
1922 if (unlikely(!exec_queue_pending_enable(q) &&
1923 !exec_queue_pending_disable(q))) {
1924 xe_gt_err(guc_to_gt(guc),
1925 "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u",
1926 atomic_read(&q->guc->state), q->guc->id,
1927 runnable_state);
1928 return -EPROTO;
1929 }
1930
1931 handle_sched_done(guc, q, runnable_state);
1932
1933 return 0;
1934 }
1935
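/*
 * Final teardown once the GuC acknowledges deregistration: drop the extra
 * reference for LR/extra-ref queues, otherwise finish freeing the queue.
 */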
1936 static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
1937 {
1938 trace_xe_exec_queue_deregister_done(q);
1939
1940 clear_exec_queue_registered(q);
1941
1942 if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
1943 xe_exec_queue_put(q);
1944 else
1945 __guc_exec_queue_fini(guc, q);
1946 }
1947
1948 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
1949 {
1950 struct xe_device *xe = guc_to_xe(guc);
1951 struct xe_exec_queue *q;
1952 u32 guc_id = msg[0];
1953
1954 if (unlikely(len < 1)) {
1955 drm_err(&xe->drm, "Invalid length %u", len);
1956 return -EPROTO;
1957 }
1958
1959 q = g2h_exec_queue_lookup(guc, guc_id);
1960 if (unlikely(!q))
1961 return -EPROTO;
1962
1963 if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
1964 exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
1965 xe_gt_err(guc_to_gt(guc),
1966 "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d",
1967 atomic_read(&q->guc->state), q->guc->id);
1968 return -EPROTO;
1969 }
1970
1971 handle_deregister_done(guc, q);
1972
1973 return 0;
1974 }
1975
1976 int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
1977 {
1978 struct xe_gt *gt = guc_to_gt(guc);
1979 struct xe_device *xe = guc_to_xe(guc);
1980 struct xe_exec_queue *q;
1981 u32 guc_id = msg[0];
1982
1983 if (unlikely(len < 1)) {
1984 drm_err(&xe->drm, "Invalid length %u", len);
1985 return -EPROTO;
1986 }
1987
1988 q = g2h_exec_queue_lookup(guc, guc_id);
1989 if (unlikely(!q))
1990 return -EPROTO;
1991
1992 xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d",
1993 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
1994
1995 /* FIXME: Do error capture, most likely async */
1996
1997 trace_xe_exec_queue_reset(q);
1998
1999 /*
2000 * A banned engine is a NOP at this point (it came from
2001 * guc_exec_queue_timedout_job). Otherwise, kick the DRM scheduler to cancel
2002 * jobs by setting the job timeout to the minimum value, which kicks
2003 * guc_exec_queue_timedout_job.
2004 */
2005 set_exec_queue_reset(q);
2006 if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
2007 xe_guc_exec_queue_trigger_cleanup(q);
2008
2009 return 0;
2010 }
2011
2012 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
2013 u32 len)
2014 {
2015 struct xe_gt *gt = guc_to_gt(guc);
2016 struct xe_device *xe = guc_to_xe(guc);
2017 struct xe_exec_queue *q;
2018 u32 guc_id = msg[0];
2019
2020 if (unlikely(len < 1)) {
2021 drm_err(&xe->drm, "Invalid length %u", len);
2022 return -EPROTO;
2023 }
2024
2025 q = g2h_exec_queue_lookup(guc, guc_id);
2026 if (unlikely(!q))
2027 return -EPROTO;
2028
2029 xe_gt_dbg(gt, "Engine memory cat error: engine_class=%s, logical_mask: 0x%x, guc_id=%d",
2030 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
2031
2032 trace_xe_exec_queue_memory_cat_error(q);
2033
2034 /* Treat the same as engine reset */
2035 set_exec_queue_reset(q);
2036 if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
2037 xe_guc_exec_queue_trigger_cleanup(q);
2038
2039 return 0;
2040 }
2041
2042 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
2043 {
2044 struct xe_device *xe = guc_to_xe(guc);
2045 u8 guc_class, instance;
2046 u32 reason;
2047
2048 if (unlikely(len != 3)) {
2049 drm_err(&xe->drm, "Invalid length %u", len);
2050 return -EPROTO;
2051 }
2052
2053 guc_class = msg[0];
2054 instance = msg[1];
2055 reason = msg[2];
2056
2057 /* Unexpected failure of a hardware feature, log an actual error */
2058 drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X",
2059 guc_class, instance, reason);
2060
2061 xe_gt_reset_async(guc_to_gt(guc));
2062
2063 return 0;
2064 }
2065
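/* Copy the parallel submission work queue descriptor and entries into the snapshot. */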
2066 static void
2067 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
2068 struct xe_guc_submit_exec_queue_snapshot *snapshot)
2069 {
2070 struct xe_guc *guc = exec_queue_to_guc(q);
2071 struct xe_device *xe = guc_to_xe(guc);
2072 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
2073 int i;
2074
2075 snapshot->guc.wqi_head = q->guc->wqi_head;
2076 snapshot->guc.wqi_tail = q->guc->wqi_tail;
2077 snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
2078 snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
2079 snapshot->parallel.wq_desc.status = parallel_read(xe, map,
2080 wq_desc.wq_status);
2081
2082 if (snapshot->parallel.wq_desc.head !=
2083 snapshot->parallel.wq_desc.tail) {
2084 for (i = snapshot->parallel.wq_desc.head;
2085 i != snapshot->parallel.wq_desc.tail;
2086 i = (i + sizeof(u32)) % WQ_SIZE)
2087 snapshot->parallel.wq[i / sizeof(u32)] =
2088 parallel_read(xe, map, wq[i / sizeof(u32)]);
2089 }
2090 }
2091
2092 static void
2093 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
2094 struct drm_printer *p)
2095 {
2096 int i;
2097
2098 drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
2099 snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head);
2100 drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
2101 snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail);
2102 drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status);
2103
2104 if (snapshot->parallel.wq_desc.head !=
2105 snapshot->parallel.wq_desc.tail) {
2106 for (i = snapshot->parallel.wq_desc.head;
2107 i != snapshot->parallel.wq_desc.tail;
2108 i = (i + sizeof(u32)) % WQ_SIZE)
2109 drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
2110 snapshot->parallel.wq[i / sizeof(u32)]);
2111 }
2112 }
2113
2114 /**
2115 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
2116 * @q: faulty exec queue
2117 *
2118 * The snapshot can be printed out at a later stage, for example during
2119 * dev_coredump analysis.
2120 *
2121 * Returns: a GuC Submit Engine snapshot object that must be freed by the
2122 * caller, using `xe_guc_exec_queue_snapshot_free`.
2123 */
2124 struct xe_guc_submit_exec_queue_snapshot *
2125 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
2126 {
2127 struct xe_gpu_scheduler *sched = &q->guc->sched;
2128 struct xe_guc_submit_exec_queue_snapshot *snapshot;
2129 int i;
2130
2131 snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
2132
2133 if (!snapshot)
2134 return NULL;
2135
2136 snapshot->guc.id = q->guc->id;
2137 memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
2138 snapshot->class = q->class;
2139 snapshot->logical_mask = q->logical_mask;
2140 snapshot->width = q->width;
2141 snapshot->refcount = kref_read(&q->refcount);
2142 snapshot->sched_timeout = sched->base.timeout;
2143 snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
2144 snapshot->sched_props.preempt_timeout_us =
2145 q->sched_props.preempt_timeout_us;
2146
2147 snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *),
2148 GFP_ATOMIC);
2149
2150 if (snapshot->lrc) {
2151 for (i = 0; i < q->width; ++i) {
2152 struct xe_lrc *lrc = q->lrc[i];
2153
2154 snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc);
2155 }
2156 }
2157
2158 snapshot->schedule_state = atomic_read(&q->guc->state);
2159 snapshot->exec_queue_flags = q->flags;
2160
2161 snapshot->parallel_execution = xe_exec_queue_is_parallel(q);
2162 if (snapshot->parallel_execution)
2163 guc_exec_queue_wq_snapshot_capture(q, snapshot);
2164
2165 spin_lock(&sched->base.job_list_lock);
2166 snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list);
2167 snapshot->pending_list = kmalloc_array(snapshot->pending_list_size,
2168 sizeof(struct pending_list_snapshot),
2169 GFP_ATOMIC);
2170
2171 if (snapshot->pending_list) {
2172 struct xe_sched_job *job_iter;
2173
2174 i = 0;
2175 list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) {
2176 snapshot->pending_list[i].seqno =
2177 xe_sched_job_seqno(job_iter);
2178 snapshot->pending_list[i].fence =
2179 dma_fence_is_signaled(job_iter->fence) ? 1 : 0;
2180 snapshot->pending_list[i].finished =
2181 dma_fence_is_signaled(&job_iter->drm.s_fence->finished)
2182 ? 1 : 0;
2183 i++;
2184 }
2185 }
2186
2187 spin_unlock(&sched->base.job_list_lock);
2188
2189 return snapshot;
2190 }
2191
2192 /**
2193 * xe_guc_exec_queue_snapshot_capture_delayed - Take the delayed part of the GuC engine snapshot.
2194 * @snapshot: Previously captured snapshot of the exec queue.
2195 *
2196 * This captures some data that requires taking locks, so it cannot be done in the signaling path.
2197 */
2198 void
2199 xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot)
2200 {
2201 int i;
2202
2203 if (!snapshot || !snapshot->lrc)
2204 return;
2205
2206 for (i = 0; i < snapshot->width; ++i)
2207 xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]);
2208 }
2209
2210 /**
2211 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
2212 * @snapshot: GuC Submit Engine snapshot object.
2213 * @p: drm_printer where it will be printed out.
2214 *
2215 * This function prints out a given GuC Submit Engine snapshot object.
2216 */
2217 void
2218 xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
2219 struct drm_printer *p)
2220 {
2221 int i;
2222
2223 if (!snapshot)
2224 return;
2225
2226 drm_printf(p, "GuC ID: %d\n", snapshot->guc.id);
2227 drm_printf(p, "\tName: %s\n", snapshot->name);
2228 drm_printf(p, "\tClass: %d\n", snapshot->class);
2229 drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
2230 drm_printf(p, "\tWidth: %d\n", snapshot->width);
2231 drm_printf(p, "\tRef: %d\n", snapshot->refcount);
2232 drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
2233 drm_printf(p, "\tTimeslice: %u (us)\n",
2234 snapshot->sched_props.timeslice_us);
2235 drm_printf(p, "\tPreempt timeout: %u (us)\n",
2236 snapshot->sched_props.preempt_timeout_us);
2237
2238 for (i = 0; snapshot->lrc && i < snapshot->width; ++i)
2239 xe_lrc_snapshot_print(snapshot->lrc[i], p);
2240
2241 drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
2242 drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);
2243
2244 if (snapshot->parallel_execution)
2245 guc_exec_queue_wq_snapshot_print(snapshot, p);
2246
2247 for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
2248 i++)
2249 drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
2250 snapshot->pending_list[i].seqno,
2251 snapshot->pending_list[i].fence,
2252 snapshot->pending_list[i].finished);
2253 }
2254
2255 /**
2256 * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
2257 * snapshot.
2258 * @snapshot: GuC Submit Engine snapshot object.
2259 *
2260 * This function frees all the memory that was allocated at capture
2261 * time.
2262 */
2263 void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
2264 {
2265 int i;
2266
2267 if (!snapshot)
2268 return;
2269
2270 if (snapshot->lrc) {
2271 for (i = 0; i < snapshot->width; i++)
2272 xe_lrc_snapshot_free(snapshot->lrc[i]);
2273 kfree(snapshot->lrc);
2274 }
2275 kfree(snapshot->pending_list);
2276 kfree(snapshot);
2277 }
2278
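/* Capture, print and free a snapshot for a single exec queue. */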
2279 static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
2280 {
2281 struct xe_guc_submit_exec_queue_snapshot *snapshot;
2282
2283 snapshot = xe_guc_exec_queue_snapshot_capture(q);
2284 xe_guc_exec_queue_snapshot_print(snapshot, p);
2285 xe_guc_exec_queue_snapshot_free(snapshot);
2286 }
2287
2288 /**
2289 * xe_guc_submit_print - GuC Submit Print.
2290 * @guc: GuC.
2291 * @p: drm_printer where it will be printed out.
2292 *
2293 * This function captures and prints snapshots of **all** GuC Engines.
2294 */
2295 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
2296 {
2297 struct xe_exec_queue *q;
2298 unsigned long index;
2299
2300 if (!xe_device_uc_enabled(guc_to_xe(guc)))
2301 return;
2302
2303 mutex_lock(&guc->submission_state.lock);
2304 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
2305 guc_exec_queue_print(q, p);
2306 mutex_unlock(&guc->submission_state.lock);
2307 }
2308