1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2023-2024 Intel Corporation
4 */
5
6 #include <drm/drm_managed.h>
7
8 #include "abi/guc_actions_sriov_abi.h"
9
10 #include "xe_device.h"
11 #include "xe_gt.h"
12 #include "xe_gt_sriov_pf.h"
13 #include "xe_gt_sriov_pf_config.h"
14 #include "xe_gt_sriov_pf_control.h"
15 #include "xe_gt_sriov_pf_helpers.h"
16 #include "xe_gt_sriov_pf_monitor.h"
17 #include "xe_gt_sriov_pf_service.h"
18 #include "xe_gt_sriov_printk.h"
19 #include "xe_guc_ct.h"
20 #include "xe_sriov.h"
21
control_cmd_to_string(u32 cmd)22 static const char *control_cmd_to_string(u32 cmd)
23 {
24 switch (cmd) {
25 case GUC_PF_TRIGGER_VF_PAUSE:
26 return "PAUSE";
27 case GUC_PF_TRIGGER_VF_RESUME:
28 return "RESUME";
29 case GUC_PF_TRIGGER_VF_STOP:
30 return "STOP";
31 case GUC_PF_TRIGGER_VF_FLR_START:
32 return "FLR_START";
33 case GUC_PF_TRIGGER_VF_FLR_FINISH:
34 return "FLR_FINISH";
35 default:
36 return "<unknown>";
37 }
38 }
39
guc_action_vf_control_cmd(struct xe_guc * guc,u32 vfid,u32 cmd)40 static int guc_action_vf_control_cmd(struct xe_guc *guc, u32 vfid, u32 cmd)
41 {
42 u32 request[PF2GUC_VF_CONTROL_REQUEST_MSG_LEN] = {
43 FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
44 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
45 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_VF_CONTROL),
46 FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID, vfid),
47 FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND, cmd),
48 };
49 int ret;
50
51 ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
52 return ret > 0 ? -EPROTO : ret;
53 }
54
pf_send_vf_control_cmd(struct xe_gt * gt,unsigned int vfid,u32 cmd)55 static int pf_send_vf_control_cmd(struct xe_gt *gt, unsigned int vfid, u32 cmd)
56 {
57 int err;
58
59 xe_gt_assert(gt, vfid != PFID);
60 xe_gt_sriov_dbg_verbose(gt, "sending VF%u control command %s\n",
61 vfid, control_cmd_to_string(cmd));
62
63 err = guc_action_vf_control_cmd(>->uc.guc, vfid, cmd);
64 if (unlikely(err))
65 xe_gt_sriov_err(gt, "VF%u control command %s failed (%pe)\n",
66 vfid, control_cmd_to_string(cmd), ERR_PTR(err));
67 return err;
68 }
69
pf_send_vf_pause(struct xe_gt * gt,unsigned int vfid)70 static int pf_send_vf_pause(struct xe_gt *gt, unsigned int vfid)
71 {
72 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_PAUSE);
73 }
74
pf_send_vf_resume(struct xe_gt * gt,unsigned int vfid)75 static int pf_send_vf_resume(struct xe_gt *gt, unsigned int vfid)
76 {
77 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_RESUME);
78 }
79
pf_send_vf_stop(struct xe_gt * gt,unsigned int vfid)80 static int pf_send_vf_stop(struct xe_gt *gt, unsigned int vfid)
81 {
82 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_STOP);
83 }
84
pf_send_vf_flr_start(struct xe_gt * gt,unsigned int vfid)85 static int pf_send_vf_flr_start(struct xe_gt *gt, unsigned int vfid)
86 {
87 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_START);
88 }
89
pf_send_vf_flr_finish(struct xe_gt * gt,unsigned int vfid)90 static int pf_send_vf_flr_finish(struct xe_gt *gt, unsigned int vfid)
91 {
92 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_FINISH);
93 }
94
95 /**
96 * DOC: The VF state machine
97 *
98 * The simplified VF state machine could be presented as::
99 *
100 * pause--------------------------o
101 * / |
102 * / v
103 * (READY)<------------------resume-----(PAUSED)
104 * ^ \ / /
105 * | \ / /
106 * | stop---->(STOPPED)<----stop /
107 * | / /
108 * | / /
109 * o--------<-----flr /
110 * \ /
111 * o------<--------------------flr
112 *
113 * Where:
114 *
115 * * READY - represents a state in which VF is fully operable
116 * * PAUSED - represents a state in which VF activity is temporarily suspended
117 * * STOPPED - represents a state in which VF activity is definitely halted
118 * * pause - represents a request to temporarily suspend VF activity
119 * * resume - represents a request to resume VF activity
120 * * stop - represents a request to definitely halt VF activity
121 * * flr - represents a request to perform VF FLR to restore VF activity
122 *
123 * However, each state transition requires additional steps that involves
124 * communication with GuC that might fail or be interrupted by other requests::
125 *
126 * .................................WIP....
127 * : :
128 * pause--------------------->PAUSE_WIP----------------------------o
129 * / : / \ : |
130 * / : o----<---stop flr--o : |
131 * / : | \ / | : V
132 * (READY,RESUMED)<--------+------------RESUME_WIP<----+--<-----resume--(PAUSED)
133 * ^ \ \ : | | : / /
134 * | \ \ : | | : / /
135 * | \ \ : | | : / /
136 * | \ \ : o----<----------------------+--<-------stop /
137 * | \ \ : | | : /
138 * | \ \ : V | : /
139 * | \ stop----->STOP_WIP---------flr--->-----o : /
140 * | \ : | | : /
141 * | \ : | V : /
142 * | flr--------+----->----------------->FLR_WIP<-----flr
143 * | : | / ^ :
144 * | : | / | :
145 * o--------<-------:----+-----<----------------o | :
146 * : | | :
147 * :....|...........................|.....:
148 * | |
149 * V |
150 * (STOPPED)--------------------flr
151 *
152 * For details about each internal WIP state machine see:
153 *
154 * * `The VF PAUSE state machine`_
155 * * `The VF RESUME state machine`_
156 * * `The VF STOP state machine`_
157 * * `The VF FLR state machine`_
158 */
159
160 #ifdef CONFIG_DRM_XE_DEBUG_SRIOV
control_bit_to_string(enum xe_gt_sriov_control_bits bit)161 static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
162 {
163 switch (bit) {
164 #define CASE2STR(_X) \
165 case XE_GT_SRIOV_STATE_##_X: return #_X
166 CASE2STR(WIP);
167 CASE2STR(FLR_WIP);
168 CASE2STR(FLR_SEND_START);
169 CASE2STR(FLR_WAIT_GUC);
170 CASE2STR(FLR_GUC_DONE);
171 CASE2STR(FLR_RESET_CONFIG);
172 CASE2STR(FLR_RESET_DATA);
173 CASE2STR(FLR_RESET_MMIO);
174 CASE2STR(FLR_SEND_FINISH);
175 CASE2STR(FLR_FAILED);
176 CASE2STR(PAUSE_WIP);
177 CASE2STR(PAUSE_SEND_PAUSE);
178 CASE2STR(PAUSE_WAIT_GUC);
179 CASE2STR(PAUSE_GUC_DONE);
180 CASE2STR(PAUSE_FAILED);
181 CASE2STR(PAUSED);
182 CASE2STR(RESUME_WIP);
183 CASE2STR(RESUME_SEND_RESUME);
184 CASE2STR(RESUME_FAILED);
185 CASE2STR(RESUMED);
186 CASE2STR(STOP_WIP);
187 CASE2STR(STOP_SEND_STOP);
188 CASE2STR(STOP_FAILED);
189 CASE2STR(STOPPED);
190 CASE2STR(MISMATCH);
191 #undef CASE2STR
192 default: return "?";
193 }
194 }
195 #endif
196
pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)197 static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)
198 {
199 switch (bit) {
200 case XE_GT_SRIOV_STATE_FLR_WAIT_GUC:
201 case XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC:
202 return HZ / 2;
203 case XE_GT_SRIOV_STATE_FLR_WIP:
204 case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG:
205 return 5 * HZ;
206 default:
207 return HZ;
208 }
209 }
210
pf_pick_vf_control(struct xe_gt * gt,unsigned int vfid)211 static struct xe_gt_sriov_control_state *pf_pick_vf_control(struct xe_gt *gt, unsigned int vfid)
212 {
213 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
214 xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));
215
216 return >->sriov.pf.vfs[vfid].control;
217 }
218
pf_peek_vf_state(struct xe_gt * gt,unsigned int vfid)219 static unsigned long *pf_peek_vf_state(struct xe_gt *gt, unsigned int vfid)
220 {
221 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
222
223 return &cs->state;
224 }
225
pf_check_vf_state(struct xe_gt * gt,unsigned int vfid,enum xe_gt_sriov_control_bits bit)226 static bool pf_check_vf_state(struct xe_gt *gt, unsigned int vfid,
227 enum xe_gt_sriov_control_bits bit)
228 {
229 return test_bit(bit, pf_peek_vf_state(gt, vfid));
230 }
231
pf_dump_vf_state(struct xe_gt * gt,unsigned int vfid)232 static void pf_dump_vf_state(struct xe_gt *gt, unsigned int vfid)
233 {
234 unsigned long state = *pf_peek_vf_state(gt, vfid);
235 enum xe_gt_sriov_control_bits bit;
236
237 if (state) {
238 xe_gt_sriov_dbg_verbose(gt, "VF%u state %#lx%s%*pbl\n",
239 vfid, state, state ? " bits " : "",
240 (int)BITS_PER_LONG, &state);
241 for_each_set_bit(bit, &state, BITS_PER_LONG)
242 xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d)\n",
243 vfid, control_bit_to_string(bit), bit);
244 } else {
245 xe_gt_sriov_dbg_verbose(gt, "VF%u state READY\n", vfid);
246 }
247 }
248
pf_expect_vf_state(struct xe_gt * gt,unsigned int vfid,enum xe_gt_sriov_control_bits bit)249 static bool pf_expect_vf_state(struct xe_gt *gt, unsigned int vfid,
250 enum xe_gt_sriov_control_bits bit)
251 {
252 bool result = pf_check_vf_state(gt, vfid, bit);
253
254 if (unlikely(!result))
255 pf_dump_vf_state(gt, vfid);
256
257 return result;
258 }
259
pf_expect_vf_not_state(struct xe_gt * gt,unsigned int vfid,enum xe_gt_sriov_control_bits bit)260 static bool pf_expect_vf_not_state(struct xe_gt *gt, unsigned int vfid,
261 enum xe_gt_sriov_control_bits bit)
262 {
263 bool result = !pf_check_vf_state(gt, vfid, bit);
264
265 if (unlikely(!result))
266 pf_dump_vf_state(gt, vfid);
267
268 return result;
269 }
270
pf_enter_vf_state(struct xe_gt * gt,unsigned int vfid,enum xe_gt_sriov_control_bits bit)271 static bool pf_enter_vf_state(struct xe_gt *gt, unsigned int vfid,
272 enum xe_gt_sriov_control_bits bit)
273 {
274 if (!test_and_set_bit(bit, pf_peek_vf_state(gt, vfid))) {
275 xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) enter\n",
276 vfid, control_bit_to_string(bit), bit);
277 return true;
278 }
279 return false;
280 }
281
pf_exit_vf_state(struct xe_gt * gt,unsigned int vfid,enum xe_gt_sriov_control_bits bit)282 static bool pf_exit_vf_state(struct xe_gt *gt, unsigned int vfid,
283 enum xe_gt_sriov_control_bits bit)
284 {
285 if (test_and_clear_bit(bit, pf_peek_vf_state(gt, vfid))) {
286 xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) exit\n",
287 vfid, control_bit_to_string(bit), bit);
288 return true;
289 }
290 return false;
291 }
292
pf_escape_vf_state(struct xe_gt * gt,unsigned int vfid,enum xe_gt_sriov_control_bits bit)293 static void pf_escape_vf_state(struct xe_gt *gt, unsigned int vfid,
294 enum xe_gt_sriov_control_bits bit)
295 {
296 if (pf_exit_vf_state(gt, vfid, bit))
297 xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) escaped by %ps\n",
298 vfid, control_bit_to_string(bit), bit,
299 __builtin_return_address(0));
300 }
301
pf_enter_vf_mismatch(struct xe_gt * gt,unsigned int vfid)302 static void pf_enter_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
303 {
304 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH)) {
305 xe_gt_sriov_dbg(gt, "VF%u state mismatch detected by %ps\n",
306 vfid, __builtin_return_address(0));
307 pf_dump_vf_state(gt, vfid);
308 }
309 }
310
pf_exit_vf_mismatch(struct xe_gt * gt,unsigned int vfid)311 static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
312 {
313 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH))
314 xe_gt_sriov_dbg(gt, "VF%u state mismatch cleared by %ps\n",
315 vfid, __builtin_return_address(0));
316
317 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
318 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
319 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
320 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED);
321 }
322
323 #define pf_enter_vf_state_machine_bug(gt, vfid) ({ \
324 pf_enter_vf_mismatch((gt), (vfid)); \
325 })
326
pf_queue_control_worker(struct xe_gt * gt)327 static void pf_queue_control_worker(struct xe_gt *gt)
328 {
329 struct xe_device *xe = gt_to_xe(gt);
330
331 xe_gt_assert(gt, IS_SRIOV_PF(xe));
332
333 queue_work(xe->sriov.wq, >->sriov.pf.control.worker);
334 }
335
pf_queue_vf(struct xe_gt * gt,unsigned int vfid)336 static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid)
337 {
338 struct xe_gt_sriov_pf_control *pfc = >->sriov.pf.control;
339
340 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
341
342 spin_lock(&pfc->lock);
343 list_move_tail(>->sriov.pf.vfs[vfid].control.link, &pfc->list);
344 spin_unlock(&pfc->lock);
345
346 pf_queue_control_worker(gt);
347 }
348
349 static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid);
350 static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid);
351 static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid);
352 static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid);
353
pf_enter_vf_wip(struct xe_gt * gt,unsigned int vfid)354 static bool pf_enter_vf_wip(struct xe_gt *gt, unsigned int vfid)
355 {
356 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
357 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
358
359 reinit_completion(&cs->done);
360 return true;
361 }
362 return false;
363 }
364
pf_exit_vf_wip(struct xe_gt * gt,unsigned int vfid)365 static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid)
366 {
367 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
368 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
369
370 pf_exit_vf_flr_wip(gt, vfid);
371 pf_exit_vf_stop_wip(gt, vfid);
372 pf_exit_vf_pause_wip(gt, vfid);
373 pf_exit_vf_resume_wip(gt, vfid);
374
375 complete_all(&cs->done);
376 }
377 }
378
pf_wait_vf_wip_done(struct xe_gt * gt,unsigned int vfid,unsigned long timeout)379 static int pf_wait_vf_wip_done(struct xe_gt *gt, unsigned int vfid, unsigned long timeout)
380 {
381 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
382
383 return wait_for_completion_timeout(&cs->done, timeout) ? 0 : -ETIMEDOUT;
384 }
385
pf_enter_vf_ready(struct xe_gt * gt,unsigned int vfid)386 static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
387 {
388 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
389 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED);
390 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
391 pf_exit_vf_mismatch(gt, vfid);
392 pf_exit_vf_wip(gt, vfid);
393 }
394
395 /**
396 * DOC: The VF PAUSE state machine
397 *
398 * The VF PAUSE state machine looks like::
399 *
400 * (READY,RESUMED)<-------------<---------------------o---------o
401 * | \ \
402 * pause \ \
403 * | \ \
404 * ....V...........................PAUSE_WIP........ \ \
405 * : \ : o \
406 * : \ o------<-----busy : | \
407 * : \ / / : | |
408 * : PAUSE_SEND_PAUSE ---failed--->----------o--->(PAUSE_FAILED) |
409 * : | \ : | |
410 * : acked rejected---->----------o--->(MISMATCH) /
411 * : | : /
412 * : v : /
413 * : PAUSE_WAIT_GUC : /
414 * : | : /
415 * : done : /
416 * : | : /
417 * : v : /
418 * : PAUSE_GUC_DONE o-----restart
419 * : / :
420 * : / :
421 * :....o..............o...............o...........:
422 * | | |
423 * completed flr stop
424 * | | |
425 * V .....V..... ......V.....
426 * (PAUSED) : FLR_WIP : : STOP_WIP :
427 * :.........: :..........:
428 *
429 * For the full state machine view, see `The VF state machine`_.
430 */
431
pf_exit_vf_pause_wip(struct xe_gt * gt,unsigned int vfid)432 static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
433 {
434 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
435 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE);
436 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
437 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE);
438 }
439 }
440
pf_enter_vf_paused(struct xe_gt * gt,unsigned int vfid)441 static void pf_enter_vf_paused(struct xe_gt *gt, unsigned int vfid)
442 {
443 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED))
444 pf_enter_vf_state_machine_bug(gt, vfid);
445
446 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
447 pf_exit_vf_mismatch(gt, vfid);
448 pf_exit_vf_wip(gt, vfid);
449 }
450
pf_enter_vf_pause_completed(struct xe_gt * gt,unsigned int vfid)451 static void pf_enter_vf_pause_completed(struct xe_gt *gt, unsigned int vfid)
452 {
453 pf_enter_vf_paused(gt, vfid);
454 }
455
pf_enter_vf_pause_failed(struct xe_gt * gt,unsigned int vfid)456 static void pf_enter_vf_pause_failed(struct xe_gt *gt, unsigned int vfid)
457 {
458 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
459 pf_exit_vf_wip(gt, vfid);
460 }
461
pf_enter_vf_pause_rejected(struct xe_gt * gt,unsigned int vfid)462 static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid)
463 {
464 pf_enter_vf_mismatch(gt, vfid);
465 pf_enter_vf_pause_failed(gt, vfid);
466 }
467
pf_exit_vf_pause_guc_done(struct xe_gt * gt,unsigned int vfid)468 static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
469 {
470 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
471 return false;
472
473 pf_enter_vf_pause_completed(gt, vfid);
474 return true;
475 }
476
pf_enter_vf_pause_guc_done(struct xe_gt * gt,unsigned int vfid)477 static void pf_enter_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
478 {
479 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
480 pf_queue_vf(gt, vfid);
481 }
482
pf_enter_pause_wait_guc(struct xe_gt * gt,unsigned int vfid)483 static void pf_enter_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
484 {
485 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC))
486 pf_enter_vf_state_machine_bug(gt, vfid);
487 }
488
pf_exit_pause_wait_guc(struct xe_gt * gt,unsigned int vfid)489 static bool pf_exit_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
490 {
491 return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
492 }
493
pf_enter_vf_pause_send_pause(struct xe_gt * gt,unsigned int vfid)494 static void pf_enter_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
495 {
496 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
497 pf_enter_vf_state_machine_bug(gt, vfid);
498
499 pf_queue_vf(gt, vfid);
500 }
501
pf_exit_vf_pause_send_pause(struct xe_gt * gt,unsigned int vfid)502 static bool pf_exit_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
503 {
504 int err;
505
506 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
507 return false;
508
509 /* GuC may actually send a PAUSE_DONE before we get a RESPONSE */
510 pf_enter_pause_wait_guc(gt, vfid);
511
512 err = pf_send_vf_pause(gt, vfid);
513 if (err) {
514 /* send failed, so we shouldn't expect PAUSE_DONE from GuC */
515 pf_exit_pause_wait_guc(gt, vfid);
516
517 if (err == -EBUSY)
518 pf_enter_vf_pause_send_pause(gt, vfid);
519 else if (err == -EIO)
520 pf_enter_vf_pause_rejected(gt, vfid);
521 else
522 pf_enter_vf_pause_failed(gt, vfid);
523 } else {
524 /*
525 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
526 * but since GuC didn't complain, we may clear MISMATCH
527 */
528 pf_exit_vf_mismatch(gt, vfid);
529 }
530
531 return true;
532 }
533
pf_enter_vf_pause_wip(struct xe_gt * gt,unsigned int vfid)534 static bool pf_enter_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
535 {
536 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
537 pf_enter_vf_wip(gt, vfid);
538 pf_enter_vf_pause_send_pause(gt, vfid);
539 return true;
540 }
541
542 return false;
543 }
544
545 /**
546 * xe_gt_sriov_pf_control_pause_vf - Pause a VF.
547 * @gt: the &xe_gt
548 * @vfid: the VF identifier
549 *
550 * This function is for PF only.
551 *
552 * Return: 0 on success or a negative error code on failure.
553 */
xe_gt_sriov_pf_control_pause_vf(struct xe_gt * gt,unsigned int vfid)554 int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid)
555 {
556 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_PAUSE_WIP);
557 int err;
558
559 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
560 xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
561 return -EPERM;
562 }
563
564 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
565 xe_gt_sriov_dbg(gt, "VF%u was already paused!\n", vfid);
566 return -ESTALE;
567 }
568
569 if (!pf_enter_vf_pause_wip(gt, vfid)) {
570 xe_gt_sriov_dbg(gt, "VF%u pause already in progress!\n", vfid);
571 return -EALREADY;
572 }
573
574 err = pf_wait_vf_wip_done(gt, vfid, timeout);
575 if (err) {
576 xe_gt_sriov_dbg(gt, "VF%u pause didn't finish in %u ms (%pe)\n",
577 vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
578 return err;
579 }
580
581 if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
582 xe_gt_sriov_info(gt, "VF%u paused!\n", vfid);
583 return 0;
584 }
585
586 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED)) {
587 xe_gt_sriov_dbg(gt, "VF%u pause failed!\n", vfid);
588 return -EIO;
589 }
590
591 xe_gt_sriov_dbg(gt, "VF%u pause was canceled!\n", vfid);
592 return -ECANCELED;
593 }
594
595 /**
596 * DOC: The VF RESUME state machine
597 *
598 * The VF RESUME state machine looks like::
599 *
600 * (PAUSED)<-----------------<------------------------o
601 * | \
602 * resume \
603 * | \
604 * ....V............................RESUME_WIP...... \
605 * : \ : o
606 * : \ o-------<-----busy : |
607 * : \ / / : |
608 * : RESUME_SEND_RESUME ---failed--->--------o--->(RESUME_FAILED)
609 * : / \ : |
610 * : acked rejected---->---------o--->(MISMATCH)
611 * : / :
612 * :....o..............o...............o.....o.....:
613 * | | | \
614 * completed flr stop restart-->(READY)
615 * | | |
616 * V .....V..... ......V.....
617 * (RESUMED) : FLR_WIP : : STOP_WIP :
618 * :.........: :..........:
619 *
620 * For the full state machine view, see `The VF state machine`_.
621 */
622
pf_exit_vf_resume_wip(struct xe_gt * gt,unsigned int vfid)623 static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
624 {
625 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP))
626 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME);
627 }
628
pf_enter_vf_resumed(struct xe_gt * gt,unsigned int vfid)629 static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid)
630 {
631 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
632 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
633 pf_exit_vf_mismatch(gt, vfid);
634 pf_exit_vf_wip(gt, vfid);
635 }
636
pf_enter_vf_resume_completed(struct xe_gt * gt,unsigned int vfid)637 static void pf_enter_vf_resume_completed(struct xe_gt *gt, unsigned int vfid)
638 {
639 pf_enter_vf_resumed(gt, vfid);
640 }
641
pf_enter_vf_resume_failed(struct xe_gt * gt,unsigned int vfid)642 static void pf_enter_vf_resume_failed(struct xe_gt *gt, unsigned int vfid)
643 {
644 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
645 pf_exit_vf_wip(gt, vfid);
646 }
647
pf_enter_vf_resume_rejected(struct xe_gt * gt,unsigned int vfid)648 static void pf_enter_vf_resume_rejected(struct xe_gt *gt, unsigned int vfid)
649 {
650 pf_enter_vf_mismatch(gt, vfid);
651 pf_enter_vf_resume_failed(gt, vfid);
652 }
653
pf_enter_vf_resume_send_resume(struct xe_gt * gt,unsigned int vfid)654 static void pf_enter_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
655 {
656 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
657 pf_enter_vf_state_machine_bug(gt, vfid);
658
659 pf_queue_vf(gt, vfid);
660 }
661
pf_exit_vf_resume_send_resume(struct xe_gt * gt,unsigned int vfid)662 static bool pf_exit_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
663 {
664 int err;
665
666 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
667 return false;
668
669 err = pf_send_vf_resume(gt, vfid);
670 if (err == -EBUSY)
671 pf_enter_vf_resume_send_resume(gt, vfid);
672 else if (err == -EIO)
673 pf_enter_vf_resume_rejected(gt, vfid);
674 else if (err)
675 pf_enter_vf_resume_failed(gt, vfid);
676 else
677 pf_enter_vf_resume_completed(gt, vfid);
678 return true;
679 }
680
pf_enter_vf_resume_wip(struct xe_gt * gt,unsigned int vfid)681 static bool pf_enter_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
682 {
683 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP)) {
684 pf_enter_vf_wip(gt, vfid);
685 pf_enter_vf_resume_send_resume(gt, vfid);
686 return true;
687 }
688
689 return false;
690 }
691
692 /**
693 * xe_gt_sriov_pf_control_resume_vf - Resume a VF.
694 * @gt: the &xe_gt
695 * @vfid: the VF identifier
696 *
697 * This function is for PF only.
698 *
699 * Return: 0 on success or a negative error code on failure.
700 */
xe_gt_sriov_pf_control_resume_vf(struct xe_gt * gt,unsigned int vfid)701 int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
702 {
703 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESUME_WIP);
704 int err;
705
706 if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
707 xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
708 return -EPERM;
709 }
710
711 if (!pf_enter_vf_resume_wip(gt, vfid)) {
712 xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid);
713 return -EALREADY;
714 }
715
716 err = pf_wait_vf_wip_done(gt, vfid, timeout);
717 if (err)
718 return err;
719
720 if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED)) {
721 xe_gt_sriov_info(gt, "VF%u resumed!\n", vfid);
722 return 0;
723 }
724
725 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED)) {
726 xe_gt_sriov_dbg(gt, "VF%u resume failed!\n", vfid);
727 return -EIO;
728 }
729
730 xe_gt_sriov_dbg(gt, "VF%u resume was canceled!\n", vfid);
731 return -ECANCELED;
732 }
733
734 /**
735 * DOC: The VF STOP state machine
736 *
737 * The VF STOP state machine looks like::
738 *
739 * (READY,PAUSED,RESUMED)<-------<--------------------o
740 * | \
741 * stop \
742 * | \
743 * ....V..............................STOP_WIP...... \
744 * : \ : o
745 * : \ o----<----busy : |
746 * : \ / / : |
747 * : STOP_SEND_STOP--------failed--->--------o--->(STOP_FAILED)
748 * : / \ : |
749 * : acked rejected-------->--------o--->(MISMATCH)
750 * : / :
751 * :....o..............o...............o...........:
752 * | | |
753 * completed flr restart
754 * | | |
755 * V .....V..... V
756 * (STOPPED) : FLR_WIP : (READY)
757 * :.........:
758 *
759 * For the full state machine view, see `The VF state machine`_.
760 */
761
pf_exit_vf_stop_wip(struct xe_gt * gt,unsigned int vfid)762 static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
763 {
764 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP))
765 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP);
766 }
767
pf_enter_vf_stopped(struct xe_gt * gt,unsigned int vfid)768 static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid)
769 {
770 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED))
771 pf_enter_vf_state_machine_bug(gt, vfid);
772
773 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
774 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
775 pf_exit_vf_mismatch(gt, vfid);
776 pf_exit_vf_wip(gt, vfid);
777 }
778
pf_enter_vf_stop_completed(struct xe_gt * gt,unsigned int vfid)779 static void pf_enter_vf_stop_completed(struct xe_gt *gt, unsigned int vfid)
780 {
781 pf_enter_vf_stopped(gt, vfid);
782 }
783
pf_enter_vf_stop_failed(struct xe_gt * gt,unsigned int vfid)784 static void pf_enter_vf_stop_failed(struct xe_gt *gt, unsigned int vfid)
785 {
786 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
787 pf_exit_vf_wip(gt, vfid);
788 }
789
pf_enter_vf_stop_rejected(struct xe_gt * gt,unsigned int vfid)790 static void pf_enter_vf_stop_rejected(struct xe_gt *gt, unsigned int vfid)
791 {
792 pf_enter_vf_mismatch(gt, vfid);
793 pf_enter_vf_stop_failed(gt, vfid);
794 }
795
pf_enter_vf_stop_send_stop(struct xe_gt * gt,unsigned int vfid)796 static void pf_enter_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
797 {
798 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
799 pf_enter_vf_state_machine_bug(gt, vfid);
800
801 pf_queue_vf(gt, vfid);
802 }
803
pf_exit_vf_stop_send_stop(struct xe_gt * gt,unsigned int vfid)804 static bool pf_exit_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
805 {
806 int err;
807
808 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
809 return false;
810
811 err = pf_send_vf_stop(gt, vfid);
812 if (err == -EBUSY)
813 pf_enter_vf_stop_send_stop(gt, vfid);
814 else if (err == -EIO)
815 pf_enter_vf_stop_rejected(gt, vfid);
816 else if (err)
817 pf_enter_vf_stop_failed(gt, vfid);
818 else
819 pf_enter_vf_stop_completed(gt, vfid);
820 return true;
821 }
822
pf_enter_vf_stop_wip(struct xe_gt * gt,unsigned int vfid)823 static bool pf_enter_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
824 {
825 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP)) {
826 pf_enter_vf_wip(gt, vfid);
827 pf_enter_vf_stop_send_stop(gt, vfid);
828 return true;
829 }
830 return false;
831 }
832
833 /**
834 * xe_gt_sriov_pf_control_stop_vf - Stop a VF.
835 * @gt: the &xe_gt
836 * @vfid: the VF identifier
837 *
838 * This function is for PF only.
839 *
840 * Return: 0 on success or a negative error code on failure.
841 */
xe_gt_sriov_pf_control_stop_vf(struct xe_gt * gt,unsigned int vfid)842 int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid)
843 {
844 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_STOP_WIP);
845 int err;
846
847 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
848 xe_gt_sriov_dbg(gt, "VF%u was already stopped!\n", vfid);
849 return -ESTALE;
850 }
851
852 if (!pf_enter_vf_stop_wip(gt, vfid)) {
853 xe_gt_sriov_dbg(gt, "VF%u stop already in progress!\n", vfid);
854 return -EALREADY;
855 }
856
857 err = pf_wait_vf_wip_done(gt, vfid, timeout);
858 if (err)
859 return err;
860
861 if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
862 xe_gt_sriov_info(gt, "VF%u stopped!\n", vfid);
863 return 0;
864 }
865
866 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED)) {
867 xe_gt_sriov_dbg(gt, "VF%u stop failed!\n", vfid);
868 return -EIO;
869 }
870
871 xe_gt_sriov_dbg(gt, "VF%u stop was canceled!\n", vfid);
872 return -ECANCELED;
873 }
874
875 /**
876 * DOC: The VF FLR state machine
877 *
878 * The VF FLR state machine looks like::
879 *
880 * (READY,PAUSED,STOPPED)<------------<--------------o
881 * | \
882 * flr \
883 * | \
884 * ....V..........................FLR_WIP........... \
885 * : \ : \
886 * : \ o----<----busy : |
887 * : \ / / : |
888 * : FLR_SEND_START---failed----->-----------o--->(FLR_FAILED)<---o
889 * : | \ : | |
890 * : acked rejected----->-----------o--->(MISMATCH) |
891 * : | : ^ |
892 * : v : | |
893 * : FLR_WAIT_GUC : | |
894 * : | : | |
895 * : done : | |
896 * : | : | |
897 * : v : | |
898 * : FLR_GUC_DONE : | |
899 * : | : | |
900 * : FLR_RESET_CONFIG---failed--->-----------o--------+-----------o
901 * : | : | |
902 * : FLR_RESET_DATA : | |
903 * : | : | |
904 * : FLR_RESET_MMIO : | |
905 * : | : | |
906 * : | o----<----busy : | |
907 * : |/ / : | |
908 * : FLR_SEND_FINISH----failed--->-----------o--------+-----------o
909 * : / \ : |
910 * : acked rejected----->-----------o--------o
911 * : / :
912 * :....o..............................o...........:
913 * | |
914 * completed restart
915 * | /
916 * V /
917 * (READY)<----------<------------o
918 *
919 * For the full state machine view, see `The VF state machine`_.
920 */
921
pf_enter_vf_flr_send_start(struct xe_gt * gt,unsigned int vfid)922 static void pf_enter_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
923 {
924 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
925 pf_enter_vf_state_machine_bug(gt, vfid);
926
927 pf_queue_vf(gt, vfid);
928 }
929
pf_enter_vf_flr_wip(struct xe_gt * gt,unsigned int vfid)930 static void pf_enter_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
931 {
932 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
933 xe_gt_sriov_dbg(gt, "VF%u FLR is already in progress\n", vfid);
934 return;
935 }
936
937 pf_enter_vf_wip(gt, vfid);
938 pf_enter_vf_flr_send_start(gt, vfid);
939 }
940
pf_exit_vf_flr_wip(struct xe_gt * gt,unsigned int vfid)941 static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
942 {
943 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
944 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH);
945 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO);
946 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA);
947 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
948 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE);
949 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
950 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START);
951 }
952 }
953
pf_enter_vf_flr_completed(struct xe_gt * gt,unsigned int vfid)954 static void pf_enter_vf_flr_completed(struct xe_gt *gt, unsigned int vfid)
955 {
956 pf_enter_vf_ready(gt, vfid);
957 }
958
pf_enter_vf_flr_failed(struct xe_gt * gt,unsigned int vfid)959 static void pf_enter_vf_flr_failed(struct xe_gt *gt, unsigned int vfid)
960 {
961 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
962 xe_gt_sriov_notice(gt, "VF%u FLR failed!\n", vfid);
963 pf_exit_vf_wip(gt, vfid);
964 }
965
pf_enter_vf_flr_rejected(struct xe_gt * gt,unsigned int vfid)966 static void pf_enter_vf_flr_rejected(struct xe_gt *gt, unsigned int vfid)
967 {
968 pf_enter_vf_mismatch(gt, vfid);
969 pf_enter_vf_flr_failed(gt, vfid);
970 }
971
pf_enter_vf_flr_send_finish(struct xe_gt * gt,unsigned int vfid)972 static void pf_enter_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
973 {
974 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
975 pf_enter_vf_state_machine_bug(gt, vfid);
976
977 pf_queue_vf(gt, vfid);
978 }
979
pf_exit_vf_flr_send_finish(struct xe_gt * gt,unsigned int vfid)980 static bool pf_exit_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
981 {
982 int err;
983
984 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
985 return false;
986
987 err = pf_send_vf_flr_finish(gt, vfid);
988 if (err == -EBUSY)
989 pf_enter_vf_flr_send_finish(gt, vfid);
990 else if (err == -EIO)
991 pf_enter_vf_flr_rejected(gt, vfid);
992 else if (err)
993 pf_enter_vf_flr_failed(gt, vfid);
994 else
995 pf_enter_vf_flr_completed(gt, vfid);
996 return true;
997 }
998
pf_enter_vf_flr_reset_mmio(struct xe_gt * gt,unsigned int vfid)999 static void pf_enter_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
1000 {
1001 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
1002 pf_enter_vf_state_machine_bug(gt, vfid);
1003
1004 pf_queue_vf(gt, vfid);
1005 }
1006
pf_exit_vf_flr_reset_mmio(struct xe_gt * gt,unsigned int vfid)1007 static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
1008 {
1009 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
1010 return false;
1011
1012 xe_gt_sriov_pf_sanitize_hw(gt, vfid);
1013
1014 pf_enter_vf_flr_send_finish(gt, vfid);
1015 return true;
1016 }
1017
pf_enter_vf_flr_reset_data(struct xe_gt * gt,unsigned int vfid)1018 static void pf_enter_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
1019 {
1020 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
1021 pf_enter_vf_state_machine_bug(gt, vfid);
1022
1023 pf_queue_vf(gt, vfid);
1024 }
1025
pf_exit_vf_flr_reset_data(struct xe_gt * gt,unsigned int vfid)1026 static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
1027 {
1028 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
1029 return false;
1030
1031 xe_gt_sriov_pf_service_reset(gt, vfid);
1032 xe_gt_sriov_pf_monitor_flr(gt, vfid);
1033
1034 pf_enter_vf_flr_reset_mmio(gt, vfid);
1035 return true;
1036 }
1037
pf_enter_vf_flr_reset_config(struct xe_gt * gt,unsigned int vfid)1038 static void pf_enter_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
1039 {
1040 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
1041 pf_enter_vf_state_machine_bug(gt, vfid);
1042
1043 pf_queue_vf(gt, vfid);
1044 }
1045
pf_exit_vf_flr_reset_config(struct xe_gt * gt,unsigned int vfid)1046 static bool pf_exit_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
1047 {
1048 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
1049 int err;
1050
1051 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
1052 return false;
1053
1054 err = xe_gt_sriov_pf_config_sanitize(gt, vfid, timeout);
1055 if (err)
1056 pf_enter_vf_flr_failed(gt, vfid);
1057 else
1058 pf_enter_vf_flr_reset_data(gt, vfid);
1059 return true;
1060 }
1061
pf_enter_vf_flr_wait_guc(struct xe_gt * gt,unsigned int vfid)1062 static void pf_enter_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
1063 {
1064 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC))
1065 pf_enter_vf_state_machine_bug(gt, vfid);
1066 }
1067
pf_exit_vf_flr_wait_guc(struct xe_gt * gt,unsigned int vfid)1068 static bool pf_exit_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
1069 {
1070 return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
1071 }
1072
pf_exit_vf_flr_send_start(struct xe_gt * gt,unsigned int vfid)1073 static bool pf_exit_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
1074 {
1075 int err;
1076
1077 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
1078 return false;
1079
1080 /* GuC may actually send a FLR_DONE before we get a RESPONSE */
1081 pf_enter_vf_flr_wait_guc(gt, vfid);
1082
1083 err = pf_send_vf_flr_start(gt, vfid);
1084 if (err) {
1085 /* send failed, so we shouldn't expect FLR_DONE from GuC */
1086 pf_exit_vf_flr_wait_guc(gt, vfid);
1087
1088 if (err == -EBUSY)
1089 pf_enter_vf_flr_send_start(gt, vfid);
1090 else if (err == -EIO)
1091 pf_enter_vf_flr_rejected(gt, vfid);
1092 else
1093 pf_enter_vf_flr_failed(gt, vfid);
1094 } else {
1095 /*
1096 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
1097 * but since GuC didn't complain, we may clear MISMATCH
1098 */
1099 pf_exit_vf_mismatch(gt, vfid);
1100 }
1101
1102 return true;
1103 }
1104
pf_exit_vf_flr_guc_done(struct xe_gt * gt,unsigned int vfid)1105 static bool pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
1106 {
1107 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
1108 return false;
1109
1110 pf_enter_vf_flr_reset_config(gt, vfid);
1111 return true;
1112 }
1113
pf_enter_vf_flr_guc_done(struct xe_gt * gt,unsigned int vfid)1114 static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
1115 {
1116 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
1117 pf_queue_vf(gt, vfid);
1118 }
1119
1120 /**
1121 * xe_gt_sriov_pf_control_trigger_flr - Start a VF FLR sequence.
1122 * @gt: the &xe_gt
1123 * @vfid: the VF identifier
1124 *
1125 * This function is for PF only.
1126 *
1127 * Return: 0 on success or a negative error code on failure.
1128 */
xe_gt_sriov_pf_control_trigger_flr(struct xe_gt * gt,unsigned int vfid)1129 int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid)
1130 {
1131 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_WIP);
1132 int err;
1133
1134 pf_enter_vf_flr_wip(gt, vfid);
1135
1136 err = pf_wait_vf_wip_done(gt, vfid, timeout);
1137 if (err) {
1138 xe_gt_sriov_notice(gt, "VF%u FLR didn't finish in %u ms (%pe)\n",
1139 vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
1140 return err;
1141 }
1142
1143 if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
1144 return -EIO;
1145
1146 return 0;
1147 }
1148
1149 /**
1150 * DOC: The VF FLR Flow with GuC
1151 *
1152 * The VF FLR flow includes several steps::
1153 *
1154 * PF GUC PCI
1155 * ========================================================
1156 * | | |
1157 * (1) | [ ] <----- FLR --|
1158 * | [ ] :
1159 * (2) [ ] <-------- NOTIFY FLR --[ ]
1160 * [ ] |
1161 * (3) [ ] |
1162 * [ ] |
1163 * [ ]-- START FLR ---------> [ ]
1164 * | [ ]
1165 * (4) | [ ]
1166 * | [ ]
1167 * [ ] <--------- FLR DONE -- [ ]
1168 * [ ] |
1169 * (5) [ ] |
1170 * [ ] |
1171 * [ ]-- FINISH FLR --------> [ ]
1172 * | |
1173 *
1174 * * Step 1: PCI HW generates interrupt to the GuC about VF FLR
1175 * * Step 2: GuC FW sends G2H notification to the PF about VF FLR
1176 * * Step 2a: on some platforms G2H is only received from root GuC
1177 * * Step 3: PF sends H2G request to the GuC to start VF FLR sequence
1178 * * Step 3a: on some platforms PF must send H2G to all other GuCs
1179 * * Step 4: GuC FW performs VF FLR cleanups and notifies the PF when done
1180 * * Step 5: PF performs VF FLR cleanups and notifies the GuC FW when finished
1181 */
1182
needs_dispatch_flr(struct xe_device * xe)1183 static bool needs_dispatch_flr(struct xe_device *xe)
1184 {
1185 return xe->info.platform == XE_PVC;
1186 }
1187
pf_handle_vf_flr(struct xe_gt * gt,u32 vfid)1188 static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid)
1189 {
1190 struct xe_device *xe = gt_to_xe(gt);
1191 struct xe_gt *gtit;
1192 unsigned int gtid;
1193
1194 xe_gt_sriov_info(gt, "VF%u FLR\n", vfid);
1195
1196 if (needs_dispatch_flr(xe)) {
1197 for_each_gt(gtit, xe, gtid)
1198 pf_enter_vf_flr_wip(gtit, vfid);
1199 } else {
1200 pf_enter_vf_flr_wip(gt, vfid);
1201 }
1202 }
1203
pf_handle_vf_flr_done(struct xe_gt * gt,u32 vfid)1204 static void pf_handle_vf_flr_done(struct xe_gt *gt, u32 vfid)
1205 {
1206 if (!pf_exit_vf_flr_wait_guc(gt, vfid)) {
1207 xe_gt_sriov_dbg(gt, "Received out of order 'VF%u FLR done'\n", vfid);
1208 pf_enter_vf_mismatch(gt, vfid);
1209 return;
1210 }
1211
1212 pf_enter_vf_flr_guc_done(gt, vfid);
1213 }
1214
pf_handle_vf_pause_done(struct xe_gt * gt,u32 vfid)1215 static void pf_handle_vf_pause_done(struct xe_gt *gt, u32 vfid)
1216 {
1217 if (!pf_exit_pause_wait_guc(gt, vfid)) {
1218 xe_gt_sriov_dbg(gt, "Received out of order 'VF%u PAUSE done'\n", vfid);
1219 pf_enter_vf_mismatch(gt, vfid);
1220 return;
1221 }
1222
1223 pf_enter_vf_pause_guc_done(gt, vfid);
1224 }
1225
pf_handle_vf_event(struct xe_gt * gt,u32 vfid,u32 eventid)1226 static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid)
1227 {
1228 xe_gt_sriov_dbg_verbose(gt, "received VF%u event %#x\n", vfid, eventid);
1229
1230 if (vfid > xe_gt_sriov_pf_get_totalvfs(gt))
1231 return -EPROTO;
1232
1233 switch (eventid) {
1234 case GUC_PF_NOTIFY_VF_FLR:
1235 pf_handle_vf_flr(gt, vfid);
1236 break;
1237 case GUC_PF_NOTIFY_VF_FLR_DONE:
1238 pf_handle_vf_flr_done(gt, vfid);
1239 break;
1240 case GUC_PF_NOTIFY_VF_PAUSE_DONE:
1241 pf_handle_vf_pause_done(gt, vfid);
1242 break;
1243 case GUC_PF_NOTIFY_VF_FIXUP_DONE:
1244 break;
1245 default:
1246 return -ENOPKG;
1247 }
1248 return 0;
1249 }
1250
pf_handle_pf_event(struct xe_gt * gt,u32 eventid)1251 static int pf_handle_pf_event(struct xe_gt *gt, u32 eventid)
1252 {
1253 switch (eventid) {
1254 case GUC_PF_NOTIFY_VF_ENABLE:
1255 xe_gt_sriov_dbg_verbose(gt, "VFs %s/%s\n",
1256 str_enabled_disabled(true),
1257 str_enabled_disabled(false));
1258 break;
1259 default:
1260 return -ENOPKG;
1261 }
1262 return 0;
1263 }
1264
1265 /**
1266 * xe_gt_sriov_pf_control_process_guc2pf - Handle VF state notification from GuC.
1267 * @gt: the &xe_gt
1268 * @msg: the G2H message
1269 * @len: the length of the G2H message
1270 *
1271 * This function is for PF only.
1272 *
1273 * Return: 0 on success or a negative error code on failure.
1274 */
xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt * gt,const u32 * msg,u32 len)1275 int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len)
1276 {
1277 u32 vfid;
1278 u32 eventid;
1279
1280 xe_gt_assert(gt, len);
1281 xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
1282 xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
1283 xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
1284 GUC_ACTION_GUC2PF_VF_STATE_NOTIFY);
1285
1286 if (unlikely(!xe_device_is_sriov_pf(gt_to_xe(gt))))
1287 return -EPROTO;
1288
1289 if (unlikely(FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ, msg[0])))
1290 return -EPFNOSUPPORT;
1291
1292 if (unlikely(len != GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN))
1293 return -EPROTO;
1294
1295 vfid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID, msg[1]);
1296 eventid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, msg[2]);
1297
1298 return vfid ? pf_handle_vf_event(gt, vfid, eventid) : pf_handle_pf_event(gt, eventid);
1299 }
1300
pf_process_vf_state_machine(struct xe_gt * gt,unsigned int vfid)1301 static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid)
1302 {
1303 if (pf_exit_vf_flr_send_start(gt, vfid))
1304 return true;
1305
1306 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC)) {
1307 xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
1308 control_bit_to_string(XE_GT_SRIOV_STATE_FLR_WAIT_GUC));
1309 return false;
1310 }
1311
1312 if (pf_exit_vf_flr_guc_done(gt, vfid))
1313 return true;
1314
1315 if (pf_exit_vf_flr_reset_config(gt, vfid))
1316 return true;
1317
1318 if (pf_exit_vf_flr_reset_data(gt, vfid))
1319 return true;
1320
1321 if (pf_exit_vf_flr_reset_mmio(gt, vfid))
1322 return true;
1323
1324 if (pf_exit_vf_flr_send_finish(gt, vfid))
1325 return true;
1326
1327 if (pf_exit_vf_stop_send_stop(gt, vfid))
1328 return true;
1329
1330 if (pf_exit_vf_pause_send_pause(gt, vfid))
1331 return true;
1332
1333 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)) {
1334 xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
1335 control_bit_to_string(XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC));
1336 return true;
1337 }
1338
1339 if (pf_exit_vf_pause_guc_done(gt, vfid))
1340 return true;
1341
1342 if (pf_exit_vf_resume_send_resume(gt, vfid))
1343 return true;
1344
1345 return false;
1346 }
1347
pf_control_state_index(struct xe_gt * gt,struct xe_gt_sriov_control_state * cs)1348 static unsigned int pf_control_state_index(struct xe_gt *gt,
1349 struct xe_gt_sriov_control_state *cs)
1350 {
1351 return container_of(cs, struct xe_gt_sriov_metadata, control) - gt->sriov.pf.vfs;
1352 }
1353
pf_worker_find_work(struct xe_gt * gt)1354 static void pf_worker_find_work(struct xe_gt *gt)
1355 {
1356 struct xe_gt_sriov_pf_control *pfc = >->sriov.pf.control;
1357 struct xe_gt_sriov_control_state *cs;
1358 unsigned int vfid;
1359 bool empty;
1360 bool more;
1361
1362 spin_lock(&pfc->lock);
1363 cs = list_first_entry_or_null(&pfc->list, struct xe_gt_sriov_control_state, link);
1364 if (cs)
1365 list_del_init(&cs->link);
1366 empty = list_empty(&pfc->list);
1367 spin_unlock(&pfc->lock);
1368
1369 if (!cs)
1370 return;
1371
1372 /* VF metadata structures are indexed by the VFID */
1373 vfid = pf_control_state_index(gt, cs);
1374 xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));
1375
1376 more = pf_process_vf_state_machine(gt, vfid);
1377 if (more)
1378 pf_queue_vf(gt, vfid);
1379 else if (!empty)
1380 pf_queue_control_worker(gt);
1381 }
1382
control_worker_func(struct work_struct * w)1383 static void control_worker_func(struct work_struct *w)
1384 {
1385 struct xe_gt *gt = container_of(w, struct xe_gt, sriov.pf.control.worker);
1386
1387 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
1388 pf_worker_find_work(gt);
1389 }
1390
pf_stop_worker(struct xe_gt * gt)1391 static void pf_stop_worker(struct xe_gt *gt)
1392 {
1393 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
1394 cancel_work_sync(>->sriov.pf.control.worker);
1395 }
1396
control_fini_action(struct drm_device * dev,void * data)1397 static void control_fini_action(struct drm_device *dev, void *data)
1398 {
1399 struct xe_gt *gt = data;
1400
1401 pf_stop_worker(gt);
1402 }
1403
1404 /**
1405 * xe_gt_sriov_pf_control_init() - Initialize PF's control data.
1406 * @gt: the &xe_gt
1407 *
1408 * This function is for PF only.
1409 *
1410 * Return: 0 on success or a negative error code on failure.
1411 */
xe_gt_sriov_pf_control_init(struct xe_gt * gt)1412 int xe_gt_sriov_pf_control_init(struct xe_gt *gt)
1413 {
1414 struct xe_device *xe = gt_to_xe(gt);
1415 unsigned int n, totalvfs;
1416
1417 xe_gt_assert(gt, IS_SRIOV_PF(xe));
1418
1419 totalvfs = xe_sriov_pf_get_totalvfs(xe);
1420 for (n = 0; n <= totalvfs; n++) {
1421 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, n);
1422
1423 init_completion(&cs->done);
1424 INIT_LIST_HEAD(&cs->link);
1425 }
1426
1427 spin_lock_init(>->sriov.pf.control.lock);
1428 INIT_LIST_HEAD(>->sriov.pf.control.list);
1429 INIT_WORK(>->sriov.pf.control.worker, control_worker_func);
1430
1431 return drmm_add_action_or_reset(&xe->drm, control_fini_action, gt);
1432 }
1433
1434 /**
1435 * xe_gt_sriov_pf_control_restart() - Restart SR-IOV control data after a GT reset.
1436 * @gt: the &xe_gt
1437 *
1438 * Any per-VF status maintained by the PF or any ongoing VF control activity
1439 * performed by the PF must be reset or cancelled when the GT is reset.
1440 *
1441 * This function is for PF only.
1442 */
xe_gt_sriov_pf_control_restart(struct xe_gt * gt)1443 void xe_gt_sriov_pf_control_restart(struct xe_gt *gt)
1444 {
1445 struct xe_device *xe = gt_to_xe(gt);
1446 unsigned int n, totalvfs;
1447
1448 xe_gt_assert(gt, IS_SRIOV_PF(xe));
1449
1450 pf_stop_worker(gt);
1451
1452 totalvfs = xe_sriov_pf_get_totalvfs(xe);
1453 for (n = 1; n <= totalvfs; n++)
1454 pf_enter_vf_ready(gt, n);
1455 }
1456