/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *  - Redistributions of source code must retain the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer.
 *
 *  - Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lag.h"
#include "lag_mp.h"

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_SPINLOCK(lag_lock);

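/* Issue the CREATE_LAG firmware command: program the initial tx remap
 * affinity for both ports and select shared-FDB mode when requested.
 */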
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2, bool shared_fdb)
{
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);

	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);

	return mlx5_cmd_exec_in(dev, create_lag, in);
}

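/* Update the tx remap affinities of an already created LAG via the
 * MODIFY_LAG firmware command (field_select 0x1).
 */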
static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

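/* Create/destroy the vport LAG object. Exported so that other mlx5 drivers
 * (notably mlx5_ib) can set RoCE up or down on top of an active LAG.
 */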
int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);

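/* kref release callback: the last PF dropped its reference, so unregister
 * the netdev notifier, clean up multipath state, flush any pending bond
 * work and free the lag device.
 */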
static void mlx5_ldev_free(struct kref *ref)
{
	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

	if (ldev->nb.notifier_call)
		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
	mlx5_lag_mp_cleanup(ldev);
	cancel_delayed_work_sync(&ldev->bond_work);
	destroy_workqueue(ldev->wq);
	kfree(ldev);
}

static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
	kref_put(&ldev->ref, mlx5_ldev_free);
}

static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
	kref_get(&ldev->ref);
}

static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int err;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	kref_init(&ldev->ref);
	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	ldev->nb.notifier_call = mlx5_lag_netdev_event;
	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
		ldev->nb.notifier_call = NULL;
		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
	}

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);

	return ldev;
}

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].netdev == ndev)
			return i;

	return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
}

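/* Map virtual port N to a physical port based on the bond slaves' state.
 * Default is the identity mapping (1->1, 2->2); if exactly one slave is
 * up and tx-enabled, both virtual ports are steered to that slave.
 */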
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 *port1, u8 *port2)
{
	bool p1en;
	bool p2en;

	p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled &&
	       tracker->netdev_state[MLX5_LAG_P1].link_up;

	p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled &&
	       tracker->netdev_state[MLX5_LAG_P2].link_up;

	*port1 = 1;
	*port2 = 2;
	if ((!p1en && !p2en) || (p1en && p2en))
		return;

	if (p1en)
		*port2 = 1;
	else
		*port1 = 2;
}

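/* Recompute the tx affinity mapping from the current bond state and, if it
 * changed, push the new mapping to firmware via MODIFY_LAG.
 */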
void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u8 v2p_port1, v2p_port2;
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
				       &v2p_port2);

	if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
	    v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
		ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
		ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;

		mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
			       ldev->v2p_map[MLX5_LAG_P1],
			       ldev->v2p_map[MLX5_LAG_P2]);

		err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
		if (err)
			mlx5_core_err(dev0,
				      "Failed to modify LAG (%d)\n",
				      err);
	}
}

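/* Create the LAG in firmware on the first port's device. If shared-FDB mode
 * was requested, also point both eswitches at a single FDB; on failure the
 * just-created LAG is torn down again so the caller sees a clean state.
 */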
static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker,
			   bool shared_fdb)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
				       &ldev->v2p_map[MLX5_LAG_P2]);

	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d",
		       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
		       shared_fdb);

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
				  ldev->v2p_map[MLX5_LAG_P2], shared_fdb);
	if (err) {
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
		return err;
	}

	if (shared_fdb) {
		err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
							      dev1->priv.eswitch);
		if (err)
			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
		else
			mlx5_core_info(dev0, "Operation mode is single FDB\n");
	}

	if (err) {
		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
	}

	return err;
}

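/* Activate LAG in the requested mode (RoCE or SR-IOV/VF LAG). On success the
 * mode flags and shared_fdb state are recorded on the lag device.
 */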
int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      u8 flags,
		      bool shared_fdb)
{
	bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int err;

	err = mlx5_create_lag(ldev, tracker, shared_fdb);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
		return err;
	}

	ldev->flags |= flags;
	ldev->shared_fdb = shared_fdb;
	return 0;
}

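/* Undo mlx5_activate_lag(): clear the mode flags, reset multipath state,
 * split a shared FDB back into per-eswitch FDBs and destroy the LAG object
 * in firmware.
 */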
static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	int err;

	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
	mlx5_lag_mp_reset(ldev);

	if (ldev->shared_fdb) {
		mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
							 ldev->pf[MLX5_LAG_P2].dev->priv.eswitch);
		ldev->shared_fdb = false;
	}

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
	}

	return err;
}

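/* LAG can only be offloaded when both PF devices are present and the
 * eswitch (or SR-IOV) configuration of the two ports allows it.
 */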
static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
	if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
		return false;

#ifdef CONFIG_MLX5_ESWITCH
	return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
				   ldev->pf[MLX5_LAG_P2].dev);
#else
	return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
		!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
#endif
}

static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

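/* Tear the bond down: drop the per-port IB auxiliary devices as needed for
 * the current mode, deactivate the LAG in firmware and then re-add the IB
 * devices (and reload the eswitch representors for shared FDB).
 */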
static void mlx5_disable_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	bool shared_fdb = ldev->shared_fdb;
	bool roce_lag;
	int err;

	roce_lag = __mlx5_lag_is_roce(ldev);

	if (shared_fdb) {
		mlx5_lag_remove_devices(ldev);
	} else if (roce_lag) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
		}
		mlx5_nic_vport_disable_roce(dev1);
	}

	err = mlx5_deactivate_lag(ldev);
	if (err)
		return;

	if (shared_fdb || roce_lag)
		mlx5_lag_add_devices(ldev);

	if (shared_fdb) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev0->priv.eswitch);
		if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev1->priv.eswitch);
	}
}

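/* Shared FDB requires both ports in switchdev mode with vport match
 * metadata, a paired eswitch-offloads devcom component and the relevant
 * firmware capabilities (native FDB selection, cross-eswitch root FT,
 * shared ingress ACL).
 */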
static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;

	if (is_mdev_switchdev_mode(dev0) &&
	    is_mdev_switchdev_mode(dev1) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
	    mlx5_devcom_is_paired(dev0->priv.devcom,
				  MLX5_DEVCOM_ESW_OFFLOADS) &&
	    MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
	    MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
	    MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
		return true;

	return false;
}

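/* Reconcile the driver's LAG state with the bond state reported by the
 * netdev notifier: activate LAG (RoCE or SR-IOV mode, optionally with a
 * shared FDB) when the two PF netdevs become bonded, update the tx affinity
 * mapping when slave state changes, and disable LAG when the bond is gone.
 */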
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	struct lag_tracker tracker = { };
	bool do_bond, roce_lag;
	int err;

	if (!mlx5_lag_is_ready(ldev)) {
		do_bond = false;
	} else {
		/* VF LAG is in multipath mode, ignore bond change requests */
		if (mlx5_lag_is_multipath(dev0))
			return;

		tracker = ldev->tracker;

		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
	}

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		bool shared_fdb = mlx5_shared_fdb_supported(ldev);

		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
			   !mlx5_sriov_is_enabled(dev1);

#ifdef CONFIG_MLX5_ESWITCH
		roce_lag = roce_lag &&
			   dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
			   dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif

		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_FLAG_ROCE :
						   MLX5_LAG_FLAG_SRIOV,
					shared_fdb);
		if (err) {
			if (shared_fdb || roce_lag)
				mlx5_lag_add_devices(ldev);

			return;
		} else if (roce_lag) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			mlx5_nic_vport_enable_roce(dev1);
		} else if (shared_fdb) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);

			err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
			if (!err)
				err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);

			if (err) {
				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
				mlx5_rescan_drivers_locked(dev0);
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				mlx5_eswitch_reload_reps(dev0->priv.eswitch);
				mlx5_eswitch_reload_reps(dev1->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
		}
	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
		mlx5_disable_lag(ldev);
	}
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
				    struct mlx5_core_dev *dev1)
{
	if (dev0)
		mlx5_esw_lock(dev0->priv.eswitch);
	if (dev1)
		mlx5_esw_lock(dev1->priv.eswitch);
}

static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
				      struct mlx5_core_dev *dev1)
{
	if (dev1)
		mlx5_esw_unlock(dev1->priv.eswitch);
	if (dev0)
		mlx5_esw_unlock(dev0->priv.eswitch);
}

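/* Delayed-work handler behind mlx5_queue_bond_work(). Runs mlx5_do_bond()
 * with the device list lock and both eswitch locks held; if the device list
 * lock cannot be taken or a mode change is in progress, the work is
 * re-queued a second later.
 */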
static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	if (ldev->mode_changes_in_progress) {
		mlx5_dev_list_unlock();
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_lag_lock_eswitches(dev0, dev1);
	mlx5_do_bond(ldev);
	mlx5_lag_unlock_eswitches(dev0, dev1);
	mlx5_dev_list_unlock();
}

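/* Handle NETDEV_CHANGEUPPER for a bond master above our netdevs. Update the
 * tracker with the bond's tx type and with whether both of our ports (and
 * only them) are enslaved to the same master; return 1 if the bonded state
 * changed so the caller schedules bond work.
 */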
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct net_device *ndev,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	int bond_status = 0;
	int num_slaves = 0;
	int changed = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx >= 0)
			bond_status |= (1 << idx);

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & 0x3))
		return 0;

	if (lag_upper_info)
		tracker->tx_type = lag_upper_info->tx_type;

	/* Determine bonding status:
	 * A device is considered bonded if both its physical ports are slaves
	 * of the same lag master, and only them.
	 */
	is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;

	/* Lag mode must be activebackup or hash. */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		changed = 1;
	}

	if (!is_in_lag)
		return changed;

	if (!mlx5_lag_is_ready(ldev))
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
	else if (!mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	return changed;
}

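/* Handle NETDEV_CHANGELOWERSTATE for one of our slave netdevs: record its
 * link/tx-enabled state in the tracker, which later drives the tx affinity
 * mapping. Returns 1 when state was recorded.
 */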
static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	/* This information is used to determine virtual to physical
	 * port mapping.
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}

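/* Netdev notifier entry point: dispatch CHANGEUPPER/CHANGELOWERSTATE events
 * to the handlers above, publish the updated tracker and queue bond work if
 * anything relevant changed.
 */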
static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
		return NOTIFY_DONE;

	ldev = container_of(this, struct mlx5_lag, nb);

	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
							ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	}

	ldev->tracker = tracker;

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}

static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
				 struct mlx5_core_dev *dev,
				 struct net_device *netdev)
{
	unsigned int fn = PCI_FUNC(dev->pdev->devfn);
	unsigned long flags;

	if (fn >= MLX5_MAX_PORTS)
		return;

	spin_lock_irqsave(&lag_lock, flags);
	ldev->pf[fn].netdev = netdev;
	ldev->tracker.netdev_state[fn].link_up = 0;
	ldev->tracker.netdev_state[fn].tx_enabled = 0;
	spin_unlock_irqrestore(&lag_lock, flags);
}

static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
				    struct net_device *netdev)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (ldev->pf[i].netdev == netdev) {
			ldev->pf[i].netdev = NULL;
			break;
		}
	}
	spin_unlock_irqrestore(&lag_lock, flags);
}

static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
			       struct mlx5_core_dev *dev)
{
	unsigned int fn = PCI_FUNC(dev->pdev->devfn);

	if (fn >= MLX5_MAX_PORTS)
		return;

	ldev->pf[fn].dev = dev;
	dev->priv.lag = ldev;
}

/* Must be called with intf_mutex held */
static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
				  struct mlx5_core_dev *dev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].dev == dev)
			break;

	if (i == MLX5_MAX_PORTS)
		return;

	ldev->pf[i].dev = NULL;
	dev->priv.lag = NULL;
}

/* Must be called with intf_mutex held */
static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;

	tmp_dev = mlx5_get_next_phys_dev_lag(dev);
	if (tmp_dev)
		ldev = tmp_dev->priv.lag;

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc(dev);
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return 0;
		}
	} else {
		if (ldev->mode_changes_in_progress)
			return -EAGAIN;
		mlx5_ldev_get(ldev);
	}

	mlx5_ldev_add_mdev(ldev, dev);

	return 0;
}

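/* Detach @dev from its lag device. Waits (polling with a 100 ms sleep) for
 * any in-flight mode change to finish, removes the PF slot under the device
 * list lock and drops the ldev reference.
 */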
void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

recheck:
	mlx5_dev_list_lock();
	if (ldev->mode_changes_in_progress) {
		mlx5_dev_list_unlock();
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_remove_mdev(ldev, dev);
	mlx5_dev_list_unlock();
	mlx5_ldev_put(ldev);
}

void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
	int err;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
	    !MLX5_CAP_GEN(dev, lag_master) ||
	    MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
		return;

recheck:
	mlx5_dev_list_lock();
	err = __mlx5_lag_dev_add_mdev(dev);
	if (err) {
		mlx5_dev_list_unlock();
		msleep(100);
		goto recheck;
	}
	mlx5_dev_list_unlock();
}

/* Must be called with intf_mutex held */
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
			    struct net_device *netdev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_ldev_remove_netdev(ldev, netdev);
	ldev->flags &= ~MLX5_LAG_FLAG_READY;

	if (__mlx5_lag_is_active(ldev))
		mlx5_queue_bond_work(ldev, 0);
}

/* Must be called with intf_mutex held */
void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
			 struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	int i;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_ldev_add_netdev(ldev, dev, netdev);

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (!ldev->pf[i].dev)
			break;

	if (i >= MLX5_MAX_PORTS)
		ldev->flags |= MLX5_LAG_FLAG_READY;
	mlx5_queue_bond_work(ldev, 0);
}

bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_roce(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev) &&
	      dev == ldev->pf[MLX5_LAG_P1].dev;
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_master);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb;
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);

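/* Temporarily block LAG mode changes (e.g. while the eswitch mode is being
 * switched). While mode_changes_in_progress is non-zero the bond work
 * re-queues itself instead of acting; an active bond is torn down first.
 * mlx5_lag_enable_change() lifts the block and re-evaluates the bond.
 */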
void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *dev0;
	struct mlx5_core_dev *dev1;
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();

	dev0 = ldev->pf[MLX5_LAG_P1].dev;
	dev1 = ldev->pf[MLX5_LAG_P2].dev;

	ldev->mode_changes_in_progress++;
	if (__mlx5_lag_is_active(ldev)) {
		mlx5_lag_lock_eswitches(dev0, dev1);
		mlx5_disable_lag(ldev);
		mlx5_lag_unlock_eswitches(dev0, dev1);
	}
	mlx5_dev_list_unlock();
}

void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();
	ldev->mode_changes_in_progress--;
	mlx5_dev_list_unlock();
	mlx5_queue_bond_work(ldev, 0);
}

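/* Return the netdev currently carrying RoCE traffic for the bond (the
 * tx-enabled slave in active-backup mode, otherwise port 1's netdev) with
 * a reference held, or NULL if RoCE LAG is not active.
 */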
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;
	unsigned long flags;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
		       ldev->pf[MLX5_LAG_P1].netdev :
		       ldev->pf[MLX5_LAG_P2].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
	if (ndev)
		dev_hold(ndev);

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);

	return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
			   struct net_device *slave)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	u8 port = 0;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->pf[MLX5_LAG_P1].netdev == slave)
		port = MLX5_LAG_P1;
	else
		port = MLX5_LAG_P2;

	port = ldev->v2p_map[port];

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev = NULL;
	struct mlx5_lag *ldev;
	unsigned long flags;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		goto unlock;

	peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
		   ldev->pf[MLX5_LAG_P2].dev :
		   ldev->pf[MLX5_LAG_P1].dev;

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);

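/* Query congestion counters and accumulate them into @values. When LAG is
 * active the counters of both member devices are summed so the caller sees
 * bond-wide statistics; otherwise only @dev is queried.
 */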
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
	struct mlx5_lag *ldev;
	unsigned long flags;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		num_ports = MLX5_MAX_PORTS;
		mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
		mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock_irqrestore(&lag_lock, flags);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);