/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lag.h"
#include "lag_mp.h"

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_SPINLOCK(lag_lock);

static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2, bool shared_fdb)
{
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);

	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);

	return mlx5_cmd_exec_in(dev, create_lag, in);
}

static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);

static void mlx5_ldev_free(struct kref *ref)
{
	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

	if (ldev->nb.notifier_call)
		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
	mlx5_lag_mp_cleanup(ldev);
	cancel_delayed_work_sync(&ldev->bond_work);
	destroy_workqueue(ldev->wq);
	kfree(ldev);
}

static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
	kref_put(&ldev->ref, mlx5_ldev_free);
}

static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
	kref_get(&ldev->ref);
}

static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int err;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	kref_init(&ldev->ref);
	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	ldev->nb.notifier_call = mlx5_lag_netdev_event;
	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
		ldev->nb.notifier_call = NULL;
		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
	}

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);

	return ldev;
}

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].netdev == ndev)
			return i;

	return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
}

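/* Compute the logical-to-physical TX port mapping: default is the
 * identity mapping (1->1, 2->2); if exactly one port is both
 * tx_enabled and link_up, steer both logical ports to it.
 */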
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 *port1, u8 *port2)
{
	bool p1en;
	bool p2en;

	p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled &&
	       tracker->netdev_state[MLX5_LAG_P1].link_up;

	p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled &&
	       tracker->netdev_state[MLX5_LAG_P2].link_up;

	*port1 = 1;
	*port2 = 2;
	if ((!p1en && !p2en) || (p1en && p2en))
		return;

	if (p1en)
		*port2 = 1;
	else
		*port1 = 2;
}

void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u8 v2p_port1, v2p_port2;
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
				       &v2p_port2);

	if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
	    v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
		ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
		ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;

		mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
			       ldev->v2p_map[MLX5_LAG_P1],
			       ldev->v2p_map[MLX5_LAG_P2]);

		err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
		if (err)
			mlx5_core_err(dev0,
				      "Failed to modify LAG (%d)\n",
				      err);
	}
}

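/* Create the LAG object in firmware using the TX affinity map inferred
 * from the current bond state. If shared FDB was requested but cannot
 * be configured, the just-created LAG is destroyed again.
 */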
static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker,
			   bool shared_fdb)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
				       &ldev->v2p_map[MLX5_LAG_P2]);

	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d",
		       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
		       shared_fdb);

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
				  ldev->v2p_map[MLX5_LAG_P2], shared_fdb);
	if (err) {
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
		return err;
	}

	if (shared_fdb) {
		err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
							      dev1->priv.eswitch);
		if (err)
			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
		else
			mlx5_core_info(dev0, "Operation mode is single FDB\n");
	}

	if (err) {
		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
	}

	return err;
}

int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      u8 flags,
		      bool shared_fdb)
{
	bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int err;

	err = mlx5_create_lag(ldev, tracker, shared_fdb);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
		return err;
	}

	ldev->flags |= flags;
	ldev->shared_fdb = shared_fdb;
	return 0;
}

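/* Clear the LAG mode flags, tear down shared FDB if it was enabled and
 * destroy the LAG object in firmware.
 */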
static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	int err;

	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
	mlx5_lag_mp_reset(ldev);

	if (ldev->shared_fdb) {
		mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
							 ldev->pf[MLX5_LAG_P2].dev->priv.eswitch);
		ldev->shared_fdb = false;
	}

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
	}

	return err;
}

static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
	if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
		return false;

#ifdef CONFIG_MLX5_ESWITCH
	return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
				   ldev->pf[MLX5_LAG_P2].dev);
#else
	return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
		!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
#endif
}

static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

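/* Leave LAG mode: detach the IB auxiliary devices or disable RoCE on
 * the second port as required by the current mode, deactivate the
 * firmware LAG, then restore the auxiliary devices (reloading eswitch
 * reps when shared FDB was in use).
 */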
static void mlx5_disable_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	bool shared_fdb = ldev->shared_fdb;
	bool roce_lag;
	int err;

	roce_lag = __mlx5_lag_is_roce(ldev);

	if (shared_fdb) {
		mlx5_lag_remove_devices(ldev);
	} else if (roce_lag) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
		}
		mlx5_nic_vport_disable_roce(dev1);
	}

	err = mlx5_deactivate_lag(ldev);
	if (err)
		return;

	if (shared_fdb || roce_lag)
		mlx5_lag_add_devices(ldev);

	if (shared_fdb) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev0->priv.eswitch);
		if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev1->priv.eswitch);
	}
}

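/* Shared FDB requires both PFs in switchdev mode with vport match
 * metadata enabled, a paired eswitch offloads devcom, and the relevant
 * firmware capabilities on the two devices.
 */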
static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;

	if (is_mdev_switchdev_mode(dev0) &&
	    is_mdev_switchdev_mode(dev1) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
	    mlx5_devcom_is_paired(dev0->priv.devcom,
				  MLX5_DEVCOM_ESW_OFFLOADS) &&
	    MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
	    MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
	    MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
		return true;

	return false;
}

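/* Re-evaluate the bond state tracked from netdev events and move the
 * device pair into or out of LAG mode accordingly: create a RoCE or
 * SR-IOV (VF) LAG when both ports are bonded, update the TX affinity
 * map while bonded, or disable LAG once the bond is gone.
 */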
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	struct lag_tracker tracker = { };
	bool do_bond, roce_lag;
	int err;

	if (!mlx5_lag_is_ready(ldev)) {
		do_bond = false;
	} else {
		/* VF LAG is in multipath mode, ignore bond change requests */
		if (mlx5_lag_is_multipath(dev0))
			return;

		tracker = ldev->tracker;

		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
	}

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		bool shared_fdb = mlx5_shared_fdb_supported(ldev);

		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
			   !mlx5_sriov_is_enabled(dev1);

#ifdef CONFIG_MLX5_ESWITCH
		roce_lag = roce_lag &&
			   dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
			   dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif

		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_FLAG_ROCE :
						   MLX5_LAG_FLAG_SRIOV,
					shared_fdb);
		if (err) {
			if (shared_fdb || roce_lag)
				mlx5_lag_add_devices(ldev);

			return;
		} else if (roce_lag) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			mlx5_nic_vport_enable_roce(dev1);
		} else if (shared_fdb) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);

			err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
			if (!err)
				err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);

			if (err) {
				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
				mlx5_rescan_drivers_locked(dev0);
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				mlx5_eswitch_reload_reps(dev0->priv.eswitch);
				mlx5_eswitch_reload_reps(dev1->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
		}
	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
		mlx5_disable_lag(ldev);
	}
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
				    struct mlx5_core_dev *dev1)
{
	if (dev0)
		mlx5_esw_lock(dev0->priv.eswitch);
	if (dev1)
		mlx5_esw_lock(dev1->priv.eswitch);
}

static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
				      struct mlx5_core_dev *dev1)
{
	if (dev1)
		mlx5_esw_unlock(dev1->priv.eswitch);
	if (dev0)
		mlx5_esw_unlock(dev0->priv.eswitch);
}

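/* Delayed work that runs mlx5_do_bond() under the device list and
 * eswitch locks; requeues itself if the device list lock cannot be
 * taken or a mode change is in progress.
 */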
static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	if (ldev->mode_changes_in_progress) {
		mlx5_dev_list_unlock();
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_lag_lock_eswitches(dev0, dev1);
	mlx5_do_bond(ldev);
	mlx5_lag_unlock_eswitches(dev0, dev1);
	mlx5_dev_list_unlock();
}

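/* Handle NETDEV_CHANGEUPPER on the bond master: update the tracker's
 * TX type and bonded state. Returns 1 if the bonded state changed and
 * the bond work should be scheduled, 0 otherwise.
 */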
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct net_device *ndev,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	int bond_status = 0;
	int num_slaves = 0;
	int changed = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx >= 0)
			bond_status |= (1 << idx);

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & 0x3))
		return 0;

	if (lag_upper_info)
		tracker->tx_type = lag_upper_info->tx_type;

	/* Determine bonding status:
	 * A device is considered bonded if both its physical ports are slaves
	 * of the same lag master, and only them.
	 */
	is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;

	/* Lag mode must be activebackup or hash. */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		changed = 1;
	}

	if (!is_in_lag)
		return changed;

	if (!mlx5_lag_is_ready(ldev))
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
	else if (!mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	return changed;
}

static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	/* This information is used to determine virtual to physical
	 * port mapping.
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
		return NOTIFY_DONE;

	ldev    = container_of(this, struct mlx5_lag, nb);

	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
							ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	}

	ldev->tracker = tracker;

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}

static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
				 struct mlx5_core_dev *dev,
				 struct net_device *netdev)
{
	unsigned int fn = PCI_FUNC(dev->pdev->devfn);
	unsigned long flags;

	if (fn >= MLX5_MAX_PORTS)
		return;

	spin_lock_irqsave(&lag_lock, flags);
	ldev->pf[fn].netdev = netdev;
	ldev->tracker.netdev_state[fn].link_up = 0;
	ldev->tracker.netdev_state[fn].tx_enabled = 0;
	spin_unlock_irqrestore(&lag_lock, flags);
}

static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
				    struct net_device *netdev)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (ldev->pf[i].netdev == netdev) {
			ldev->pf[i].netdev = NULL;
			break;
		}
	}
	spin_unlock_irqrestore(&lag_lock, flags);
}

static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
			       struct mlx5_core_dev *dev)
{
	unsigned int fn = PCI_FUNC(dev->pdev->devfn);

	if (fn >= MLX5_MAX_PORTS)
		return;

	ldev->pf[fn].dev = dev;
	dev->priv.lag = ldev;
}

/* Must be called with intf_mutex held */
static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
				  struct mlx5_core_dev *dev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].dev == dev)
			break;

	if (i == MLX5_MAX_PORTS)
		return;

	ldev->pf[i].dev = NULL;
	dev->priv.lag = NULL;
}

/* Must be called with intf_mutex held */
static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;

	tmp_dev = mlx5_get_next_phys_dev_lag(dev);
	if (tmp_dev)
		ldev = tmp_dev->priv.lag;

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc(dev);
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return 0;
		}
	} else {
		if (ldev->mode_changes_in_progress)
			return -EAGAIN;
		mlx5_ldev_get(ldev);
	}

	mlx5_ldev_add_mdev(ldev, dev);

	return 0;
}

void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

recheck:
	mlx5_dev_list_lock();
	if (ldev->mode_changes_in_progress) {
		mlx5_dev_list_unlock();
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_remove_mdev(ldev, dev);
	mlx5_dev_list_unlock();
	mlx5_ldev_put(ldev);
}

void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
	int err;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
	    !MLX5_CAP_GEN(dev, lag_master) ||
	    MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
		return;

recheck:
	mlx5_dev_list_lock();
	err = __mlx5_lag_dev_add_mdev(dev);
	if (err) {
		mlx5_dev_list_unlock();
		msleep(100);
		goto recheck;
	}
	mlx5_dev_list_unlock();
}

/* Must be called with intf_mutex held */
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
			    struct net_device *netdev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_ldev_remove_netdev(ldev, netdev);
	ldev->flags &= ~MLX5_LAG_FLAG_READY;

	if (__mlx5_lag_is_active(ldev))
		mlx5_queue_bond_work(ldev, 0);
}

/* Must be called with intf_mutex held */
void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
			 struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	int i;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_ldev_add_netdev(ldev, dev, netdev);

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (!ldev->pf[i].dev)
			break;

	if (i >= MLX5_MAX_PORTS)
		ldev->flags |= MLX5_LAG_FLAG_READY;
	mlx5_queue_bond_work(ldev, 0);
}

bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res  = ldev && __mlx5_lag_is_roce(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res  = ldev && __mlx5_lag_is_active(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev) &&
		dev == ldev->pf[MLX5_LAG_P1].dev;
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_master);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res  = ldev && __mlx5_lag_is_sriov(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb;
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);

void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *dev0;
	struct mlx5_core_dev *dev1;
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();

	dev0 = ldev->pf[MLX5_LAG_P1].dev;
	dev1 = ldev->pf[MLX5_LAG_P2].dev;

	ldev->mode_changes_in_progress++;
	if (__mlx5_lag_is_active(ldev)) {
		mlx5_lag_lock_eswitches(dev0, dev1);
		mlx5_disable_lag(ldev);
		mlx5_lag_unlock_eswitches(dev0, dev1);
	}
	mlx5_dev_list_unlock();
}

void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();
	ldev->mode_changes_in_progress--;
	mlx5_dev_list_unlock();
	mlx5_queue_bond_work(ldev, 0);
}

struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;
	unsigned long flags;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
		       ldev->pf[MLX5_LAG_P1].netdev :
		       ldev->pf[MLX5_LAG_P2].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
	if (ndev)
		dev_hold(ndev);

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);

	return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
			   struct net_device *slave)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	u8 port = 0;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->pf[MLX5_LAG_P1].netdev == slave)
		port = MLX5_LAG_P1;
	else
		port = MLX5_LAG_P2;

	port = ldev->v2p_map[port];

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev = NULL;
	struct mlx5_lag *ldev;
	unsigned long flags;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		goto unlock;

	peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
			   ldev->pf[MLX5_LAG_P2].dev :
			   ldev->pf[MLX5_LAG_P1].dev;

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);

int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
	struct mlx5_lag *ldev;
	unsigned long flags;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		num_ports = MLX5_MAX_PORTS;
		mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
		mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock_irqrestore(&lag_lock, flags);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
1096