• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/netdevice.h>
34 #include <linux/mlx5/driver.h>
35 #include <linux/mlx5/vport.h>
36 #include "mlx5_core.h"
37 #include "eswitch.h"
38 #include "lag.h"
39 #include "lag_mp.h"
40 
41 /* General purpose, use for short periods of time.
42  * Beware of lock dependencies (preferably, no locks should be acquired
43  * under it).
44  */
45 static DEFINE_SPINLOCK(lag_lock);
46 
/* Build and execute a CREATE_LAG command on @dev.
 *
 * @remap_port1 and @remap_port2 are written to the tx_remap_affinity_1 and
 * tx_remap_affinity_2 fields of the LAG context embedded in the command.
 *
 * Returns the value returned by mlx5_cmd_exec_in().
 */
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2)
{
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_context;

	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);

	/* The LAG context lives inside the command input buffer. */
	lag_context = MLX5_ADDR_OF(create_lag_in, in, ctx);
	MLX5_SET(lagc, lag_context, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_context, tx_remap_affinity_2, remap_port2);

	return mlx5_cmd_exec_in(dev, create_lag, in);
}
60 
/* Build and execute a MODIFY_LAG command on @dev.
 *
 * field_select is set to 0x1 and the two tx_remap_affinity fields of the
 * LAG context are programmed with @remap_port1 and @remap_port2.
 *
 * Returns the value returned by mlx5_cmd_exec_in().
 */
static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_context;

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	/* The LAG context lives inside the command input buffer. */
	lag_context = MLX5_ADDR_OF(modify_lag_in, in, ctx);
	MLX5_SET(lagc, lag_context, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_context, tx_remap_affinity_2, remap_port2);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}
75 
mlx5_cmd_create_vport_lag(struct mlx5_core_dev * dev)76 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
77 {
78 	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
79 
80 	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
81 
82 	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
83 }
84 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
85 
mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev * dev)86 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
87 {
88 	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
89 
90 	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
91 
92 	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
93 }
94 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
95 
mlx5_lag_dev_get_netdev_idx(struct mlx5_lag * ldev,struct net_device * ndev)96 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
97 				struct net_device *ndev)
98 {
99 	int i;
100 
101 	for (i = 0; i < MLX5_MAX_PORTS; i++)
102 		if (ldev->pf[i].netdev == ndev)
103 			return i;
104 
105 	return -ENOENT;
106 }
107 
__mlx5_lag_is_roce(struct mlx5_lag * ldev)108 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
109 {
110 	return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
111 }
112 
__mlx5_lag_is_sriov(struct mlx5_lag * ldev)113 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
114 {
115 	return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
116 }
117 
/* Derive the port mapping from the tracked slave state.
 *
 * A port counts as usable when both tx_enabled and link_up are set in its
 * tracker->netdev_state[] entry.
 *
 *   both usable / neither usable: *port1 = 1, *port2 = 2
 *   only port 1 usable:           *port1 = 1, *port2 = 1
 *   only port 2 usable:           *port1 = 2, *port2 = 2
 */
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 *port1, u8 *port2)
{
	bool p1_usable = tracker->netdev_state[MLX5_LAG_P1].tx_enabled &&
			 tracker->netdev_state[MLX5_LAG_P1].link_up;
	bool p2_usable = tracker->netdev_state[MLX5_LAG_P2].tx_enabled &&
			 tracker->netdev_state[MLX5_LAG_P2].link_up;

	/* Each port keeps its own number unless it is the only one down. */
	*port1 = (!p1_usable && p2_usable) ? 2 : 1;
	*port2 = (p1_usable && !p2_usable) ? 1 : 2;
}
140 
mlx5_modify_lag(struct mlx5_lag * ldev,struct lag_tracker * tracker)141 void mlx5_modify_lag(struct mlx5_lag *ldev,
142 		     struct lag_tracker *tracker)
143 {
144 	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
145 	u8 v2p_port1, v2p_port2;
146 	int err;
147 
148 	mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
149 				       &v2p_port2);
150 
151 	if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
152 	    v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
153 		ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
154 		ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;
155 
156 		mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
157 			       ldev->v2p_map[MLX5_LAG_P1],
158 			       ldev->v2p_map[MLX5_LAG_P2]);
159 
160 		err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
161 		if (err)
162 			mlx5_core_err(dev0,
163 				      "Failed to modify LAG (%d)\n",
164 				      err);
165 	}
166 }
167 
mlx5_create_lag(struct mlx5_lag * ldev,struct lag_tracker * tracker)168 static int mlx5_create_lag(struct mlx5_lag *ldev,
169 			   struct lag_tracker *tracker)
170 {
171 	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
172 	int err;
173 
174 	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
175 				       &ldev->v2p_map[MLX5_LAG_P2]);
176 
177 	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
178 		       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);
179 
180 	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
181 				  ldev->v2p_map[MLX5_LAG_P2]);
182 	if (err)
183 		mlx5_core_err(dev0,
184 			      "Failed to create LAG (%d)\n",
185 			      err);
186 	return err;
187 }
188 
/* Create the LAG and, on success, OR @flags into @ldev->flags.
 *
 * On failure an error is logged (the wording depends on whether @flags
 * contains MLX5_LAG_FLAG_ROCE), @ldev->flags is left untouched, and the
 * error from mlx5_create_lag() is returned. Returns 0 on success.
 */
int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      u8 flags)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int err = mlx5_create_lag(ldev, tracker);

	if (!err) {
		ldev->flags |= flags;
		return 0;
	}

	if (flags & MLX5_LAG_FLAG_ROCE)
		mlx5_core_err(dev0,
			      "Failed to activate RoCE LAG\n");
	else
		mlx5_core_err(dev0,
			      "Failed to activate VF LAG\n"
			      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");

	return err;
}
213 
mlx5_deactivate_lag(struct mlx5_lag * ldev)214 static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
215 {
216 	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
217 	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
218 	bool roce_lag = __mlx5_lag_is_roce(ldev);
219 	int err;
220 
221 	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
222 
223 	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
224 	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
225 	if (err) {
226 		if (roce_lag) {
227 			mlx5_core_err(dev0,
228 				      "Failed to deactivate RoCE LAG; driver restart required\n");
229 		} else {
230 			mlx5_core_err(dev0,
231 				      "Failed to deactivate VF LAG; driver restart required\n"
232 				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
233 		}
234 	}
235 
236 	return err;
237 }
238 
mlx5_lag_check_prereq(struct mlx5_lag * ldev)239 static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
240 {
241 	if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
242 		return false;
243 
244 #ifdef CONFIG_MLX5_ESWITCH
245 	return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
246 				   ldev->pf[MLX5_LAG_P2].dev);
247 #else
248 	return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
249 		!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
250 #endif
251 }
252 
mlx5_lag_add_ib_devices(struct mlx5_lag * ldev)253 static void mlx5_lag_add_ib_devices(struct mlx5_lag *ldev)
254 {
255 	int i;
256 
257 	for (i = 0; i < MLX5_MAX_PORTS; i++)
258 		if (ldev->pf[i].dev)
259 			mlx5_add_dev_by_protocol(ldev->pf[i].dev,
260 						 MLX5_INTERFACE_PROTOCOL_IB);
261 }
262 
mlx5_lag_remove_ib_devices(struct mlx5_lag * ldev)263 static void mlx5_lag_remove_ib_devices(struct mlx5_lag *ldev)
264 {
265 	int i;
266 
267 	for (i = 0; i < MLX5_MAX_PORTS; i++)
268 		if (ldev->pf[i].dev)
269 			mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
270 						    MLX5_INTERFACE_PROTOCOL_IB);
271 }
272 
mlx5_do_bond(struct mlx5_lag * ldev)273 static void mlx5_do_bond(struct mlx5_lag *ldev)
274 {
275 	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
276 	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
277 	struct lag_tracker tracker;
278 	bool do_bond, roce_lag;
279 	int err;
280 
281 	if (!mlx5_lag_is_ready(ldev))
282 		return;
283 
284 	spin_lock(&lag_lock);
285 	tracker = ldev->tracker;
286 	spin_unlock(&lag_lock);
287 
288 	do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
289 
290 	if (do_bond && !__mlx5_lag_is_active(ldev)) {
291 		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
292 			   !mlx5_sriov_is_enabled(dev1);
293 
294 #ifdef CONFIG_MLX5_ESWITCH
295 		roce_lag &= dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
296 			    dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
297 #endif
298 
299 		if (roce_lag)
300 			mlx5_lag_remove_ib_devices(ldev);
301 
302 		err = mlx5_activate_lag(ldev, &tracker,
303 					roce_lag ? MLX5_LAG_FLAG_ROCE :
304 					MLX5_LAG_FLAG_SRIOV);
305 		if (err) {
306 			if (roce_lag)
307 				mlx5_lag_add_ib_devices(ldev);
308 
309 			return;
310 		}
311 
312 		if (roce_lag) {
313 			mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
314 			mlx5_nic_vport_enable_roce(dev1);
315 		}
316 	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
317 		mlx5_modify_lag(ldev, &tracker);
318 	} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
319 		roce_lag = __mlx5_lag_is_roce(ldev);
320 
321 		if (roce_lag) {
322 			mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
323 			mlx5_nic_vport_disable_roce(dev1);
324 		}
325 
326 		err = mlx5_deactivate_lag(ldev);
327 		if (err)
328 			return;
329 
330 		if (roce_lag)
331 			mlx5_lag_add_ib_devices(ldev);
332 	}
333 }
334 
mlx5_queue_bond_work(struct mlx5_lag * ldev,unsigned long delay)335 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
336 {
337 	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
338 }
339 
mlx5_do_bond_work(struct work_struct * work)340 static void mlx5_do_bond_work(struct work_struct *work)
341 {
342 	struct delayed_work *delayed_work = to_delayed_work(work);
343 	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
344 					     bond_work);
345 	int status;
346 
347 	status = mlx5_dev_list_trylock();
348 	if (!status) {
349 		/* 1 sec delay. */
350 		mlx5_queue_bond_work(ldev, HZ);
351 		return;
352 	}
353 
354 	mlx5_do_bond(ldev);
355 	mlx5_dev_list_unlock();
356 }
357 
mlx5_handle_changeupper_event(struct mlx5_lag * ldev,struct lag_tracker * tracker,struct net_device * ndev,struct netdev_notifier_changeupper_info * info)358 static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
359 					 struct lag_tracker *tracker,
360 					 struct net_device *ndev,
361 					 struct netdev_notifier_changeupper_info *info)
362 {
363 	struct net_device *upper = info->upper_dev, *ndev_tmp;
364 	struct netdev_lag_upper_info *lag_upper_info = NULL;
365 	bool is_bonded, is_in_lag, mode_supported;
366 	int bond_status = 0;
367 	int num_slaves = 0;
368 	int changed = 0;
369 	int idx;
370 
371 	if (!netif_is_lag_master(upper))
372 		return 0;
373 
374 	if (info->linking)
375 		lag_upper_info = info->upper_info;
376 
377 	/* The event may still be of interest if the slave does not belong to
378 	 * us, but is enslaved to a master which has one or more of our netdevs
379 	 * as slaves (e.g., if a new slave is added to a master that bonds two
380 	 * of our netdevs, we should unbond).
381 	 */
382 	rcu_read_lock();
383 	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
384 		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
385 		if (idx >= 0)
386 			bond_status |= (1 << idx);
387 
388 		num_slaves++;
389 	}
390 	rcu_read_unlock();
391 
392 	/* None of this lagdev's netdevs are slaves of this master. */
393 	if (!(bond_status & 0x3))
394 		return 0;
395 
396 	if (lag_upper_info)
397 		tracker->tx_type = lag_upper_info->tx_type;
398 
399 	/* Determine bonding status:
400 	 * A device is considered bonded if both its physical ports are slaves
401 	 * of the same lag master, and only them.
402 	 */
403 	is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;
404 
405 	/* Lag mode must be activebackup or hash. */
406 	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
407 			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
408 
409 	is_bonded = is_in_lag && mode_supported;
410 	if (tracker->is_bonded != is_bonded) {
411 		tracker->is_bonded = is_bonded;
412 		changed = 1;
413 	}
414 
415 	if (!is_in_lag)
416 		return changed;
417 
418 	if (!mlx5_lag_is_ready(ldev))
419 		NL_SET_ERR_MSG_MOD(info->info.extack,
420 				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
421 	else if (!mode_supported)
422 		NL_SET_ERR_MSG_MOD(info->info.extack,
423 				   "Can't activate LAG offload, TX type isn't supported");
424 
425 	return changed;
426 }
427 
mlx5_handle_changelowerstate_event(struct mlx5_lag * ldev,struct lag_tracker * tracker,struct net_device * ndev,struct netdev_notifier_changelowerstate_info * info)428 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
429 					      struct lag_tracker *tracker,
430 					      struct net_device *ndev,
431 					      struct netdev_notifier_changelowerstate_info *info)
432 {
433 	struct netdev_lag_lower_state_info *lag_lower_info;
434 	int idx;
435 
436 	if (!netif_is_lag_port(ndev))
437 		return 0;
438 
439 	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
440 	if (idx < 0)
441 		return 0;
442 
443 	/* This information is used to determine virtual to physical
444 	 * port mapping.
445 	 */
446 	lag_lower_info = info->lower_state_info;
447 	if (!lag_lower_info)
448 		return 0;
449 
450 	tracker->netdev_state[idx] = *lag_lower_info;
451 
452 	return 1;
453 }
454 
mlx5_lag_netdev_event(struct notifier_block * this,unsigned long event,void * ptr)455 static int mlx5_lag_netdev_event(struct notifier_block *this,
456 				 unsigned long event, void *ptr)
457 {
458 	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
459 	struct lag_tracker tracker;
460 	struct mlx5_lag *ldev;
461 	int changed = 0;
462 
463 	if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
464 		return NOTIFY_DONE;
465 
466 	ldev    = container_of(this, struct mlx5_lag, nb);
467 
468 	tracker = ldev->tracker;
469 
470 	switch (event) {
471 	case NETDEV_CHANGEUPPER:
472 		changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
473 							ptr);
474 		break;
475 	case NETDEV_CHANGELOWERSTATE:
476 		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
477 							     ndev, ptr);
478 		break;
479 	}
480 
481 	spin_lock(&lag_lock);
482 	ldev->tracker = tracker;
483 	spin_unlock(&lag_lock);
484 
485 	if (changed)
486 		mlx5_queue_bond_work(ldev, 0);
487 
488 	return NOTIFY_DONE;
489 }
490 
mlx5_lag_dev_alloc(void)491 static struct mlx5_lag *mlx5_lag_dev_alloc(void)
492 {
493 	struct mlx5_lag *ldev;
494 
495 	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
496 	if (!ldev)
497 		return NULL;
498 
499 	ldev->wq = create_singlethread_workqueue("mlx5_lag");
500 	if (!ldev->wq) {
501 		kfree(ldev);
502 		return NULL;
503 	}
504 
505 	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
506 
507 	return ldev;
508 }
509 
mlx5_lag_dev_free(struct mlx5_lag * ldev)510 static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
511 {
512 	destroy_workqueue(ldev->wq);
513 	kfree(ldev);
514 }
515 
mlx5_lag_dev_add_pf(struct mlx5_lag * ldev,struct mlx5_core_dev * dev,struct net_device * netdev)516 static int mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
517 			       struct mlx5_core_dev *dev,
518 			       struct net_device *netdev)
519 {
520 	unsigned int fn = PCI_FUNC(dev->pdev->devfn);
521 
522 	if (fn >= MLX5_MAX_PORTS)
523 		return -EPERM;
524 
525 	spin_lock(&lag_lock);
526 	ldev->pf[fn].dev    = dev;
527 	ldev->pf[fn].netdev = netdev;
528 	ldev->tracker.netdev_state[fn].link_up = 0;
529 	ldev->tracker.netdev_state[fn].tx_enabled = 0;
530 
531 	dev->priv.lag = ldev;
532 
533 	spin_unlock(&lag_lock);
534 
535 	return fn;
536 }
537 
mlx5_lag_dev_remove_pf(struct mlx5_lag * ldev,struct mlx5_core_dev * dev)538 static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
539 				   struct mlx5_core_dev *dev)
540 {
541 	int i;
542 
543 	for (i = 0; i < MLX5_MAX_PORTS; i++)
544 		if (ldev->pf[i].dev == dev)
545 			break;
546 
547 	if (i == MLX5_MAX_PORTS)
548 		return;
549 
550 	spin_lock(&lag_lock);
551 	memset(&ldev->pf[i], 0, sizeof(*ldev->pf));
552 
553 	dev->priv.lag = NULL;
554 	spin_unlock(&lag_lock);
555 }
556 
557 /* Must be called with intf_mutex held */
mlx5_lag_add(struct mlx5_core_dev * dev,struct net_device * netdev)558 void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
559 {
560 	struct mlx5_lag *ldev = NULL;
561 	struct mlx5_core_dev *tmp_dev;
562 	int i, err;
563 
564 	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
565 	    !MLX5_CAP_GEN(dev, lag_master) ||
566 	    MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
567 		return;
568 
569 	tmp_dev = mlx5_get_next_phys_dev(dev);
570 	if (tmp_dev)
571 		ldev = tmp_dev->priv.lag;
572 
573 	if (!ldev) {
574 		ldev = mlx5_lag_dev_alloc();
575 		if (!ldev) {
576 			mlx5_core_err(dev, "Failed to alloc lag dev\n");
577 			return;
578 		}
579 	}
580 
581 	if (mlx5_lag_dev_add_pf(ldev, dev, netdev) < 0)
582 		return;
583 
584 	for (i = 0; i < MLX5_MAX_PORTS; i++)
585 		if (!ldev->pf[i].dev)
586 			break;
587 
588 	if (i >= MLX5_MAX_PORTS)
589 		ldev->flags |= MLX5_LAG_FLAG_READY;
590 
591 	if (!ldev->nb.notifier_call) {
592 		ldev->nb.notifier_call = mlx5_lag_netdev_event;
593 		if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
594 			ldev->nb.notifier_call = NULL;
595 			mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
596 		}
597 	}
598 
599 	err = mlx5_lag_mp_init(ldev);
600 	if (err)
601 		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
602 			      err);
603 }
604 
605 /* Must be called with intf_mutex held */
mlx5_lag_remove(struct mlx5_core_dev * dev)606 void mlx5_lag_remove(struct mlx5_core_dev *dev)
607 {
608 	struct mlx5_lag *ldev;
609 	int i;
610 
611 	ldev = mlx5_lag_dev_get(dev);
612 	if (!ldev)
613 		return;
614 
615 	if (__mlx5_lag_is_active(ldev))
616 		mlx5_deactivate_lag(ldev);
617 
618 	mlx5_lag_dev_remove_pf(ldev, dev);
619 
620 	ldev->flags &= ~MLX5_LAG_FLAG_READY;
621 
622 	for (i = 0; i < MLX5_MAX_PORTS; i++)
623 		if (ldev->pf[i].dev)
624 			break;
625 
626 	if (i == MLX5_MAX_PORTS) {
627 		if (ldev->nb.notifier_call) {
628 			unregister_netdevice_notifier_net(&init_net, &ldev->nb);
629 			ldev->nb.notifier_call = NULL;
630 		}
631 		mlx5_lag_mp_cleanup(ldev);
632 		cancel_delayed_work_sync(&ldev->bond_work);
633 		mlx5_lag_dev_free(ldev);
634 	}
635 }
636 
mlx5_lag_is_roce(struct mlx5_core_dev * dev)637 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
638 {
639 	struct mlx5_lag *ldev;
640 	bool res;
641 
642 	spin_lock(&lag_lock);
643 	ldev = mlx5_lag_dev_get(dev);
644 	res  = ldev && __mlx5_lag_is_roce(ldev);
645 	spin_unlock(&lag_lock);
646 
647 	return res;
648 }
649 EXPORT_SYMBOL(mlx5_lag_is_roce);
650 
mlx5_lag_is_active(struct mlx5_core_dev * dev)651 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
652 {
653 	struct mlx5_lag *ldev;
654 	bool res;
655 
656 	spin_lock(&lag_lock);
657 	ldev = mlx5_lag_dev_get(dev);
658 	res  = ldev && __mlx5_lag_is_active(ldev);
659 	spin_unlock(&lag_lock);
660 
661 	return res;
662 }
663 EXPORT_SYMBOL(mlx5_lag_is_active);
664 
mlx5_lag_is_sriov(struct mlx5_core_dev * dev)665 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
666 {
667 	struct mlx5_lag *ldev;
668 	bool res;
669 
670 	spin_lock(&lag_lock);
671 	ldev = mlx5_lag_dev_get(dev);
672 	res  = ldev && __mlx5_lag_is_sriov(ldev);
673 	spin_unlock(&lag_lock);
674 
675 	return res;
676 }
677 EXPORT_SYMBOL(mlx5_lag_is_sriov);
678 
/* Run mlx5_do_bond() for @dev's lag device, if it has one, while holding
 * the device list lock.
 */
void mlx5_lag_update(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	mlx5_dev_list_lock();

	ldev = mlx5_lag_dev_get(dev);
	if (ldev)
		mlx5_do_bond(ldev);

	mlx5_dev_list_unlock();
}
693 
mlx5_lag_get_roce_netdev(struct mlx5_core_dev * dev)694 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
695 {
696 	struct net_device *ndev = NULL;
697 	struct mlx5_lag *ldev;
698 
699 	spin_lock(&lag_lock);
700 	ldev = mlx5_lag_dev_get(dev);
701 
702 	if (!(ldev && __mlx5_lag_is_roce(ldev)))
703 		goto unlock;
704 
705 	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
706 		ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
707 		       ldev->pf[MLX5_LAG_P1].netdev :
708 		       ldev->pf[MLX5_LAG_P2].netdev;
709 	} else {
710 		ndev = ldev->pf[MLX5_LAG_P1].netdev;
711 	}
712 	if (ndev)
713 		dev_hold(ndev);
714 
715 unlock:
716 	spin_unlock(&lag_lock);
717 
718 	return ndev;
719 }
720 EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
721 
mlx5_lag_get_slave_port(struct mlx5_core_dev * dev,struct net_device * slave)722 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
723 			   struct net_device *slave)
724 {
725 	struct mlx5_lag *ldev;
726 	u8 port = 0;
727 
728 	spin_lock(&lag_lock);
729 	ldev = mlx5_lag_dev_get(dev);
730 	if (!(ldev && __mlx5_lag_is_roce(ldev)))
731 		goto unlock;
732 
733 	if (ldev->pf[MLX5_LAG_P1].netdev == slave)
734 		port = MLX5_LAG_P1;
735 	else
736 		port = MLX5_LAG_P2;
737 
738 	port = ldev->v2p_map[port];
739 
740 unlock:
741 	spin_unlock(&lag_lock);
742 	return port;
743 }
744 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
745 
mlx5_lag_intf_add(struct mlx5_interface * intf,struct mlx5_priv * priv)746 bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
747 {
748 	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev,
749 						 priv);
750 	struct mlx5_lag *ldev;
751 
752 	if (intf->protocol != MLX5_INTERFACE_PROTOCOL_IB)
753 		return true;
754 
755 	ldev = mlx5_lag_dev_get(dev);
756 	if (!ldev || !__mlx5_lag_is_roce(ldev) ||
757 	    ldev->pf[MLX5_LAG_P1].dev == dev)
758 		return true;
759 
760 	/* If bonded, we do not add an IB device for PF1. */
761 	return false;
762 }
763 
/* Query congestion statistics and accumulate selected counters.
 *
 * @values is zeroed first. QUERY_CONG_STATISTICS is then executed on both
 * PF devices when @dev is part of a RoCE LAG, or on @dev alone otherwise.
 * For each device the big-endian 64-bit value at byte offset @offsets[j]
 * of the command output is added to @values[j], for j in
 * [0, @num_counters).
 *
 * Returns -ENOMEM if the output buffer cannot be allocated; otherwise the
 * result of the last command executed. Querying stops at the first
 * failing command, leaving @values partially accumulated.
 */
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
	struct mlx5_lag *ldev;
	int port, cnt;
	int num_ports;
	void *out;
	int ret;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	memset(values, 0, sizeof(*values) * num_counters);

	/* Snapshot the devices to query while holding lag_lock. */
	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev_get(dev);
	if (!ldev || !__mlx5_lag_is_roce(ldev)) {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	} else {
		num_ports = MLX5_MAX_PORTS;
		mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
		mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
	}
	spin_unlock(&lag_lock);

	for (port = 0; port < num_ports; ++port) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[port], query_cong_statistics, in,
					  out);
		if (ret)
			break;

		for (cnt = 0; cnt < num_counters; ++cnt)
			values[cnt] += be64_to_cpup((__be64 *)(out + offsets[cnt]));
	}

	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
813