• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
3 
4 #include "eswitch.h"
5 #include "esw/qos.h"
6 #include "en/port.h"
7 #define CREATE_TRACE_POINTS
8 #include "diag/qos_tracepoint.h"
9 
/* The HW does not support a BW share value below 1 Mbit/sec */
#define MLX5_MIN_BW_SHARE 1

/* Translate @rate into a BW share: divide by @divider rounding up, raise the
 * result to at least MLX5_MIN_BW_SHARE, then cap it at @limit.
 */
#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
	min_t(u32, \
	      max_t(u32, DIV_ROUND_UP((rate), (divider)), MLX5_MIN_BW_SHARE), \
	      (limit))
15 
16 struct mlx5_esw_rate_group {
17 	u32 tsar_ix;
18 	u32 max_rate;
19 	u32 min_rate;
20 	u32 bw_share;
21 	struct list_head list;
22 };
23 
/* Modify the max average BW and BW share of the E-Switch scheduling element
 * @tsar_ix, using @sched_ctx as the command context buffer.
 *
 * Returns -EOPNOTSUPP when the device lacks the qos or esw_scheduling
 * capability, otherwise the result of the modify command.
 */
static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
			       u32 tsar_ix, u32 max_rate, u32 bw_share)
{
	u32 modify_mask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW |
			  MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;

	if (!(MLX5_CAP_GEN(dev, qos) && MLX5_CAP_QOS(dev, esw_scheduling)))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);

	return mlx5_modify_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH,
						  sched_ctx, tsar_ix, modify_mask);
}
43 
/* Apply @max_rate and @bw_share to the scheduling element of @group.
 * A failure is reported through @extack and returned to the caller.
 */
static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *mdev = esw->dev;
	int ret;

	ret = esw_qos_tsar_config(mdev, tsar_ctx, group->tsar_ix, max_rate, bw_share);
	if (ret)
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");

	/* The tracepoint is emitted regardless of the command result */
	trace_mlx5_esw_group_qos_config(mdev, group, group->tsar_ix, bw_share, max_rate);

	return ret;
}
61 
/* Apply @max_rate and @bw_share to the scheduling element of @vport.
 *
 * Returns -EIO if QoS is not enabled on the vport, otherwise the result of
 * the modify command. Failures are logged and reported through @extack.
 */
static int esw_qos_vport_config(struct mlx5_eswitch *esw,
				struct mlx5_vport *vport,
				u32 max_rate, u32 bw_share,
				struct netlink_ext_ack *extack)
{
	u32 elem_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	int ret;

	if (!vport->qos.enabled)
		return -EIO;

	ret = esw_qos_tsar_config(esw->dev, elem_ctx, vport->qos.esw_tsar_ix,
				  max_rate, bw_share);
	if (!ret) {
		trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate);
		return 0;
	}

	esw_warn(esw->dev,
		 "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
		 vport->vport, ret);
	NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");

	return ret;
}
88 
esw_qos_calculate_min_rate_divider(struct mlx5_eswitch * esw,struct mlx5_esw_rate_group * group,bool group_level)89 static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
90 					      struct mlx5_esw_rate_group *group,
91 					      bool group_level)
92 {
93 	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
94 	struct mlx5_vport *evport;
95 	u32 max_guarantee = 0;
96 	unsigned long i;
97 
98 	if (group_level) {
99 		struct mlx5_esw_rate_group *group;
100 
101 		list_for_each_entry(group, &esw->qos.groups, list) {
102 			if (group->min_rate < max_guarantee)
103 				continue;
104 			max_guarantee = group->min_rate;
105 		}
106 	} else {
107 		mlx5_esw_for_each_vport(esw, i, evport) {
108 			if (!evport->enabled || !evport->qos.enabled ||
109 			    evport->qos.group != group || evport->qos.min_rate < max_guarantee)
110 				continue;
111 			max_guarantee = evport->qos.min_rate;
112 		}
113 	}
114 
115 	if (max_guarantee)
116 		return max_t(u32, max_guarantee / fw_max_bw_share, 1);
117 
118 	/* If vports min rate divider is 0 but their group has bw_share configured, then
119 	 * need to set bw_share for vports to minimal value.
120 	 */
121 	if (!group_level && !max_guarantee && group && group->bw_share)
122 		return 1;
123 	return 0;
124 }
125 
/* Convert @min_rate into a BW share capped at @fw_max.
 * A zero @divider yields a BW share of 0.
 */
static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
{
	if (!divider)
		return 0;

	return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max);
}
133 
esw_qos_normalize_vports_min_rate(struct mlx5_eswitch * esw,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)134 static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
135 					     struct mlx5_esw_rate_group *group,
136 					     struct netlink_ext_ack *extack)
137 {
138 	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
139 	u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false);
140 	struct mlx5_vport *evport;
141 	unsigned long i;
142 	u32 bw_share;
143 	int err;
144 
145 	mlx5_esw_for_each_vport(esw, i, evport) {
146 		if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
147 			continue;
148 		bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);
149 
150 		if (bw_share == evport->qos.bw_share)
151 			continue;
152 
153 		err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack);
154 		if (err)
155 			return err;
156 
157 		evport->qos.bw_share = bw_share;
158 	}
159 
160 	return 0;
161 }
162 
/* Recompute the BW share of every group on the eswitch using @divider and
 * program the groups whose share changed, then renormalize the vports of each
 * reprogrammed group.
 *
 * Stops at, and returns, the first error.
 */
static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider,
					     struct netlink_ext_ack *extack)
{
	u32 max_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_esw_rate_group *grp;
	u32 new_share;
	int ret;

	list_for_each_entry(grp, &esw->qos.groups, list) {
		new_share = esw_qos_calc_bw_share(grp->min_rate, divider, max_share);
		if (grp->bw_share == new_share)
			continue;

		ret = esw_qos_group_config(esw, grp, grp->max_rate, new_share, extack);
		if (ret)
			return ret;

		grp->bw_share = new_share;

		/* All the group's vports need to be set with default bw_share
		 * to enable them with QOS
		 */
		ret = esw_qos_normalize_vports_min_rate(esw, grp, extack);
		if (ret)
			return ret;
	}

	return 0;
}
194 
/* Set the min rate of @evport and renormalize the BW shares of the vports in
 * its group. Must be called with esw->state_lock held.
 *
 * Returns -EOPNOTSUPP for a non-zero @min_rate when BW share is not supported,
 * 0 when the value is unchanged, otherwise the normalization result. On
 * failure the previous min_rate is restored in the vport state.
 */
int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw,
				    struct mlx5_vport *evport,
				    u32 min_rate,
				    struct netlink_ext_ack *extack)
{
	u32 max_share, old_min_rate;
	int ret;

	lockdep_assert_held(&esw->state_lock);

	max_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	if (min_rate &&
	    !(MLX5_CAP_QOS(esw->dev, esw_bw_share) && max_share >= MLX5_MIN_BW_SHARE))
		return -EOPNOTSUPP;

	old_min_rate = evport->qos.min_rate;
	if (old_min_rate == min_rate)
		return 0;

	evport->qos.min_rate = min_rate;
	ret = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
	if (ret)
		evport->qos.min_rate = old_min_rate;

	return ret;
}
221 
/* Set the max rate of @evport. Must be called with esw->state_lock held.
 *
 * Returns -EOPNOTSUPP for a non-zero @max_rate when rate limiting is not
 * supported, 0 when the value is unchanged, otherwise the result of the vport
 * configuration. The stored max_rate is updated only on success.
 */
int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
				    struct mlx5_vport *evport,
				    u32 max_rate,
				    struct netlink_ext_ack *extack)
{
	u32 hw_rate;
	int ret;

	lockdep_assert_held(&esw->state_lock);

	if (max_rate && !MLX5_CAP_QOS(esw->dev, esw_rate_limit))
		return -EOPNOTSUPP;
	if (evport->qos.max_rate == max_rate)
		return 0;

	/* If parent group has rate limit need to set to group
	 * value when new max rate is 0.
	 */
	if (!max_rate && evport->qos.group)
		hw_rate = evport->qos.group->max_rate;
	else
		hw_rate = max_rate;

	ret = esw_qos_vport_config(esw, evport, hw_rate, evport->qos.bw_share, extack);
	if (ret)
		return ret;

	evport->qos.max_rate = max_rate;
	return 0;
}
252 
/* Set the min rate of @group and renormalize the BW shares of all groups.
 *
 * Returns -EOPNOTSUPP when BW share is not supported and 0 when the value is
 * unchanged. If normalization fails, the previous min_rate is restored, a
 * second normalization pass attempts to bring back the previous shares, and
 * the original error is returned.
 */
static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
				      u32 min_rate, struct netlink_ext_ack *extack)
{
	u32 max_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	u32 old_min_rate, divider;
	int ret;

	if (!MLX5_CAP_QOS(esw->dev, esw_bw_share) || max_share < MLX5_MIN_BW_SHARE)
		return -EOPNOTSUPP;

	old_min_rate = group->min_rate;
	if (old_min_rate == min_rate)
		return 0;

	group->min_rate = min_rate;
	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
	ret = esw_qos_normalize_groups_min_rate(esw, divider, extack);
	if (!ret)
		return 0;

	group->min_rate = old_min_rate;
	NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed");

	/* Attempt restoring previous configuration */
	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
	if (esw_qos_normalize_groups_min_rate(esw, divider, extack))
		NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed");

	return ret;
}
283 
/* Set the max rate of @group and propagate it to the group's vports that have
 * no max rate of their own.
 *
 * Returns 0 when the value is unchanged, and the group configuration error if
 * programming the group fails (nothing is updated in that case). After the
 * group is updated, every matching vport is attempted even if an earlier one
 * failed, and the first vport failure, if any, is returned.
 */
static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
				      struct mlx5_esw_rate_group *group,
				      u32 max_rate, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport;
	unsigned long i;
	int first_err = 0;
	int err;

	if (group->max_rate == max_rate)
		return 0;

	err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack);
	if (err)
		return err;

	group->max_rate = max_rate;

	/* Any unlimited vports in the group should be set
	 * with the value of the group.
	 */
	mlx5_esw_for_each_vport(esw, i, vport) {
		if (!vport->enabled || !vport->qos.enabled ||
		    vport->qos.group != group || vport->qos.max_rate)
			continue;

		err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
		if (err) {
			NL_SET_ERR_MSG_MOD(extack,
					   "E-Switch vport implicit rate limit setting failed");
			/* Keep the first failure so that a later successful
			 * vport does not hide it from the caller.
			 */
			if (!first_err)
				first_err = err;
		}
	}

	return first_err;
}
317 
/* Create the scheduling element of @vport with @max_rate and @bw_share.
 *
 * The element is parented to the TSAR of the vport's group, or to the root
 * TSAR when the vport has no group. On success the element index is stored in
 * vport->qos.esw_tsar_ix. Failures are logged and returned.
 */
static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
					      struct mlx5_vport *vport,
					      u32 max_rate, u32 bw_share)
{
	u32 elem_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	void *elem_attr;
	u32 parent_ix;
	int ret;

	if (vport->qos.group)
		parent_ix = vport->qos.group->tsar_ix;
	else
		parent_ix = esw->qos.root_tsar_ix;

	MLX5_SET(scheduling_context, elem_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	MLX5_SET(scheduling_context, elem_ctx, parent_element_id, parent_ix);
	MLX5_SET(scheduling_context, elem_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, elem_ctx, bw_share, bw_share);

	elem_attr = MLX5_ADDR_OF(scheduling_context, elem_ctx, element_attributes);
	MLX5_SET(vport_element, elem_attr, vport_number, vport->vport);

	ret = mlx5_create_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
						 elem_ctx, &vport->qos.esw_tsar_ix);
	if (ret)
		esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, ret);

	return ret;
}
350 
esw_qos_update_group_scheduling_element(struct mlx5_eswitch * esw,struct mlx5_vport * vport,struct mlx5_esw_rate_group * curr_group,struct mlx5_esw_rate_group * new_group,struct netlink_ext_ack * extack)351 static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
352 						   struct mlx5_vport *vport,
353 						   struct mlx5_esw_rate_group *curr_group,
354 						   struct mlx5_esw_rate_group *new_group,
355 						   struct netlink_ext_ack *extack)
356 {
357 	u32 max_rate;
358 	int err;
359 
360 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
361 						  SCHEDULING_HIERARCHY_E_SWITCH,
362 						  vport->qos.esw_tsar_ix);
363 	if (err) {
364 		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
365 		return err;
366 	}
367 
368 	vport->qos.group = new_group;
369 	max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;
370 
371 	/* If vport is unlimited, we set the group's value.
372 	 * Therefore, if the group is limited it will apply to
373 	 * the vport as well and if not, vport will remain unlimited.
374 	 */
375 	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
376 	if (err) {
377 		NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
378 		goto err_sched;
379 	}
380 
381 	return 0;
382 
383 err_sched:
384 	vport->qos.group = curr_group;
385 	max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
386 	if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
387 		esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
388 			 vport->vport);
389 
390 	return err;
391 }
392 
esw_qos_vport_update_group(struct mlx5_eswitch * esw,struct mlx5_vport * vport,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)393 static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
394 				      struct mlx5_vport *vport,
395 				      struct mlx5_esw_rate_group *group,
396 				      struct netlink_ext_ack *extack)
397 {
398 	struct mlx5_esw_rate_group *new_group, *curr_group;
399 	int err;
400 
401 	if (!vport->enabled)
402 		return -EINVAL;
403 
404 	curr_group = vport->qos.group;
405 	new_group = group ?: esw->qos.group0;
406 	if (curr_group == new_group)
407 		return 0;
408 
409 	err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
410 	if (err)
411 		return err;
412 
413 	/* Recalculate bw share weights of old and new groups */
414 	if (vport->qos.bw_share || new_group->bw_share) {
415 		esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
416 		esw_qos_normalize_vports_min_rate(esw, new_group, extack);
417 	}
418 
419 	return 0;
420 }
421 
422 static struct mlx5_esw_rate_group *
esw_qos_create_rate_group(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)423 esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
424 {
425 	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
426 	struct mlx5_esw_rate_group *group;
427 	u32 divider;
428 	int err;
429 
430 	if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
431 		return ERR_PTR(-EOPNOTSUPP);
432 
433 	group = kzalloc(sizeof(*group), GFP_KERNEL);
434 	if (!group)
435 		return ERR_PTR(-ENOMEM);
436 
437 	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
438 		 esw->qos.root_tsar_ix);
439 	err = mlx5_create_scheduling_element_cmd(esw->dev,
440 						 SCHEDULING_HIERARCHY_E_SWITCH,
441 						 tsar_ctx,
442 						 &group->tsar_ix);
443 	if (err) {
444 		NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
445 		goto err_sched_elem;
446 	}
447 
448 	list_add_tail(&group->list, &esw->qos.groups);
449 
450 	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
451 	if (divider) {
452 		err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
453 		if (err) {
454 			NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
455 			goto err_min_rate;
456 		}
457 	}
458 	trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix);
459 
460 	return group;
461 
462 err_min_rate:
463 	list_del(&group->list);
464 	if (mlx5_destroy_scheduling_element_cmd(esw->dev,
465 						SCHEDULING_HIERARCHY_E_SWITCH,
466 						group->tsar_ix))
467 		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed");
468 err_sched_elem:
469 	kfree(group);
470 	return ERR_PTR(err);
471 }
472 
esw_qos_destroy_rate_group(struct mlx5_eswitch * esw,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)473 static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
474 				      struct mlx5_esw_rate_group *group,
475 				      struct netlink_ext_ack *extack)
476 {
477 	u32 divider;
478 	int err;
479 
480 	list_del(&group->list);
481 
482 	divider = esw_qos_calculate_min_rate_divider(esw, NULL, true);
483 	err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
484 	if (err)
485 		NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed");
486 
487 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
488 						  SCHEDULING_HIERARCHY_E_SWITCH,
489 						  group->tsar_ix);
490 	if (err)
491 		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");
492 
493 	trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);
494 	kfree(group);
495 	return err;
496 }
497 
esw_qos_element_type_supported(struct mlx5_core_dev * dev,int type)498 static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
499 {
500 	switch (type) {
501 	case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
502 		return MLX5_CAP_QOS(dev, esw_element_type) &
503 		       ELEMENT_TYPE_CAP_MASK_TASR;
504 	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
505 		return MLX5_CAP_QOS(dev, esw_element_type) &
506 		       ELEMENT_TYPE_CAP_MASK_VPORT;
507 	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
508 		return MLX5_CAP_QOS(dev, esw_element_type) &
509 		       ELEMENT_TYPE_CAP_MASK_VPORT_TC;
510 	case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
511 		return MLX5_CAP_QOS(dev, esw_element_type) &
512 		       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
513 	}
514 	return false;
515 }
516 
mlx5_esw_qos_create(struct mlx5_eswitch * esw)517 void mlx5_esw_qos_create(struct mlx5_eswitch *esw)
518 {
519 	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
520 	struct mlx5_core_dev *dev = esw->dev;
521 	__be32 *attr;
522 	int err;
523 
524 	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
525 		return;
526 
527 	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
528 		return;
529 
530 	mutex_lock(&esw->state_lock);
531 	if (esw->qos.enabled)
532 		goto unlock;
533 
534 	MLX5_SET(scheduling_context, tsar_ctx, element_type,
535 		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
536 
537 	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
538 	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
539 
540 	err = mlx5_create_scheduling_element_cmd(dev,
541 						 SCHEDULING_HIERARCHY_E_SWITCH,
542 						 tsar_ctx,
543 						 &esw->qos.root_tsar_ix);
544 	if (err) {
545 		esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
546 		goto unlock;
547 	}
548 
549 	INIT_LIST_HEAD(&esw->qos.groups);
550 	if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
551 		esw->qos.group0 = esw_qos_create_rate_group(esw, NULL);
552 		if (IS_ERR(esw->qos.group0)) {
553 			esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
554 				 PTR_ERR(esw->qos.group0));
555 			goto err_group0;
556 		}
557 	}
558 	esw->qos.enabled = true;
559 unlock:
560 	mutex_unlock(&esw->state_lock);
561 	return;
562 
563 err_group0:
564 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
565 						  SCHEDULING_HIERARCHY_E_SWITCH,
566 						  esw->qos.root_tsar_ix);
567 	if (err)
568 		esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
569 	mutex_unlock(&esw->state_lock);
570 }
571 
mlx5_esw_qos_destroy(struct mlx5_eswitch * esw)572 void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw)
573 {
574 	struct devlink *devlink = priv_to_devlink(esw->dev);
575 	int err;
576 
577 	devlink_rate_nodes_destroy(devlink);
578 	mutex_lock(&esw->state_lock);
579 	if (!esw->qos.enabled)
580 		goto unlock;
581 
582 	if (esw->qos.group0)
583 		esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);
584 
585 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
586 						  SCHEDULING_HIERARCHY_E_SWITCH,
587 						  esw->qos.root_tsar_ix);
588 	if (err)
589 		esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
590 
591 	esw->qos.enabled = false;
592 unlock:
593 	mutex_unlock(&esw->state_lock);
594 }
595 
/* Enable QoS on @vport: attach it to group0 and create its scheduling element
 * with @max_rate and @bw_share. Must be called with esw->state_lock held.
 *
 * Returns 0 without doing anything when E-Switch QoS is disabled, -EEXIST
 * when the vport already has QoS enabled, otherwise the result of the element
 * creation.
 */
int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
			      u32 max_rate, u32 bw_share)
{
	int ret;

	lockdep_assert_held(&esw->state_lock);

	if (!esw->qos.enabled)
		return 0;
	if (vport->qos.enabled)
		return -EEXIST;

	vport->qos.group = esw->qos.group0;

	ret = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
	if (ret)
		return ret;

	vport->qos.enabled = true;
	trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);

	return 0;
}
618 
mlx5_esw_qos_vport_disable(struct mlx5_eswitch * esw,struct mlx5_vport * vport)619 void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
620 {
621 	int err;
622 
623 	lockdep_assert_held(&esw->state_lock);
624 	if (!esw->qos.enabled || !vport->qos.enabled)
625 		return;
626 	WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
627 	     "Disabling QoS on port before detaching it from group");
628 
629 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
630 						  SCHEDULING_HIERARCHY_E_SWITCH,
631 						  vport->qos.esw_tsar_ix);
632 	if (err)
633 		esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
634 			 vport->vport, err);
635 
636 	vport->qos.enabled = false;
637 	trace_mlx5_esw_vport_qos_destroy(vport);
638 }
639 
/* Set only the max average BW of vport @vport_num's scheduling element to
 * @rate_mbps.
 *
 * Returns the vport lookup error, -EOPNOTSUPP when QoS is not enabled on the
 * vport, otherwise the result of the modify command.
 */
int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);

	if (IS_ERR(vport))
		return PTR_ERR(vport);
	if (!vport->qos.enabled)
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, rate_mbps);

	return mlx5_modify_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
						  sched_ctx, vport->qos.esw_tsar_ix,
						  MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW);
}
662 
663 #define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
664 
665 /* Converts bytes per second value passed in a pointer into megabits per
666  * second, rewriting last. If converted rate exceed link speed or is not a
667  * fraction of Mbps - returns error.
668  */
esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev * mdev,const char * name,u64 * rate,struct netlink_ext_ack * extack)669 static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
670 					u64 *rate, struct netlink_ext_ack *extack)
671 {
672 	u32 link_speed_max, reminder;
673 	u64 value;
674 	int err;
675 
676 	err = mlx5e_port_max_linkspeed(mdev, &link_speed_max);
677 	if (err) {
678 		NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");
679 		return err;
680 	}
681 
682 	value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &reminder);
683 	if (reminder) {
684 		pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
685 		       name, *rate);
686 		NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
687 		return -EINVAL;
688 	}
689 
690 	if (value > link_speed_max) {
691 		pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n",
692 		       name, value, link_speed_max);
693 		NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed");
694 		return -EINVAL;
695 	}
696 
697 	*rate = value;
698 	return 0;
699 }
700 
701 /* Eswitch devlink rate API */
702 
/* devlink rate leaf callback: set the tx_share (min rate) of the vport passed
 * in @priv. @tx_share is converted from Bps to Mbps before being applied under
 * esw->state_lock. Returns -EPERM when eswitch operations are not allowed.
 */
int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int ret;

	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	ret = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
	if (ret)
		return ret;

	mutex_lock(&esw->state_lock);
	ret = mlx5_esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
723 
/* devlink rate leaf callback: set the tx_max (max rate) of the vport passed in
 * @priv. @tx_max is converted from Bps to Mbps before being applied under
 * esw->state_lock. Returns -EPERM when eswitch operations are not allowed.
 */
int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int ret;

	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	ret = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
	if (ret)
		return ret;

	mutex_lock(&esw->state_lock);
	ret = mlx5_esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
744 
/* devlink rate node callback: set the tx_share (min rate) of the rate group
 * passed in @priv. @tx_share is converted from Bps to Mbps before being
 * applied under esw->state_lock.
 */
int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *mdev = devlink_priv(rate_node->devlink);
	struct mlx5_eswitch *esw = mdev->priv.eswitch;
	struct mlx5_esw_rate_group *grp = priv;
	int ret;

	ret = esw_qos_devlink_rate_to_mbps(mdev, "tx_share", &tx_share, extack);
	if (ret)
		return ret;

	mutex_lock(&esw->state_lock);
	ret = esw_qos_set_group_min_rate(esw, grp, tx_share, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
762 
/* devlink rate node callback: set the tx_max (max rate) of the rate group
 * passed in @priv. @tx_max is converted from Bps to Mbps before being applied
 * under esw->state_lock.
 */
int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *mdev = devlink_priv(rate_node->devlink);
	struct mlx5_eswitch *esw = mdev->priv.eswitch;
	struct mlx5_esw_rate_group *grp = priv;
	int ret;

	ret = esw_qos_devlink_rate_to_mbps(mdev, "tx_max", &tx_max, extack);
	if (ret)
		return ret;

	mutex_lock(&esw->state_lock);
	ret = esw_qos_set_group_max_rate(esw, grp, tx_max, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
780 
mlx5_esw_devlink_rate_node_new(struct devlink_rate * rate_node,void ** priv,struct netlink_ext_ack * extack)781 int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
782 				   struct netlink_ext_ack *extack)
783 {
784 	struct mlx5_esw_rate_group *group;
785 	struct mlx5_eswitch *esw;
786 	int err = 0;
787 
788 	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
789 	if (IS_ERR(esw))
790 		return PTR_ERR(esw);
791 
792 	mutex_lock(&esw->state_lock);
793 	if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
794 		NL_SET_ERR_MSG_MOD(extack,
795 				   "Rate node creation supported only in switchdev mode");
796 		err = -EOPNOTSUPP;
797 		goto unlock;
798 	}
799 
800 	group = esw_qos_create_rate_group(esw, extack);
801 	if (IS_ERR(group)) {
802 		err = PTR_ERR(group);
803 		goto unlock;
804 	}
805 
806 	*priv = group;
807 unlock:
808 	mutex_unlock(&esw->state_lock);
809 	return err;
810 }
811 
mlx5_esw_devlink_rate_node_del(struct devlink_rate * rate_node,void * priv,struct netlink_ext_ack * extack)812 int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
813 				   struct netlink_ext_ack *extack)
814 {
815 	struct mlx5_esw_rate_group *group = priv;
816 	struct mlx5_eswitch *esw;
817 	int err;
818 
819 	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
820 	if (IS_ERR(esw))
821 		return PTR_ERR(esw);
822 
823 	mutex_lock(&esw->state_lock);
824 	err = esw_qos_destroy_rate_group(esw, group, extack);
825 	mutex_unlock(&esw->state_lock);
826 	return err;
827 }
828 
mlx5_esw_qos_vport_update_group(struct mlx5_eswitch * esw,struct mlx5_vport * vport,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)829 int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
830 				    struct mlx5_vport *vport,
831 				    struct mlx5_esw_rate_group *group,
832 				    struct netlink_ext_ack *extack)
833 {
834 	int err;
835 
836 	mutex_lock(&esw->state_lock);
837 	err = esw_qos_vport_update_group(esw, vport, group, extack);
838 	mutex_unlock(&esw->state_lock);
839 	return err;
840 }
841 
mlx5_esw_devlink_rate_parent_set(struct devlink_rate * devlink_rate,struct devlink_rate * parent,void * priv,void * parent_priv,struct netlink_ext_ack * extack)842 int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
843 				     struct devlink_rate *parent,
844 				     void *priv, void *parent_priv,
845 				     struct netlink_ext_ack *extack)
846 {
847 	struct mlx5_esw_rate_group *group;
848 	struct mlx5_vport *vport = priv;
849 
850 	if (!parent)
851 		return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
852 						       vport, NULL, extack);
853 
854 	group = parent_priv;
855 	return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
856 }
857