// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */

#include "eswitch.h"
#include "esw/qos.h"
#include "en/port.h"
#define CREATE_TRACE_POINTS
#include "diag/qos_tracepoint.h"

/* Minimum supported BW share value by the HW is 1 Mbit/sec */
#define MLX5_MIN_BW_SHARE 1

#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
	min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit)
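
/* Illustration only, with hypothetical numbers: if the device reports
 * max_tsar_bw_share == 100 and the largest sibling guarantee is 10000 Mbps,
 * esw_qos_calculate_min_rate_divider() below returns 10000 / 100 = 100, so a
 * vport with min_rate 5000 Mbps is programmed with
 * bw_share = DIV_ROUND_UP(5000, 100) = 50, clamped to [MLX5_MIN_BW_SHARE, 100].
 */

/* One rate group corresponds to one TSAR scheduling element under the root
 * TSAR. max_rate and min_rate are kept in Mbps (the devlink callbacks below
 * convert from bytes per second), bw_share is the relative weight last
 * programmed to FW, and list links the group into esw->qos.groups.
 */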
struct mlx5_esw_rate_group {
	u32 tsar_ix;
	u32 max_rate;
	u32 min_rate;
	u32 bw_share;
	struct list_head list;
};

static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
			       u32 tsar_ix, u32 max_rate, u32 bw_share)
{
	u32 bitmask = 0;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
	bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
	bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;

	return mlx5_modify_scheduling_element_cmd(dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  sched_ctx,
						  tsar_ix,
						  bitmask);
}

static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = esw->dev;
	int err;

	err = esw_qos_tsar_config(dev, sched_ctx,
				  group->tsar_ix,
				  max_rate, bw_share);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");

	trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate);

	return err;
}

static int esw_qos_vport_config(struct mlx5_eswitch *esw,
				struct mlx5_vport *vport,
				u32 max_rate, u32 bw_share,
				struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = esw->dev;
	int err;

	if (!vport->qos.enabled)
		return -EIO;

	err = esw_qos_tsar_config(dev, sched_ctx, vport->qos.esw_tsar_ix,
				  max_rate, bw_share);
	if (err) {
		esw_warn(esw->dev,
			 "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, err);
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");
		return err;
	}

	trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate);

	return 0;
}
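
/* Compute the divider used to normalize min_rate values into bw_share units:
 * the largest configured guarantee among the siblings (groups under the root
 * TSAR, or vports within @group) divided by the FW's max_tsar_bw_share, so
 * that the largest guarantee maps to the maximum bw_share value.
 */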
static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
					      struct mlx5_esw_rate_group *group,
					      bool group_level)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_vport *evport;
	u32 max_guarantee = 0;
	unsigned long i;

	if (group_level) {
		struct mlx5_esw_rate_group *group;

		list_for_each_entry(group, &esw->qos.groups, list) {
			if (group->min_rate < max_guarantee)
				continue;
			max_guarantee = group->min_rate;
		}
	} else {
		mlx5_esw_for_each_vport(esw, i, evport) {
			if (!evport->enabled || !evport->qos.enabled ||
			    evport->qos.group != group || evport->qos.min_rate < max_guarantee)
				continue;
			max_guarantee = evport->qos.min_rate;
		}
	}

	if (max_guarantee)
		return max_t(u32, max_guarantee / fw_max_bw_share, 1);

	/* If the vports' min rate divider is 0 but their group has a bw_share
	 * configured, then the vports' bw_share still needs to be set to the
	 * minimal value.
	 */
	if (!group_level && !max_guarantee && group && group->bw_share)
		return 1;
	return 0;
}

static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
{
	if (divider)
		return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max);

	return 0;
}

static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
					     struct mlx5_esw_rate_group *group,
					     struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false);
	struct mlx5_vport *evport;
	unsigned long i;
	u32 bw_share;
	int err;

	mlx5_esw_for_each_vport(esw, i, evport) {
		if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
			continue;
		bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);

		if (bw_share == evport->qos.bw_share)
			continue;

		err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack);
		if (err)
			return err;

		evport->qos.bw_share = bw_share;
	}

	return 0;
}

static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider,
					     struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_esw_rate_group *group;
	u32 bw_share;
	int err;

	list_for_each_entry(group, &esw->qos.groups, list) {
		bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share);

		if (bw_share == group->bw_share)
			continue;

		err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack);
		if (err)
			return err;

		group->bw_share = bw_share;

		/* All the group's vports need to be set with the default
		 * bw_share as well, to keep QoS enabled for them.
		 */
		err = esw_qos_normalize_vports_min_rate(esw, group, extack);

		if (err)
			return err;
	}

	return 0;
}
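
/* Set the vport's guaranteed rate (in Mbps) and renormalize the bw_share of
 * all vports in the same group. Called with esw->state_lock held.
 */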
int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw,
				    struct mlx5_vport *evport,
				    u32 min_rate,
				    struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share, previous_min_rate;
	bool min_rate_supported;
	int err;

	lockdep_assert_held(&esw->state_lock);
	fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
				fw_max_bw_share >= MLX5_MIN_BW_SHARE;
	if (min_rate && !min_rate_supported)
		return -EOPNOTSUPP;
	if (min_rate == evport->qos.min_rate)
		return 0;

	previous_min_rate = evport->qos.min_rate;
	evport->qos.min_rate = min_rate;
	err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
	if (err)
		evport->qos.min_rate = previous_min_rate;

	return err;
}

int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
				    struct mlx5_vport *evport,
				    u32 max_rate,
				    struct netlink_ext_ack *extack)
{
	u32 act_max_rate = max_rate;
	bool max_rate_supported;
	int err;

	lockdep_assert_held(&esw->state_lock);
	max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);

	if (max_rate && !max_rate_supported)
		return -EOPNOTSUPP;
	if (max_rate == evport->qos.max_rate)
		return 0;

	/* If the parent group has a rate limit, the vport must fall back to
	 * the group's value when its new max rate is 0 (unlimited).
	 */
	if (evport->qos.group && !max_rate)
		act_max_rate = evport->qos.group->max_rate;

	err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack);

	if (!err)
		evport->qos.max_rate = max_rate;

	return err;
}

static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
				      u32 min_rate, struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_core_dev *dev = esw->dev;
	u32 previous_min_rate, divider;
	int err;

	if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE))
		return -EOPNOTSUPP;

	if (min_rate == group->min_rate)
		return 0;

	previous_min_rate = group->min_rate;
	group->min_rate = min_rate;
	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
	err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
	if (err) {
		group->min_rate = previous_min_rate;
		NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed");

		/* Attempt restoring previous configuration */
		divider = esw_qos_calculate_min_rate_divider(esw, group, true);
		if (esw_qos_normalize_groups_min_rate(esw, divider, extack))
			NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed");
	}

	return err;
}

static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
				      struct mlx5_esw_rate_group *group,
				      u32 max_rate, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport;
	unsigned long i;
	int err;

	if (group->max_rate == max_rate)
		return 0;

	err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack);
	if (err)
		return err;

	group->max_rate = max_rate;

	/* Any unlimited vports in the group should now be limited to the
	 * group's value.
	 */
	mlx5_esw_for_each_vport(esw, i, vport) {
		if (!vport->enabled || !vport->qos.enabled ||
		    vport->qos.group != group || vport->qos.max_rate)
			continue;

		err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
		if (err)
			NL_SET_ERR_MSG_MOD(extack,
					   "E-Switch vport implicit rate limit setting failed");
	}

	return err;
}
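
/* Create the FW scheduling element for a vport, attached under its group's
 * TSAR (or under the root TSAR when the vport has no group), carrying the
 * requested max_average_bw and bw_share.
 */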
static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
					      struct mlx5_vport *vport,
					      u32 max_rate, u32 bw_share)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_esw_rate_group *group = vport->qos.group;
	struct mlx5_core_dev *dev = esw->dev;
	u32 parent_tsar_ix;
	void *vport_elem;
	int err;

	parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);

	err = mlx5_create_scheduling_element_cmd(dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 sched_ctx,
						 &vport->qos.esw_tsar_ix);
	if (err) {
		esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, err);
		return err;
	}

	return 0;
}
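
/* Moving a vport between groups is implemented by destroying its scheduling
 * element and recreating it under the new group's TSAR; on failure the
 * element is recreated under the current group to restore the previous state.
 */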
static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
						   struct mlx5_vport *vport,
						   struct mlx5_esw_rate_group *curr_group,
						   struct mlx5_esw_rate_group *new_group,
						   struct netlink_ext_ack *extack)
{
	u32 max_rate;
	int err;

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  vport->qos.esw_tsar_ix);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
		return err;
	}

	vport->qos.group = new_group;
	max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;

	/* If the vport is unlimited, we use the group's value: if the group is
	 * limited, its limit applies to the vport as well; if not, the vport
	 * remains unlimited.
	 */
	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
		goto err_sched;
	}

	return 0;

err_sched:
	vport->qos.group = curr_group;
	max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
	if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
		esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
			 vport->vport);

	return err;
}

static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
				      struct mlx5_vport *vport,
				      struct mlx5_esw_rate_group *group,
				      struct netlink_ext_ack *extack)
{
	struct mlx5_esw_rate_group *new_group, *curr_group;
	int err;

	if (!vport->enabled)
		return -EINVAL;

	curr_group = vport->qos.group;
	new_group = group ?: esw->qos.group0;
	if (curr_group == new_group)
		return 0;

	err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
	if (err)
		return err;

	/* Recalculate bw share weights of the old and new groups */
	if (vport->qos.bw_share || new_group->bw_share) {
		esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
		esw_qos_normalize_vports_min_rate(esw, new_group, extack);
	}

	return 0;
}
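
/* Create a new rate group: a TSAR scheduling element under the root TSAR.
 * If sibling groups already have min-rate guarantees configured, all groups
 * are renormalized so the new group starts with at least the minimal bw_share.
 */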
static struct mlx5_esw_rate_group *
esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_esw_rate_group *group;
	u32 divider;
	int err;

	if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
		return ERR_PTR(-EOPNOTSUPP);

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
		 esw->qos.root_tsar_ix);
	err = mlx5_create_scheduling_element_cmd(esw->dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 tsar_ctx,
						 &group->tsar_ix);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
		goto err_sched_elem;
	}

	list_add_tail(&group->list, &esw->qos.groups);

	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
	if (divider) {
		err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
		if (err) {
			NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
			goto err_min_rate;
		}
	}
	trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix);

	return group;

err_min_rate:
	list_del(&group->list);
	if (mlx5_destroy_scheduling_element_cmd(esw->dev,
						SCHEDULING_HIERARCHY_E_SWITCH,
						group->tsar_ix))
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed");
err_sched_elem:
	kfree(group);
	return ERR_PTR(err);
}

static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
				      struct mlx5_esw_rate_group *group,
				      struct netlink_ext_ack *extack)
{
	u32 divider;
	int err;

	list_del(&group->list);

	divider = esw_qos_calculate_min_rate_divider(esw, NULL, true);
	err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed");

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  group->tsar_ix);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");

	trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);
	kfree(group);
	return err;
}

static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
{
	switch (type) {
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_TASR;
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_VPORT;
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_VPORT_TC;
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
	}
	return false;
}
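
/* Set up the E-Switch QoS hierarchy: a root DWRR TSAR, with rate groups as
 * child TSARs and vport scheduling elements below them. When the device
 * supports scheduling depth, group0 is created as the default group for
 * vports that are not assigned to an explicit rate node.
 */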
void mlx5_esw_qos_create(struct mlx5_eswitch *esw)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = esw->dev;
	__be32 *attr;
	int err;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return;

	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
		return;

	mutex_lock(&esw->state_lock);
	if (esw->qos.enabled)
		goto unlock;

	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);

	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);

	err = mlx5_create_scheduling_element_cmd(dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 tsar_ctx,
						 &esw->qos.root_tsar_ix);
	if (err) {
		esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
		goto unlock;
	}

	INIT_LIST_HEAD(&esw->qos.groups);
	if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
		esw->qos.group0 = esw_qos_create_rate_group(esw, NULL);
		if (IS_ERR(esw->qos.group0)) {
			esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
				 PTR_ERR(esw->qos.group0));
			goto err_group0;
		}
	}
	esw->qos.enabled = true;
unlock:
	mutex_unlock(&esw->state_lock);
	return;

err_group0:
	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  esw->qos.root_tsar_ix);
	if (err)
		esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
	mutex_unlock(&esw->state_lock);
}

void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw)
{
	struct devlink *devlink = priv_to_devlink(esw->dev);
	int err;

	devlink_rate_nodes_destroy(devlink);
	mutex_lock(&esw->state_lock);
	if (!esw->qos.enabled)
		goto unlock;

	if (esw->qos.group0)
		esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  esw->qos.root_tsar_ix);
	if (err)
		esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);

	esw->qos.enabled = false;
unlock:
	mutex_unlock(&esw->state_lock);
}

int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
			      u32 max_rate, u32 bw_share)
{
	int err;

	lockdep_assert_held(&esw->state_lock);
	if (!esw->qos.enabled)
		return 0;

	if (vport->qos.enabled)
		return -EEXIST;

	vport->qos.group = esw->qos.group0;

	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
	if (!err) {
		vport->qos.enabled = true;
		trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);
	}

	return err;
}

void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
	int err;

	lockdep_assert_held(&esw->state_lock);
	if (!esw->qos.enabled || !vport->qos.enabled)
		return;
	WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
	     "Disabling QoS on port before detaching it from group");

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  vport->qos.esw_tsar_ix);
	if (err)
		esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, err);

	vport->qos.enabled = false;
	trace_mlx5_esw_vport_qos_destroy(vport);
}
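
/* Update only the max rate (max_average_bw, in Mbps) of an already-enabled
 * vport scheduling element, leaving its bw_share untouched.
 */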
int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
	u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_vport *vport;
	u32 bitmask;

	vport = mlx5_eswitch_get_vport(esw, vport_num);
	if (IS_ERR(vport))
		return PTR_ERR(vport);

	if (!vport->qos.enabled)
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
	bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;

	return mlx5_modify_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  ctx,
						  vport->qos.esw_tsar_ix,
						  bitmask);
}

#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
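
/* For example, a devlink rate of 1250000000 Bps divides evenly by 125000 and
 * converts to 10000 Mbps (10 Gbps); a value that is not a whole multiple of
 * 1 Mbps is rejected below.
 */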

/* Convert a bytes-per-second value, passed by pointer, into megabits per
 * second, rewriting the value in place. Returns an error if the converted
 * rate exceeds the link speed or is not a whole number of Mbps.
 */
static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
					u64 *rate, struct netlink_ext_ack *extack)
{
	u32 link_speed_max, remainder;
	u64 value;
	int err;

	err = mlx5e_port_max_linkspeed(mdev, &link_speed_max);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");
		return err;
	}

	value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
	if (remainder) {
		pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
		       name, *rate);
		NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
		return -EINVAL;
	}

	if (value > link_speed_max) {
		pr_err("%s rate value %lluMbps exceeds link maximum speed %u.\n",
		       name, value, link_speed_max);
		NL_SET_ERR_MSG_MOD(extack, "TX rate value exceeds link maximum speed");
		return -EINVAL;
	}

	*rate = value;
	return 0;
}

/* Eswitch devlink rate API */
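
/* devlink rate leaves map to vports and rate nodes map to rate groups;
 * tx_share programs the guaranteed (min) rate and tx_max the rate limit,
 * both converted from Bps to Mbps before being applied.
 */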
int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw;
	int err;

	esw = vport->dev->priv.eswitch;
	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
	if (err)
		return err;

	mutex_lock(&esw->state_lock);
	err = mlx5_esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw;
	int err;

	esw = vport->dev->priv.eswitch;
	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
	if (err)
		return err;

	mutex_lock(&esw->state_lock);
	err = mlx5_esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	struct mlx5_esw_rate_group *group = priv;
	int err;

	err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack);
	if (err)
		return err;

	mutex_lock(&esw->state_lock);
	err = esw_qos_set_group_min_rate(esw, group, tx_share, extack);
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	struct mlx5_esw_rate_group *group = priv;
	int err;

	err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack);
	if (err)
		return err;

	mutex_lock(&esw->state_lock);
	err = esw_qos_set_group_max_rate(esw, group, tx_max, extack);
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_rate_group *group;
	struct mlx5_eswitch *esw;
	int err = 0;

	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
	if (IS_ERR(esw))
		return PTR_ERR(esw);

	mutex_lock(&esw->state_lock);
	if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Rate node creation supported only in switchdev mode");
		err = -EOPNOTSUPP;
		goto unlock;
	}

	group = esw_qos_create_rate_group(esw, extack);
	if (IS_ERR(group)) {
		err = PTR_ERR(group);
		goto unlock;
	}

	*priv = group;
unlock:
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_rate_group *group = priv;
	struct mlx5_eswitch *esw;
	int err;

	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
	if (IS_ERR(esw))
		return PTR_ERR(esw);

	mutex_lock(&esw->state_lock);
	err = esw_qos_destroy_rate_group(esw, group, extack);
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
				    struct mlx5_vport *vport,
				    struct mlx5_esw_rate_group *group,
				    struct netlink_ext_ack *extack)
{
	int err;

	mutex_lock(&esw->state_lock);
	err = esw_qos_vport_update_group(esw, vport, group, extack);
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
				     struct devlink_rate *parent,
				     void *priv, void *parent_priv,
				     struct netlink_ext_ack *extack)
{
	struct mlx5_esw_rate_group *group;
	struct mlx5_vport *vport = priv;

	if (!parent)
		return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
						       vport, NULL, extack);

	group = parent_priv;
	return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
}