1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
3
4 #include <linux/etherdevice.h>
5 #include <linux/idr.h>
6 #include <linux/mlx5/driver.h>
7 #include <linux/mlx5/mlx5_ifc.h>
8 #include <linux/mlx5/vport.h>
9 #include <linux/mlx5/fs.h>
10 #include "mlx5_core.h"
11 #include "eswitch.h"
12 #include "en.h"
13 #include "en_tc.h"
14 #include "fs_core.h"
15 #include "esw/indir_table.h"
16 #include "lib/fs_chains.h"
17 #include "en/mod_hdr.h"
18
19 #define MLX5_ESW_INDIR_TABLE_SIZE 128
20 #define MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX (MLX5_ESW_INDIR_TABLE_SIZE - 2)
21 #define MLX5_ESW_INDIR_TABLE_FWD_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 1)
22
/* One recirculation steering rule installed in an indirect table entry.
 * Rules live on the owning entry's recirc_rules list and are shared by
 * refcount between flows with the same (vni, dst_ip) key — see
 * mlx5_esw_indir_table_rule_lookup().
 */
struct mlx5_esw_indir_table_rule {
	struct list_head list;			/* linkage on entry->recirc_rules */
	struct mlx5_flow_handle *handle;	/* installed steering rule */
	union {
		__be32 v4;
		struct in6_addr v6;
	} dst_ip;				/* outer destination IP (v4 or v6) matched by the rule */
	u32 vni;				/* tunnel VNI matched by the rule */
	struct mlx5_modify_hdr *mh;		/* metadata-rewrite object attached to the rule */
	refcount_t refcnt;			/* one reference per flow sharing this (vni, dst_ip) */
};
34
/* Per-(vport, ip_version) indirect flow table.
 * The table holds two groups: a recirculation group (flow indexes
 * 0..MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX) with per-(dst_ip, vni) rules,
 * and a one-entry forward group (index MLX5_ESW_INDIR_TABLE_FWD_IDX)
 * with a match-all rule that forwards to the destination vport.
 * Entries are hashed in mlx5_esw_indir_table by (vport << 16 | ip_version).
 */
struct mlx5_esw_indir_table_entry {
	struct hlist_node hlist;		/* linkage in indir->table hash */
	struct mlx5_flow_table *ft;		/* the indirect FDB flow table */
	struct mlx5_flow_group *recirc_grp;	/* group holding recirculation rules */
	struct mlx5_flow_group *fwd_grp;	/* one-entry group for the fwd rule */
	struct mlx5_flow_handle *fwd_rule;	/* match-all rule -> dest vport */
	struct list_head recirc_rules;		/* list of mlx5_esw_indir_table_rule */
	int recirc_cnt;				/* number of recirc rules installed */
	int fwd_ref;				/* references from non-decap users */

	u16 vport;				/* destination VF vport number */
	u8 ip_version;				/* 4 or 6; part of the hash key */
};
48
/* Registry of indirect table entries for one eswitch, keyed by
 * (vport << 16 | ip_version). All lookups/creates/puts are serialized
 * by @lock.
 */
struct mlx5_esw_indir_table {
	struct mutex lock; /* protects table */
	DECLARE_HASHTABLE(table, 8);
};
53
54 struct mlx5_esw_indir_table *
mlx5_esw_indir_table_init(void)55 mlx5_esw_indir_table_init(void)
56 {
57 struct mlx5_esw_indir_table *indir = kvzalloc(sizeof(*indir), GFP_KERNEL);
58
59 if (!indir)
60 return ERR_PTR(-ENOMEM);
61
62 mutex_init(&indir->lock);
63 hash_init(indir->table);
64 return indir;
65 }
66
67 void
mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table * indir)68 mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir)
69 {
70 mutex_destroy(&indir->lock);
71 kvfree(indir);
72 }
73
74 bool
mlx5_esw_indir_table_needed(struct mlx5_eswitch * esw,struct mlx5_flow_attr * attr,u16 vport_num,struct mlx5_core_dev * dest_mdev)75 mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
76 struct mlx5_flow_attr *attr,
77 u16 vport_num,
78 struct mlx5_core_dev *dest_mdev)
79 {
80 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
81
82 /* Use indirect table for all IP traffic from UL to VF with vport
83 * destination when source rewrite flag is set.
84 */
85 return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK &&
86 mlx5_eswitch_is_vf_vport(esw, vport_num) &&
87 esw->dev == dest_mdev &&
88 attr->ip_version &&
89 attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
90 }
91
92 u16
mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr * attr)93 mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr)
94 {
95 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
96
97 return esw_attr->rx_tun_attr ? esw_attr->rx_tun_attr->decap_vport : 0;
98 }
99
100 static struct mlx5_esw_indir_table_rule *
mlx5_esw_indir_table_rule_lookup(struct mlx5_esw_indir_table_entry * e,struct mlx5_esw_flow_attr * attr)101 mlx5_esw_indir_table_rule_lookup(struct mlx5_esw_indir_table_entry *e,
102 struct mlx5_esw_flow_attr *attr)
103 {
104 struct mlx5_esw_indir_table_rule *rule;
105
106 list_for_each_entry(rule, &e->recirc_rules, list)
107 if (rule->vni == attr->rx_tun_attr->vni &&
108 !memcmp(&rule->dst_ip, &attr->rx_tun_attr->dst_ip,
109 sizeof(attr->rx_tun_attr->dst_ip)))
110 goto found;
111 return NULL;
112
113 found:
114 refcount_inc(&rule->refcnt);
115 return rule;
116 }
117
/* Install (or share) a recirculation rule on indirect table @e.
 *
 * The rule matches decapped tunnel traffic by outer IP version (or
 * ethertype on older devices), outer destination IP, VXLAN VNI and the
 * uplink source-vport metadata register, rewrites the flow-source and
 * tunnel metadata registers so the packet is treated as coming from the
 * tunnel's decap vport, and forwards it to the root chain table for
 * re-evaluation ("recirculation").
 *
 * Rules with an identical (vni, dst_ip) key are shared by refcount.
 * Returns 0 on success (including the shared-rule case), negative errno
 * otherwise.
 */
static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw,
					 struct mlx5_flow_attr *attr,
					 struct mlx5_flow_spec *spec,
					 struct mlx5_esw_indir_table_entry *e)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct mlx5_fs_chains *chains = esw_chains(esw);
	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
	struct mlx5_flow_destination dest = {};
	struct mlx5_esw_indir_table_rule *rule;
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *rule_spec;
	struct mlx5_flow_handle *handle;
	int err = 0;
	u32 data;

	/* Reuse an existing rule for this (vni, dst_ip); the lookup takes
	 * a reference on the rule it returns.
	 */
	rule = mlx5_esw_indir_table_rule_lookup(e, esw_attr);
	if (rule)
		return 0;

	/* Flow indexes 0..RECIRC_IDX_MAX are reserved for recirc rules;
	 * the last index belongs to the fwd group.
	 */
	if (e->recirc_cnt == MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX)
		return -EINVAL;

	rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
	if (!rule_spec)
		return -ENOMEM;

	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
	if (!rule) {
		err = -ENOMEM;
		goto out;
	}

	rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
					   MLX5_MATCH_MISC_PARAMETERS |
					   MLX5_MATCH_MISC_PARAMETERS_2;
	/* Match the IP version directly when the device supports that
	 * field; otherwise fall back to matching the ethertype.
	 */
	if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version)) {
		MLX5_SET(fte_match_param, rule_spec->match_criteria,
			 outer_headers.ip_version, 0xf);
		MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version,
			 attr->ip_version);
	} else if (attr->ip_version) {
		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
				 outer_headers.ethertype);
		MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ethertype,
			 (attr->ip_version == 4 ? ETH_P_IP : ETH_P_IPV6));
	} else {
		/* Non-IP traffic cannot be matched here. */
		err = -EOPNOTSUPP;
		goto err_ethertype;
	}

	/* Match the outer destination address of the tunnel. */
	if (attr->ip_version == 4) {
		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		MLX5_SET(fte_match_param, rule_spec->match_value,
			 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
			 ntohl(esw_attr->rx_tun_attr->dst_ip.v4));
	} else if (attr->ip_version == 6) {
		int len = sizeof(struct in6_addr);

		memset(MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       0xff, len);
		memcpy(MLX5_ADDR_OF(fte_match_param, rule_spec->match_value,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &esw_attr->rx_tun_attr->dst_ip.v6, len);
	}

	/* Copy the VXLAN VNI from the caller's flow spec. */
	MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
			 misc_parameters.vxlan_vni);
	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters.vxlan_vni,
		 MLX5_GET(fte_match_param, spec->match_value, misc_parameters.vxlan_vni));

	/* Match only traffic whose metadata marks the uplink as source. */
	MLX5_SET(fte_match_param, rule_spec->match_criteria,
		 misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_for_match(esw_attr->in_mdev->priv.eswitch,
							   MLX5_VPORT_UPLINK));

	/* Modify flow source to recirculate packet */
	data = mlx5_eswitch_get_vport_metadata_for_set(esw, esw_attr->rx_tun_attr->decap_vport);
	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
					VPORT_TO_REG, data);
	if (err)
		goto err_mod_hdr_regc0;

	/* Mark the packet as heading to the slow-table goto-vport path. */
	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
					TUNNEL_TO_REG, ESW_TUN_SLOW_TABLE_GOTO_VPORT);
	if (err)
		goto err_mod_hdr_regc1;

	flow_act.modify_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB,
						       mod_acts.num_actions, mod_acts.actions);
	if (IS_ERR(flow_act.modify_hdr)) {
		err = PTR_ERR(flow_act.modify_hdr);
		goto err_mod_hdr_alloc;
	}

	/* Forward to the root chain table; IGNORE_FLOW_LEVEL is needed
	 * because the destination table sits at a lower level.
	 */
	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	dest.ft = mlx5_chains_get_table(chains, 0, 1, 0);
	if (IS_ERR(dest.ft)) {
		err = PTR_ERR(dest.ft);
		goto err_table;
	}
	handle = mlx5_add_flow_rules(e->ft, rule_spec, &flow_act, &dest, 1);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto err_handle;
	}

	/* Success: record the rule under its (vni, dst_ip) key with an
	 * initial reference, and release the temporary mod-hdr actions.
	 */
	mlx5e_mod_hdr_dealloc(&mod_acts);
	rule->handle = handle;
	rule->vni = esw_attr->rx_tun_attr->vni;
	rule->mh = flow_act.modify_hdr;
	memcpy(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
	       sizeof(esw_attr->rx_tun_attr->dst_ip));
	refcount_set(&rule->refcnt, 1);
	list_add(&rule->list, &e->recirc_rules);
	e->recirc_cnt++;
	goto out;

err_handle:
	mlx5_chains_put_table(chains, 0, 1, 0);
err_table:
	mlx5_modify_header_dealloc(esw->dev, flow_act.modify_hdr);
err_mod_hdr_alloc:
err_mod_hdr_regc1:
	mlx5e_mod_hdr_dealloc(&mod_acts);
err_mod_hdr_regc0:
err_ethertype:
	kfree(rule);
out:
	kvfree(rule_spec);
	return err;
}
255
mlx5_esw_indir_table_rule_put(struct mlx5_eswitch * esw,struct mlx5_flow_attr * attr,struct mlx5_esw_indir_table_entry * e)256 static void mlx5_esw_indir_table_rule_put(struct mlx5_eswitch *esw,
257 struct mlx5_flow_attr *attr,
258 struct mlx5_esw_indir_table_entry *e)
259 {
260 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
261 struct mlx5_fs_chains *chains = esw_chains(esw);
262 struct mlx5_esw_indir_table_rule *rule;
263
264 list_for_each_entry(rule, &e->recirc_rules, list)
265 if (rule->vni == esw_attr->rx_tun_attr->vni &&
266 !memcmp(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
267 sizeof(esw_attr->rx_tun_attr->dst_ip)))
268 goto found;
269
270 return;
271
272 found:
273 if (!refcount_dec_and_test(&rule->refcnt))
274 return;
275
276 mlx5_del_flow_rules(rule->handle);
277 mlx5_chains_put_table(chains, 0, 1, 0);
278 mlx5_modify_header_dealloc(esw->dev, rule->mh);
279 list_del(&rule->list);
280 kfree(rule);
281 e->recirc_cnt--;
282 }
283
/* Create the flow group that will hold the per-(dst_ip, vni)
 * recirculation rules on indirect table @e.
 *
 * The group spans flow indexes 0..MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX
 * and its match criteria mirror what mlx5_esw_indir_table_rule_get()
 * sets per rule: outer IP version (or ethertype on devices lacking the
 * ip_version field), outer destination IP, VXLAN VNI and the
 * source-vport metadata register.
 *
 * Also initializes the entry's recirc rule list and counter.
 * Returns 0 on success or a negative errno.
 */
static int mlx5_create_indir_recirc_group(struct mlx5_eswitch *esw,
					  struct mlx5_flow_attr *attr,
					  struct mlx5_flow_spec *spec,
					  struct mlx5_esw_indir_table_entry *e)
{
	int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	u32 *in, *match;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS |
		 MLX5_MATCH_MISC_PARAMETERS | MLX5_MATCH_MISC_PARAMETERS_2);
	match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);

	/* Prefer matching the ip_version field; fall back to ethertype. */
	if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version))
		MLX5_SET(fte_match_param, match, outer_headers.ip_version, 0xf);
	else
		MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ethertype);

	if (attr->ip_version == 4) {
		MLX5_SET_TO_ONES(fte_match_param, match,
				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
	} else if (attr->ip_version == 6) {
		memset(MLX5_ADDR_OF(fte_match_param, match,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       0xff, sizeof(struct in6_addr));
	} else {
		/* Non-IP traffic is not supported by the indirect table. */
		err = -EOPNOTSUPP;
		goto out;
	}

	MLX5_SET_TO_ONES(fte_match_param, match, misc_parameters.vxlan_vni);
	MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_mask());
	/* Recirc rules own all indexes except the last (fwd) one. */
	MLX5_SET(create_flow_group_in, in, start_flow_index, 0);
	MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX);
	e->recirc_grp = mlx5_create_flow_group(e->ft, in);
	if (IS_ERR(e->recirc_grp)) {
		err = PTR_ERR(e->recirc_grp);
		goto out;
	}

	INIT_LIST_HEAD(&e->recirc_rules);
	e->recirc_cnt = 0;

out:
	kvfree(in);
	return err;
}
335
mlx5_create_indir_fwd_group(struct mlx5_eswitch * esw,struct mlx5_esw_indir_table_entry * e)336 static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw,
337 struct mlx5_esw_indir_table_entry *e)
338 {
339 int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
340 struct mlx5_flow_destination dest = {};
341 struct mlx5_flow_act flow_act = {};
342 struct mlx5_flow_spec *spec;
343 u32 *in;
344
345 in = kvzalloc(inlen, GFP_KERNEL);
346 if (!in)
347 return -ENOMEM;
348
349 spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
350 if (!spec) {
351 kvfree(in);
352 return -ENOMEM;
353 }
354
355 /* Hold one entry */
356 MLX5_SET(create_flow_group_in, in, start_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
357 MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
358 e->fwd_grp = mlx5_create_flow_group(e->ft, in);
359 if (IS_ERR(e->fwd_grp)) {
360 err = PTR_ERR(e->fwd_grp);
361 goto err_out;
362 }
363
364 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
365 dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
366 dest.vport.num = e->vport;
367 dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
368 dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
369 e->fwd_rule = mlx5_add_flow_rules(e->ft, spec, &flow_act, &dest, 1);
370 if (IS_ERR(e->fwd_rule)) {
371 mlx5_destroy_flow_group(e->fwd_grp);
372 err = PTR_ERR(e->fwd_rule);
373 }
374
375 err_out:
376 kvfree(spec);
377 kvfree(in);
378 return err;
379 }
380
/* Allocate and populate a new indirect table entry for
 * (vport, ip_version): create the unmanaged FDB flow table, the
 * recirculation group (plus the initial recirc rule when @decap is
 * set), and the catch-all forward group, then publish the entry in the
 * eswitch's indirect-table hash.
 *
 * @decap chooses how the caller's reference is accounted: a recirc-rule
 * reference when true, a plain fwd_ref otherwise.
 *
 * Called with esw->fdb_table.offloads.indir->lock held (see
 * mlx5_esw_indir_table_get()). Returns the new entry or an ERR_PTR.
 */
static struct mlx5_esw_indir_table_entry *
mlx5_esw_indir_table_entry_create(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr,
				  struct mlx5_flow_spec *spec, u16 vport, bool decap)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *root_ns;
	struct mlx5_esw_indir_table_entry *e;
	struct mlx5_flow_table *ft;
	int err = 0;

	root_ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB);
	if (!root_ns)
		return ERR_PTR(-ENOENT);

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e)
		return ERR_PTR(-ENOMEM);

	/* Unmanaged table at TC offload prio, level 1, sized to hold all
	 * recirc rules plus the single fwd rule.
	 */
	ft_attr.prio = FDB_TC_OFFLOAD;
	ft_attr.max_fte = MLX5_ESW_INDIR_TABLE_SIZE;
	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
	ft_attr.level = 1;

	ft = mlx5_create_flow_table(root_ns, &ft_attr);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto tbl_err;
	}
	e->ft = ft;
	e->vport = vport;
	e->ip_version = attr->ip_version;
	/* Decap users are tracked through recirc rule refcounts instead
	 * of fwd_ref.
	 */
	e->fwd_ref = !decap;

	err = mlx5_create_indir_recirc_group(esw, attr, spec, e);
	if (err)
		goto recirc_grp_err;

	if (decap) {
		err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
		if (err)
			goto recirc_rule_err;
	}

	err = mlx5_create_indir_fwd_group(esw, e);
	if (err)
		goto fwd_grp_err;

	/* Publish under the same key used by entry_lookup(). */
	hash_add(esw->fdb_table.offloads.indir->table, &e->hlist,
		 vport << 16 | attr->ip_version);

	return e;

fwd_grp_err:
	if (decap)
		mlx5_esw_indir_table_rule_put(esw, attr, e);
recirc_rule_err:
	mlx5_destroy_flow_group(e->recirc_grp);
recirc_grp_err:
	mlx5_destroy_flow_table(e->ft);
tbl_err:
	kfree(e);
	return ERR_PTR(err);
}
444
445 static struct mlx5_esw_indir_table_entry *
mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch * esw,u16 vport,u8 ip_version)446 mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch *esw, u16 vport, u8 ip_version)
447 {
448 struct mlx5_esw_indir_table_entry *e;
449 u32 key = vport << 16 | ip_version;
450
451 hash_for_each_possible(esw->fdb_table.offloads.indir->table, e, hlist, key)
452 if (e->vport == vport && e->ip_version == ip_version)
453 return e;
454
455 return NULL;
456 }
457
/* Get (create on demand) the indirect flow table for (vport,
 * attr->ip_version) and take a reference on it: a recirc-rule reference
 * when @decap is set, a plain fwd reference otherwise.
 *
 * Returns the flow table on success or an ERR_PTR on failure.
 * Balanced by mlx5_esw_indir_table_put().
 */
struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw,
						 struct mlx5_flow_attr *attr,
						 struct mlx5_flow_spec *spec,
						 u16 vport, bool decap)
{
	struct mlx5_esw_indir_table_entry *e;
	struct mlx5_flow_table *ft;
	int err;

	mutex_lock(&esw->fdb_table.offloads.indir->lock);
	e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
	if (!e) {
		/* No entry yet: create one carrying this first reference. */
		e = mlx5_esw_indir_table_entry_create(esw, attr, spec, vport, decap);
		if (IS_ERR(e)) {
			err = PTR_ERR(e);
			esw_warn(esw->dev, "Failed to create indirection table, err %d.\n", err);
			goto out_err;
		}
	} else if (decap) {
		err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
		if (err)
			goto out_err;
	} else {
		e->fwd_ref++;
	}
	ft = e->ft;
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
	return ft;

out_err:
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
	return ERR_PTR(err);
}
491
/* Release a reference taken by mlx5_esw_indir_table_get() on the entry
 * for (vport, attr->ip_version): a recirc-rule reference when @decap is
 * set, a fwd reference otherwise. When the last reference of either
 * kind goes away, the entry and all of its steering objects are
 * destroyed and it is removed from the hash.
 */
void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw,
			      struct mlx5_flow_attr *attr,
			      u16 vport, bool decap)
{
	struct mlx5_esw_indir_table_entry *e;

	mutex_lock(&esw->fdb_table.offloads.indir->lock);
	e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
	if (e) {
		if (decap)
			mlx5_esw_indir_table_rule_put(esw, attr, e);
		else
			e->fwd_ref--;

		if (!e->fwd_ref && !e->recirc_cnt) {
			/* Last user: tear everything down in reverse
			 * order of creation.
			 */
			hash_del(&e->hlist);
			mlx5_destroy_flow_group(e->recirc_grp);
			mlx5_del_flow_rules(e->fwd_rule);
			mlx5_destroy_flow_group(e->fwd_grp);
			mlx5_destroy_flow_table(e->ft);
			kfree(e);
		}
	}
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
}
520