1 /*
2  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <net/flow_dissector.h>
34 #include <net/flow_offload.h>
35 #include <net/sch_generic.h>
36 #include <net/pkt_cls.h>
37 #include <linux/mlx5/fs.h>
38 #include <linux/mlx5/device.h>
39 #include <linux/rhashtable.h>
40 #include <linux/refcount.h>
41 #include <linux/completion.h>
42 #include <net/tc_act/tc_pedit.h>
43 #include <net/tc_act/tc_csum.h>
44 #include <net/psample.h>
45 #include <net/arp.h>
46 #include <net/ipv6_stubs.h>
47 #include <net/bareudp.h>
48 #include <net/bonding.h>
49 #include "en.h"
50 #include "en/tc/post_act.h"
51 #include "en_rep.h"
52 #include "en/rep/tc.h"
53 #include "en/rep/neigh.h"
54 #include "en_tc.h"
55 #include "eswitch.h"
56 #include "fs_core.h"
57 #include "en/port.h"
58 #include "en/tc_tun.h"
59 #include "en/mapping.h"
60 #include "en/tc_ct.h"
61 #include "en/mod_hdr.h"
62 #include "en/tc_priv.h"
63 #include "en/tc_tun_encap.h"
64 #include "en/tc/sample.h"
65 #include "lib/devcom.h"
66 #include "lib/geneve.h"
67 #include "lib/fs_chains.h"
68 #include "diag/en_tc_tracepoint.h"
69 #include <asm/div64.h>
70 #include "lag.h"
71 #include "lag_mp.h"
72 
73 #define nic_chains(priv) ((priv)->fs.tc.chains)
74 
75 #define MLX5E_TC_TABLE_NUM_GROUPS 4
76 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
77 
78 struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
79 	[CHAIN_TO_REG] = {
80 		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
81 		.moffset = 0,
82 		.mlen = 16,
83 	},
84 	[VPORT_TO_REG] = {
85 		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
86 		.moffset = 16,
87 		.mlen = 16,
88 	},
89 	[TUNNEL_TO_REG] = {
90 		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
91 		.moffset = 8,
92 		.mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS,
93 		.soffset = MLX5_BYTE_OFF(fte_match_param,
94 					 misc_parameters_2.metadata_reg_c_1),
95 	},
96 	[ZONE_TO_REG] = zone_to_reg_ct,
97 	[ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
98 	[CTSTATE_TO_REG] = ctstate_to_reg_ct,
99 	[MARK_TO_REG] = mark_to_reg_ct,
100 	[LABELS_TO_REG] = labels_to_reg_ct,
101 	[FTEID_TO_REG] = fteid_to_reg_ct,
102 	/* For NIC rules we store the restore metadata directly
103 	 * into reg_b, which is passed to SW, since we don't
104 	 * jump between steering domains.
105 	 */
106 	[NIC_CHAIN_TO_REG] = {
107 		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
108 		.moffset = 0,
109 		.mlen = 16,
110 	},
111 	[NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
112 };
113 
114 /* To avoid a false lock dependency warning, set the tc_ht lock class
115  * different from the lock class of the ht being used when deleting the last
116  * flow from a group and then deleting the group: that path reaches
117  * del_sw_flow_group(), which calls rhashtable_destroy() on fg->ftes_hash and
118  * takes that ht->mutex, which is different from the ht->mutex here.
119  */
120 static struct lock_class_key tc_ht_lock_key;
121 
122 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
123 
124 void
125 mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
126 			    enum mlx5e_tc_attr_to_reg type,
127 			    u32 val,
128 			    u32 mask)
129 {
130 	void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
131 	int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
132 	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
133 	int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
134 	u32 max_mask = GENMASK(match_len - 1, 0);
135 	__be32 curr_mask_be, curr_val_be;
136 	u32 curr_mask, curr_val;
137 
138 	fmask = headers_c + soffset;
139 	fval = headers_v + soffset;
140 
141 	memcpy(&curr_mask_be, fmask, 4);
142 	memcpy(&curr_val_be, fval, 4);
143 
144 	curr_mask = be32_to_cpu(curr_mask_be);
145 	curr_val = be32_to_cpu(curr_val_be);
146 
147 	//move to correct offset
148 	WARN_ON(mask > max_mask);
149 	mask <<= moffset;
150 	val <<= moffset;
151 	max_mask <<= moffset;
152 
153 	//zero val and mask
154 	curr_mask &= ~max_mask;
155 	curr_val &= ~max_mask;
156 
157 	//add current to mask
158 	curr_mask |= mask;
159 	curr_val |= val;
160 
161 	//back to be32 and write
162 	curr_mask_be = cpu_to_be32(curr_mask);
163 	curr_val_be = cpu_to_be32(curr_val);
164 
165 	memcpy(fmask, &curr_mask_be, 4);
166 	memcpy(fval, &curr_val_be, 4);
167 
168 	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
169 }
170 
171 void
172 mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
173 				enum mlx5e_tc_attr_to_reg type,
174 				u32 *val,
175 				u32 *mask)
176 {
177 	void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
178 	int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
179 	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
180 	int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
181 	u32 max_mask = GENMASK(match_len - 1, 0);
182 	__be32 curr_mask_be, curr_val_be;
183 	u32 curr_mask, curr_val;
184 
185 	fmask = headers_c + soffset;
186 	fval = headers_v + soffset;
187 
188 	memcpy(&curr_mask_be, fmask, 4);
189 	memcpy(&curr_val_be, fval, 4);
190 
191 	curr_mask = be32_to_cpu(curr_mask_be);
192 	curr_val = be32_to_cpu(curr_val_be);
193 
194 	*mask = (curr_mask >> moffset) & max_mask;
195 	*val = (curr_val >> moffset) & max_mask;
196 }
197 
198 int
199 mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
200 				     struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
201 				     enum mlx5_flow_namespace_type ns,
202 				     enum mlx5e_tc_attr_to_reg type,
203 				     u32 data)
204 {
205 	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
206 	int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
207 	int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
208 	char *modact;
209 	int err;
210 
211 	modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts);
212 	if (IS_ERR(modact))
213 		return PTR_ERR(modact);
214 
215 	/* Firmware has a 5-bit length field and 0 means 32 bits */
216 	if (mlen == 32)
217 		mlen = 0;
218 
219 	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
220 	MLX5_SET(set_action_in, modact, field, mfield);
221 	MLX5_SET(set_action_in, modact, offset, moffset);
222 	MLX5_SET(set_action_in, modact, length, mlen);
223 	MLX5_SET(set_action_in, modact, data, data);
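	/* The returned value is the index of the action just appended; callers
	 * can later rewrite its data via mlx5e_tc_match_to_reg_mod_hdr_change().
	 */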
224 	err = mod_hdr_acts->num_actions;
225 	mod_hdr_acts->num_actions++;
226 
227 	return err;
228 }
229 
230 static struct mlx5_tc_ct_priv *
231 get_ct_priv(struct mlx5e_priv *priv)
232 {
233 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
234 	struct mlx5_rep_uplink_priv *uplink_priv;
235 	struct mlx5e_rep_priv *uplink_rpriv;
236 
237 	if (is_mdev_switchdev_mode(priv->mdev)) {
238 		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
239 		uplink_priv = &uplink_rpriv->uplink_priv;
240 
241 		return uplink_priv->ct_priv;
242 	}
243 
244 	return priv->fs.tc.ct;
245 }
246 
247 static struct mlx5e_tc_psample *
248 get_sample_priv(struct mlx5e_priv *priv)
249 {
250 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
251 	struct mlx5_rep_uplink_priv *uplink_priv;
252 	struct mlx5e_rep_priv *uplink_rpriv;
253 
254 	if (is_mdev_switchdev_mode(priv->mdev)) {
255 		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
256 		uplink_priv = &uplink_rpriv->uplink_priv;
257 
258 		return uplink_priv->tc_psample;
259 	}
260 
261 	return NULL;
262 }
263 
264 struct mlx5_flow_handle *
265 mlx5_tc_rule_insert(struct mlx5e_priv *priv,
266 		    struct mlx5_flow_spec *spec,
267 		    struct mlx5_flow_attr *attr)
268 {
269 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
270 
271 	if (is_mdev_switchdev_mode(priv->mdev))
272 		return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
273 
274 	return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
275 }
276 
277 void
278 mlx5_tc_rule_delete(struct mlx5e_priv *priv,
279 		    struct mlx5_flow_handle *rule,
280 		    struct mlx5_flow_attr *attr)
281 {
282 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
283 
284 	if (is_mdev_switchdev_mode(priv->mdev)) {
285 		mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
286 
287 		return;
288 	}
289 
290 	mlx5e_del_offloaded_nic_rule(priv, rule, attr);
291 }
292 
293 int
294 mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
295 			  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
296 			  enum mlx5_flow_namespace_type ns,
297 			  enum mlx5e_tc_attr_to_reg type,
298 			  u32 data)
299 {
300 	int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data);
301 
302 	return ret < 0 ? ret : 0;
303 }
304 
305 void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
306 					  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
307 					  enum mlx5e_tc_attr_to_reg type,
308 					  int act_id, u32 data)
309 {
310 	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
311 	int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
312 	int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
313 	char *modact;
314 
315 	modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id);
316 
317 	/* Firmware has a 5-bit length field and 0 means 32 bits */
318 	if (mlen == 32)
319 		mlen = 0;
320 
321 	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
322 	MLX5_SET(set_action_in, modact, field, mfield);
323 	MLX5_SET(set_action_in, modact, offset, moffset);
324 	MLX5_SET(set_action_in, modact, length, mlen);
325 	MLX5_SET(set_action_in, modact, data, data);
326 }
327 
328 struct mlx5e_hairpin {
329 	struct mlx5_hairpin *pair;
330 
331 	struct mlx5_core_dev *func_mdev;
332 	struct mlx5e_priv *func_priv;
333 	u32 tdn;
334 	struct mlx5e_tir direct_tir;
335 
336 	int num_channels;
337 	struct mlx5e_rqt indir_rqt;
338 	struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
339 	struct mlx5_ttc_table *ttc;
340 };
341 
342 struct mlx5e_hairpin_entry {
343 	/* a node of a hash table which keeps all the hairpin entries */
344 	struct hlist_node hairpin_hlist;
345 
346 	/* protects flows list */
347 	spinlock_t flows_lock;
348 	/* flows sharing the same hairpin */
349 	struct list_head flows;
350 	/* hpe's that were not fully initialized when the dead peer update event
351 	 * function traversed them.
352 	 */
353 	struct list_head dead_peer_wait_list;
354 
355 	u16 peer_vhca_id;
356 	u8 prio;
357 	struct mlx5e_hairpin *hp;
358 	refcount_t refcnt;
359 	struct completion res_ready;
360 };
361 
362 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
363 			      struct mlx5e_tc_flow *flow);
364 
365 struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
366 {
367 	if (!flow || !refcount_inc_not_zero(&flow->refcnt))
368 		return ERR_PTR(-EINVAL);
369 	return flow;
370 }
371 
372 void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
373 {
374 	if (refcount_dec_and_test(&flow->refcnt)) {
375 		mlx5e_tc_del_flow(priv, flow);
376 		kfree_rcu(flow, rcu_head);
377 	}
378 }
379 
380 bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
381 {
382 	return flow_flag_test(flow, ESWITCH);
383 }
384 
385 static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
386 {
387 	return flow_flag_test(flow, FT);
388 }
389 
390 bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
391 {
392 	return flow_flag_test(flow, OFFLOADED);
393 }
394 
395 static int get_flow_name_space(struct mlx5e_tc_flow *flow)
396 {
397 	return mlx5e_is_eswitch_flow(flow) ?
398 		MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
399 }
400 
401 static struct mod_hdr_tbl *
402 get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
403 {
404 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
405 
406 	return get_flow_name_space(flow) == MLX5_FLOW_NAMESPACE_FDB ?
407 		&esw->offloads.mod_hdr :
408 		&priv->fs.tc.mod_hdr;
409 }
410 
411 static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
412 				struct mlx5e_tc_flow *flow,
413 				struct mlx5e_tc_flow_parse_attr *parse_attr)
414 {
415 	struct mlx5_modify_hdr *modify_hdr;
416 	struct mlx5e_mod_hdr_handle *mh;
417 
418 	mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
419 				  get_flow_name_space(flow),
420 				  &parse_attr->mod_hdr_acts);
421 	if (IS_ERR(mh))
422 		return PTR_ERR(mh);
423 
424 	modify_hdr = mlx5e_mod_hdr_get(mh);
425 	flow->attr->modify_hdr = modify_hdr;
426 	flow->mh = mh;
427 
428 	return 0;
429 }
430 
431 static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
432 				 struct mlx5e_tc_flow *flow)
433 {
434 	/* flow wasn't fully initialized */
435 	if (!flow->mh)
436 		return;
437 
438 	mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
439 			     flow->mh);
440 	flow->mh = NULL;
441 }
442 
443 static
444 struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
445 {
446 	struct mlx5_core_dev *mdev;
447 	struct net_device *netdev;
448 	struct mlx5e_priv *priv;
449 
450 	netdev = dev_get_by_index(net, ifindex);
451 	if (!netdev)
452 		return ERR_PTR(-ENODEV);
453 
454 	priv = netdev_priv(netdev);
455 	mdev = priv->mdev;
456 	dev_put(netdev);
457 
458 	/* Mirred tc action holds a refcount on the ifindex net_device (see
459 	 * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
460 	 * after dev_put(netdev), while we're in the context of adding a tc flow.
461 	 *
462 	 * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
463 	 * stored in a hairpin object, which exists until all flows, that refer to it, get
464 	 * removed.
465 	 *
466 	 * On the other hand, after a hairpin object has been created, the peer net_device may
467 	 * be removed/unbound while there are still some hairpin flows that are using it. This
468 	 * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
469 	 * NETDEV_UNREGISTER event of the peer net_device.
470 	 */
471 	return mdev;
472 }
473 
474 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
475 {
476 	struct mlx5e_tir_builder *builder;
477 	int err;
478 
479 	builder = mlx5e_tir_builder_alloc(false);
480 	if (!builder)
481 		return -ENOMEM;
482 
483 	err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
484 	if (err)
485 		goto out;
486 
487 	mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]);
488 	err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false);
489 	if (err)
490 		goto create_tir_err;
491 
492 out:
493 	mlx5e_tir_builder_free(builder);
494 	return err;
495 
496 create_tir_err:
497 	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
498 
499 	goto out;
500 }
501 
502 static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
503 {
504 	mlx5e_tir_destroy(&hp->direct_tir);
505 	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
506 }
507 
508 static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
509 {
510 	struct mlx5e_priv *priv = hp->func_priv;
511 	struct mlx5_core_dev *mdev = priv->mdev;
512 	struct mlx5e_rss_params_indir *indir;
513 	int err;
514 
515 	indir = kvmalloc(sizeof(*indir), GFP_KERNEL);
516 	if (!indir)
517 		return -ENOMEM;
518 
519 	mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels);
520 	err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
521 				   mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
522 				   indir);
523 
524 	kvfree(indir);
525 	return err;
526 }
527 
528 static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
529 {
530 	struct mlx5e_priv *priv = hp->func_priv;
531 	struct mlx5e_rss_params_hash rss_hash;
532 	enum mlx5_traffic_types tt, max_tt;
533 	struct mlx5e_tir_builder *builder;
534 	int err = 0;
535 
536 	builder = mlx5e_tir_builder_alloc(false);
537 	if (!builder)
538 		return -ENOMEM;
539 
540 	rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);
541 
542 	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
543 		struct mlx5e_rss_params_traffic_type rss_tt;
544 
545 		rss_tt = mlx5e_rss_get_default_tt_config(tt);
546 
547 		mlx5e_tir_builder_build_rqt(builder, hp->tdn,
548 					    mlx5e_rqt_get_rqtn(&hp->indir_rqt),
549 					    false);
550 		mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);
551 
552 		err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
553 		if (err) {
554 			mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
555 			goto err_destroy_tirs;
556 		}
557 
558 		mlx5e_tir_builder_clear(builder);
559 	}
560 
561 out:
562 	mlx5e_tir_builder_free(builder);
563 	return err;
564 
565 err_destroy_tirs:
566 	max_tt = tt;
567 	for (tt = 0; tt < max_tt; tt++)
568 		mlx5e_tir_destroy(&hp->indir_tir[tt]);
569 
570 	goto out;
571 }
572 
573 static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
574 {
575 	int tt;
576 
577 	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
578 		mlx5e_tir_destroy(&hp->indir_tir[tt]);
579 }
580 
581 static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
582 					 struct ttc_params *ttc_params)
583 {
584 	struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
585 	int tt;
586 
587 	memset(ttc_params, 0, sizeof(*ttc_params));
588 
589 	ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev,
590 						 MLX5_FLOW_NAMESPACE_KERNEL);
591 	for (tt = 0; tt < MLX5_NUM_TT; tt++) {
592 		ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
593 		ttc_params->dests[tt].tir_num =
594 			tt == MLX5_TT_ANY ?
595 				mlx5e_tir_get_tirn(&hp->direct_tir) :
596 				mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
597 	}
598 
599 	ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
600 	ft_attr->prio = MLX5E_TC_PRIO;
601 }
602 
603 static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
604 {
605 	struct mlx5e_priv *priv = hp->func_priv;
606 	struct ttc_params ttc_params;
607 	int err;
608 
609 	err = mlx5e_hairpin_create_indirect_rqt(hp);
610 	if (err)
611 		return err;
612 
613 	err = mlx5e_hairpin_create_indirect_tirs(hp);
614 	if (err)
615 		goto err_create_indirect_tirs;
616 
617 	mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
618 	hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
619 	if (IS_ERR(hp->ttc)) {
620 		err = PTR_ERR(hp->ttc);
621 		goto err_create_ttc_table;
622 	}
623 
624 	netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
625 		   hp->num_channels,
626 		   mlx5_get_ttc_flow_table(hp->ttc)->id);
627 
628 	return 0;
629 
630 err_create_ttc_table:
631 	mlx5e_hairpin_destroy_indirect_tirs(hp);
632 err_create_indirect_tirs:
633 	mlx5e_rqt_destroy(&hp->indir_rqt);
634 
635 	return err;
636 }
637 
638 static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
639 {
640 	mlx5_destroy_ttc_table(hp->ttc);
641 	mlx5e_hairpin_destroy_indirect_tirs(hp);
642 	mlx5e_rqt_destroy(&hp->indir_rqt);
643 }
644 
645 static struct mlx5e_hairpin *
646 mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
647 		     int peer_ifindex)
648 {
649 	struct mlx5_core_dev *func_mdev, *peer_mdev;
650 	struct mlx5e_hairpin *hp;
651 	struct mlx5_hairpin *pair;
652 	int err;
653 
654 	hp = kzalloc(sizeof(*hp), GFP_KERNEL);
655 	if (!hp)
656 		return ERR_PTR(-ENOMEM);
657 
658 	func_mdev = priv->mdev;
659 	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
660 	if (IS_ERR(peer_mdev)) {
661 		err = PTR_ERR(peer_mdev);
662 		goto create_pair_err;
663 	}
664 
665 	pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
666 	if (IS_ERR(pair)) {
667 		err = PTR_ERR(pair);
668 		goto create_pair_err;
669 	}
670 	hp->pair = pair;
671 	hp->func_mdev = func_mdev;
672 	hp->func_priv = priv;
673 	hp->num_channels = params->num_channels;
674 
675 	err = mlx5e_hairpin_create_transport(hp);
676 	if (err)
677 		goto create_transport_err;
678 
679 	if (hp->num_channels > 1) {
680 		err = mlx5e_hairpin_rss_init(hp);
681 		if (err)
682 			goto rss_init_err;
683 	}
684 
685 	return hp;
686 
687 rss_init_err:
688 	mlx5e_hairpin_destroy_transport(hp);
689 create_transport_err:
690 	mlx5_core_hairpin_destroy(hp->pair);
691 create_pair_err:
692 	kfree(hp);
693 	return ERR_PTR(err);
694 }
695 
696 static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
697 {
698 	if (hp->num_channels > 1)
699 		mlx5e_hairpin_rss_cleanup(hp);
700 	mlx5e_hairpin_destroy_transport(hp);
701 	mlx5_core_hairpin_destroy(hp->pair);
702 	kvfree(hp);
703 }
704 
705 static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
706 {
707 	return (peer_vhca_id << 16 | prio);
708 }
709 
710 static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
711 						     u16 peer_vhca_id, u8 prio)
712 {
713 	struct mlx5e_hairpin_entry *hpe;
714 	u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);
715 
716 	hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
717 			       hairpin_hlist, hash_key) {
718 		if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
719 			refcount_inc(&hpe->refcnt);
720 			return hpe;
721 		}
722 	}
723 
724 	return NULL;
725 }
726 
727 static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
728 			      struct mlx5e_hairpin_entry *hpe)
729 {
730 	/* no more hairpin flows for us, release the hairpin pair */
731 	if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
732 		return;
733 	hash_del(&hpe->hairpin_hlist);
734 	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
735 
736 	if (!IS_ERR_OR_NULL(hpe->hp)) {
737 		netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
738 			   dev_name(hpe->hp->pair->peer_mdev->device));
739 
740 		mlx5e_hairpin_destroy(hpe->hp);
741 	}
742 
743 	WARN_ON(!list_empty(&hpe->flows));
744 	kfree(hpe);
745 }
746 
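/* VLAN PCP priorities span 0..7, so 8 marks hairpin flows that don't match on a specific priority. */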
747 #define UNKNOWN_MATCH_PRIO 8
748 
749 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
750 				  struct mlx5_flow_spec *spec, u8 *match_prio,
751 				  struct netlink_ext_ack *extack)
752 {
753 	void *headers_c, *headers_v;
754 	u8 prio_val, prio_mask = 0;
755 	bool vlan_present;
756 
757 #ifdef CONFIG_MLX5_CORE_EN_DCB
758 	if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
759 		NL_SET_ERR_MSG_MOD(extack,
760 				   "only PCP trust state supported for hairpin");
761 		return -EOPNOTSUPP;
762 	}
763 #endif
764 	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
765 	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
766 
767 	vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
768 	if (vlan_present) {
769 		prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
770 		prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
771 	}
772 
773 	if (!vlan_present || !prio_mask) {
774 		prio_val = UNKNOWN_MATCH_PRIO;
775 	} else if (prio_mask != 0x7) {
776 		NL_SET_ERR_MSG_MOD(extack,
777 				   "masked priority match not supported for hairpin");
778 		return -EOPNOTSUPP;
779 	}
780 
781 	*match_prio = prio_val;
782 	return 0;
783 }
784 
785 static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
786 				  struct mlx5e_tc_flow *flow,
787 				  struct mlx5e_tc_flow_parse_attr *parse_attr,
788 				  struct netlink_ext_ack *extack)
789 {
790 	int peer_ifindex = parse_attr->mirred_ifindex[0];
791 	struct mlx5_hairpin_params params;
792 	struct mlx5_core_dev *peer_mdev;
793 	struct mlx5e_hairpin_entry *hpe;
794 	struct mlx5e_hairpin *hp;
795 	u64 link_speed64;
796 	u32 link_speed;
797 	u8 match_prio;
798 	u16 peer_id;
799 	int err;
800 
801 	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
802 	if (IS_ERR(peer_mdev)) {
803 		NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
804 		return PTR_ERR(peer_mdev);
805 	}
806 
807 	if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
808 		NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
809 		return -EOPNOTSUPP;
810 	}
811 
812 	peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
813 	err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
814 				     extack);
815 	if (err)
816 		return err;
817 
818 	mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
819 	hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
820 	if (hpe) {
821 		mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
822 		wait_for_completion(&hpe->res_ready);
823 
824 		if (IS_ERR(hpe->hp)) {
825 			err = -EREMOTEIO;
826 			goto out_err;
827 		}
828 		goto attach_flow;
829 	}
830 
831 	hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
832 	if (!hpe) {
833 		mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
834 		return -ENOMEM;
835 	}
836 
837 	spin_lock_init(&hpe->flows_lock);
838 	INIT_LIST_HEAD(&hpe->flows);
839 	INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
840 	hpe->peer_vhca_id = peer_id;
841 	hpe->prio = match_prio;
842 	refcount_set(&hpe->refcnt, 1);
843 	init_completion(&hpe->res_ready);
844 
845 	hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
846 		 hash_hairpin_info(peer_id, match_prio));
847 	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
848 
849 	params.log_data_size = 16;
850 	params.log_data_size = min_t(u8, params.log_data_size,
851 				     MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
852 	params.log_data_size = max_t(u8, params.log_data_size,
853 				     MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));
854 
855 	params.log_num_packets = params.log_data_size -
856 				 MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
857 	params.log_num_packets = min_t(u8, params.log_num_packets,
858 				       MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));
859 
860 	params.q_counter = priv->q_counter;
861 	/* allocate one hairpin channel per 50Gbps share of the link speed */
862 	mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
863 	link_speed = max_t(u32, link_speed, 50000);
864 	link_speed64 = link_speed;
865 	do_div(link_speed64, 50000);
866 	params.num_channels = link_speed64;
867 
868 	hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
869 	hpe->hp = hp;
870 	complete_all(&hpe->res_ready);
871 	if (IS_ERR(hp)) {
872 		err = PTR_ERR(hp);
873 		goto out_err;
874 	}
875 
876 	netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
877 		   mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0],
878 		   dev_name(hp->pair->peer_mdev->device),
879 		   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
880 
881 attach_flow:
882 	if (hpe->hp->num_channels > 1) {
883 		flow_flag_set(flow, HAIRPIN_RSS);
884 		flow->attr->nic_attr->hairpin_ft =
885 			mlx5_get_ttc_flow_table(hpe->hp->ttc);
886 	} else {
887 		flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
888 	}
889 
890 	flow->hpe = hpe;
891 	spin_lock(&hpe->flows_lock);
892 	list_add(&flow->hairpin, &hpe->flows);
893 	spin_unlock(&hpe->flows_lock);
894 
895 	return 0;
896 
897 out_err:
898 	mlx5e_hairpin_put(priv, hpe);
899 	return err;
900 }
901 
902 static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
903 				   struct mlx5e_tc_flow *flow)
904 {
905 	/* flow wasn't fully initialized */
906 	if (!flow->hpe)
907 		return;
908 
909 	spin_lock(&flow->hpe->flows_lock);
910 	list_del(&flow->hairpin);
911 	spin_unlock(&flow->hpe->flows_lock);
912 
913 	mlx5e_hairpin_put(priv, flow->hpe);
914 	flow->hpe = NULL;
915 }
916 
917 struct mlx5_flow_handle *
918 mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
919 			     struct mlx5_flow_spec *spec,
920 			     struct mlx5_flow_attr *attr)
921 {
922 	struct mlx5_flow_context *flow_context = &spec->flow_context;
923 	struct mlx5_fs_chains *nic_chains = nic_chains(priv);
924 	struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
925 	struct mlx5e_tc_table *tc = &priv->fs.tc;
926 	struct mlx5_flow_destination dest[2] = {};
927 	struct mlx5_flow_act flow_act = {
928 		.action = attr->action,
929 		.flags    = FLOW_ACT_NO_APPEND,
930 	};
931 	struct mlx5_flow_handle *rule;
932 	struct mlx5_flow_table *ft;
933 	int dest_ix = 0;
934 
935 	flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
936 	flow_context->flow_tag = nic_attr->flow_tag;
937 
938 	if (attr->dest_ft) {
939 		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
940 		dest[dest_ix].ft = attr->dest_ft;
941 		dest_ix++;
942 	} else if (nic_attr->hairpin_ft) {
943 		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
944 		dest[dest_ix].ft = nic_attr->hairpin_ft;
945 		dest_ix++;
946 	} else if (nic_attr->hairpin_tirn) {
947 		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
948 		dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
949 		dest_ix++;
950 	} else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
951 		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
952 		if (attr->dest_chain) {
953 			dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
954 								 attr->dest_chain, 1,
955 								 MLX5E_TC_FT_LEVEL);
956 			if (IS_ERR(dest[dest_ix].ft))
957 				return ERR_CAST(dest[dest_ix].ft);
958 		} else {
959 			dest[dest_ix].ft = mlx5e_vlan_get_flowtable(priv->fs.vlan);
960 		}
961 		dest_ix++;
962 	}
963 
964 	if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
965 	    MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
966 		flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
967 
968 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
969 		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
970 		dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
971 		dest_ix++;
972 	}
973 
974 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
975 		flow_act.modify_hdr = attr->modify_hdr;
976 
977 	mutex_lock(&tc->t_lock);
978 	if (IS_ERR_OR_NULL(tc->t)) {
979 		/* Create the root table here if it doesn't exist yet */
980 		tc->t =
981 			mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);
982 
983 		if (IS_ERR(tc->t)) {
984 			mutex_unlock(&tc->t_lock);
985 			netdev_err(priv->netdev,
986 				   "Failed to create tc offload table\n");
987 			rule = ERR_CAST(priv->fs.tc.t);
988 			goto err_ft_get;
989 		}
990 	}
991 	mutex_unlock(&tc->t_lock);
992 
993 	if (attr->chain || attr->prio)
994 		ft = mlx5_chains_get_table(nic_chains,
995 					   attr->chain, attr->prio,
996 					   MLX5E_TC_FT_LEVEL);
997 	else
998 		ft = attr->ft;
999 
1000 	if (IS_ERR(ft)) {
1001 		rule = ERR_CAST(ft);
1002 		goto err_ft_get;
1003 	}
1004 
1005 	if (attr->outer_match_level != MLX5_MATCH_NONE)
1006 		spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
1007 
1008 	rule = mlx5_add_flow_rules(ft, spec,
1009 				   &flow_act, dest, dest_ix);
1010 	if (IS_ERR(rule))
1011 		goto err_rule;
1012 
1013 	return rule;
1014 
1015 err_rule:
1016 	if (attr->chain || attr->prio)
1017 		mlx5_chains_put_table(nic_chains,
1018 				      attr->chain, attr->prio,
1019 				      MLX5E_TC_FT_LEVEL);
1020 err_ft_get:
1021 	if (attr->dest_chain)
1022 		mlx5_chains_put_table(nic_chains,
1023 				      attr->dest_chain, 1,
1024 				      MLX5E_TC_FT_LEVEL);
1025 
1026 	return ERR_CAST(rule);
1027 }
1028 
1029 static int
1030 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
1031 		      struct mlx5e_tc_flow *flow,
1032 		      struct netlink_ext_ack *extack)
1033 {
1034 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1035 	struct mlx5_flow_attr *attr = flow->attr;
1036 	struct mlx5_core_dev *dev = priv->mdev;
1037 	struct mlx5_fc *counter;
1038 	int err;
1039 
1040 	parse_attr = attr->parse_attr;
1041 
1042 	if (flow_flag_test(flow, HAIRPIN)) {
1043 		err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
1044 		if (err)
1045 			return err;
1046 	}
1047 
1048 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1049 		counter = mlx5_fc_create(dev, true);
1050 		if (IS_ERR(counter))
1051 			return PTR_ERR(counter);
1052 
1053 		attr->counter = counter;
1054 	}
1055 
1056 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1057 		err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
1058 		mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
1059 		if (err)
1060 			return err;
1061 	}
1062 
1063 	if (flow_flag_test(flow, CT))
1064 		flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), flow, &parse_attr->spec,
1065 							attr, &parse_attr->mod_hdr_acts);
1066 	else
1067 		flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
1068 							     attr);
1069 
1070 	return PTR_ERR_OR_ZERO(flow->rule[0]);
1071 }
1072 
1073 void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
1074 				  struct mlx5_flow_handle *rule,
1075 				  struct mlx5_flow_attr *attr)
1076 {
1077 	struct mlx5_fs_chains *nic_chains = nic_chains(priv);
1078 
1079 	mlx5_del_flow_rules(rule);
1080 
1081 	if (attr->chain || attr->prio)
1082 		mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
1083 				      MLX5E_TC_FT_LEVEL);
1084 
1085 	if (attr->dest_chain)
1086 		mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
1087 				      MLX5E_TC_FT_LEVEL);
1088 }
1089 
1090 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
1091 				  struct mlx5e_tc_flow *flow)
1092 {
1093 	struct mlx5_flow_attr *attr = flow->attr;
1094 	struct mlx5e_tc_table *tc = &priv->fs.tc;
1095 
1096 	flow_flag_clear(flow, OFFLOADED);
1097 
1098 	if (flow_flag_test(flow, CT))
1099 		mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
1100 	else if (!IS_ERR_OR_NULL(flow->rule[0]))
1101 		mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);
1102 
1103 	/* Remove the root table if no rules are left, to avoid
1104 	 * extra steering hops.
1105 	 */
1106 	mutex_lock(&priv->fs.tc.t_lock);
1107 	if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
1108 	    !IS_ERR_OR_NULL(tc->t)) {
1109 		mlx5_chains_put_table(nic_chains(priv), 0, 1, MLX5E_TC_FT_LEVEL);
1110 		priv->fs.tc.t = NULL;
1111 	}
1112 	mutex_unlock(&priv->fs.tc.t_lock);
1113 
1114 	kvfree(attr->parse_attr);
1115 
1116 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1117 		mlx5e_detach_mod_hdr(priv, flow);
1118 
1119 	mlx5_fc_destroy(priv->mdev, attr->counter);
1120 
1121 	if (flow_flag_test(flow, HAIRPIN))
1122 		mlx5e_hairpin_flow_del(priv, flow);
1123 
1124 	kfree(flow->attr);
1125 }
1126 
1127 struct mlx5_flow_handle *
1128 mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
1129 			   struct mlx5e_tc_flow *flow,
1130 			   struct mlx5_flow_spec *spec,
1131 			   struct mlx5_flow_attr *attr)
1132 {
1133 	struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
1134 	struct mlx5_flow_handle *rule;
1135 
1136 	if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
1137 		return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
1138 
1139 	if (flow_flag_test(flow, CT)) {
1140 		mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
1141 
1142 		rule = mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
1143 					       flow, spec, attr,
1144 					       mod_hdr_acts);
1145 	} else if (flow_flag_test(flow, SAMPLE)) {
1146 		rule = mlx5e_tc_sample_offload(get_sample_priv(flow->priv), spec, attr,
1147 					       mlx5e_tc_get_flow_tun_id(flow));
1148 	} else {
1149 		rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
1150 	}
1151 
1152 	if (IS_ERR(rule))
1153 		return rule;
1154 
1155 	if (attr->esw_attr->split_count) {
1156 		flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
1157 		if (IS_ERR(flow->rule[1])) {
1158 			if (flow_flag_test(flow, CT))
1159 				mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
1160 			else
1161 				mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
1162 			return flow->rule[1];
1163 		}
1164 	}
1165 
1166 	return rule;
1167 }
1168 
1169 void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
1170 				  struct mlx5e_tc_flow *flow,
1171 				  struct mlx5_flow_attr *attr)
1172 {
1173 	flow_flag_clear(flow, OFFLOADED);
1174 
1175 	if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
1176 		goto offload_rule_0;
1177 
1178 	if (attr->esw_attr->split_count)
1179 		mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
1180 
1181 	if (flow_flag_test(flow, CT))
1182 		mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
1183 	else if (flow_flag_test(flow, SAMPLE))
1184 		mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
1185 	else
1186 offload_rule_0:
1187 		mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
1188 }
1189 
1190 struct mlx5_flow_handle *
1191 mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
1192 			      struct mlx5e_tc_flow *flow,
1193 			      struct mlx5_flow_spec *spec)
1194 {
1195 	struct mlx5_flow_attr *slow_attr;
1196 	struct mlx5_flow_handle *rule;
1197 
1198 	slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1199 	if (!slow_attr)
1200 		return ERR_PTR(-ENOMEM);
1201 
1202 	memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1203 	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1204 	slow_attr->esw_attr->split_count = 0;
1205 	slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
1206 
1207 	rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
1208 	if (!IS_ERR(rule))
1209 		flow_flag_set(flow, SLOW);
1210 
1211 	kfree(slow_attr);
1212 
1213 	return rule;
1214 }
1215 
1216 void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
1217 				       struct mlx5e_tc_flow *flow)
1218 {
1219 	struct mlx5_flow_attr *slow_attr;
1220 
1221 	slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1222 	if (!slow_attr) {
1223 		mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
1224 		return;
1225 	}
1226 
1227 	memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1228 	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1229 	slow_attr->esw_attr->split_count = 0;
1230 	slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
1231 	mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
1232 	flow_flag_clear(flow, SLOW);
1233 	kfree(slow_attr);
1234 }
1235 
1236 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1237  * function.
1238  */
1239 static void unready_flow_add(struct mlx5e_tc_flow *flow,
1240 			     struct list_head *unready_flows)
1241 {
1242 	flow_flag_set(flow, NOT_READY);
1243 	list_add_tail(&flow->unready, unready_flows);
1244 }
1245 
1246 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1247  * function.
1248  */
1249 static void unready_flow_del(struct mlx5e_tc_flow *flow)
1250 {
1251 	list_del(&flow->unready);
1252 	flow_flag_clear(flow, NOT_READY);
1253 }
1254 
1255 static void add_unready_flow(struct mlx5e_tc_flow *flow)
1256 {
1257 	struct mlx5_rep_uplink_priv *uplink_priv;
1258 	struct mlx5e_rep_priv *rpriv;
1259 	struct mlx5_eswitch *esw;
1260 
1261 	esw = flow->priv->mdev->priv.eswitch;
1262 	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1263 	uplink_priv = &rpriv->uplink_priv;
1264 
1265 	mutex_lock(&uplink_priv->unready_flows_lock);
1266 	unready_flow_add(flow, &uplink_priv->unready_flows);
1267 	mutex_unlock(&uplink_priv->unready_flows_lock);
1268 }
1269 
1270 static void remove_unready_flow(struct mlx5e_tc_flow *flow)
1271 {
1272 	struct mlx5_rep_uplink_priv *uplink_priv;
1273 	struct mlx5e_rep_priv *rpriv;
1274 	struct mlx5_eswitch *esw;
1275 
1276 	esw = flow->priv->mdev->priv.eswitch;
1277 	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1278 	uplink_priv = &rpriv->uplink_priv;
1279 
1280 	mutex_lock(&uplink_priv->unready_flows_lock);
1281 	if (flow_flag_test(flow, NOT_READY))
1282 		unready_flow_del(flow);
1283 	mutex_unlock(&uplink_priv->unready_flows_lock);
1284 }
1285 
1286 static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv);
1287 
1288 bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
1289 {
1290 	struct mlx5_core_dev *out_mdev, *route_mdev;
1291 	struct mlx5e_priv *out_priv, *route_priv;
1292 
1293 	out_priv = netdev_priv(out_dev);
1294 	out_mdev = out_priv->mdev;
1295 	route_priv = netdev_priv(route_dev);
1296 	route_mdev = route_priv->mdev;
1297 
1298 	if (out_mdev->coredev_type != MLX5_COREDEV_PF ||
1299 	    route_mdev->coredev_type != MLX5_COREDEV_VF)
1300 		return false;
1301 
1302 	return same_hw_devs(out_priv, route_priv);
1303 }
1304 
1305 int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
1306 {
1307 	struct mlx5e_priv *out_priv, *route_priv;
1308 	struct mlx5_core_dev *route_mdev;
1309 	struct mlx5_eswitch *esw;
1310 	u16 vhca_id;
1311 
1312 	out_priv = netdev_priv(out_dev);
1313 	esw = out_priv->mdev->priv.eswitch;
1314 	route_priv = netdev_priv(route_dev);
1315 	route_mdev = route_priv->mdev;
1316 
1317 	vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
1318 	if (mlx5_lag_is_active(out_priv->mdev)) {
1319 		struct mlx5_devcom *devcom;
1320 		int err;
1321 
1322 		/* In the LAG case we may get devices from different eswitch instances.
1323 		 * If we failed to get the vport num, it most likely means we are on the
1324 		 * wrong eswitch.
1325 		 */
1326 		err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
1327 		if (err != -ENOENT)
1328 			return err;
1329 
1330 		rcu_read_lock();
1331 		devcom = out_priv->mdev->priv.devcom;
1332 		esw = mlx5_devcom_get_peer_data_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1333 		err = esw ? mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport) : -ENODEV;
1334 		rcu_read_unlock();
1335 
1336 		return err;
1337 	}
1338 
1339 	return mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
1340 }
1341 
1342 int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
1343 			      struct mlx5e_tc_flow *flow,
1344 			      struct mlx5_flow_attr *attr)
1345 {
1346 	struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
1347 	struct mlx5_modify_hdr *mod_hdr;
1348 
1349 	mod_hdr = mlx5_modify_header_alloc(priv->mdev,
1350 					   get_flow_name_space(flow),
1351 					   mod_hdr_acts->num_actions,
1352 					   mod_hdr_acts->actions);
1353 	if (IS_ERR(mod_hdr))
1354 		return PTR_ERR(mod_hdr);
1355 
1356 	WARN_ON(attr->modify_hdr);
1357 	attr->modify_hdr = mod_hdr;
1358 
1359 	return 0;
1360 }
1361 
1362 static int
1363 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
1364 		      struct mlx5e_tc_flow *flow,
1365 		      struct netlink_ext_ack *extack)
1366 {
1367 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1368 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1369 	struct mlx5_flow_attr *attr = flow->attr;
1370 	bool vf_tun = false, encap_valid = true;
1371 	struct net_device *encap_dev = NULL;
1372 	struct mlx5_esw_flow_attr *esw_attr;
1373 	struct mlx5e_rep_priv *rpriv;
1374 	struct mlx5e_priv *out_priv;
1375 	struct mlx5_fc *counter;
1376 	u32 max_prio, max_chain;
1377 	int err = 0;
1378 	int out_index;
1379 
1380 	/* We check chain range only for tc flows.
1381 	 * For ft flows, we checked attr->chain was originally 0 and set it to
1382 	 * FDB_FT_CHAIN which is outside tc range.
1383 	 * See mlx5e_rep_setup_ft_cb().
1384 	 */
1385 	max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
1386 	if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
1387 		NL_SET_ERR_MSG_MOD(extack,
1388 				   "Requested chain is out of supported range");
1389 		err = -EOPNOTSUPP;
1390 		goto err_out;
1391 	}
1392 
1393 	max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
1394 	if (attr->prio > max_prio) {
1395 		NL_SET_ERR_MSG_MOD(extack,
1396 				   "Requested priority is out of supported range");
1397 		err = -EOPNOTSUPP;
1398 		goto err_out;
1399 	}
1400 
1401 	if (flow_flag_test(flow, TUN_RX)) {
1402 		err = mlx5e_attach_decap_route(priv, flow);
1403 		if (err)
1404 			goto err_out;
1405 	}
1406 
1407 	if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
1408 		err = mlx5e_attach_decap(priv, flow, extack);
1409 		if (err)
1410 			goto err_out;
1411 	}
1412 
1413 	parse_attr = attr->parse_attr;
1414 	esw_attr = attr->esw_attr;
1415 
1416 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1417 		struct net_device *out_dev;
1418 		int mirred_ifindex;
1419 
1420 		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1421 			continue;
1422 
1423 		mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1424 		out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
1425 		if (!out_dev) {
1426 			NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
1427 			err = -ENODEV;
1428 			goto err_out;
1429 		}
1430 		err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
1431 					 extack, &encap_dev, &encap_valid);
1432 		dev_put(out_dev);
1433 		if (err)
1434 			goto err_out;
1435 
1436 		if (esw_attr->dests[out_index].flags &
1437 		    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
1438 			vf_tun = true;
1439 		out_priv = netdev_priv(encap_dev);
1440 		rpriv = out_priv->ppriv;
1441 		esw_attr->dests[out_index].rep = rpriv->rep;
1442 		esw_attr->dests[out_index].mdev = out_priv->mdev;
1443 	}
1444 
1445 	if (vf_tun && esw_attr->out_count > 1) {
1446 		NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
1447 		err = -EOPNOTSUPP;
1448 		goto err_out;
1449 	}
1450 
1451 	err = mlx5_eswitch_add_vlan_action(esw, attr);
1452 	if (err)
1453 		goto err_out;
1454 
1455 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
1456 	    !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) {
1457 		if (vf_tun) {
1458 			err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
1459 			if (err)
1460 				goto err_out;
1461 		} else {
1462 			err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
1463 			if (err)
1464 				goto err_out;
1465 		}
1466 	}
1467 
1468 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1469 		counter = mlx5_fc_create(esw_attr->counter_dev, true);
1470 		if (IS_ERR(counter)) {
1471 			err = PTR_ERR(counter);
1472 			goto err_out;
1473 		}
1474 
1475 		attr->counter = counter;
1476 	}
1477 
1478 	/* we get here if one of the following takes place:
1479 	 * (1) there's no error
1480 	 * (2) there's an encap action and we don't have a valid neigh
1481 	 */
1482 	if (!encap_valid)
1483 		flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
1484 	else
1485 		flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
1486 
1487 	if (IS_ERR(flow->rule[0])) {
1488 		err = PTR_ERR(flow->rule[0]);
1489 		goto err_out;
1490 	}
1491 	flow_flag_set(flow, OFFLOADED);
1492 
1493 	return 0;
1494 
1495 err_out:
1496 	flow_flag_set(flow, FAILED);
1497 	return err;
1498 }
1499 
1500 static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
1501 {
1502 	struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
1503 	void *headers_v = MLX5_ADDR_OF(fte_match_param,
1504 				       spec->match_value,
1505 				       misc_parameters_3);
1506 	u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
1507 					     headers_v,
1508 					     geneve_tlv_option_0_data);
1509 
1510 	return !!geneve_tlv_opt_0_data;
1511 }
1512 
1513 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
1514 				  struct mlx5e_tc_flow *flow)
1515 {
1516 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1517 	struct mlx5_flow_attr *attr = flow->attr;
1518 	struct mlx5_esw_flow_attr *esw_attr;
1519 	bool vf_tun = false;
1520 	int out_index;
1521 
1522 	esw_attr = attr->esw_attr;
1523 	mlx5e_put_flow_tunnel_id(flow);
1524 
1525 	remove_unready_flow(flow);
1526 
1527 	if (mlx5e_is_offloaded_flow(flow)) {
1528 		if (flow_flag_test(flow, SLOW))
1529 			mlx5e_tc_unoffload_from_slow_path(esw, flow);
1530 		else
1531 			mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
1532 	}
1533 	complete_all(&flow->del_hw_done);
1534 
1535 	if (mlx5_flow_has_geneve_opt(flow))
1536 		mlx5_geneve_tlv_option_del(priv->mdev->geneve);
1537 
1538 	mlx5_eswitch_del_vlan_action(esw, attr);
1539 
1540 	if (flow->decap_route)
1541 		mlx5e_detach_decap_route(priv, flow);
1542 
1543 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1544 		if (esw_attr->dests[out_index].flags &
1545 		    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
1546 			vf_tun = true;
1547 		if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
1548 			mlx5e_detach_encap(priv, flow, out_index);
1549 			kfree(attr->parse_attr->tun_info[out_index]);
1550 		}
1551 	}
1552 
1553 	mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
1554 
1555 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1556 		mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
1557 		if (vf_tun && attr->modify_hdr)
1558 			mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
1559 		else
1560 			mlx5e_detach_mod_hdr(priv, flow);
1561 	}
1562 	kfree(attr->sample_attr);
1563 	kvfree(attr->parse_attr);
1564 	kvfree(attr->esw_attr->rx_tun_attr);
1565 
1566 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
1567 		mlx5_fc_destroy(esw_attr->counter_dev, attr->counter);
1568 
1569 	if (flow_flag_test(flow, L3_TO_L2_DECAP))
1570 		mlx5e_detach_decap(priv, flow);
1571 
1572 	kfree(flow->attr);
1573 }
1574 
1575 struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
1576 {
1577 	return flow->attr->counter;
1578 }
1579 
1580 /* Iterate over tmp_list of flows attached to flow_list head. */
1581 void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
1582 {
1583 	struct mlx5e_tc_flow *flow, *tmp;
1584 
1585 	list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
1586 		mlx5e_flow_put(priv, flow);
1587 }
1588 
1589 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1590 {
1591 	struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
1592 
1593 	if (!flow_flag_test(flow, ESWITCH) ||
1594 	    !flow_flag_test(flow, DUP))
1595 		return;
1596 
1597 	mutex_lock(&esw->offloads.peer_mutex);
1598 	list_del(&flow->peer);
1599 	mutex_unlock(&esw->offloads.peer_mutex);
1600 
1601 	flow_flag_clear(flow, DUP);
1602 
1603 	if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
1604 		mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
1605 		kfree(flow->peer_flow);
1606 	}
1607 
1608 	flow->peer_flow = NULL;
1609 }
1610 
1611 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1612 {
1613 	struct mlx5_core_dev *dev = flow->priv->mdev;
1614 	struct mlx5_devcom *devcom = dev->priv.devcom;
1615 	struct mlx5_eswitch *peer_esw;
1616 
1617 	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1618 	if (!peer_esw)
1619 		return;
1620 
1621 	__mlx5e_tc_del_fdb_peer_flow(flow);
1622 	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1623 }
1624 
1625 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
1626 			      struct mlx5e_tc_flow *flow)
1627 {
1628 	if (mlx5e_is_eswitch_flow(flow)) {
1629 		mlx5e_tc_del_fdb_peer_flow(flow);
1630 		mlx5e_tc_del_fdb_flow(priv, flow);
1631 	} else {
1632 		mlx5e_tc_del_nic_flow(priv, flow);
1633 	}
1634 }
1635 
1636 static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f)
1637 {
1638 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1639 	struct flow_action *flow_action = &rule->action;
1640 	const struct flow_action_entry *act;
1641 	int i;
1642 
1643 	if (chain)
1644 		return false;
1645 
1646 	flow_action_for_each(i, act, flow_action) {
1647 		switch (act->id) {
1648 		case FLOW_ACTION_GOTO:
1649 			return true;
1650 		case FLOW_ACTION_SAMPLE:
1651 			return true;
1652 		default:
1653 			continue;
1654 		}
1655 	}
1656 
1657 	return false;
1658 }
1659 
1660 static int
1661 enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
1662 				    struct flow_dissector_key_enc_opts *opts,
1663 				    struct netlink_ext_ack *extack,
1664 				    bool *dont_care)
1665 {
1666 	struct geneve_opt *opt;
1667 	int off = 0;
1668 
1669 	*dont_care = true;
1670 
1671 	while (opts->len > off) {
1672 		opt = (struct geneve_opt *)&opts->data[off];
1673 
1674 		if (!(*dont_care) || opt->opt_class || opt->type ||
1675 		    memchr_inv(opt->opt_data, 0, opt->length * 4)) {
1676 			*dont_care = false;
1677 
1678 			if (opt->opt_class != htons(U16_MAX) ||
1679 			    opt->type != U8_MAX) {
1680 				NL_SET_ERR_MSG(extack,
1681 					       "Partial match of tunnel options in chain > 0 isn't supported");
1682 				netdev_warn(priv->netdev,
1683 					    "Partial match of tunnel options in chain > 0 isn't supported");
1684 				return -EOPNOTSUPP;
1685 			}
1686 		}
1687 
1688 		off += sizeof(struct geneve_opt) + opt->length * 4;
1689 	}
1690 
1691 	return 0;
1692 }
1693 
1694 #define COPY_DISSECTOR(rule, diss_key, dst)\
1695 ({ \
1696 	struct flow_rule *__rule = (rule);\
1697 	typeof(dst) __dst = dst;\
1698 \
1699 	memcpy(__dst,\
1700 	       skb_flow_dissector_target(__rule->match.dissector,\
1701 					 diss_key,\
1702 					 __rule->match.key),\
1703 	       sizeof(*__dst));\
1704 })
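/* COPY_DISSECTOR() copies the dissector key identified by diss_key from the
 * rule's match key into dst; it is used below to assemble the tunnel_match_key
 * that gets registered in the tunnel mapping table.
 */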
1705 
1706 static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
1707 				    struct mlx5e_tc_flow *flow,
1708 				    struct flow_cls_offload *f,
1709 				    struct net_device *filter_dev)
1710 {
1711 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1712 	struct netlink_ext_ack *extack = f->common.extack;
1713 	struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
1714 	struct flow_match_enc_opts enc_opts_match;
1715 	struct tunnel_match_enc_opts tun_enc_opts;
1716 	struct mlx5_rep_uplink_priv *uplink_priv;
1717 	struct mlx5_flow_attr *attr = flow->attr;
1718 	struct mlx5e_rep_priv *uplink_rpriv;
1719 	struct tunnel_match_key tunnel_key;
1720 	bool enc_opts_is_dont_care = true;
1721 	u32 tun_id, enc_opts_id = 0;
1722 	struct mlx5_eswitch *esw;
1723 	u32 value, mask;
1724 	int err;
1725 
1726 	esw = priv->mdev->priv.eswitch;
1727 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1728 	uplink_priv = &uplink_rpriv->uplink_priv;
1729 
1730 	memset(&tunnel_key, 0, sizeof(tunnel_key));
1731 	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
1732 		       &tunnel_key.enc_control);
1733 	if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
1734 		COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
1735 			       &tunnel_key.enc_ipv4);
1736 	else
1737 		COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
1738 			       &tunnel_key.enc_ipv6);
1739 	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
1740 	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
1741 		       &tunnel_key.enc_tp);
1742 	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
1743 		       &tunnel_key.enc_key_id);
1744 	tunnel_key.filter_ifindex = filter_dev->ifindex;
1745 
1746 	err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
1747 	if (err)
1748 		return err;
1749 
1750 	flow_rule_match_enc_opts(rule, &enc_opts_match);
1751 	err = enc_opts_is_dont_care_or_full_match(priv,
1752 						  enc_opts_match.mask,
1753 						  extack,
1754 						  &enc_opts_is_dont_care);
1755 	if (err)
1756 		goto err_enc_opts;
1757 
1758 	if (!enc_opts_is_dont_care) {
1759 		memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
1760 		memcpy(&tun_enc_opts.key, enc_opts_match.key,
1761 		       sizeof(*enc_opts_match.key));
1762 		memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
1763 		       sizeof(*enc_opts_match.mask));
1764 
1765 		err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
1766 				  &tun_enc_opts, &enc_opts_id);
1767 		if (err)
1768 			goto err_enc_opts;
1769 	}
1770 
1771 	value = tun_id << ENC_OPTS_BITS | enc_opts_id;
1772 	mask = enc_opts_id ? TUNNEL_ID_MASK :
1773 			     (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
1774 
1775 	if (attr->chain) {
1776 		mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
1777 					    TUNNEL_TO_REG, value, mask);
1778 	} else {
1779 		mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
1780 		err = mlx5e_tc_match_to_reg_set(priv->mdev,
1781 						mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
1782 						TUNNEL_TO_REG, value);
1783 		if (err)
1784 			goto err_set;
1785 
1786 		attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1787 	}
1788 
1789 	flow->tunnel_id = value;
1790 	return 0;
1791 
1792 err_set:
1793 	if (enc_opts_id)
1794 		mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
1795 			       enc_opts_id);
1796 err_enc_opts:
1797 	mapping_remove(uplink_priv->tunnel_mapping, tun_id);
1798 	return err;
1799 }
1800 
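/* flow->tunnel_id packs the enc opts mapping id in the low ENC_OPTS_BITS and the
 * tunnel mapping id above them; mlx5e_put_flow_tunnel_id() undoes this encoding
 * when releasing the two mappings.
 */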
1801 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
1802 {
1803 	u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK;
1804 	u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS;
1805 	struct mlx5_rep_uplink_priv *uplink_priv;
1806 	struct mlx5e_rep_priv *uplink_rpriv;
1807 	struct mlx5_eswitch *esw;
1808 
1809 	esw = flow->priv->mdev->priv.eswitch;
1810 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1811 	uplink_priv = &uplink_rpriv->uplink_priv;
1812 
1813 	if (tun_id)
1814 		mapping_remove(uplink_priv->tunnel_mapping, tun_id);
1815 	if (enc_opts_id)
1816 		mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
1817 			       enc_opts_id);
1818 }
1819 
1820 u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow)
1821 {
1822 	return flow->tunnel_id;
1823 }
1824 
1825 void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
1826 			    struct flow_match_basic *match, bool outer,
1827 			    void *headers_c, void *headers_v)
1828 {
1829 	bool ip_version_cap;
1830 
1831 	ip_version_cap = outer ?
1832 		MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
1833 					  ft_field_support.outer_ip_version) :
1834 		MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
1835 					  ft_field_support.inner_ip_version);
1836 
1837 	if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
1838 	    (match->key->n_proto == htons(ETH_P_IP) ||
1839 	     match->key->n_proto == htons(ETH_P_IPV6))) {
1840 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
1841 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
1842 			 match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
1843 	} else {
1844 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
1845 			 ntohs(match->mask->n_proto));
1846 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
1847 			 ntohs(match->key->n_proto));
1848 	}
1849 }
1850 
1851 u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
1852 {
1853 	void *headers_v;
1854 	u16 ethertype;
1855 	u8 ip_version;
1856 
1857 	if (outer)
1858 		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1859 	else
1860 		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
1861 
1862 	ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
1863 	/* Fall back to deriving ip_version from the matched ethertype */
1864 	if (!ip_version) {
1865 		ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
1866 		if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
1867 			ip_version = 4;
1868 		else if (ethertype == ETH_P_IPV6)
1869 			ip_version = 6;
1870 	}
1871 	return ip_version;
1872 }
1873 
1874 /* Tunnel device follows RFC 6040, see include/net/inet_ecn.h.
1875  * It changes the inner ip_ecn depending on the inner and outer ip_ecn as follows:
1876  *      +---------+----------------------------------------+
1877  *      |Arriving |         Arriving Outer Header          |
1878  *      |   Inner +---------+---------+---------+----------+
1879  *      |  Header | Not-ECT | ECT(0)  | ECT(1)  |   CE     |
1880  *      +---------+---------+---------+---------+----------+
1881  *      | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop>   |
1882  *      |  ECT(0) |  ECT(0) | ECT(0)  | ECT(1)  |   CE*    |
1883  *      |  ECT(1) |  ECT(1) | ECT(1)  | ECT(1)* |   CE*    |
1884  *      |    CE   |   CE    |  CE     | CE      |   CE     |
1885  *      +---------+---------+---------+---------+----------+
1886  *
1887  * TC matches on the inner header after decapsulation on the tunnel device, but the hw
1888  * offload matches the inner ip_ecn value before the hardware decap action.
1889  *
1890  * Cells marked with * are changed from the original inner packet ip_ecn value during decap,
1891  * so matching those values on the inner ip_ecn before decap will fail.
1892  *
1893  * The following helper allows offload when the inner ip_ecn won't be changed by the outer
1894  * ip_ecn, except for outer ip_ecn = CE, where the inner ip_ecn is always changed to CE,
1895  * and as such we can drop the inner ip_ecn=CE match.
1896  */
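/* For example, with the helper below a filter that matches both enc_tos ecn=CE and
 * tos ecn=CE is offloaded with the inner ecn match dropped (*match_inner_ecn is
 * cleared), while one that matches the inner ecn with enc_tos ecn=ECT(1) is
 * rejected since decap may rewrite the inner value.
 */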
1897 
1898 static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv,
1899 				      struct flow_cls_offload *f,
1900 				      bool *match_inner_ecn)
1901 {
1902 	u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0;
1903 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1904 	struct netlink_ext_ack *extack = f->common.extack;
1905 	struct flow_match_ip match;
1906 
1907 	*match_inner_ecn = true;
1908 
1909 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
1910 		flow_rule_match_enc_ip(rule, &match);
1911 		outer_ecn_key = match.key->tos & INET_ECN_MASK;
1912 		outer_ecn_mask = match.mask->tos & INET_ECN_MASK;
1913 	}
1914 
1915 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
1916 		flow_rule_match_ip(rule, &match);
1917 		inner_ecn_key = match.key->tos & INET_ECN_MASK;
1918 		inner_ecn_mask = match.mask->tos & INET_ECN_MASK;
1919 	}
1920 
1921 	if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) {
1922 		NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported");
1923 		netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported");
1924 		return -EOPNOTSUPP;
1925 	}
1926 
1927 	if (!outer_ecn_mask) {
1928 		if (!inner_ecn_mask)
1929 			return 0;
1930 
1931 		NL_SET_ERR_MSG_MOD(extack,
1932 				   "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
1933 		netdev_warn(priv->netdev,
1934 			    "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
1935 		return -EOPNOTSUPP;
1936 	}
1937 
1938 	if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) {
1939 		NL_SET_ERR_MSG_MOD(extack,
1940 				   "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
1941 		netdev_warn(priv->netdev,
1942 			    "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
1943 		return -EOPNOTSUPP;
1944 	}
1945 
1946 	if (!inner_ecn_mask)
1947 		return 0;
1948 
1949 	/* Both inner and outer have full mask on ecn */
1950 
1951 	if (outer_ecn_key == INET_ECN_ECT_1) {
1952 		/* inner ecn might change by DECAP action */
1953 
1954 		NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported");
1955 		netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported");
1956 		return -EOPNOTSUPP;
1957 	}
1958 
1959 	if (outer_ecn_key != INET_ECN_CE)
1960 		return 0;
1961 
1962 	if (inner_ecn_key != INET_ECN_CE) {
1963 		/* Can't happen in software, as packet ecn will be changed to CE after decap */
1964 		NL_SET_ERR_MSG_MOD(extack,
1965 				   "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
1966 		netdev_warn(priv->netdev,
1967 			    "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
1968 		return -EOPNOTSUPP;
1969 	}
1970 
1971 	/* outer ecn = CE, inner ecn = CE; as decap will change the inner ecn to CE in any case,
1972 	 * drop the match on the inner ecn
1973 	 */
1974 	*match_inner_ecn = false;
1975 
1976 	return 0;
1977 }
1978 
1979 static int parse_tunnel_attr(struct mlx5e_priv *priv,
1980 			     struct mlx5e_tc_flow *flow,
1981 			     struct mlx5_flow_spec *spec,
1982 			     struct flow_cls_offload *f,
1983 			     struct net_device *filter_dev,
1984 			     u8 *match_level,
1985 			     bool *match_inner)
1986 {
1987 	struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
1988 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1989 	struct netlink_ext_ack *extack = f->common.extack;
1990 	bool needs_mapping, sets_mapping;
1991 	int err;
1992 
1993 	if (!mlx5e_is_eswitch_flow(flow))
1994 		return -EOPNOTSUPP;
1995 
1996 	needs_mapping = !!flow->attr->chain;
1997 	sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f);
1998 	*match_inner = !needs_mapping;
1999 
2000 	if ((needs_mapping || sets_mapping) &&
2001 	    !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2002 		NL_SET_ERR_MSG(extack,
2003 			       "Chains on tunnel devices isn't supported without register loopback support");
2004 		netdev_warn(priv->netdev,
2005 			    "Chains on tunnel devices isn't supported without register loopback support");
2006 		return -EOPNOTSUPP;
2007 	}
2008 
2009 	if (!flow->attr->chain) {
2010 		err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
2011 					 match_level);
2012 		if (err) {
2013 			NL_SET_ERR_MSG_MOD(extack,
2014 					   "Failed to parse tunnel attributes");
2015 			netdev_warn(priv->netdev,
2016 				    "Failed to parse tunnel attributes");
2017 			return err;
2018 		}
2019 
2020 		/* With mpls over udp we decapsulate using packet reformat
2021 		 * object
2022 		 */
2023 		if (!netif_is_bareudp(filter_dev))
2024 			flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2025 		err = mlx5e_tc_set_attr_rx_tun(flow, spec);
2026 		if (err)
2027 			return err;
2028 	} else if (tunnel && tunnel->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
2029 		struct mlx5_flow_spec *tmp_spec;
2030 
2031 		tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
2032 		if (!tmp_spec) {
2033 			NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for vxlan tmp spec");
2034 			netdev_warn(priv->netdev, "Failed to allocate memory for vxlan tmp spec");
2035 			return -ENOMEM;
2036 		}
2037 		memcpy(tmp_spec, spec, sizeof(*tmp_spec));
2038 
2039 		err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
2040 		if (err) {
2041 			kvfree(tmp_spec);
2042 			NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
2043 			netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
2044 			return err;
2045 		}
2046 		err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
2047 		kvfree(tmp_spec);
2048 		if (err)
2049 			return err;
2050 	}
2051 
2052 	if (!needs_mapping && !sets_mapping)
2053 		return 0;
2054 
2055 	return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
2056 }
2057 
2058 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
2059 {
2060 	return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2061 			    inner_headers);
2062 }
2063 
2064 static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
2065 {
2066 	return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2067 			    inner_headers);
2068 }
2069 
2070 static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
2071 {
2072 	return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2073 			    outer_headers);
2074 }
2075 
2076 static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
2077 {
2078 	return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2079 			    outer_headers);
2080 }
2081 
2082 static void *get_match_headers_value(u32 flags,
2083 				     struct mlx5_flow_spec *spec)
2084 {
2085 	return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2086 		get_match_inner_headers_value(spec) :
2087 		get_match_outer_headers_value(spec);
2088 }
2089 
2090 static void *get_match_headers_criteria(u32 flags,
2091 					struct mlx5_flow_spec *spec)
2092 {
2093 	return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2094 		get_match_inner_headers_criteria(spec) :
2095 		get_match_outer_headers_criteria(spec);
2096 }
2097 
2098 static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
2099 				   struct flow_cls_offload *f)
2100 {
2101 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2102 	struct netlink_ext_ack *extack = f->common.extack;
2103 	struct net_device *ingress_dev;
2104 	struct flow_match_meta match;
2105 
2106 	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
2107 		return 0;
2108 
2109 	flow_rule_match_meta(rule, &match);
2110 	if (!match.mask->ingress_ifindex)
2111 		return 0;
2112 
2113 	if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
2114 		NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
2115 		return -EOPNOTSUPP;
2116 	}
2117 
2118 	ingress_dev = __dev_get_by_index(dev_net(filter_dev),
2119 					 match.key->ingress_ifindex);
2120 	if (!ingress_dev) {
2121 		NL_SET_ERR_MSG_MOD(extack,
2122 				   "Can't find the ingress port to match on");
2123 		return -ENOENT;
2124 	}
2125 
2126 	if (ingress_dev != filter_dev) {
2127 		NL_SET_ERR_MSG_MOD(extack,
2128 				   "Can't match on the ingress filter port");
2129 		return -EOPNOTSUPP;
2130 	}
2131 
2132 	return 0;
2133 }
2134 
2135 static bool skip_key_basic(struct net_device *filter_dev,
2136 			   struct flow_cls_offload *f)
2137 {
2138 	/* When doing mpls over udp decap, the user needs to provide
2139 	 * MPLS_UC as the protocol in order to be able to match on mpls
2140 	 * label fields.  However, the actual ethertype is IP so we want to
2141 	 * avoid matching on this, otherwise we'll fail the match.
2142 	 */
2143 	if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
2144 		return true;
2145 
2146 	return false;
2147 }
2148 
2149 static int __parse_cls_flower(struct mlx5e_priv *priv,
2150 			      struct mlx5e_tc_flow *flow,
2151 			      struct mlx5_flow_spec *spec,
2152 			      struct flow_cls_offload *f,
2153 			      struct net_device *filter_dev,
2154 			      u8 *inner_match_level, u8 *outer_match_level)
2155 {
2156 	struct netlink_ext_ack *extack = f->common.extack;
2157 	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2158 				       outer_headers);
2159 	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2160 				       outer_headers);
2161 	void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2162 				    misc_parameters);
2163 	void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2164 				    misc_parameters);
2165 	void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2166 				    misc_parameters_3);
2167 	void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2168 				    misc_parameters_3);
2169 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2170 	struct flow_dissector *dissector = rule->match.dissector;
2171 	enum fs_flow_table_type fs_type;
2172 	bool match_inner_ecn = true;
2173 	u16 addr_type = 0;
2174 	u8 ip_proto = 0;
2175 	u8 *match_level;
2176 	int err;
2177 
2178 	fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX;
2179 	match_level = outer_match_level;
2180 
2181 	if (dissector->used_keys &
2182 	    ~(BIT(FLOW_DISSECTOR_KEY_META) |
2183 	      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
2184 	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
2185 	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
2186 	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
2187 	      BIT(FLOW_DISSECTOR_KEY_CVLAN) |
2188 	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
2189 	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
2190 	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
2191 	      BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
2192 	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
2193 	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
2194 	      BIT(FLOW_DISSECTOR_KEY_ENC_PORTS)	|
2195 	      BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
2196 	      BIT(FLOW_DISSECTOR_KEY_TCP) |
2197 	      BIT(FLOW_DISSECTOR_KEY_IP)  |
2198 	      BIT(FLOW_DISSECTOR_KEY_CT) |
2199 	      BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
2200 	      BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
2201 	      BIT(FLOW_DISSECTOR_KEY_ICMP) |
2202 	      BIT(FLOW_DISSECTOR_KEY_MPLS))) {
2203 		NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
2204 		netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n",
2205 			   dissector->used_keys);
2206 		return -EOPNOTSUPP;
2207 	}
2208 
2209 	if (mlx5e_get_tc_tun(filter_dev)) {
2210 		bool match_inner = false;
2211 
2212 		err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
2213 					outer_match_level, &match_inner);
2214 		if (err)
2215 			return err;
2216 
2217 		if (match_inner) {
2218 			/* header pointers should point to the inner headers
2219 			 * if the packet was decapsulated already.
2220 			 * outer headers are set by parse_tunnel_attr.
2221 			 */
2222 			match_level = inner_match_level;
2223 			headers_c = get_match_inner_headers_criteria(spec);
2224 			headers_v = get_match_inner_headers_value(spec);
2225 		}
2226 
2227 		err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn);
2228 		if (err)
2229 			return err;
2230 	}
2231 
2232 	err = mlx5e_flower_parse_meta(filter_dev, f);
2233 	if (err)
2234 		return err;
2235 
2236 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
2237 	    !skip_key_basic(filter_dev, f)) {
2238 		struct flow_match_basic match;
2239 
2240 		flow_rule_match_basic(rule, &match);
2241 		mlx5e_tc_set_ethertype(priv->mdev, &match,
2242 				       match_level == outer_match_level,
2243 				       headers_c, headers_v);
2244 
2245 		if (match.mask->n_proto)
2246 			*match_level = MLX5_MATCH_L2;
2247 	}
2248 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
2249 	    is_vlan_dev(filter_dev)) {
2250 		struct flow_dissector_key_vlan filter_dev_mask;
2251 		struct flow_dissector_key_vlan filter_dev_key;
2252 		struct flow_match_vlan match;
2253 
2254 		if (is_vlan_dev(filter_dev)) {
2255 			match.key = &filter_dev_key;
2256 			match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
2257 			match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
2258 			match.key->vlan_priority = 0;
2259 			match.mask = &filter_dev_mask;
2260 			memset(match.mask, 0xff, sizeof(*match.mask));
2261 			match.mask->vlan_priority = 0;
2262 		} else {
2263 			flow_rule_match_vlan(rule, &match);
2264 		}
2265 		if (match.mask->vlan_id ||
2266 		    match.mask->vlan_priority ||
2267 		    match.mask->vlan_tpid) {
2268 			if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2269 				MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2270 					 svlan_tag, 1);
2271 				MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2272 					 svlan_tag, 1);
2273 			} else {
2274 				MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2275 					 cvlan_tag, 1);
2276 				MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2277 					 cvlan_tag, 1);
2278 			}
2279 
2280 			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
2281 				 match.mask->vlan_id);
2282 			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
2283 				 match.key->vlan_id);
2284 
2285 			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
2286 				 match.mask->vlan_priority);
2287 			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
2288 				 match.key->vlan_priority);
2289 
2290 			*match_level = MLX5_MATCH_L2;
2291 
2292 			if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) &&
2293 			    match.mask->vlan_eth_type &&
2294 			    MLX5_CAP_FLOWTABLE_TYPE(priv->mdev,
2295 						    ft_field_support.outer_second_vid,
2296 						    fs_type)) {
2297 				MLX5_SET(fte_match_set_misc, misc_c,
2298 					 outer_second_cvlan_tag, 1);
2299 				spec->match_criteria_enable |=
2300 					MLX5_MATCH_MISC_PARAMETERS;
2301 			}
2302 		}
2303 	} else if (*match_level != MLX5_MATCH_NONE) {
2304 		/* cvlan_tag enabled in the match criteria and
2305 		 * disabled in the match value means both S & C tags
2306 		 * don't exist (untagged for both)
2307 		 */
2308 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
2309 		*match_level = MLX5_MATCH_L2;
2310 	}
2311 
2312 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
2313 		struct flow_match_vlan match;
2314 
2315 		flow_rule_match_cvlan(rule, &match);
2316 		if (match.mask->vlan_id ||
2317 		    match.mask->vlan_priority ||
2318 		    match.mask->vlan_tpid) {
2319 			if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid,
2320 						     fs_type)) {
2321 				NL_SET_ERR_MSG_MOD(extack,
2322 						   "Matching on CVLAN is not supported");
2323 				return -EOPNOTSUPP;
2324 			}
2325 
2326 			if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2327 				MLX5_SET(fte_match_set_misc, misc_c,
2328 					 outer_second_svlan_tag, 1);
2329 				MLX5_SET(fte_match_set_misc, misc_v,
2330 					 outer_second_svlan_tag, 1);
2331 			} else {
2332 				MLX5_SET(fte_match_set_misc, misc_c,
2333 					 outer_second_cvlan_tag, 1);
2334 				MLX5_SET(fte_match_set_misc, misc_v,
2335 					 outer_second_cvlan_tag, 1);
2336 			}
2337 
2338 			MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
2339 				 match.mask->vlan_id);
2340 			MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
2341 				 match.key->vlan_id);
2342 			MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
2343 				 match.mask->vlan_priority);
2344 			MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
2345 				 match.key->vlan_priority);
2346 
2347 			*match_level = MLX5_MATCH_L2;
2348 			spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
2349 		}
2350 	}
2351 
2352 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2353 		struct flow_match_eth_addrs match;
2354 
2355 		flow_rule_match_eth_addrs(rule, &match);
2356 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2357 					     dmac_47_16),
2358 				match.mask->dst);
2359 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2360 					     dmac_47_16),
2361 				match.key->dst);
2362 
2363 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2364 					     smac_47_16),
2365 				match.mask->src);
2366 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2367 					     smac_47_16),
2368 				match.key->src);
2369 
2370 		if (!is_zero_ether_addr(match.mask->src) ||
2371 		    !is_zero_ether_addr(match.mask->dst))
2372 			*match_level = MLX5_MATCH_L2;
2373 	}
2374 
2375 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2376 		struct flow_match_control match;
2377 
2378 		flow_rule_match_control(rule, &match);
2379 		addr_type = match.key->addr_type;
2380 
2381 		/* the HW doesn't support frag first/later */
2382 		if (match.mask->flags & FLOW_DIS_FIRST_FRAG)
2383 			return -EOPNOTSUPP;
2384 
2385 		if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
2386 			MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
2387 			MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
2388 				 match.key->flags & FLOW_DIS_IS_FRAGMENT);
2389 
2390 			/* the HW doesn't need L3 inline to match on frag=no */
2391 			if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
2392 				*match_level = MLX5_MATCH_L2;
2393 	/* ***  L2 attributes parsing up to here *** */
2394 			else
2395 				*match_level = MLX5_MATCH_L3;
2396 		}
2397 	}
2398 
2399 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2400 		struct flow_match_basic match;
2401 
2402 		flow_rule_match_basic(rule, &match);
2403 		ip_proto = match.key->ip_proto;
2404 
2405 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
2406 			 match.mask->ip_proto);
2407 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
2408 			 match.key->ip_proto);
2409 
2410 		if (match.mask->ip_proto)
2411 			*match_level = MLX5_MATCH_L3;
2412 	}
2413 
2414 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2415 		struct flow_match_ipv4_addrs match;
2416 
2417 		flow_rule_match_ipv4_addrs(rule, &match);
2418 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2419 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
2420 		       &match.mask->src, sizeof(match.mask->src));
2421 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2422 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
2423 		       &match.key->src, sizeof(match.key->src));
2424 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2425 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2426 		       &match.mask->dst, sizeof(match.mask->dst));
2427 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2428 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2429 		       &match.key->dst, sizeof(match.key->dst));
2430 
2431 		if (match.mask->src || match.mask->dst)
2432 			*match_level = MLX5_MATCH_L3;
2433 	}
2434 
2435 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2436 		struct flow_match_ipv6_addrs match;
2437 
2438 		flow_rule_match_ipv6_addrs(rule, &match);
2439 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2440 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
2441 		       &match.mask->src, sizeof(match.mask->src));
2442 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2443 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
2444 		       &match.key->src, sizeof(match.key->src));
2445 
2446 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2447 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2448 		       &match.mask->dst, sizeof(match.mask->dst));
2449 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2450 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2451 		       &match.key->dst, sizeof(match.key->dst));
2452 
2453 		if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
2454 		    ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
2455 			*match_level = MLX5_MATCH_L3;
2456 	}
2457 
2458 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2459 		struct flow_match_ip match;
2460 
2461 		flow_rule_match_ip(rule, &match);
2462 		if (match_inner_ecn) {
2463 			MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
2464 				 match.mask->tos & 0x3);
2465 			MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
2466 				 match.key->tos & 0x3);
2467 		}
2468 
2469 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
2470 			 match.mask->tos >> 2);
2471 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
2472 			 match.key->tos  >> 2);
2473 
2474 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
2475 			 match.mask->ttl);
2476 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
2477 			 match.key->ttl);
2478 
2479 		if (match.mask->ttl &&
2480 		    !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
2481 						ft_field_support.outer_ipv4_ttl)) {
2482 			NL_SET_ERR_MSG_MOD(extack,
2483 					   "Matching on TTL is not supported");
2484 			return -EOPNOTSUPP;
2485 		}
2486 
2487 		if (match.mask->tos || match.mask->ttl)
2488 			*match_level = MLX5_MATCH_L3;
2489 	}
2490 
2491 	/* ***  L3 attributes parsing up to here *** */
2492 
2493 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
2494 		struct flow_match_ports match;
2495 
2496 		flow_rule_match_ports(rule, &match);
2497 		switch (ip_proto) {
2498 		case IPPROTO_TCP:
2499 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2500 				 tcp_sport, ntohs(match.mask->src));
2501 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2502 				 tcp_sport, ntohs(match.key->src));
2503 
2504 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2505 				 tcp_dport, ntohs(match.mask->dst));
2506 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2507 				 tcp_dport, ntohs(match.key->dst));
2508 			break;
2509 
2510 		case IPPROTO_UDP:
2511 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2512 				 udp_sport, ntohs(match.mask->src));
2513 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2514 				 udp_sport, ntohs(match.key->src));
2515 
2516 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2517 				 udp_dport, ntohs(match.mask->dst));
2518 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2519 				 udp_dport, ntohs(match.key->dst));
2520 			break;
2521 		default:
2522 			NL_SET_ERR_MSG_MOD(extack,
2523 					   "Only UDP and TCP transports are supported for L4 matching");
2524 			netdev_err(priv->netdev,
2525 				   "Only UDP and TCP transport are supported\n");
2526 			return -EINVAL;
2527 		}
2528 
2529 		if (match.mask->src || match.mask->dst)
2530 			*match_level = MLX5_MATCH_L4;
2531 	}
2532 
2533 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
2534 		struct flow_match_tcp match;
2535 
2536 		flow_rule_match_tcp(rule, &match);
2537 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
2538 			 ntohs(match.mask->flags));
2539 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
2540 			 ntohs(match.key->flags));
2541 
2542 		if (match.mask->flags)
2543 			*match_level = MLX5_MATCH_L4;
2544 	}
2545 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
2546 		struct flow_match_icmp match;
2547 
2548 		flow_rule_match_icmp(rule, &match);
2549 		switch (ip_proto) {
2550 		case IPPROTO_ICMP:
2551 			if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
2552 			      MLX5_FLEX_PROTO_ICMP))
2553 				return -EOPNOTSUPP;
2554 			MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type,
2555 				 match.mask->type);
2556 			MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type,
2557 				 match.key->type);
2558 			MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code,
2559 				 match.mask->code);
2560 			MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code,
2561 				 match.key->code);
2562 			break;
2563 		case IPPROTO_ICMPV6:
2564 			if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
2565 			      MLX5_FLEX_PROTO_ICMPV6))
2566 				return -EOPNOTSUPP;
2567 			MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type,
2568 				 match.mask->type);
2569 			MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type,
2570 				 match.key->type);
2571 			MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code,
2572 				 match.mask->code);
2573 			MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code,
2574 				 match.key->code);
2575 			break;
2576 		default:
2577 			NL_SET_ERR_MSG_MOD(extack,
2578 					   "Code and type matching only with ICMP and ICMPv6");
2579 			netdev_err(priv->netdev,
2580 				   "Code and type matching only with ICMP and ICMPv6\n");
2581 			return -EINVAL;
2582 		}
2583 		if (match.mask->code || match.mask->type) {
2584 			*match_level = MLX5_MATCH_L4;
2585 			spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
2586 		}
2587 	}
2588 	/* Currently supported only for MPLS over UDP */
2589 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) &&
2590 	    !netif_is_bareudp(filter_dev)) {
2591 		NL_SET_ERR_MSG_MOD(extack,
2592 				   "Matching on MPLS is supported only for MPLS over UDP");
2593 		netdev_err(priv->netdev,
2594 			   "Matching on MPLS is supported only for MPLS over UDP\n");
2595 		return -EOPNOTSUPP;
2596 	}
2597 
2598 	return 0;
2599 }
2600 
2601 static int parse_cls_flower(struct mlx5e_priv *priv,
2602 			    struct mlx5e_tc_flow *flow,
2603 			    struct mlx5_flow_spec *spec,
2604 			    struct flow_cls_offload *f,
2605 			    struct net_device *filter_dev)
2606 {
2607 	u8 inner_match_level, outer_match_level, non_tunnel_match_level;
2608 	struct netlink_ext_ack *extack = f->common.extack;
2609 	struct mlx5_core_dev *dev = priv->mdev;
2610 	struct mlx5_eswitch *esw = dev->priv.eswitch;
2611 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
2612 	struct mlx5_eswitch_rep *rep;
2613 	bool is_eswitch_flow;
2614 	int err;
2615 
2616 	inner_match_level = MLX5_MATCH_NONE;
2617 	outer_match_level = MLX5_MATCH_NONE;
2618 
2619 	err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
2620 				 &inner_match_level, &outer_match_level);
2621 	non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
2622 				 outer_match_level : inner_match_level;
2623 
2624 	is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
2625 	if (!err && is_eswitch_flow) {
2626 		rep = rpriv->rep;
2627 		if (rep->vport != MLX5_VPORT_UPLINK &&
2628 		    (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
2629 		    esw->offloads.inline_mode < non_tunnel_match_level)) {
2630 			NL_SET_ERR_MSG_MOD(extack,
2631 					   "Flow is not offloaded due to min inline setting");
2632 			netdev_warn(priv->netdev,
2633 				    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
2634 				    non_tunnel_match_level, esw->offloads.inline_mode);
2635 			return -EOPNOTSUPP;
2636 		}
2637 	}
2638 
2639 	flow->attr->inner_match_level = inner_match_level;
2640 	flow->attr->outer_match_level = outer_match_level;
2641 
2642 
2643 	return err;
2644 }
2645 
2646 struct pedit_headers {
2647 	struct ethhdr  eth;
2648 	struct vlan_hdr vlan;
2649 	struct iphdr   ip4;
2650 	struct ipv6hdr ip6;
2651 	struct tcphdr  tcp;
2652 	struct udphdr  udp;
2653 };
2654 
2655 struct pedit_headers_action {
2656 	struct pedit_headers	vals;
2657 	struct pedit_headers	masks;
2658 	u32			pedits;
2659 };
2660 
2661 static int pedit_header_offsets[] = {
2662 	[FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
2663 	[FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
2664 	[FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
2665 	[FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
2666 	[FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
2667 };
2668 
2669 #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
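/* pedit_header() returns a pointer to the per-protocol header copy inside
 * struct pedit_headers that corresponds to the given mangle header type.
 */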
2670 
2671 static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
2672 			 struct pedit_headers_action *hdrs)
2673 {
2674 	u32 *curr_pmask, *curr_pval;
2675 
2676 	curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
2677 	curr_pval  = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
2678 
2679 	if (*curr_pmask & mask)  /* disallow acting twice on the same location */
2680 		goto out_err;
2681 
2682 	*curr_pmask |= mask;
2683 	*curr_pval  |= (val & mask);
2684 
2685 	return 0;
2686 
2687 out_err:
2688 	return -EOPNOTSUPP;
2689 }
2690 
2691 struct mlx5_fields {
2692 	u8  field;
2693 	u8  field_bsize;
2694 	u32 field_mask;
2695 	u32 offset;
2696 	u32 match_offset;
2697 };
2698 
2699 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
2700 		{MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
2701 		 offsetof(struct pedit_headers, field) + (off), \
2702 		 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
2703 
2704 /* masked values are the same and every bit being rewritten is also covered by
2705  * the match mask, i.e. there are no rewrites that do not have a match.
2706  */
2707 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
2708 	type matchmaskx = *(type *)(matchmaskp); \
2709 	type matchvalx = *(type *)(matchvalp); \
2710 	type maskx = *(type *)(maskp); \
2711 	type valx = *(type *)(valp); \
2712 	\
2713 	(valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
2714 								 matchmaskx)); \
2715 })
2716 
2717 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
2718 			 void *matchmaskp, u8 bsize)
2719 {
2720 	bool same = false;
2721 
2722 	switch (bsize) {
2723 	case 8:
2724 		same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
2725 		break;
2726 	case 16:
2727 		same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
2728 		break;
2729 	case 32:
2730 		same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
2731 		break;
2732 	}
2733 
2734 	return same;
2735 }
2736 
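/* Each entry below maps a pedit offset inside struct pedit_headers to the HW
 * modify-header field id (OUT_*), the field width and mask, and the offset of
 * the corresponding match field in fte_match_set_lyr_2_4, so that redundant
 * rewrites can be detected against the rule's match.
 */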
2737 static struct mlx5_fields fields[] = {
2738 	OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
2739 	OFFLOAD(DMAC_15_0,  16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
2740 	OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
2741 	OFFLOAD(SMAC_15_0,  16, U16_MAX, eth.h_source[4], 0, smac_15_0),
2742 	OFFLOAD(ETHERTYPE,  16, U16_MAX, eth.h_proto, 0, ethertype),
2743 	OFFLOAD(FIRST_VID,  16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
2744 
2745 	OFFLOAD(IP_DSCP, 8,    0xfc, ip4.tos,   0, ip_dscp),
2746 	OFFLOAD(IP_TTL,  8,  U8_MAX, ip4.ttl,   0, ttl_hoplimit),
2747 	OFFLOAD(SIPV4,  32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
2748 	OFFLOAD(DIPV4,  32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2749 
2750 	OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
2751 		src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
2752 	OFFLOAD(SIPV6_95_64,  32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
2753 		src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
2754 	OFFLOAD(SIPV6_63_32,  32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
2755 		src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
2756 	OFFLOAD(SIPV6_31_0,   32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
2757 		src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
2758 	OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
2759 		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
2760 	OFFLOAD(DIPV6_95_64,  32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
2761 		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
2762 	OFFLOAD(DIPV6_63_32,  32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
2763 		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
2764 	OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
2765 		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
2766 	OFFLOAD(IPV6_HOPLIMIT, 8,  U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
2767 	OFFLOAD(IP_DSCP, 16,  0x0fc0, ip6, 0, ip_dscp),
2768 
2769 	OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source,  0, tcp_sport),
2770 	OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,    0, tcp_dport),
2771 	/* in the linux tcphdr, tcp_flags is 8 bits long */
2772 	OFFLOAD(TCP_FLAGS,  8,  U8_MAX, tcp.ack_seq, 5, tcp_flags),
2773 
2774 	OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
2775 	OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
2776 };
2777 
2778 static u32 mask_field_get(void *mask, struct mlx5_fields *f)
2779 {
2780 	switch (f->field_bsize) {
2781 	case 32:
2782 		return be32_to_cpu(*(__be32 *)mask) & f->field_mask;
2783 	case 16:
2784 		return be16_to_cpu(*(__be16 *)mask) & (u16)f->field_mask;
2785 	default:
2786 		return *(u8 *)mask & (u8)f->field_mask;
2787 	}
2788 }
2789 
2790 static void mask_field_clear(void *mask, struct mlx5_fields *f)
2791 {
2792 	switch (f->field_bsize) {
2793 	case 32:
2794 		*(__be32 *)mask &= ~cpu_to_be32(f->field_mask);
2795 		break;
2796 	case 16:
2797 		*(__be16 *)mask &= ~cpu_to_be16((u16)f->field_mask);
2798 		break;
2799 	default:
2800 		*(u8 *)mask &= ~(u8)f->field_mask;
2801 		break;
2802 	}
2803 }
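/* Convert the pedit set/add masks accumulated in hdrs[] into HW modify-header
 * actions by walking the fields[] table, skipping rewrites that are redundant
 * given what the rule already matches on, and rejecting rewrites of disjoint
 * sub-fields within one word.
 */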
2804 static int offload_pedit_fields(struct mlx5e_priv *priv,
2805 				int namespace,
2806 				struct pedit_headers_action *hdrs,
2807 				struct mlx5e_tc_flow_parse_attr *parse_attr,
2808 				u32 *action_flags,
2809 				struct netlink_ext_ack *extack)
2810 {
2811 	struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
2812 	void *headers_c, *headers_v, *action, *vals_p;
2813 	struct mlx5e_tc_mod_hdr_acts *mod_acts;
2814 	void *s_masks_p, *a_masks_p;
2815 	int i, first, last, next_z;
2816 	struct mlx5_fields *f;
2817 	unsigned long mask;
2818 	u32 s_mask, a_mask;
2819 	u8 cmd;
2820 
2821 	mod_acts = &parse_attr->mod_hdr_acts;
2822 	headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec);
2823 	headers_v = get_match_headers_value(*action_flags, &parse_attr->spec);
2824 
2825 	set_masks = &hdrs[0].masks;
2826 	add_masks = &hdrs[1].masks;
2827 	set_vals = &hdrs[0].vals;
2828 	add_vals = &hdrs[1].vals;
2829 
2830 	for (i = 0; i < ARRAY_SIZE(fields); i++) {
2831 		bool skip;
2832 
2833 		f = &fields[i];
2834 		s_masks_p = (void *)set_masks + f->offset;
2835 		a_masks_p = (void *)add_masks + f->offset;
2836 
2837 		s_mask = mask_field_get(s_masks_p, f);
2838 		a_mask = mask_field_get(a_masks_p, f);
2839 
2840 		if (!s_mask && !a_mask) /* nothing to offload here */
2841 			continue;
2842 
2843 		if (s_mask && a_mask) {
2844 			NL_SET_ERR_MSG_MOD(extack,
2845 					   "can't set and add to the same HW field");
2846 			netdev_warn(priv->netdev,
2847 				    "mlx5: can't set and add to the same HW field (%x)\n",
2848 				    f->field);
2849 			return -EOPNOTSUPP;
2850 		}
2851 
2852 		skip = false;
2853 		if (s_mask) {
2854 			void *match_mask = headers_c + f->match_offset;
2855 			void *match_val = headers_v + f->match_offset;
2856 
2857 			cmd  = MLX5_ACTION_TYPE_SET;
2858 			mask = s_mask;
2859 			vals_p = (void *)set_vals + f->offset;
2860 			/* don't rewrite if we have a match on the same value */
2861 			if (cmp_val_mask(vals_p, s_masks_p, match_val,
2862 					 match_mask, f->field_bsize))
2863 				skip = true;
2864 			/* clear to denote we consumed this field */
2865 			mask_field_clear(s_masks_p, f);
2866 		} else {
2867 			cmd  = MLX5_ACTION_TYPE_ADD;
2868 			mask = a_mask;
2869 			vals_p = (void *)add_vals + f->offset;
2870 			/* add 0 is no change */
2871 			if (!mask_field_get(vals_p, f))
2872 				skip = true;
2873 			/* clear to denote we consumed this field */
2874 			mask_field_clear(a_masks_p, f);
2875 		}
2876 		if (skip)
2877 			continue;
2878 
2879 		first = find_first_bit(&mask, f->field_bsize);
2880 		next_z = find_next_zero_bit(&mask, f->field_bsize, first);
2881 		last  = find_last_bit(&mask, f->field_bsize);
2882 		if (first < next_z && next_z < last) {
2883 			NL_SET_ERR_MSG_MOD(extack,
2884 					   "rewrite of few sub-fields isn't supported");
2885 			netdev_warn(priv->netdev,
2886 				    "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
2887 				    mask);
2888 			return -EOPNOTSUPP;
2889 		}
2890 
2891 		action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts);
2892 		if (IS_ERR(action)) {
2893 			NL_SET_ERR_MSG_MOD(extack,
2894 					   "too many pedit actions, can't offload");
2895 			mlx5_core_warn(priv->mdev,
2896 				       "mlx5: parsed %d pedit actions, can't do more\n",
2897 				       mod_acts->num_actions);
2898 			return PTR_ERR(action);
2899 		}
2900 
2901 		MLX5_SET(set_action_in, action, action_type, cmd);
2902 		MLX5_SET(set_action_in, action, field, f->field);
2903 
2904 		if (cmd == MLX5_ACTION_TYPE_SET) {
2905 			unsigned long field_mask = f->field_mask;
2906 			int start;
2907 
2908 			/* the field may occupy only part of the word and need not start at bit 0 */
2909 			start = find_first_bit(&field_mask, f->field_bsize);
2910 
2911 			MLX5_SET(set_action_in, action, offset, first - start);
2912 			/* length is num of bits to be written, zero means length of 32 */
2913 			MLX5_SET(set_action_in, action, length, (last - first + 1));
2914 		}
2915 
2916 		if (f->field_bsize == 32)
2917 			MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
2918 		else if (f->field_bsize == 16)
2919 			MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
2920 		else if (f->field_bsize == 8)
2921 			MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
2922 
2923 		++mod_acts->num_actions;
2924 	}
2925 
2926 	return 0;
2927 }
2928 
2929 static const struct pedit_headers zero_masks = {};
2930 
2931 static int
2932 parse_pedit_to_modify_hdr(struct mlx5e_priv *priv,
2933 			  const struct flow_action_entry *act, int namespace,
2934 			  struct mlx5e_tc_flow_parse_attr *parse_attr,
2935 			  struct pedit_headers_action *hdrs,
2936 			  struct netlink_ext_ack *extack)
2937 {
2938 	u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
2939 	int err = -EOPNOTSUPP;
2940 	u32 mask, val, offset;
2941 	u8 htype;
2942 
2943 	htype = act->mangle.htype;
2944 	err = -EOPNOTSUPP; /* can't be all optimistic */
2945 
2946 	if (htype == FLOW_ACT_MANGLE_UNSPEC) {
2947 		NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
2948 		goto out_err;
2949 	}
2950 
2951 	if (!mlx5e_mod_hdr_max_actions(priv->mdev, namespace)) {
2952 		NL_SET_ERR_MSG_MOD(extack,
2953 				   "The pedit offload action is not supported");
2954 		goto out_err;
2955 	}
2956 
2957 	mask = act->mangle.mask;
2958 	val = act->mangle.val;
2959 	offset = act->mangle.offset;
2960 
2961 	err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]);
2962 	if (err)
2963 		goto out_err;
2964 
2965 	hdrs[cmd].pedits++;
2966 
2967 	return 0;
2968 out_err:
2969 	return err;
2970 }
2971 
2972 static int
2973 parse_pedit_to_reformat(struct mlx5e_priv *priv,
2974 			const struct flow_action_entry *act,
2975 			struct mlx5e_tc_flow_parse_attr *parse_attr,
2976 			struct netlink_ext_ack *extack)
2977 {
2978 	u32 mask, val, offset;
2979 	u32 *p;
2980 
2981 	if (act->id != FLOW_ACTION_MANGLE)
2982 		return -EOPNOTSUPP;
2983 
2984 	if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) {
2985 		NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported");
2986 		return -EOPNOTSUPP;
2987 	}
2988 
2989 	mask = ~act->mangle.mask;
2990 	val = act->mangle.val;
2991 	offset = act->mangle.offset;
2992 	p = (u32 *)&parse_attr->eth;
2993 	*(p + (offset >> 2)) |= (val & mask);
2994 
2995 	return 0;
2996 }
2997 
2998 static int parse_tc_pedit_action(struct mlx5e_priv *priv,
2999 				 const struct flow_action_entry *act, int namespace,
3000 				 struct mlx5e_tc_flow_parse_attr *parse_attr,
3001 				 struct pedit_headers_action *hdrs,
3002 				 struct mlx5e_tc_flow *flow,
3003 				 struct netlink_ext_ack *extack)
3004 {
3005 	if (flow && flow_flag_test(flow, L3_TO_L2_DECAP))
3006 		return parse_pedit_to_reformat(priv, act, parse_attr, extack);
3007 
3008 	return parse_pedit_to_modify_hdr(priv, act, namespace,
3009 					 parse_attr, hdrs, extack);
3010 }
3011 
3012 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
3013 				 struct mlx5e_tc_flow_parse_attr *parse_attr,
3014 				 struct pedit_headers_action *hdrs,
3015 				 u32 *action_flags,
3016 				 struct netlink_ext_ack *extack)
3017 {
3018 	struct pedit_headers *cmd_masks;
3019 	int err;
3020 	u8 cmd;
3021 
3022 	err = offload_pedit_fields(priv, namespace, hdrs, parse_attr,
3023 				   action_flags, extack);
3024 	if (err < 0)
3025 		goto out_dealloc_parsed_actions;
3026 
3027 	for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
3028 		cmd_masks = &hdrs[cmd].masks;
3029 		if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
3030 			NL_SET_ERR_MSG_MOD(extack,
3031 					   "attempt to offload an unsupported field");
3032 			netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
3033 			print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
3034 				       16, 1, cmd_masks, sizeof(zero_masks), true);
3035 			err = -EOPNOTSUPP;
3036 			goto out_dealloc_parsed_actions;
3037 		}
3038 	}
3039 
3040 	return 0;
3041 
3042 out_dealloc_parsed_actions:
3043 	mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
3044 	return err;
3045 }
3046 
3047 static bool csum_offload_supported(struct mlx5e_priv *priv,
3048 				   u32 action,
3049 				   u32 update_flags,
3050 				   struct netlink_ext_ack *extack)
3051 {
3052 	u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
3053 			 TCA_CSUM_UPDATE_FLAG_UDP;
3054 
3055 	/*  The HW recalcs checksums only if re-writing headers */
3056 	if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
3057 		NL_SET_ERR_MSG_MOD(extack,
3058 				   "TC csum action is only offloaded with pedit");
3059 		netdev_warn(priv->netdev,
3060 			    "TC csum action is only offloaded with pedit\n");
3061 		return false;
3062 	}
3063 
3064 	if (update_flags & ~prot_flags) {
3065 		NL_SET_ERR_MSG_MOD(extack,
3066 				   "can't offload TC csum action for some header/s");
3067 		netdev_warn(priv->netdev,
3068 			    "can't offload TC csum action for some header/s - flags %#x\n",
3069 			    update_flags);
3070 		return false;
3071 	}
3072 
3073 	return true;
3074 }
3075 
3076 struct ip_ttl_word {
3077 	__u8	ttl;
3078 	__u8	protocol;
3079 	__sum16	check;
3080 };
3081 
3082 struct ipv6_hoplimit_word {
3083 	__be16	payload_len;
3084 	__u8	nexthdr;
3085 	__u8	hop_limit;
3086 };
3087 
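/* The two structs above overlay the 4 byte pedit mask so that a rewrite touching
 * only ttl / hop_limit can be told apart from one that modifies other IP header
 * fields (see is_action_keys_supported() below).
 */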
3088 static int is_action_keys_supported(const struct flow_action_entry *act,
3089 				    bool ct_flow, bool *modify_ip_header,
3090 				    bool *modify_tuple,
3091 				    struct netlink_ext_ack *extack)
3092 {
3093 	u32 mask, offset;
3094 	u8 htype;
3095 
3096 	htype = act->mangle.htype;
3097 	offset = act->mangle.offset;
3098 	mask = ~act->mangle.mask;
3099 	/* For the IPv4 & IPv6 headers, check the rewritten 4 byte word
3100 	 * to determine whether the modified fields
3101 	 * are anything other than ttl & hop_limit.
3102 	 */
3103 	if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
3104 		struct ip_ttl_word *ttl_word =
3105 			(struct ip_ttl_word *)&mask;
3106 
3107 		if (offset != offsetof(struct iphdr, ttl) ||
3108 		    ttl_word->protocol ||
3109 		    ttl_word->check) {
3110 			*modify_ip_header = true;
3111 		}
3112 
3113 		if (offset >= offsetof(struct iphdr, saddr))
3114 			*modify_tuple = true;
3115 
3116 		if (ct_flow && *modify_tuple) {
3117 			NL_SET_ERR_MSG_MOD(extack,
3118 					   "can't offload re-write of ipv4 address with action ct");
3119 			return -EOPNOTSUPP;
3120 		}
3121 	} else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
3122 		struct ipv6_hoplimit_word *hoplimit_word =
3123 			(struct ipv6_hoplimit_word *)&mask;
3124 
3125 		if (offset != offsetof(struct ipv6hdr, payload_len) ||
3126 		    hoplimit_word->payload_len ||
3127 		    hoplimit_word->nexthdr) {
3128 			*modify_ip_header = true;
3129 		}
3130 
3131 		if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr))
3132 			*modify_tuple = true;
3133 
3134 		if (ct_flow && *modify_tuple) {
3135 			NL_SET_ERR_MSG_MOD(extack,
3136 					   "can't offload re-write of ipv6 address with action ct");
3137 			return -EOPNOTSUPP;
3138 		}
3139 	} else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP ||
3140 		   htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) {
3141 		*modify_tuple = true;
3142 		if (ct_flow) {
3143 			NL_SET_ERR_MSG_MOD(extack,
3144 					   "can't offload re-write of transport header ports with action ct");
3145 			return -EOPNOTSUPP;
3146 		}
3147 	}
3148 
3149 	return 0;
3150 }
3151 
3152 static bool modify_tuple_supported(bool modify_tuple, bool ct_clear,
3153 				   bool ct_flow, struct netlink_ext_ack *extack,
3154 				   struct mlx5e_priv *priv,
3155 				   struct mlx5_flow_spec *spec)
3156 {
3157 	if (!modify_tuple || ct_clear)
3158 		return true;
3159 
3160 	if (ct_flow) {
3161 		NL_SET_ERR_MSG_MOD(extack,
3162 				   "can't offload tuple modification with non-clear ct()");
3163 		netdev_info(priv->netdev,
3164 			    "can't offload tuple modification with non-clear ct()");
3165 		return false;
3166 	}
3167 
3168 	/* Add ct_state=-trk match so it will be offloaded for non ct flows
3169 	 * (or after clear action), as otherwise, since the tuple is changed,
3170 	 * we can't restore ct state
3171 	 */
3172 	if (mlx5_tc_ct_add_no_trk_match(spec)) {
3173 		NL_SET_ERR_MSG_MOD(extack,
3174 				   "can't offload tuple modification with ct matches and no ct(clear) action");
3175 		netdev_info(priv->netdev,
3176 			    "can't offload tuple modification with ct matches and no ct(clear) action");
3177 		return false;
3178 	}
3179 
3180 	return true;
3181 }
3182 
3183 static bool modify_header_match_supported(struct mlx5e_priv *priv,
3184 					  struct mlx5_flow_spec *spec,
3185 					  struct flow_action *flow_action,
3186 					  u32 actions, bool ct_flow,
3187 					  bool ct_clear,
3188 					  struct netlink_ext_ack *extack)
3189 {
3190 	const struct flow_action_entry *act;
3191 	bool modify_ip_header, modify_tuple;
3192 	void *headers_c;
3193 	void *headers_v;
3194 	u16 ethertype;
3195 	u8 ip_proto;
3196 	int i, err;
3197 
3198 	headers_c = get_match_headers_criteria(actions, spec);
3199 	headers_v = get_match_headers_value(actions, spec);
3200 	ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
3201 
3202 	/* for non-IP we only re-write MACs, so we're okay */
3203 	if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
3204 	    ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
3205 		goto out_ok;
3206 
3207 	modify_ip_header = false;
3208 	modify_tuple = false;
3209 	flow_action_for_each(i, act, flow_action) {
3210 		if (act->id != FLOW_ACTION_MANGLE &&
3211 		    act->id != FLOW_ACTION_ADD)
3212 			continue;
3213 
3214 		err = is_action_keys_supported(act, ct_flow,
3215 					       &modify_ip_header,
3216 					       &modify_tuple, extack);
3217 		if (err)
3218 			return false;
3219 	}
3220 
3221 	if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack,
3222 				    priv, spec))
3223 		return false;
3224 
3225 	ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
3226 	if (modify_ip_header && ip_proto != IPPROTO_TCP &&
3227 	    ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
3228 		NL_SET_ERR_MSG_MOD(extack,
3229 				   "can't offload re-write of non TCP/UDP");
3230 		netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
3231 			    ip_proto);
3232 		return false;
3233 	}
3234 
3235 out_ok:
3236 	return true;
3237 }
3238 
3239 static bool
3240 actions_match_supported_fdb(struct mlx5e_priv *priv,
3241 			    struct mlx5e_tc_flow_parse_attr *parse_attr,
3242 			    struct mlx5e_tc_flow *flow,
3243 			    struct netlink_ext_ack *extack)
3244 {
3245 	bool ct_flow, ct_clear;
3246 
3247 	ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
3248 	ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3249 
3250 	if (flow->attr->esw_attr->split_count && ct_flow &&
3251 	    !MLX5_CAP_GEN(flow->attr->esw_attr->in_mdev, reg_c_preserve)) {
3252 		/* All registers used by ct are cleared when using
3253 		 * split rules.
3254 		 */
3255 		NL_SET_ERR_MSG_MOD(extack, "Can't offload mirroring with action ct");
3256 		return false;
3257 	}
3258 
3259 	return true;
3260 }
3261 
3262 static bool
3263 actions_match_supported(struct mlx5e_priv *priv,
3264 			struct flow_action *flow_action,
3265 			struct mlx5e_tc_flow_parse_attr *parse_attr,
3266 			struct mlx5e_tc_flow *flow,
3267 			struct netlink_ext_ack *extack)
3268 {
3269 	u32 actions = flow->attr->action;
3270 	bool ct_flow, ct_clear;
3271 
3272 	ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
3273 	ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3274 
3275 	if (!(actions &
3276 	      (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
3277 		NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action");
3278 		return false;
3279 	}
3280 
3281 	if (!(~actions &
3282 	      (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
3283 		NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
3284 		return false;
3285 	}
3286 
3287 	if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
3288 	    actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
3289 		NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
3290 		return false;
3291 	}
3292 
3293 	if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
3294 	    !modify_header_match_supported(priv, &parse_attr->spec, flow_action,
3295 					   actions, ct_flow, ct_clear, extack))
3296 		return false;
3297 
3298 	if (mlx5e_is_eswitch_flow(flow) &&
3299 	    !actions_match_supported_fdb(priv, parse_attr, flow, extack))
3300 		return false;
3301 
3302 	return true;
3303 }
3304 
3305 static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3306 {
3307 	return priv->mdev == peer_priv->mdev;
3308 }
3309 
3310 static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3311 {
3312 	struct mlx5_core_dev *fmdev, *pmdev;
3313 	u64 fsystem_guid, psystem_guid;
3314 
3315 	fmdev = priv->mdev;
3316 	pmdev = peer_priv->mdev;
3317 
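	/* Ports that belong to the same physical device report the same
	 * system image GUID, so matching GUIDs mean the two netdevs are
	 * backed by the same HW.
	 */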
3318 	fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
3319 	psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
3320 
3321 	return (fsystem_guid == psystem_guid);
3322 }
3323 
3324 static bool same_vf_reps(struct mlx5e_priv *priv,
3325 			 struct net_device *out_dev)
3326 {
3327 	return mlx5e_eswitch_vf_rep(priv->netdev) &&
3328 	       priv->netdev == out_dev;
3329 }
3330 
3331 static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
3332 				   const struct flow_action_entry *act,
3333 				   struct mlx5e_tc_flow_parse_attr *parse_attr,
3334 				   struct pedit_headers_action *hdrs,
3335 				   u32 *action, struct netlink_ext_ack *extack)
3336 {
3337 	u16 mask16 = VLAN_VID_MASK;
3338 	u16 val16 = act->vlan.vid & VLAN_VID_MASK;
3339 	const struct flow_action_entry pedit_act = {
3340 		.id = FLOW_ACTION_MANGLE,
3341 		.mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
3342 		.mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
3343 		.mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
3344 		.mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
3345 	};
3346 	u8 match_prio_mask, match_prio_val;
3347 	void *headers_c, *headers_v;
3348 	int err;
3349 
3350 	headers_c = get_match_headers_criteria(*action, &parse_attr->spec);
3351 	headers_v = get_match_headers_value(*action, &parse_attr->spec);
3352 
3353 	if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
3354 	      MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
3355 		NL_SET_ERR_MSG_MOD(extack,
3356 				   "VLAN rewrite action must have VLAN protocol match");
3357 		return -EOPNOTSUPP;
3358 	}
3359 
3360 	match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
3361 	match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
3362 	if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
3363 		NL_SET_ERR_MSG_MOD(extack,
3364 				   "Changing VLAN prio is not supported");
3365 		return -EOPNOTSUPP;
3366 	}
3367 
3368 	err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, hdrs, NULL, extack);
3369 	*action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3370 
3371 	return err;
3372 }
3373 
3374 static int
3375 add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
3376 				 struct mlx5e_tc_flow_parse_attr *parse_attr,
3377 				 struct pedit_headers_action *hdrs,
3378 				 u32 *action, struct netlink_ext_ack *extack)
3379 {
3380 	const struct flow_action_entry prio_tag_act = {
3381 		.vlan.vid = 0,
3382 		.vlan.prio =
3383 			MLX5_GET(fte_match_set_lyr_2_4,
3384 				 get_match_headers_value(*action,
3385 							 &parse_attr->spec),
3386 				 first_prio) &
3387 			MLX5_GET(fte_match_set_lyr_2_4,
3388 				 get_match_headers_criteria(*action,
3389 							    &parse_attr->spec),
3390 				 first_prio),
3391 	};
3392 
3393 	return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
3394 				       &prio_tag_act, parse_attr, hdrs, action,
3395 				       extack);
3396 }
3397 
3398 static int validate_goto_chain(struct mlx5e_priv *priv,
3399 			       struct mlx5e_tc_flow *flow,
3400 			       const struct flow_action_entry *act,
3401 			       u32 actions,
3402 			       struct netlink_ext_ack *extack)
3403 {
3404 	bool is_esw = mlx5e_is_eswitch_flow(flow);
3405 	struct mlx5_flow_attr *attr = flow->attr;
3406 	bool ft_flow = mlx5e_is_ft_flow(flow);
3407 	u32 dest_chain = act->chain_index;
3408 	struct mlx5_fs_chains *chains;
3409 	struct mlx5_eswitch *esw;
3410 	u32 reformat_and_fwd;
3411 	u32 max_chain;
3412 
3413 	esw = priv->mdev->priv.eswitch;
3414 	chains = is_esw ? esw_chains(esw) : nic_chains(priv);
3415 	max_chain = mlx5_chains_get_chain_range(chains);
3416 	reformat_and_fwd = is_esw ?
3417 			   MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) :
3418 			   MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table);
3419 
3420 	if (ft_flow) {
3421 		NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
3422 		return -EOPNOTSUPP;
3423 	}
3424 
3425 	if (!mlx5_chains_backwards_supported(chains) &&
3426 	    dest_chain <= attr->chain) {
3427 		NL_SET_ERR_MSG_MOD(extack,
3428 				   "Goto lower numbered chain isn't supported");
3429 		return -EOPNOTSUPP;
3430 	}
3431 
3432 	if (dest_chain > max_chain) {
3433 		NL_SET_ERR_MSG_MOD(extack,
3434 				   "Requested destination chain is out of supported range");
3435 		return -EOPNOTSUPP;
3436 	}
3437 
3438 	if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
3439 		       MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
3440 	    !reformat_and_fwd) {
3441 		NL_SET_ERR_MSG_MOD(extack,
3442 				   "Goto chain is not allowed if action has reformat or decap");
3443 		return -EOPNOTSUPP;
3444 	}
3445 
3446 	return 0;
3447 }
3448 
3449 static int
3450 actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv,
3451 				struct mlx5e_tc_flow *flow,
3452 				struct mlx5_flow_attr *attr,
3453 				struct pedit_headers_action *hdrs,
3454 				struct netlink_ext_ack *extack)
3455 {
3456 	struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
3457 	enum mlx5_flow_namespace_type ns_type;
3458 	int err;
3459 
3460 	if (!hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits &&
3461 	    !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits)
3462 		return 0;
3463 
3464 	ns_type = get_flow_name_space(flow);
3465 
3466 	err = alloc_tc_pedit_action(priv, ns_type, parse_attr, hdrs,
3467 				    &attr->action, extack);
3468 	if (err)
3469 		return err;
3470 
3471 	/* In case all pedit actions are skipped, remove the MOD_HDR flag. */
3472 	if (parse_attr->mod_hdr_acts.num_actions > 0)
3473 		return 0;
3474 
3475 	attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3476 	mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
3477 
3478 	if (ns_type != MLX5_FLOW_NAMESPACE_FDB)
3479 		return 0;
3480 
3481 	if (!((attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
3482 	      (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
3483 		attr->esw_attr->split_count = 0;
3484 
3485 	return 0;
3486 }
3487 
3488 static int
3489 parse_tc_nic_actions(struct mlx5e_priv *priv,
3490 		     struct flow_action *flow_action,
3491 		     struct mlx5e_tc_flow *flow,
3492 		     struct netlink_ext_ack *extack)
3493 {
3494 	struct mlx5e_tc_flow_parse_attr *parse_attr;
3495 	struct mlx5_flow_attr *attr = flow->attr;
3496 	struct pedit_headers_action hdrs[2] = {};
3497 	const struct flow_action_entry *act;
3498 	struct mlx5_nic_flow_attr *nic_attr;
3499 	u32 action = 0;
3500 	int err, i;
3501 
3502 	if (!flow_action_has_entries(flow_action))
3503 		return -EINVAL;
3504 
3505 	if (!flow_action_hw_stats_check(flow_action, extack,
3506 					FLOW_ACTION_HW_STATS_DELAYED_BIT))
3507 		return -EOPNOTSUPP;
3508 
3509 	nic_attr = attr->nic_attr;
3510 	nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
3511 	parse_attr = attr->parse_attr;
3512 
3513 	flow_action_for_each(i, act, flow_action) {
3514 		switch (act->id) {
3515 		case FLOW_ACTION_ACCEPT:
3516 			action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3517 				  MLX5_FLOW_CONTEXT_ACTION_COUNT;
3518 			break;
3519 		case FLOW_ACTION_DROP:
3520 			action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
3521 				  MLX5_FLOW_CONTEXT_ACTION_COUNT;
3522 			break;
3523 		case FLOW_ACTION_MANGLE:
3524 		case FLOW_ACTION_ADD:
3525 			err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
3526 						    parse_attr, hdrs, NULL, extack);
3527 			if (err)
3528 				return err;
3529 
3530 			action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3531 			break;
3532 		case FLOW_ACTION_VLAN_MANGLE:
3533 			err = add_vlan_rewrite_action(priv,
3534 						      MLX5_FLOW_NAMESPACE_KERNEL,
3535 						      act, parse_attr, hdrs,
3536 						      &action, extack);
3537 			if (err)
3538 				return err;
3539 
3540 			break;
3541 		case FLOW_ACTION_CSUM:
3542 			if (csum_offload_supported(priv, action,
3543 						   act->csum_flags,
3544 						   extack))
3545 				break;
3546 
3547 			return -EOPNOTSUPP;
3548 		case FLOW_ACTION_REDIRECT: {
3549 			struct net_device *peer_dev = act->dev;
3550 
3551 			if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
3552 			    same_hw_devs(priv, netdev_priv(peer_dev))) {
3553 				parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
3554 				flow_flag_set(flow, HAIRPIN);
3555 				action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3556 					  MLX5_FLOW_CONTEXT_ACTION_COUNT;
3557 			} else {
3558 				NL_SET_ERR_MSG_MOD(extack,
3559 						   "device is not on same HW, can't offload");
3560 				netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
3561 					    peer_dev->name);
3562 				return -EOPNOTSUPP;
3563 			}
3564 			}
3565 			break;
3566 		case FLOW_ACTION_MARK: {
3567 			u32 mark = act->mark;
3568 
3569 			if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
3570 				NL_SET_ERR_MSG_MOD(extack,
3571 						   "Bad flow mark - only 16 bit is supported");
3572 				return -EOPNOTSUPP;
3573 			}
3574 
3575 			nic_attr->flow_tag = mark;
3576 			action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3577 			}
3578 			break;
3579 		case FLOW_ACTION_GOTO:
3580 			err = validate_goto_chain(priv, flow, act, action,
3581 						  extack);
3582 			if (err)
3583 				return err;
3584 
3585 			action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3586 				  MLX5_FLOW_CONTEXT_ACTION_COUNT;
3587 			attr->dest_chain = act->chain_index;
3588 			break;
3589 		case FLOW_ACTION_CT:
3590 			err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr,
3591 						      &parse_attr->mod_hdr_acts,
3592 						      act, extack);
3593 			if (err)
3594 				return err;
3595 
3596 			flow_flag_set(flow, CT);
3597 			break;
3598 		default:
3599 			NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
3600 			return -EOPNOTSUPP;
3601 		}
3602 	}
3603 
3604 	attr->action = action;
3605 
3606 	if (attr->dest_chain && parse_attr->mirred_ifindex[0]) {
3607 		NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported");
3608 		return -EOPNOTSUPP;
3609 	}
3610 
3611 	err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack);
3612 	if (err)
3613 		return err;
3614 
3615 	if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
3616 		return -EOPNOTSUPP;
3617 
3618 	return 0;
3619 }
3620 
3621 static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
3622 				  struct net_device *peer_netdev)
3623 {
3624 	struct mlx5e_priv *peer_priv;
3625 
3626 	peer_priv = netdev_priv(peer_netdev);
3627 
3628 	return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
3629 		mlx5e_eswitch_vf_rep(priv->netdev) &&
3630 		mlx5e_eswitch_vf_rep(peer_netdev) &&
3631 		same_hw_devs(priv, peer_priv));
3632 }
3633 
3634 static int parse_tc_vlan_action(struct mlx5e_priv *priv,
3635 				const struct flow_action_entry *act,
3636 				struct mlx5_esw_flow_attr *attr,
3637 				u32 *action)
3638 {
3639 	u8 vlan_idx = attr->total_vlan;
3640 
3641 	if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
3642 		return -EOPNOTSUPP;
3643 
3644 	switch (act->id) {
3645 	case FLOW_ACTION_VLAN_POP:
3646 		if (vlan_idx) {
3647 			if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
3648 								 MLX5_FS_VLAN_DEPTH))
3649 				return -EOPNOTSUPP;
3650 
3651 			*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
3652 		} else {
3653 			*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
3654 		}
3655 		break;
3656 	case FLOW_ACTION_VLAN_PUSH:
3657 		attr->vlan_vid[vlan_idx] = act->vlan.vid;
3658 		attr->vlan_prio[vlan_idx] = act->vlan.prio;
3659 		attr->vlan_proto[vlan_idx] = act->vlan.proto;
3660 		if (!attr->vlan_proto[vlan_idx])
3661 			attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
3662 
3663 		if (vlan_idx) {
3664 			if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
3665 								 MLX5_FS_VLAN_DEPTH))
3666 				return -EOPNOTSUPP;
3667 
3668 			*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
3669 		} else {
3670 			if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
3671 			    (act->vlan.proto != htons(ETH_P_8021Q) ||
3672 			     act->vlan.prio))
3673 				return -EOPNOTSUPP;
3674 
3675 			*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
3676 		}
3677 		break;
3678 	default:
3679 		return -EINVAL;
3680 	}
3681 
3682 	attr->total_vlan = vlan_idx + 1;
3683 
3684 	return 0;
3685 }
3686 
3687 static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev,
3688 					  struct net_device *out_dev)
3689 {
3690 	struct net_device *fdb_out_dev = out_dev;
3691 	struct net_device *uplink_upper;
3692 
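	/* If the destination bond is the one holding our own uplink, forward
	 * via the uplink itself; for any other bond, resolve it to its
	 * currently active slave, provided that slave is a rep on the same
	 * switch as the uplink.
	 */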
3693 	rcu_read_lock();
3694 	uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
3695 	if (uplink_upper && netif_is_lag_master(uplink_upper) &&
3696 	    uplink_upper == out_dev) {
3697 		fdb_out_dev = uplink_dev;
3698 	} else if (netif_is_lag_master(out_dev)) {
3699 		fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev));
3700 		if (fdb_out_dev &&
3701 		    (!mlx5e_eswitch_rep(fdb_out_dev) ||
3702 		     !netdev_port_same_parent_id(fdb_out_dev, uplink_dev)))
3703 			fdb_out_dev = NULL;
3704 	}
3705 	rcu_read_unlock();
3706 	return fdb_out_dev;
3707 }
3708 
3709 static int add_vlan_push_action(struct mlx5e_priv *priv,
3710 				struct mlx5_flow_attr *attr,
3711 				struct net_device **out_dev,
3712 				u32 *action)
3713 {
3714 	struct net_device *vlan_dev = *out_dev;
3715 	struct flow_action_entry vlan_act = {
3716 		.id = FLOW_ACTION_VLAN_PUSH,
3717 		.vlan.vid = vlan_dev_vlan_id(vlan_dev),
3718 		.vlan.proto = vlan_dev_vlan_proto(vlan_dev),
3719 		.vlan.prio = 0,
3720 	};
3721 	int err;
3722 
3723 	err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action);
3724 	if (err)
3725 		return err;
3726 
3727 	rcu_read_lock();
3728 	*out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev));
3729 	rcu_read_unlock();
3730 	if (!*out_dev)
3731 		return -ENODEV;
3732 
3733 	if (is_vlan_dev(*out_dev))
3734 		err = add_vlan_push_action(priv, attr, out_dev, action);
3735 
3736 	return err;
3737 }
3738 
3739 static int add_vlan_pop_action(struct mlx5e_priv *priv,
3740 			       struct mlx5_flow_attr *attr,
3741 			       u32 *action)
3742 {
3743 	struct flow_action_entry vlan_act = {
3744 		.id = FLOW_ACTION_VLAN_POP,
3745 	};
3746 	int nest_level, err = 0;
3747 
3748 	nest_level = attr->parse_attr->filter_dev->lower_level -
3749 						priv->netdev->lower_level;
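	/* The lower_level difference effectively counts the vlan devices
	 * stacked between the filter device and this rep, so one pop action
	 * is emitted per stacked vlan tag.
	 */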
3750 	while (nest_level--) {
3751 		err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action);
3752 		if (err)
3753 			return err;
3754 	}
3755 
3756 	return err;
3757 }
3758 
3759 static bool same_hw_reps(struct mlx5e_priv *priv,
3760 			 struct net_device *peer_netdev)
3761 {
3762 	struct mlx5e_priv *peer_priv;
3763 
3764 	peer_priv = netdev_priv(peer_netdev);
3765 
3766 	return mlx5e_eswitch_rep(priv->netdev) &&
3767 	       mlx5e_eswitch_rep(peer_netdev) &&
3768 	       same_hw_devs(priv, peer_priv);
3769 }
3770 
3771 static bool is_lag_dev(struct mlx5e_priv *priv,
3772 		       struct net_device *peer_netdev)
3773 {
3774 	return ((mlx5_lag_is_sriov(priv->mdev) ||
3775 		 mlx5_lag_is_multipath(priv->mdev)) &&
3776 		 same_hw_reps(priv, peer_netdev));
3777 }
3778 
3779 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
3780 				    struct net_device *out_dev)
3781 {
3782 	if (is_merged_eswitch_vfs(priv, out_dev))
3783 		return true;
3784 
3785 	if (is_lag_dev(priv, out_dev))
3786 		return true;
3787 
3788 	return mlx5e_eswitch_rep(out_dev) &&
3789 	       same_port_devs(priv, netdev_priv(out_dev));
3790 }
3791 
3792 static bool is_duplicated_output_device(struct net_device *dev,
3793 					struct net_device *out_dev,
3794 					int *ifindexes, int if_count,
3795 					struct netlink_ext_ack *extack)
3796 {
3797 	int i;
3798 
3799 	for (i = 0; i < if_count; i++) {
3800 		if (ifindexes[i] == out_dev->ifindex) {
3801 			NL_SET_ERR_MSG_MOD(extack,
3802 					   "can't duplicate output to same device");
3803 			netdev_err(dev, "can't duplicate output to same device: %s\n",
3804 				   out_dev->name);
3805 			return true;
3806 		}
3807 	}
3808 
3809 	return false;
3810 }
3811 
3812 static int verify_uplink_forwarding(struct mlx5e_priv *priv,
3813 				    struct mlx5e_tc_flow *flow,
3814 				    struct net_device *out_dev,
3815 				    struct netlink_ext_ack *extack)
3816 {
3817 	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
3818 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3819 	struct mlx5e_rep_priv *rep_priv;
3820 
3821 	/* Forwarding non-encapsulated traffic between
3822 	 * uplink ports is allowed only if the
3823 	 * termination_table_raw_traffic cap is set.
3824 	 *
3825 	 * The input vport was stored in attr->in_rep.
3826 	 * In the LAG case, *priv* is the private data of
3827 	 * the uplink, which may not be the input vport.
3828 	 */
3829 	rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep);
3830 
3831 	if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
3832 	      mlx5e_eswitch_uplink_rep(out_dev)))
3833 		return 0;
3834 
3835 	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev,
3836 					termination_table_raw_traffic)) {
3837 		NL_SET_ERR_MSG_MOD(extack,
3838 				   "devices are both uplink, can't offload forwarding");
3839 		pr_err("devices %s %s are both uplink, can't offload forwarding\n",
3840 		       priv->netdev->name, out_dev->name);
3841 		return -EOPNOTSUPP;
3842 	} else if (out_dev != rep_priv->netdev) {
3843 		NL_SET_ERR_MSG_MOD(extack,
3844 				   "devices are not the same uplink, can't offload forwarding");
3845 		pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n",
3846 		       priv->netdev->name, out_dev->name);
3847 		return -EOPNOTSUPP;
3848 	}
3849 	return 0;
3850 }
3851 
3852 static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
3853 				struct flow_action *flow_action,
3854 				struct mlx5e_tc_flow *flow,
3855 				struct netlink_ext_ack *extack)
3856 {
3857 	struct pedit_headers_action hdrs[2] = {};
3858 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3859 	struct mlx5e_tc_flow_parse_attr *parse_attr;
3860 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
3861 	struct mlx5e_sample_attr sample_attr = {};
3862 	const struct ip_tunnel_info *info = NULL;
3863 	struct mlx5_flow_attr *attr = flow->attr;
3864 	int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
3865 	bool ft_flow = mlx5e_is_ft_flow(flow);
3866 	const struct flow_action_entry *act;
3867 	struct mlx5_esw_flow_attr *esw_attr;
3868 	bool encap = false, decap = false;
3869 	u32 action = attr->action;
3870 	int err, i, if_count = 0;
3871 	bool mpls_push = false;
3872 
3873 	if (!flow_action_has_entries(flow_action))
3874 		return -EINVAL;
3875 
3876 	if (!flow_action_hw_stats_check(flow_action, extack,
3877 					FLOW_ACTION_HW_STATS_DELAYED_BIT))
3878 		return -EOPNOTSUPP;
3879 
3880 	esw_attr = attr->esw_attr;
3881 	parse_attr = attr->parse_attr;
3882 
3883 	flow_action_for_each(i, act, flow_action) {
3884 		switch (act->id) {
3885 		case FLOW_ACTION_DROP:
3886 			action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
3887 				  MLX5_FLOW_CONTEXT_ACTION_COUNT;
3888 			break;
3889 		case FLOW_ACTION_TRAP:
3890 			if (!flow_offload_has_one_action(flow_action)) {
3891 				NL_SET_ERR_MSG_MOD(extack,
3892 						   "action trap is supported as a sole action only");
3893 				return -EOPNOTSUPP;
3894 			}
3895 			action |= (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3896 				   MLX5_FLOW_CONTEXT_ACTION_COUNT);
3897 			attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
3898 			break;
3899 		case FLOW_ACTION_MPLS_PUSH:
3900 			if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
3901 							reformat_l2_to_l3_tunnel) ||
3902 			    act->mpls_push.proto != htons(ETH_P_MPLS_UC)) {
3903 				NL_SET_ERR_MSG_MOD(extack,
3904 						   "mpls push is supported only for mpls_uc protocol");
3905 				return -EOPNOTSUPP;
3906 			}
3907 			mpls_push = true;
3908 			break;
3909 		case FLOW_ACTION_MPLS_POP:
3910 			/* we only support mpls pop if it is the first action
3911 			 * and the filter net device is bareudp. Subsequent
3912 			 * actions can be pedit and the last can be mirred
3913 			 * egress redirect.
3914 			 */
3915 			if (i) {
3916 				NL_SET_ERR_MSG_MOD(extack,
3917 						   "mpls pop supported only as first action");
3918 				return -EOPNOTSUPP;
3919 			}
3920 			if (!netif_is_bareudp(parse_attr->filter_dev)) {
3921 				NL_SET_ERR_MSG_MOD(extack,
3922 						   "mpls pop supported only on bareudp devices");
3923 				return -EOPNOTSUPP;
3924 			}
3925 
3926 			parse_attr->eth.h_proto = act->mpls_pop.proto;
3927 			action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
3928 			flow_flag_set(flow, L3_TO_L2_DECAP);
3929 			break;
3930 		case FLOW_ACTION_MANGLE:
3931 		case FLOW_ACTION_ADD:
3932 			err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
3933 						    parse_attr, hdrs, flow, extack);
3934 			if (err)
3935 				return err;
3936 
3937 			if (!flow_flag_test(flow, L3_TO_L2_DECAP)) {
3938 				action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3939 				esw_attr->split_count = esw_attr->out_count;
3940 			}
3941 			break;
3942 		case FLOW_ACTION_CSUM:
3943 			if (csum_offload_supported(priv, action,
3944 						   act->csum_flags, extack))
3945 				break;
3946 
3947 			return -EOPNOTSUPP;
3948 		case FLOW_ACTION_REDIRECT:
3949 		case FLOW_ACTION_MIRRED: {
3950 			struct mlx5e_priv *out_priv;
3951 			struct net_device *out_dev;
3952 
3953 			out_dev = act->dev;
3954 			if (!out_dev) {
3955 				/* out_dev is NULL when filters with a
3956 				 * non-existing mirred device are replayed to
3957 				 * the driver.
3958 				 */
3959 				return -EINVAL;
3960 			}
3961 
3962 			if (mpls_push && !netif_is_bareudp(out_dev)) {
3963 				NL_SET_ERR_MSG_MOD(extack,
3964 						   "mpls is supported only through a bareudp device");
3965 				return -EOPNOTSUPP;
3966 			}
3967 
3968 			if (ft_flow && out_dev == priv->netdev) {
3969 				/* Ignore forward to self rules generated
3970 				 * by adding both mlx5 devs to the flow table
3971 				 * block on a normal nft offload setup.
3972 				 */
3973 				return -EOPNOTSUPP;
3974 			}
3975 
3976 			if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
3977 				NL_SET_ERR_MSG_MOD(extack,
3978 						   "can't support more output ports, can't offload forwarding");
3979 				netdev_warn(priv->netdev,
3980 					    "can't support more than %d output ports, can't offload forwarding\n",
3981 					    esw_attr->out_count);
3982 				return -EOPNOTSUPP;
3983 			}
3984 
3985 			action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3986 				  MLX5_FLOW_CONTEXT_ACTION_COUNT;
3987 			if (encap) {
3988 				parse_attr->mirred_ifindex[esw_attr->out_count] =
3989 					out_dev->ifindex;
3990 				parse_attr->tun_info[esw_attr->out_count] =
3991 					mlx5e_dup_tun_info(info);
3992 				if (!parse_attr->tun_info[esw_attr->out_count])
3993 					return -ENOMEM;
3994 				encap = false;
3995 				esw_attr->dests[esw_attr->out_count].flags |=
3996 					MLX5_ESW_DEST_ENCAP;
3997 				esw_attr->out_count++;
3998 				/* attr->dests[].rep is resolved when we
3999 				 * handle encap
4000 				 */
4001 			} else if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
4002 				struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4003 				struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
4004 
4005 				if (is_duplicated_output_device(priv->netdev,
4006 								out_dev,
4007 								ifindexes,
4008 								if_count,
4009 								extack))
4010 					return -EOPNOTSUPP;
4011 
4012 				ifindexes[if_count] = out_dev->ifindex;
4013 				if_count++;
4014 
4015 				out_dev = get_fdb_out_dev(uplink_dev, out_dev);
4016 				if (!out_dev)
4017 					return -ENODEV;
4018 
4019 				if (is_vlan_dev(out_dev)) {
4020 					err = add_vlan_push_action(priv, attr,
4021 								   &out_dev,
4022 								   &action);
4023 					if (err)
4024 						return err;
4025 				}
4026 
4027 				if (is_vlan_dev(parse_attr->filter_dev)) {
4028 					err = add_vlan_pop_action(priv, attr,
4029 								  &action);
4030 					if (err)
4031 						return err;
4032 				}
4033 
4034 				err = verify_uplink_forwarding(priv, flow, out_dev, extack);
4035 				if (err)
4036 					return err;
4037 
4038 				if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
4039 					NL_SET_ERR_MSG_MOD(extack,
4040 							   "devices are not on same switch HW, can't offload forwarding");
4041 					return -EOPNOTSUPP;
4042 				}
4043 
4044 				if (same_vf_reps(priv, out_dev)) {
4045 					NL_SET_ERR_MSG_MOD(extack,
4046 							   "can't forward from a VF to itself");
4047 					return -EOPNOTSUPP;
4048 				}
4049 
4050 				out_priv = netdev_priv(out_dev);
4051 				rpriv = out_priv->ppriv;
4052 				esw_attr->dests[esw_attr->out_count].rep = rpriv->rep;
4053 				esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev;
4054 				esw_attr->out_count++;
4055 			} else if (parse_attr->filter_dev != priv->netdev) {
4056 				/* All mlx5 devices are called to configure
4057 				 * high level device filters. Therefore, the
4058 				 * *attempt* to install a filter on an invalid
4059 				 * eswitch should not trigger an explicit error.
4060 				 */
4061 				return -EINVAL;
4062 			} else {
4063 				NL_SET_ERR_MSG_MOD(extack,
4064 						   "devices are not on same switch HW, can't offload forwarding");
4065 				netdev_warn(priv->netdev,
4066 					    "devices %s %s not on same switch HW, can't offload forwarding\n",
4067 					    priv->netdev->name,
4068 					    out_dev->name);
4069 				return -EOPNOTSUPP;
4070 			}
4071 			}
4072 			break;
4073 		case FLOW_ACTION_TUNNEL_ENCAP:
4074 			info = act->tunnel;
4075 			if (info)
4076 				encap = true;
4077 			else
4078 				return -EOPNOTSUPP;
4079 
4080 			break;
4081 		case FLOW_ACTION_VLAN_PUSH:
4082 		case FLOW_ACTION_VLAN_POP:
4083 			if (act->id == FLOW_ACTION_VLAN_PUSH &&
4084 			    (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
4085 				/* Replace vlan pop+push with vlan modify */
4086 				action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
4087 				err = add_vlan_rewrite_action(priv,
4088 							      MLX5_FLOW_NAMESPACE_FDB,
4089 							      act, parse_attr, hdrs,
4090 							      &action, extack);
4091 			} else {
4092 				err = parse_tc_vlan_action(priv, act, esw_attr, &action);
4093 			}
4094 			if (err)
4095 				return err;
4096 
4097 			esw_attr->split_count = esw_attr->out_count;
4098 			break;
4099 		case FLOW_ACTION_VLAN_MANGLE:
4100 			err = add_vlan_rewrite_action(priv,
4101 						      MLX5_FLOW_NAMESPACE_FDB,
4102 						      act, parse_attr, hdrs,
4103 						      &action, extack);
4104 			if (err)
4105 				return err;
4106 
4107 			esw_attr->split_count = esw_attr->out_count;
4108 			break;
4109 		case FLOW_ACTION_TUNNEL_DECAP:
4110 			decap = true;
4111 			break;
4112 		case FLOW_ACTION_GOTO:
4113 			err = validate_goto_chain(priv, flow, act, action,
4114 						  extack);
4115 			if (err)
4116 				return err;
4117 
4118 			action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
4119 				  MLX5_FLOW_CONTEXT_ACTION_COUNT;
4120 			attr->dest_chain = act->chain_index;
4121 			break;
4122 		case FLOW_ACTION_CT:
4123 			if (flow_flag_test(flow, SAMPLE)) {
4124 				NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported");
4125 				return -EOPNOTSUPP;
4126 			}
4127 			err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr,
4128 						      &parse_attr->mod_hdr_acts,
4129 						      act, extack);
4130 			if (err)
4131 				return err;
4132 
4133 			flow_flag_set(flow, CT);
4134 			esw_attr->split_count = esw_attr->out_count;
4135 			break;
4136 		case FLOW_ACTION_SAMPLE:
4137 			if (flow_flag_test(flow, CT)) {
4138 				NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported");
4139 				return -EOPNOTSUPP;
4140 			}
4141 			sample_attr.rate = act->sample.rate;
4142 			sample_attr.group_num = act->sample.psample_group->group_num;
4143 			if (act->sample.truncate)
4144 				sample_attr.trunc_size = act->sample.trunc_size;
4145 			flow_flag_set(flow, SAMPLE);
4146 			break;
4147 		default:
4148 			NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
4149 			return -EOPNOTSUPP;
4150 		}
4151 	}
4152 
4153 	/* always set IP version for indirect table handling */
4154 	attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true);
4155 
4156 	if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
4157 	    action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
4158 		/* For prio tag mode, replace the vlan pop with a vlan prio
4159 		 * tag rewrite.
4160 		 */
4161 		action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
4162 		err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs,
4163 						       &action, extack);
4164 		if (err)
4165 			return err;
4166 	}
4167 
4168 	attr->action = action;
4169 
4170 	err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack);
4171 	if (err)
4172 		return err;
4173 
4174 	if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
4175 		return -EOPNOTSUPP;
4176 
4177 	if (attr->dest_chain && decap) {
4178 		/* It can be supported if we'll create a mapping for
4179 		 * the tunnel device only (without tunnel), and set
4180 		 * this tunnel id with this decap flow.
4181 		 *
4182 		 * On restore (miss), we'll just set this saved tunnel
4183 		 * device.
4184 		 */
4185 
4186 		NL_SET_ERR_MSG(extack, "Decap with goto isn't supported");
4187 		netdev_warn(priv->netdev, "Decap with goto isn't supported");
4188 		return -EOPNOTSUPP;
4189 	}
4190 
4191 	if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
4192 		NL_SET_ERR_MSG_MOD(extack,
4193 				   "current firmware doesn't support split rule for port mirroring");
4194 		netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
4195 		return -EOPNOTSUPP;
4196 	}
4197 
4198 	/* Allocate sample attribute only when there is a sample action and
4199 	 * no errors after parsing.
4200 	 */
4201 	if (flow_flag_test(flow, SAMPLE)) {
4202 		attr->sample_attr = kzalloc(sizeof(*attr->sample_attr), GFP_KERNEL);
4203 		if (!attr->sample_attr)
4204 			return -ENOMEM;
4205 		*attr->sample_attr = sample_attr;
4206 	}
4207 
4208 	return 0;
4209 }
4210 
4211 static void get_flags(int flags, unsigned long *flow_flags)
4212 {
4213 	unsigned long __flow_flags = 0;
4214 
4215 	if (flags & MLX5_TC_FLAG(INGRESS))
4216 		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
4217 	if (flags & MLX5_TC_FLAG(EGRESS))
4218 		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
4219 
4220 	if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
4221 		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4222 	if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
4223 		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4224 	if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
4225 		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
4226 
4227 	*flow_flags = __flow_flags;
4228 }
4229 
4230 static const struct rhashtable_params tc_ht_params = {
4231 	.head_offset = offsetof(struct mlx5e_tc_flow, node),
4232 	.key_offset = offsetof(struct mlx5e_tc_flow, cookie),
4233 	.key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
4234 	.automatic_shrinking = true,
4235 };
4236 
4237 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
4238 				    unsigned long flags)
4239 {
4240 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4241 	struct mlx5e_rep_priv *uplink_rpriv;
4242 
4243 	if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
4244 		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
4245 		return &uplink_rpriv->uplink_priv.tc_ht;
4246 	} else /* NIC offload */
4247 		return &priv->fs.tc.ht;
4248 }
4249 
4250 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
4251 {
4252 	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
4253 	struct mlx5_flow_attr *attr = flow->attr;
4254 	bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
4255 		flow_flag_test(flow, INGRESS);
4256 	bool act_is_encap = !!(attr->action &
4257 			       MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
4258 	bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
4259 						MLX5_DEVCOM_ESW_OFFLOADS);
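	/* Roughly: a duplicate rule on the paired (peer) eswitch is needed
	 * under sriov lag or multipath when the flow ingresses on a VF rep
	 * or performs encap, so that traffic handled by either physical
	 * port still hits the rule.
	 */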
4260 
4261 	if (!esw_paired)
4262 		return false;
4263 
4264 	if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
4265 	     mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
4266 	    (is_rep_ingress || act_is_encap))
4267 		return true;
4268 
4269 	return false;
4270 }
4271 
4272 struct mlx5_flow_attr *
4273 mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
4274 {
4275 	u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB)  ?
4276 				sizeof(struct mlx5_esw_flow_attr) :
4277 				sizeof(struct mlx5_nic_flow_attr);
4278 	struct mlx5_flow_attr *attr;
4279 
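	/* The namespace specific attr (esw or nic) is carved out of the same
	 * allocation, directly after struct mlx5_flow_attr, hence the extra
	 * ex_attr_size bytes.
	 */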
4280 	return kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
4281 }
4282 
4283 static int
4284 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
4285 		 struct flow_cls_offload *f, unsigned long flow_flags,
4286 		 struct mlx5e_tc_flow_parse_attr **__parse_attr,
4287 		 struct mlx5e_tc_flow **__flow)
4288 {
4289 	struct mlx5e_tc_flow_parse_attr *parse_attr;
4290 	struct mlx5_flow_attr *attr;
4291 	struct mlx5e_tc_flow *flow;
4292 	int err = -ENOMEM;
4293 	int out_index;
4294 
4295 	flow = kzalloc(sizeof(*flow), GFP_KERNEL);
4296 	parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
4297 	if (!parse_attr || !flow)
4298 		goto err_free;
4299 
4300 	flow->flags = flow_flags;
4301 	flow->cookie = f->cookie;
4302 	flow->priv = priv;
4303 
4304 	attr = mlx5_alloc_flow_attr(get_flow_name_space(flow));
4305 	if (!attr)
4306 		goto err_free;
4307 
4308 	flow->attr = attr;
4309 
4310 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
4311 		INIT_LIST_HEAD(&flow->encaps[out_index].list);
4312 	INIT_LIST_HEAD(&flow->hairpin);
4313 	INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
4314 	refcount_set(&flow->refcnt, 1);
4315 	init_completion(&flow->init_done);
4316 	init_completion(&flow->del_hw_done);
4317 
4318 	*__flow = flow;
4319 	*__parse_attr = parse_attr;
4320 
4321 	return 0;
4322 
4323 err_free:
4324 	kfree(flow);
4325 	kvfree(parse_attr);
4326 	return err;
4327 }
4328 
4329 static void
4330 mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
4331 		     struct mlx5e_tc_flow_parse_attr *parse_attr,
4332 		     struct flow_cls_offload *f)
4333 {
4334 	attr->parse_attr = parse_attr;
4335 	attr->chain = f->common.chain_index;
4336 	attr->prio = f->common.prio;
4337 }
4338 
4339 static void
4340 mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
4341 			 struct mlx5e_priv *priv,
4342 			 struct mlx5e_tc_flow_parse_attr *parse_attr,
4343 			 struct flow_cls_offload *f,
4344 			 struct mlx5_eswitch_rep *in_rep,
4345 			 struct mlx5_core_dev *in_mdev)
4346 {
4347 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4348 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4349 
4350 	mlx5e_flow_attr_init(attr, parse_attr, f);
4351 
4352 	esw_attr->in_rep = in_rep;
4353 	esw_attr->in_mdev = in_mdev;
4354 
4355 	if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
4356 	    MLX5_COUNTER_SOURCE_ESWITCH)
4357 		esw_attr->counter_dev = in_mdev;
4358 	else
4359 		esw_attr->counter_dev = priv->mdev;
4360 }
4361 
4362 static struct mlx5e_tc_flow *
4363 __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4364 		     struct flow_cls_offload *f,
4365 		     unsigned long flow_flags,
4366 		     struct net_device *filter_dev,
4367 		     struct mlx5_eswitch_rep *in_rep,
4368 		     struct mlx5_core_dev *in_mdev)
4369 {
4370 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4371 	struct netlink_ext_ack *extack = f->common.extack;
4372 	struct mlx5e_tc_flow_parse_attr *parse_attr;
4373 	struct mlx5e_tc_flow *flow;
4374 	int attr_size, err;
4375 
4376 	flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4377 	attr_size  = sizeof(struct mlx5_esw_flow_attr);
4378 	err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4379 			       &parse_attr, &flow);
4380 	if (err)
4381 		goto out;
4382 
4383 	parse_attr->filter_dev = filter_dev;
4384 	mlx5e_flow_esw_attr_init(flow->attr,
4385 				 priv, parse_attr,
4386 				 f, in_rep, in_mdev);
4387 
4388 	err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4389 			       f, filter_dev);
4390 	if (err)
4391 		goto err_free;
4392 
4393 	/* actions validation depends on parsing the ct matches first */
4394 	err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4395 				   &flow->attr->ct_attr, extack);
4396 	if (err)
4397 		goto err_free;
4398 
4399 	err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
4400 	if (err)
4401 		goto err_free;
4402 
4403 	err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
4404 	complete_all(&flow->init_done);
4405 	if (err) {
4406 		if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
4407 			goto err_free;
4408 
4409 		add_unready_flow(flow);
4410 	}
4411 
4412 	return flow;
4413 
4414 err_free:
4415 	mlx5e_flow_put(priv, flow);
4416 out:
4417 	return ERR_PTR(err);
4418 }
4419 
4420 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
4421 				      struct mlx5e_tc_flow *flow,
4422 				      unsigned long flow_flags)
4423 {
4424 	struct mlx5e_priv *priv = flow->priv, *peer_priv;
4425 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
4426 	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4427 	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4428 	struct mlx5e_tc_flow_parse_attr *parse_attr;
4429 	struct mlx5e_rep_priv *peer_urpriv;
4430 	struct mlx5e_tc_flow *peer_flow;
4431 	struct mlx5_core_dev *in_mdev;
4432 	int err = 0;
4433 
4434 	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4435 	if (!peer_esw)
4436 		return -ENODEV;
4437 
4438 	peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
4439 	peer_priv = netdev_priv(peer_urpriv->netdev);
4440 
4441 	/* in_mdev is assigned the mdev from which the packet originated.
4442 	 * So packets redirected to the uplink use the same mdev as the
4443 	 * original flow, and packets redirected from the uplink use the
4444 	 * peer mdev.
4445 	 */
4446 	if (attr->in_rep->vport == MLX5_VPORT_UPLINK)
4447 		in_mdev = peer_priv->mdev;
4448 	else
4449 		in_mdev = priv->mdev;
4450 
4451 	parse_attr = flow->attr->parse_attr;
4452 	peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
4453 					 parse_attr->filter_dev,
4454 					 attr->in_rep, in_mdev);
4455 	if (IS_ERR(peer_flow)) {
4456 		err = PTR_ERR(peer_flow);
4457 		goto out;
4458 	}
4459 
4460 	flow->peer_flow = peer_flow;
4461 	flow_flag_set(flow, DUP);
4462 	mutex_lock(&esw->offloads.peer_mutex);
4463 	list_add_tail(&flow->peer, &esw->offloads.peer_flows);
4464 	mutex_unlock(&esw->offloads.peer_mutex);
4465 
4466 out:
4467 	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4468 	return err;
4469 }
4470 
4471 static int
4472 mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4473 		   struct flow_cls_offload *f,
4474 		   unsigned long flow_flags,
4475 		   struct net_device *filter_dev,
4476 		   struct mlx5e_tc_flow **__flow)
4477 {
4478 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
4479 	struct mlx5_eswitch_rep *in_rep = rpriv->rep;
4480 	struct mlx5_core_dev *in_mdev = priv->mdev;
4481 	struct mlx5e_tc_flow *flow;
4482 	int err;
4483 
4484 	flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
4485 				    in_mdev);
4486 	if (IS_ERR(flow))
4487 		return PTR_ERR(flow);
4488 
4489 	if (is_peer_flow_needed(flow)) {
4490 		err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
4491 		if (err) {
4492 			mlx5e_tc_del_fdb_flow(priv, flow);
4493 			goto out;
4494 		}
4495 	}
4496 
4497 	*__flow = flow;
4498 
4499 	return 0;
4500 
4501 out:
4502 	return err;
4503 }
4504 
4505 static int
4506 mlx5e_add_nic_flow(struct mlx5e_priv *priv,
4507 		   struct flow_cls_offload *f,
4508 		   unsigned long flow_flags,
4509 		   struct net_device *filter_dev,
4510 		   struct mlx5e_tc_flow **__flow)
4511 {
4512 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4513 	struct netlink_ext_ack *extack = f->common.extack;
4514 	struct mlx5e_tc_flow_parse_attr *parse_attr;
4515 	struct mlx5e_tc_flow *flow;
4516 	int attr_size, err;
4517 
4518 	if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
4519 		if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
4520 			return -EOPNOTSUPP;
4521 	} else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
4522 		return -EOPNOTSUPP;
4523 	}
4524 
4525 	flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4526 	attr_size  = sizeof(struct mlx5_nic_flow_attr);
4527 	err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4528 			       &parse_attr, &flow);
4529 	if (err)
4530 		goto out;
4531 
4532 	parse_attr->filter_dev = filter_dev;
4533 	mlx5e_flow_attr_init(flow->attr, parse_attr, f);
4534 
4535 	err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4536 			       f, filter_dev);
4537 	if (err)
4538 		goto err_free;
4539 
4540 	err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4541 				   &flow->attr->ct_attr, extack);
4542 	if (err)
4543 		goto err_free;
4544 
4545 	err = parse_tc_nic_actions(priv, &rule->action, flow, extack);
4546 	if (err)
4547 		goto err_free;
4548 
4549 	err = mlx5e_tc_add_nic_flow(priv, flow, extack);
4550 	if (err)
4551 		goto err_free;
4552 
4553 	flow_flag_set(flow, OFFLOADED);
4554 	*__flow = flow;
4555 
4556 	return 0;
4557 
4558 err_free:
4559 	flow_flag_set(flow, FAILED);
4560 	mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
4561 	mlx5e_flow_put(priv, flow);
4562 out:
4563 	return err;
4564 }
4565 
4566 static int
4567 mlx5e_tc_add_flow(struct mlx5e_priv *priv,
4568 		  struct flow_cls_offload *f,
4569 		  unsigned long flags,
4570 		  struct net_device *filter_dev,
4571 		  struct mlx5e_tc_flow **flow)
4572 {
4573 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4574 	unsigned long flow_flags;
4575 	int err;
4576 
4577 	get_flags(flags, &flow_flags);
4578 
4579 	if (!tc_can_offload_extack(priv->netdev, f->common.extack))
4580 		return -EOPNOTSUPP;
4581 
4582 	if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
4583 		err = mlx5e_add_fdb_flow(priv, f, flow_flags,
4584 					 filter_dev, flow);
4585 	else
4586 		err = mlx5e_add_nic_flow(priv, f, flow_flags,
4587 					 filter_dev, flow);
4588 
4589 	return err;
4590 }
4591 
4592 static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
4593 					   struct mlx5e_rep_priv *rpriv)
4594 {
4595 	/* Offloaded flow rule is allowed to duplicate on non-uplink representor
4596 	 * sharing tc block with other slaves of a lag device. Rpriv can be NULL if this
4597 	 * function is called from NIC mode.
4598 	 */
4599 	return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK;
4600 }
4601 
4602 int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
4603 			   struct flow_cls_offload *f, unsigned long flags)
4604 {
4605 	struct netlink_ext_ack *extack = f->common.extack;
4606 	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4607 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
4608 	struct mlx5e_tc_flow *flow;
4609 	int err = 0;
4610 
4611 	if (!mlx5_esw_hold(priv->mdev))
4612 		return -EAGAIN;
4613 
4614 	mlx5_esw_get(priv->mdev);
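	/* Roughly speaking, mlx5_esw_hold() keeps the eswitch mode from
	 * changing while this request is processed (paired with
	 * mlx5_esw_release() below), and mlx5_esw_get()/mlx5_esw_put()
	 * pin the mode for as long as the flow remains offloaded.
	 */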
4615 
4616 	rcu_read_lock();
4617 	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4618 	if (flow) {
4619 		/* Same flow rule offloaded to non-uplink representor sharing tc block,
4620 		 * just return 0.
4621 		 */
4622 		if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
4623 			goto rcu_unlock;
4624 
4625 		NL_SET_ERR_MSG_MOD(extack,
4626 				   "flow cookie already exists, ignoring");
4627 		netdev_warn_once(priv->netdev,
4628 				 "flow cookie %lx already exists, ignoring\n",
4629 				 f->cookie);
4630 		err = -EEXIST;
4631 		goto rcu_unlock;
4632 	}
4633 rcu_unlock:
4634 	rcu_read_unlock();
4635 	if (flow)
4636 		goto out;
4637 
4638 	trace_mlx5e_configure_flower(f);
4639 	err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
4640 	if (err)
4641 		goto out;
4642 
4643 	/* Flow rule offloaded to non-uplink representor sharing tc block,
4644 	 * set the flow's owner dev.
4645 	 */
4646 	if (is_flow_rule_duplicate_allowed(dev, rpriv))
4647 		flow->orig_dev = dev;
4648 
4649 	err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
4650 	if (err)
4651 		goto err_free;
4652 
4653 	mlx5_esw_release(priv->mdev);
4654 	return 0;
4655 
4656 err_free:
4657 	mlx5e_flow_put(priv, flow);
4658 out:
4659 	mlx5_esw_put(priv->mdev);
4660 	mlx5_esw_release(priv->mdev);
4661 	return err;
4662 }
4663 
4664 static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
4665 {
4666 	bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
4667 	bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
4668 
4669 	return flow_flag_test(flow, INGRESS) == dir_ingress &&
4670 		flow_flag_test(flow, EGRESS) == dir_egress;
4671 }
4672 
4673 int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
4674 			struct flow_cls_offload *f, unsigned long flags)
4675 {
4676 	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4677 	struct mlx5e_tc_flow *flow;
4678 	int err;
4679 
4680 	rcu_read_lock();
4681 	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4682 	if (!flow || !same_flow_direction(flow, flags)) {
4683 		err = -EINVAL;
4684 		goto errout;
4685 	}
4686 
4687 	/* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
4688 	 * set.
4689 	 */
4690 	if (flow_flag_test_and_set(flow, DELETED)) {
4691 		err = -EINVAL;
4692 		goto errout;
4693 	}
4694 	rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
4695 	rcu_read_unlock();
4696 
4697 	trace_mlx5e_delete_flower(f);
4698 	mlx5e_flow_put(priv, flow);
4699 
4700 	mlx5_esw_put(priv->mdev);
4701 	return 0;
4702 
4703 errout:
4704 	rcu_read_unlock();
4705 	return err;
4706 }
4707 
4708 int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
4709 		       struct flow_cls_offload *f, unsigned long flags)
4710 {
4711 	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4712 	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4713 	struct mlx5_eswitch *peer_esw;
4714 	struct mlx5e_tc_flow *flow;
4715 	struct mlx5_fc *counter;
4716 	u64 lastuse = 0;
4717 	u64 packets = 0;
4718 	u64 bytes = 0;
4719 	int err = 0;
4720 
4721 	rcu_read_lock();
4722 	flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
4723 						tc_ht_params));
4724 	rcu_read_unlock();
4725 	if (IS_ERR(flow))
4726 		return PTR_ERR(flow);
4727 
4728 	if (!same_flow_direction(flow, flags)) {
4729 		err = -EINVAL;
4730 		goto errout;
4731 	}
4732 
4733 	if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
4734 		counter = mlx5e_tc_get_counter(flow);
4735 		if (!counter)
4736 			goto errout;
4737 
4738 		mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
4739 	}
4740 
4741 	/* Under multipath it's possible for one rule to be currently
4742 	 * un-offloaded while the other rule is offloaded.
4743 	 */
4744 	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4745 	if (!peer_esw)
4746 		goto out;
4747 
4748 	if (flow_flag_test(flow, DUP) &&
4749 	    flow_flag_test(flow->peer_flow, OFFLOADED)) {
4750 		u64 bytes2;
4751 		u64 packets2;
4752 		u64 lastuse2;
4753 
4754 		counter = mlx5e_tc_get_counter(flow->peer_flow);
4755 		if (!counter)
4756 			goto no_peer_counter;
4757 		mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
4758 
4759 		bytes += bytes2;
4760 		packets += packets2;
4761 		lastuse = max_t(u64, lastuse, lastuse2);
4762 	}
4763 
4764 no_peer_counter:
4765 	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4766 out:
4767 	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
4768 			  FLOW_ACTION_HW_STATS_DELAYED);
4769 	trace_mlx5e_stats_flower(f);
4770 errout:
4771 	mlx5e_flow_put(priv, flow);
4772 	return err;
4773 }
4774 
4775 static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
4776 			       struct netlink_ext_ack *extack)
4777 {
4778 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
4779 	struct mlx5_eswitch *esw;
4780 	u32 rate_mbps = 0;
4781 	u16 vport_num;
4782 	int err;
4783 
4784 	vport_num = rpriv->rep->vport;
4785 	if (vport_num >= MLX5_VPORT_ECPF) {
4786 		NL_SET_ERR_MSG_MOD(extack,
4787 				   "Ingress rate limit is supported only for Eswitch ports connected to VFs");
4788 		return -EOPNOTSUPP;
4789 	}
4790 
4791 	esw = priv->mdev->priv.eswitch;
4792 	/* rate is given in bytes/sec.
4793 	 * First convert it to bits/sec and then round to the nearest Mbit/sec
4794 	 * (1 Mbit is one million bits).
4795 	 * Moreover, if the rate is non-zero we choose to configure a minimum
4796 	 * of 1 Mbit/sec.
4797 	 */
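	/* Example of the conversion below: a police rate of 1,250,000 bytes/sec
	 * is 10,000,000 bits/sec; adding 500,000 and dividing by 1,000,000
	 * rounds to the nearest whole Mbit, so rate_mbps ends up as 10.
	 */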
4798 	if (rate) {
4799 		rate = (rate * BITS_PER_BYTE) + 500000;
4800 		do_div(rate, 1000000);
4801 		rate_mbps = max_t(u32, rate, 1);
4802 	}
4803 
4804 	err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps);
4805 	if (err)
4806 		NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
4807 
4808 	return err;
4809 }
4810 
4811 static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
4812 					struct flow_action *flow_action,
4813 					struct netlink_ext_ack *extack)
4814 {
4815 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
4816 	const struct flow_action_entry *act;
4817 	int err;
4818 	int i;
4819 
4820 	if (!flow_action_has_entries(flow_action)) {
4821 		NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
4822 		return -EINVAL;
4823 	}
4824 
4825 	if (!flow_offload_has_one_action(flow_action)) {
4826 		NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action");
4827 		return -EOPNOTSUPP;
4828 	}
4829 
4830 	if (!flow_action_basic_hw_stats_check(flow_action, extack))
4831 		return -EOPNOTSUPP;
4832 
4833 	flow_action_for_each(i, act, flow_action) {
4834 		switch (act->id) {
4835 		case FLOW_ACTION_POLICE:
4836 			if (act->police.rate_pkt_ps) {
4837 				NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second");
4838 				return -EOPNOTSUPP;
4839 			}
4840 			err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
4841 			if (err)
4842 				return err;
4843 
4844 			rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
4845 			break;
4846 		default:
4847 			NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
4848 			return -EOPNOTSUPP;
4849 		}
4850 	}
4851 
4852 	return 0;
4853 }
4854 
4855 int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
4856 				struct tc_cls_matchall_offload *ma)
4857 {
4858 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4859 	struct netlink_ext_ack *extack = ma->common.extack;
4860 
4861 	if (!mlx5_esw_qos_enabled(esw)) {
4862 		NL_SET_ERR_MSG_MOD(extack, "QoS is not supported on this device");
4863 		return -EOPNOTSUPP;
4864 	}
4865 
4866 	if (ma->common.prio != 1) {
4867 		NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
4868 		return -EINVAL;
4869 	}
4870 
4871 	return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
4872 }
4873 
4874 int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
4875 			     struct tc_cls_matchall_offload *ma)
4876 {
4877 	struct netlink_ext_ack *extack = ma->common.extack;
4878 
4879 	return apply_police_params(priv, 0, extack);
4880 }
4881 
4882 void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
4883 			     struct tc_cls_matchall_offload *ma)
4884 {
4885 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
4886 	struct rtnl_link_stats64 cur_stats;
4887 	u64 dbytes;
4888 	u64 dpkts;
4889 
4890 	cur_stats = priv->stats.vf_vport;
4891 	dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
4892 	dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
4893 	rpriv->prev_vf_vport_stats = cur_stats;
4894 	flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
4895 			  FLOW_ACTION_HW_STATS_DELAYED);
4896 }
4897 
4898 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
4899 					      struct mlx5e_priv *peer_priv)
4900 {
4901 	struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
4902 	struct mlx5e_hairpin_entry *hpe, *tmp;
4903 	LIST_HEAD(init_wait_list);
4904 	u16 peer_vhca_id;
4905 	int bkt;
4906 
4907 	if (!same_hw_devs(priv, peer_priv))
4908 		return;
4909 
4910 	peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
4911 
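	/* Take a reference on every hairpin entry while holding the lock, then
	 * (outside the lock) wait for each entry's resources and clear the HW
	 * pair whose peer matches the departing vhca id.
	 */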
4912 	mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
4913 	hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
4914 		if (refcount_inc_not_zero(&hpe->refcnt))
4915 			list_add(&hpe->dead_peer_wait_list, &init_wait_list);
4916 	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
4917 
4918 	list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
4919 		wait_for_completion(&hpe->res_ready);
4920 		if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
4921 			mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);
4922 
4923 		mlx5e_hairpin_put(priv, hpe);
4924 	}
4925 }
4926 
4927 static int mlx5e_tc_netdev_event(struct notifier_block *this,
4928 				 unsigned long event, void *ptr)
4929 {
4930 	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
4931 	struct mlx5e_flow_steering *fs;
4932 	struct mlx5e_priv *peer_priv;
4933 	struct mlx5e_tc_table *tc;
4934 	struct mlx5e_priv *priv;
4935 
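	/* Only mlx5e netdevs that are really going away matter for hairpin
	 * cleanup; ignore other devices, other events, and devices that are
	 * still registered.
	 */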
4936 	if (ndev->netdev_ops != &mlx5e_netdev_ops ||
4937 	    event != NETDEV_UNREGISTER ||
4938 	    ndev->reg_state == NETREG_REGISTERED)
4939 		return NOTIFY_DONE;
4940 
4941 	tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
4942 	fs = container_of(tc, struct mlx5e_flow_steering, tc);
4943 	priv = container_of(fs, struct mlx5e_priv, fs);
4944 	peer_priv = netdev_priv(ndev);
4945 	if (priv == peer_priv ||
4946 	    !(priv->netdev->features & NETIF_F_HW_TC))
4947 		return NOTIFY_DONE;
4948 
4949 	mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);
4950 
4951 	return NOTIFY_DONE;
4952 }
4953 
4954 static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
4955 {
4956 	int tc_grp_size, tc_tbl_size;
4957 	u32 max_flow_counter;
4958 
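	/* Bound each flow group by the number of HW flow counters (capped at
	 * MLX5E_TC_TABLE_MAX_GROUP_SIZE), then bound the whole table by the
	 * NIC RX log_max_ft_size capability.
	 */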
4959 	max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
4960 			    MLX5_CAP_GEN(dev, max_flow_counter_15_0);
4961 
4962 	tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
4963 
4964 	tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
4965 			    BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
4966 
4967 	return tc_tbl_size;
4968 }
4969 
4970 static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv)
4971 {
4972 	struct mlx5_flow_table **ft = &priv->fs.tc.miss_t;
4973 	struct mlx5_flow_table_attr ft_attr = {};
4974 	struct mlx5_flow_namespace *ns;
4975 	int err = 0;
4976 
4977 	ft_attr.max_fte = 1;
4978 	ft_attr.autogroup.max_num_groups = 1;
4979 	ft_attr.level = MLX5E_TC_MISS_LEVEL;
4980 	ft_attr.prio = 0;
4981 	ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL);
4982 
4983 	*ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
4984 	if (IS_ERR(*ft)) {
4985 		err = PTR_ERR(*ft);
4986 		netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err);
4987 	}
4988 
4989 	return err;
4990 }
4991 
4992 static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv)
4993 {
4994 	mlx5_destroy_flow_table(priv->fs.tc.miss_t);
4995 }
4996 
4997 int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
4998 {
4999 	struct mlx5e_tc_table *tc = &priv->fs.tc;
5000 	struct mlx5_core_dev *dev = priv->mdev;
5001 	struct mapping_ctx *chains_mapping;
5002 	struct mlx5_chains_attr attr = {};
5003 	u64 mapping_id;
5004 	int err;
5005 
5006 	mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
5007 	mutex_init(&tc->t_lock);
5008 	mutex_init(&tc->hairpin_tbl_lock);
5009 	hash_init(tc->hairpin_tbl);
5010 
5011 	err = rhashtable_init(&tc->ht, &tc_ht_params);
5012 	if (err)
5013 		return err;
5014 
5015 	lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
5016 
5017 	mapping_id = mlx5_query_nic_system_image_guid(dev);
5018 
5019 	chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
5020 					       sizeof(struct mlx5_mapped_obj),
5021 					       MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
5022 
5023 	if (IS_ERR(chains_mapping)) {
5024 		err = PTR_ERR(chains_mapping);
5025 		goto err_mapping;
5026 	}
5027 	tc->mapping = chains_mapping;
5028 
5029 	err = mlx5e_tc_nic_create_miss_table(priv);
5030 	if (err)
5031 		goto err_chains;
5032 
5033 	if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
5034 		attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
5035 			MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
5036 	attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
5037 	attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
5038 	attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
5039 	attr.default_ft = priv->fs.tc.miss_t;
5040 	attr.mapping = chains_mapping;
5041 
5042 	tc->chains = mlx5_chains_create(dev, &attr);
5043 	if (IS_ERR(tc->chains)) {
5044 		err = PTR_ERR(tc->chains);
5045 		goto err_miss;
5046 	}
5047 
5048 	tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
5049 	tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr,
5050 				 MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);
5051 
5052 	tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
5053 	err = register_netdevice_notifier_dev_net(priv->netdev,
5054 						  &tc->netdevice_nb,
5055 						  &tc->netdevice_nn);
5056 	if (err) {
5057 		tc->netdevice_nb.notifier_call = NULL;
5058 		mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
5059 		goto err_reg;
5060 	}
5061 
5062 	return 0;
5063 
5064 err_reg:
5065 	mlx5_tc_ct_clean(tc->ct);
5066 	mlx5e_tc_post_act_destroy(tc->post_act);
5067 	mlx5_chains_destroy(tc->chains);
5068 err_miss:
5069 	mlx5e_tc_nic_destroy_miss_table(priv);
5070 err_chains:
5071 	mapping_destroy(chains_mapping);
5072 err_mapping:
5073 	rhashtable_destroy(&tc->ht);
5074 	return err;
5075 }
5076 
5077 static void _mlx5e_tc_del_flow(void *ptr, void *arg)
5078 {
5079 	struct mlx5e_tc_flow *flow = ptr;
5080 	struct mlx5e_priv *priv = flow->priv;
5081 
5082 	mlx5e_tc_del_flow(priv, flow);
5083 	kfree(flow);
5084 }
5085 
5086 void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
5087 {
5088 	struct mlx5e_tc_table *tc = &priv->fs.tc;
5089 
5090 	if (tc->netdevice_nb.notifier_call)
5091 		unregister_netdevice_notifier_dev_net(priv->netdev,
5092 						      &tc->netdevice_nb,
5093 						      &tc->netdevice_nn);
5094 
5095 	mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
5096 	mutex_destroy(&tc->hairpin_tbl_lock);
5097 
5098 	rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
5099 
5100 	if (!IS_ERR_OR_NULL(tc->t)) {
5101 		mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
5102 		tc->t = NULL;
5103 	}
5104 	mutex_destroy(&tc->t_lock);
5105 
5106 	mlx5_tc_ct_clean(tc->ct);
5107 	mlx5e_tc_post_act_destroy(tc->post_act);
5108 	mapping_destroy(tc->mapping);
5109 	mlx5_chains_destroy(tc->chains);
5110 	mlx5e_tc_nic_destroy_miss_table(priv);
5111 }
5112 
5113 int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
5114 {
5115 	const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
5116 	struct mlx5_rep_uplink_priv *uplink_priv;
5117 	struct mlx5e_rep_priv *rpriv;
5118 	struct mapping_ctx *mapping;
5119 	struct mlx5_eswitch *esw;
5120 	struct mlx5e_priv *priv;
5121 	u64 mapping_id;
5122 	int err = 0;
5123 
5124 	uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
5125 	rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
5126 	priv = netdev_priv(rpriv->netdev);
5127 	esw = priv->mdev->priv.eswitch;
5128 
5129 	uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw),
5130 						       MLX5_FLOW_NAMESPACE_FDB);
5131 	uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
5132 					       esw_chains(esw),
5133 					       &esw->offloads.mod_hdr,
5134 					       MLX5_FLOW_NAMESPACE_FDB,
5135 					       uplink_priv->post_act);
5136 
5137 	uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);
5138 
5139 	mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
5140 
5141 	mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL,
5142 					sizeof(struct tunnel_match_key),
5143 					TUNNEL_INFO_BITS_MASK, true);
5144 
5145 	if (IS_ERR(mapping)) {
5146 		err = PTR_ERR(mapping);
5147 		goto err_tun_mapping;
5148 	}
5149 	uplink_priv->tunnel_mapping = mapping;
5150 
5151 	/* 0xFFF is reserved for the stack devices' slow path table mark */
5152 	mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS,
5153 					sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true);
5154 	if (IS_ERR(mapping)) {
5155 		err = PTR_ERR(mapping);
5156 		goto err_enc_opts_mapping;
5157 	}
5158 	uplink_priv->tunnel_enc_opts_mapping = mapping;
5159 
5160 	err = rhashtable_init(tc_ht, &tc_ht_params);
5161 	if (err)
5162 		goto err_ht_init;
5163 
5164 	lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
5165 
5166 	uplink_priv->encap = mlx5e_tc_tun_init(priv);
5167 	if (IS_ERR(uplink_priv->encap)) {
5168 		err = PTR_ERR(uplink_priv->encap);
5169 		goto err_register_fib_notifier;
5170 	}
5171 
5172 	return 0;
5173 
5174 err_register_fib_notifier:
5175 	rhashtable_destroy(tc_ht);
5176 err_ht_init:
5177 	mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5178 err_enc_opts_mapping:
5179 	mapping_destroy(uplink_priv->tunnel_mapping);
5180 err_tun_mapping:
5181 	mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
5182 	mlx5_tc_ct_clean(uplink_priv->ct_priv);
5183 	netdev_warn(priv->netdev,
5184 		    "Failed to initialize tc (eswitch), err: %d", err);
5185 	mlx5e_tc_post_act_destroy(uplink_priv->post_act);
5186 	return err;
5187 }
5188 
5189 void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
5190 {
5191 	struct mlx5_rep_uplink_priv *uplink_priv;
5192 
5193 	uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
5194 
5195 	rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
5196 	mlx5e_tc_tun_cleanup(uplink_priv->encap);
5197 
5198 	mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5199 	mapping_destroy(uplink_priv->tunnel_mapping);
5200 
5201 	mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
5202 	mlx5_tc_ct_clean(uplink_priv->ct_priv);
5203 	mlx5e_tc_post_act_destroy(uplink_priv->post_act);
5204 }
5205 
5206 int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
5207 {
5208 	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
5209 
5210 	return atomic_read(&tc_ht->nelems);
5211 }
5212 
5213 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
5214 {
5215 	struct mlx5e_tc_flow *flow, *tmp;
5216 
5217 	list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
5218 		__mlx5e_tc_del_fdb_peer_flow(flow);
5219 }
5220 
5221 void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
5222 {
5223 	struct mlx5_rep_uplink_priv *rpriv =
5224 		container_of(work, struct mlx5_rep_uplink_priv,
5225 			     reoffload_flows_work);
5226 	struct mlx5e_tc_flow *flow, *tmp;
5227 
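	/* Retry offloading flows whose FDB add was previously deferred; a flow
	 * leaves the unready list only once mlx5e_tc_add_fdb_flow() succeeds.
	 */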
5228 	mutex_lock(&rpriv->unready_flows_lock);
5229 	list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
5230 		if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
5231 			unready_flow_del(flow);
5232 	}
5233 	mutex_unlock(&rpriv->unready_flows_lock);
5234 }
5235 
5236 static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
5237 				     struct flow_cls_offload *cls_flower,
5238 				     unsigned long flags)
5239 {
5240 	switch (cls_flower->command) {
5241 	case FLOW_CLS_REPLACE:
5242 		return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
5243 					      flags);
5244 	case FLOW_CLS_DESTROY:
5245 		return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
5246 					   flags);
5247 	case FLOW_CLS_STATS:
5248 		return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
5249 					  flags);
5250 	default:
5251 		return -EOPNOTSUPP;
5252 	}
5253 }
5254 
5255 int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
5256 			    void *cb_priv)
5257 {
5258 	unsigned long flags = MLX5_TC_FLAG(INGRESS);
5259 	struct mlx5e_priv *priv = cb_priv;
5260 
5261 	if (!priv->netdev || !netif_device_present(priv->netdev))
5262 		return -EOPNOTSUPP;
5263 
5264 	if (mlx5e_is_uplink_rep(priv))
5265 		flags |= MLX5_TC_FLAG(ESW_OFFLOAD);
5266 	else
5267 		flags |= MLX5_TC_FLAG(NIC_OFFLOAD);
5268 
5269 	switch (type) {
5270 	case TC_SETUP_CLSFLOWER:
5271 		return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
5272 	default:
5273 		return -EOPNOTSUPP;
5274 	}
5275 }
5276 
5277 bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
5278 			 struct sk_buff *skb)
5279 {
5280 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
5281 	u32 chain = 0, chain_tag, reg_b, zone_restore_id;
5282 	struct mlx5e_priv *priv = netdev_priv(skb->dev);
5283 	struct mlx5e_tc_table *tc = &priv->fs.tc;
5284 	struct mlx5_mapped_obj mapped_obj;
5285 	struct tc_skb_ext *tc_skb_ext;
5286 	int err;
5287 
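	/* The chain tag is carried back in the CQE flow table metadata; map it
	 * to the tc chain so software can continue processing, and restore the
	 * conntrack zone from the remaining register bits.
	 */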
5288 	reg_b = be32_to_cpu(cqe->ft_metadata);
5289 
5290 	chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
5291 
5292 	err = mapping_find(tc->mapping, chain_tag, &mapped_obj);
5293 	if (err) {
5294 		netdev_dbg(priv->netdev,
5295 			   "Couldn't find chain for chain tag: %d, err: %d\n",
5296 			   chain_tag, err);
5297 		return false;
5298 	}
5299 
5300 	if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
5301 		chain = mapped_obj.chain;
5302 		tc_skb_ext = tc_skb_ext_alloc(skb);
5303 		if (WARN_ON(!tc_skb_ext))
5304 			return false;
5305 
5306 		tc_skb_ext->chain = chain;
5307 
5308 		zone_restore_id = (reg_b >> REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) &
5309 			ESW_ZONE_ID_MASK;
5310 
5311 		if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
5312 					      zone_restore_id))
5313 			return false;
5314 	} else {
5315 		netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
5316 		return false;
5317 	}
5318 #endif /* CONFIG_NET_TC_SKB_EXT */
5319 
5320 	return true;
5321 }
5322