1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3 
4 #include <net/netfilter/nf_conntrack.h>
5 #include <net/netfilter/nf_conntrack_core.h>
6 #include <net/netfilter/nf_conntrack_zones.h>
7 #include <net/netfilter/nf_conntrack_labels.h>
8 #include <net/netfilter/nf_conntrack_helper.h>
9 #include <net/netfilter/nf_conntrack_acct.h>
10 #include <uapi/linux/tc_act/tc_pedit.h>
11 #include <net/tc_act/tc_ct.h>
12 #include <net/flow_offload.h>
13 #include <net/netfilter/nf_flow_table.h>
14 #include <linux/workqueue.h>
15 #include <linux/refcount.h>
16 #include <linux/xarray.h>
17 #include <linux/if_macvlan.h>
18 #include <linux/debugfs.h>
19 
20 #include "lib/fs_chains.h"
21 #include "en/tc_ct.h"
22 #include "en/tc/ct_fs.h"
23 #include "en/tc_priv.h"
24 #include "en/mod_hdr.h"
25 #include "en/mapping.h"
26 #include "en/tc/post_act.h"
27 #include "en.h"
28 #include "en_tc.h"
29 #include "en_rep.h"
30 #include "fs_core.h"
31 
32 #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
33 #define MLX5_CT_STATE_TRK_BIT BIT(2)
34 #define MLX5_CT_STATE_NAT_BIT BIT(3)
35 #define MLX5_CT_STATE_REPLY_BIT BIT(4)
36 #define MLX5_CT_STATE_RELATED_BIT BIT(5)
37 #define MLX5_CT_STATE_INVALID_BIT BIT(6)
38 #define MLX5_CT_STATE_NEW_BIT BIT(7)
39 
40 #define MLX5_CT_LABELS_BITS MLX5_REG_MAPPING_MBITS(LABELS_TO_REG)
41 #define MLX5_CT_LABELS_MASK MLX5_REG_MAPPING_MASK(LABELS_TO_REG)
42 
43 /* Statically allocate modify actions for
44  * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10.
45  * This will be increased dynamically if needed (for the ipv6 snat + dnat).
46  */
47 #define MLX5_CT_MIN_MOD_ACTS 10
48 
49 #define ct_dbg(fmt, args...)\
50 	netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
51 
52 struct mlx5_tc_ct_debugfs {
53 	struct {
54 		atomic_t offloaded;
55 		atomic_t rx_dropped;
56 	} stats;
57 
58 	struct dentry *root;
59 };
60 
61 struct mlx5_tc_ct_priv {
62 	struct mlx5_core_dev *dev;
63 	struct mlx5e_priv *priv;
64 	const struct net_device *netdev;
65 	struct mod_hdr_tbl *mod_hdr_tbl;
66 	struct xarray tuple_ids;
67 	struct rhashtable zone_ht;
68 	struct rhashtable ct_tuples_ht;
69 	struct rhashtable ct_tuples_nat_ht;
70 	struct mlx5_flow_table *ct;
71 	struct mlx5_flow_table *ct_nat;
72 	struct mlx5e_post_act *post_act;
73 	struct mutex control_lock; /* guards parallel adds/dels */
74 	struct mapping_ctx *zone_mapping;
75 	struct mapping_ctx *labels_mapping;
76 	enum mlx5_flow_namespace_type ns_type;
77 	struct mlx5_fs_chains *chains;
78 	struct mlx5_ct_fs *fs;
79 	struct mlx5_ct_fs_ops *fs_ops;
80 	spinlock_t ht_lock; /* protects ft entries */
81 	struct workqueue_struct *wq;
82 
83 	struct mlx5_tc_ct_debugfs debugfs;
84 };
85 
86 struct mlx5_ct_zone_rule {
87 	struct mlx5_ct_fs_rule *rule;
88 	struct mlx5e_mod_hdr_handle *mh;
89 	struct mlx5_flow_attr *attr;
90 	bool nat;
91 };
92 
93 struct mlx5_tc_ct_pre {
94 	struct mlx5_flow_table *ft;
95 	struct mlx5_flow_group *flow_grp;
96 	struct mlx5_flow_group *miss_grp;
97 	struct mlx5_flow_handle *flow_rule;
98 	struct mlx5_flow_handle *miss_rule;
99 	struct mlx5_modify_hdr *modify_hdr;
100 };
101 
102 struct mlx5_ct_ft {
103 	struct rhash_head node;
104 	u16 zone;
105 	u32 zone_restore_id;
106 	refcount_t refcount;
107 	struct nf_flowtable *nf_ft;
108 	struct mlx5_tc_ct_priv *ct_priv;
109 	struct rhashtable ct_entries_ht;
110 	struct mlx5_tc_ct_pre pre_ct;
111 	struct mlx5_tc_ct_pre pre_ct_nat;
112 };
113 
114 struct mlx5_ct_tuple {
115 	u16 addr_type;
116 	__be16 n_proto;
117 	u8 ip_proto;
118 	struct {
119 		union {
120 			__be32 src_v4;
121 			struct in6_addr src_v6;
122 		};
123 		union {
124 			__be32 dst_v4;
125 			struct in6_addr dst_v6;
126 		};
127 	} ip;
128 	struct {
129 		__be16 src;
130 		__be16 dst;
131 	} port;
132 
133 	u16 zone;
134 };
135 
136 struct mlx5_ct_counter {
137 	struct mlx5_fc *counter;
138 	refcount_t refcount;
139 	bool is_shared;
140 };
141 
142 enum {
143 	MLX5_CT_ENTRY_FLAG_VALID,
144 };
145 
146 struct mlx5_ct_entry {
147 	struct rhash_head node;
148 	struct rhash_head tuple_node;
149 	struct rhash_head tuple_nat_node;
150 	struct mlx5_ct_counter *counter;
151 	unsigned long cookie;
152 	unsigned long restore_cookie;
153 	struct mlx5_ct_tuple tuple;
154 	struct mlx5_ct_tuple tuple_nat;
155 	struct mlx5_ct_zone_rule zone_rules[2];
156 
157 	struct mlx5_tc_ct_priv *ct_priv;
158 	struct work_struct work;
159 
160 	refcount_t refcnt;
161 	unsigned long flags;
162 };
163 
164 static void
165 mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
166 				 struct mlx5_flow_attr *attr,
167 				 struct mlx5e_mod_hdr_handle *mh);
168 
169 static const struct rhashtable_params cts_ht_params = {
170 	.head_offset = offsetof(struct mlx5_ct_entry, node),
171 	.key_offset = offsetof(struct mlx5_ct_entry, cookie),
172 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
173 	.automatic_shrinking = true,
174 	.min_size = 16 * 1024,
175 };
176 
177 static const struct rhashtable_params zone_params = {
178 	.head_offset = offsetof(struct mlx5_ct_ft, node),
179 	.key_offset = offsetof(struct mlx5_ct_ft, zone),
180 	.key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
181 	.automatic_shrinking = true,
182 };
183 
184 static const struct rhashtable_params tuples_ht_params = {
185 	.head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
186 	.key_offset = offsetof(struct mlx5_ct_entry, tuple),
187 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
188 	.automatic_shrinking = true,
189 	.min_size = 16 * 1024,
190 };
191 
192 static const struct rhashtable_params tuples_nat_ht_params = {
193 	.head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
194 	.key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
195 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
196 	.automatic_shrinking = true,
197 	.min_size = 16 * 1024,
198 };
199 
200 static bool
201 mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
202 {
203 	return !!(entry->tuple_nat_node.next);
204 }
205 
206 static int
207 mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
208 		       u32 *labels, u32 *id)
209 {
210 	if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
211 		*id = 0;
212 		return 0;
213 	}
214 
215 	if (mapping_add(ct_priv->labels_mapping, labels, id))
216 		return -EOPNOTSUPP;
217 
218 	return 0;
219 }
220 
221 static void
222 mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
223 {
224 	if (id)
225 		mapping_remove(ct_priv->labels_mapping, id);
226 }
227 
228 static int
229 mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
230 {
231 	struct flow_match_control control;
232 	struct flow_match_basic basic;
233 
234 	flow_rule_match_basic(rule, &basic);
235 	flow_rule_match_control(rule, &control);
236 
237 	tuple->n_proto = basic.key->n_proto;
238 	tuple->ip_proto = basic.key->ip_proto;
239 	tuple->addr_type = control.key->addr_type;
240 
241 	if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
242 		struct flow_match_ipv4_addrs match;
243 
244 		flow_rule_match_ipv4_addrs(rule, &match);
245 		tuple->ip.src_v4 = match.key->src;
246 		tuple->ip.dst_v4 = match.key->dst;
247 	} else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
248 		struct flow_match_ipv6_addrs match;
249 
250 		flow_rule_match_ipv6_addrs(rule, &match);
251 		tuple->ip.src_v6 = match.key->src;
252 		tuple->ip.dst_v6 = match.key->dst;
253 	} else {
254 		return -EOPNOTSUPP;
255 	}
256 
257 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
258 		struct flow_match_ports match;
259 
260 		flow_rule_match_ports(rule, &match);
261 		switch (tuple->ip_proto) {
262 		case IPPROTO_TCP:
263 		case IPPROTO_UDP:
264 			tuple->port.src = match.key->src;
265 			tuple->port.dst = match.key->dst;
266 			break;
267 		default:
268 			return -EOPNOTSUPP;
269 		}
270 	} else {
271 		if (tuple->ip_proto != IPPROTO_GRE)
272 			return -EOPNOTSUPP;
273 	}
274 
275 	return 0;
276 }
277 
278 static int
279 mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
280 			     struct flow_rule *rule)
281 {
282 	struct flow_action *flow_action = &rule->action;
283 	struct flow_action_entry *act;
284 	u32 offset, val, ip6_offset;
285 	int i;
286 
287 	flow_action_for_each(i, act, flow_action) {
288 		if (act->id != FLOW_ACTION_MANGLE)
289 			continue;
290 
291 		offset = act->mangle.offset;
292 		val = act->mangle.val;
293 		switch (act->mangle.htype) {
294 		case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
295 			if (offset == offsetof(struct iphdr, saddr))
296 				tuple->ip.src_v4 = cpu_to_be32(val);
297 			else if (offset == offsetof(struct iphdr, daddr))
298 				tuple->ip.dst_v4 = cpu_to_be32(val);
299 			else
300 				return -EOPNOTSUPP;
301 			break;
302 
303 		case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
304 			ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
305 			ip6_offset /= 4;
306 			if (ip6_offset < 4)
307 				tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
308 			else if (ip6_offset < 8)
309 				tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
310 			else
311 				return -EOPNOTSUPP;
312 			break;
313 
314 		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
315 			if (offset == offsetof(struct tcphdr, source))
316 				tuple->port.src = cpu_to_be16(val);
317 			else if (offset == offsetof(struct tcphdr, dest))
318 				tuple->port.dst = cpu_to_be16(val);
319 			else
320 				return -EOPNOTSUPP;
321 			break;
322 
323 		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
324 			if (offset == offsetof(struct udphdr, source))
325 				tuple->port.src = cpu_to_be16(val);
326 			else if (offset == offsetof(struct udphdr, dest))
327 				tuple->port.dst = cpu_to_be16(val);
328 			else
329 				return -EOPNOTSUPP;
330 			break;
331 
332 		default:
333 			return -EOPNOTSUPP;
334 		}
335 	}
336 
337 	return 0;
338 }
339 
340 static int
341 mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv,
342 				 struct net_device *ndev)
343 {
344 	struct mlx5e_priv *other_priv = netdev_priv(ndev);
345 	struct mlx5_core_dev *mdev = ct_priv->dev;
346 	bool vf_rep, uplink_rep;
347 
348 	vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
349 	uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
350 
351 	if (vf_rep)
352 		return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
353 	if (uplink_rep)
354 		return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
355 	if (is_vlan_dev(ndev))
356 		return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev));
357 	if (netif_is_macvlan(ndev))
358 		return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev));
359 	if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev))
360 		return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
361 
362 	return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT;
363 }
364 
365 static int
366 mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv,
367 			   struct mlx5_flow_spec *spec,
368 			   struct flow_rule *rule)
369 {
370 	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
371 				       outer_headers);
372 	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
373 				       outer_headers);
374 	u16 addr_type = 0;
375 	u8 ip_proto = 0;
376 
377 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
378 		struct flow_match_basic match;
379 
380 		flow_rule_match_basic(rule, &match);
381 
382 		mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v);
383 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
384 			 match.mask->ip_proto);
385 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
386 			 match.key->ip_proto);
387 
388 		ip_proto = match.key->ip_proto;
389 	}
390 
391 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
392 		struct flow_match_control match;
393 
394 		flow_rule_match_control(rule, &match);
395 		addr_type = match.key->addr_type;
396 	}
397 
398 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
399 		struct flow_match_ipv4_addrs match;
400 
401 		flow_rule_match_ipv4_addrs(rule, &match);
402 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
403 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
404 		       &match.mask->src, sizeof(match.mask->src));
405 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
406 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
407 		       &match.key->src, sizeof(match.key->src));
408 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
409 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
410 		       &match.mask->dst, sizeof(match.mask->dst));
411 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
412 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
413 		       &match.key->dst, sizeof(match.key->dst));
414 	}
415 
416 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
417 		struct flow_match_ipv6_addrs match;
418 
419 		flow_rule_match_ipv6_addrs(rule, &match);
420 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
421 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
422 		       &match.mask->src, sizeof(match.mask->src));
423 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
424 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
425 		       &match.key->src, sizeof(match.key->src));
426 
427 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
428 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
429 		       &match.mask->dst, sizeof(match.mask->dst));
430 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
431 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
432 		       &match.key->dst, sizeof(match.key->dst));
433 	}
434 
435 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
436 		struct flow_match_ports match;
437 
438 		flow_rule_match_ports(rule, &match);
439 		switch (ip_proto) {
440 		case IPPROTO_TCP:
441 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
442 				 tcp_sport, ntohs(match.mask->src));
443 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
444 				 tcp_sport, ntohs(match.key->src));
445 
446 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
447 				 tcp_dport, ntohs(match.mask->dst));
448 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
449 				 tcp_dport, ntohs(match.key->dst));
450 			break;
451 
452 		case IPPROTO_UDP:
453 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
454 				 udp_sport, ntohs(match.mask->src));
455 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
456 				 udp_sport, ntohs(match.key->src));
457 
458 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
459 				 udp_dport, ntohs(match.mask->dst));
460 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
461 				 udp_dport, ntohs(match.key->dst));
462 			break;
463 		default:
464 			break;
465 		}
466 	}
467 
468 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
469 		struct flow_match_tcp match;
470 
471 		flow_rule_match_tcp(rule, &match);
472 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
473 			 ntohs(match.mask->flags));
474 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
475 			 ntohs(match.key->flags));
476 	}
477 
478 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
479 		struct flow_match_meta match;
480 
481 		flow_rule_match_meta(rule, &match);
482 
483 		if (match.key->ingress_ifindex & match.mask->ingress_ifindex) {
484 			struct net_device *dev;
485 
486 			dev = dev_get_by_index(&init_net, match.key->ingress_ifindex);
487 			if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source))
488 				spec->flow_context.flow_source =
489 					mlx5_tc_ct_get_flow_source_match(ct_priv, dev);
490 
491 			dev_put(dev);
492 		}
493 	}
494 
495 	return 0;
496 }
497 
498 static void
499 mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
500 {
501 	if (entry->counter->is_shared &&
502 	    !refcount_dec_and_test(&entry->counter->refcount))
503 		return;
504 
505 	mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
506 	kfree(entry->counter);
507 }
508 
509 static void
510 mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
511 			  struct mlx5_ct_entry *entry,
512 			  bool nat)
513 {
514 	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
515 	struct mlx5_flow_attr *attr = zone_rule->attr;
516 
517 	ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
518 
519 	ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule);
520 	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
521 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
522 	kfree(attr);
523 }
524 
525 static void
526 mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
527 			   struct mlx5_ct_entry *entry)
528 {
529 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
530 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
531 
532 	atomic_dec(&ct_priv->debugfs.stats.offloaded);
533 }
534 
535 static struct flow_action_entry *
536 mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
537 {
538 	struct flow_action *flow_action = &flow_rule->action;
539 	struct flow_action_entry *act;
540 	int i;
541 
542 	flow_action_for_each(i, act, flow_action) {
543 		if (act->id == FLOW_ACTION_CT_METADATA)
544 			return act;
545 	}
546 
547 	return NULL;
548 }
549 
550 static int
551 mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
552 			       struct mlx5e_tc_mod_hdr_acts *mod_acts,
553 			       u8 ct_state,
554 			       u32 mark,
555 			       u32 labels_id,
556 			       u8 zone_restore_id)
557 {
558 	enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
559 	struct mlx5_core_dev *dev = ct_priv->dev;
560 	int err;
561 
562 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
563 					CTSTATE_TO_REG, ct_state);
564 	if (err)
565 		return err;
566 
567 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
568 					MARK_TO_REG, mark);
569 	if (err)
570 		return err;
571 
572 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
573 					LABELS_TO_REG, labels_id);
574 	if (err)
575 		return err;
576 
577 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
578 					ZONE_RESTORE_TO_REG, zone_restore_id);
579 	if (err)
580 		return err;
581 
582 	/* Make another copy of zone id in reg_b for
583 	 * NIC rx flows since we don't copy reg_c1 to
584 	 * reg_b upon miss.
585 	 */
586 	if (ns != MLX5_FLOW_NAMESPACE_FDB) {
587 		err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
588 						NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
589 		if (err)
590 			return err;
591 	}
592 	return 0;
593 }
594 
595 static int
596 mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
597 				   char *modact)
598 {
599 	u32 offset = act->mangle.offset, field;
600 
601 	switch (act->mangle.htype) {
602 	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
603 		MLX5_SET(set_action_in, modact, length, 0);
604 		if (offset == offsetof(struct iphdr, saddr))
605 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
606 		else if (offset == offsetof(struct iphdr, daddr))
607 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
608 		else
609 			return -EOPNOTSUPP;
610 		break;
611 
612 	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
613 		MLX5_SET(set_action_in, modact, length, 0);
614 		if (offset == offsetof(struct ipv6hdr, saddr) + 12)
615 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
616 		else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
617 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
618 		else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
619 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
620 		else if (offset == offsetof(struct ipv6hdr, saddr))
621 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
622 		else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
623 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
624 		else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
625 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
626 		else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
627 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
628 		else if (offset == offsetof(struct ipv6hdr, daddr))
629 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
630 		else
631 			return -EOPNOTSUPP;
632 		break;
633 
634 	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
635 		MLX5_SET(set_action_in, modact, length, 16);
636 		if (offset == offsetof(struct tcphdr, source))
637 			field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
638 		else if (offset == offsetof(struct tcphdr, dest))
639 			field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
640 		else
641 			return -EOPNOTSUPP;
642 		break;
643 
644 	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
645 		MLX5_SET(set_action_in, modact, length, 16);
646 		if (offset == offsetof(struct udphdr, source))
647 			field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
648 		else if (offset == offsetof(struct udphdr, dest))
649 			field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
650 		else
651 			return -EOPNOTSUPP;
652 		break;
653 
654 	default:
655 		return -EOPNOTSUPP;
656 	}
657 
658 	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
659 	MLX5_SET(set_action_in, modact, offset, 0);
660 	MLX5_SET(set_action_in, modact, field, field);
661 	MLX5_SET(set_action_in, modact, data, act->mangle.val);
662 
663 	return 0;
664 }
665 
666 static int
667 mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
668 			    struct flow_rule *flow_rule,
669 			    struct mlx5e_tc_mod_hdr_acts *mod_acts)
670 {
671 	struct flow_action *flow_action = &flow_rule->action;
672 	struct mlx5_core_dev *mdev = ct_priv->dev;
673 	struct flow_action_entry *act;
674 	char *modact;
675 	int err, i;
676 
677 	flow_action_for_each(i, act, flow_action) {
678 		switch (act->id) {
679 		case FLOW_ACTION_MANGLE: {
680 			modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts);
681 			if (IS_ERR(modact))
682 				return PTR_ERR(modact);
683 
684 			err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
685 			if (err)
686 				return err;
687 
688 			mod_acts->num_actions++;
689 		}
690 		break;
691 
692 		case FLOW_ACTION_CT_METADATA:
693 			/* Handled earlier */
694 			continue;
695 		default:
696 			return -EOPNOTSUPP;
697 		}
698 	}
699 
700 	return 0;
701 }
702 
703 static int
704 mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
705 				struct mlx5_flow_attr *attr,
706 				struct flow_rule *flow_rule,
707 				struct mlx5e_mod_hdr_handle **mh,
708 				u8 zone_restore_id, bool nat_table, bool has_nat)
709 {
710 	DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS);
711 	DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr);
712 	struct flow_action_entry *meta;
713 	enum ip_conntrack_info ctinfo;
714 	u16 ct_state = 0;
715 	int err;
716 
717 	meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
718 	if (!meta)
719 		return -EOPNOTSUPP;
720 	ctinfo = meta->ct_metadata.cookie & NFCT_INFOMASK;
721 
722 	err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels,
723 				     &attr->ct_attr.ct_labels_id);
724 	if (err)
725 		return -EOPNOTSUPP;
726 	if (nat_table) {
727 		if (has_nat) {
728 			err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts);
729 			if (err)
730 				goto err_mapping;
731 		}
732 
733 		ct_state |= MLX5_CT_STATE_NAT_BIT;
734 	}
735 
736 	ct_state |= MLX5_CT_STATE_TRK_BIT;
737 	ct_state |= ctinfo == IP_CT_NEW ? MLX5_CT_STATE_NEW_BIT : MLX5_CT_STATE_ESTABLISHED_BIT;
738 	ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT;
739 	err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
740 					     ct_state,
741 					     meta->ct_metadata.mark,
742 					     attr->ct_attr.ct_labels_id,
743 					     zone_restore_id);
744 	if (err)
745 		goto err_mapping;
746 
747 	if (nat_table && has_nat) {
748 		attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type,
749 							    mod_acts.num_actions,
750 							    mod_acts.actions);
751 		if (IS_ERR(attr->modify_hdr)) {
752 			err = PTR_ERR(attr->modify_hdr);
753 			goto err_mapping;
754 		}
755 
756 		*mh = NULL;
757 	} else {
758 		*mh = mlx5e_mod_hdr_attach(ct_priv->dev,
759 					   ct_priv->mod_hdr_tbl,
760 					   ct_priv->ns_type,
761 					   &mod_acts);
762 		if (IS_ERR(*mh)) {
763 			err = PTR_ERR(*mh);
764 			goto err_mapping;
765 		}
766 		attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
767 	}
768 
769 	mlx5e_mod_hdr_dealloc(&mod_acts);
770 	return 0;
771 
772 err_mapping:
773 	mlx5e_mod_hdr_dealloc(&mod_acts);
774 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
775 	return err;
776 }
777 
778 static void
779 mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
780 				 struct mlx5_flow_attr *attr,
781 				 struct mlx5e_mod_hdr_handle *mh)
782 {
783 	if (mh)
784 		mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh);
785 	else
786 		mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr);
787 }
788 
789 static int
790 mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
791 			  struct flow_rule *flow_rule,
792 			  struct mlx5_ct_entry *entry,
793 			  bool nat, u8 zone_restore_id)
794 {
795 	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
796 	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
797 	struct mlx5_flow_spec *spec = NULL;
798 	struct mlx5_flow_attr *attr;
799 	int err;
800 
801 	zone_rule->nat = nat;
802 
803 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
804 	if (!spec)
805 		return -ENOMEM;
806 
807 	attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
808 	if (!attr) {
809 		err = -ENOMEM;
810 		goto err_attr;
811 	}
812 
813 	err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
814 					      &zone_rule->mh,
815 					      zone_restore_id,
816 					      nat,
817 					      mlx5_tc_ct_entry_has_nat(entry));
818 	if (err) {
819 		ct_dbg("Failed to create ct entry mod hdr");
820 		goto err_mod_hdr;
821 	}
822 
823 	attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
824 		       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
825 		       MLX5_FLOW_CONTEXT_ACTION_COUNT;
826 	attr->dest_chain = 0;
827 	attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
828 	attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
829 	if (entry->tuple.ip_proto == IPPROTO_TCP ||
830 	    entry->tuple.ip_proto == IPPROTO_UDP)
831 		attr->outer_match_level = MLX5_MATCH_L4;
832 	else
833 		attr->outer_match_level = MLX5_MATCH_L3;
834 	attr->counter = entry->counter->counter;
835 	attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
836 	if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
837 		attr->esw_attr->in_mdev = priv->mdev;
838 
839 	mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule);
840 	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);
841 
842 	zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule);
843 	if (IS_ERR(zone_rule->rule)) {
844 		err = PTR_ERR(zone_rule->rule);
845 		ct_dbg("Failed to add ct entry rule, nat: %d", nat);
846 		goto err_rule;
847 	}
848 
849 	zone_rule->attr = attr;
850 
851 	kvfree(spec);
852 	ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
853 
854 	return 0;
855 
856 err_rule:
857 	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
858 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
859 err_mod_hdr:
860 	kfree(attr);
861 err_attr:
862 	kvfree(spec);
863 	return err;
864 }
865 
866 static int
867 mlx5_tc_ct_entry_replace_rule(struct mlx5_tc_ct_priv *ct_priv,
868 			      struct flow_rule *flow_rule,
869 			      struct mlx5_ct_entry *entry,
870 			      bool nat, u8 zone_restore_id)
871 {
872 	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
873 	struct mlx5_flow_attr *attr = zone_rule->attr, *old_attr;
874 	struct mlx5e_mod_hdr_handle *mh;
875 	struct mlx5_ct_fs_rule *rule;
876 	struct mlx5_flow_spec *spec;
877 	int err;
878 
879 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
880 	if (!spec)
881 		return -ENOMEM;
882 
883 	old_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
884 	if (!old_attr) {
885 		err = -ENOMEM;
886 		goto err_attr;
887 	}
888 	*old_attr = *attr;
889 
890 	err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, &mh, zone_restore_id,
891 					      nat, mlx5_tc_ct_entry_has_nat(entry));
892 	if (err) {
893 		ct_dbg("Failed to create ct entry mod hdr");
894 		goto err_mod_hdr;
895 	}
896 
897 	mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule);
898 	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);
899 
900 	rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule);
901 	if (IS_ERR(rule)) {
902 		err = PTR_ERR(rule);
903 		ct_dbg("Failed to add replacement ct entry rule, nat: %d", nat);
904 		goto err_rule;
905 	}
906 
907 	ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule);
908 	zone_rule->rule = rule;
909 	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, old_attr, zone_rule->mh);
910 	zone_rule->mh = mh;
911 	mlx5_put_label_mapping(ct_priv, old_attr->ct_attr.ct_labels_id);
912 
913 	kfree(old_attr);
914 	kvfree(spec);
915 	ct_dbg("Replaced ct entry rule in zone %d", entry->tuple.zone);
916 
917 	return 0;
918 
919 err_rule:
920 	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, mh);
921 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
922 err_mod_hdr:
923 	*attr = *old_attr;
924 	kfree(old_attr);
925 err_attr:
926 	kvfree(spec);
927 	return err;
928 }
929 
930 static bool
931 mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
932 {
933 	return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
934 }
935 
936 static struct mlx5_ct_entry *
937 mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
938 {
939 	struct mlx5_ct_entry *entry;
940 
941 	entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
942 				       tuples_ht_params);
943 	if (entry && mlx5_tc_ct_entry_valid(entry) &&
944 	    refcount_inc_not_zero(&entry->refcnt)) {
945 		return entry;
946 	} else if (!entry) {
947 		entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
948 					       tuple, tuples_nat_ht_params);
949 		if (entry && mlx5_tc_ct_entry_valid(entry) &&
950 		    refcount_inc_not_zero(&entry->refcnt))
951 			return entry;
952 	}
953 
954 	return entry ? ERR_PTR(-EINVAL) : NULL;
955 }
956 
957 static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
958 {
959 	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
960 
961 	rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
962 			       &entry->tuple_nat_node,
963 			       tuples_nat_ht_params);
964 	rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
965 			       tuples_ht_params);
966 }
967 
968 static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
969 {
970 	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
971 
972 	mlx5_tc_ct_entry_del_rules(ct_priv, entry);
973 
974 	spin_lock_bh(&ct_priv->ht_lock);
975 	mlx5_tc_ct_entry_remove_from_tuples(entry);
976 	spin_unlock_bh(&ct_priv->ht_lock);
977 
978 	mlx5_tc_ct_counter_put(ct_priv, entry);
979 	kfree(entry);
980 }
981 
982 static void
983 mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
984 {
985 	if (!refcount_dec_and_test(&entry->refcnt))
986 		return;
987 
988 	mlx5_tc_ct_entry_del(entry);
989 }
990 
991 static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
992 {
993 	struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);
994 
995 	mlx5_tc_ct_entry_del(entry);
996 }
997 
998 static void
999 __mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
1000 {
1001 	if (!refcount_dec_and_test(&entry->refcnt))
1002 		return;
1003 
1004 	INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
1005 	queue_work(entry->ct_priv->wq, &entry->work);
1006 }
1007 
1008 static struct mlx5_ct_counter *
1009 mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
1010 {
1011 	struct mlx5_ct_counter *counter;
1012 	int ret;
1013 
1014 	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
1015 	if (!counter)
1016 		return ERR_PTR(-ENOMEM);
1017 
1018 	counter->is_shared = false;
1019 	counter->counter = mlx5_fc_create_ex(ct_priv->dev, true);
1020 	if (IS_ERR(counter->counter)) {
1021 		ct_dbg("Failed to create counter for ct entry");
1022 		ret = PTR_ERR(counter->counter);
1023 		kfree(counter);
1024 		return ERR_PTR(ret);
1025 	}
1026 
1027 	return counter;
1028 }
1029 
1030 static struct mlx5_ct_counter *
1031 mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
1032 			      struct mlx5_ct_entry *entry)
1033 {
1034 	struct mlx5_ct_tuple rev_tuple = entry->tuple;
1035 	struct mlx5_ct_counter *shared_counter;
1036 	struct mlx5_ct_entry *rev_entry;
1037 
1038 	/* get the reversed tuple */
1039 	swap(rev_tuple.port.src, rev_tuple.port.dst);
1040 
1041 	if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1042 		__be32 tmp_addr = rev_tuple.ip.src_v4;
1043 
1044 		rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
1045 		rev_tuple.ip.dst_v4 = tmp_addr;
1046 	} else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
1047 		struct in6_addr tmp_addr = rev_tuple.ip.src_v6;
1048 
1049 		rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
1050 		rev_tuple.ip.dst_v6 = tmp_addr;
1051 	} else {
1052 		return ERR_PTR(-EOPNOTSUPP);
1053 	}
1054 
1055 	/* Use the same counter as the reverse direction */
1056 	spin_lock_bh(&ct_priv->ht_lock);
1057 	rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);
1058 
1059 	if (IS_ERR(rev_entry)) {
1060 		spin_unlock_bh(&ct_priv->ht_lock);
1061 		goto create_counter;
1062 	}
1063 
1064 	if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
1065 		ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry);
1066 		shared_counter = rev_entry->counter;
1067 		spin_unlock_bh(&ct_priv->ht_lock);
1068 
1069 		mlx5_tc_ct_entry_put(rev_entry);
1070 		return shared_counter;
1071 	}
1072 
1073 	spin_unlock_bh(&ct_priv->ht_lock);
1074 
1075 create_counter:
1076 
1077 	shared_counter = mlx5_tc_ct_counter_create(ct_priv);
1078 	if (IS_ERR(shared_counter))
1079 		return shared_counter;
1080 
1081 	shared_counter->is_shared = true;
1082 	refcount_set(&shared_counter->refcount, 1);
1083 	return shared_counter;
1084 }
1085 
1086 static int
1087 mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
1088 			   struct flow_rule *flow_rule,
1089 			   struct mlx5_ct_entry *entry,
1090 			   u8 zone_restore_id)
1091 {
1092 	int err;
1093 
1094 	if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
1095 		entry->counter = mlx5_tc_ct_counter_create(ct_priv);
1096 	else
1097 		entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
1098 
1099 	if (IS_ERR(entry->counter)) {
1100 		err = PTR_ERR(entry->counter);
1101 		return err;
1102 	}
1103 
1104 	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
1105 					zone_restore_id);
1106 	if (err)
1107 		goto err_orig;
1108 
1109 	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
1110 					zone_restore_id);
1111 	if (err)
1112 		goto err_nat;
1113 
1114 	atomic_inc(&ct_priv->debugfs.stats.offloaded);
1115 	return 0;
1116 
1117 err_nat:
1118 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
1119 err_orig:
1120 	mlx5_tc_ct_counter_put(ct_priv, entry);
1121 	return err;
1122 }
1123 
1124 static int
1125 mlx5_tc_ct_entry_replace_rules(struct mlx5_tc_ct_priv *ct_priv,
1126 			       struct flow_rule *flow_rule,
1127 			       struct mlx5_ct_entry *entry,
1128 			       u8 zone_restore_id)
1129 {
1130 	int err;
1131 
1132 	err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, false,
1133 					    zone_restore_id);
1134 	if (err)
1135 		return err;
1136 
1137 	err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, true,
1138 					    zone_restore_id);
1139 	if (err)
1140 		mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
1141 	return err;
1142 }
1143 
1144 static int
1145 mlx5_tc_ct_block_flow_offload_replace(struct mlx5_ct_ft *ft, struct flow_rule *flow_rule,
1146 				      struct mlx5_ct_entry *entry, unsigned long cookie)
1147 {
1148 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1149 	int err;
1150 
1151 	err = mlx5_tc_ct_entry_replace_rules(ct_priv, flow_rule, entry, ft->zone_restore_id);
1152 	if (!err)
1153 		return 0;
1154 
1155 	/* If failed to update the entry, then look it up again under ht_lock
1156 	 * protection and properly delete it.
1157 	 */
1158 	spin_lock_bh(&ct_priv->ht_lock);
1159 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1160 	if (entry) {
1161 		rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
1162 		spin_unlock_bh(&ct_priv->ht_lock);
1163 		mlx5_tc_ct_entry_put(entry);
1164 	} else {
1165 		spin_unlock_bh(&ct_priv->ht_lock);
1166 	}
1167 	return err;
1168 }
1169 
1170 static int
1171 mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
1172 				  struct flow_cls_offload *flow)
1173 {
1174 	struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
1175 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1176 	struct flow_action_entry *meta_action;
1177 	unsigned long cookie = flow->cookie;
1178 	struct mlx5_ct_entry *entry;
1179 	int err;
1180 
1181 	meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
1182 	if (!meta_action)
1183 		return -EOPNOTSUPP;
1184 
1185 	spin_lock_bh(&ct_priv->ht_lock);
1186 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1187 	if (entry && refcount_inc_not_zero(&entry->refcnt)) {
1188 		if (entry->restore_cookie == meta_action->ct_metadata.cookie) {
1189 			spin_unlock_bh(&ct_priv->ht_lock);
1190 			mlx5_tc_ct_entry_put(entry);
1191 			return -EEXIST;
1192 		}
1193 		entry->restore_cookie = meta_action->ct_metadata.cookie;
1194 		spin_unlock_bh(&ct_priv->ht_lock);
1195 
1196 		err = mlx5_tc_ct_block_flow_offload_replace(ft, flow_rule, entry, cookie);
1197 		mlx5_tc_ct_entry_put(entry);
1198 		return err;
1199 	}
1200 	spin_unlock_bh(&ct_priv->ht_lock);
1201 
1202 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1203 	if (!entry)
1204 		return -ENOMEM;
1205 
1206 	entry->tuple.zone = ft->zone;
1207 	entry->cookie = flow->cookie;
1208 	entry->restore_cookie = meta_action->ct_metadata.cookie;
1209 	refcount_set(&entry->refcnt, 2);
1210 	entry->ct_priv = ct_priv;
1211 
1212 	err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
1213 	if (err)
1214 		goto err_set;
1215 
1216 	memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
1217 	err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
1218 	if (err)
1219 		goto err_set;
1220 
1221 	spin_lock_bh(&ct_priv->ht_lock);
1222 
1223 	err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
1224 					    cts_ht_params);
1225 	if (err)
1226 		goto err_entries;
1227 
1228 	err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
1229 					    &entry->tuple_node,
1230 					    tuples_ht_params);
1231 	if (err)
1232 		goto err_tuple;
1233 
1234 	if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
1235 		err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
1236 						    &entry->tuple_nat_node,
1237 						    tuples_nat_ht_params);
1238 		if (err)
1239 			goto err_tuple_nat;
1240 	}
1241 	spin_unlock_bh(&ct_priv->ht_lock);
1242 
1243 	err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
1244 					 ft->zone_restore_id);
1245 	if (err)
1246 		goto err_rules;
1247 
1248 	set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
1249 	mlx5_tc_ct_entry_put(entry); /* this function reference */
1250 
1251 	return 0;
1252 
1253 err_rules:
1254 	spin_lock_bh(&ct_priv->ht_lock);
1255 	if (mlx5_tc_ct_entry_has_nat(entry))
1256 		rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
1257 				       &entry->tuple_nat_node, tuples_nat_ht_params);
1258 err_tuple_nat:
1259 	rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
1260 			       &entry->tuple_node,
1261 			       tuples_ht_params);
1262 err_tuple:
1263 	rhashtable_remove_fast(&ft->ct_entries_ht,
1264 			       &entry->node,
1265 			       cts_ht_params);
1266 err_entries:
1267 	spin_unlock_bh(&ct_priv->ht_lock);
1268 err_set:
1269 	kfree(entry);
1270 	if (err != -EEXIST)
1271 		netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
1272 	return err;
1273 }
1274 
1275 static int
1276 mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
1277 				  struct flow_cls_offload *flow)
1278 {
1279 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1280 	unsigned long cookie = flow->cookie;
1281 	struct mlx5_ct_entry *entry;
1282 
1283 	spin_lock_bh(&ct_priv->ht_lock);
1284 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1285 	if (!entry) {
1286 		spin_unlock_bh(&ct_priv->ht_lock);
1287 		return -ENOENT;
1288 	}
1289 
1290 	if (!mlx5_tc_ct_entry_valid(entry)) {
1291 		spin_unlock_bh(&ct_priv->ht_lock);
1292 		return -EINVAL;
1293 	}
1294 
1295 	rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
1296 	spin_unlock_bh(&ct_priv->ht_lock);
1297 
1298 	mlx5_tc_ct_entry_put(entry);
1299 
1300 	return 0;
1301 }
1302 
1303 static int
1304 mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
1305 				    struct flow_cls_offload *f)
1306 {
1307 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1308 	unsigned long cookie = f->cookie;
1309 	struct mlx5_ct_entry *entry;
1310 	u64 lastuse, packets, bytes;
1311 
1312 	spin_lock_bh(&ct_priv->ht_lock);
1313 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1314 	if (!entry) {
1315 		spin_unlock_bh(&ct_priv->ht_lock);
1316 		return -ENOENT;
1317 	}
1318 
1319 	if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
1320 		spin_unlock_bh(&ct_priv->ht_lock);
1321 		return -EINVAL;
1322 	}
1323 
1324 	spin_unlock_bh(&ct_priv->ht_lock);
1325 
1326 	mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
1327 	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
1328 			  FLOW_ACTION_HW_STATS_DELAYED);
1329 
1330 	mlx5_tc_ct_entry_put(entry);
1331 	return 0;
1332 }
1333 
1334 static int
1335 mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
1336 			      void *cb_priv)
1337 {
1338 	struct flow_cls_offload *f = type_data;
1339 	struct mlx5_ct_ft *ft = cb_priv;
1340 
1341 	if (type != TC_SETUP_CLSFLOWER)
1342 		return -EOPNOTSUPP;
1343 
1344 	switch (f->command) {
1345 	case FLOW_CLS_REPLACE:
1346 		return mlx5_tc_ct_block_flow_offload_add(ft, f);
1347 	case FLOW_CLS_DESTROY:
1348 		return mlx5_tc_ct_block_flow_offload_del(ft, f);
1349 	case FLOW_CLS_STATS:
1350 		return mlx5_tc_ct_block_flow_offload_stats(ft, f);
1351 	default:
1352 		break;
1353 	}
1354 
1355 	return -EOPNOTSUPP;
1356 }
1357 
1358 static bool
1359 mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
1360 			u16 zone)
1361 {
1362 	struct flow_keys flow_keys;
1363 
1364 	skb_reset_network_header(skb);
1365 	skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);
1366 
1367 	tuple->zone = zone;
1368 
1369 	if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
1370 	    flow_keys.basic.ip_proto != IPPROTO_UDP &&
1371 	    flow_keys.basic.ip_proto != IPPROTO_GRE)
1372 		return false;
1373 
1374 	if (flow_keys.basic.ip_proto == IPPROTO_TCP ||
1375 	    flow_keys.basic.ip_proto == IPPROTO_UDP) {
1376 		tuple->port.src = flow_keys.ports.src;
1377 		tuple->port.dst = flow_keys.ports.dst;
1378 	}
1379 	tuple->n_proto = flow_keys.basic.n_proto;
1380 	tuple->ip_proto = flow_keys.basic.ip_proto;
1381 
1382 	switch (flow_keys.basic.n_proto) {
1383 	case htons(ETH_P_IP):
1384 		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1385 		tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
1386 		tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
1387 		break;
1388 
1389 	case htons(ETH_P_IPV6):
1390 		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1391 		tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
1392 		tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
1393 		break;
1394 	default:
1395 		goto out;
1396 	}
1397 
1398 	return true;
1399 
1400 out:
1401 	return false;
1402 }
1403 
1404 int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
1405 {
1406 	u32 ctstate = 0, ctstate_mask = 0;
1407 
1408 	mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
1409 					&ctstate, &ctstate_mask);
1410 
1411 	if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT)
1412 		return -EOPNOTSUPP;
1413 
1414 	ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
1415 	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1416 				    ctstate, ctstate_mask);
1417 
1418 	return 0;
1419 }
1420 
1421 void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
1422 {
1423 	if (!priv || !ct_attr->ct_labels_id)
1424 		return;
1425 
1426 	mlx5_put_label_mapping(priv, ct_attr->ct_labels_id);
1427 }
1428 
1429 int
1430 mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
1431 		     struct mlx5_flow_spec *spec,
1432 		     struct flow_cls_offload *f,
1433 		     struct mlx5_ct_attr *ct_attr,
1434 		     struct netlink_ext_ack *extack)
1435 {
1436 	bool trk, est, untrk, unnew, unest, new, rpl, unrpl, rel, unrel, inv, uninv;
1437 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1438 	struct flow_dissector_key_ct *mask, *key;
1439 	u32 ctstate = 0, ctstate_mask = 0;
1440 	u16 ct_state_on, ct_state_off;
1441 	u16 ct_state, ct_state_mask;
1442 	struct flow_match_ct match;
1443 	u32 ct_labels[4];
1444 
1445 	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
1446 		return 0;
1447 
1448 	if (!priv) {
1449 		NL_SET_ERR_MSG_MOD(extack,
1450 				   "offload of ct matching isn't available");
1451 		return -EOPNOTSUPP;
1452 	}
1453 
1454 	flow_rule_match_ct(rule, &match);
1455 
1456 	key = match.key;
1457 	mask = match.mask;
1458 
1459 	ct_state = key->ct_state;
1460 	ct_state_mask = mask->ct_state;
1461 
1462 	if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
1463 			      TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
1464 			      TCA_FLOWER_KEY_CT_FLAGS_NEW |
1465 			      TCA_FLOWER_KEY_CT_FLAGS_REPLY |
1466 			      TCA_FLOWER_KEY_CT_FLAGS_RELATED |
1467 			      TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
1468 		NL_SET_ERR_MSG_MOD(extack,
1469 				   "only ct_state trk, est, new and rpl are supported for offload");
1470 		return -EOPNOTSUPP;
1471 	}
1472 
1473 	ct_state_on = ct_state & ct_state_mask;
1474 	ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
1475 	trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1476 	new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
1477 	est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1478 	rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
1479 	rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
1480 	inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
1481 	untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1482 	unnew = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_NEW;
1483 	unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1484 	unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
1485 	unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
1486 	uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
1487 
1488 	ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
1489 	ctstate |= new ? MLX5_CT_STATE_NEW_BIT : 0;
1490 	ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1491 	ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
1492 	ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
1493 	ctstate_mask |= (unnew || new) ? MLX5_CT_STATE_NEW_BIT : 0;
1494 	ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1495 	ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
1496 	ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0;
1497 	ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0;
1498 
1499 	if (rel) {
1500 		NL_SET_ERR_MSG_MOD(extack,
1501 				   "matching on ct_state +rel isn't supported");
1502 		return -EOPNOTSUPP;
1503 	}
1504 
1505 	if (inv) {
1506 		NL_SET_ERR_MSG_MOD(extack,
1507 				   "matching on ct_state +inv isn't supported");
1508 		return -EOPNOTSUPP;
1509 	}
1510 
1511 	if (mask->ct_zone)
1512 		mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1513 					    key->ct_zone, MLX5_CT_ZONE_MASK);
1514 	if (ctstate_mask)
1515 		mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1516 					    ctstate, ctstate_mask);
1517 	if (mask->ct_mark)
1518 		mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
1519 					    key->ct_mark, mask->ct_mark);
1520 	if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
1521 	    mask->ct_labels[3]) {
1522 		ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
1523 		ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
1524 		ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
1525 		ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
1526 		if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id))
1527 			return -EOPNOTSUPP;
1528 		mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
1529 					    MLX5_CT_LABELS_MASK);
1530 	}
1531 
1532 	return 0;
1533 }
1534 
1535 int
1536 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
1537 			struct mlx5_flow_attr *attr,
1538 			const struct flow_action_entry *act,
1539 			struct netlink_ext_ack *extack)
1540 {
1541 	if (!priv) {
1542 		NL_SET_ERR_MSG_MOD(extack,
1543 				   "offload of ct action isn't available");
1544 		return -EOPNOTSUPP;
1545 	}
1546 
1547 	attr->ct_attr.ct_action |= act->ct.action; /* So we can have clear + ct */
1548 	attr->ct_attr.zone = act->ct.zone;
1549 	if (!(act->ct.action & TCA_CT_ACT_CLEAR))
1550 		attr->ct_attr.nf_ft = act->ct.flow_table;
1551 	attr->ct_attr.act_miss_cookie = act->miss_cookie;
1552 
1553 	return 0;
1554 }
1555 
1556 static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
1557 				  struct mlx5_tc_ct_pre *pre_ct,
1558 				  bool nat)
1559 {
1560 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1561 	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1562 	struct mlx5_core_dev *dev = ct_priv->dev;
1563 	struct mlx5_flow_table *ft = pre_ct->ft;
1564 	struct mlx5_flow_destination dest = {};
1565 	struct mlx5_flow_act flow_act = {};
1566 	struct mlx5_modify_hdr *mod_hdr;
1567 	struct mlx5_flow_handle *rule;
1568 	struct mlx5_flow_spec *spec;
1569 	u32 ctstate;
1570 	u16 zone;
1571 	int err;
1572 
1573 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1574 	if (!spec)
1575 		return -ENOMEM;
1576 
1577 	zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
1578 	err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
1579 					ZONE_TO_REG, zone);
1580 	if (err) {
1581 		ct_dbg("Failed to set zone register mapping");
1582 		goto err_mapping;
1583 	}
1584 
1585 	mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
1586 					   pre_mod_acts.num_actions,
1587 					   pre_mod_acts.actions);
1588 
1589 	if (IS_ERR(mod_hdr)) {
1590 		err = PTR_ERR(mod_hdr);
1591 		ct_dbg("Failed to create pre ct mod hdr");
1592 		goto err_mapping;
1593 	}
1594 	pre_ct->modify_hdr = mod_hdr;
1595 
1596 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1597 			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1598 	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1599 	flow_act.modify_hdr = mod_hdr;
1600 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1601 
1602 	/* add flow rule */
1603 	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1604 				    zone, MLX5_CT_ZONE_MASK);
1605 	ctstate = MLX5_CT_STATE_TRK_BIT;
1606 	if (nat)
1607 		ctstate |= MLX5_CT_STATE_NAT_BIT;
1608 	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
1609 
1610 	dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
1611 	rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
1612 	if (IS_ERR(rule)) {
1613 		err = PTR_ERR(rule);
1614 		ct_dbg("Failed to add pre ct flow rule zone %d", zone);
1615 		goto err_flow_rule;
1616 	}
1617 	pre_ct->flow_rule = rule;
1618 
1619 	/* add miss rule */
1620 	dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
1621 	rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1);
1622 	if (IS_ERR(rule)) {
1623 		err = PTR_ERR(rule);
1624 		ct_dbg("Failed to add pre ct miss rule zone %d", zone);
1625 		goto err_miss_rule;
1626 	}
1627 	pre_ct->miss_rule = rule;
1628 
1629 	mlx5e_mod_hdr_dealloc(&pre_mod_acts);
1630 	kvfree(spec);
1631 	return 0;
1632 
1633 err_miss_rule:
1634 	mlx5_del_flow_rules(pre_ct->flow_rule);
1635 err_flow_rule:
1636 	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1637 err_mapping:
1638 	mlx5e_mod_hdr_dealloc(&pre_mod_acts);
1639 	kvfree(spec);
1640 	return err;
1641 }
1642 
1643 static void
1644 tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
1645 		       struct mlx5_tc_ct_pre *pre_ct)
1646 {
1647 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1648 	struct mlx5_core_dev *dev = ct_priv->dev;
1649 
1650 	mlx5_del_flow_rules(pre_ct->flow_rule);
1651 	mlx5_del_flow_rules(pre_ct->miss_rule);
1652 	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1653 }
1654 
1655 static int
1656 mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
1657 			struct mlx5_tc_ct_pre *pre_ct,
1658 			bool nat)
1659 {
1660 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1661 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1662 	struct mlx5_core_dev *dev = ct_priv->dev;
1663 	struct mlx5_flow_table_attr ft_attr = {};
1664 	struct mlx5_flow_namespace *ns;
1665 	struct mlx5_flow_table *ft;
1666 	struct mlx5_flow_group *g;
1667 	u32 metadata_reg_c_2_mask;
1668 	u32 *flow_group_in;
1669 	void *misc;
1670 	int err;
1671 
1672 	ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
1673 	if (!ns) {
1674 		err = -EOPNOTSUPP;
1675 		ct_dbg("Failed to get flow namespace");
1676 		return err;
1677 	}
1678 
1679 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
1680 	if (!flow_group_in)
1681 		return -ENOMEM;
1682 
1683 	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
1684 	ft_attr.prio =  ct_priv->ns_type ==  MLX5_FLOW_NAMESPACE_FDB ?
1685 			FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
1686 	ft_attr.max_fte = 2;
1687 	ft_attr.level = 1;
1688 	ft = mlx5_create_flow_table(ns, &ft_attr);
1689 	if (IS_ERR(ft)) {
1690 		err = PTR_ERR(ft);
1691 		ct_dbg("Failed to create pre ct table");
1692 		goto out_free;
1693 	}
1694 	pre_ct->ft = ft;
1695 
1696 	/* create flow group */
1697 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1698 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
1699 	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1700 		 MLX5_MATCH_MISC_PARAMETERS_2);
1701 
1702 	misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1703 			    match_criteria.misc_parameters_2);
1704 
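	/* Zone and ct state share metadata register C2: the zone occupies the
	 * low 16 bits and the ct state bits sit in the high 16 bits, so one
	 * register match in this group covers both.
	 */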
1705 	metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
1706 	metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
1707 	if (nat)
1708 		metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
1709 
1710 	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
1711 		 metadata_reg_c_2_mask);
1712 
1713 	g = mlx5_create_flow_group(ft, flow_group_in);
1714 	if (IS_ERR(g)) {
1715 		err = PTR_ERR(g);
1716 		ct_dbg("Failed to create pre ct group");
1717 		goto err_flow_grp;
1718 	}
1719 	pre_ct->flow_grp = g;
1720 
1721 	/* create miss group */
1722 	memset(flow_group_in, 0, inlen);
1723 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
1724 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
1725 	g = mlx5_create_flow_group(ft, flow_group_in);
1726 	if (IS_ERR(g)) {
1727 		err = PTR_ERR(g);
1728 		ct_dbg("Failed to create pre ct miss group");
1729 		goto err_miss_grp;
1730 	}
1731 	pre_ct->miss_grp = g;
1732 
1733 	err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
1734 	if (err)
1735 		goto err_add_rules;
1736 
1737 	kvfree(flow_group_in);
1738 	return 0;
1739 
1740 err_add_rules:
1741 	mlx5_destroy_flow_group(pre_ct->miss_grp);
1742 err_miss_grp:
1743 	mlx5_destroy_flow_group(pre_ct->flow_grp);
1744 err_flow_grp:
1745 	mlx5_destroy_flow_table(ft);
1746 out_free:
1747 	kvfree(flow_group_in);
1748 	return err;
1749 }
1750 
1751 static void
1752 mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
1753 		       struct mlx5_tc_ct_pre *pre_ct)
1754 {
1755 	tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
1756 	mlx5_destroy_flow_group(pre_ct->miss_grp);
1757 	mlx5_destroy_flow_group(pre_ct->flow_grp);
1758 	mlx5_destroy_flow_table(pre_ct->ft);
1759 }
1760 
1761 static int
1762 mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
1763 {
1764 	int err;
1765 
1766 	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
1767 	if (err)
1768 		return err;
1769 
1770 	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
1771 	if (err)
1772 		goto err_pre_ct_nat;
1773 
1774 	return 0;
1775 
1776 err_pre_ct_nat:
1777 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1778 	return err;
1779 }
1780 
1781 static void
1782 mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
1783 {
1784 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
1785 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1786 }
1787 
1788 /* To avoid a false lock dependency warning, give ct_entries_ht a lock
1789  * class different from the lock class of the ht used when deleting the
1790  * last flow from a group and then deleting the group: that path reaches
1791  * del_sw_flow_group(), which calls rhashtable_destroy() on fg->ftes_hash
1792  * and takes that ht->mutex, which is not the same ht->mutex as here.
1793  */
1794 static struct lock_class_key ct_entries_ht_lock_key;
1795 
1796 static struct mlx5_ct_ft *
1797 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
1798 		     struct nf_flowtable *nf_ft)
1799 {
1800 	struct mlx5_ct_ft *ft;
1801 	int err;
1802 
1803 	ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
1804 	if (ft) {
1805 		refcount_inc(&ft->refcount);
1806 		return ft;
1807 	}
1808 
1809 	ft = kzalloc(sizeof(*ft), GFP_KERNEL);
1810 	if (!ft)
1811 		return ERR_PTR(-ENOMEM);
1812 
1813 	err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
1814 	if (err)
1815 		goto err_mapping;
1816 
1817 	ft->zone = zone;
1818 	ft->nf_ft = nf_ft;
1819 	ft->ct_priv = ct_priv;
1820 	refcount_set(&ft->refcount, 1);
1821 
1822 	err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
1823 	if (err)
1824 		goto err_alloc_pre_ct;
1825 
1826 	err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
1827 	if (err)
1828 		goto err_init;
1829 
1830 	lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key);
1831 
1832 	err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
1833 				     zone_params);
1834 	if (err)
1835 		goto err_insert;
1836 
1837 	err = nf_flow_table_offload_add_cb(ft->nf_ft,
1838 					   mlx5_tc_ct_block_flow_offload, ft);
1839 	if (err)
1840 		goto err_add_cb;
1841 
1842 	return ft;
1843 
1844 err_add_cb:
1845 	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1846 err_insert:
1847 	rhashtable_destroy(&ft->ct_entries_ht);
1848 err_init:
1849 	mlx5_tc_ct_free_pre_ct_tables(ft);
1850 err_alloc_pre_ct:
1851 	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1852 err_mapping:
1853 	kfree(ft);
1854 	return ERR_PTR(err);
1855 }
1856 
1857 static void
1858 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
1859 {
1860 	struct mlx5_ct_entry *entry = ptr;
1861 
1862 	mlx5_tc_ct_entry_put(entry);
1863 }
1864 
1865 static void
1866 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
1867 {
1868 	if (!refcount_dec_and_test(&ft->refcount))
1869 		return;
1870 
1871 	flush_workqueue(ct_priv->wq);
1872 	nf_flow_table_offload_del_cb(ft->nf_ft,
1873 				     mlx5_tc_ct_block_flow_offload, ft);
1874 	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1875 	rhashtable_free_and_destroy(&ft->ct_entries_ht,
1876 				    mlx5_tc_ct_flush_ft_entry,
1877 				    ct_priv);
1878 	mlx5_tc_ct_free_pre_ct_tables(ft);
1879 	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1880 	kfree(ft);
1881 }
1882 
1883 /* We translate the tc filter with CT action to the following HW model:
1884  *
1885  *	+-----------------------+
1886  *	+ rule (either original +
1887  *	+ or post_act rule)     +
1888  *	+-----------------------+
1889  *		 | set act_miss_cookie mapping
1890  *		 | set fte_id
1891  *		 | set tunnel_id
1892  *		 | rest of actions before the CT action (for this orig/post_act rule)
1893  *		 |
1894  * +-------------+
1895  * | Chain 0	 |
1896  * | optimization|
1897  * |		 v
1898  * |	+---------------------+
1899  * |	+ pre_ct/pre_ct_nat   +  if matches     +----------------------+
1900  * |	+ zone+nat match      +---------------->+ post_act (see below) +
1901  * |	+---------------------+  set zone       +----------------------+
1902  * |		 |
1903  * +-------------+ set zone
1904  *		 |
1905  *		 v
1906  *	+--------------------+
1907  *	+ CT (nat or no nat) +
1908  *	+ tuple + zone match +
1909  *	+--------------------+
1910  *		 | set mark
1911  *		 | set labels_id
1912  *		 | set established
1913  *		 | set zone_restore
1914  *		 | do nat (if needed)
1915  *		 v
1916  *	+--------------+
1917  *	+ post_act     + rest of parsed filter's actions
1918  *	+ fte_id match +------------------------>
1919  *	+--------------+
1920  *
1921  */
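/* As a rough illustration only (device names and the exact rule set below are
 * hypothetical, not taken from this driver), a software ruleset such as:
 *
 *   tc filter add dev $REP ingress chain 0 prio 1 proto ip flower \
 *           ct_state -trk action ct zone 5 pipe action goto chain 1
 *   tc filter add dev $REP ingress chain 1 prio 1 proto ip flower \
 *           ct_zone 5 ct_state +trk+est action mirred egress redirect dev $REP2
 *
 * maps onto the model above: the ct action of the chain 0 filter becomes the
 * zone set plus the jump into the CT tables, offloaded conntrack entries become
 * the tuple rules of the CT table, and whatever follows the ct action continues
 * from post_act.
 */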
1922 static int
1923 __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
1924 			  struct mlx5_flow_attr *attr)
1925 {
1926 	bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
1927 	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1928 	int act_miss_mapping = 0, err;
1929 	struct mlx5_ct_ft *ft;
1930 	u16 zone;
1931 
1932 	/* Register for CT established events */
1933 	ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
1934 				  attr->ct_attr.nf_ft);
1935 	if (IS_ERR(ft)) {
1936 		err = PTR_ERR(ft);
1937 		ct_dbg("Failed to register to ft callback");
1938 		goto err_ft;
1939 	}
1940 	attr->ct_attr.ft = ft;
1941 
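	/* Map the filter's act_miss_cookie to a compact id and program it into
	 * the mapped-object register, so a packet that misses in the CT tables
	 * can be handed back to software and resume from the correct action.
	 */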
1942 	err = mlx5e_tc_action_miss_mapping_get(ct_priv->priv, attr, attr->ct_attr.act_miss_cookie,
1943 					       &act_miss_mapping);
1944 	if (err) {
1945 		ct_dbg("Failed to get register mapping for act miss");
1946 		goto err_get_act_miss;
1947 	}
1948 
1949 	err = mlx5e_tc_match_to_reg_set(priv->mdev, &attr->parse_attr->mod_hdr_acts,
1950 					ct_priv->ns_type, MAPPED_OBJ_TO_REG, act_miss_mapping);
1951 	if (err) {
1952 		ct_dbg("Failed to set act miss register mapping");
1953 		goto err_mapping;
1954 	}
1955 
1956 	/* Chain 0 sets the zone and jumps to ct table
1957 	 * Other chains jump to pre_ct table to align with act_ct cached logic
1958 	 */
1959 	if (!attr->chain) {
1960 		zone = ft->zone & MLX5_CT_ZONE_MASK;
1961 		err = mlx5e_tc_match_to_reg_set(priv->mdev, &attr->parse_attr->mod_hdr_acts,
1962 						ct_priv->ns_type, ZONE_TO_REG, zone);
1963 		if (err) {
1964 			ct_dbg("Failed to set zone register mapping");
1965 			goto err_mapping;
1966 		}
1967 
1968 		attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct;
1969 	} else {
1970 		attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
1971 	}
1972 
1973 	attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1974 	attr->ct_attr.act_miss_mapping = act_miss_mapping;
1975 
1976 	return 0;
1977 
1978 err_mapping:
1979 	mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, act_miss_mapping);
1980 err_get_act_miss:
1981 	mlx5_tc_ct_del_ft_cb(ct_priv, ft);
1982 err_ft:
1983 	netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
1984 	return err;
1985 }
1986 
1987 int
1988 mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr)
1989 {
1990 	int err;
1991 
1992 	if (!priv)
1993 		return -EOPNOTSUPP;
1994 
1995 	if (attr->ct_attr.offloaded)
1996 		return 0;
1997 
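	/* ct clear is implemented by zeroing the CT metadata registers (ct
	 * state, mark, labels id and zone restore) with a mod header action,
	 * wiping any previously restored conntrack state for this flow.
	 */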
1998 	if (attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR) {
1999 		err = mlx5_tc_ct_entry_set_registers(priv, &attr->parse_attr->mod_hdr_acts,
2000 						     0, 0, 0, 0);
2001 		if (err)
2002 			return err;
2003 
2004 		attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2005 	}
2006 
2007 	if (!attr->ct_attr.nf_ft) { /* means only ct clear action, and not ct_clear,ct() */
2008 		attr->ct_attr.offloaded = true;
2009 		return 0;
2010 	}
2011 
2012 	mutex_lock(&priv->control_lock);
2013 	err = __mlx5_tc_ct_flow_offload(priv, attr);
2014 	if (!err)
2015 		attr->ct_attr.offloaded = true;
2016 	mutex_unlock(&priv->control_lock);
2017 
2018 	return err;
2019 }
2020 
2021 static void
2022 __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
2023 			 struct mlx5_flow_attr *attr)
2024 {
2025 	mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, attr->ct_attr.act_miss_mapping);
2026 	mlx5_tc_ct_del_ft_cb(ct_priv, attr->ct_attr.ft);
2027 }
2028 
2029 void
2030 mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
2031 		       struct mlx5_flow_attr *attr)
2032 {
2033 	if (!attr->ct_attr.offloaded) /* no ct action, return */
2034 		return;
2035 	if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */
2036 		return;
2037 
2038 	mutex_lock(&priv->control_lock);
2039 	__mlx5_tc_ct_delete_flow(priv, attr);
2040 	mutex_unlock(&priv->control_lock);
2041 }
2042 
2043 static int
2044 mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv)
2045 {
2046 	struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
2047 	struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get();
2048 	int err;
2049 
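	/* Default to the DMFS (device/firmware-managed steering) provider; use
	 * the SMFS (software-managed steering) provider only for the FDB
	 * namespace when the device is in SMFS steering mode.
	 */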
2050 	if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
2051 	    ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) {
2052 		ct_dbg("Using SMFS ct flow steering provider");
2053 		fs_ops = mlx5_ct_fs_smfs_ops_get();
2054 	}
2055 
2056 	ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL);
2057 	if (!ct_priv->fs)
2058 		return -ENOMEM;
2059 
2060 	ct_priv->fs->netdev = ct_priv->netdev;
2061 	ct_priv->fs->dev = ct_priv->dev;
2062 	ct_priv->fs_ops = fs_ops;
2063 
2064 	err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct);
2065 	if (err)
2066 		goto err_init;
2067 
2068 	return 0;
2069 
2070 err_init:
2071 	kfree(ct_priv->fs);
2072 	return err;
2073 }
2074 
2075 static int
2076 mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
2077 				  const char **err_msg)
2078 {
2079 	if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
2080 		/* The vlan workaround must be avoided for multi-chain rules.
2081 		 * This is just a sanity check, as the pop vlan action should
2082 		 * be supported by any FW that supports ignore_flow_level.
2083 		 */
2084 
2085 		*err_msg = "firmware vlan actions support is missing";
2086 		return -EOPNOTSUPP;
2087 	}
2088 
2089 	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
2090 				    fdb_modify_header_fwd_to_table)) {
2091 		/* CT always writes to registers, which are mod header actions.
2092 		 * Therefore, mod header combined with fwd to table is required.
2093 		 */
2094 
2095 		*err_msg = "firmware fwd and modify support is missing";
2096 		return -EOPNOTSUPP;
2097 	}
2098 
2099 	if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2100 		*err_msg = "register loopback isn't supported";
2101 		return -EOPNOTSUPP;
2102 	}
2103 
2104 	return 0;
2105 }
2106 
2107 static int
2108 mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
2109 			      enum mlx5_flow_namespace_type ns_type,
2110 			      struct mlx5e_post_act *post_act)
2111 {
2112 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2113 	const char *err_msg = NULL;
2114 	int err = 0;
2115 
2116 	if (IS_ERR_OR_NULL(post_act)) {
2117 		/* ignore_flow_level isn't supported by default for VFs, so post_act
2118 		 * won't be available. Skip showing an error msg.
2119 		 */
2120 		if (priv->mdev->coredev_type == MLX5_COREDEV_PF)
2121 			err_msg = "post action is missing";
2122 		err = -EOPNOTSUPP;
2123 		goto out_err;
2124 	}
2125 
2126 	if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
2127 		err = mlx5_tc_ct_init_check_esw_support(esw, &err_msg);
2128 
2129 out_err:
2130 	if (err && err_msg)
2131 		netdev_dbg(priv->netdev, "tc ct offload not supported, %s\n", err_msg);
2132 	return err;
2133 }
2134 
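/* Expose the CT offload counters through debugfs; on a typical system they
 * should appear under /sys/kernel/debug/mlx5/<device>/ct/{offloaded,rx_dropped}.
 */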
2135 static void
2136 mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
2137 {
2138 	struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs;
2139 
2140 	ct_dbgfs->root = debugfs_create_dir("ct", mlx5_debugfs_get_dev_root(ct_priv->dev));
2141 	debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root,
2142 				&ct_dbgfs->stats.offloaded);
2143 	debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root,
2144 				&ct_dbgfs->stats.rx_dropped);
2145 }
2146 
2147 static void
2148 mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
2149 {
2150 	debugfs_remove_recursive(ct_priv->debugfs.root);
2151 }
2152 
2153 #define INIT_ERR_PREFIX "tc ct offload init failed"
2154 
2155 struct mlx5_tc_ct_priv *
2156 mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
2157 		struct mod_hdr_tbl *mod_hdr,
2158 		enum mlx5_flow_namespace_type ns_type,
2159 		struct mlx5e_post_act *post_act)
2160 {
2161 	struct mlx5_tc_ct_priv *ct_priv;
2162 	struct mlx5_core_dev *dev;
2163 	u64 mapping_id;
2164 	int err;
2165 
2166 	dev = priv->mdev;
2167 	err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act);
2168 	if (err)
2169 		goto err_support;
2170 
2171 	ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
2172 	if (!ct_priv)
2173 		goto err_alloc;
2174 
2175 	mapping_id = mlx5_query_nic_system_image_guid(dev);
2176 
2177 	ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE,
2178 						      sizeof(u16), 0, true);
2179 	if (IS_ERR(ct_priv->zone_mapping)) {
2180 		err = PTR_ERR(ct_priv->zone_mapping);
2181 		goto err_mapping_zone;
2182 	}
2183 
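	/* conntrack labels are 128 bits (4 * u32) wide and do not fit in a
	 * metadata register, so map them to a compact id instead.
	 */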
2184 	ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS,
2185 							sizeof(u32) * 4, 0, true);
2186 	if (IS_ERR(ct_priv->labels_mapping)) {
2187 		err = PTR_ERR(ct_priv->labels_mapping);
2188 		goto err_mapping_labels;
2189 	}
2190 
2191 	spin_lock_init(&ct_priv->ht_lock);
2192 	ct_priv->priv = priv;
2193 	ct_priv->ns_type = ns_type;
2194 	ct_priv->chains = chains;
2195 	ct_priv->netdev = priv->netdev;
2196 	ct_priv->dev = priv->mdev;
2197 	ct_priv->mod_hdr_tbl = mod_hdr;
2198 	ct_priv->ct = mlx5_chains_create_global_table(chains);
2199 	if (IS_ERR(ct_priv->ct)) {
2200 		err = PTR_ERR(ct_priv->ct);
2201 		mlx5_core_warn(dev,
2202 			       "%s, failed to create ct table err: %d\n",
2203 			       INIT_ERR_PREFIX, err);
2204 		goto err_ct_tbl;
2205 	}
2206 
2207 	ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
2208 	if (IS_ERR(ct_priv->ct_nat)) {
2209 		err = PTR_ERR(ct_priv->ct_nat);
2210 		mlx5_core_warn(dev,
2211 			       "%s, failed to create ct nat table err: %d\n",
2212 			       INIT_ERR_PREFIX, err);
2213 		goto err_ct_nat_tbl;
2214 	}
2215 
2216 	ct_priv->post_act = post_act;
2217 	mutex_init(&ct_priv->control_lock);
2218 	if (rhashtable_init(&ct_priv->zone_ht, &zone_params))
2219 		goto err_ct_zone_ht;
2220 	if (rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params))
2221 		goto err_ct_tuples_ht;
2222 	if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params))
2223 		goto err_ct_tuples_nat_ht;
2224 
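	/* Use an ordered workqueue so queued CT entry add/delete works execute
	 * one at a time, in submission order.
	 */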
2225 	ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0);
2226 	if (!ct_priv->wq) {
2227 		err = -ENOMEM;
2228 		goto err_wq;
2229 	}
2230 
2231 	err = mlx5_tc_ct_fs_init(ct_priv);
2232 	if (err)
2233 		goto err_init_fs;
2234 
2235 	mlx5_ct_tc_create_dbgfs(ct_priv);
2236 	return ct_priv;
2237 
2238 err_init_fs:
2239 	destroy_workqueue(ct_priv->wq);
2240 err_wq:
2241 	rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
2242 err_ct_tuples_nat_ht:
2243 	rhashtable_destroy(&ct_priv->ct_tuples_ht);
2244 err_ct_tuples_ht:
2245 	rhashtable_destroy(&ct_priv->zone_ht);
2246 err_ct_zone_ht:
2247 	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2248 err_ct_nat_tbl:
2249 	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2250 err_ct_tbl:
2251 	mapping_destroy(ct_priv->labels_mapping);
2252 err_mapping_labels:
2253 	mapping_destroy(ct_priv->zone_mapping);
2254 err_mapping_zone:
2255 	kfree(ct_priv);
2256 err_alloc:
2257 err_support:
2258 
2259 	return NULL;
2260 }
2261 
2262 void
2263 mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
2264 {
2265 	struct mlx5_fs_chains *chains;
2266 
2267 	if (!ct_priv)
2268 		return;
2269 
2270 	destroy_workqueue(ct_priv->wq);
2271 	mlx5_ct_tc_remove_dbgfs(ct_priv);
2272 	chains = ct_priv->chains;
2273 
2274 	ct_priv->fs_ops->destroy(ct_priv->fs);
2275 	kfree(ct_priv->fs);
2276 
2277 	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2278 	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2279 	mapping_destroy(ct_priv->zone_mapping);
2280 	mapping_destroy(ct_priv->labels_mapping);
2281 
2282 	rhashtable_destroy(&ct_priv->ct_tuples_ht);
2283 	rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
2284 	rhashtable_destroy(&ct_priv->zone_ht);
2285 	mutex_destroy(&ct_priv->control_lock);
2286 	kfree(ct_priv);
2287 }
2288 
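/* Restore conntrack state for a packet that arrived in software with CT
 * metadata set by hardware: recover the zone from zone_restore_id, rebuild the
 * tuple from the skb, look up the offloaded entry and restore its conntrack
 * info onto the skb so software conntrack can continue. Returns false (and
 * counts an rx drop) if the flow cannot be restored.
 */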
2289 bool
2290 mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
2291 			 struct sk_buff *skb, u8 zone_restore_id)
2292 {
2293 	struct mlx5_ct_tuple tuple = {};
2294 	struct mlx5_ct_entry *entry;
2295 	u16 zone;
2296 
2297 	if (!ct_priv || !zone_restore_id)
2298 		return true;
2299 
2300 	if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
2301 		goto out_inc_drop;
2302 
2303 	if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
2304 		goto out_inc_drop;
2305 
2306 	spin_lock(&ct_priv->ht_lock);
2307 
2308 	entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
2309 	if (!entry) {
2310 		spin_unlock(&ct_priv->ht_lock);
2311 		goto out_inc_drop;
2312 	}
2313 
2314 	if (IS_ERR(entry)) {
2315 		spin_unlock(&ct_priv->ht_lock);
2316 		goto out_inc_drop;
2317 	}
2318 	spin_unlock(&ct_priv->ht_lock);
2319 
2320 	tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
2321 	__mlx5_tc_ct_entry_put(entry);
2322 
2323 	return true;
2324 
2325 out_inc_drop:
2326 	atomic_inc(&ct_priv->debugfs.stats.rx_dropped);
2327 	return false;
2328 }
2329