1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3
4 #include <net/netfilter/nf_conntrack.h>
5 #include <net/netfilter/nf_conntrack_core.h>
6 #include <net/netfilter/nf_conntrack_zones.h>
7 #include <net/netfilter/nf_conntrack_labels.h>
8 #include <net/netfilter/nf_conntrack_helper.h>
9 #include <net/netfilter/nf_conntrack_acct.h>
10 #include <uapi/linux/tc_act/tc_pedit.h>
11 #include <net/tc_act/tc_ct.h>
12 #include <net/flow_offload.h>
13 #include <net/netfilter/nf_flow_table.h>
14 #include <linux/workqueue.h>
15 #include <linux/refcount.h>
16 #include <linux/xarray.h>
17 #include <linux/if_macvlan.h>
18 #include <linux/debugfs.h>
19
20 #include "lib/fs_chains.h"
21 #include "en/tc_ct.h"
22 #include "en/tc/ct_fs.h"
23 #include "en/tc_priv.h"
24 #include "en/mod_hdr.h"
25 #include "en/mapping.h"
26 #include "en/tc/post_act.h"
27 #include "en.h"
28 #include "en_tc.h"
29 #include "en_rep.h"
30 #include "fs_core.h"
31
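/* Connection tracking state bits packed into the ctstate register by
 * mlx5_tc_ct_entry_set_registers() and matched by mlx5_tc_ct_match_add()
 * and the pre-ct rules below.
 */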
32 #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
33 #define MLX5_CT_STATE_TRK_BIT BIT(2)
34 #define MLX5_CT_STATE_NAT_BIT BIT(3)
35 #define MLX5_CT_STATE_REPLY_BIT BIT(4)
36 #define MLX5_CT_STATE_RELATED_BIT BIT(5)
37 #define MLX5_CT_STATE_INVALID_BIT BIT(6)
38 #define MLX5_CT_STATE_NEW_BIT BIT(7)
39
40 #define MLX5_CT_LABELS_BITS MLX5_REG_MAPPING_MBITS(LABELS_TO_REG)
41 #define MLX5_CT_LABELS_MASK MLX5_REG_MAPPING_MASK(LABELS_TO_REG)
42
43 /* Statically allocate modify actions for
44 * ipv6 + port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10.
45 * More actions are allocated dynamically if needed (e.g. ipv6 snat + dnat).
46 */
47 #define MLX5_CT_MIN_MOD_ACTS 10
48
49 #define ct_dbg(fmt, args...)\
50 netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
51
52 struct mlx5_tc_ct_debugfs {
53 struct {
54 atomic_t offloaded;
55 atomic_t rx_dropped;
56 } stats;
57
58 struct dentry *root;
59 };
60
61 struct mlx5_tc_ct_priv {
62 struct mlx5_core_dev *dev;
63 struct mlx5e_priv *priv;
64 const struct net_device *netdev;
65 struct mod_hdr_tbl *mod_hdr_tbl;
66 struct xarray tuple_ids;
67 struct rhashtable zone_ht;
68 struct rhashtable ct_tuples_ht;
69 struct rhashtable ct_tuples_nat_ht;
70 struct mlx5_flow_table *ct;
71 struct mlx5_flow_table *ct_nat;
72 struct mlx5e_post_act *post_act;
73 struct mutex control_lock; /* guards parallel adds/dels */
74 struct mapping_ctx *zone_mapping;
75 struct mapping_ctx *labels_mapping;
76 enum mlx5_flow_namespace_type ns_type;
77 struct mlx5_fs_chains *chains;
78 struct mlx5_ct_fs *fs;
79 struct mlx5_ct_fs_ops *fs_ops;
80 spinlock_t ht_lock; /* protects ft entries */
81 struct workqueue_struct *wq;
82
83 struct mlx5_tc_ct_debugfs debugfs;
84 };
85
86 struct mlx5_ct_zone_rule {
87 struct mlx5_ct_fs_rule *rule;
88 struct mlx5e_mod_hdr_handle *mh;
89 struct mlx5_flow_attr *attr;
90 bool nat;
91 };
92
93 struct mlx5_tc_ct_pre {
94 struct mlx5_flow_table *ft;
95 struct mlx5_flow_group *flow_grp;
96 struct mlx5_flow_group *miss_grp;
97 struct mlx5_flow_handle *flow_rule;
98 struct mlx5_flow_handle *miss_rule;
99 struct mlx5_modify_hdr *modify_hdr;
100 };
101
102 struct mlx5_ct_ft {
103 struct rhash_head node;
104 u16 zone;
105 u32 zone_restore_id;
106 refcount_t refcount;
107 struct nf_flowtable *nf_ft;
108 struct mlx5_tc_ct_priv *ct_priv;
109 struct rhashtable ct_entries_ht;
110 struct mlx5_tc_ct_pre pre_ct;
111 struct mlx5_tc_ct_pre pre_ct_nat;
112 };
113
114 struct mlx5_ct_tuple {
115 u16 addr_type;
116 __be16 n_proto;
117 u8 ip_proto;
118 struct {
119 union {
120 __be32 src_v4;
121 struct in6_addr src_v6;
122 };
123 union {
124 __be32 dst_v4;
125 struct in6_addr dst_v6;
126 };
127 } ip;
128 struct {
129 __be16 src;
130 __be16 dst;
131 } port;
132
133 u16 zone;
134 };
135
136 struct mlx5_ct_counter {
137 struct mlx5_fc *counter;
138 refcount_t refcount;
139 bool is_shared;
140 };
141
142 enum {
143 MLX5_CT_ENTRY_FLAG_VALID,
144 };
145
146 struct mlx5_ct_entry {
147 struct rhash_head node;
148 struct rhash_head tuple_node;
149 struct rhash_head tuple_nat_node;
150 struct mlx5_ct_counter *counter;
151 unsigned long cookie;
152 unsigned long restore_cookie;
153 struct mlx5_ct_tuple tuple;
154 struct mlx5_ct_tuple tuple_nat;
155 struct mlx5_ct_zone_rule zone_rules[2];
156
157 struct mlx5_tc_ct_priv *ct_priv;
158 struct work_struct work;
159
160 refcount_t refcnt;
161 unsigned long flags;
162 };
163
164 static void
165 mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
166 struct mlx5_flow_attr *attr,
167 struct mlx5e_mod_hdr_handle *mh);
168
169 static const struct rhashtable_params cts_ht_params = {
170 .head_offset = offsetof(struct mlx5_ct_entry, node),
171 .key_offset = offsetof(struct mlx5_ct_entry, cookie),
172 .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
173 .automatic_shrinking = true,
174 .min_size = 16 * 1024,
175 };
176
177 static const struct rhashtable_params zone_params = {
178 .head_offset = offsetof(struct mlx5_ct_ft, node),
179 .key_offset = offsetof(struct mlx5_ct_ft, zone),
180 .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
181 .automatic_shrinking = true,
182 };
183
184 static const struct rhashtable_params tuples_ht_params = {
185 .head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
186 .key_offset = offsetof(struct mlx5_ct_entry, tuple),
187 .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
188 .automatic_shrinking = true,
189 .min_size = 16 * 1024,
190 };
191
192 static const struct rhashtable_params tuples_nat_ht_params = {
193 .head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
194 .key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
195 .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
196 .automatic_shrinking = true,
197 .min_size = 16 * 1024,
198 };
199
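/* The NAT tuple rhash node doubles as a "has NAT rewrite" flag: it is only
 * linked into ct_tuples_nat_ht when the NAT tuple differs from the original.
 */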
200 static bool
201 mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
202 {
203 return !!(entry->tuple_nat_node.next);
204 }
205
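/* ct labels are 128 bits wide but only MLX5_CT_LABELS_BITS fit in the
 * register, so non-zero labels are mapped to a compact id; all-zero labels
 * use id 0 and skip the mapping table.
 */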
206 static int
207 mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
208 u32 *labels, u32 *id)
209 {
210 if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
211 *id = 0;
212 return 0;
213 }
214
215 if (mapping_add(ct_priv->labels_mapping, labels, id))
216 return -EOPNOTSUPP;
217
218 return 0;
219 }
220
221 static void
222 mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
223 {
224 if (id)
225 mapping_remove(ct_priv->labels_mapping, id);
226 }
227
228 static int
229 mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
230 {
231 struct flow_match_control control;
232 struct flow_match_basic basic;
233
234 flow_rule_match_basic(rule, &basic);
235 flow_rule_match_control(rule, &control);
236
237 tuple->n_proto = basic.key->n_proto;
238 tuple->ip_proto = basic.key->ip_proto;
239 tuple->addr_type = control.key->addr_type;
240
241 if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
242 struct flow_match_ipv4_addrs match;
243
244 flow_rule_match_ipv4_addrs(rule, &match);
245 tuple->ip.src_v4 = match.key->src;
246 tuple->ip.dst_v4 = match.key->dst;
247 } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
248 struct flow_match_ipv6_addrs match;
249
250 flow_rule_match_ipv6_addrs(rule, &match);
251 tuple->ip.src_v6 = match.key->src;
252 tuple->ip.dst_v6 = match.key->dst;
253 } else {
254 return -EOPNOTSUPP;
255 }
256
257 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
258 struct flow_match_ports match;
259
260 flow_rule_match_ports(rule, &match);
261 switch (tuple->ip_proto) {
262 case IPPROTO_TCP:
263 case IPPROTO_UDP:
264 tuple->port.src = match.key->src;
265 tuple->port.dst = match.key->dst;
266 break;
267 default:
268 return -EOPNOTSUPP;
269 }
270 } else {
271 if (tuple->ip_proto != IPPROTO_GRE)
272 return -EOPNOTSUPP;
273 }
274
275 return 0;
276 }
277
278 static int
279 mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
280 struct flow_rule *rule)
281 {
282 struct flow_action *flow_action = &rule->action;
283 struct flow_action_entry *act;
284 u32 offset, val, ip6_offset;
285 int i;
286
287 flow_action_for_each(i, act, flow_action) {
288 if (act->id != FLOW_ACTION_MANGLE)
289 continue;
290
291 offset = act->mangle.offset;
292 val = act->mangle.val;
293 switch (act->mangle.htype) {
294 case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
295 if (offset == offsetof(struct iphdr, saddr))
296 tuple->ip.src_v4 = cpu_to_be32(val);
297 else if (offset == offsetof(struct iphdr, daddr))
298 tuple->ip.dst_v4 = cpu_to_be32(val);
299 else
300 return -EOPNOTSUPP;
301 break;
302
303 case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
304 ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
305 ip6_offset /= 4;
306 if (ip6_offset < 4)
307 tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
308 else if (ip6_offset < 8)
309 tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
310 else
311 return -EOPNOTSUPP;
312 break;
313
314 case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
315 if (offset == offsetof(struct tcphdr, source))
316 tuple->port.src = cpu_to_be16(val);
317 else if (offset == offsetof(struct tcphdr, dest))
318 tuple->port.dst = cpu_to_be16(val);
319 else
320 return -EOPNOTSUPP;
321 break;
322
323 case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
324 if (offset == offsetof(struct udphdr, source))
325 tuple->port.src = cpu_to_be16(val);
326 else if (offset == offsetof(struct udphdr, dest))
327 tuple->port.dst = cpu_to_be16(val);
328 else
329 return -EOPNOTSUPP;
330 break;
331
332 default:
333 return -EOPNOTSUPP;
334 }
335 }
336
337 return 0;
338 }
339
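/* Resolve the ingress device to a flow_source hint for steering, recursing
 * through vlan/macvlan upper devices down to the real lower device.
 */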
340 static int
341 mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv,
342 struct net_device *ndev)
343 {
344 struct mlx5e_priv *other_priv = netdev_priv(ndev);
345 struct mlx5_core_dev *mdev = ct_priv->dev;
346 bool vf_rep, uplink_rep;
347
348 vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
349 uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
350
351 if (vf_rep)
352 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
353 if (uplink_rep)
354 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
355 if (is_vlan_dev(ndev))
356 return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev));
357 if (netif_is_macvlan(ndev))
358 return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev));
359 if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev))
360 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
361
362 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT;
363 }
364
365 static int
366 mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv,
367 struct mlx5_flow_spec *spec,
368 struct flow_rule *rule)
369 {
370 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
371 outer_headers);
372 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
373 outer_headers);
374 u16 addr_type = 0;
375 u8 ip_proto = 0;
376
377 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
378 struct flow_match_basic match;
379
380 flow_rule_match_basic(rule, &match);
381
382 mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v);
383 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
384 match.mask->ip_proto);
385 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
386 match.key->ip_proto);
387
388 ip_proto = match.key->ip_proto;
389 }
390
391 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
392 struct flow_match_control match;
393
394 flow_rule_match_control(rule, &match);
395 addr_type = match.key->addr_type;
396 }
397
398 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
399 struct flow_match_ipv4_addrs match;
400
401 flow_rule_match_ipv4_addrs(rule, &match);
402 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
403 src_ipv4_src_ipv6.ipv4_layout.ipv4),
404 &match.mask->src, sizeof(match.mask->src));
405 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
406 src_ipv4_src_ipv6.ipv4_layout.ipv4),
407 &match.key->src, sizeof(match.key->src));
408 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
409 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
410 &match.mask->dst, sizeof(match.mask->dst));
411 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
412 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
413 &match.key->dst, sizeof(match.key->dst));
414 }
415
416 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
417 struct flow_match_ipv6_addrs match;
418
419 flow_rule_match_ipv6_addrs(rule, &match);
420 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
421 src_ipv4_src_ipv6.ipv6_layout.ipv6),
422 &match.mask->src, sizeof(match.mask->src));
423 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
424 src_ipv4_src_ipv6.ipv6_layout.ipv6),
425 &match.key->src, sizeof(match.key->src));
426
427 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
428 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
429 &match.mask->dst, sizeof(match.mask->dst));
430 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
431 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
432 &match.key->dst, sizeof(match.key->dst));
433 }
434
435 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
436 struct flow_match_ports match;
437
438 flow_rule_match_ports(rule, &match);
439 switch (ip_proto) {
440 case IPPROTO_TCP:
441 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
442 tcp_sport, ntohs(match.mask->src));
443 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
444 tcp_sport, ntohs(match.key->src));
445
446 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
447 tcp_dport, ntohs(match.mask->dst));
448 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
449 tcp_dport, ntohs(match.key->dst));
450 break;
451
452 case IPPROTO_UDP:
453 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
454 udp_sport, ntohs(match.mask->src));
455 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
456 udp_sport, ntohs(match.key->src));
457
458 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
459 udp_dport, ntohs(match.mask->dst));
460 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
461 udp_dport, ntohs(match.key->dst));
462 break;
463 default:
464 break;
465 }
466 }
467
468 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
469 struct flow_match_tcp match;
470
471 flow_rule_match_tcp(rule, &match);
472 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
473 ntohs(match.mask->flags));
474 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
475 ntohs(match.key->flags));
476 }
477
478 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
479 struct flow_match_meta match;
480
481 flow_rule_match_meta(rule, &match);
482
483 if (match.key->ingress_ifindex & match.mask->ingress_ifindex) {
484 struct net_device *dev;
485
486 dev = dev_get_by_index(&init_net, match.key->ingress_ifindex);
487 if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source))
488 spec->flow_context.flow_source =
489 mlx5_tc_ct_get_flow_source_match(ct_priv, dev);
490
491 dev_put(dev);
492 }
493 }
494
495 return 0;
496 }
497
498 static void
499 mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
500 {
501 if (entry->counter->is_shared &&
502 !refcount_dec_and_test(&entry->counter->refcount))
503 return;
504
505 mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
506 kfree(entry->counter);
507 }
508
509 static void
510 mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
511 struct mlx5_ct_entry *entry,
512 bool nat)
513 {
514 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
515 struct mlx5_flow_attr *attr = zone_rule->attr;
516
517 ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
518
519 ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule);
520 mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
521 mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
522 kfree(attr);
523 }
524
525 static void
526 mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
527 struct mlx5_ct_entry *entry)
528 {
529 mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
530 mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
531
532 atomic_dec(&ct_priv->debugfs.stats.offloaded);
533 }
534
535 static struct flow_action_entry *
536 mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
537 {
538 struct flow_action *flow_action = &flow_rule->action;
539 struct flow_action_entry *act;
540 int i;
541
542 flow_action_for_each(i, act, flow_action) {
543 if (act->id == FLOW_ACTION_CT_METADATA)
544 return act;
545 }
546
547 return NULL;
548 }
549
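/* Write the offloaded entry's ct metadata (state, mark, label id, zone
 * restore id) into the metadata registers that later rules match on and
 * that are used to restore the ct state to the skb after a miss.
 */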
550 static int
551 mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
552 struct mlx5e_tc_mod_hdr_acts *mod_acts,
553 u8 ct_state,
554 u32 mark,
555 u32 labels_id,
556 u8 zone_restore_id)
557 {
558 enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
559 struct mlx5_core_dev *dev = ct_priv->dev;
560 int err;
561
562 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
563 CTSTATE_TO_REG, ct_state);
564 if (err)
565 return err;
566
567 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
568 MARK_TO_REG, mark);
569 if (err)
570 return err;
571
572 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
573 LABELS_TO_REG, labels_id);
574 if (err)
575 return err;
576
577 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
578 ZONE_RESTORE_TO_REG, zone_restore_id);
579 if (err)
580 return err;
581
582 /* Make another copy of zone id in reg_b for
583 * NIC rx flows since we don't copy reg_c1 to
584 * reg_b upon miss.
585 */
586 if (ns != MLX5_FLOW_NAMESPACE_FDB) {
587 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
588 NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
589 if (err)
590 return err;
591 }
592 return 0;
593 }
594
595 static int
596 mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
597 char *modact)
598 {
599 u32 offset = act->mangle.offset, field;
600
601 switch (act->mangle.htype) {
602 case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
603 MLX5_SET(set_action_in, modact, length, 0);
604 if (offset == offsetof(struct iphdr, saddr))
605 field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
606 else if (offset == offsetof(struct iphdr, daddr))
607 field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
608 else
609 return -EOPNOTSUPP;
610 break;
611
612 case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
613 MLX5_SET(set_action_in, modact, length, 0);
614 if (offset == offsetof(struct ipv6hdr, saddr) + 12)
615 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
616 else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
617 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
618 else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
619 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
620 else if (offset == offsetof(struct ipv6hdr, saddr))
621 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
622 else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
623 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
624 else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
625 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
626 else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
627 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
628 else if (offset == offsetof(struct ipv6hdr, daddr))
629 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
630 else
631 return -EOPNOTSUPP;
632 break;
633
634 case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
635 MLX5_SET(set_action_in, modact, length, 16);
636 if (offset == offsetof(struct tcphdr, source))
637 field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
638 else if (offset == offsetof(struct tcphdr, dest))
639 field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
640 else
641 return -EOPNOTSUPP;
642 break;
643
644 case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
645 MLX5_SET(set_action_in, modact, length, 16);
646 if (offset == offsetof(struct udphdr, source))
647 field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
648 else if (offset == offsetof(struct udphdr, dest))
649 field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
650 else
651 return -EOPNOTSUPP;
652 break;
653
654 default:
655 return -EOPNOTSUPP;
656 }
657
658 MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
659 MLX5_SET(set_action_in, modact, offset, 0);
660 MLX5_SET(set_action_in, modact, field, field);
661 MLX5_SET(set_action_in, modact, data, act->mangle.val);
662
663 return 0;
664 }
665
666 static int
667 mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
668 struct flow_rule *flow_rule,
669 struct mlx5e_tc_mod_hdr_acts *mod_acts)
670 {
671 struct flow_action *flow_action = &flow_rule->action;
672 struct mlx5_core_dev *mdev = ct_priv->dev;
673 struct flow_action_entry *act;
674 char *modact;
675 int err, i;
676
677 flow_action_for_each(i, act, flow_action) {
678 switch (act->id) {
679 case FLOW_ACTION_MANGLE: {
680 modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts);
681 if (IS_ERR(modact))
682 return PTR_ERR(modact);
683
684 err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
685 if (err)
686 return err;
687
688 mod_acts->num_actions++;
689 }
690 break;
691
692 case FLOW_ACTION_CT_METADATA:
693 /* Handled earlier */
694 continue;
695 default:
696 return -EOPNOTSUPP;
697 }
698 }
699
700 return 0;
701 }
702
703 static int
704 mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
705 struct mlx5_flow_attr *attr,
706 struct flow_rule *flow_rule,
707 struct mlx5e_mod_hdr_handle **mh,
708 u8 zone_restore_id, bool nat_table, bool has_nat)
709 {
710 DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS);
711 DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr);
712 struct flow_action_entry *meta;
713 enum ip_conntrack_info ctinfo;
714 u16 ct_state = 0;
715 int err;
716
717 meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
718 if (!meta)
719 return -EOPNOTSUPP;
720 ctinfo = meta->ct_metadata.cookie & NFCT_INFOMASK;
721
722 err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels,
723 &attr->ct_attr.ct_labels_id);
724 if (err)
725 return -EOPNOTSUPP;
726 if (nat_table) {
727 if (has_nat) {
728 err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts);
729 if (err)
730 goto err_mapping;
731 }
732
733 ct_state |= MLX5_CT_STATE_NAT_BIT;
734 }
735
736 ct_state |= MLX5_CT_STATE_TRK_BIT;
737 ct_state |= ctinfo == IP_CT_NEW ? MLX5_CT_STATE_NEW_BIT : MLX5_CT_STATE_ESTABLISHED_BIT;
738 ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT;
739 err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
740 ct_state,
741 meta->ct_metadata.mark,
742 attr->ct_attr.ct_labels_id,
743 zone_restore_id);
744 if (err)
745 goto err_mapping;
746
747 if (nat_table && has_nat) {
748 attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type,
749 mod_acts.num_actions,
750 mod_acts.actions);
751 if (IS_ERR(attr->modify_hdr)) {
752 err = PTR_ERR(attr->modify_hdr);
753 goto err_mapping;
754 }
755
756 *mh = NULL;
757 } else {
758 *mh = mlx5e_mod_hdr_attach(ct_priv->dev,
759 ct_priv->mod_hdr_tbl,
760 ct_priv->ns_type,
761 &mod_acts);
762 if (IS_ERR(*mh)) {
763 err = PTR_ERR(*mh);
764 goto err_mapping;
765 }
766 attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
767 }
768
769 mlx5e_mod_hdr_dealloc(&mod_acts);
770 return 0;
771
772 err_mapping:
773 mlx5e_mod_hdr_dealloc(&mod_acts);
774 mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
775 return err;
776 }
777
778 static void
779 mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
780 struct mlx5_flow_attr *attr,
781 struct mlx5e_mod_hdr_handle *mh)
782 {
783 if (mh)
784 mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh);
785 else
786 mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr);
787 }
788
789 static int
790 mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
791 struct flow_rule *flow_rule,
792 struct mlx5_ct_entry *entry,
793 bool nat, u8 zone_restore_id)
794 {
795 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
796 struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
797 struct mlx5_flow_spec *spec = NULL;
798 struct mlx5_flow_attr *attr;
799 int err;
800
801 zone_rule->nat = nat;
802
803 spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
804 if (!spec)
805 return -ENOMEM;
806
807 attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
808 if (!attr) {
809 err = -ENOMEM;
810 goto err_attr;
811 }
812
813 err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
814 &zone_rule->mh,
815 zone_restore_id,
816 nat,
817 mlx5_tc_ct_entry_has_nat(entry));
818 if (err) {
819 ct_dbg("Failed to create ct entry mod hdr");
820 goto err_mod_hdr;
821 }
822
823 attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
824 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
825 MLX5_FLOW_CONTEXT_ACTION_COUNT;
826 attr->dest_chain = 0;
827 attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
828 attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
829 if (entry->tuple.ip_proto == IPPROTO_TCP ||
830 entry->tuple.ip_proto == IPPROTO_UDP)
831 attr->outer_match_level = MLX5_MATCH_L4;
832 else
833 attr->outer_match_level = MLX5_MATCH_L3;
834 attr->counter = entry->counter->counter;
835 attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
836 if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
837 attr->esw_attr->in_mdev = priv->mdev;
838
839 mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule);
840 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);
841
842 zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule);
843 if (IS_ERR(zone_rule->rule)) {
844 err = PTR_ERR(zone_rule->rule);
845 ct_dbg("Failed to add ct entry rule, nat: %d", nat);
846 goto err_rule;
847 }
848
849 zone_rule->attr = attr;
850
851 kvfree(spec);
852 ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
853
854 return 0;
855
856 err_rule:
857 mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
858 mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
859 err_mod_hdr:
860 kfree(attr);
861 err_attr:
862 kvfree(spec);
863 return err;
864 }
865
866 static int
867 mlx5_tc_ct_entry_replace_rule(struct mlx5_tc_ct_priv *ct_priv,
868 struct flow_rule *flow_rule,
869 struct mlx5_ct_entry *entry,
870 bool nat, u8 zone_restore_id)
871 {
872 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
873 struct mlx5_flow_attr *attr = zone_rule->attr, *old_attr;
874 struct mlx5e_mod_hdr_handle *mh;
875 struct mlx5_ct_fs_rule *rule;
876 struct mlx5_flow_spec *spec;
877 int err;
878
879 spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
880 if (!spec)
881 return -ENOMEM;
882
883 old_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
884 if (!old_attr) {
885 err = -ENOMEM;
886 goto err_attr;
887 }
888 *old_attr = *attr;
889
890 err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, &mh, zone_restore_id,
891 nat, mlx5_tc_ct_entry_has_nat(entry));
892 if (err) {
893 ct_dbg("Failed to create ct entry mod hdr");
894 goto err_mod_hdr;
895 }
896
897 mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule);
898 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);
899
900 rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule);
901 if (IS_ERR(rule)) {
902 err = PTR_ERR(rule);
903 ct_dbg("Failed to add replacement ct entry rule, nat: %d", nat);
904 goto err_rule;
905 }
906
907 ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule);
908 zone_rule->rule = rule;
909 mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, old_attr, zone_rule->mh);
910 zone_rule->mh = mh;
911 mlx5_put_label_mapping(ct_priv, old_attr->ct_attr.ct_labels_id);
912
913 kfree(old_attr);
914 kvfree(spec);
915 ct_dbg("Replaced ct entry rule in zone %d", entry->tuple.zone);
916
917 return 0;
918
919 err_rule:
920 mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, mh);
921 mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
922 err_mod_hdr:
923 *attr = *old_attr;
924 kfree(old_attr);
925 err_attr:
926 kvfree(spec);
927 return err;
928 }
929
930 static bool
931 mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
932 {
933 return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
934 }
935
936 static struct mlx5_ct_entry *
937 mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
938 {
939 struct mlx5_ct_entry *entry;
940
941 entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
942 tuples_ht_params);
943 if (entry && mlx5_tc_ct_entry_valid(entry) &&
944 refcount_inc_not_zero(&entry->refcnt)) {
945 return entry;
946 } else if (!entry) {
947 entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
948 tuple, tuples_nat_ht_params);
949 if (entry && mlx5_tc_ct_entry_valid(entry) &&
950 refcount_inc_not_zero(&entry->refcnt))
951 return entry;
952 }
953
954 return entry ? ERR_PTR(-EINVAL) : NULL;
955 }
956
957 static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
958 {
959 struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
960
961 rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
962 &entry->tuple_nat_node,
963 tuples_nat_ht_params);
964 rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
965 tuples_ht_params);
966 }
967
968 static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
969 {
970 struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
971
972 mlx5_tc_ct_entry_del_rules(ct_priv, entry);
973
974 spin_lock_bh(&ct_priv->ht_lock);
975 mlx5_tc_ct_entry_remove_from_tuples(entry);
976 spin_unlock_bh(&ct_priv->ht_lock);
977
978 mlx5_tc_ct_counter_put(ct_priv, entry);
979 kfree(entry);
980 }
981
982 static void
983 mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
984 {
985 if (!refcount_dec_and_test(&entry->refcnt))
986 return;
987
988 mlx5_tc_ct_entry_del(entry);
989 }
990
991 static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
992 {
993 struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);
994
995 mlx5_tc_ct_entry_del(entry);
996 }
997
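/* Reference drop that never tears the entry down inline: the final cleanup
 * removes hardware rules and may sleep, so it is deferred to the ct
 * workqueue.
 */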
998 static void
999 __mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
1000 {
1001 if (!refcount_dec_and_test(&entry->refcnt))
1002 return;
1003
1004 INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
1005 queue_work(entry->ct_priv->wq, &entry->work);
1006 }
1007
1008 static struct mlx5_ct_counter *
1009 mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
1010 {
1011 struct mlx5_ct_counter *counter;
1012 int ret;
1013
1014 counter = kzalloc(sizeof(*counter), GFP_KERNEL);
1015 if (!counter)
1016 return ERR_PTR(-ENOMEM);
1017
1018 counter->is_shared = false;
1019 counter->counter = mlx5_fc_create_ex(ct_priv->dev, true);
1020 if (IS_ERR(counter->counter)) {
1021 ct_dbg("Failed to create counter for ct entry");
1022 ret = PTR_ERR(counter->counter);
1023 kfree(counter);
1024 return ERR_PTR(ret);
1025 }
1026
1027 return counter;
1028 }
1029
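/* Conntrack reports the two directions of a connection as separate entries.
 * When nf accounting is disabled, reuse the counter of an already offloaded
 * reverse-direction entry instead of allocating a new one.
 */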
1030 static struct mlx5_ct_counter *
1031 mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
1032 struct mlx5_ct_entry *entry)
1033 {
1034 struct mlx5_ct_tuple rev_tuple = entry->tuple;
1035 struct mlx5_ct_counter *shared_counter;
1036 struct mlx5_ct_entry *rev_entry;
1037
1038 /* get the reversed tuple */
1039 swap(rev_tuple.port.src, rev_tuple.port.dst);
1040
1041 if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1042 __be32 tmp_addr = rev_tuple.ip.src_v4;
1043
1044 rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
1045 rev_tuple.ip.dst_v4 = tmp_addr;
1046 } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
1047 struct in6_addr tmp_addr = rev_tuple.ip.src_v6;
1048
1049 rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
1050 rev_tuple.ip.dst_v6 = tmp_addr;
1051 } else {
1052 return ERR_PTR(-EOPNOTSUPP);
1053 }
1054
1055 /* Use the same counter as the reverse direction */
1056 spin_lock_bh(&ct_priv->ht_lock);
1057 rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);
1058
1059 if (IS_ERR(rev_entry)) {
1060 spin_unlock_bh(&ct_priv->ht_lock);
1061 goto create_counter;
1062 }
1063
1064 if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
1065 ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry);
1066 shared_counter = rev_entry->counter;
1067 spin_unlock_bh(&ct_priv->ht_lock);
1068
1069 mlx5_tc_ct_entry_put(rev_entry);
1070 return shared_counter;
1071 }
1072
1073 spin_unlock_bh(&ct_priv->ht_lock);
1074
1075 create_counter:
1076
1077 shared_counter = mlx5_tc_ct_counter_create(ct_priv);
1078 if (IS_ERR(shared_counter))
1079 return shared_counter;
1080
1081 shared_counter->is_shared = true;
1082 refcount_set(&shared_counter->refcount, 1);
1083 return shared_counter;
1084 }
1085
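/* Each offloaded entry gets two rules: one in the plain ct table and one in
 * the ct_nat table (carrying the NAT rewrite when the entry has one).
 */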
1086 static int
1087 mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
1088 struct flow_rule *flow_rule,
1089 struct mlx5_ct_entry *entry,
1090 u8 zone_restore_id)
1091 {
1092 int err;
1093
1094 if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
1095 entry->counter = mlx5_tc_ct_counter_create(ct_priv);
1096 else
1097 entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
1098
1099 if (IS_ERR(entry->counter)) {
1100 err = PTR_ERR(entry->counter);
1101 return err;
1102 }
1103
1104 err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
1105 zone_restore_id);
1106 if (err)
1107 goto err_orig;
1108
1109 err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
1110 zone_restore_id);
1111 if (err)
1112 goto err_nat;
1113
1114 atomic_inc(&ct_priv->debugfs.stats.offloaded);
1115 return 0;
1116
1117 err_nat:
1118 mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
1119 err_orig:
1120 mlx5_tc_ct_counter_put(ct_priv, entry);
1121 return err;
1122 }
1123
1124 static int
1125 mlx5_tc_ct_entry_replace_rules(struct mlx5_tc_ct_priv *ct_priv,
1126 struct flow_rule *flow_rule,
1127 struct mlx5_ct_entry *entry,
1128 u8 zone_restore_id)
1129 {
1130 int err;
1131
1132 err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, false,
1133 zone_restore_id);
1134 if (err)
1135 return err;
1136
1137 err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, true,
1138 zone_restore_id);
1139 if (err)
1140 mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
1141 return err;
1142 }
1143
1144 static int
1145 mlx5_tc_ct_block_flow_offload_replace(struct mlx5_ct_ft *ft, struct flow_rule *flow_rule,
1146 struct mlx5_ct_entry *entry, unsigned long cookie)
1147 {
1148 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1149 int err;
1150
1151 err = mlx5_tc_ct_entry_replace_rules(ct_priv, flow_rule, entry, ft->zone_restore_id);
1152 if (!err)
1153 return 0;
1154
1155 /* If updating the entry failed, look it up again under ht_lock
1156 * protection and remove it so it is properly deleted.
1157 */
1158 spin_lock_bh(&ct_priv->ht_lock);
1159 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1160 if (entry) {
1161 rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
1162 spin_unlock_bh(&ct_priv->ht_lock);
1163 mlx5_tc_ct_entry_put(entry);
1164 } else {
1165 spin_unlock_bh(&ct_priv->ht_lock);
1166 }
1167 return err;
1168 }
1169
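/* FLOW_CLS_REPLACE handler: a cookie we already track with new metadata is
 * handled as an in-place replace; otherwise a new entry is created, inserted
 * into the cookie and tuple hashtables and then offloaded.
 */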
1170 static int
1171 mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
1172 struct flow_cls_offload *flow)
1173 {
1174 struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
1175 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1176 struct flow_action_entry *meta_action;
1177 unsigned long cookie = flow->cookie;
1178 struct mlx5_ct_entry *entry;
1179 int err;
1180
1181 meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
1182 if (!meta_action)
1183 return -EOPNOTSUPP;
1184
1185 spin_lock_bh(&ct_priv->ht_lock);
1186 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1187 if (entry && refcount_inc_not_zero(&entry->refcnt)) {
1188 if (entry->restore_cookie == meta_action->ct_metadata.cookie) {
1189 spin_unlock_bh(&ct_priv->ht_lock);
1190 mlx5_tc_ct_entry_put(entry);
1191 return -EEXIST;
1192 }
1193 entry->restore_cookie = meta_action->ct_metadata.cookie;
1194 spin_unlock_bh(&ct_priv->ht_lock);
1195
1196 err = mlx5_tc_ct_block_flow_offload_replace(ft, flow_rule, entry, cookie);
1197 mlx5_tc_ct_entry_put(entry);
1198 return err;
1199 }
1200 spin_unlock_bh(&ct_priv->ht_lock);
1201
1202 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1203 if (!entry)
1204 return -ENOMEM;
1205
1206 entry->tuple.zone = ft->zone;
1207 entry->cookie = flow->cookie;
1208 entry->restore_cookie = meta_action->ct_metadata.cookie;
1209 refcount_set(&entry->refcnt, 2);
1210 entry->ct_priv = ct_priv;
1211
1212 err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
1213 if (err)
1214 goto err_set;
1215
1216 memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
1217 err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
1218 if (err)
1219 goto err_set;
1220
1221 spin_lock_bh(&ct_priv->ht_lock);
1222
1223 err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
1224 cts_ht_params);
1225 if (err)
1226 goto err_entries;
1227
1228 err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
1229 &entry->tuple_node,
1230 tuples_ht_params);
1231 if (err)
1232 goto err_tuple;
1233
1234 if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
1235 err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
1236 &entry->tuple_nat_node,
1237 tuples_nat_ht_params);
1238 if (err)
1239 goto err_tuple_nat;
1240 }
1241 spin_unlock_bh(&ct_priv->ht_lock);
1242
1243 err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
1244 ft->zone_restore_id);
1245 if (err)
1246 goto err_rules;
1247
1248 set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
1249 mlx5_tc_ct_entry_put(entry); /* drop this function's reference */
1250
1251 return 0;
1252
1253 err_rules:
1254 spin_lock_bh(&ct_priv->ht_lock);
1255 if (mlx5_tc_ct_entry_has_nat(entry))
1256 rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
1257 &entry->tuple_nat_node, tuples_nat_ht_params);
1258 err_tuple_nat:
1259 rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
1260 &entry->tuple_node,
1261 tuples_ht_params);
1262 err_tuple:
1263 rhashtable_remove_fast(&ft->ct_entries_ht,
1264 &entry->node,
1265 cts_ht_params);
1266 err_entries:
1267 spin_unlock_bh(&ct_priv->ht_lock);
1268 err_set:
1269 kfree(entry);
1270 if (err != -EEXIST)
1271 netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
1272 return err;
1273 }
1274
1275 static int
1276 mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
1277 struct flow_cls_offload *flow)
1278 {
1279 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1280 unsigned long cookie = flow->cookie;
1281 struct mlx5_ct_entry *entry;
1282
1283 spin_lock_bh(&ct_priv->ht_lock);
1284 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1285 if (!entry) {
1286 spin_unlock_bh(&ct_priv->ht_lock);
1287 return -ENOENT;
1288 }
1289
1290 if (!mlx5_tc_ct_entry_valid(entry)) {
1291 spin_unlock_bh(&ct_priv->ht_lock);
1292 return -EINVAL;
1293 }
1294
1295 rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
1296 spin_unlock_bh(&ct_priv->ht_lock);
1297
1298 mlx5_tc_ct_entry_put(entry);
1299
1300 return 0;
1301 }
1302
1303 static int
1304 mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
1305 struct flow_cls_offload *f)
1306 {
1307 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1308 unsigned long cookie = f->cookie;
1309 struct mlx5_ct_entry *entry;
1310 u64 lastuse, packets, bytes;
1311
1312 spin_lock_bh(&ct_priv->ht_lock);
1313 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1314 if (!entry) {
1315 spin_unlock_bh(&ct_priv->ht_lock);
1316 return -ENOENT;
1317 }
1318
1319 if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
1320 spin_unlock_bh(&ct_priv->ht_lock);
1321 return -EINVAL;
1322 }
1323
1324 spin_unlock_bh(&ct_priv->ht_lock);
1325
1326 mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
1327 flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
1328 FLOW_ACTION_HW_STATS_DELAYED);
1329
1330 mlx5_tc_ct_entry_put(entry);
1331 return 0;
1332 }
1333
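/* nf flowtable offload callback: conntrack calls this per zone to add,
 * delete and query stats for its offloadable entries.
 */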
1334 static int
1335 mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
1336 void *cb_priv)
1337 {
1338 struct flow_cls_offload *f = type_data;
1339 struct mlx5_ct_ft *ft = cb_priv;
1340
1341 if (type != TC_SETUP_CLSFLOWER)
1342 return -EOPNOTSUPP;
1343
1344 switch (f->command) {
1345 case FLOW_CLS_REPLACE:
1346 return mlx5_tc_ct_block_flow_offload_add(ft, f);
1347 case FLOW_CLS_DESTROY:
1348 return mlx5_tc_ct_block_flow_offload_del(ft, f);
1349 case FLOW_CLS_STATS:
1350 return mlx5_tc_ct_block_flow_offload_stats(ft, f);
1351 default:
1352 break;
1353 }
1354
1355 return -EOPNOTSUPP;
1356 }
1357
1358 static bool
1359 mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
1360 u16 zone)
1361 {
1362 struct flow_keys flow_keys;
1363
1364 skb_reset_network_header(skb);
1365 skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);
1366
1367 tuple->zone = zone;
1368
1369 if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
1370 flow_keys.basic.ip_proto != IPPROTO_UDP &&
1371 flow_keys.basic.ip_proto != IPPROTO_GRE)
1372 return false;
1373
1374 if (flow_keys.basic.ip_proto == IPPROTO_TCP ||
1375 flow_keys.basic.ip_proto == IPPROTO_UDP) {
1376 tuple->port.src = flow_keys.ports.src;
1377 tuple->port.dst = flow_keys.ports.dst;
1378 }
1379 tuple->n_proto = flow_keys.basic.n_proto;
1380 tuple->ip_proto = flow_keys.basic.ip_proto;
1381
1382 switch (flow_keys.basic.n_proto) {
1383 case htons(ETH_P_IP):
1384 tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1385 tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
1386 tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
1387 break;
1388
1389 case htons(ETH_P_IPV6):
1390 tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1391 tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
1392 tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
1393 break;
1394 default:
1395 goto out;
1396 }
1397
1398 return true;
1399
1400 out:
1401 return false;
1402 }
1403
1404 int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
1405 {
1406 u32 ctstate = 0, ctstate_mask = 0;
1407
1408 mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
1409 &ctstate, &ctstate_mask);
1410
1411 if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT)
1412 return -EOPNOTSUPP;
1413
1414 ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
1415 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1416 ctstate, ctstate_mask);
1417
1418 return 0;
1419 }
1420
1421 void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
1422 {
1423 if (!priv || !ct_attr->ct_labels_id)
1424 return;
1425
1426 mlx5_put_label_mapping(priv, ct_attr->ct_labels_id);
1427 }
1428
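/* Translate the flower ct_state/ct_zone/ct_mark/ct_labels keys into matches
 * on the corresponding metadata registers. Matching +rel or +inv is
 * rejected; only their cleared (negated) forms can be offloaded.
 */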
1429 int
1430 mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
1431 struct mlx5_flow_spec *spec,
1432 struct flow_cls_offload *f,
1433 struct mlx5_ct_attr *ct_attr,
1434 struct netlink_ext_ack *extack)
1435 {
1436 bool trk, est, untrk, unnew, unest, new, rpl, unrpl, rel, unrel, inv, uninv;
1437 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1438 struct flow_dissector_key_ct *mask, *key;
1439 u32 ctstate = 0, ctstate_mask = 0;
1440 u16 ct_state_on, ct_state_off;
1441 u16 ct_state, ct_state_mask;
1442 struct flow_match_ct match;
1443 u32 ct_labels[4];
1444
1445 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
1446 return 0;
1447
1448 if (!priv) {
1449 NL_SET_ERR_MSG_MOD(extack,
1450 "offload of ct matching isn't available");
1451 return -EOPNOTSUPP;
1452 }
1453
1454 flow_rule_match_ct(rule, &match);
1455
1456 key = match.key;
1457 mask = match.mask;
1458
1459 ct_state = key->ct_state;
1460 ct_state_mask = mask->ct_state;
1461
1462 if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
1463 TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
1464 TCA_FLOWER_KEY_CT_FLAGS_NEW |
1465 TCA_FLOWER_KEY_CT_FLAGS_REPLY |
1466 TCA_FLOWER_KEY_CT_FLAGS_RELATED |
1467 TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
1468 NL_SET_ERR_MSG_MOD(extack,
1469 "only ct_state trk, est, new and rpl are supported for offload");
1470 return -EOPNOTSUPP;
1471 }
1472
1473 ct_state_on = ct_state & ct_state_mask;
1474 ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
1475 trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1476 new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
1477 est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1478 rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
1479 rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
1480 inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
1481 untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1482 unnew = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_NEW;
1483 unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1484 unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
1485 unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
1486 uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
1487
1488 ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
1489 ctstate |= new ? MLX5_CT_STATE_NEW_BIT : 0;
1490 ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1491 ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
1492 ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
1493 ctstate_mask |= (unnew || new) ? MLX5_CT_STATE_NEW_BIT : 0;
1494 ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1495 ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
1496 ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0;
1497 ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0;
1498
1499 if (rel) {
1500 NL_SET_ERR_MSG_MOD(extack,
1501 "matching on ct_state +rel isn't supported");
1502 return -EOPNOTSUPP;
1503 }
1504
1505 if (inv) {
1506 NL_SET_ERR_MSG_MOD(extack,
1507 "matching on ct_state +inv isn't supported");
1508 return -EOPNOTSUPP;
1509 }
1510
1511 if (mask->ct_zone)
1512 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1513 key->ct_zone, MLX5_CT_ZONE_MASK);
1514 if (ctstate_mask)
1515 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1516 ctstate, ctstate_mask);
1517 if (mask->ct_mark)
1518 mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
1519 key->ct_mark, mask->ct_mark);
1520 if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
1521 mask->ct_labels[3]) {
1522 ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
1523 ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
1524 ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
1525 ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
1526 if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id))
1527 return -EOPNOTSUPP;
1528 mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
1529 MLX5_CT_LABELS_MASK);
1530 }
1531
1532 return 0;
1533 }
1534
1535 int
1536 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
1537 struct mlx5_flow_attr *attr,
1538 const struct flow_action_entry *act,
1539 struct netlink_ext_ack *extack)
1540 {
1541 if (!priv) {
1542 NL_SET_ERR_MSG_MOD(extack,
1543 "offload of ct action isn't available");
1544 return -EOPNOTSUPP;
1545 }
1546
1547 attr->ct_attr.ct_action |= act->ct.action; /* So we can have clear + ct */
1548 attr->ct_attr.zone = act->ct.zone;
1549 if (!(act->ct.action & TCA_CT_ACT_CLEAR))
1550 attr->ct_attr.nf_ft = act->ct.flow_table;
1551 attr->ct_attr.act_miss_cookie = act->miss_cookie;
1552
1553 return 0;
1554 }
1555
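/* Pre-ct rules: packets whose ctstate register already carries +trk (and
 * +nat for the NAT table) in this zone jump straight to post_act; everything
 * else misses into the ct/ct_nat tables to be matched against offloaded
 * entries.
 */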
1556 static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
1557 struct mlx5_tc_ct_pre *pre_ct,
1558 bool nat)
1559 {
1560 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1561 struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1562 struct mlx5_core_dev *dev = ct_priv->dev;
1563 struct mlx5_flow_table *ft = pre_ct->ft;
1564 struct mlx5_flow_destination dest = {};
1565 struct mlx5_flow_act flow_act = {};
1566 struct mlx5_modify_hdr *mod_hdr;
1567 struct mlx5_flow_handle *rule;
1568 struct mlx5_flow_spec *spec;
1569 u32 ctstate;
1570 u16 zone;
1571 int err;
1572
1573 spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1574 if (!spec)
1575 return -ENOMEM;
1576
1577 zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
1578 err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
1579 ZONE_TO_REG, zone);
1580 if (err) {
1581 ct_dbg("Failed to set zone register mapping");
1582 goto err_mapping;
1583 }
1584
1585 mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
1586 pre_mod_acts.num_actions,
1587 pre_mod_acts.actions);
1588
1589 if (IS_ERR(mod_hdr)) {
1590 err = PTR_ERR(mod_hdr);
1591 ct_dbg("Failed to create pre ct mod hdr");
1592 goto err_mapping;
1593 }
1594 pre_ct->modify_hdr = mod_hdr;
1595
1596 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1597 MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1598 flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1599 flow_act.modify_hdr = mod_hdr;
1600 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1601
1602 /* add flow rule */
1603 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1604 zone, MLX5_CT_ZONE_MASK);
1605 ctstate = MLX5_CT_STATE_TRK_BIT;
1606 if (nat)
1607 ctstate |= MLX5_CT_STATE_NAT_BIT;
1608 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
1609
1610 dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
1611 rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
1612 if (IS_ERR(rule)) {
1613 err = PTR_ERR(rule);
1614 ct_dbg("Failed to add pre ct flow rule zone %d", zone);
1615 goto err_flow_rule;
1616 }
1617 pre_ct->flow_rule = rule;
1618
1619 /* add miss rule */
1620 dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
1621 rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1);
1622 if (IS_ERR(rule)) {
1623 err = PTR_ERR(rule);
1624 ct_dbg("Failed to add pre ct miss rule zone %d", zone);
1625 goto err_miss_rule;
1626 }
1627 pre_ct->miss_rule = rule;
1628
1629 mlx5e_mod_hdr_dealloc(&pre_mod_acts);
1630 kvfree(spec);
1631 return 0;
1632
1633 err_miss_rule:
1634 mlx5_del_flow_rules(pre_ct->flow_rule);
1635 err_flow_rule:
1636 mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1637 err_mapping:
1638 mlx5e_mod_hdr_dealloc(&pre_mod_acts);
1639 kvfree(spec);
1640 return err;
1641 }
1642
1643 static void
1644 tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
1645 struct mlx5_tc_ct_pre *pre_ct)
1646 {
1647 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1648 struct mlx5_core_dev *dev = ct_priv->dev;
1649
1650 mlx5_del_flow_rules(pre_ct->flow_rule);
1651 mlx5_del_flow_rules(pre_ct->miss_rule);
1652 mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1653 }
1654
1655 static int
1656 mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
1657 struct mlx5_tc_ct_pre *pre_ct,
1658 bool nat)
1659 {
1660 int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1661 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1662 struct mlx5_core_dev *dev = ct_priv->dev;
1663 struct mlx5_flow_table_attr ft_attr = {};
1664 struct mlx5_flow_namespace *ns;
1665 struct mlx5_flow_table *ft;
1666 struct mlx5_flow_group *g;
1667 u32 metadata_reg_c_2_mask;
1668 u32 *flow_group_in;
1669 void *misc;
1670 int err;
1671
1672 ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
1673 if (!ns) {
1674 err = -EOPNOTSUPP;
1675 ct_dbg("Failed to get flow namespace");
1676 return err;
1677 }
1678
1679 flow_group_in = kvzalloc(inlen, GFP_KERNEL);
1680 if (!flow_group_in)
1681 return -ENOMEM;
1682
1683 ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
1684 ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
1685 FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
1686 ft_attr.max_fte = 2;
1687 ft_attr.level = 1;
1688 ft = mlx5_create_flow_table(ns, &ft_attr);
1689 if (IS_ERR(ft)) {
1690 err = PTR_ERR(ft);
1691 ct_dbg("Failed to create pre ct table");
1692 goto out_free;
1693 }
1694 pre_ct->ft = ft;
1695
1696 /* create flow group */
1697 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1698 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
1699 MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1700 MLX5_MATCH_MISC_PARAMETERS_2);
1701
1702 misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1703 match_criteria.misc_parameters_2);
1704
1705 metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
1706 metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
1707 if (nat)
1708 metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
1709
1710 MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
1711 metadata_reg_c_2_mask);
1712
1713 g = mlx5_create_flow_group(ft, flow_group_in);
1714 if (IS_ERR(g)) {
1715 err = PTR_ERR(g);
1716 ct_dbg("Failed to create pre ct group");
1717 goto err_flow_grp;
1718 }
1719 pre_ct->flow_grp = g;
1720
1721 /* create miss group */
1722 memset(flow_group_in, 0, inlen);
1723 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
1724 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
1725 g = mlx5_create_flow_group(ft, flow_group_in);
1726 if (IS_ERR(g)) {
1727 err = PTR_ERR(g);
1728 ct_dbg("Failed to create pre ct miss group");
1729 goto err_miss_grp;
1730 }
1731 pre_ct->miss_grp = g;
1732
1733 err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
1734 if (err)
1735 goto err_add_rules;
1736
1737 kvfree(flow_group_in);
1738 return 0;
1739
1740 err_add_rules:
1741 mlx5_destroy_flow_group(pre_ct->miss_grp);
1742 err_miss_grp:
1743 mlx5_destroy_flow_group(pre_ct->flow_grp);
1744 err_flow_grp:
1745 mlx5_destroy_flow_table(ft);
1746 out_free:
1747 kvfree(flow_group_in);
1748 return err;
1749 }
1750
1751 static void
1752 mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
1753 struct mlx5_tc_ct_pre *pre_ct)
1754 {
1755 tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
1756 mlx5_destroy_flow_group(pre_ct->miss_grp);
1757 mlx5_destroy_flow_group(pre_ct->flow_grp);
1758 mlx5_destroy_flow_table(pre_ct->ft);
1759 }
1760
1761 static int
1762 mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
1763 {
1764 int err;
1765
1766 err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
1767 if (err)
1768 return err;
1769
1770 err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
1771 if (err)
1772 goto err_pre_ct_nat;
1773
1774 return 0;
1775
1776 err_pre_ct_nat:
1777 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1778 return err;
1779 }
1780
1781 static void
1782 mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
1783 {
1784 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
1785 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1786 }
1787
1788 /* Give ct_entries_ht a lock class different from the flow group's ftes_hash
1789  * to avoid a false lockdep dependency warning: deleting the last flow of a
1790  * group and then deleting the group reaches del_sw_flow_group(), which calls
1791  * rhashtable_destroy() on fg->ftes_hash and takes that hashtable's ht->mutex,
1792  * which is a different mutex than the ct_entries_ht one here.
1793  */
1794 static struct lock_class_key ct_entries_ht_lock_key;
1795
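/* Get the per-zone ct context: reuse an existing zone_ht entry if one is
 * already registered for @zone, otherwise allocate a new one, map the zone to
 * a restore id, create the pre_ct/pre_ct_nat tables and register
 * mlx5_tc_ct_block_flow_offload() as the flow_offload callback on @nf_ft.
 */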
1796 static struct mlx5_ct_ft *
1797 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
1798 struct nf_flowtable *nf_ft)
1799 {
1800 struct mlx5_ct_ft *ft;
1801 int err;
1802
1803 ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
1804 if (ft) {
1805 refcount_inc(&ft->refcount);
1806 return ft;
1807 }
1808
1809 ft = kzalloc(sizeof(*ft), GFP_KERNEL);
1810 if (!ft)
1811 return ERR_PTR(-ENOMEM);
1812
1813 err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
1814 if (err)
1815 goto err_mapping;
1816
1817 ft->zone = zone;
1818 ft->nf_ft = nf_ft;
1819 ft->ct_priv = ct_priv;
1820 refcount_set(&ft->refcount, 1);
1821
1822 err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
1823 if (err)
1824 goto err_alloc_pre_ct;
1825
1826 err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
1827 if (err)
1828 goto err_init;
1829
1830 lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key);
1831
1832 err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
1833 zone_params);
1834 if (err)
1835 goto err_insert;
1836
1837 err = nf_flow_table_offload_add_cb(ft->nf_ft,
1838 mlx5_tc_ct_block_flow_offload, ft);
1839 if (err)
1840 goto err_add_cb;
1841
1842 return ft;
1843
1844 err_add_cb:
1845 rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1846 err_insert:
1847 rhashtable_destroy(&ft->ct_entries_ht);
1848 err_init:
1849 mlx5_tc_ct_free_pre_ct_tables(ft);
1850 err_alloc_pre_ct:
1851 mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1852 err_mapping:
1853 kfree(ft);
1854 return ERR_PTR(err);
1855 }
1856
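/* rhashtable_free_and_destroy() callback: drop the reference that
 * ct_entries_ht held on each remaining offloaded entry.
 */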
1857 static void
1858 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
1859 {
1860 struct mlx5_ct_entry *entry = ptr;
1861
1862 mlx5_tc_ct_entry_put(entry);
1863 }
1864
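/* Release a reference on the per-zone ct context. On the last put, unregister
 * the flow_offload callback, flush pending entry work, free any remaining
 * entries and tear down the pre_ct tables and the zone restore mapping.
 */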
1865 static void
1866 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
1867 {
1868 if (!refcount_dec_and_test(&ft->refcount))
1869 return;
1870
1871 flush_workqueue(ct_priv->wq);
1872 nf_flow_table_offload_del_cb(ft->nf_ft,
1873 mlx5_tc_ct_block_flow_offload, ft);
1874 rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1875 rhashtable_free_and_destroy(&ft->ct_entries_ht,
1876 mlx5_tc_ct_flush_ft_entry,
1877 ct_priv);
1878 mlx5_tc_ct_free_pre_ct_tables(ft);
1879 mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1880 kfree(ft);
1881 }
1882
1883 /* We translate the tc filter with CT action to the following HW model:
1884 *
1885 * +-----------------------+
1886 * + rule (either original +
1887 * + or post_act rule) +
1888 * +-----------------------+
1889 * | set act_miss_cookie mapping
1890 * | set fte_id
1891 * | set tunnel_id
1892 * | rest of actions before the CT action (for this orig/post_act rule)
1893 * |
1894 * +-------------+
1895 * | Chain 0 |
1896 * | optimization|
1897 * | v
1898 * | +---------------------+
1899 * | + pre_ct/pre_ct_nat + if matches +----------------------+
1900 * | + zone+nat match +---------------->+ post_act (see below) +
1901 * | +---------------------+ set zone +----------------------+
1902 * | |
1903 * +-------------+ set zone
1904 * |
1905 * v
1906 * +--------------------+
1907 * + CT (nat or no nat) +
1908 * + tuple + zone match +
1909 * +--------------------+
1910 * | set mark
1911 * | set labels_id
1912 * | set established
1913 * | set zone_restore
1914 * | do nat (if needed)
1915 * v
1916 * +--------------+
1917 * + post_act + rest of parsed filter's actions
1918 * + fte_id match +------------------------>
1919 * +--------------+
1920 *
1921 */
1922 static int
1923 __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
1924 struct mlx5_flow_attr *attr)
1925 {
1926 bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
1927 struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1928 int act_miss_mapping = 0, err;
1929 struct mlx5_ct_ft *ft;
1930 u16 zone;
1931
1932 /* Register for CT established events */
1933 ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
1934 attr->ct_attr.nf_ft);
1935 if (IS_ERR(ft)) {
1936 err = PTR_ERR(ft);
1937 ct_dbg("Failed to register to ft callback");
1938 goto err_ft;
1939 }
1940 attr->ct_attr.ft = ft;
1941
1942 err = mlx5e_tc_action_miss_mapping_get(ct_priv->priv, attr, attr->ct_attr.act_miss_cookie,
1943 &act_miss_mapping);
1944 if (err) {
1945 ct_dbg("Failed to get register mapping for act miss");
1946 goto err_get_act_miss;
1947 }
1948
1949 err = mlx5e_tc_match_to_reg_set(priv->mdev, &attr->parse_attr->mod_hdr_acts,
1950 ct_priv->ns_type, MAPPED_OBJ_TO_REG, act_miss_mapping);
1951 if (err) {
1952 ct_dbg("Failed to set act miss register mapping");
1953 goto err_mapping;
1954 }
1955
1956 /* Chain 0 sets the zone and jumps to ct table
1957 * Other chains jump to pre_ct table to align with act_ct cached logic
1958 */
1959 if (!attr->chain) {
1960 zone = ft->zone & MLX5_CT_ZONE_MASK;
1961 err = mlx5e_tc_match_to_reg_set(priv->mdev, &attr->parse_attr->mod_hdr_acts,
1962 ct_priv->ns_type, ZONE_TO_REG, zone);
1963 if (err) {
1964 ct_dbg("Failed to set zone register mapping");
1965 goto err_mapping;
1966 }
1967
1968 attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct;
1969 } else {
1970 attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
1971 }
1972
1973 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1974 attr->ct_attr.act_miss_mapping = act_miss_mapping;
1975
1976 return 0;
1977
1978 err_mapping:
1979 mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, act_miss_mapping);
1980 err_get_act_miss:
1981 mlx5_tc_ct_del_ft_cb(ct_priv, ft);
1982 err_ft:
1983 netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
1984 return err;
1985 }
1986
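/* Offload the conntrack part of a tc flow. A ct_clear action is handled by
 * zeroing the CT state registers through a modify-header action; a ct()
 * action (nf_ft set) additionally goes through __mlx5_tc_ct_flow_offload()
 * under control_lock to redirect the flow to the CT tables.
 */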
1987 int
1988 mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr)
1989 {
1990 int err;
1991
1992 if (!priv)
1993 return -EOPNOTSUPP;
1994
1995 if (attr->ct_attr.offloaded)
1996 return 0;
1997
1998 if (attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR) {
1999 err = mlx5_tc_ct_entry_set_registers(priv, &attr->parse_attr->mod_hdr_acts,
2000 0, 0, 0, 0);
2001 if (err)
2002 return err;
2003
2004 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2005 }
2006
2007 	if (!attr->ct_attr.nf_ft) { /* only a ct_clear action, not ct_clear followed by ct() */
2008 attr->ct_attr.offloaded = true;
2009 return 0;
2010 }
2011
2012 mutex_lock(&priv->control_lock);
2013 err = __mlx5_tc_ct_flow_offload(priv, attr);
2014 if (!err)
2015 attr->ct_attr.offloaded = true;
2016 mutex_unlock(&priv->control_lock);
2017
2018 return err;
2019 }
2020
2021 static void
2022 __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
2023 struct mlx5_flow_attr *attr)
2024 {
2025 mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, attr->ct_attr.act_miss_mapping);
2026 mlx5_tc_ct_del_ft_cb(ct_priv, attr->ct_attr.ft);
2027 }
2028
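/* Undo mlx5_tc_ct_flow_offload() for flows that used a ct() action; flows
 * that only carried ct_clear (no nf_ft) have nothing to release here.
 */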
2029 void
2030 mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
2031 struct mlx5_flow_attr *attr)
2032 {
2033 if (!attr->ct_attr.offloaded) /* no ct action, return */
2034 return;
2035 	if (!attr->ct_attr.nf_ft) /* only a ct_clear action, not ct_clear followed by ct() */
2036 return;
2037
2038 mutex_lock(&priv->control_lock);
2039 __mlx5_tc_ct_delete_flow(priv, attr);
2040 mutex_unlock(&priv->control_lock);
2041 }
2042
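/* Select and initialize the ct flow-steering provider: SMFS when offloading
 * to the FDB and the device steering mode is SMFS, the default DMFS provider
 * otherwise.
 */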
2043 static int
2044 mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv)
2045 {
2046 struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
2047 struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get();
2048 int err;
2049
2050 if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
2051 ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) {
2052 ct_dbg("Using SMFS ct flow steering provider");
2053 fs_ops = mlx5_ct_fs_smfs_ops_get();
2054 }
2055
2056 ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL);
2057 if (!ct_priv->fs)
2058 return -ENOMEM;
2059
2060 ct_priv->fs->netdev = ct_priv->netdev;
2061 ct_priv->fs->dev = ct_priv->dev;
2062 ct_priv->fs_ops = fs_ops;
2063
2064 err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct);
2065 if (err)
2066 goto err_init;
2067
2068 return 0;
2069
2070 err_init:
2071 kfree(ct_priv->fs);
2072 return err;
2073 }
2074
2075 static int
2076 mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
2077 const char **err_msg)
2078 {
2079 if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
2080 		/* The vlan workaround must be avoided for multi-chain rules.
2081 		 * This is only a sanity check, since pop vlan should be
2082 		 * supported by any FW that supports ignore_flow_level.
2083 		 */
2084
2085 *err_msg = "firmware vlan actions support is missing";
2086 return -EOPNOTSUPP;
2087 }
2088
2089 if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
2090 fdb_modify_header_fwd_to_table)) {
2091 		/* CT always writes to registers, which are modify header actions.
2092 		 * Therefore, modify header and forward-to-table (goto) support is required.
2093 		 */
2094
2095 *err_msg = "firmware fwd and modify support is missing";
2096 return -EOPNOTSUPP;
2097 }
2098
2099 if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2100 *err_msg = "register loopback isn't supported";
2101 return -EOPNOTSUPP;
2102 }
2103
2104 return 0;
2105 }
2106
2107 static int
2108 mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
2109 enum mlx5_flow_namespace_type ns_type,
2110 struct mlx5e_post_act *post_act)
2111 {
2112 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2113 const char *err_msg = NULL;
2114 int err = 0;
2115
2116 if (IS_ERR_OR_NULL(post_act)) {
2117 		/* ignore_flow_level isn't supported by default for VFs, so post_act
2118 		 * won't be available there. Skip showing the error message in that case.
2119 		 */
2120 if (priv->mdev->coredev_type == MLX5_COREDEV_PF)
2121 err_msg = "post action is missing";
2122 err = -EOPNOTSUPP;
2123 goto out_err;
2124 }
2125
2126 if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
2127 err = mlx5_tc_ct_init_check_esw_support(esw, &err_msg);
2128
2129 out_err:
2130 if (err && err_msg)
2131 netdev_dbg(priv->netdev, "tc ct offload not supported, %s\n", err_msg);
2132 return err;
2133 }
2134
2135 static void
2136 mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
2137 {
2138 struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs;
2139
2140 ct_dbgfs->root = debugfs_create_dir("ct", mlx5_debugfs_get_dev_root(ct_priv->dev));
2141 debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root,
2142 &ct_dbgfs->stats.offloaded);
2143 debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root,
2144 &ct_dbgfs->stats.rx_dropped);
2145 }
2146
2147 static void
2148 mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
2149 {
2150 debugfs_remove_recursive(ct_priv->debugfs.root);
2151 }
2152
2153 #define INIT_ERR_PREFIX "tc ct offload init failed"
2154
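/* Create the ct offload context for a namespace (FDB or NIC): verify device
 * support, create the zone and labels register mappings, the ct and ct_nat
 * global tables, the tuple/zone hashtables, the ordered workqueue and the
 * flow-steering provider. Returns NULL on failure, in which case ct offload
 * is simply not available.
 */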
2155 struct mlx5_tc_ct_priv *
2156 mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
2157 struct mod_hdr_tbl *mod_hdr,
2158 enum mlx5_flow_namespace_type ns_type,
2159 struct mlx5e_post_act *post_act)
2160 {
2161 struct mlx5_tc_ct_priv *ct_priv;
2162 struct mlx5_core_dev *dev;
2163 u64 mapping_id;
2164 int err;
2165
2166 dev = priv->mdev;
2167 err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act);
2168 if (err)
2169 goto err_support;
2170
2171 ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
2172 if (!ct_priv)
2173 goto err_alloc;
2174
2175 mapping_id = mlx5_query_nic_system_image_guid(dev);
2176
2177 ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE,
2178 sizeof(u16), 0, true);
2179 if (IS_ERR(ct_priv->zone_mapping)) {
2180 err = PTR_ERR(ct_priv->zone_mapping);
2181 goto err_mapping_zone;
2182 }
2183
2184 ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS,
2185 sizeof(u32) * 4, 0, true);
2186 if (IS_ERR(ct_priv->labels_mapping)) {
2187 err = PTR_ERR(ct_priv->labels_mapping);
2188 goto err_mapping_labels;
2189 }
2190
2191 spin_lock_init(&ct_priv->ht_lock);
2192 ct_priv->priv = priv;
2193 ct_priv->ns_type = ns_type;
2194 ct_priv->chains = chains;
2195 ct_priv->netdev = priv->netdev;
2196 ct_priv->dev = priv->mdev;
2197 ct_priv->mod_hdr_tbl = mod_hdr;
2198 ct_priv->ct = mlx5_chains_create_global_table(chains);
2199 if (IS_ERR(ct_priv->ct)) {
2200 err = PTR_ERR(ct_priv->ct);
2201 mlx5_core_warn(dev,
2202 "%s, failed to create ct table err: %d\n",
2203 INIT_ERR_PREFIX, err);
2204 goto err_ct_tbl;
2205 }
2206
2207 ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
2208 if (IS_ERR(ct_priv->ct_nat)) {
2209 err = PTR_ERR(ct_priv->ct_nat);
2210 mlx5_core_warn(dev,
2211 "%s, failed to create ct nat table err: %d\n",
2212 INIT_ERR_PREFIX, err);
2213 goto err_ct_nat_tbl;
2214 }
2215
2216 ct_priv->post_act = post_act;
2217 mutex_init(&ct_priv->control_lock);
2218 if (rhashtable_init(&ct_priv->zone_ht, &zone_params))
2219 goto err_ct_zone_ht;
2220 if (rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params))
2221 goto err_ct_tuples_ht;
2222 if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params))
2223 goto err_ct_tuples_nat_ht;
2224
2225 ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0);
2226 if (!ct_priv->wq) {
2227 err = -ENOMEM;
2228 goto err_wq;
2229 }
2230
2231 err = mlx5_tc_ct_fs_init(ct_priv);
2232 if (err)
2233 goto err_init_fs;
2234
2235 mlx5_ct_tc_create_dbgfs(ct_priv);
2236 return ct_priv;
2237
2238 err_init_fs:
2239 destroy_workqueue(ct_priv->wq);
2240 err_wq:
2241 rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
2242 err_ct_tuples_nat_ht:
2243 rhashtable_destroy(&ct_priv->ct_tuples_ht);
2244 err_ct_tuples_ht:
2245 rhashtable_destroy(&ct_priv->zone_ht);
2246 err_ct_zone_ht:
2247 mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2248 err_ct_nat_tbl:
2249 mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2250 err_ct_tbl:
2251 mapping_destroy(ct_priv->labels_mapping);
2252 err_mapping_labels:
2253 mapping_destroy(ct_priv->zone_mapping);
2254 err_mapping_zone:
2255 kfree(ct_priv);
2256 err_alloc:
2257 err_support:
2258
2259 return NULL;
2260 }
2261
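/* Tear down everything created by mlx5_tc_ct_init(), in reverse order. */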
2262 void
2263 mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
2264 {
2265 struct mlx5_fs_chains *chains;
2266
2267 if (!ct_priv)
2268 return;
2269
2270 destroy_workqueue(ct_priv->wq);
2271 mlx5_ct_tc_remove_dbgfs(ct_priv);
2272 chains = ct_priv->chains;
2273
2274 ct_priv->fs_ops->destroy(ct_priv->fs);
2275 kfree(ct_priv->fs);
2276
2277 mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2278 mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2279 mapping_destroy(ct_priv->zone_mapping);
2280 mapping_destroy(ct_priv->labels_mapping);
2281
2282 rhashtable_destroy(&ct_priv->ct_tuples_ht);
2283 rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
2284 rhashtable_destroy(&ct_priv->zone_ht);
2285 mutex_destroy(&ct_priv->control_lock);
2286 kfree(ct_priv);
2287 }
2288
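/* Restore conntrack state for a packet received in software with a zone
 * restore id set in its metadata: map zone_restore_id back to the zone,
 * rebuild the tuple from the skb, look up the offloaded entry and let act_ct
 * restore the conntrack info from the entry's restore cookie. Returns false
 * (and counts the packet as rx_dropped) if the entry cannot be found.
 */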
2289 bool
2290 mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
2291 struct sk_buff *skb, u8 zone_restore_id)
2292 {
2293 struct mlx5_ct_tuple tuple = {};
2294 struct mlx5_ct_entry *entry;
2295 u16 zone;
2296
2297 if (!ct_priv || !zone_restore_id)
2298 return true;
2299
2300 if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
2301 goto out_inc_drop;
2302
2303 if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
2304 goto out_inc_drop;
2305
2306 spin_lock(&ct_priv->ht_lock);
2307
2308 entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
2309 if (!entry) {
2310 spin_unlock(&ct_priv->ht_lock);
2311 goto out_inc_drop;
2312 }
2313
2314 if (IS_ERR(entry)) {
2315 spin_unlock(&ct_priv->ht_lock);
2316 goto out_inc_drop;
2317 }
2318 spin_unlock(&ct_priv->ht_lock);
2319
2320 tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
2321 __mlx5_tc_ct_entry_put(entry);
2322
2323 return true;
2324
2325 out_inc_drop:
2326 atomic_inc(&ct_priv->debugfs.stats.rx_dropped);
2327 return false;
2328 }
2329