1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
3 
4 #include <net/fib_notifier.h>
5 #include <net/nexthop.h>
6 #include "tc_tun_encap.h"
7 #include "en_tc.h"
8 #include "tc_tun.h"
9 #include "rep/tc.h"
10 #include "diag/en_tc_tracepoint.h"
11 
12 enum {
13 	MLX5E_ROUTE_ENTRY_VALID     = BIT(0),
14 };
15 
16 struct mlx5e_route_key {
17 	int ip_version;
18 	union {
19 		__be32 v4;
20 		struct in6_addr v6;
21 	} endpoint_ip;
22 };
23 
24 struct mlx5e_route_entry {
25 	struct mlx5e_route_key key;
26 	struct list_head encap_entries;
27 	struct list_head decap_flows;
28 	u32 flags;
29 	struct hlist_node hlist;
30 	refcount_t refcnt;
31 	int tunnel_dev_index;
32 	struct rcu_head rcu;
33 };
34 
35 struct mlx5e_tc_tun_encap {
36 	struct mlx5e_priv *priv;
37 	struct notifier_block fib_nb;
38 	spinlock_t route_lock; /* protects route_tbl */
39 	unsigned long route_tbl_last_update;
40 	DECLARE_HASHTABLE(route_tbl, 8);
41 };
42 
43 static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
44 {
45 	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
46 }
47 
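/* Allocate esw_attr->rx_tun_attr and copy the outer source/destination IP
 * match from @spec into it. The TUN_RX flag (and tun_ip_version) is only
 * set when both addresses are present, since both are required for the
 * later route lookup.
 */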
48 int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
49 			     struct mlx5_flow_spec *spec)
50 {
51 	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
52 	struct mlx5_rx_tun_attr *tun_attr;
53 	void *daddr, *saddr;
54 	u8 ip_version;
55 
56 	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
57 	if (!tun_attr)
58 		return -ENOMEM;
59 
60 	esw_attr->rx_tun_attr = tun_attr;
61 	ip_version = mlx5e_tc_get_ip_version(spec, true);
62 
63 	if (ip_version == 4) {
64 		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
65 				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
66 		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
67 				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
68 		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
69 		tun_attr->src_ip.v4 = *(__be32 *)saddr;
70 		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
71 			return 0;
72 	}
73 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
74 	else if (ip_version == 6) {
75 		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
76 		struct in6_addr zerov6 = {};
77 
78 		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
79 				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
80 		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
81 				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
82 		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
83 		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
84 		if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
85 		    !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
86 			return 0;
87 	}
88 #endif
89 	/* Only set the flag if both src and dst ip addresses exist. They are
90 	 * required to establish routing.
91 	 */
92 	flow_flag_set(flow, TUN_RX);
93 	flow->attr->tun_ip_version = ip_version;
94 	return 0;
95 }
96 
97 static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
98 {
99 	bool all_flow_encaps_valid = true;
100 	int i;
101 
102 	/* Flow can be associated with multiple encap entries.
103 	 * Before offloading the flow verify that all of them have
104 	 * a valid neighbour.
105 	 */
106 	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
107 		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
108 			continue;
109 		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
110 			all_flow_encaps_valid = false;
111 			break;
112 		}
113 	}
114 
115 	return all_flow_encaps_valid;
116 }
117 
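/* Offload the cached encapsulation header of @e and, for every flow in
 * @flow_list whose encap destinations are all valid, replace its slow
 * path rule with the encap (fast path) rule.
 */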
118 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
119 			      struct mlx5e_encap_entry *e,
120 			      struct list_head *flow_list)
121 {
122 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
123 	struct mlx5_pkt_reformat_params reformat_params;
124 	struct mlx5_esw_flow_attr *esw_attr;
125 	struct mlx5_flow_handle *rule;
126 	struct mlx5_flow_attr *attr;
127 	struct mlx5_flow_spec *spec;
128 	struct mlx5e_tc_flow *flow;
129 	int err;
130 
131 	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
132 		return;
133 
134 	memset(&reformat_params, 0, sizeof(reformat_params));
135 	reformat_params.type = e->reformat_type;
136 	reformat_params.size = e->encap_size;
137 	reformat_params.data = e->encap_header;
138 	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
139 						     &reformat_params,
140 						     MLX5_FLOW_NAMESPACE_FDB);
141 	if (IS_ERR(e->pkt_reformat)) {
142 		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
143 			       PTR_ERR(e->pkt_reformat));
144 		return;
145 	}
146 	e->flags |= MLX5_ENCAP_ENTRY_VALID;
147 	mlx5e_rep_queue_neigh_stats_work(priv);
148 
149 	list_for_each_entry(flow, flow_list, tmp_list) {
150 		if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
151 			continue;
152 		attr = flow->attr;
153 		esw_attr = attr->esw_attr;
154 		spec = &attr->parse_attr->spec;
155 
156 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
157 		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
158 
159 		/* Do not offload flows with unresolved neighbors */
160 		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
161 			continue;
162 		/* update from slow path rule to encap rule */
163 		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
164 		if (IS_ERR(rule)) {
165 			err = PTR_ERR(rule);
166 			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
167 				       err);
168 			continue;
169 		}
170 
171 		mlx5e_tc_unoffload_from_slow_path(esw, flow);
172 		flow->rule[0] = rule;
173 		/* was unset when slow path rule removed */
174 		flow_flag_set(flow, OFFLOADED);
175 	}
176 }
177 
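/* Reverse of mlx5e_tc_encap_flows_add(): move every offloaded flow in
 * @flow_list back to a slow path rule, clear its encap-valid destination
 * flag and release the packet reformat context of @e.
 */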
178 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
179 			      struct mlx5e_encap_entry *e,
180 			      struct list_head *flow_list)
181 {
182 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
183 	struct mlx5_esw_flow_attr *esw_attr;
184 	struct mlx5_flow_handle *rule;
185 	struct mlx5_flow_attr *attr;
186 	struct mlx5_flow_spec *spec;
187 	struct mlx5e_tc_flow *flow;
188 	int err;
189 
190 	list_for_each_entry(flow, flow_list, tmp_list) {
191 		if (!mlx5e_is_offloaded_flow(flow))
192 			continue;
193 		attr = flow->attr;
194 		esw_attr = attr->esw_attr;
195 		spec = &attr->parse_attr->spec;
196 
197 		/* Clear pkt_reformat before checking the slow path flag: on a
198 		 * later iteration the same flow may already have the slow path
199 		 * flag set but still needs its pkt_reformat cleared.
200 		 */
201 		if (flow_flag_test(flow, SLOW))
202 			continue;
203 
204 		/* update from encap rule to slow path rule */
205 		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
206 		/* mark the flow's encap dest as non-valid */
207 		esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
208 
209 		if (IS_ERR(rule)) {
210 			err = PTR_ERR(rule);
211 			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
212 				       err);
213 			continue;
214 		}
215 
216 		mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
217 		flow->rule[0] = rule;
218 		/* was unset when fast path rule removed */
219 		flow_flag_set(flow, OFFLOADED);
220 	}
221 
222 	/* we know that the encap is valid */
223 	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
224 	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
225 }
226 
227 static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
228 				struct list_head *flow_list,
229 				int index)
230 {
231 	if (IS_ERR(mlx5e_flow_get(flow))) {
232 		/* Flow is being deleted concurrently. Wait for it to be
233 		 * unoffloaded from hardware, otherwise deleting encap will
234 		 * fail.
235 		 */
236 		wait_for_completion(&flow->del_hw_done);
237 		return;
238 	}
239 	wait_for_completion(&flow->init_done);
240 
241 	flow->tmp_entry_index = index;
242 	list_add(&flow->tmp_list, flow_list);
243 }
244 
245 /* Takes reference to all flows attached to encap and adds the flows to
246  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
247  */
248 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
249 {
250 	struct encap_flow_item *efi;
251 	struct mlx5e_tc_flow *flow;
252 
253 	list_for_each_entry(efi, &e->flows, list) {
254 		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
255 		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
256 	}
257 }
258 
259 /* Takes reference to all flows attached to route and adds the flows to
260  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
261  */
262 static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
263 					     struct list_head *flow_list)
264 {
265 	struct mlx5e_tc_flow *flow;
266 
267 	list_for_each_entry(flow, &r->decap_flows, decap_routes)
268 		mlx5e_take_tmp_flow(flow, flow_list, 0);
269 }
270 
271 typedef bool (match_cb)(struct mlx5e_encap_entry *);
272 
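/* Walk nhe->encap_list under RCU and return the next encap entry after @e
 * that can be referenced and satisfies @match; the reference to @e itself
 * is released before returning.
 */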
273 static struct mlx5e_encap_entry *
274 mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
275 			      struct mlx5e_encap_entry *e,
276 			      match_cb match)
277 {
278 	struct mlx5e_encap_entry *next = NULL;
279 
280 retry:
281 	rcu_read_lock();
282 
283 	/* find encap with non-zero reference counter value */
284 	for (next = e ?
285 		     list_next_or_null_rcu(&nhe->encap_list,
286 					   &e->encap_list,
287 					   struct mlx5e_encap_entry,
288 					   encap_list) :
289 		     list_first_or_null_rcu(&nhe->encap_list,
290 					    struct mlx5e_encap_entry,
291 					    encap_list);
292 	     next;
293 	     next = list_next_or_null_rcu(&nhe->encap_list,
294 					  &next->encap_list,
295 					  struct mlx5e_encap_entry,
296 					  encap_list))
297 		if (mlx5e_encap_take(next))
298 			break;
299 
300 	rcu_read_unlock();
301 
302 	/* release starting encap */
303 	if (e)
304 		mlx5e_encap_put(netdev_priv(e->out_dev), e);
305 	if (!next)
306 		return next;
307 
308 	/* wait for encap to be fully initialized */
309 	wait_for_completion(&next->res_ready);
310 	/* continue searching if encap entry is not in valid state after completion */
311 	if (!match(next)) {
312 		e = next;
313 		goto retry;
314 	}
315 
316 	return next;
317 }
318 
319 static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
320 {
321 	return e->flags & MLX5_ENCAP_ENTRY_VALID;
322 }
323 
324 static struct mlx5e_encap_entry *
325 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
326 			   struct mlx5e_encap_entry *e)
327 {
328 	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
329 }
330 
331 static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
332 {
333 	return e->compl_result >= 0;
334 }
335 
336 struct mlx5e_encap_entry *
337 mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
338 			  struct mlx5e_encap_entry *e)
339 {
340 	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
341 }
342 
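/* Check whether any offloaded flow using this neigh entry has seen traffic
 * since the last report; if so, update reported_lastuse and send a neigh
 * event so the kernel neighbour entry is kept up to date.
 */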
343 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
344 {
345 	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
346 	struct mlx5e_encap_entry *e = NULL;
347 	struct mlx5e_tc_flow *flow;
348 	struct mlx5_fc *counter;
349 	struct neigh_table *tbl;
350 	bool neigh_used = false;
351 	struct neighbour *n;
352 	u64 lastuse;
353 
354 	if (m_neigh->family == AF_INET)
355 		tbl = &arp_tbl;
356 #if IS_ENABLED(CONFIG_IPV6)
357 	else if (m_neigh->family == AF_INET6)
358 		tbl = ipv6_stub->nd_tbl;
359 #endif
360 	else
361 		return;
362 
363 	/* mlx5e_get_next_valid_encap() releases previous encap before returning
364 	 * next one.
365 	 */
366 	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
367 		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
368 		struct encap_flow_item *efi, *tmp;
369 		struct mlx5_eswitch *esw;
370 		LIST_HEAD(flow_list);
371 
372 		esw = priv->mdev->priv.eswitch;
373 		mutex_lock(&esw->offloads.encap_tbl_lock);
374 		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
375 			flow = container_of(efi, struct mlx5e_tc_flow,
376 					    encaps[efi->index]);
377 			if (IS_ERR(mlx5e_flow_get(flow)))
378 				continue;
379 			list_add(&flow->tmp_list, &flow_list);
380 
381 			if (mlx5e_is_offloaded_flow(flow)) {
382 				counter = mlx5e_tc_get_counter(flow);
383 				lastuse = mlx5_fc_query_lastuse(counter);
384 				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
385 					neigh_used = true;
386 					break;
387 				}
388 			}
389 		}
390 		mutex_unlock(&esw->offloads.encap_tbl_lock);
391 
392 		mlx5e_put_flow_list(priv, &flow_list);
393 		if (neigh_used) {
394 			/* release current encap before breaking the loop */
395 			mlx5e_encap_put(priv, e);
396 			break;
397 		}
398 	}
399 
400 	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
401 
402 	if (neigh_used) {
403 		nhe->reported_lastuse = jiffies;
404 
405 		/* find the relevant neigh according to the cached device and
406 		 * dst ip pair
407 		 */
408 		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
409 		if (!n)
410 			return;
411 
412 		neigh_event_send(n, NULL);
413 		neigh_release(n);
414 	}
415 }
416 
417 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
418 {
419 	WARN_ON(!list_empty(&e->flows));
420 
421 	if (e->compl_result > 0) {
422 		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
423 
424 		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
425 			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
426 	}
427 
428 	kfree(e->tun_info);
429 	kfree(e->encap_header);
430 	kfree_rcu(e, rcu);
431 }
432 
433 static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
434 				struct mlx5e_decap_entry *d)
435 {
436 	WARN_ON(!list_empty(&d->flows));
437 
438 	if (!d->compl_result)
439 		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
440 
441 	kfree_rcu(d, rcu);
442 }
443 
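/* Release a reference to @e; the last reference removes the entry from the
 * encap table and frees it, including its packet reformat context when the
 * entry is valid.
 */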
444 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
445 {
446 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
447 
448 	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
449 		return;
450 	list_del(&e->route_list);
451 	hash_del_rcu(&e->encap_hlist);
452 	mutex_unlock(&esw->offloads.encap_tbl_lock);
453 
454 	mlx5e_encap_dealloc(priv, e);
455 }
456 
457 static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
458 {
459 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
460 
461 	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
462 		return;
463 	hash_del_rcu(&d->hlist);
464 	mutex_unlock(&esw->offloads.decap_tbl_lock);
465 
466 	mlx5e_decap_dealloc(priv, d);
467 }
468 
469 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
470 				     struct mlx5e_tc_flow *flow,
471 				     int out_index);
472 
473 void mlx5e_detach_encap(struct mlx5e_priv *priv,
474 			struct mlx5e_tc_flow *flow, int out_index)
475 {
476 	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
477 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
478 
479 	if (flow->attr->esw_attr->dests[out_index].flags &
480 	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
481 		mlx5e_detach_encap_route(priv, flow, out_index);
482 
483 	/* flow wasn't fully initialized */
484 	if (!e)
485 		return;
486 
487 	mutex_lock(&esw->offloads.encap_tbl_lock);
488 	list_del(&flow->encaps[out_index].list);
489 	flow->encaps[out_index].e = NULL;
490 	if (!refcount_dec_and_test(&e->refcnt)) {
491 		mutex_unlock(&esw->offloads.encap_tbl_lock);
492 		return;
493 	}
494 	list_del(&e->route_list);
495 	hash_del_rcu(&e->encap_hlist);
496 	mutex_unlock(&esw->offloads.encap_tbl_lock);
497 
498 	mlx5e_encap_dealloc(priv, e);
499 }
500 
501 void mlx5e_detach_decap(struct mlx5e_priv *priv,
502 			struct mlx5e_tc_flow *flow)
503 {
504 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
505 	struct mlx5e_decap_entry *d = flow->decap_reformat;
506 
507 	if (!d)
508 		return;
509 
510 	mutex_lock(&esw->offloads.decap_tbl_lock);
511 	list_del(&flow->l3_to_l2_reformat);
512 	flow->decap_reformat = NULL;
513 
514 	if (!refcount_dec_and_test(&d->refcnt)) {
515 		mutex_unlock(&esw->offloads.decap_tbl_lock);
516 		return;
517 	}
518 	hash_del_rcu(&d->hlist);
519 	mutex_unlock(&esw->offloads.decap_tbl_lock);
520 
521 	mlx5e_decap_dealloc(priv, d);
522 }
523 
524 bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
525 					   struct mlx5e_encap_key *b)
526 {
527 	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
528 		a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
529 }
530 
531 static int cmp_decap_info(struct mlx5e_decap_key *a,
532 			  struct mlx5e_decap_key *b)
533 {
534 	return memcmp(&a->key, &b->key, sizeof(b->key));
535 }
536 
537 static int hash_encap_info(struct mlx5e_encap_key *key)
538 {
539 	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
540 		     key->tc_tunnel->tunnel_type);
541 }
542 
543 static int hash_decap_info(struct mlx5e_decap_key *key)
544 {
545 	return jhash(&key->key, sizeof(key->key), 0);
546 }
547 
548 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
549 {
550 	return refcount_inc_not_zero(&e->refcnt);
551 }
552 
553 static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
554 {
555 	return refcount_inc_not_zero(&e->refcnt);
556 }
557 
558 static struct mlx5e_encap_entry *
559 mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
560 		uintptr_t hash_key)
561 {
562 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
563 	struct mlx5e_encap_key e_key;
564 	struct mlx5e_encap_entry *e;
565 
566 	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
567 				   encap_hlist, hash_key) {
568 		e_key.ip_tun_key = &e->tun_info->key;
569 		e_key.tc_tunnel = e->tunnel;
570 		if (e->tunnel->encap_info_equal(&e_key, key) &&
571 		    mlx5e_encap_take(e))
572 			return e;
573 	}
574 
575 	return NULL;
576 }
577 
578 static struct mlx5e_decap_entry *
579 mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
580 		uintptr_t hash_key)
581 {
582 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
583 	struct mlx5e_decap_key r_key;
584 	struct mlx5e_decap_entry *e;
585 
586 	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
587 				   hlist, hash_key) {
588 		r_key = e->key;
589 		if (!cmp_decap_info(&r_key, key) &&
590 		    mlx5e_decap_take(e))
591 			return e;
592 	}
593 	return NULL;
594 }
595 
596 struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
597 {
598 	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
599 
600 	return kmemdup(tun_info, tun_size, GFP_KERNEL);
601 }
602 
603 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
604 				      struct mlx5e_tc_flow *flow,
605 				      int out_index,
606 				      struct mlx5e_encap_entry *e,
607 				      struct netlink_ext_ack *extack)
608 {
609 	int i;
610 
611 	for (i = 0; i < out_index; i++) {
612 		if (flow->encaps[i].e != e)
613 			continue;
614 		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
615 		netdev_err(priv->netdev, "can't duplicate encap action\n");
616 		return true;
617 	}
618 
619 	return false;
620 }
621 
622 static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
623 			       struct mlx5_flow_attr *attr,
624 			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
625 			       struct net_device *out_dev,
626 			       int route_dev_ifindex,
627 			       int out_index)
628 {
629 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
630 	struct net_device *route_dev;
631 	u16 vport_num;
632 	int err = 0;
633 	u32 data;
634 
635 	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
636 
637 	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
638 	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
639 		goto out;
640 
641 	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
642 	if (err)
643 		goto out;
644 
645 	attr->dest_chain = 0;
646 	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
647 	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
648 	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
649 						       vport_num);
650 	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
651 						   MLX5_FLOW_NAMESPACE_FDB,
652 						   VPORT_TO_REG, data);
653 	if (err >= 0) {
654 		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
655 		err = 0;
656 	}
657 
658 out:
659 	if (route_dev)
660 		dev_put(route_dev);
661 	return err;
662 }
663 
664 static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
665 				  struct mlx5_esw_flow_attr *attr,
666 				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
667 				  struct net_device *out_dev,
668 				  int route_dev_ifindex,
669 				  int out_index)
670 {
671 	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
672 	struct net_device *route_dev;
673 	u16 vport_num;
674 	int err = 0;
675 	u32 data;
676 
677 	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
678 
679 	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
680 	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
681 		err = -ENODEV;
682 		goto out;
683 	}
684 
685 	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
686 	if (err)
687 		goto out;
688 
689 	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
690 						       vport_num);
691 	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
692 
693 out:
694 	if (route_dev)
695 		dev_put(route_dev);
696 	return err;
697 }
698 
699 static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
700 {
701 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
702 	struct mlx5_rep_uplink_priv *uplink_priv;
703 	struct mlx5e_rep_priv *uplink_rpriv;
704 	struct mlx5e_tc_tun_encap *encap;
705 	unsigned int ret;
706 
707 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
708 	uplink_priv = &uplink_rpriv->uplink_priv;
709 	encap = uplink_priv->encap;
710 
711 	spin_lock_bh(&encap->route_lock);
712 	ret = encap->route_tbl_last_update;
713 	spin_unlock_bh(&encap->route_lock);
714 	return ret;
715 }
716 
717 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
718 				    struct mlx5e_tc_flow *flow,
719 				    struct mlx5e_encap_entry *e,
720 				    bool new_encap_entry,
721 				    unsigned long tbl_time_before,
722 				    int out_index);
723 
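/* Look up or create an encap entry for the flow's tunnel info, build its
 * IPv4/IPv6 encapsulation header and attach the flow to it. *encap_valid
 * tells the caller whether the header is already offloaded
 * (MLX5_ENCAP_ENTRY_VALID) or not.
 */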
724 int mlx5e_attach_encap(struct mlx5e_priv *priv,
725 		       struct mlx5e_tc_flow *flow,
726 		       struct net_device *mirred_dev,
727 		       int out_index,
728 		       struct netlink_ext_ack *extack,
729 		       struct net_device **encap_dev,
730 		       bool *encap_valid)
731 {
732 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
733 	struct mlx5e_tc_flow_parse_attr *parse_attr;
734 	struct mlx5_flow_attr *attr = flow->attr;
735 	const struct ip_tunnel_info *tun_info;
736 	unsigned long tbl_time_before = 0;
737 	struct mlx5e_encap_entry *e;
738 	struct mlx5e_encap_key key;
739 	bool entry_created = false;
740 	unsigned short family;
741 	uintptr_t hash_key;
742 	int err = 0;
743 
744 	parse_attr = attr->parse_attr;
745 	tun_info = parse_attr->tun_info[out_index];
746 	family = ip_tunnel_info_af(tun_info);
747 	key.ip_tun_key = &tun_info->key;
748 	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
749 	if (!key.tc_tunnel) {
750 		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
751 		return -EOPNOTSUPP;
752 	}
753 
754 	hash_key = hash_encap_info(&key);
755 
756 	mutex_lock(&esw->offloads.encap_tbl_lock);
757 	e = mlx5e_encap_get(priv, &key, hash_key);
758 
759 	/* must verify if encap is valid or not */
760 	if (e) {
761 		/* Check that entry was not already attached to this flow */
762 		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
763 			err = -EOPNOTSUPP;
764 			goto out_err;
765 		}
766 
767 		mutex_unlock(&esw->offloads.encap_tbl_lock);
768 		wait_for_completion(&e->res_ready);
769 
770 		/* Protect against concurrent neigh update. */
771 		mutex_lock(&esw->offloads.encap_tbl_lock);
772 		if (e->compl_result < 0) {
773 			err = -EREMOTEIO;
774 			goto out_err;
775 		}
776 		goto attach_flow;
777 	}
778 
779 	e = kzalloc(sizeof(*e), GFP_KERNEL);
780 	if (!e) {
781 		err = -ENOMEM;
782 		goto out_err;
783 	}
784 
785 	refcount_set(&e->refcnt, 1);
786 	init_completion(&e->res_ready);
787 	entry_created = true;
788 	INIT_LIST_HEAD(&e->route_list);
789 
790 	tun_info = mlx5e_dup_tun_info(tun_info);
791 	if (!tun_info) {
792 		err = -ENOMEM;
793 		goto out_err_init;
794 	}
795 	e->tun_info = tun_info;
796 	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
797 	if (err)
798 		goto out_err_init;
799 
800 	INIT_LIST_HEAD(&e->flows);
801 	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
802 	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
803 	mutex_unlock(&esw->offloads.encap_tbl_lock);
804 
805 	if (family == AF_INET)
806 		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
807 	else if (family == AF_INET6)
808 		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
809 
810 	/* Protect against concurrent neigh update. */
811 	mutex_lock(&esw->offloads.encap_tbl_lock);
812 	complete_all(&e->res_ready);
813 	if (err) {
814 		e->compl_result = err;
815 		goto out_err;
816 	}
817 	e->compl_result = 1;
818 
819 attach_flow:
820 	err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before,
821 				       out_index);
822 	if (err)
823 		goto out_err;
824 
825 	flow->encaps[out_index].e = e;
826 	list_add(&flow->encaps[out_index].list, &e->flows);
827 	flow->encaps[out_index].index = out_index;
828 	*encap_dev = e->out_dev;
829 	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
830 		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
831 		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
832 		*encap_valid = true;
833 	} else {
834 		*encap_valid = false;
835 	}
836 	mutex_unlock(&esw->offloads.encap_tbl_lock);
837 
838 	return err;
839 
840 out_err:
841 	mutex_unlock(&esw->offloads.encap_tbl_lock);
842 	if (e)
843 		mlx5e_encap_put(priv, e);
844 	return err;
845 
846 out_err_init:
847 	mutex_unlock(&esw->offloads.encap_tbl_lock);
848 	kfree(tun_info);
849 	kfree(e);
850 	return err;
851 }
852 
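/* Look up or create an L3-to-L2 decap reformat entry matching the flow's
 * inner Ethernet header and attach the flow to it.
 */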
853 int mlx5e_attach_decap(struct mlx5e_priv *priv,
854 		       struct mlx5e_tc_flow *flow,
855 		       struct netlink_ext_ack *extack)
856 {
857 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
858 	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
859 	struct mlx5_pkt_reformat_params reformat_params;
860 	struct mlx5e_tc_flow_parse_attr *parse_attr;
861 	struct mlx5e_decap_entry *d;
862 	struct mlx5e_decap_key key;
863 	uintptr_t hash_key;
864 	int err = 0;
865 
866 	parse_attr = flow->attr->parse_attr;
867 	if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
868 		NL_SET_ERR_MSG_MOD(extack,
869 				   "encap header larger than max supported");
870 		return -EOPNOTSUPP;
871 	}
872 
873 	key.key = parse_attr->eth;
874 	hash_key = hash_decap_info(&key);
875 	mutex_lock(&esw->offloads.decap_tbl_lock);
876 	d = mlx5e_decap_get(priv, &key, hash_key);
877 	if (d) {
878 		mutex_unlock(&esw->offloads.decap_tbl_lock);
879 		wait_for_completion(&d->res_ready);
880 		mutex_lock(&esw->offloads.decap_tbl_lock);
881 		if (d->compl_result) {
882 			err = -EREMOTEIO;
883 			goto out_free;
884 		}
885 		goto found;
886 	}
887 
888 	d = kzalloc(sizeof(*d), GFP_KERNEL);
889 	if (!d) {
890 		err = -ENOMEM;
891 		goto out_err;
892 	}
893 
894 	d->key = key;
895 	refcount_set(&d->refcnt, 1);
896 	init_completion(&d->res_ready);
897 	INIT_LIST_HEAD(&d->flows);
898 	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
899 	mutex_unlock(&esw->offloads.decap_tbl_lock);
900 
901 	memset(&reformat_params, 0, sizeof(reformat_params));
902 	reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
903 	reformat_params.size = sizeof(parse_attr->eth);
904 	reformat_params.data = &parse_attr->eth;
905 	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
906 						     &reformat_params,
907 						     MLX5_FLOW_NAMESPACE_FDB);
908 	if (IS_ERR(d->pkt_reformat)) {
909 		err = PTR_ERR(d->pkt_reformat);
910 		d->compl_result = err;
911 	}
912 	mutex_lock(&esw->offloads.decap_tbl_lock);
913 	complete_all(&d->res_ready);
914 	if (err)
915 		goto out_free;
916 
917 found:
918 	flow->decap_reformat = d;
919 	attr->decap_pkt_reformat = d->pkt_reformat;
920 	list_add(&flow->l3_to_l2_reformat, &d->flows);
921 	mutex_unlock(&esw->offloads.decap_tbl_lock);
922 	return 0;
923 
924 out_free:
925 	mutex_unlock(&esw->offloads.decap_tbl_lock);
926 	mlx5e_decap_put(priv, d);
927 	return err;
928 
929 out_err:
930 	mutex_unlock(&esw->offloads.decap_tbl_lock);
931 	return err;
932 }
933 
934 static int cmp_route_info(struct mlx5e_route_key *a,
935 			  struct mlx5e_route_key *b)
936 {
937 	if (a->ip_version == 4 && b->ip_version == 4)
938 		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
939 			      sizeof(a->endpoint_ip.v4));
940 	else if (a->ip_version == 6 && b->ip_version == 6)
941 		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
942 			      sizeof(a->endpoint_ip.v6));
943 	return 1;
944 }
945 
946 static u32 hash_route_info(struct mlx5e_route_key *key)
947 {
948 	if (key->ip_version == 4)
949 		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
950 	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
951 }
952 
953 static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
954 				struct mlx5e_route_entry *r)
955 {
956 	WARN_ON(!list_empty(&r->decap_flows));
957 	WARN_ON(!list_empty(&r->encap_entries));
958 
959 	kfree_rcu(r, rcu);
960 }
961 
962 static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
963 {
964 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
965 
966 	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
967 		return;
968 
969 	hash_del_rcu(&r->hlist);
970 	mutex_unlock(&esw->offloads.encap_tbl_lock);
971 
972 	mlx5e_route_dealloc(priv, r);
973 }
974 
975 static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
976 {
977 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
978 
979 	lockdep_assert_held(&esw->offloads.encap_tbl_lock);
980 
981 	if (!refcount_dec_and_test(&r->refcnt))
982 		return;
983 	hash_del_rcu(&r->hlist);
984 	mlx5e_route_dealloc(priv, r);
985 }
986 
987 static struct mlx5e_route_entry *
988 mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
989 		u32 hash_key)
990 {
991 	struct mlx5e_route_key r_key;
992 	struct mlx5e_route_entry *r;
993 
994 	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
995 		r_key = r->key;
996 		if (!cmp_route_info(&r_key, key) &&
997 		    refcount_inc_not_zero(&r->refcnt))
998 			return r;
999 	}
1000 	return NULL;
1001 }
1002 
1003 static struct mlx5e_route_entry *
1004 mlx5e_route_get_create(struct mlx5e_priv *priv,
1005 		       struct mlx5e_route_key *key,
1006 		       int tunnel_dev_index,
1007 		       unsigned long *route_tbl_change_time)
1008 {
1009 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1010 	struct mlx5_rep_uplink_priv *uplink_priv;
1011 	struct mlx5e_rep_priv *uplink_rpriv;
1012 	struct mlx5e_tc_tun_encap *encap;
1013 	struct mlx5e_route_entry *r;
1014 	u32 hash_key;
1015 
1016 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1017 	uplink_priv = &uplink_rpriv->uplink_priv;
1018 	encap = uplink_priv->encap;
1019 
1020 	hash_key = hash_route_info(key);
1021 	spin_lock_bh(&encap->route_lock);
1022 	r = mlx5e_route_get(encap, key, hash_key);
1023 	spin_unlock_bh(&encap->route_lock);
1024 	if (r) {
1025 		if (!mlx5e_route_entry_valid(r)) {
1026 			mlx5e_route_put_locked(priv, r);
1027 			return ERR_PTR(-EINVAL);
1028 		}
1029 		return r;
1030 	}
1031 
1032 	r = kzalloc(sizeof(*r), GFP_KERNEL);
1033 	if (!r)
1034 		return ERR_PTR(-ENOMEM);
1035 
1036 	r->key = *key;
1037 	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1038 	r->tunnel_dev_index = tunnel_dev_index;
1039 	refcount_set(&r->refcnt, 1);
1040 	INIT_LIST_HEAD(&r->decap_flows);
1041 	INIT_LIST_HEAD(&r->encap_entries);
1042 
1043 	spin_lock_bh(&encap->route_lock);
1044 	*route_tbl_change_time = encap->route_tbl_last_update;
1045 	hash_add(encap->route_tbl, &r->hlist, hash_key);
1046 	spin_unlock_bh(&encap->route_lock);
1047 
1048 	return r;
1049 }
1050 
1051 static struct mlx5e_route_entry *
1052 mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
1053 {
1054 	u32 hash_key = hash_route_info(key);
1055 	struct mlx5e_route_entry *r;
1056 
1057 	spin_lock_bh(&encap->route_lock);
1058 	encap->route_tbl_last_update = jiffies;
1059 	r = mlx5e_route_get(encap, key, hash_key);
1060 	spin_unlock_bh(&encap->route_lock);
1061 
1062 	return r;
1063 }
1064 
1065 struct mlx5e_tc_fib_event_data {
1066 	struct work_struct work;
1067 	unsigned long event;
1068 	struct mlx5e_route_entry *r;
1069 	struct net_device *ul_dev;
1070 };
1071 
1072 static void mlx5e_tc_fib_event_work(struct work_struct *work);
1073 static struct mlx5e_tc_fib_event_data *
1074 mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
1075 {
1076 	struct mlx5e_tc_fib_event_data *fib_work;
1077 
1078 	fib_work = kzalloc(sizeof(*fib_work), flags);
1079 	if (WARN_ON(!fib_work))
1080 		return NULL;
1081 
1082 	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
1083 	fib_work->event = event;
1084 	fib_work->ul_dev = ul_dev;
1085 
1086 	return fib_work;
1087 }
1088 
1089 static int
1090 mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
1091 			   struct mlx5e_route_entry *r,
1092 			   unsigned long event)
1093 {
1094 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1095 	struct mlx5e_tc_fib_event_data *fib_work;
1096 	struct mlx5e_rep_priv *uplink_rpriv;
1097 	struct net_device *ul_dev;
1098 
1099 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1100 	ul_dev = uplink_rpriv->netdev;
1101 
1102 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
1103 	if (!fib_work)
1104 		return -ENOMEM;
1105 
1106 	dev_hold(ul_dev);
1107 	refcount_inc(&r->refcnt);
1108 	fib_work->r = r;
1109 	queue_work(priv->wq, &fib_work->work);
1110 
1111 	return 0;
1112 }
1113 
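/* Perform a route lookup for a tunnel-RX (decap) flow and attach the flow
 * to the matching route entry so FIB updates can re-offload it later.
 */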
1114 int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
1115 			     struct mlx5e_tc_flow *flow)
1116 {
1117 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1118 	unsigned long tbl_time_before, tbl_time_after;
1119 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1120 	struct mlx5_flow_attr *attr = flow->attr;
1121 	struct mlx5_esw_flow_attr *esw_attr;
1122 	struct mlx5e_route_entry *r;
1123 	struct mlx5e_route_key key;
1124 	int err = 0;
1125 
1126 	esw_attr = attr->esw_attr;
1127 	parse_attr = attr->parse_attr;
1128 	mutex_lock(&esw->offloads.encap_tbl_lock);
1129 	if (!esw_attr->rx_tun_attr)
1130 		goto out;
1131 
1132 	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
1133 	tbl_time_after = tbl_time_before;
1134 	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
1135 	if (err || !esw_attr->rx_tun_attr->decap_vport)
1136 		goto out;
1137 
1138 	key.ip_version = attr->tun_ip_version;
1139 	if (key.ip_version == 4)
1140 		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
1141 	else
1142 		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
1143 
1144 	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
1145 				   &tbl_time_after);
1146 	if (IS_ERR(r)) {
1147 		err = PTR_ERR(r);
1148 		goto out;
1149 	}
1150 	/* Routing changed concurrently. FIB event handler might have missed new
1151 	 * entry, schedule update.
1152 	 */
1153 	if (tbl_time_before != tbl_time_after) {
1154 		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1155 		if (err) {
1156 			mlx5e_route_put_locked(priv, r);
1157 			goto out;
1158 		}
1159 	}
1160 
1161 	flow->decap_route = r;
1162 	list_add(&flow->decap_routes, &r->decap_flows);
1163 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1164 	return 0;
1165 
1166 out:
1167 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1168 	return err;
1169 }
1170 
1171 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
1172 				    struct mlx5e_tc_flow *flow,
1173 				    struct mlx5e_encap_entry *e,
1174 				    bool new_encap_entry,
1175 				    unsigned long tbl_time_before,
1176 				    int out_index)
1177 {
1178 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1179 	unsigned long tbl_time_after = tbl_time_before;
1180 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1181 	struct mlx5_flow_attr *attr = flow->attr;
1182 	const struct ip_tunnel_info *tun_info;
1183 	struct mlx5_esw_flow_attr *esw_attr;
1184 	struct mlx5e_route_entry *r;
1185 	struct mlx5e_route_key key;
1186 	unsigned short family;
1187 	int err = 0;
1188 
1189 	esw_attr = attr->esw_attr;
1190 	parse_attr = attr->parse_attr;
1191 	tun_info = parse_attr->tun_info[out_index];
1192 	family = ip_tunnel_info_af(tun_info);
1193 
1194 	if (family == AF_INET) {
1195 		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
1196 		key.ip_version = 4;
1197 	} else if (family == AF_INET6) {
1198 		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
1199 		key.ip_version = 6;
1200 	}
1201 
1202 	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
1203 				  e->route_dev_ifindex, out_index);
1204 	if (err || !(esw_attr->dests[out_index].flags &
1205 		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
1206 		return err;
1207 
1208 	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
1209 				   &tbl_time_after);
1210 	if (IS_ERR(r))
1211 		return PTR_ERR(r);
1212 	/* Routing changed concurrently. FIB event handler might have missed new
1213 	 * entry, schedule update.
1214 	 */
1215 	if (tbl_time_before != tbl_time_after) {
1216 		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1217 		if (err) {
1218 			mlx5e_route_put_locked(priv, r);
1219 			return err;
1220 		}
1221 	}
1222 
1223 	flow->encap_routes[out_index].r = r;
1224 	if (new_encap_entry)
1225 		list_add(&e->route_list, &r->encap_entries);
1226 	flow->encap_routes[out_index].index = out_index;
1227 	return 0;
1228 }
1229 
1230 void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
1231 			      struct mlx5e_tc_flow *flow)
1232 {
1233 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1234 	struct mlx5e_route_entry *r = flow->decap_route;
1235 
1236 	if (!r)
1237 		return;
1238 
1239 	mutex_lock(&esw->offloads.encap_tbl_lock);
1240 	list_del(&flow->decap_routes);
1241 	flow->decap_route = NULL;
1242 
1243 	if (!refcount_dec_and_test(&r->refcnt)) {
1244 		mutex_unlock(&esw->offloads.encap_tbl_lock);
1245 		return;
1246 	}
1247 	hash_del_rcu(&r->hlist);
1248 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1249 
1250 	mlx5e_route_dealloc(priv, r);
1251 }
1252 
1253 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
1254 				     struct mlx5e_tc_flow *flow,
1255 				     int out_index)
1256 {
1257 	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
1258 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1259 	struct mlx5e_encap_entry *e, *tmp;
1260 
1261 	if (!r)
1262 		return;
1263 
1264 	mutex_lock(&esw->offloads.encap_tbl_lock);
1265 	flow->encap_routes[out_index].r = NULL;
1266 
1267 	if (!refcount_dec_and_test(&r->refcnt)) {
1268 		mutex_unlock(&esw->offloads.encap_tbl_lock);
1269 		return;
1270 	}
1271 	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
1272 		list_del_init(&e->route_list);
1273 	hash_del_rcu(&r->hlist);
1274 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1275 
1276 	mlx5e_route_dealloc(priv, r);
1277 }
1278 
1279 static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
1280 				   struct mlx5e_encap_entry *e,
1281 				   struct list_head *encap_flows)
1282 {
1283 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1284 	struct mlx5e_tc_flow *flow;
1285 
1286 	list_for_each_entry(flow, encap_flows, tmp_list) {
1287 		struct mlx5_flow_attr *attr = flow->attr;
1288 		struct mlx5_esw_flow_attr *esw_attr;
1289 
1290 		if (!mlx5e_is_offloaded_flow(flow))
1291 			continue;
1292 		esw_attr = attr->esw_attr;
1293 
1294 		if (flow_flag_test(flow, SLOW))
1295 			mlx5e_tc_unoffload_from_slow_path(esw, flow);
1296 		else
1297 			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1298 		mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
1299 		attr->modify_hdr = NULL;
1300 
1301 		esw_attr->dests[flow->tmp_entry_index].flags &=
1302 			~MLX5_ESW_DEST_ENCAP_VALID;
1303 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
1304 	}
1305 
1306 	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
1307 	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1308 		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1309 		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1310 		e->pkt_reformat = NULL;
1311 	}
1312 }
1313 
1314 static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
1315 				  struct net_device *tunnel_dev,
1316 				  struct mlx5e_encap_entry *e,
1317 				  struct list_head *encap_flows)
1318 {
1319 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1320 	struct mlx5e_tc_flow *flow;
1321 	int err;
1322 
1323 	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
1324 		mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
1325 		mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
1326 	if (err)
1327 		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
1328 	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
1329 
1330 	list_for_each_entry(flow, encap_flows, tmp_list) {
1331 		struct mlx5e_tc_flow_parse_attr *parse_attr;
1332 		struct mlx5_flow_attr *attr = flow->attr;
1333 		struct mlx5_esw_flow_attr *esw_attr;
1334 		struct mlx5_flow_handle *rule;
1335 		struct mlx5_flow_spec *spec;
1336 
1337 		if (flow_flag_test(flow, FAILED))
1338 			continue;
1339 
1340 		esw_attr = attr->esw_attr;
1341 		parse_attr = attr->parse_attr;
1342 		spec = &parse_attr->spec;
1343 
1344 		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
1345 					     e->out_dev, e->route_dev_ifindex,
1346 					     flow->tmp_entry_index);
1347 		if (err) {
1348 			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
1349 			continue;
1350 		}
1351 
1352 		err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
1353 		if (err) {
1354 			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
1355 				       err);
1356 			continue;
1357 		}
1358 
1359 		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1360 			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
1361 			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1362 			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
1363 				goto offload_to_slow_path;
1364 			/* update from slow path rule to encap rule */
1365 			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1366 			if (IS_ERR(rule)) {
1367 				err = PTR_ERR(rule);
1368 				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1369 					       err);
1370 			} else {
1371 				flow->rule[0] = rule;
1372 			}
1373 		} else {
1374 offload_to_slow_path:
1375 			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1376 			/* mark the flow's encap dest as non-valid */
1377 			esw_attr->dests[flow->tmp_entry_index].flags &=
1378 				~MLX5_ESW_DEST_ENCAP_VALID;
1379 
1380 			if (IS_ERR(rule)) {
1381 				err = PTR_ERR(rule);
1382 				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1383 					       err);
1384 			} else {
1385 				flow->rule[0] = rule;
1386 			}
1387 		}
1388 		flow_flag_set(flow, OFFLOADED);
1389 	}
1390 }
1391 
1392 static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
1393 				     struct mlx5e_route_entry *r,
1394 				     struct list_head *flow_list,
1395 				     bool replace)
1396 {
1397 	struct net_device *tunnel_dev;
1398 	struct mlx5e_encap_entry *e;
1399 
1400 	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1401 	if (!tunnel_dev)
1402 		return -ENODEV;
1403 
1404 	list_for_each_entry(e, &r->encap_entries, route_list) {
1405 		LIST_HEAD(encap_flows);
1406 
1407 		mlx5e_take_all_encap_flows(e, &encap_flows);
1408 		if (list_empty(&encap_flows))
1409 			continue;
1410 
1411 		if (mlx5e_route_entry_valid(r))
1412 			mlx5e_invalidate_encap(priv, e, &encap_flows);
1413 
1414 		if (!replace) {
1415 			list_splice(&encap_flows, flow_list);
1416 			continue;
1417 		}
1418 
1419 		mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
1420 		list_splice(&encap_flows, flow_list);
1421 	}
1422 
1423 	return 0;
1424 }
1425 
1426 static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1427 				      struct list_head *flow_list)
1428 {
1429 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1430 	struct mlx5e_tc_flow *flow;
1431 
1432 	list_for_each_entry(flow, flow_list, tmp_list)
1433 		if (mlx5e_is_offloaded_flow(flow))
1434 			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1435 }
1436 
1437 static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
1438 				  struct list_head *decap_flows)
1439 {
1440 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1441 	struct mlx5e_tc_flow *flow;
1442 
1443 	list_for_each_entry(flow, decap_flows, tmp_list) {
1444 		struct mlx5e_tc_flow_parse_attr *parse_attr;
1445 		struct mlx5_flow_attr *attr = flow->attr;
1446 		struct mlx5_flow_handle *rule;
1447 		struct mlx5_flow_spec *spec;
1448 		int err;
1449 
1450 		if (flow_flag_test(flow, FAILED))
1451 			continue;
1452 
1453 		parse_attr = attr->parse_attr;
1454 		spec = &parse_attr->spec;
1455 		err = mlx5e_tc_tun_route_lookup(priv, spec, attr);
1456 		if (err) {
1457 			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
1458 				       err);
1459 			continue;
1460 		}
1461 
1462 		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1463 		if (IS_ERR(rule)) {
1464 			err = PTR_ERR(rule);
1465 			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
1466 				       err);
1467 		} else {
1468 			flow->rule[0] = rule;
1469 			flow_flag_set(flow, OFFLOADED);
1470 		}
1471 	}
1472 }
1473 
1474 static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
1475 					  struct mlx5e_route_entry *r,
1476 					  struct list_head *flow_list,
1477 					  bool replace)
1478 {
1479 	struct net_device *tunnel_dev;
1480 	LIST_HEAD(decap_flows);
1481 
1482 	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1483 	if (!tunnel_dev)
1484 		return -ENODEV;
1485 
1486 	mlx5e_take_all_route_decap_flows(r, &decap_flows);
1487 	if (mlx5e_route_entry_valid(r))
1488 		mlx5e_unoffload_flow_list(priv, &decap_flows);
1489 	if (replace)
1490 		mlx5e_reoffload_decap(priv, &decap_flows);
1491 
1492 	list_splice(&decap_flows, flow_list);
1493 
1494 	return 0;
1495 }
1496 
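/* Deferred FIB event handler: under RTNL and encap_tbl_lock, re-offload
 * (on route replace) or unoffload (on route delete) all encap and decap
 * flows that depend on the affected route entry.
 */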
1497 static void mlx5e_tc_fib_event_work(struct work_struct *work)
1498 {
1499 	struct mlx5e_tc_fib_event_data *event_data =
1500 		container_of(work, struct mlx5e_tc_fib_event_data, work);
1501 	struct net_device *ul_dev = event_data->ul_dev;
1502 	struct mlx5e_priv *priv = netdev_priv(ul_dev);
1503 	struct mlx5e_route_entry *r = event_data->r;
1504 	struct mlx5_eswitch *esw;
1505 	LIST_HEAD(flow_list);
1506 	bool replace;
1507 	int err;
1508 
1509 	/* sync with concurrent neigh updates */
1510 	rtnl_lock();
1511 	esw = priv->mdev->priv.eswitch;
1512 	mutex_lock(&esw->offloads.encap_tbl_lock);
1513 	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
1514 
1515 	if (!mlx5e_route_entry_valid(r) && !replace)
1516 		goto out;
1517 
1518 	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
1519 	if (err)
1520 		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
1521 			       err);
1522 
1523 	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
1524 	if (err)
1525 		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
1526 			       err);
1527 
1528 	if (replace)
1529 		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1530 out:
1531 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1532 	rtnl_unlock();
1533 
1534 	mlx5e_put_flow_list(priv, &flow_list);
1535 	mlx5e_route_put(priv, event_data->r);
1536 	dev_put(event_data->ul_dev);
1537 	kfree(event_data);
1538 }
1539 
1540 static struct mlx5e_tc_fib_event_data *
1541 mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
1542 			 struct net_device *ul_dev,
1543 			 struct mlx5e_tc_tun_encap *encap,
1544 			 unsigned long event,
1545 			 struct fib_notifier_info *info)
1546 {
1547 	struct fib_entry_notifier_info *fen_info;
1548 	struct mlx5e_tc_fib_event_data *fib_work;
1549 	struct mlx5e_route_entry *r;
1550 	struct mlx5e_route_key key;
1551 	struct net_device *fib_dev;
1552 
1553 	fen_info = container_of(info, struct fib_entry_notifier_info, info);
1554 	if (fen_info->fi->nh)
1555 		return NULL;
1556 	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
1557 	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1558 	    fen_info->dst_len != 32)
1559 		return NULL;
1560 
1561 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1562 	if (!fib_work)
1563 		return ERR_PTR(-ENOMEM);
1564 
1565 	key.endpoint_ip.v4 = htonl(fen_info->dst);
1566 	key.ip_version = 4;
1567 
1568 	/* Can't fail after this point because releasing reference to r
1569 	 * requires obtaining sleeping mutex which we can't do in atomic
1570 	 * context.
1571 	 */
1572 	r = mlx5e_route_lookup_for_update(encap, &key);
1573 	if (!r)
1574 		goto out;
1575 	fib_work->r = r;
1576 	dev_hold(ul_dev);
1577 
1578 	return fib_work;
1579 
1580 out:
1581 	kfree(fib_work);
1582 	return NULL;
1583 }
1584 
1585 static struct mlx5e_tc_fib_event_data *
1586 mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
1587 			 struct net_device *ul_dev,
1588 			 struct mlx5e_tc_tun_encap *encap,
1589 			 unsigned long event,
1590 			 struct fib_notifier_info *info)
1591 {
1592 	struct fib6_entry_notifier_info *fen_info;
1593 	struct mlx5e_tc_fib_event_data *fib_work;
1594 	struct mlx5e_route_entry *r;
1595 	struct mlx5e_route_key key;
1596 	struct net_device *fib_dev;
1597 
1598 	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
1599 	fib_dev = fib6_info_nh_dev(fen_info->rt);
1600 	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1601 	    fen_info->rt->fib6_dst.plen != 128)
1602 		return NULL;
1603 
1604 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1605 	if (!fib_work)
1606 		return ERR_PTR(-ENOMEM);
1607 
1608 	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
1609 	       sizeof(fen_info->rt->fib6_dst.addr));
1610 	key.ip_version = 6;
1611 
1612 	/* Can't fail after this point because releasing reference to r
1613 	 * requires obtaining sleeping mutex which we can't do in atomic
1614 	 * context.
1615 	 */
1616 	r = mlx5e_route_lookup_for_update(encap, &key);
1617 	if (!r)
1618 		goto out;
1619 	fib_work->r = r;
1620 	dev_hold(ul_dev);
1621 
1622 	return fib_work;
1623 
1624 out:
1625 	kfree(fib_work);
1626 	return NULL;
1627 }
1628 
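/* FIB notifier callback. It runs in atomic context, so it only looks up
 * the affected route entry and queues mlx5e_tc_fib_event_work() to perform
 * the actual flow updates.
 */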
1629 static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
1630 {
1631 	struct mlx5e_tc_fib_event_data *fib_work;
1632 	struct fib_notifier_info *info = ptr;
1633 	struct mlx5e_tc_tun_encap *encap;
1634 	struct net_device *ul_dev;
1635 	struct mlx5e_priv *priv;
1636 
1637 	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
1638 	priv = encap->priv;
1639 	ul_dev = priv->netdev;
1640 	priv = netdev_priv(ul_dev);
1641 
1642 	switch (event) {
1643 	case FIB_EVENT_ENTRY_REPLACE:
1644 	case FIB_EVENT_ENTRY_DEL:
1645 		if (info->family == AF_INET)
1646 			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
1647 		else if (info->family == AF_INET6)
1648 			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
1649 		else
1650 			return NOTIFY_DONE;
1651 
1652 		if (!IS_ERR_OR_NULL(fib_work)) {
1653 			queue_work(priv->wq, &fib_work->work);
1654 		} else if (IS_ERR(fib_work)) {
1655 			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
1656 			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
1657 				       PTR_ERR(fib_work));
1658 		}
1659 
1660 		break;
1661 	default:
1662 		return NOTIFY_DONE;
1663 	}
1664 
1665 	return NOTIFY_DONE;
1666 }
1667 
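/* Allocate the per-uplink tunnel encap context and register the FIB
 * notifier used to track route changes for offloaded tunnel flows.
 */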
1668 struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
1669 {
1670 	struct mlx5e_tc_tun_encap *encap;
1671 	int err;
1672 
1673 	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
1674 	if (!encap)
1675 		return ERR_PTR(-ENOMEM);
1676 
1677 	encap->priv = priv;
1678 	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
1679 	spin_lock_init(&encap->route_lock);
1680 	hash_init(encap->route_tbl);
1681 	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
1682 				    NULL, NULL);
1683 	if (err) {
1684 		kvfree(encap);
1685 		return ERR_PTR(err);
1686 	}
1687 
1688 	return encap;
1689 }
1690 
1691 void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
1692 {
1693 	if (!encap)
1694 		return;
1695 
1696 	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
1697 	flush_workqueue(encap->priv->wq); /* flush fib event works */
1698 	kvfree(encap);
1699 }
1700