// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>

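/*
 * Tear down the flow on TCP FIN or RST so that the connection falls
 * back to the classic conntrack path for its shutdown handshake.
 */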
static int nf_flow_state_check(struct flow_offload *flow, int proto,
			       struct sk_buff *skb, unsigned int thoff)
{
	struct tcphdr *tcph;

	if (proto != IPPROTO_TCP)
		return 0;

	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
		return -1;

	tcph = (void *)(skb_network_header(skb) + thoff);
	if (unlikely(tcph->fin || tcph->rst)) {
		flow_offload_teardown(flow);
		return -1;
	}

	return 0;
}

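/*
 * Fold an IPv4 address change into the TCP checksum; the address is
 * part of the pseudo-header, hence the final "true" argument.
 */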
static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
			      __be32 addr, __be32 new_addr)
{
	struct tcphdr *tcph;

	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
		return -1;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);

	return 0;
}

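/*
 * Same for UDP, with one twist: a zero checksum means "no checksum"
 * on UDP over IPv4, so it is only updated when in use, and a result
 * of zero is mapped back to CSUM_MANGLED_0 (all ones).
 */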
static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
			      __be32 addr, __be32 new_addr)
{
	struct udphdr *udph;

	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
		return -1;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace4(&udph->check, skb, addr,
					 new_addr, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}

	return 0;
}

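/* Dispatch the layer 4 checksum fixup on the transport protocol. */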
static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
				  unsigned int thoff, __be32 addr,
				  __be32 new_addr)
{
	switch (iph->protocol) {
	case IPPROTO_TCP:
		if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
			return NF_DROP;
		break;
	case IPPROTO_UDP:
		if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
			return NF_DROP;
		break;
	}

	return 0;
}

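/*
 * Source NAT: in the original direction, the source address becomes
 * the reply tuple's destination; in the reply direction, the
 * destination becomes the original tuple's source. Destination NAT
 * below is the mirror image. The IPv4 header checksum is fixed up
 * here, the transport checksum in the helpers above.
 */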
static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
			   struct iphdr *iph, unsigned int thoff,
			   enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	default:
		return -1;
	}
	csum_replace4(&iph->check, addr, new_addr);

	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
			   struct iphdr *iph, unsigned int thoff,
			   enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	default:
		return -1;
	}
	csum_replace4(&iph->check, addr, new_addr);

	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

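/* Apply port and address NAT to an IPv4 packet in the given direction. */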
static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
			  unsigned int thoff, enum flow_offload_tuple_dir dir)
{
	struct iphdr *iph = ip_hdr(skb);

	if (flow->flags & FLOW_OFFLOAD_SNAT &&
	    (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
	     nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
		return -1;
	if (flow->flags & FLOW_OFFLOAD_DNAT &&
	    (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
	     nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
		return -1;

	return 0;
}

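/* An IPv4 header longer than the fixed 20 bytes carries options. */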
static bool ip_has_options(unsigned int thoff)
{
	return thoff != sizeof(struct iphdr);
}

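/*
 * Extract the lookup tuple from an IPv4 packet. Fragments, packets
 * with IP options, non-TCP/UDP traffic and packets whose TTL is about
 * to expire are left to the classic forwarding path.
 */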
static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
			    struct flow_offload_tuple *tuple)
{
	struct flow_ports *ports;
	unsigned int thoff;
	struct iphdr *iph;

	if (!pskb_may_pull(skb, sizeof(*iph)))
		return -1;

	iph = ip_hdr(skb);
	thoff = iph->ihl * 4;

	if (ip_is_fragment(iph) ||
	    unlikely(ip_has_options(thoff)))
		return -1;

	if (iph->protocol != IPPROTO_TCP &&
	    iph->protocol != IPPROTO_UDP)
		return -1;

	if (iph->ttl <= 1)
		return -1;

	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
		return -1;

	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

	tuple->src_v4.s_addr	= iph->saddr;
	tuple->dst_v4.s_addr	= iph->daddr;
	tuple->src_port		= ports->source;
	tuple->dst_port		= ports->dest;
	tuple->l3proto		= AF_INET;
	tuple->l4proto		= iph->protocol;
	tuple->iifidx		= dev->ifindex;

	return 0;
}

/* Based on ip_exceeds_mtu(). */
static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

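/*
 * A dst with an xfrm transformation attached can go stale when
 * policies or states change, so revalidate it; a failure makes the
 * caller tear down the flow.
 */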
static int nf_flow_offload_dst_check(struct dst_entry *dst)
{
	if (unlikely(dst_xfrm(dst)))
		return dst_check(dst, 0) ? 0 : -1;

	return 0;
}

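/*
 * Hand the packet over to the xfrm output path. The flowtable holds
 * the dst reference, so it is attached without taking another one.
 */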
static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
				      const struct nf_hook_state *state,
				      struct dst_entry *dst)
{
	skb_orphan(skb);
	skb_dst_set_noref(skb, dst);
	dst_output(state->net, state->sk, skb);
	return NF_STOLEN;
}

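/*
 * Netfilter hook for the IPv4 fastpath: look up the flow, apply NAT
 * and transmit the packet directly to the next hop, bypassing the
 * classic forwarding path. Packets that do not match an offloaded
 * flow pass through with NF_ACCEPT.
 */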
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
			const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	struct flow_offload_tuple tuple = {};
	enum flow_offload_tuple_dir dir;
	struct flow_offload *flow;
	struct net_device *outdev;
	struct rtable *rt;
	unsigned int thoff;
	struct iphdr *iph;
	__be32 nexthop;

	if (skb->protocol != htons(ETH_P_IP))
		return NF_ACCEPT;

	if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
		return NF_ACCEPT;

	tuplehash = flow_offload_lookup(flow_table, &tuple);
	if (tuplehash == NULL)
		return NF_ACCEPT;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
	outdev = rt->dst.dev;

	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
		return NF_ACCEPT;

	if (skb_try_make_writable(skb, sizeof(*iph)))
		return NF_DROP;

	thoff = ip_hdr(skb)->ihl * 4;
	if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
		return NF_ACCEPT;

	if (nf_flow_offload_dst_check(&rt->dst)) {
		flow_offload_teardown(flow);
		return NF_ACCEPT;
	}

	if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
		return NF_DROP;

	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
	iph = ip_hdr(skb);
	ip_decrease_ttl(iph);
	skb->tstamp = 0;

	if (unlikely(dst_xfrm(&rt->dst))) {
		memset(skb->cb, 0, sizeof(struct inet_skb_parm));
		IPCB(skb)->iif = skb->dev->ifindex;
		IPCB(skb)->flags = IPSKB_FORWARDED;
		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
	}

	skb->dev = outdev;
	nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
	skb_dst_set_noref(skb, &rt->dst);
	neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);

	return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);

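/*
 * IPv6 variants of the checksum helpers: the 128-bit address change
 * is folded into the transport checksum with inet_proto_csum_replace16().
 */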
static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
				struct in6_addr *addr,
				struct in6_addr *new_addr)
{
	struct tcphdr *tcph;

	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
		return -1;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
				  new_addr->s6_addr32, true);

	return 0;
}

static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
				struct in6_addr *addr,
				struct in6_addr *new_addr)
{
	struct udphdr *udph;

	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
		return -1;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
					  new_addr->s6_addr32, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}

	return 0;
}

static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
				    unsigned int thoff, struct in6_addr *addr,
				    struct in6_addr *new_addr)
{
	switch (ip6h->nexthdr) {
	case IPPROTO_TCP:
		if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
			return NF_DROP;
		break;
	case IPPROTO_UDP:
		if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
			return NF_DROP;
		break;
	}

	return 0;
}

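/*
 * IPv6 has no header checksum, so address rewriting only needs the
 * transport checksum fixups above.
 */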
static int nf_flow_snat_ipv6(const struct flow_offload *flow,
			     struct sk_buff *skb, struct ipv6hdr *ip6h,
			     unsigned int thoff,
			     enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
			     struct sk_buff *skb, struct ipv6hdr *ip6h,
			     unsigned int thoff,
			     enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static int nf_flow_nat_ipv6(const struct flow_offload *flow,
			    struct sk_buff *skb,
			    enum flow_offload_tuple_dir dir)
{
	struct ipv6hdr *ip6h = ipv6_hdr(skb);
	unsigned int thoff = sizeof(*ip6h);

	if (flow->flags & FLOW_OFFLOAD_SNAT &&
	    (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
	     nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
		return -1;
	if (flow->flags & FLOW_OFFLOAD_DNAT &&
	    (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
	     nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
		return -1;

	return 0;
}

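/*
 * Extract the lookup tuple from an IPv6 packet. Extension headers are
 * not parsed: only TCP or UDP directly after the fixed header is
 * eligible for the fastpath.
 */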
static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
			      struct flow_offload_tuple *tuple)
{
	struct flow_ports *ports;
	struct ipv6hdr *ip6h;
	unsigned int thoff;

	if (!pskb_may_pull(skb, sizeof(*ip6h)))
		return -1;

	ip6h = ipv6_hdr(skb);

	if (ip6h->nexthdr != IPPROTO_TCP &&
	    ip6h->nexthdr != IPPROTO_UDP)
		return -1;

	if (ip6h->hop_limit <= 1)
		return -1;

	thoff = sizeof(*ip6h);
	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
		return -1;

	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

	tuple->src_v6		= ip6h->saddr;
	tuple->dst_v6		= ip6h->daddr;
	tuple->src_port		= ports->source;
	tuple->dst_port		= ports->dest;
	tuple->l3proto		= AF_INET6;
	tuple->l4proto		= ip6h->nexthdr;
	tuple->iifidx		= dev->ifindex;

	return 0;
}

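/*
 * Netfilter hook for the IPv6 fastpath; mirrors the IPv4 hook above,
 * decrementing the hop limit instead of the TTL.
 */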
unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
			  const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	struct flow_offload_tuple tuple = {};
	enum flow_offload_tuple_dir dir;
	const struct in6_addr *nexthop;
	struct flow_offload *flow;
	struct net_device *outdev;
	struct ipv6hdr *ip6h;
	struct rt6_info *rt;

	if (skb->protocol != htons(ETH_P_IPV6))
		return NF_ACCEPT;

	if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
		return NF_ACCEPT;

	tuplehash = flow_offload_lookup(flow_table, &tuple);
	if (tuplehash == NULL)
		return NF_ACCEPT;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
	outdev = rt->dst.dev;

	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
		return NF_ACCEPT;

	if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
				sizeof(*ip6h)))
		return NF_ACCEPT;

	if (nf_flow_offload_dst_check(&rt->dst)) {
		flow_offload_teardown(flow);
		return NF_ACCEPT;
	}

	if (skb_try_make_writable(skb, sizeof(*ip6h)))
		return NF_DROP;

	if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
		return NF_DROP;

	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
	ip6h = ipv6_hdr(skb);
	ip6h->hop_limit--;
	skb->tstamp = 0;

	if (unlikely(dst_xfrm(&rt->dst))) {
		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
		IP6CB(skb)->iif = skb->dev->ifindex;
		IP6CB(skb)->flags = IP6SKB_FORWARDED;
		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
	}

	skb->dev = outdev;
	nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
	skb_dst_set_noref(skb, &rt->dst);
	neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);

	return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);