/*
 * ip_vs_xmit.c: various packet transmitters for IPVS
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Julian Anastasov <ja@ssi.bg>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 * Description of forwarding methods:
 * - all transmitters are called from LOCAL_IN (remote clients) and
 * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD
 * - not all connections have destination server, for example,
 * connections in backup server when fwmark is used
 * - bypass connections use daddr from packet
 * - we can use dst without ref while sending in RCU section, we use
 * ref when returning NF_ACCEPT for NAT-ed packet via loopback
 * LOCAL_OUT rules:
 * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
 * - skb->pkt_type is not set yet
 * - the only place where we can see skb->sk != NULL
 */

#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/tcp.h>                  /* for tcphdr */
#include <net/ip.h>
#include <net/tcp.h>                    /* for csum_tcpudp_magic */
#include <net/udp.h>
#include <net/icmp.h>                   /* for icmp_send */
#include <net/route.h>                  /* for ip_route_output */
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>

#include <net/ip_vs.h>

enum {
	IP_VS_RT_MODE_LOCAL	= 1, /* Allow local dest */
	IP_VS_RT_MODE_NON_LOCAL	= 2, /* Allow non-local dest */
	IP_VS_RT_MODE_RDR	= 4, /* Allow redirect from remote daddr to
				      * local
				      */
	IP_VS_RT_MODE_CONNECT	= 8, /* Always bind route to saddr */
	IP_VS_RT_MODE_KNOWN_NH	= 16,/* Route via remote addr */
	IP_VS_RT_MODE_TUNNEL	= 32,/* Tunnel mode */
};

static inline struct ip_vs_dest_dst *ip_vs_dest_dst_alloc(void)
{
	return kmalloc(sizeof(struct ip_vs_dest_dst), GFP_ATOMIC);
}

static inline void ip_vs_dest_dst_free(struct ip_vs_dest_dst *dest_dst)
{
	kfree(dest_dst);
}

/*
 *      Destination cache to speed up outgoing route lookup
 */
static inline void
__ip_vs_dst_set(struct ip_vs_dest *dest, struct ip_vs_dest_dst *dest_dst,
		struct dst_entry *dst, u32 dst_cookie)
{
	struct ip_vs_dest_dst *old;

	old = rcu_dereference_protected(dest->dest_dst,
					lockdep_is_held(&dest->dst_lock));

	if (dest_dst) {
		dest_dst->dst_cache = dst;
		dest_dst->dst_cookie = dst_cookie;
	}
	rcu_assign_pointer(dest->dest_dst, dest_dst);

	if (old)
		call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
}

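/* Return the cached dest_dst if its dst entry is still valid (checked
 * against the saved cookie), otherwise NULL so that the caller performs
 * a fresh route lookup.  Called under RCU read lock.
 */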
static inline struct ip_vs_dest_dst *
__ip_vs_dst_check(struct ip_vs_dest *dest)
{
	struct ip_vs_dest_dst *dest_dst = rcu_dereference(dest->dest_dst);
	struct dst_entry *dst;

	if (!dest_dst)
		return NULL;
	dst = dest_dst->dst_cache;
	if (dst->obsolete &&
	    dst->ops->check(dst, dest_dst->dst_cookie) == NULL)
		return NULL;
	return dest_dst;
}

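/* Decide whether an IPv6 skb is too big for the given MTU, taking the
 * conntrack-defragmented size (frag_max_size) and GSO into account.
 */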
static inline bool
__mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
{
	if (IP6CB(skb)->frag_max_size) {
		/* frag_max_size tells us that this packet has been
		 * defragmented by the netfilter IPv6 conntrack module.
		 */
		if (IP6CB(skb)->frag_max_size > mtu)
			return true; /* largest fragment violates MTU */
	}
	else if (skb->len > mtu && !skb_is_gso(skb)) {
		return true; /* packet size violates MTU */
	}
	return false;
}

/* Get route to daddr, update *saddr, optionally bind route to saddr */
static struct rtable *do_output_route4(struct net *net, __be32 daddr,
				       int rt_mode, __be32 *saddr)
{
	struct flowi4 fl4;
	struct rtable *rt;
	int loop = 0;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = daddr;
	fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
	fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
			   FLOWI_FLAG_KNOWN_NH : 0;

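	/* The lookup below may be retried: once without a source address
	 * that the stack rejected as invalid, and once more to bind the
	 * route to the source address picked by the first lookup.
	 */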
retry:
	rt = ip_route_output_key(net, &fl4);
	if (IS_ERR(rt)) {
		/* Invalid saddr ? */
		if (PTR_ERR(rt) == -EINVAL && *saddr &&
		    rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
			*saddr = 0;
			flowi4_update_output(&fl4, 0, 0, daddr, 0);
			goto retry;
		}
		IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
		return NULL;
	} else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
		ip_rt_put(rt);
		*saddr = fl4.saddr;
		flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr);
		loop++;
		goto retry;
	}
	*saddr = fl4.saddr;
	return rt;
}

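/* Return convention shared by __ip_vs_get_out_rt() and
 * __ip_vs_get_out_rt_v6(): 1 when the selected route is local, 0 when it
 * is non-local, -1 on error.
 */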
/* Get route to destination or remote server */
static int
__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
		   __be32 daddr, int rt_mode, __be32 *ret_saddr)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_dest_dst *dest_dst;
	struct rtable *rt;			/* Route to the other host */
	struct rtable *ort;			/* Original route */
	struct iphdr *iph;
	__be16 df;
	int mtu;
	int local, noref = 1;

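	/* Prefer the per-dest cached route.  Connections without a real
	 * server (e.g. bypass, or backup-server connections created via
	 * fwmark) fall back to a one-off lookup that does not remember
	 * the source address.
	 */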
	if (dest) {
		dest_dst = __ip_vs_dst_check(dest);
		if (likely(dest_dst))
			rt = (struct rtable *) dest_dst->dst_cache;
		else {
			dest_dst = ip_vs_dest_dst_alloc();
			spin_lock_bh(&dest->dst_lock);
			if (!dest_dst) {
				__ip_vs_dst_set(dest, NULL, NULL, 0);
				spin_unlock_bh(&dest->dst_lock);
				goto err_unreach;
			}
			rt = do_output_route4(net, dest->addr.ip, rt_mode,
					      &dest_dst->dst_saddr.ip);
			if (!rt) {
				__ip_vs_dst_set(dest, NULL, NULL, 0);
				spin_unlock_bh(&dest->dst_lock);
				ip_vs_dest_dst_free(dest_dst);
				goto err_unreach;
			}
			__ip_vs_dst_set(dest, dest_dst, &rt->dst, 0);
			spin_unlock_bh(&dest->dst_lock);
			IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
				  &dest->addr.ip, &dest_dst->dst_saddr.ip,
				  atomic_read(&rt->dst.__refcnt));
		}
		daddr = dest->addr.ip;
		if (ret_saddr)
			*ret_saddr = dest_dst->dst_saddr.ip;
	} else {
		__be32 saddr = htonl(INADDR_ANY);

		noref = 0;

		/* For such unconfigured boxes avoid many route lookups
		 * for performance reasons because we do not remember saddr
		 */
		rt_mode &= ~IP_VS_RT_MODE_CONNECT;
		rt = do_output_route4(net, daddr, rt_mode, &saddr);
		if (!rt)
			goto err_unreach;
		if (ret_saddr)
			*ret_saddr = saddr;
	}

	local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0;
	if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
	      rt_mode)) {
		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
			     (rt->rt_flags & RTCF_LOCAL) ?
			     "local":"non-local", &daddr);
		goto err_put;
	}
	iph = ip_hdr(skb);
	if (likely(!local)) {
		if (unlikely(ipv4_is_loopback(iph->saddr))) {
			IP_VS_DBG_RL("Stopping traffic from loopback address "
				     "%pI4 to non-local address, dest: %pI4\n",
				     &iph->saddr, &daddr);
			goto err_put;
		}
	} else {
		ort = skb_rtable(skb);
		if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
		    !(ort->rt_flags & RTCF_LOCAL)) {
			IP_VS_DBG_RL("Redirect from non-local address %pI4 to "
				     "local requires NAT method, dest: %pI4\n",
				     &iph->daddr, &daddr);
			goto err_put;
		}
		/* skb to local stack, preserve old route */
		if (!noref)
			ip_rt_put(rt);
		return local;
	}

	if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
		mtu = dst_mtu(&rt->dst);
		df = iph->frag_off & htons(IP_DF);
	} else {
		struct sock *sk = skb->sk;

		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
		if (mtu < 68) {
			IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
			goto err_put;
		}
		ort = skb_rtable(skb);
		if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
			ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
		/* MTU check allowed? */
		df = sysctl_pmtu_disc(ipvs) ? iph->frag_off & htons(IP_DF) : 0;
	}

	/* MTU checking */
	if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr);
		goto err_put;
	}

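	/* Attach the selected route: as an RCU-protected noref dst when it
	 * comes from the per-dest cache, otherwise hand over the reference
	 * taken by the route lookup.
	 */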
	skb_dst_drop(skb);
	if (noref) {
		if (!local)
			skb_dst_set_noref_force(skb, &rt->dst);
		else
			skb_dst_set(skb, dst_clone(&rt->dst));
	} else
		skb_dst_set(skb, &rt->dst);

	return local;

err_put:
	if (!noref)
		ip_rt_put(rt);
	return -1;

err_unreach:
	dst_link_failure(skb);
	return -1;
}

#ifdef CONFIG_IP_VS_IPV6

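/* An IPv6 route is considered local when it goes via the loopback device. */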
static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
{
	return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
}

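/* Resolve an IPv6 route to *daddr.  When ret_saddr is given, also pick a
 * source address and, if do_xfrm is set, pass the route through
 * xfrm_lookup().  Returns NULL on failure.
 */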
static struct dst_entry *
__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
			struct in6_addr *ret_saddr, int do_xfrm)
{
	struct dst_entry *dst;
	struct flowi6 fl6 = {
		.daddr = *daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error)
		goto out_err;
	if (!ret_saddr)
		return dst;
	if (ipv6_addr_any(&fl6.saddr) &&
	    ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
			       &fl6.daddr, 0, &fl6.saddr) < 0)
		goto out_err;
	if (do_xfrm) {
		dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
		if (IS_ERR(dst)) {
			dst = NULL;
			goto out_err;
		}
	}
	*ret_saddr = fl6.saddr;
	return dst;

out_err:
	dst_release(dst);
	IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
	return NULL;
}

/*
 * Get route to destination or remote server
 */
static int
__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
		      struct in6_addr *daddr, struct in6_addr *ret_saddr,
		      struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct ip_vs_dest_dst *dest_dst;
	struct rt6_info *rt;			/* Route to the other host */
	struct rt6_info *ort;			/* Original route */
	struct dst_entry *dst;
	int mtu;
	int local, noref = 1;

	if (dest) {
		dest_dst = __ip_vs_dst_check(dest);
		if (likely(dest_dst))
			rt = (struct rt6_info *) dest_dst->dst_cache;
		else {
			u32 cookie;

			dest_dst = ip_vs_dest_dst_alloc();
			spin_lock_bh(&dest->dst_lock);
			if (!dest_dst) {
				__ip_vs_dst_set(dest, NULL, NULL, 0);
				spin_unlock_bh(&dest->dst_lock);
				goto err_unreach;
			}
			dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
						      &dest_dst->dst_saddr.in6,
						      do_xfrm);
			if (!dst) {
				__ip_vs_dst_set(dest, NULL, NULL, 0);
				spin_unlock_bh(&dest->dst_lock);
				ip_vs_dest_dst_free(dest_dst);
				goto err_unreach;
			}
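			/* Save the fib6 node's serial number as the dst
			 * cookie so that __ip_vs_dst_check() can detect
			 * routing changes.
			 */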
			rt = (struct rt6_info *) dst;
			cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
			__ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
			spin_unlock_bh(&dest->dst_lock);
			IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
				  &dest->addr.in6, &dest_dst->dst_saddr.in6,
				  atomic_read(&rt->dst.__refcnt));
		}
		if (ret_saddr)
			*ret_saddr = dest_dst->dst_saddr.in6;
	} else {
		noref = 0;
		dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
		if (!dst)
			goto err_unreach;
		rt = (struct rt6_info *) dst;
	}

	local = __ip_vs_is_local_route6(rt);
	if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
	      rt_mode)) {
		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
			     local ? "local":"non-local", daddr);
		goto err_put;
	}
	if (likely(!local)) {
		if (unlikely((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
			     ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
					    IPV6_ADDR_LOOPBACK)) {
			IP_VS_DBG_RL("Stopping traffic from loopback address "
				     "%pI6c to non-local address, "
				     "dest: %pI6c\n",
				     &ipv6_hdr(skb)->saddr, daddr);
			goto err_put;
		}
	} else {
		ort = (struct rt6_info *) skb_dst(skb);
		if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
		    !__ip_vs_is_local_route6(ort)) {
			IP_VS_DBG_RL("Redirect from non-local address %pI6c "
				     "to local requires NAT method, "
				     "dest: %pI6c\n",
				     &ipv6_hdr(skb)->daddr, daddr);
			goto err_put;
		}
		/* skb to local stack, preserve old route */
		if (!noref)
			dst_release(&rt->dst);
		return local;
	}

	/* MTU checking */
	if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL)))
		mtu = dst_mtu(&rt->dst);
	else {
		struct sock *sk = skb->sk;

		mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
		if (mtu < IPV6_MIN_MTU) {
			IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
				     IPV6_MIN_MTU);
			goto err_put;
		}
		ort = (struct rt6_info *) skb_dst(skb);
		if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
			ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
	}

	if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
		if (!skb->dev)
			skb->dev = net->loopback_dev;
		/* only send ICMP too big on first fragment */
		if (!ipvsh->fragoffs)
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr);
		goto err_put;
	}

	skb_dst_drop(skb);
	if (noref) {
		if (!local)
			skb_dst_set_noref_force(skb, &rt->dst);
		else
			skb_dst_set(skb, dst_clone(&rt->dst));
	} else
		skb_dst_set(skb, &rt->dst);

	return local;

err_put:
	if (!noref)
		dst_release(&rt->dst);
	return -1;

err_unreach:
	dst_link_failure(skb);
	return -1;
}
#endif


/* return NF_ACCEPT to allow forwarding or other NF_xxx on error */
static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
					    struct ip_vs_conn *cp)
{
	int ret = NF_ACCEPT;

	skb->ipvs_property = 1;
	if (unlikely(cp->flags & IP_VS_CONN_F_NFCT))
		ret = ip_vs_confirm_conntrack(skb);
	if (ret == NF_ACCEPT) {
		nf_reset(skb);
		skb_forward_csum(skb);
	}
	return ret;
}

/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
					 struct ip_vs_conn *cp, int local)
{
	int ret = NF_STOLEN;

	skb->ipvs_property = 1;
	if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
		ip_vs_notrack(skb);
	else
		ip_vs_update_conntrack(skb, cp, 1);
	if (!local) {
		skb_forward_csum(skb);
		NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
			dst_output);
	} else
		ret = NF_ACCEPT;
	return ret;
}

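/* Same as ip_vs_nat_send_or_cont() but never updates the conntrack entry;
 * used by the non-NAT transmitters.
 */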
/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
				     struct ip_vs_conn *cp, int local)
{
	int ret = NF_STOLEN;

	skb->ipvs_property = 1;
	if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
		ip_vs_notrack(skb);
	if (!local) {
		skb_forward_csum(skb);
		NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
			dst_output);
	} else
		ret = NF_ACCEPT;
	return ret;
}


/*
 *      NULL transmitter (do nothing except return NF_ACCEPT)
 */
int
ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	/* we do not touch skb and do not need pskb ptr */
	return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
}


/*
 *      Bypass transmitter
 *      Let packets bypass the destination when the destination is not
 *      available; it may only be used in a transparent cache cluster.
 */
int
ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct iphdr  *iph = ip_hdr(skb);

	EnterFunction(10);

	rcu_read_lock();
	if (__ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL,
			       NULL) < 0)
		goto tx_error;

	ip_send_check(iph);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
	rcu_read_unlock();

	LeaveFunction(10);
	return NF_STOLEN;

 tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	EnterFunction(10);

	rcu_read_lock();
	if (__ip_vs_get_out_rt_v6(skb, NULL, &ipvsh->daddr.in6, NULL,
				  ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
		goto tx_error;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
	rcu_read_unlock();

	LeaveFunction(10);
	return NF_STOLEN;

 tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif

/*
 *      NAT transmitter (only for outside-to-inside nat forwarding)
 *      Not used for related ICMP
 */
int
ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
	       struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct rtable *rt;		/* Route to the other host */
	int local, rc, was_input;

	EnterFunction(10);

	rcu_read_lock();
	/* check if it is a connection of no-client-port */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
		__be16 _pt, *p;

		p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt);
		if (p == NULL)
			goto tx_error;
		ip_vs_conn_fill_cport(cp, *p);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
	}

	was_input = rt_is_input_route(skb_rtable(skb));
	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				   IP_VS_RT_MODE_LOCAL |
				   IP_VS_RT_MODE_NON_LOCAL |
				   IP_VS_RT_MODE_RDR, NULL);
	if (local < 0)
		goto tx_error;
	rt = skb_rtable(skb);
	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
					 "ip_vs_nat_xmit(): "
					 "stopping DNAT to local address");
			goto tx_error;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
		IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
				 "stopping DNAT to loopback address");
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, sizeof(struct iphdr)))
		goto tx_error;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error;

	/* mangle the packet */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
		goto tx_error;
	ip_hdr(skb)->daddr = cp->daddr.ip;
	ip_send_check(ip_hdr(skb));

	IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");

	/* FIXME: when an application helper enlarges the packet and the
	   length becomes larger than the MTU of the outgoing device, there
	   will still be an MTU problem. */

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
	rcu_read_unlock();

	LeaveFunction(10);
	return rc;

  tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct rt6_info *rt;		/* Route to the other host */
	int local, rc;

	EnterFunction(10);

	rcu_read_lock();
	/* check if it is a connection of no-client-port */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) {
		__be16 _pt, *p;
		p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt);
		if (p == NULL)
			goto tx_error;
		ip_vs_conn_fill_cport(cp, *p);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
	}

	local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
				      ipvsh, 0,
				      IP_VS_RT_MODE_LOCAL |
				      IP_VS_RT_MODE_NON_LOCAL |
				      IP_VS_RT_MODE_RDR);
	if (local < 0)
		goto tx_error;
	rt = (struct rt6_info *) skb_dst(skb);
	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
					 "ip_vs_nat_xmit_v6(): "
					 "stopping DNAT to local address");
			goto tx_error;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
	    ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
		IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
				 "ip_vs_nat_xmit_v6(): "
				 "stopping DNAT to loopback address");
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
		goto tx_error;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error;

	/* mangle the packet */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
		goto tx_error;
	ipv6_hdr(skb)->daddr = cp->daddr.in6;

	IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");

	/* FIXME: when an application helper enlarges the packet and the
	   length becomes larger than the MTU of the outgoing device, there
	   will still be an MTU problem. */

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
	rcu_read_unlock();

	LeaveFunction(10);
	return rc;

tx_error:
	LeaveFunction(10);
	kfree_skb(skb);
	rcu_read_unlock();
	return NF_STOLEN;
}
#endif


/*
 *   IP Tunneling transmitter
 *
 *   This function encapsulates the packet in a new IP packet whose
 *   destination is set to cp->daddr. Most of the code in this function
 *   is taken from ipip.c.
 *
 *   It is used in VS/TUN cluster. The load balancer selects a real
 *   server from a cluster based on a scheduling algorithm,
 *   encapsulates the request packet and forwards it to the selected
 *   server. For example, all real servers are configured with
 *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
 *   the encapsulated packet, it will decapsulate the packet, process
 *   the request and return the response packets directly to the client
 *   without passing through the load balancer. This can greatly increase
 *   the scalability of the virtual server.
 *
 *   Used for ANY protocol
 */
int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
	struct rtable *rt;			/* Route to the other host */
	__be32 saddr;				/* Source for tunnel */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *old_iph = ip_hdr(skb);
	u8     tos = old_iph->tos;
	__be16 df;
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int ret, local;

	EnterFunction(10);

	rcu_read_lock();
	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				   IP_VS_RT_MODE_LOCAL |
				   IP_VS_RT_MODE_NON_LOCAL |
				   IP_VS_RT_MODE_CONNECT |
				   IP_VS_RT_MODE_TUNNEL, &saddr);
	if (local < 0)
		goto tx_error;
	if (local) {
		rcu_read_unlock();
		return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
	}

	rt = skb_rtable(skb);
	tdev = rt->dst.dev;

	/* Copy DF, reset fragment offset and MF */
	df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

	if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);

		if (!new_skb)
			goto tx_error;
		consume_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = skb->network_header;

	/* fix old IP header checksum */
	ip_send_check(old_iph);

	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/*
	 *	Push down and install the IPIP header.
	 */
	iph			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	tos;
	iph->daddr		=	cp->daddr.ip;
	iph->saddr		=	saddr;
	iph->ttl		=	old_iph->ttl;
	ip_select_ident(iph, &rt->dst, NULL);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ret = ip_vs_tunnel_xmit_prepare(skb, cp);
	if (ret == NF_ACCEPT)
		ip_local_out(skb);
	else if (ret == NF_DROP)
		kfree_skb(skb);
	rcu_read_unlock();

	LeaveFunction(10);

	return NF_STOLEN;

  tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct rt6_info *rt;		/* Route to the other host */
	struct in6_addr saddr;		/* Source for tunnel */
	struct net_device *tdev;	/* Device to other host */
	struct ipv6hdr  *old_iph = ipv6_hdr(skb);
	struct ipv6hdr  *iph;		/* Our new IP header */
	unsigned int max_headroom;	/* The extra header space needed */
	int ret, local;

	EnterFunction(10);

	rcu_read_lock();
	local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
				      &saddr, ipvsh, 1,
				      IP_VS_RT_MODE_LOCAL |
				      IP_VS_RT_MODE_NON_LOCAL |
				      IP_VS_RT_MODE_TUNNEL);
	if (local < 0)
		goto tx_error;
	if (local) {
		rcu_read_unlock();
		return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
	}

	rt = (struct rt6_info *) skb_dst(skb);
	tdev = rt->dst.dev;

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);

	if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);

		if (!new_skb)
			goto tx_error;
		consume_skb(skb);
		skb = new_skb;
		old_iph = ipv6_hdr(skb);
	}

	skb->transport_header = skb->network_header;

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/*
	 *	Push down and install the IPv6-in-IPv6 header.
	 */
	iph			=	ipv6_hdr(skb);
	iph->version		=	6;
	iph->nexthdr		=	IPPROTO_IPV6;
	iph->payload_len	=	old_iph->payload_len;
	be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
	iph->priority		=	old_iph->priority;
	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
	iph->daddr = cp->daddr.in6;
	iph->saddr = saddr;
	iph->hop_limit		=	old_iph->hop_limit;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ret = ip_vs_tunnel_xmit_prepare(skb, cp);
	if (ret == NF_ACCEPT)
		ip6_local_out(skb);
	else if (ret == NF_DROP)
		kfree_skb(skb);
	rcu_read_unlock();

	LeaveFunction(10);

	return NF_STOLEN;

tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif


/*
 *      Direct Routing transmitter
 *      Used for ANY protocol
 */
int
ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
	      struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	int local;

	EnterFunction(10);

	rcu_read_lock();
	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				   IP_VS_RT_MODE_LOCAL |
				   IP_VS_RT_MODE_NON_LOCAL |
				   IP_VS_RT_MODE_KNOWN_NH, NULL);
	if (local < 0)
		goto tx_error;
	if (local) {
		rcu_read_unlock();
		return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
	}

	ip_send_check(ip_hdr(skb));

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
	rcu_read_unlock();

	LeaveFunction(10);
	return NF_STOLEN;

  tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	int local;

	EnterFunction(10);

	rcu_read_lock();
	local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
				      ipvsh, 0,
				      IP_VS_RT_MODE_LOCAL |
				      IP_VS_RT_MODE_NON_LOCAL);
	if (local < 0)
		goto tx_error;
	if (local) {
		rcu_read_unlock();
		return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
	}

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
	rcu_read_unlock();

	LeaveFunction(10);
	return NF_STOLEN;

tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif


/*
 *	ICMP packet transmitter
 *	called by ip_vs_in_icmp
 */
int
ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
		struct ip_vs_iphdr *iph)
{
	struct rtable	*rt;	/* Route to the other host */
	int rc;
	int local;
	int rt_mode, was_input;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp, iph);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */
	was_input = rt_is_input_route(skb_rtable(skb));

	/* LOCALNODE from FORWARD hook is not supported */
	rt_mode = (hooknum != NF_INET_FORWARD) ?
		  IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
		  IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
	rcu_read_lock();
	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL);
	if (local < 0)
		goto tx_error;
	rt = skb_rtable(skb);

	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG(10, "%s(): "
				  "stopping DNAT to local address %pI4\n",
				  __func__, &cp->daddr.ip);
			goto tx_error;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
		IP_VS_DBG(1, "%s(): "
			  "stopping DNAT to loopback %pI4\n",
			  __func__, &cp->daddr.ip);
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, offset))
		goto tx_error;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error;

	ip_vs_nat_icmp(skb, pp, cp, 0);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
	rcu_read_unlock();
	goto out;

  tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	rc = NF_STOLEN;
  out:
	LeaveFunction(10);
	return rc;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
		struct ip_vs_iphdr *ipvsh)
{
	struct rt6_info	*rt;	/* Route to the other host */
	int rc;
	int local;
	int rt_mode;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp, ipvsh);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */

	/* LOCALNODE from FORWARD hook is not supported */
	rt_mode = (hooknum != NF_INET_FORWARD) ?
		  IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
		  IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
	rcu_read_lock();
	local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
				      ipvsh, 0, rt_mode);
	if (local < 0)
		goto tx_error;
	rt = (struct rt6_info *) skb_dst(skb);
	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG(10, "%s(): "
				  "stopping DNAT to local address %pI6\n",
				  __func__, &cp->daddr.in6);
			goto tx_error;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
	    ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
		IP_VS_DBG(1, "%s(): "
			  "stopping DNAT to loopback %pI6\n",
			  __func__, &cp->daddr.in6);
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, offset))
		goto tx_error;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error;

	ip_vs_nat_icmp_v6(skb, pp, cp, 0);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
	rcu_read_unlock();
	goto out;

tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	rc = NF_STOLEN;
out:
	LeaveFunction(10);
	return rc;
}
#endif