/*
 * ip_vs_xmit.c: various packet transmitters for IPVS
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Julian Anastasov <ja@ssi.bg>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */

#include <linux/kernel.h>
#include <linux/tcp.h>                  /* for tcphdr */
#include <net/ip.h>
#include <net/tcp.h>                    /* for csum_tcpudp_magic */
#include <net/udp.h>
#include <net/icmp.h>                   /* for icmp_send */
#include <net/route.h>                  /* for ip_route_output */
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>

#include <net/ip_vs.h>


/*
 *      Destination cache to speed up outgoing route lookup
 */
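/*
 * Swap in a new cached route for the destination and drop the old
 * reference.  The callers below take dest->dst_lock around the update,
 * so the cache pointer and the TOS it was resolved with change
 * together.
 */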
static inline void
__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst)
{
	struct dst_entry *old_dst;

	old_dst = dest->dst_cache;
	dest->dst_cache = dst;
	dest->dst_rtos = rtos;
	dst_release(old_dst);
}

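/*
 * Return the cached route with a reference held, or NULL when nothing
 * is cached or the entry has gone stale.  For IPv4 a cached route is
 * also dropped when the requested TOS differs from the one it was
 * originally looked up with.
 */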
static inline struct dst_entry *
__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
{
	struct dst_entry *dst = dest->dst_cache;

	if (!dst)
		return NULL;
	if ((dst->obsolete
	     || (dest->af == AF_INET && rtos != dest->dst_rtos)) &&
	    dst->ops->check(dst, cookie) == NULL) {
		dest->dst_cache = NULL;
		dst_release(dst);
		return NULL;
	}
	dst_hold(dst);
	return dst;
}

static struct rtable *
__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
{
	struct rtable *rt;			/* Route to the other host */
	struct ip_vs_dest *dest = cp->dest;

	if (dest) {
		spin_lock(&dest->dst_lock);
		if (!(rt = (struct rtable *)
		      __ip_vs_dst_check(dest, rtos, 0))) {
			struct flowi fl = {
				.oif = 0,
				.nl_u = {
					.ip4_u = {
						.daddr = dest->addr.ip,
						.saddr = 0,
						.tos = rtos, } },
			};

			if (ip_route_output_key(&init_net, &rt, &fl)) {
				spin_unlock(&dest->dst_lock);
				IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
					     &dest->addr.ip);
				return NULL;
			}
			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
			IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
				  &dest->addr.ip,
				  atomic_read(&rt->u.dst.__refcnt), rtos);
		}
		spin_unlock(&dest->dst_lock);
	} else {
		struct flowi fl = {
			.oif = 0,
			.nl_u = {
				.ip4_u = {
					.daddr = cp->daddr.ip,
					.saddr = 0,
					.tos = rtos, } },
		};

		if (ip_route_output_key(&init_net, &rt, &fl)) {
			IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
				     &cp->daddr.ip);
			return NULL;
		}
	}

	return rt;
}

#ifdef CONFIG_IP_VS_IPV6
static struct rt6_info *
__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
{
	struct rt6_info *rt;			/* Route to the other host */
	struct ip_vs_dest *dest = cp->dest;

	if (dest) {
		spin_lock(&dest->dst_lock);
		rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0);
		if (!rt) {
			struct flowi fl = {
				.oif = 0,
				.nl_u = {
					.ip6_u = {
						.daddr = dest->addr.in6,
						.saddr = {
							.s6_addr32 =
								{ 0, 0, 0, 0 },
						},
					},
				},
			};

			rt = (struct rt6_info *)ip6_route_output(&init_net,
								 NULL, &fl);
			if (!rt) {
				spin_unlock(&dest->dst_lock);
				IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
					     &dest->addr.in6);
				return NULL;
			}
			__ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst));
			IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n",
				  &dest->addr.in6,
				  atomic_read(&rt->u.dst.__refcnt));
		}
		spin_unlock(&dest->dst_lock);
	} else {
		struct flowi fl = {
			.oif = 0,
			.nl_u = {
				.ip6_u = {
					.daddr = cp->daddr.in6,
					.saddr = {
						.s6_addr32 = { 0, 0, 0, 0 },
					},
				},
			},
		};

		rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
		if (!rt) {
			IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
				     &cp->daddr.in6);
			return NULL;
		}
	}

	return rt;
}
#endif


/*
 *	Release dest->dst_cache before a dest is removed
 */
void
ip_vs_dst_reset(struct ip_vs_dest *dest)
{
	struct dst_entry *old_dst;

	old_dst = dest->dst_cache;
	dest->dst_cache = NULL;
	dst_release(old_dst);
}

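/*
 * Queue a packet for transmission on the route's output device.
 * Setting ipvs_property marks the skb so the IPVS netfilter hooks do
 * not process it a second time; skb_forward_csum() drops any
 * CHECKSUM_COMPLETE state before the packet is re-injected at the
 * LOCAL_OUT hook and handed to dst_output().
 */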
#define IP_VS_XMIT(pf, skb, rt)				\
do {							\
	(skb)->ipvs_property = 1;			\
	skb_forward_csum(skb);				\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
		(rt)->u.dst.dev, dst_output);		\
} while (0)


/*
 *      NULL transmitter (do nothing except return NF_ACCEPT)
 */
int
ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp)
{
	/* we do not touch skb and do not need pskb ptr */
	return NF_ACCEPT;
}


/*
 *      Bypass transmitter
 *      Let packets bypass the destination when the destination is not
 *      available; it should only be used in a transparent cache cluster.
 */
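/*
 * Note that the route is looked up from the packet's own destination
 * address (iph->daddr) rather than from the connection entry, since
 * there is no usable real server to forward to.
 */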
int
ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp)
{
	struct rtable *rt;			/* Route to the other host */
	struct iphdr  *iph = ip_hdr(skb);
	u8     tos = iph->tos;
	int    mtu;
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip4_u = {
				.daddr = iph->daddr,
				.saddr = 0,
				.tos = RT_TOS(tos), } },
	};

	EnterFunction(10);

	if (ip_route_output_key(&init_net, &rt, &fl)) {
		IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, dest: %pI4\n",
			     &iph->daddr);
		goto tx_error_icmp;
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->u.dst);
	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
		ip_rt_put(rt);
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
		ip_rt_put(rt);
		return NF_STOLEN;
	}
	ip_send_check(ip_hdr(skb));

	/* drop old route */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(PF_INET, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

 tx_error_icmp:
	dst_link_failure(skb);
 tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;			/* Route to the other host */
	struct ipv6hdr  *iph = ipv6_hdr(skb);
	int    mtu;
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip6_u = {
				.daddr = iph->daddr,
				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
	};

	EnterFunction(10);

	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
	if (!rt) {
		IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, dest: %pI6\n",
			     &iph->daddr);
		goto tx_error_icmp;
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->u.dst);
	if (skb->len > mtu) {
		dst_release(&rt->u.dst);
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): frag needed\n");
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(skb == NULL)) {
		dst_release(&rt->u.dst);
		return NF_STOLEN;
	}

	/* drop old route */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(PF_INET6, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

 tx_error_icmp:
	dst_link_failure(skb);
 tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif

/*
 *      NAT transmitter (only for outside-to-inside nat forwarding)
 *      Not used for related ICMP
 */
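/*
 * The flow below: look up (or reuse) the route to the real server,
 * check the MTU, make the header writable, let the protocol's
 * dnat_handler rewrite the transport header, then rewrite the
 * destination address to cp->daddr and refresh the IP checksum.
 */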
int
ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
	       struct ip_vs_protocol *pp)
{
	struct rtable *rt;		/* Route to the other host */
	int mtu;
	struct iphdr *iph = ip_hdr(skb);

	EnterFunction(10);

	/* check if it is a connection of no-client-port */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
		__be16 _pt, *p;
		p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
		if (p == NULL)
			goto tx_error;
		ip_vs_conn_fill_cport(cp, *p);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
	}

	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->u.dst);
	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
		ip_rt_put(rt);
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, sizeof(struct iphdr)))
		goto tx_error_put;

	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
		goto tx_error_put;

	/* drop old route */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/* mangle the packet */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
		goto tx_error;
	ip_hdr(skb)->daddr = cp->daddr.ip;
	ip_send_check(ip_hdr(skb));

	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");

	/* FIXME: when application helper enlarges the packet and the length
	   is larger than the MTU of outgoing device, there will be still
	   MTU problem. */

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(PF_INET, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	LeaveFunction(10);
	kfree_skb(skb);
	return NF_STOLEN;
  tx_error_put:
	ip_rt_put(rt);
	goto tx_error;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;		/* Route to the other host */
	int mtu;

	EnterFunction(10);

	/* check if it is a connection of no-client-port */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
		__be16 _pt, *p;
		p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
				       sizeof(_pt), &_pt);
		if (p == NULL)
			goto tx_error;
		ip_vs_conn_fill_cport(cp, *p);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
	}

	rt = __ip_vs_get_out_rt_v6(cp);
	if (!rt)
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->u.dst);
	if (skb->len > mtu) {
		dst_release(&rt->u.dst);
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP_VS_DBG_RL_PKT(0, pp, skb, 0,
				 "ip_vs_nat_xmit_v6(): frag needed for");
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
		goto tx_error_put;

	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
		goto tx_error_put;

	/* drop old route */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/* mangle the packet */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
		goto tx_error;
	ipv6_hdr(skb)->daddr = cp->daddr.in6;

	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");

	/* FIXME: when application helper enlarges the packet and the length
	   is larger than the MTU of outgoing device, there will be still
	   MTU problem. */

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(PF_INET6, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	LeaveFunction(10);
	kfree_skb(skb);
	return NF_STOLEN;
tx_error_put:
	dst_release(&rt->u.dst);
	goto tx_error;
}
#endif


/*
 *   IP Tunneling transmitter
 *
 *   This function encapsulates the packet in a new IP packet, its
 *   destination will be set to cp->daddr. Most code of this function
 *   is taken from ipip.c.
 *
 *   It is used in VS/TUN cluster. The load balancer selects a real
 *   server from a cluster based on a scheduling algorithm,
 *   encapsulates the request packet and forwards it to the selected
 *   server. For example, all real servers are configured with
 *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
 *   the encapsulated packet, it will decapsulate the packet, process
 *   the request and return the response packets directly to the client
 *   without passing through the load balancer. This can greatly
 *   increase the scalability of virtual server.
 *
 *   Used for ANY protocol
 */
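/*
 * A sketch of the encapsulation performed below, for an IPv4 packet:
 *
 *   before:  | IP (client -> VIP) | payload |
 *   after:   | outer IP (-> real server, proto IPIP) | IP (client -> VIP) | payload |
 *
 * The inner header is left intact (only its checksum is refreshed);
 * the outer header takes its source and destination from the route
 * and copies the TTL and TOS of the original packet.
 */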
int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp)
{
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *old_iph = ip_hdr(skb);
	u8     tos = old_iph->tos;
	__be16 df = old_iph->frag_off;
	sk_buff_data_t old_transport_header = skb->transport_header;
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int    mtu;

	EnterFunction(10);

	if (skb->protocol != htons(ETH_P_IP)) {
		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
			     "ETH_P_IP: %d, skb protocol: %d\n",
			     htons(ETH_P_IP), skb->protocol);
		goto tx_error;
	}

	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
		goto tx_error_icmp;

	tdev = rt->u.dst.dev;

	mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
	if (mtu < 68) {
		ip_rt_put(rt);
		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n");
		goto tx_error;
	}
	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	df |= (old_iph->frag_off & htons(IP_DF));

	if ((old_iph->frag_off & htons(IP_DF))
	    && mtu < ntohs(old_iph->tot_len)) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): frag needed\n");
		goto tx_error;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

	if (skb_headroom(skb) < max_headroom
	    || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			kfree_skb(skb);
			IP_VS_ERR_RL("ip_vs_tunnel_xmit(): no memory\n");
			return NF_STOLEN;
		}
		kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = old_transport_header;

	/* fix old IP header checksum */
	ip_send_check(old_iph);

	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/* drop old route */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */
	iph			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	tos;
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;
	iph->ttl		=	old_iph->ttl;
	ip_select_ident(iph, &rt->u.dst, NULL);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ip_local_out(skb);

	LeaveFunction(10);

	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;		/* Route to the other host */
	struct net_device *tdev;	/* Device to other host */
	struct ipv6hdr  *old_iph = ipv6_hdr(skb);
	sk_buff_data_t old_transport_header = skb->transport_header;
	struct ipv6hdr  *iph;		/* Our new IP header */
	unsigned int max_headroom;	/* The extra header space needed */
	int    mtu;

	EnterFunction(10);

	if (skb->protocol != htons(ETH_P_IPV6)) {
		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): protocol error, "
			     "ETH_P_IPV6: %d, skb protocol: %d\n",
			     htons(ETH_P_IPV6), skb->protocol);
		goto tx_error;
	}

	rt = __ip_vs_get_out_rt_v6(cp);
	if (!rt)
		goto tx_error_icmp;

	tdev = rt->u.dst.dev;

	mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr);
	/* TODO IPv6: do we need this check in IPv6? */
	if (mtu < 1280) {
		dst_release(&rt->u.dst);
		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n");
		goto tx_error;
	}
	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		dst_release(&rt->u.dst);
		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): frag needed\n");
		goto tx_error;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);

	if (skb_headroom(skb) < max_headroom
	    || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			dst_release(&rt->u.dst);
			kfree_skb(skb);
			IP_VS_ERR_RL("ip_vs_tunnel_xmit_v6(): no memory\n");
			return NF_STOLEN;
		}
		kfree_skb(skb);
		skb = new_skb;
		old_iph = ipv6_hdr(skb);
	}

	skb->transport_header = old_transport_header;

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/* drop old route */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */
	iph			=	ipv6_hdr(skb);
	iph->version		=	6;
	iph->nexthdr		=	IPPROTO_IPV6;
	iph->payload_len	=	old_iph->payload_len;
	be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
	iph->priority		=	old_iph->priority;
	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
	iph->daddr		=	rt->rt6i_dst.addr;
	iph->saddr		=	cp->vaddr.in6; /* rt->rt6i_src.addr; */
	iph->hop_limit		=	old_iph->hop_limit;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ip6_local_out(skb);

	LeaveFunction(10);

	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif


/*
 *      Direct Routing transmitter
 *      Used for ANY protocol
 */
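/*
 * The packet itself is not mangled here; only the attached route is
 * changed, so the packet leaves on the device towards the real server
 * with its original addresses.  The real server is expected to be
 * configured to accept traffic for the virtual address.
 */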
int
ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
	      struct ip_vs_protocol *pp)
{
	struct rtable *rt;			/* Route to the other host */
	struct iphdr  *iph = ip_hdr(skb);
	int    mtu;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->u.dst);
	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
		ip_rt_put(rt);
		return NF_STOLEN;
	}
	ip_send_check(ip_hdr(skb));

	/* drop old route */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(PF_INET, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		 struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;			/* Route to the other host */
	int    mtu;

	EnterFunction(10);

	rt = __ip_vs_get_out_rt_v6(cp);
	if (!rt)
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->u.dst);
	if (skb->len > mtu) {
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		dst_release(&rt->u.dst);
		IP_VS_DBG_RL("ip_vs_dr_xmit_v6(): frag needed\n");
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(skb == NULL)) {
		dst_release(&rt->u.dst);
		return NF_STOLEN;
	}

	/* drop old route */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(PF_INET6, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif


/*
 *	ICMP packet transmitter
 *	called by the ip_vs_in_icmp
 */
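/*
 * Only VS/NAT (IP_VS_CONN_F_MASQ) connections need the embedded
 * addresses translated here; for the other forwarding methods the
 * packet is handed straight to the connection's packet_xmit callback.
 */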
int
ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp, int offset)
{
	struct rtable	*rt;	/* Route to the other host */
	int mtu;
	int rc;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */

	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->u.dst);
	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
		ip_rt_put(rt);
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, offset))
		goto tx_error_put;

	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
		goto tx_error_put;

	/* drop the old route when skb is not shared */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	ip_vs_nat_icmp(skb, pp, cp, 0);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(PF_INET, skb, rt);

	rc = NF_STOLEN;
	goto out;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	dev_kfree_skb(skb);
	rc = NF_STOLEN;
  out:
	LeaveFunction(10);
	return rc;
  tx_error_put:
	ip_rt_put(rt);
	goto tx_error;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp, int offset)
{
	struct rt6_info	*rt;	/* Route to the other host */
	int mtu;
	int rc;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */

	rt = __ip_vs_get_out_rt_v6(cp);
	if (!rt)
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->u.dst);
	if (skb->len > mtu) {
		dst_release(&rt->u.dst);
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, offset))
		goto tx_error_put;

	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
		goto tx_error_put;

	/* drop the old route when skb is not shared */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	ip_vs_nat_icmp_v6(skb, pp, cp, 0);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(PF_INET6, skb, rt);

	rc = NF_STOLEN;
	goto out;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	dev_kfree_skb(skb);
	rc = NF_STOLEN;
out:
	LeaveFunction(10);
	return rc;
tx_error_put:
	dst_release(&rt->u.dst);
	goto tx_error;
}
#endif