1 /*
2  *	IPv6 output functions
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on linux/net/ipv4/ip_output.c
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  *
15  *	Changes:
16  *	A.N.Kuznetsov	:	arithmetic in fragmentation.
17  *				extension headers are implemented.
18  *				route changes now work.
19  *				ip6_forward does not confuse sniffers.
20  *				etc.
21  *
22  *      H. von Brand    :       Added missing #include <linux/string.h>
23  *	Imran Patel	: 	frag id should be in NBO
24  *      Kazunori MIYAZAWA @USAGI
25  *			:       add ip6_append_data and related functions
26  *				for datagram xmit
27  */
28 
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 
41 #include <linux/netfilter.h>
42 #include <linux/netfilter_ipv6.h>
43 
44 #include <net/sock.h>
45 #include <net/snmp.h>
46 
47 #include <net/ipv6.h>
48 #include <net/ndisc.h>
49 #include <net/protocol.h>
50 #include <net/ip6_route.h>
51 #include <net/addrconf.h>
52 #include <net/rawv6.h>
53 #include <net/icmp.h>
54 #include <net/xfrm.h>
55 #include <net/checksum.h>
56 #include <linux/mroute6.h>
57 
58 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
59 
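/*
 *	Pick the Identification value for a new Fragment header: a single
 *	global counter protected by a spinlock, skipping zero so that zero
 *	can serve as "no id allocated yet" in callers such as ip6_fragment().
 */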
60 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
61 {
62 	static u32 ipv6_fragmentation_id = 1;
63 	static DEFINE_SPINLOCK(ip6_id_lock);
64 
65 	spin_lock_bh(&ip6_id_lock);
66 	fhdr->identification = htonl(ipv6_fragmentation_id);
67 	if (++ipv6_fragmentation_id == 0)
68 		ipv6_fragmentation_id = 1;
69 	spin_unlock_bh(&ip6_id_lock);
70 }
71 
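/*
 *	__ip6_local_out() fills in payload_len and runs the packet through
 *	the netfilter LOCAL_OUT hook; ip6_local_out() then hands it to
 *	dst_output() unless netfilter already queued or dropped it.
 */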
72 int __ip6_local_out(struct sk_buff *skb)
73 {
74 	int len;
75 
76 	len = skb->len - sizeof(struct ipv6hdr);
77 	if (len > IPV6_MAXPLEN)
78 		len = 0;
79 	ipv6_hdr(skb)->payload_len = htons(len);
80 
81 	return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
82 		       dst_output);
83 }
84 
85 int ip6_local_out(struct sk_buff *skb)
86 {
87 	int err;
88 
89 	err = __ip6_local_out(skb);
90 	if (likely(err == 1))
91 		err = dst_output(skb);
92 
93 	return err;
94 }
95 EXPORT_SYMBOL_GPL(ip6_local_out);
96 
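/*
 *	Final transmit step: use the cached hardware header if the route
 *	has one, otherwise let the neighbour entry resolve and output the
 *	frame; with neither, count a no-route error and drop the packet.
 */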
97 static int ip6_output_finish(struct sk_buff *skb)
98 {
99 	struct dst_entry *dst = skb->dst;
100 
101 	if (dst->hh)
102 		return neigh_hh_output(dst->hh, skb);
103 	else if (dst->neighbour)
104 		return dst->neighbour->output(skb);
105 
106 	IP6_INC_STATS_BH(dev_net(dst->dev),
107 			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
108 	kfree_skb(skb);
109 	return -EINVAL;
110 
111 }
112 
113 /* dev_loopback_xmit for use with netfilter. */
114 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
115 {
116 	skb_reset_mac_header(newskb);
117 	__skb_pull(newskb, skb_network_offset(newskb));
118 	newskb->pkt_type = PACKET_LOOPBACK;
119 	newskb->ip_summed = CHECKSUM_UNNECESSARY;
120 	WARN_ON(!newskb->dst);
121 
122 	netif_rx(newskb);
123 	return 0;
124 }
125 
126 
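/*
 *	Post-routing output: for multicast destinations, loop a copy back
 *	to local listeners when required and drop packets whose hop limit
 *	is already zero, then run the skb through NF_INET_POST_ROUTING on
 *	its way to ip6_output_finish().
 */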
127 static int ip6_output2(struct sk_buff *skb)
128 {
129 	struct dst_entry *dst = skb->dst;
130 	struct net_device *dev = dst->dev;
131 
132 	skb->protocol = htons(ETH_P_IPV6);
133 	skb->dev = dev;
134 
135 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
136 		struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
137 		struct inet6_dev *idev = ip6_dst_idev(skb->dst);
138 
139 		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
140 		    ((mroute6_socket(dev_net(dev)) &&
141 		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
142 		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
143 					 &ipv6_hdr(skb)->saddr))) {
144 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
145 
146 			/* Do not check for IFF_ALLMULTI; multicast routing
147 			   is not supported in any case.
148 			 */
149 			if (newskb)
150 				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
151 					NULL, newskb->dev,
152 					ip6_dev_loopback_xmit);
153 
154 			if (ipv6_hdr(skb)->hop_limit == 0) {
155 				IP6_INC_STATS(dev_net(dev), idev,
156 					      IPSTATS_MIB_OUTDISCARDS);
157 				kfree_skb(skb);
158 				return 0;
159 			}
160 		}
161 
162 		IP6_INC_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCASTPKTS);
163 	}
164 
165 	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
166 		       ip6_output_finish);
167 }
168 
169 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
170 {
171 	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
172 
173 	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
174 	       skb->dst->dev->mtu : dst_mtu(skb->dst);
175 }
176 
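/*
 *	dst_output() entry point: discard if IPv6 is administratively
 *	disabled on the device, fragment when the packet exceeds the path
 *	MTU (and is not GSO) or the route requires fragmentation, and
 *	otherwise transmit directly via ip6_output2().
 */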
177 int ip6_output(struct sk_buff *skb)
178 {
179 	struct inet6_dev *idev = ip6_dst_idev(skb->dst);
180 	if (unlikely(idev->cnf.disable_ipv6)) {
181 		IP6_INC_STATS(dev_net(skb->dst->dev), idev,
182 			      IPSTATS_MIB_OUTDISCARDS);
183 		kfree_skb(skb);
184 		return 0;
185 	}
186 
187 	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
188 				dst_allfrag(skb->dst))
189 		return ip6_fragment(skb, ip6_output2);
190 	else
191 		return ip6_output2(skb);
192 }
193 
194 /*
195  *	xmit an sk_buff (used by TCP)
196  */
197 
198 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
199 	     struct ipv6_txoptions *opt, int ipfragok)
200 {
201 	struct net *net = sock_net(sk);
202 	struct ipv6_pinfo *np = inet6_sk(sk);
203 	struct in6_addr *first_hop = &fl->fl6_dst;
204 	struct dst_entry *dst = skb->dst;
205 	struct ipv6hdr *hdr;
206 	u8  proto = fl->proto;
207 	int seg_len = skb->len;
208 	int hlimit, tclass;
209 	u32 mtu;
210 
211 	if (opt) {
212 		unsigned int head_room;
213 
214 		/* First: exthdrs may take lots of space (~8K for now)
215 		   MAX_HEADER is not enough.
216 		 */
217 		head_room = opt->opt_nflen + opt->opt_flen;
218 		seg_len += head_room;
219 		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
220 
221 		if (skb_headroom(skb) < head_room) {
222 			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
223 			if (skb2 == NULL) {
224 				IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
225 					      IPSTATS_MIB_OUTDISCARDS);
226 				kfree_skb(skb);
227 				return -ENOBUFS;
228 			}
229 			kfree_skb(skb);
230 			skb = skb2;
231 			if (sk)
232 				skb_set_owner_w(skb, sk);
233 		}
234 		if (opt->opt_flen)
235 			ipv6_push_frag_opts(skb, opt, &proto);
236 		if (opt->opt_nflen)
237 			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
238 	}
239 
240 	skb_push(skb, sizeof(struct ipv6hdr));
241 	skb_reset_network_header(skb);
242 	hdr = ipv6_hdr(skb);
243 
244 	/* Allow local fragmentation. */
245 	if (ipfragok)
246 		skb->local_df = 1;
247 
248 	/*
249 	 *	Fill in the IPv6 header
250 	 */
251 
252 	hlimit = -1;
253 	if (np)
254 		hlimit = np->hop_limit;
255 	if (hlimit < 0)
256 		hlimit = ip6_dst_hoplimit(dst);
257 
258 	tclass = -1;
259 	if (np)
260 		tclass = np->tclass;
261 	if (tclass < 0)
262 		tclass = 0;
263 
264 	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
265 
266 	hdr->payload_len = htons(seg_len);
267 	hdr->nexthdr = proto;
268 	hdr->hop_limit = hlimit;
269 
270 	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
271 	ipv6_addr_copy(&hdr->daddr, first_hop);
272 
273 	skb->priority = sk->sk_priority;
274 	skb->mark = sk->sk_mark;
275 
276 	mtu = dst_mtu(dst);
277 	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
278 		IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
279 			      IPSTATS_MIB_OUTREQUESTS);
280 		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
281 				dst_output);
282 	}
283 
284 	if (net_ratelimit())
285 		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
286 	skb->dev = dst->dev;
287 	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
288 	IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
289 	kfree_skb(skb);
290 	return -EMSGSIZE;
291 }
292 
293 EXPORT_SYMBOL(ip6_xmit);
294 
295 /*
296  *	To avoid extra problems ND packets are sent through this
297  *	routine. It's code duplication but I really want to avoid
298  *	extra checks since ipv6_build_header is used by TCP (which
299  *	is for us performance critical)
300  */
301 
302 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
303 	       const struct in6_addr *saddr, const struct in6_addr *daddr,
304 	       int proto, int len)
305 {
306 	struct ipv6_pinfo *np = inet6_sk(sk);
307 	struct ipv6hdr *hdr;
308 	int totlen;
309 
310 	skb->protocol = htons(ETH_P_IPV6);
311 	skb->dev = dev;
312 
313 	totlen = len + sizeof(struct ipv6hdr);
314 
315 	skb_reset_network_header(skb);
316 	skb_put(skb, sizeof(struct ipv6hdr));
317 	hdr = ipv6_hdr(skb);
318 
319 	*(__be32*)hdr = htonl(0x60000000);
320 
321 	hdr->payload_len = htons(len);
322 	hdr->nexthdr = proto;
323 	hdr->hop_limit = np->hop_limit;
324 
325 	ipv6_addr_copy(&hdr->saddr, saddr);
326 	ipv6_addr_copy(&hdr->daddr, daddr);
327 
328 	return 0;
329 }
330 
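/*
 *	Deliver copies of a Router Alert packet to every raw socket on
 *	ip6_ra_chain whose option value and device binding match; returns 1
 *	when the packet was consumed by such a socket.
 */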
331 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
332 {
333 	struct ip6_ra_chain *ra;
334 	struct sock *last = NULL;
335 
336 	read_lock(&ip6_ra_lock);
337 	for (ra = ip6_ra_chain; ra; ra = ra->next) {
338 		struct sock *sk = ra->sk;
339 		if (sk && ra->sel == sel &&
340 		    (!sk->sk_bound_dev_if ||
341 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
342 			if (last) {
343 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
344 				if (skb2)
345 					rawv6_rcv(last, skb2);
346 			}
347 			last = sk;
348 		}
349 	}
350 
351 	if (last) {
352 		rawv6_rcv(last, skb);
353 		read_unlock(&ip6_ra_lock);
354 		return 1;
355 	}
356 	read_unlock(&ip6_ra_lock);
357 	return 0;
358 }
359 
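/*
 *	Decide how to handle a packet addressed to a proxied (NDP proxy)
 *	address: 1 = hand NDISC messages to local input, -1 = reject
 *	link-local destinations, 0 = forward as usual.
 */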
360 static int ip6_forward_proxy_check(struct sk_buff *skb)
361 {
362 	struct ipv6hdr *hdr = ipv6_hdr(skb);
363 	u8 nexthdr = hdr->nexthdr;
364 	int offset;
365 
366 	if (ipv6_ext_hdr(nexthdr)) {
367 		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
368 		if (offset < 0)
369 			return 0;
370 	} else
371 		offset = sizeof(struct ipv6hdr);
372 
373 	if (nexthdr == IPPROTO_ICMPV6) {
374 		struct icmp6hdr *icmp6;
375 
376 		if (!pskb_may_pull(skb, (skb_network_header(skb) +
377 					 offset + 1 - skb->data)))
378 			return 0;
379 
380 		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
381 
382 		switch (icmp6->icmp6_type) {
383 		case NDISC_ROUTER_SOLICITATION:
384 		case NDISC_ROUTER_ADVERTISEMENT:
385 		case NDISC_NEIGHBOUR_SOLICITATION:
386 		case NDISC_NEIGHBOUR_ADVERTISEMENT:
387 		case NDISC_REDIRECT:
388 			/* For reaction involving unicast neighbor discovery
389 			 * message destined to the proxied address, pass it to
390 			 * input function.
391 			 */
392 			return 1;
393 		default:
394 			break;
395 		}
396 	}
397 
398 	/*
399 	 * The proxying router can't forward traffic sent to a link-local
400 	 * address, so signal the sender and discard the packet. This
401 	 * behavior is clarified by the MIPv6 specification.
402 	 */
403 	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
404 		dst_link_failure(skb);
405 		return -1;
406 	}
407 
408 	return 0;
409 }
410 
411 static inline int ip6_forward_finish(struct sk_buff *skb)
412 {
413 	return dst_output(skb);
414 }
415 
416 int ip6_forward(struct sk_buff *skb)
417 {
418 	struct dst_entry *dst = skb->dst;
419 	struct ipv6hdr *hdr = ipv6_hdr(skb);
420 	struct inet6_skb_parm *opt = IP6CB(skb);
421 	struct net *net = dev_net(dst->dev);
422 
423 	if (net->ipv6.devconf_all->forwarding == 0)
424 		goto error;
425 
426 	if (skb_warn_if_lro(skb))
427 		goto drop;
428 
429 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
430 		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
431 		goto drop;
432 	}
433 
434 	skb_forward_csum(skb);
435 
436 	/*
437 	 *	We DO NOT make any processing on
438 	 *	RA packets, pushing them to user level AS IS
439  *	without any WARRANTY that application will be able
440 	 *	to interpret them. The reason is that we
441 	 *	cannot make anything clever here.
442 	 *
443 	 *	We are not end-node, so that if packet contains
444 	 *	AH/ESP, we cannot make anything.
445 	 *	Defragmentation also would be mistake, RA packets
446 	 *	cannot be fragmented, because there is no warranty
447 	 *	that different fragments will go along one path. --ANK
448 	 */
449 	if (opt->ra) {
450 		u8 *ptr = skb_network_header(skb) + opt->ra;
451 		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
452 			return 0;
453 	}
454 
455 	/*
456 	 *	check and decrement ttl
457 	 */
458 	if (hdr->hop_limit <= 1) {
459 		/* Force OUTPUT device used as source address */
460 		skb->dev = dst->dev;
461 		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
462 			    0, skb->dev);
463 		IP6_INC_STATS_BH(net,
464 				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
465 
466 		kfree_skb(skb);
467 		return -ETIMEDOUT;
468 	}
469 
470 	/* XXX: idev->cnf.proxy_ndp? */
471 	if (net->ipv6.devconf_all->proxy_ndp &&
472 	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
473 		int proxied = ip6_forward_proxy_check(skb);
474 		if (proxied > 0)
475 			return ip6_input(skb);
476 		else if (proxied < 0) {
477 			IP6_INC_STATS(net, ip6_dst_idev(dst),
478 				      IPSTATS_MIB_INDISCARDS);
479 			goto drop;
480 		}
481 	}
482 
483 	if (!xfrm6_route_forward(skb)) {
484 		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
485 		goto drop;
486 	}
487 	dst = skb->dst;
488 
489 	/* IPv6 specs say nothing about it, but it is clear that we cannot
490 	   send redirects to source routed frames.
491 	   We don't send redirects to frames decapsulated from IPsec.
492 	 */
493 	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
494 	    !skb_sec_path(skb)) {
495 		struct in6_addr *target = NULL;
496 		struct rt6_info *rt;
497 		struct neighbour *n = dst->neighbour;
498 
499 		/*
500 		 *	incoming and outgoing devices are the same
501 		 *	send a redirect.
502 		 */
503 
504 		rt = (struct rt6_info *) dst;
505 		if ((rt->rt6i_flags & RTF_GATEWAY))
506 			target = (struct in6_addr*)&n->primary_key;
507 		else
508 			target = &hdr->daddr;
509 
510 		/* Limit redirects both by destination (here)
511 		   and by source (inside ndisc_send_redirect)
512 		 */
513 		if (xrlim_allow(dst, 1*HZ))
514 			ndisc_send_redirect(skb, n, target);
515 	} else {
516 		int addrtype = ipv6_addr_type(&hdr->saddr);
517 
518 		/* This check is security critical. */
519 		if (addrtype == IPV6_ADDR_ANY ||
520 		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
521 			goto error;
522 		if (addrtype & IPV6_ADDR_LINKLOCAL) {
523 			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
524 				ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
525 			goto error;
526 		}
527 	}
528 
529 	if (skb->len > dst_mtu(dst)) {
530 		/* Again, force OUTPUT device used as source address */
531 		skb->dev = dst->dev;
532 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
533 		IP6_INC_STATS_BH(net,
534 				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
535 		IP6_INC_STATS_BH(net,
536 				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
537 		kfree_skb(skb);
538 		return -EMSGSIZE;
539 	}
540 
541 	if (skb_cow(skb, dst->dev->hard_header_len)) {
542 		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
543 		goto drop;
544 	}
545 
546 	hdr = ipv6_hdr(skb);
547 
548 	/* Mangling hops number delayed to point after skb COW */
549 
550 	hdr->hop_limit--;
551 
552 	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
553 	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
554 		       ip6_forward_finish);
555 
556 error:
557 	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
558 drop:
559 	kfree_skb(skb);
560 	return -EINVAL;
561 }
562 
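/*
 *	Propagate per-packet metadata (packet type, priority, dst, marks,
 *	netfilter and security state) from the original skb to a freshly
 *	built fragment.
 */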
563 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
564 {
565 	to->pkt_type = from->pkt_type;
566 	to->priority = from->priority;
567 	to->protocol = from->protocol;
568 	dst_release(to->dst);
569 	to->dst = dst_clone(from->dst);
570 	to->dev = from->dev;
571 	to->mark = from->mark;
572 
573 #ifdef CONFIG_NET_SCHED
574 	to->tc_index = from->tc_index;
575 #endif
576 	nf_copy(to, from);
577 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
578     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
579 	to->nf_trace = from->nf_trace;
580 #endif
581 	skb_copy_secmark(to, from);
582 }
583 
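/*
 *	Walk the extension header chain and return the offset at which a
 *	Fragment header has to be inserted, i.e. after the unfragmentable
 *	part; *nexthdr is left pointing at the nexthdr field that must be
 *	patched to NEXTHDR_FRAGMENT.
 */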
584 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
585 {
586 	u16 offset = sizeof(struct ipv6hdr);
587 	struct ipv6_opt_hdr *exthdr =
588 				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
589 	unsigned int packet_len = skb->tail - skb->network_header;
590 	int found_rhdr = 0;
591 	*nexthdr = &ipv6_hdr(skb)->nexthdr;
592 
593 	while (offset + 1 <= packet_len) {
594 
595 		switch (**nexthdr) {
596 
597 		case NEXTHDR_HOP:
598 			break;
599 		case NEXTHDR_ROUTING:
600 			found_rhdr = 1;
601 			break;
602 		case NEXTHDR_DEST:
603 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
604 			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
605 				break;
606 #endif
607 			if (found_rhdr)
608 				return offset;
609 			break;
610 		default :
611 			return offset;
612 		}
613 
614 		offset += ipv6_optlen(exthdr);
615 		*nexthdr = &exthdr->nexthdr;
616 		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
617 						 offset);
618 	}
619 
620 	return offset;
621 }
622 
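/*
 *	Fragment an oversized packet.  If local fragmentation is not
 *	allowed, send ICMPV6_PKT_TOOBIG back instead.  Fast path: when the
 *	payload already sits on a frag_list with suitable geometry, only a
 *	Fragment header is prepended to each piece; otherwise the slow path
 *	copies the payload into newly allocated fragments.
 */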
623 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
624 {
625 	struct sk_buff *frag;
626 	struct rt6_info *rt = (struct rt6_info*)skb->dst;
627 	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
628 	struct ipv6hdr *tmp_hdr;
629 	struct frag_hdr *fh;
630 	unsigned int mtu, hlen, left, len;
631 	__be32 frag_id = 0;
632 	int ptr, offset = 0, err=0;
633 	u8 *prevhdr, nexthdr = 0;
634 	struct net *net = dev_net(skb->dst->dev);
635 
636 	hlen = ip6_find_1stfragopt(skb, &prevhdr);
637 	nexthdr = *prevhdr;
638 
639 	mtu = ip6_skb_dst_mtu(skb);
640 
641 	/* We must not fragment if the socket is set to force MTU discovery
642  * or if the skb is not generated by a local socket.  (This last
643 	 * check should be redundant, but it's free.)
644 	 */
645 	if (!skb->local_df) {
646 		skb->dev = skb->dst->dev;
647 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
648 		IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
649 			      IPSTATS_MIB_FRAGFAILS);
650 		kfree_skb(skb);
651 		return -EMSGSIZE;
652 	}
653 
654 	if (np && np->frag_size < mtu) {
655 		if (np->frag_size)
656 			mtu = np->frag_size;
657 	}
658 	mtu -= hlen + sizeof(struct frag_hdr);
659 
660 	if (skb_shinfo(skb)->frag_list) {
661 		int first_len = skb_pagelen(skb);
662 		int truesizes = 0;
663 
664 		if (first_len - hlen > mtu ||
665 		    ((first_len - hlen) & 7) ||
666 		    skb_cloned(skb))
667 			goto slow_path;
668 
669 		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
670 			/* Correct geometry. */
671 			if (frag->len > mtu ||
672 			    ((frag->len & 7) && frag->next) ||
673 			    skb_headroom(frag) < hlen)
674 			    goto slow_path;
675 
676 			/* Partially cloned skb? */
677 			if (skb_shared(frag))
678 				goto slow_path;
679 
680 			BUG_ON(frag->sk);
681 			if (skb->sk) {
682 				sock_hold(skb->sk);
683 				frag->sk = skb->sk;
684 				frag->destructor = sock_wfree;
685 				truesizes += frag->truesize;
686 			}
687 		}
688 
689 		err = 0;
690 		offset = 0;
691 		frag = skb_shinfo(skb)->frag_list;
692 		skb_shinfo(skb)->frag_list = NULL;
693 		/* BUILD HEADER */
694 
695 		*prevhdr = NEXTHDR_FRAGMENT;
696 		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
697 		if (!tmp_hdr) {
698 			IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
699 				      IPSTATS_MIB_FRAGFAILS);
700 			return -ENOMEM;
701 		}
702 
703 		__skb_pull(skb, hlen);
704 		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
705 		__skb_push(skb, hlen);
706 		skb_reset_network_header(skb);
707 		memcpy(skb_network_header(skb), tmp_hdr, hlen);
708 
709 		ipv6_select_ident(skb, fh);
710 		fh->nexthdr = nexthdr;
711 		fh->reserved = 0;
712 		fh->frag_off = htons(IP6_MF);
713 		frag_id = fh->identification;
714 
715 		first_len = skb_pagelen(skb);
716 		skb->data_len = first_len - skb_headlen(skb);
717 		skb->truesize -= truesizes;
718 		skb->len = first_len;
719 		ipv6_hdr(skb)->payload_len = htons(first_len -
720 						   sizeof(struct ipv6hdr));
721 
722 		dst_hold(&rt->u.dst);
723 
724 		for (;;) {
725 			/* Prepare header of the next frame,
726 			 * before previous one went down. */
727 			if (frag) {
728 				frag->ip_summed = CHECKSUM_NONE;
729 				skb_reset_transport_header(frag);
730 				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
731 				__skb_push(frag, hlen);
732 				skb_reset_network_header(frag);
733 				memcpy(skb_network_header(frag), tmp_hdr,
734 				       hlen);
735 				offset += skb->len - hlen - sizeof(struct frag_hdr);
736 				fh->nexthdr = nexthdr;
737 				fh->reserved = 0;
738 				fh->frag_off = htons(offset);
739 				if (frag->next != NULL)
740 					fh->frag_off |= htons(IP6_MF);
741 				fh->identification = frag_id;
742 				ipv6_hdr(frag)->payload_len =
743 						htons(frag->len -
744 						      sizeof(struct ipv6hdr));
745 				ip6_copy_metadata(frag, skb);
746 			}
747 
748 			err = output(skb);
749 			if(!err)
750 				IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
751 					      IPSTATS_MIB_FRAGCREATES);
752 
753 			if (err || !frag)
754 				break;
755 
756 			skb = frag;
757 			frag = skb->next;
758 			skb->next = NULL;
759 		}
760 
761 		kfree(tmp_hdr);
762 
763 		if (err == 0) {
764 			IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
765 				      IPSTATS_MIB_FRAGOKS);
766 			dst_release(&rt->u.dst);
767 			return 0;
768 		}
769 
770 		while (frag) {
771 			skb = frag->next;
772 			kfree_skb(frag);
773 			frag = skb;
774 		}
775 
776 		IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
777 			      IPSTATS_MIB_FRAGFAILS);
778 		dst_release(&rt->u.dst);
779 		return err;
780 	}
781 
782 slow_path:
783 	left = skb->len - hlen;		/* Space per frame */
784 	ptr = hlen;			/* Where to start from */
785 
786 	/*
787 	 *	Fragment the datagram.
788 	 */
789 
790 	*prevhdr = NEXTHDR_FRAGMENT;
791 
792 	/*
793 	 *	Keep copying data until we run out.
794 	 */
795 	while(left > 0)	{
796 		len = left;
797 		/* IF: it doesn't fit, use 'mtu' - the data space left */
798 		if (len > mtu)
799 			len = mtu;
800 		/* IF: we are not sending up to and including the packet end
801 		   then align the next start on an eight byte boundary */
802 		if (len < left)	{
803 			len &= ~7;
804 		}
805 		/*
806 		 *	Allocate buffer.
807 		 */
808 
809 		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
810 			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
811 			IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
812 				      IPSTATS_MIB_FRAGFAILS);
813 			err = -ENOMEM;
814 			goto fail;
815 		}
816 
817 		/*
818 		 *	Set up data on packet
819 		 */
820 
821 		ip6_copy_metadata(frag, skb);
822 		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
823 		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
824 		skb_reset_network_header(frag);
825 		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
826 		frag->transport_header = (frag->network_header + hlen +
827 					  sizeof(struct frag_hdr));
828 
829 		/*
830 		 *	Charge the memory for the fragment to any owner
831 		 *	it might possess
832 		 */
833 		if (skb->sk)
834 			skb_set_owner_w(frag, skb->sk);
835 
836 		/*
837 		 *	Copy the packet header into the new buffer.
838 		 */
839 		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
840 
841 		/*
842 		 *	Build fragment header.
843 		 */
844 		fh->nexthdr = nexthdr;
845 		fh->reserved = 0;
846 		if (!frag_id) {
847 			ipv6_select_ident(skb, fh);
848 			frag_id = fh->identification;
849 		} else
850 			fh->identification = frag_id;
851 
852 		/*
853 		 *	Copy a block of the IP datagram.
854 		 */
855 		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
856 			BUG();
857 		left -= len;
858 
859 		fh->frag_off = htons(offset);
860 		if (left > 0)
861 			fh->frag_off |= htons(IP6_MF);
862 		ipv6_hdr(frag)->payload_len = htons(frag->len -
863 						    sizeof(struct ipv6hdr));
864 
865 		ptr += len;
866 		offset += len;
867 
868 		/*
869 		 *	Put this fragment into the sending queue.
870 		 */
871 		err = output(frag);
872 		if (err)
873 			goto fail;
874 
875 		IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
876 			      IPSTATS_MIB_FRAGCREATES);
877 	}
878 	IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
879 		      IPSTATS_MIB_FRAGOKS);
880 	kfree_skb(skb);
881 	return err;
882 
883 fail:
884 	IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
885 		      IPSTATS_MIB_FRAGFAILS);
886 	kfree_skb(skb);
887 	return err;
888 }
889 
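/*
 *	Returns nonzero when a cached route can no longer be trusted for
 *	this flow address: it is neither an exact /128 match nor equal to
 *	the address cached on the socket.
 */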
890 static inline int ip6_rt_check(struct rt6key *rt_key,
891 			       struct in6_addr *fl_addr,
892 			       struct in6_addr *addr_cache)
893 {
894 	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
895 		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
896 }
897 
898 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
899 					  struct dst_entry *dst,
900 					  struct flowi *fl)
901 {
902 	struct ipv6_pinfo *np = inet6_sk(sk);
903 	struct rt6_info *rt = (struct rt6_info *)dst;
904 
905 	if (!dst)
906 		goto out;
907 
908 	/* Yes, checking route validity in the not-connected
909 	 * case is not very simple. Take into account,
910 	 * that we do not support routing by source, TOS,
911 	 * and MSG_DONTROUTE 		--ANK (980726)
912 	 *
913 	 * 1. ip6_rt_check(): If route was host route,
914 	 *    check that cached destination is current.
915 	 *    If it is network route, we still may
916 	 *    check its validity using saved pointer
917 	 *    to the last used address: daddr_cache.
918 	 *    We do not want to save whole address now,
919 	 *    (because main consumer of this service
920 	 *    is tcp, which has not this problem),
921 	 *    so that the last trick works only on connected
922 	 *    sockets.
923 	 * 2. oif also should be the same.
924 	 */
925 	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
926 #ifdef CONFIG_IPV6_SUBTREES
927 	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
928 #endif
929 	    (fl->oif && fl->oif != dst->dev->ifindex)) {
930 		dst_release(dst);
931 		dst = NULL;
932 	}
933 
934 out:
935 	return dst;
936 }
937 
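/*
 *	Common tail of the dst lookups: perform the route lookup if the
 *	caller has no dst yet, choose a source address when the flow left
 *	it unspecified, and (with optimistic DAD) retry via the default
 *	router when the chosen source is optimistic and the next hop is
 *	not yet known to be reachable.
 */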
938 static int ip6_dst_lookup_tail(struct sock *sk,
939 			       struct dst_entry **dst, struct flowi *fl)
940 {
941 	int err;
942 	struct net *net = sock_net(sk);
943 
944 	if (*dst == NULL)
945 		*dst = ip6_route_output(net, sk, fl);
946 
947 	if ((err = (*dst)->error))
948 		goto out_err_release;
949 
950 	if (ipv6_addr_any(&fl->fl6_src)) {
951 		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
952 					 &fl->fl6_dst,
953 					 sk ? inet6_sk(sk)->srcprefs : 0,
954 					 &fl->fl6_src);
955 		if (err)
956 			goto out_err_release;
957 	}
958 
959 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
960 	/*
961 	 * Here if the dst entry we've looked up
962 	 * has a neighbour entry that is in the INCOMPLETE
963 	 * state and the src address from the flow is
964 	 * marked as OPTIMISTIC, we release the found
965 	 * dst entry and replace it instead with the
966 	 * dst entry of the nexthop router
967 	 */
968 	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
969 		struct inet6_ifaddr *ifp;
970 		struct flowi fl_gw;
971 		int redirect;
972 
973 		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
974 				      (*dst)->dev, 1);
975 
976 		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
977 		if (ifp)
978 			in6_ifa_put(ifp);
979 
980 		if (redirect) {
981 			/*
982 			 * We need to get the dst entry for the
983 			 * default router instead
984 			 */
985 			dst_release(*dst);
986 			memcpy(&fl_gw, fl, sizeof(struct flowi));
987 			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
988 			*dst = ip6_route_output(net, sk, &fl_gw);
989 			if ((err = (*dst)->error))
990 				goto out_err_release;
991 		}
992 	}
993 #endif
994 
995 	return 0;
996 
997 out_err_release:
998 	if (err == -ENETUNREACH)
999 		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1000 	dst_release(*dst);
1001 	*dst = NULL;
1002 	return err;
1003 }
1004 
1005 /**
1006  *	ip6_dst_lookup - perform route lookup on flow
1007  *	@sk: socket which provides route info
1008  *	@dst: pointer to dst_entry * for result
1009  *	@fl: flow to lookup
1010  *
1011  *	This function performs a route lookup on the given flow.
1012  *
1013  *	It returns zero on success, or a standard errno code on error.
1014  */
1015 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1016 {
1017 	*dst = NULL;
1018 	return ip6_dst_lookup_tail(sk, dst, fl);
1019 }
1020 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1021 
1022 /**
1023  *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
1024  *	@sk: socket which provides the dst cache and route info
1025  *	@dst: pointer to dst_entry * for result
1026  *	@fl: flow to lookup
1027  *
1028  *	This function performs a route lookup on the given flow with the
1029  *	possibility of using the cached route in the socket if it is valid.
1030  *	It will take the socket dst lock when operating on the dst cache.
1031  *	As a result, this function can only be used in process context.
1032  *
1033  *	It returns zero on success, or a standard errno code on error.
1034  */
1035 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1036 {
1037 	*dst = NULL;
1038 	if (sk) {
1039 		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1040 		*dst = ip6_sk_dst_check(sk, *dst, fl);
1041 	}
1042 
1043 	return ip6_dst_lookup_tail(sk, dst, fl);
1044 }
1045 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1046 
1047 static inline int ip6_ufo_append_data(struct sock *sk,
1048 			int getfrag(void *from, char *to, int offset, int len,
1049 			int odd, struct sk_buff *skb),
1050 			void *from, int length, int hh_len, int fragheaderlen,
1051 			int transhdrlen, int mtu,unsigned int flags)
1052 
1053 {
1054 	struct sk_buff *skb;
1055 	int err;
1056 
1057 	/* There is support for UDP large send offload by network
1058 	 * device, so create one single skb packet containing complete
1059 	 * udp datagram
1060 	 */
1061 	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1062 		skb = sock_alloc_send_skb(sk,
1063 			hh_len + fragheaderlen + transhdrlen + 20,
1064 			(flags & MSG_DONTWAIT), &err);
1065 		if (skb == NULL)
1066 			return -ENOMEM;
1067 
1068 		/* reserve space for Hardware header */
1069 		skb_reserve(skb, hh_len);
1070 
1071 		/* create space for UDP/IP header */
1072 		skb_put(skb,fragheaderlen + transhdrlen);
1073 
1074 		/* initialize network header pointer */
1075 		skb_reset_network_header(skb);
1076 
1077 		/* initialize protocol header pointer */
1078 		skb->transport_header = skb->network_header + fragheaderlen;
1079 
1080 		skb->ip_summed = CHECKSUM_PARTIAL;
1081 		skb->csum = 0;
1082 		sk->sk_sndmsg_off = 0;
1083 	}
1084 
1085 	err = skb_append_datato_frags(sk,skb, getfrag, from,
1086 				      (length - transhdrlen));
1087 	if (!err) {
1088 		struct frag_hdr fhdr;
1089 
1090 		/* specify the length of each IP datagram fragment*/
1091 		skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1092 					    sizeof(struct frag_hdr);
1093 		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1094 		ipv6_select_ident(skb, &fhdr);
1095 		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1096 		__skb_queue_tail(&sk->sk_write_queue, skb);
1097 
1098 		return 0;
1099 	}
1100 	/* There is not enough support to do UDP LSO,
1101 	 * so follow normal path
1102 	 */
1103 	kfree_skb(skb);
1104 
1105 	return err;
1106 }
1107 
1108 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1109 					       gfp_t gfp)
1110 {
1111 	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1112 }
1113 
1114 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1115 						gfp_t gfp)
1116 {
1117 	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1118 }
1119 
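/*
 *	Append data to the socket's pending (corked) output queue, building
 *	packets of at most the path MTU.  The first call sets up the cork
 *	state (options, route, MTU); UDP with UFO-capable hardware is
 *	collected into one large skb, otherwise data is copied into the
 *	tail skb or attached as page fragments, starting a new skb at each
 *	fragment boundary.
 */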
1120 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1121 	int offset, int len, int odd, struct sk_buff *skb),
1122 	void *from, int length, int transhdrlen,
1123 	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1124 	struct rt6_info *rt, unsigned int flags)
1125 {
1126 	struct inet_sock *inet = inet_sk(sk);
1127 	struct ipv6_pinfo *np = inet6_sk(sk);
1128 	struct sk_buff *skb;
1129 	unsigned int maxfraglen, fragheaderlen;
1130 	int exthdrlen;
1131 	int hh_len;
1132 	int mtu;
1133 	int copy;
1134 	int err;
1135 	int offset = 0;
1136 	int csummode = CHECKSUM_NONE;
1137 
1138 	if (flags&MSG_PROBE)
1139 		return 0;
1140 	if (skb_queue_empty(&sk->sk_write_queue)) {
1141 		/*
1142 		 * setup for corking
1143 		 */
1144 		if (opt) {
1145 			if (WARN_ON(np->cork.opt))
1146 				return -EINVAL;
1147 
1148 			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1149 			if (unlikely(np->cork.opt == NULL))
1150 				return -ENOBUFS;
1151 
1152 			np->cork.opt->tot_len = opt->tot_len;
1153 			np->cork.opt->opt_flen = opt->opt_flen;
1154 			np->cork.opt->opt_nflen = opt->opt_nflen;
1155 
1156 			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1157 							    sk->sk_allocation);
1158 			if (opt->dst0opt && !np->cork.opt->dst0opt)
1159 				return -ENOBUFS;
1160 
1161 			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1162 							    sk->sk_allocation);
1163 			if (opt->dst1opt && !np->cork.opt->dst1opt)
1164 				return -ENOBUFS;
1165 
1166 			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1167 							   sk->sk_allocation);
1168 			if (opt->hopopt && !np->cork.opt->hopopt)
1169 				return -ENOBUFS;
1170 
1171 			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1172 							    sk->sk_allocation);
1173 			if (opt->srcrt && !np->cork.opt->srcrt)
1174 				return -ENOBUFS;
1175 
1176 			/* need source address above miyazawa*/
1177 		}
1178 		dst_hold(&rt->u.dst);
1179 		inet->cork.dst = &rt->u.dst;
1180 		inet->cork.fl = *fl;
1181 		np->cork.hop_limit = hlimit;
1182 		np->cork.tclass = tclass;
1183 		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1184 		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1185 		if (np->frag_size < mtu) {
1186 			if (np->frag_size)
1187 				mtu = np->frag_size;
1188 		}
1189 		inet->cork.fragsize = mtu;
1190 		if (dst_allfrag(rt->u.dst.path))
1191 			inet->cork.flags |= IPCORK_ALLFRAG;
1192 		inet->cork.length = 0;
1193 		sk->sk_sndmsg_page = NULL;
1194 		sk->sk_sndmsg_off = 0;
1195 		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1196 			    rt->rt6i_nfheader_len;
1197 		length += exthdrlen;
1198 		transhdrlen += exthdrlen;
1199 	} else {
1200 		rt = (struct rt6_info *)inet->cork.dst;
1201 		fl = &inet->cork.fl;
1202 		opt = np->cork.opt;
1203 		transhdrlen = 0;
1204 		exthdrlen = 0;
1205 		mtu = inet->cork.fragsize;
1206 	}
1207 
1208 	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1209 
1210 	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1211 			(opt ? opt->opt_nflen : 0);
1212 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1213 
1214 	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1215 		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1216 			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1217 			return -EMSGSIZE;
1218 		}
1219 	}
1220 
1221 	/*
1222 	 * Let's try using as much space as possible.
1223 	 * Use MTU if total length of the message fits into the MTU.
1224 	 * Otherwise, we need to reserve fragment header and
1225 	 * fragment alignment (= 8-15 octets, in total).
1226 	 *
1227 	 * Note that we may need to "move" the data from the tail of
1228 	 * the buffer to the new fragment when we split
1229 	 * the message.
1230 	 *
1231 	 * FIXME: It may be fragmented into multiple chunks
1232 	 *        at once if non-fragmentable extension headers
1233 	 *        are too large.
1234 	 * --yoshfuji
1235 	 */
1236 
1237 	inet->cork.length += length;
1238 	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1239 	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
1240 
1241 		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1242 					  fragheaderlen, transhdrlen, mtu,
1243 					  flags);
1244 		if (err)
1245 			goto error;
1246 		return 0;
1247 	}
1248 
1249 	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1250 		goto alloc_new_skb;
1251 
1252 	while (length > 0) {
1253 		/* Check if the remaining data fits into current packet. */
1254 		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1255 		if (copy < length)
1256 			copy = maxfraglen - skb->len;
1257 
1258 		if (copy <= 0) {
1259 			char *data;
1260 			unsigned int datalen;
1261 			unsigned int fraglen;
1262 			unsigned int fraggap;
1263 			unsigned int alloclen;
1264 			struct sk_buff *skb_prev;
1265 alloc_new_skb:
1266 			skb_prev = skb;
1267 
1268 			/* There's no room in the current skb */
1269 			if (skb_prev)
1270 				fraggap = skb_prev->len - maxfraglen;
1271 			else
1272 				fraggap = 0;
1273 
1274 			/*
1275 			 * If remaining data exceeds the mtu,
1276 			 * we know we need more fragment(s).
1277 			 */
1278 			datalen = length + fraggap;
1279 			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1280 				datalen = maxfraglen - fragheaderlen;
1281 
1282 			fraglen = datalen + fragheaderlen;
1283 			if ((flags & MSG_MORE) &&
1284 			    !(rt->u.dst.dev->features&NETIF_F_SG))
1285 				alloclen = mtu;
1286 			else
1287 				alloclen = datalen + fragheaderlen;
1288 
1289 			/*
1290 			 * The last fragment gets additional space at tail.
1291 	 * Note: we overallocate on fragments with MSG_MORE
1292 			 * because we have no idea if we're the last one.
1293 			 */
1294 			if (datalen == length + fraggap)
1295 				alloclen += rt->u.dst.trailer_len;
1296 
1297 			/*
1298 			 * We just reserve space for fragment header.
1299 			 * Note: this may be overallocation if the message
1300 			 * (without MSG_MORE) fits into the MTU.
1301 			 */
1302 			alloclen += sizeof(struct frag_hdr);
1303 
1304 			if (transhdrlen) {
1305 				skb = sock_alloc_send_skb(sk,
1306 						alloclen + hh_len,
1307 						(flags & MSG_DONTWAIT), &err);
1308 			} else {
1309 				skb = NULL;
1310 				if (atomic_read(&sk->sk_wmem_alloc) <=
1311 				    2 * sk->sk_sndbuf)
1312 					skb = sock_wmalloc(sk,
1313 							   alloclen + hh_len, 1,
1314 							   sk->sk_allocation);
1315 				if (unlikely(skb == NULL))
1316 					err = -ENOBUFS;
1317 			}
1318 			if (skb == NULL)
1319 				goto error;
1320 			/*
1321 			 *	Fill in the control structures
1322 			 */
1323 			skb->ip_summed = csummode;
1324 			skb->csum = 0;
1325 			/* reserve for fragmentation */
1326 			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1327 
1328 			/*
1329 			 *	Find where to start putting bytes
1330 			 */
1331 			data = skb_put(skb, fraglen);
1332 			skb_set_network_header(skb, exthdrlen);
1333 			data += fragheaderlen;
1334 			skb->transport_header = (skb->network_header +
1335 						 fragheaderlen);
1336 			if (fraggap) {
1337 				skb->csum = skb_copy_and_csum_bits(
1338 					skb_prev, maxfraglen,
1339 					data + transhdrlen, fraggap, 0);
1340 				skb_prev->csum = csum_sub(skb_prev->csum,
1341 							  skb->csum);
1342 				data += fraggap;
1343 				pskb_trim_unique(skb_prev, maxfraglen);
1344 			}
1345 			copy = datalen - transhdrlen - fraggap;
1346 			if (copy < 0) {
1347 				err = -EINVAL;
1348 				kfree_skb(skb);
1349 				goto error;
1350 			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1351 				err = -EFAULT;
1352 				kfree_skb(skb);
1353 				goto error;
1354 			}
1355 
1356 			offset += copy;
1357 			length -= datalen - fraggap;
1358 			transhdrlen = 0;
1359 			exthdrlen = 0;
1360 			csummode = CHECKSUM_NONE;
1361 
1362 			/*
1363 			 * Put the packet on the pending queue
1364 			 */
1365 			__skb_queue_tail(&sk->sk_write_queue, skb);
1366 			continue;
1367 		}
1368 
1369 		if (copy > length)
1370 			copy = length;
1371 
1372 		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1373 			unsigned int off;
1374 
1375 			off = skb->len;
1376 			if (getfrag(from, skb_put(skb, copy),
1377 						offset, copy, off, skb) < 0) {
1378 				__skb_trim(skb, off);
1379 				err = -EFAULT;
1380 				goto error;
1381 			}
1382 		} else {
1383 			int i = skb_shinfo(skb)->nr_frags;
1384 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1385 			struct page *page = sk->sk_sndmsg_page;
1386 			int off = sk->sk_sndmsg_off;
1387 			unsigned int left;
1388 
1389 			if (page && (left = PAGE_SIZE - off) > 0) {
1390 				if (copy >= left)
1391 					copy = left;
1392 				if (page != frag->page) {
1393 					if (i == MAX_SKB_FRAGS) {
1394 						err = -EMSGSIZE;
1395 						goto error;
1396 					}
1397 					get_page(page);
1398 					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1399 					frag = &skb_shinfo(skb)->frags[i];
1400 				}
1401 			} else if(i < MAX_SKB_FRAGS) {
1402 				if (copy > PAGE_SIZE)
1403 					copy = PAGE_SIZE;
1404 				page = alloc_pages(sk->sk_allocation, 0);
1405 				if (page == NULL) {
1406 					err = -ENOMEM;
1407 					goto error;
1408 				}
1409 				sk->sk_sndmsg_page = page;
1410 				sk->sk_sndmsg_off = 0;
1411 
1412 				skb_fill_page_desc(skb, i, page, 0, 0);
1413 				frag = &skb_shinfo(skb)->frags[i];
1414 			} else {
1415 				err = -EMSGSIZE;
1416 				goto error;
1417 			}
1418 			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1419 				err = -EFAULT;
1420 				goto error;
1421 			}
1422 			sk->sk_sndmsg_off += copy;
1423 			frag->size += copy;
1424 			skb->len += copy;
1425 			skb->data_len += copy;
1426 			skb->truesize += copy;
1427 			atomic_add(copy, &sk->sk_wmem_alloc);
1428 		}
1429 		offset += copy;
1430 		length -= copy;
1431 	}
1432 	return 0;
1433 error:
1434 	inet->cork.length -= length;
1435 	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1436 	return err;
1437 }
1438 
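/*
 *	Free the per-socket cork state: the duplicated extension headers,
 *	the held route and the cached flow.
 */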
1439 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1440 {
1441 	if (np->cork.opt) {
1442 		kfree(np->cork.opt->dst0opt);
1443 		kfree(np->cork.opt->dst1opt);
1444 		kfree(np->cork.opt->hopopt);
1445 		kfree(np->cork.opt->srcrt);
1446 		kfree(np->cork.opt);
1447 		np->cork.opt = NULL;
1448 	}
1449 
1450 	if (inet->cork.dst) {
1451 		dst_release(inet->cork.dst);
1452 		inet->cork.dst = NULL;
1453 		inet->cork.flags &= ~IPCORK_ALLFRAG;
1454 	}
1455 	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1456 }
1457 
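/*
 *	Collapse the queued skbs into a single packet (head skb plus
 *	frag_list), push the extension headers and the IPv6 header and send
 *	the result through ip6_local_out(); the cork state is released in
 *	all cases.
 */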
1458 int ip6_push_pending_frames(struct sock *sk)
1459 {
1460 	struct sk_buff *skb, *tmp_skb;
1461 	struct sk_buff **tail_skb;
1462 	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1463 	struct inet_sock *inet = inet_sk(sk);
1464 	struct ipv6_pinfo *np = inet6_sk(sk);
1465 	struct net *net = sock_net(sk);
1466 	struct ipv6hdr *hdr;
1467 	struct ipv6_txoptions *opt = np->cork.opt;
1468 	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1469 	struct flowi *fl = &inet->cork.fl;
1470 	unsigned char proto = fl->proto;
1471 	int err = 0;
1472 
1473 	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1474 		goto out;
1475 	tail_skb = &(skb_shinfo(skb)->frag_list);
1476 
1477 	/* move skb->data to ip header from ext header */
1478 	if (skb->data < skb_network_header(skb))
1479 		__skb_pull(skb, skb_network_offset(skb));
1480 	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1481 		__skb_pull(tmp_skb, skb_network_header_len(skb));
1482 		*tail_skb = tmp_skb;
1483 		tail_skb = &(tmp_skb->next);
1484 		skb->len += tmp_skb->len;
1485 		skb->data_len += tmp_skb->len;
1486 		skb->truesize += tmp_skb->truesize;
1487 		__sock_put(tmp_skb->sk);
1488 		tmp_skb->destructor = NULL;
1489 		tmp_skb->sk = NULL;
1490 	}
1491 
1492 	/* Allow local fragmentation. */
1493 	if (np->pmtudisc < IPV6_PMTUDISC_DO)
1494 		skb->local_df = 1;
1495 
1496 	ipv6_addr_copy(final_dst, &fl->fl6_dst);
1497 	__skb_pull(skb, skb_network_header_len(skb));
1498 	if (opt && opt->opt_flen)
1499 		ipv6_push_frag_opts(skb, opt, &proto);
1500 	if (opt && opt->opt_nflen)
1501 		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1502 
1503 	skb_push(skb, sizeof(struct ipv6hdr));
1504 	skb_reset_network_header(skb);
1505 	hdr = ipv6_hdr(skb);
1506 
1507 	*(__be32*)hdr = fl->fl6_flowlabel |
1508 		     htonl(0x60000000 | ((int)np->cork.tclass << 20));
1509 
1510 	hdr->hop_limit = np->cork.hop_limit;
1511 	hdr->nexthdr = proto;
1512 	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1513 	ipv6_addr_copy(&hdr->daddr, final_dst);
1514 
1515 	skb->priority = sk->sk_priority;
1516 	skb->mark = sk->sk_mark;
1517 
1518 	skb->dst = dst_clone(&rt->u.dst);
1519 	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
1520 	if (proto == IPPROTO_ICMPV6) {
1521 		struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1522 
1523 		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1524 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1525 	}
1526 
1527 	err = ip6_local_out(skb);
1528 	if (err) {
1529 		if (err > 0)
1530 			err = np->recverr ? net_xmit_errno(err) : 0;
1531 		if (err)
1532 			goto error;
1533 	}
1534 
1535 out:
1536 	ip6_cork_release(inet, np);
1537 	return err;
1538 error:
1539 	goto out;
1540 }
1541 
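/*
 *	Error-path counterpart of ip6_push_pending_frames(): drop everything
 *	still queued on the socket and release the cork state.
 */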
1542 void ip6_flush_pending_frames(struct sock *sk)
1543 {
1544 	struct sk_buff *skb;
1545 
1546 	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1547 		if (skb->dst)
1548 			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb->dst),
1549 				      IPSTATS_MIB_OUTDISCARDS);
1550 		kfree_skb(skb);
1551 	}
1552 
1553 	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1554 }
1555