/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

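/* Editorial note (hedged): the length clamp below appears to follow the
 * jumbogram convention of RFC 2675: an IPv6 packet whose payload exceeds
 * IPV6_MAXPLEN (65535) carries payload_len == 0, with the real length
 * conveyed by a Jumbo Payload hop-by-hop option added elsewhere.
 */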
int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
		       skb_dst(skb)->dev, dst_output);
}

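/* Editorial note: nf_hook() returns 1 when the LOCAL_OUT hook accepts the
 * packet without stealing or queueing it, which is why ip6_local_out()
 * treats a return value of 1 as "proceed to dst_output()".
 */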
int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			 * is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				 skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

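/* Editorial note: ip6_finish_output() below takes the fragmentation path
 * either when the packet exceeds the path MTU and is not a GSO skb, or when
 * dst_allfrag() is set, i.e. the peer advertised an MTU below IPV6_MIN_MTU
 * and RFC 2460 section 5 requires a fragment header on every packet sent
 * to it.
 */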
static int ip6_finish_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)))
		return ip6_fragment(skb, ip6_finish_output2);
	else
		return ip6_finish_output2(skb);
}

int ip6_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

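/* Usage sketch (illustrative, not part of this file): a connection-oriented
 * transport typically routes the flow first and then transmits with
 *
 *	err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
 *
 * The real call sites live in the TCP/SCTP/DCCP code and their exact
 * arguments may differ between kernel versions.
 */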
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now);
		 * MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, fl6->flowlabel);

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	skb->dev = dst->dev;
	ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

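/* Editorial note: ip6_call_ra_chain() below fans a Router Alert packet out
 * to every matching raw socket.  Each listener except the last receives a
 * clone; the final listener consumes the original skb, which saves one
 * skb_clone() in the common single-listener case.
 */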
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* A unicast neighbour discovery message destined
			 * to the proxied address must be passed to the
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

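/* Editorial note: ip6_forward() below follows the RFC 2460 forwarding
 * rules: the hop limit is checked before it is decremented, so a packet
 * arriving with hop_limit <= 1 is answered with an ICMPv6 Time Exceeded
 * (hop limit exceeded in transit) message instead of being forwarded, and
 * the decrement itself is delayed until after skb_cow().
 */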
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	We DO NOT process RA packets; we push them to user level
	 *	AS IS, with no guarantee that the application will be able
	 *	to interpret them. The reason is that we cannot do anything
	 *	clever here.
	 *
	 *	We are not an end node, so if the packet contains AH/ESP
	 *	we cannot do anything with it. Defragmentation would also
	 *	be a mistake: RA packets must not be fragmented, because
	 *	there is no guarantee that different fragments travel
	 *	along the same path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* The IPv6 specs say nothing about it, but it is clear that we
	 * cannot send redirects to source-routed frames.
	 * We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);

		/* Limit redirects both by destination (here)
		 * and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
	    (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

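/* Worked example (illustrative): with a path MTU of 1500 and an unextended
 * 40-byte IPv6 header (hlen == 40), ip6_fragment() computes a per-fragment
 * payload budget of 1500 - 40 - 8 = 1452 bytes after reserving the 8-byte
 * fragment header.  The slow path then rounds every fragment but the last
 * down to a multiple of 8 (here 1448 bytes), because fragment offsets are
 * encoded in 8-octet units.
 */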
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb was not generated by a local socket.
	 */
	if (unlikely(!skb->local_df && skb->len > mtu) ||
		     (IP6CB(skb)->frag_max_size &&
		      IP6CB(skb)->frag_max_size > mtu)) {
		if (skb->sk && dst_allfrag(skb_dst(skb)))
			sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh, rt);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare the header of the next frame,
			 * before the previous one goes down.
			 */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			ip6_rt_put(rt);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		ip6_rt_put(rt);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
	    skb_checksum_help(skb))
		goto fail;

	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		 * then align the next start on an eight byte boundary
		 */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      hroom + troom, GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh, rt);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

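/* Editorial note: ip6_rt_check() below is a mismatch test.  It returns
 * nonzero when the cached route can no longer be trusted for fl_addr: the
 * route is not a /128 host route for exactly that address, and the socket's
 * cached address (daddr_cache or saddr_cache) does not match it either.
 */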
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in the non-connected case is not
	 * simple. Take into account that we do not support routing by
	 * source, TOS, or MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If the route was a host route, check that
	 *    the cached destination is current. If it is a network
	 *    route, we can still check its validity using the saved
	 *    pointer to the last used address: daddr_cache. We do not
	 *    want to save the whole address now (the main consumer of
	 *    this service is TCP, which does not have this problem),
	 *    so the last trick works only on connected sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst,
				      bool can_sleep)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
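
/* Usage sketch (illustrative): callers of ip6_dst_lookup_flow() must check
 * for an ERR_PTR()-encoded error rather than NULL, e.g.
 *
 *	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
 *	if (IS_ERR(dst))
 *		return PTR_ERR(dst);
 *
 * where final_p is the caller's final destination pointer (may be NULL).
 */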

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool can_sleep)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	dst = ip6_sk_dst_check(sk, dst, fl6);

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

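/* Editorial note: the UFO path keeps every software-generated fragment a
 * multiple of 8 bytes, so gso_size below is computed as
 * (mtu - fragheaderlen - sizeof(struct frag_hdr)) & ~7.  For example, with
 * mtu == 1500 and fragheaderlen == 40 this gives (1500 - 40 - 8) & ~7 = 1448
 * bytes of payload per fragment.
 */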
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags,
			struct rt6_info *rt)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing the complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(&fhdr, rt);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow the normal path
	 */
	kfree_skb(skb);

	return err;
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

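/* Editorial note: IPv6 extension headers encode their length in 8-octet
 * units, not counting the first 8 octets, hence the (hdrlen + 1) * 8 byte
 * count used by the two dup helpers above.
 */
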
static void ip6_append_data_mtu(int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (skb == NULL) {
			/* first fragment, reserve header_len */
			*mtu = *mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = dst_mtu(rt->dst.path);
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

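/* Usage sketch (illustrative): the datagram send path pairs ip6_append_data()
 * with ip6_push_pending_frames(), roughly
 *
 *	err = ip6_append_data(sk, getfrag, msg, len, sizeof(struct udphdr),
 *			      hlimit, tclass, opt, &fl6, rt, msg->msg_flags,
 *			      dontfrag);
 *	if (!err && !corked)
 *		err = ip6_push_pending_frames(sk);
 *
 * Names such as 'corked' are placeholders; see udp_v6_sendmsg() for a real
 * call site.
 */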
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int dst_exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;

	if (flags & MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa */
		}
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		if (rt->dst.flags & DST_XFRM_TUNNEL)
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(&rt->dst);
		else
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	} else {
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		dst_exthdrlen = 0;
		mtu = cork->fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl6, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM)
		sock_tx_timestamp(sk, &tx_flags);

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (length > mtu) {
		int proto = sk->sk_protocol;
		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)) {
			ipv6_local_rxpmtu(sk, fl6, mtu - exthdrlen);
			return -EMSGSIZE;
		}

		if (proto == IPPROTO_UDP &&
		    (rt->dst.dev->features & NETIF_F_UFO)) {

			err = ip6_ufo_append_data(sk, getfrag, from, length,
						  hh_len, fragheaderlen,
						  transhdrlen, mtu, flags, rt);
			if (err)
				goto error;
			return 0;
		}
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (skb == NULL || skb_prev == NULL)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			struct page_frag *pfrag = sk_page_frag(sk);

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
EXPORT_SYMBOL_GPL(ip6_append_data);

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	if (np->cork.opt) {
		kfree(np->cork.opt->dst0opt);
		kfree(np->cork.opt->dst1opt);
		kfree(np->cork.opt->hopopt);
		kfree(np->cork.opt->srcrt);
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	if (inet->cork.base.dst) {
		dst_release(inet->cork.base.dst);
		inet->cork.base.dst = NULL;
		inet->cork.base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

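/* Editorial note: ip6_push_pending_frames() below collapses the socket write
 * queue into a single skb, chaining the remaining segments onto frag_list;
 * the IPv6 header is then built once and the packet handed to
 * ip6_local_out(), where ip6_fragment() can still split it along those
 * frag_list boundaries.
 */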
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);