/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *			:       add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>

static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	if (ret) {
		kfree_skb(skb);
		return ret;
	}

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}

int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note: the socket lock is not held for SYNACK packets, but the skb might
 * be modified by calls to skb_set_owner_w() and ipv6_local_error(),
 * which use proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
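
/*
 * Usage sketch (illustrative only, not part of this file): a
 * connection-oriented caller transmits through ip6_xmit() along these
 * lines; the real TCP path lives in inet6_csk_xmit().  Here
 * example_route_socket() is a hypothetical stand-in for the caller's
 * own routing helper, which fills in the flowi6 and returns a dst:
 *
 *	static int example_xmit(struct sock *sk, struct sk_buff *skb)
 *	{
 *		struct ipv6_pinfo *np = inet6_sk(sk);
 *		struct flowi6 fl6;
 *		struct dst_entry *dst;
 *		int res;
 *
 *		dst = example_route_socket(sk, &fl6);
 *		if (IS_ERR(dst))
 *			return PTR_ERR(dst);
 *
 *		rcu_read_lock();
 *		skb_dst_set_noref(skb, dst);
 *		res = ip6_xmit(sk, skb, &fl6, sk->sk_mark,
 *			       rcu_dereference(np->opt), np->tclass);
 *		rcu_read_unlock();
 *		return res;
 *	}
 */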

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For a unicast neighbour discovery message destined
			 * to the proxied address, pass it to the input
			 * function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

	skb->tstamp = 0;
	return dst_output(net, sk, skb);
}

static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

int ip6_forward(struct sk_buff *skb)
{
	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We do not process RA packets; they are pushed to user
	 *	level AS IS, without any warranty that the application
	 *	will be able to interpret them. The reason is that we
	 *	cannot do anything clever here.
	 *
	 *	We are not the end node, so if the packet contains
	 *	AH/ESP we cannot do anything with it.
	 *	Defragmentation would also be a mistake: RA packets
	 *	cannot be fragmented, because there is no guarantee
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}

int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len, nexthdr_offset;
	int hroom, troom;
	__be32 frag_id;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			err = -ENOMEM;
			goto fail;
		}
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);

		__skb_pull(skb, hlen);
		fh = __skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		fh->identification = frag_id;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = __skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		u8 *fragnexthdr_offset;

		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		fragnexthdr_offset = skb_network_header(frag);
		fragnexthdr_offset += prevhdr - skb_network_header(skb);
		*fragnexthdr_offset = NEXTHDR_FRAGMENT;

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in the unconnected
	 * case is not very simple. Take into account that
	 * we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If the route was a host route,
	 *    check that the cached destination is current.
	 *    If it is a network route, we still may
	 *    check its validity using the saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save the whole address now
	 *    (because the main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so the last trick works only on connected
	 *    sockets.
	 * 2. oif should also be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct fib6_info *from;
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here, if the dst entry we've looked up has a neighbour
	 * entry that is in the INCOMPLETE state and the src address
	 * from the flow is marked as OPTIMISTIC, we release the
	 * found dst entry and replace it with the dst entry of the
	 * nexthop router.
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace to perform the lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@net: network namespace to perform the lookup in
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
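
/*
 * Usage sketch (illustrative only, not part of this file): the return
 * value is a dst pointer or an ERR_PTR()-encoded error, never NULL, so
 * callers must check it with IS_ERR().  A connect() path typically
 * looks roughly like this (error labels are the caller's own):
 *
 *	final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
 *	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
 *	if (IS_ERR(dst)) {
 *		err = PTR_ERR(dst);
 *		goto failure;
 *	}
 *	ip6_dst_store(sk, dst, NULL, NULL);
 */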

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@connected: whether @sk is connected or not
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	In addition, for a connected socket, cache the dst in the socket
 *	if the current cache is not valid.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool connected)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (dst)
		return dst;

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
	if (connected && !IS_ERR(dst))
		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}

static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged)
				alloclen = fraglen;
			else {
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen + hh_len,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}

int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}

struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb->tstamp = cork->base.transmit_time;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
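
/*
 * Usage sketch (illustrative only, not part of this file): datagram
 * senders (UDP, ICMPv6, raw) drive the corking machinery above from
 * their sendmsg path in roughly this pattern; getfrag/from/len and the
 * routing that produced fl6/rt are the caller's responsibility, and
 * error handling is abbreviated:
 *
 *	lock_sock(sk);
 *	err = ip6_append_data(sk, getfrag, from, len, transhdrlen,
 *			      &ipc6, &fl6, rt, msg->msg_flags);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!(msg->msg_flags & MSG_MORE))
 *		err = ip6_push_pending_frames(sk);
 *	release_sock(sk);
 */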

struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     struct inet_cork_full *cork)
{
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork->base.flags = 0;
	cork->base.addr = 0;
	cork->base.opt = NULL;
	cork->base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
	if (err) {
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}
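
/*
 * Usage sketch (illustrative only, not part of this file): for the
 * common single-shot case (no MSG_MORE and the socket is not corked),
 * a caller can bypass sk_write_queue entirely, building the packet on
 * a private queue via ip6_make_skb() and handing it to ip6_send_skb().
 * Note the NULL return for MSG_PROBE, hence IS_ERR_OR_NULL():
 *
 *	skb = ip6_make_skb(sk, getfrag, from, len, transhdrlen,
 *			   &ipc6, &fl6, rt, msg->msg_flags, &cork);
 *	err = PTR_ERR(skb);
 *	if (!IS_ERR_OR_NULL(skb))
 *		err = ip6_send_skb(skb);
 */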