/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *			:       add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>

static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	if (ret) {
		kfree_skb(skb);
		return ret;
	}

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}

int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note: the socket lock is not held for SYNACK packets, but the skb might
 * be modified by calls to skb_set_owner_w() and ipv6_local_error(),
 * which use proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

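/*
 * Illustrative sketch (not part of the original file): roughly how a
 * connection-oriented caller hands an already-routed skb to ip6_xmit().
 * example_xmit() and its parameters are hypothetical; real callers such
 * as TCP derive the flow, dst and traffic class from their own state.
 */
#if 0
static int example_xmit(const struct sock *sk, struct sk_buff *skb,
			struct flowi6 *fl6, struct dst_entry *dst)
{
	/* ip6_xmit() expects the dst to be attached to the skb already */
	skb_dst_set(skb, dst_clone(dst));

	/* no extension headers (opt == NULL), default traffic class 0 */
	return ip6_xmit(sk, skb, fl6, sk->sk_mark, NULL, 0);
}
#endif
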
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For unicast neighbour discovery messages destined
			 * to the proxied address, pass them to the input
			 * function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

	return dst_output(net, sk, skb);
}

static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
	unsigned int mtu;
	struct inet6_dev *idev;

	if (dst_metric_locked(dst, RTAX_MTU)) {
		mtu = dst_metric_raw(dst, RTAX_MTU);
		if (mtu)
			return mtu;
	}

	mtu = IPV6_MIN_MTU;
	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}

static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets frag_max_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
		return false;

	return true;
}

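/*
 * Worked example (illustrative numbers): forwarding a 1500 byte non-GSO
 * skb onto a path with mtu = 1280 returns true and triggers an
 * ICMPV6_PKT_TOOBIG in ip6_forward(). If conntrack defrag handled the
 * packet (ignore_df set), it is only "too big" when the largest original
 * fragment (frag_max_size) itself exceeded the mtu; otherwise it passes
 * here and is re-fragmented later against frag_max_size in ip6_fragment().
 */
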
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT do any processing on router-alert (RA) packets;
	 *	they are pushed to user level AS IS, without any warranty
	 *	that the application will be able to interpret them.
	 *	The reason is that we cannot do anything clever here.
	 *
	 *	We are not the end node, so if the packet contains
	 *	AH/ESP we cannot do anything with it.
	 *	Defragmentation would also be a mistake: RA packets
	 *	cannot be fragmented, because there is no guarantee
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, ip6_dst_idev(dst),
					IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* The IPv6 specs say nothing about it, but it is clear that we
	   cannot send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same;
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling the hop limit is delayed to this point, after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}

int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len, nexthdr_offset;
	int hroom, troom;
	__be32 frag_id;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb was not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			err = -ENOMEM;
			goto fail;
		}
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);

		__skb_pull(skb, hlen);
		fh = __skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		fh->identification = frag_id;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		for (;;) {
			/* Prepare the header of the next frame,
			 * before the previous one goes down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = __skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0)	{
		u8 *fragnexthdr_offset;

		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		fragnexthdr_offset = skb_network_header(frag);
		fragnexthdr_offset += prevhdr - skb_network_header(skb);
		*fragnexthdr_offset = NEXTHDR_FRAGMENT;

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

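/*
 * Worked example of the size arithmetic above (illustrative numbers):
 * with mtu = 1500 and an unfragmentable part of hlen = 40 bytes (a bare
 * IPv6 header), the per-fragment payload budget becomes
 * mtu - hlen - sizeof(struct frag_hdr) = 1500 - 40 - 8 = 1452 bytes.
 * Every fragment except the last is then trimmed to a multiple of 8
 * (len &= ~7, here 1448 bytes), since frag_off encodes offsets in
 * 8-byte units.
 */
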
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in the not-connected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If the route was a host route,
	 *    check that the cached destination is current.
	 *    If it is a network route, we still may
	 *    check its validity using the saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save the whole address now,
	 *    (because the main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so this last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace to perform the lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

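/*
 * Illustrative sketch (hypothetical caller, not from this file): the
 * plain lookup reports errors through the int return value and fills
 * *dst only on success, unlike the *_flow variants below, which encode
 * errors in the returned pointer.
 */
#if 0
static int example_route_lookup(struct net *net, struct sock *sk,
				struct flowi6 *fl6)
{
	struct dst_entry *dst;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return err;	/* dst was released and NULLed on failure */

	/* ... transmit using dst ... */
	dst_release(dst);
	return 0;
}
#endif
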
/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

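/*
 * Illustrative sketch (hypothetical caller): because the error is encoded
 * in the returned pointer, callers test with IS_ERR()/PTR_ERR() and may
 * then cache the result on the socket, e.g. with ip6_dst_store().
 */
#if 0
static int example_connect_route(struct sock *sk, struct flowi6 *fl6)
{
	struct dst_entry *dst;

	dst = ip6_dst_lookup_flow(sk, fl6, NULL);
	if (IS_ERR(dst))
		return PTR_ERR(dst);

	ip6_dst_store(sk, dst, NULL, NULL);
	return 0;
}
#endif
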
/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (!dst)
		dst = ip6_dst_lookup_flow(sk, fl6, final_dst);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa */
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	if (dst_allfrag(rt->dst.path))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}

static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6,
			     const struct sockcm_cookie *sockc)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    !(flags & MSG_MORE) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			refcount_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags,
		    const struct sockcm_cookie *sockc)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6, sockc);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

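/*
 * Illustrative corking sketch (hypothetical, heavily simplified from what
 * udpv6_sendmsg() does): appended data sits on sk->sk_write_queue until
 * the pending frames are pushed; on error the queue must be flushed. A
 * real datagram protocol fills in its transport header before pushing
 * (UDP does this in udp_v6_push_pending_frames()).
 */
#if 0
static int example_dgram_send(struct sock *sk, struct msghdr *msg,
			      size_t len, struct ipcm6_cookie *ipc6,
			      struct flowi6 *fl6, struct rt6_info *rt,
			      const struct sockcm_cookie *sockc)
{
	int err;

	err = ip6_append_data(sk, ip_generic_getfrag, msg, len,
			      sizeof(struct udphdr), ipc6, fl6, rt,
			      msg->msg_flags, sockc);
	if (err)
		ip6_flush_pending_frames(sk);
	else if (!(msg->msg_flags & MSG_MORE))
		err = ip6_push_pending_frames(sk);
	return err;
}
#endif
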
static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}

struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     const struct sockcm_cookie *sockc)
{
	struct inet_cork_full cork;
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork.base.flags = 0;
	cork.base.addr = 0;
	cork.base.opt = NULL;
	cork.base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
	if (err) {
		ip6_cork_release(&cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6, sockc);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
}