• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	IPv6 output functions
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on linux/net/ipv4/ip_output.c
10  *
11  *	Changes:
12  *	A.N.Kuznetsov	:	arithmetic in fragmentation.
13  *				extension headers are implemented.
14  *				route changes now work.
15  *				ip6_forward does not confuse sniffers.
16  *				etc.
17  *
18  *      H. von Brand    :       Added missing #include <linux/string.h>
19  *	Imran Patel	:	frag id should be in NBO
20  *      Kazunori MIYAZAWA @USAGI
21  *			:       add ip6_append_data and related functions
22  *				for datagram xmit
23  */
24 
25 #include <linux/errno.h>
26 #include <linux/kernel.h>
27 #include <linux/string.h>
28 #include <linux/socket.h>
29 #include <linux/net.h>
30 #include <linux/netdevice.h>
31 #include <linux/if_arp.h>
32 #include <linux/in6.h>
33 #include <linux/tcp.h>
34 #include <linux/route.h>
35 #include <linux/module.h>
36 #include <linux/slab.h>
37 
38 #include <linux/bpf-cgroup.h>
39 #include <linux/netfilter.h>
40 #include <linux/netfilter_ipv6.h>
41 
42 #include <net/sock.h>
43 #include <net/snmp.h>
44 
45 #include <net/ipv6.h>
46 #include <net/ndisc.h>
47 #include <net/protocol.h>
48 #include <net/ip6_route.h>
49 #include <net/addrconf.h>
50 #include <net/rawv6.h>
51 #include <net/icmp.h>
52 #include <net/xfrm.h>
53 #include <net/checksum.h>
54 #include <linux/mroute6.h>
55 #include <net/l3mdev.h>
56 #include <net/lwtunnel.h>
57 
ip6_finish_output2(struct net * net,struct sock * sk,struct sk_buff * skb)58 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
59 {
60 	struct dst_entry *dst = skb_dst(skb);
61 	struct net_device *dev = dst->dev;
62 	const struct in6_addr *nexthop;
63 	struct neighbour *neigh;
64 	int ret;
65 
66 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
67 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
68 
69 		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
70 		    ((mroute6_is_socket(net, skb) &&
71 		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
72 		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
73 					 &ipv6_hdr(skb)->saddr))) {
74 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
75 
76 			/* Do not check for IFF_ALLMULTI; multicast routing
77 			   is not supported in any case.
78 			 */
79 			if (newskb)
80 				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
81 					net, sk, newskb, NULL, newskb->dev,
82 					dev_loopback_xmit);
83 
84 			if (ipv6_hdr(skb)->hop_limit == 0) {
85 				IP6_INC_STATS(net, idev,
86 					      IPSTATS_MIB_OUTDISCARDS);
87 				kfree_skb(skb);
88 				return 0;
89 			}
90 		}
91 
92 		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
93 
94 		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
95 		    IPV6_ADDR_SCOPE_NODELOCAL &&
96 		    !(dev->flags & IFF_LOOPBACK)) {
97 			kfree_skb(skb);
98 			return 0;
99 		}
100 	}
101 
102 	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
103 		int res = lwtunnel_xmit(skb);
104 
105 		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
106 			return res;
107 	}
108 
109 	rcu_read_lock_bh();
110 	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
111 	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
112 	if (unlikely(!neigh))
113 		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
114 	if (!IS_ERR(neigh)) {
115 		sock_confirm_neigh(skb, neigh);
116 		ret = neigh_output(neigh, skb, false);
117 		rcu_read_unlock_bh();
118 		return ret;
119 	}
120 	rcu_read_unlock_bh();
121 
122 	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
123 	kfree_skb(skb);
124 	return -EINVAL;
125 }
126 
__ip6_finish_output(struct net * net,struct sock * sk,struct sk_buff * skb)127 static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
128 {
129 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
130 	/* Policy lookup after SNAT yielded a new policy */
131 	if (skb_dst(skb)->xfrm) {
132 		IPCB(skb)->flags |= IPSKB_REROUTED;
133 		return dst_output(net, sk, skb);
134 	}
135 #endif
136 
137 	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
138 	    dst_allfrag(skb_dst(skb)) ||
139 	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
140 		return ip6_fragment(net, sk, skb, ip6_finish_output2);
141 	else
142 		return ip6_finish_output2(net, sk, skb);
143 }
144 
ip6_finish_output(struct net * net,struct sock * sk,struct sk_buff * skb)145 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
146 {
147 	int ret;
148 
149 	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
150 	switch (ret) {
151 	case NET_XMIT_SUCCESS:
152 		return __ip6_finish_output(net, sk, skb);
153 	case NET_XMIT_CN:
154 		return __ip6_finish_output(net, sk, skb) ? : ret;
155 	default:
156 		kfree_skb(skb);
157 		return ret;
158 	}
159 }
160 
ip6_output(struct net * net,struct sock * sk,struct sk_buff * skb)161 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
162 {
163 	struct net_device *dev = skb_dst(skb)->dev;
164 	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
165 
166 	skb->protocol = htons(ETH_P_IPV6);
167 	skb->dev = dev;
168 
169 	if (unlikely(idev->cnf.disable_ipv6)) {
170 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
171 		kfree_skb(skb);
172 		return 0;
173 	}
174 
175 	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
176 			    net, sk, skb, NULL, dev,
177 			    ip6_finish_output,
178 			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
179 }
180 
ip6_autoflowlabel(struct net * net,const struct ipv6_pinfo * np)181 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
182 {
183 	if (!np->autoflowlabel_set)
184 		return ip6_default_np_autolabel(net);
185 	else
186 		return np->autoflowlabel;
187 }
188 
189 /*
190  * xmit an sk_buff (used by TCP, SCTP and DCCP)
191  * Note : socket lock is not held for SYNACK packets, but might be modified
192  * by calls to skb_set_owner_w() and ipv6_local_error(),
193  * which are using proper atomic operations or spinlocks.
194  */
ip6_xmit(const struct sock * sk,struct sk_buff * skb,struct flowi6 * fl6,__u32 mark,struct ipv6_txoptions * opt,int tclass,u32 priority)195 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
196 	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
197 {
198 	struct net *net = sock_net(sk);
199 	const struct ipv6_pinfo *np = inet6_sk(sk);
200 	struct in6_addr *first_hop = &fl6->daddr;
201 	struct dst_entry *dst = skb_dst(skb);
202 	unsigned int head_room;
203 	struct ipv6hdr *hdr;
204 	u8  proto = fl6->flowi6_proto;
205 	int seg_len = skb->len;
206 	int hlimit = -1;
207 	u32 mtu;
208 
209 	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
210 	if (opt)
211 		head_room += opt->opt_nflen + opt->opt_flen;
212 
213 	if (unlikely(skb_headroom(skb) < head_room)) {
214 		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
215 		if (!skb2) {
216 			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
217 				      IPSTATS_MIB_OUTDISCARDS);
218 			kfree_skb(skb);
219 			return -ENOBUFS;
220 		}
221 		if (skb->sk)
222 			skb_set_owner_w(skb2, skb->sk);
223 		consume_skb(skb);
224 		skb = skb2;
225 	}
226 
227 	if (opt) {
228 		seg_len += opt->opt_nflen + opt->opt_flen;
229 
230 		if (opt->opt_flen)
231 			ipv6_push_frag_opts(skb, opt, &proto);
232 
233 		if (opt->opt_nflen)
234 			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
235 					     &fl6->saddr);
236 	}
237 
238 	skb_push(skb, sizeof(struct ipv6hdr));
239 	skb_reset_network_header(skb);
240 	hdr = ipv6_hdr(skb);
241 
242 	/*
243 	 *	Fill in the IPv6 header
244 	 */
245 	if (np)
246 		hlimit = np->hop_limit;
247 	if (hlimit < 0)
248 		hlimit = ip6_dst_hoplimit(dst);
249 
250 	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
251 				ip6_autoflowlabel(net, np), fl6));
252 
253 	hdr->payload_len = htons(seg_len);
254 	hdr->nexthdr = proto;
255 	hdr->hop_limit = hlimit;
256 
257 	hdr->saddr = fl6->saddr;
258 	hdr->daddr = *first_hop;
259 
260 	skb->protocol = htons(ETH_P_IPV6);
261 	skb->priority = priority;
262 	skb->mark = mark;
263 
264 	mtu = dst_mtu(dst);
265 	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
266 		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
267 			      IPSTATS_MIB_OUT, skb->len);
268 
269 		/* if egress device is enslaved to an L3 master device pass the
270 		 * skb to its handler for processing
271 		 */
272 		skb = l3mdev_ip6_out((struct sock *)sk, skb);
273 		if (unlikely(!skb))
274 			return 0;
275 
276 		/* hooks should never assume socket lock is held.
277 		 * we promote our socket to non const
278 		 */
279 		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
280 			       net, (struct sock *)sk, skb, NULL, dst->dev,
281 			       dst_output);
282 	}
283 
284 	skb->dev = dst->dev;
285 	/* ipv6_local_error() does not require socket lock,
286 	 * we promote our socket to non const
287 	 */
288 	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
289 
290 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
291 	kfree_skb(skb);
292 	return -EMSGSIZE;
293 }
294 EXPORT_SYMBOL(ip6_xmit);
295 
ip6_call_ra_chain(struct sk_buff * skb,int sel)296 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
297 {
298 	struct ip6_ra_chain *ra;
299 	struct sock *last = NULL;
300 
301 	read_lock(&ip6_ra_lock);
302 	for (ra = ip6_ra_chain; ra; ra = ra->next) {
303 		struct sock *sk = ra->sk;
304 		if (sk && ra->sel == sel &&
305 		    (!sk->sk_bound_dev_if ||
306 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
307 			struct ipv6_pinfo *np = inet6_sk(sk);
308 
309 			if (np && np->rtalert_isolate &&
310 			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
311 				continue;
312 			}
313 			if (last) {
314 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
315 				if (skb2)
316 					rawv6_rcv(last, skb2);
317 			}
318 			last = sk;
319 		}
320 	}
321 
322 	if (last) {
323 		rawv6_rcv(last, skb);
324 		read_unlock(&ip6_ra_lock);
325 		return 1;
326 	}
327 	read_unlock(&ip6_ra_lock);
328 	return 0;
329 }
330 
ip6_forward_proxy_check(struct sk_buff * skb)331 static int ip6_forward_proxy_check(struct sk_buff *skb)
332 {
333 	struct ipv6hdr *hdr = ipv6_hdr(skb);
334 	u8 nexthdr = hdr->nexthdr;
335 	__be16 frag_off;
336 	int offset;
337 
338 	if (ipv6_ext_hdr(nexthdr)) {
339 		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
340 		if (offset < 0)
341 			return 0;
342 	} else
343 		offset = sizeof(struct ipv6hdr);
344 
345 	if (nexthdr == IPPROTO_ICMPV6) {
346 		struct icmp6hdr *icmp6;
347 
348 		if (!pskb_may_pull(skb, (skb_network_header(skb) +
349 					 offset + 1 - skb->data)))
350 			return 0;
351 
352 		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
353 
354 		switch (icmp6->icmp6_type) {
355 		case NDISC_ROUTER_SOLICITATION:
356 		case NDISC_ROUTER_ADVERTISEMENT:
357 		case NDISC_NEIGHBOUR_SOLICITATION:
358 		case NDISC_NEIGHBOUR_ADVERTISEMENT:
359 		case NDISC_REDIRECT:
360 			/* For reaction involving unicast neighbor discovery
361 			 * message destined to the proxied address, pass it to
362 			 * input function.
363 			 */
364 			return 1;
365 		default:
366 			break;
367 		}
368 	}
369 
370 	/*
371 	 * The proxying router can't forward traffic sent to a link-local
372 	 * address, so signal the sender and discard the packet. This
373 	 * behavior is clarified by the MIPv6 specification.
374 	 */
375 	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
376 		dst_link_failure(skb);
377 		return -1;
378 	}
379 
380 	return 0;
381 }
382 
ip6_forward_finish(struct net * net,struct sock * sk,struct sk_buff * skb)383 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
384 				     struct sk_buff *skb)
385 {
386 	struct dst_entry *dst = skb_dst(skb);
387 
388 	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
389 	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
390 
391 #ifdef CONFIG_NET_SWITCHDEV
392 	if (skb->offload_l3_fwd_mark) {
393 		consume_skb(skb);
394 		return 0;
395 	}
396 #endif
397 
398 	skb->tstamp = 0;
399 	return dst_output(net, sk, skb);
400 }
401 
ip6_pkt_too_big(const struct sk_buff * skb,unsigned int mtu)402 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
403 {
404 	if (skb->len <= mtu)
405 		return false;
406 
407 	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
408 	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
409 		return true;
410 
411 	if (skb->ignore_df)
412 		return false;
413 
414 	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
415 		return false;
416 
417 	return true;
418 }
419 
/*
 * ip6_forward - forward a received IPv6 packet towards its next hop
 * @skb: packet to forward; its dst has already been attached
 *
 * Validates the packet (forwarding enabled, addressed to this host at link
 * level, not locally owned, xfrm policy), hands Router Alert packets to
 * listening raw sockets, enforces the hop limit, handles proxied neighbour
 * discovery, optionally emits a redirect, enforces the forwarding MTU and
 * finally decrements the hop limit and passes the packet through the
 * NF_INET_FORWARD hook to ip6_forward_finish().
 *
 * Returns 0 if a Router Alert socket took the packet, the result of
 * ip6_input() for proxied neighbour discovery, -ETIMEDOUT when the hop limit
 * is exhausted, -EMSGSIZE when the packet is too big, -EINVAL for every
 * other drop, otherwise the result of the forward hook chain. The skb is
 * freed on all drop paths.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	/* Forwarding disabled: counted as an address error. */
	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	/* Only forward frames that were addressed to us at link level. */
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	/* A packet owned by a local socket must not be forwarded. */
	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		/* >0: deliver locally, <0: discard, 0: keep forwarding.
		 * NOTE(review): ip6_forward_proxy_check() calls
		 * pskb_may_pull(); hdr was loaded before that call and is
		 * used again below. Confirm the header cannot move here.
		 */
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* Reload the dst after the xfrm route lookup. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		/* Redirect target: the gateway for gateway routes, else the
		 * destination itself.
		 */
		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	/* Never work with less than the IPv6 minimum MTU. */
	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* Get a private, writable header with room for the link header. */
	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow() may have moved the data; reload the header pointer. */
	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
578 
ip6_copy_metadata(struct sk_buff * to,struct sk_buff * from)579 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
580 {
581 	to->pkt_type = from->pkt_type;
582 	to->priority = from->priority;
583 	to->protocol = from->protocol;
584 	skb_dst_drop(to);
585 	skb_dst_set(to, dst_clone(skb_dst(from)));
586 	to->dev = from->dev;
587 	to->mark = from->mark;
588 
589 	skb_copy_hash(to, from);
590 
591 #ifdef CONFIG_NET_SCHED
592 	to->tc_index = from->tc_index;
593 #endif
594 	nf_copy(to, from);
595 	skb_ext_copy(to, from);
596 	skb_copy_secmark(to, from);
597 }
598 
/* Start frag-list based fragmentation: turn the head @skb into the first
 * fragment and set up @iter for walking the remaining frag-list members.
 *
 * @hlen bytes of network headers (with *@prevhdr already rewritten to
 * NEXTHDR_FRAGMENT) are saved in iter->tmp_hdr so that they can be copied in
 * front of every later fragment. The frag list is detached from @skb and a
 * fragment header with offset 0 and the More Fragments bit is inserted
 * between the saved headers and the payload.
 *
 * Returns 0 on success or -ENOMEM if the header copy cannot be allocated;
 * *@prevhdr has been modified in either case.
 */
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	struct frag_hdr *fraghdr;
	unsigned int head_len;

	/* Patch the chain first so the saved copy already announces a
	 * fragment header.
	 */
	*prevhdr = NEXTHDR_FRAGMENT;
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	iter->hlen = hlen;
	iter->nexthdr = nexthdr;
	iter->frag_id = frag_id;
	iter->offset = 0;

	/* Take the fragment chain away from the head skb. */
	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	/* Open a gap for the fragment header right behind the unfragmentable
	 * part, then put the saved headers back in front of it.
	 */
	__skb_pull(skb, hlen);
	fraghdr = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fraghdr->nexthdr = nexthdr;
	fraghdr->reserved = 0;
	fraghdr->frag_off = htons(IP6_MF);
	fraghdr->identification = frag_id;

	/* The head now covers only its own linear and paged data. data_len
	 * has to be derived before len is overwritten.
	 */
	head_len = skb_pagelen(skb);
	skb->data_len = head_len - skb_headlen(skb);
	skb->len = head_len;
	ipv6_hdr(skb)->payload_len = htons(head_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);
639 
ip6_fraglist_prepare(struct sk_buff * skb,struct ip6_fraglist_iter * iter)640 void ip6_fraglist_prepare(struct sk_buff *skb,
641 			  struct ip6_fraglist_iter *iter)
642 {
643 	struct sk_buff *frag = iter->frag;
644 	unsigned int hlen = iter->hlen;
645 	struct frag_hdr *fh;
646 
647 	frag->ip_summed = CHECKSUM_NONE;
648 	skb_reset_transport_header(frag);
649 	fh = __skb_push(frag, sizeof(struct frag_hdr));
650 	__skb_push(frag, hlen);
651 	skb_reset_network_header(frag);
652 	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
653 	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
654 	fh->nexthdr = iter->nexthdr;
655 	fh->reserved = 0;
656 	fh->frag_off = htons(iter->offset);
657 	if (frag->next)
658 		fh->frag_off |= htons(IP6_MF);
659 	fh->identification = iter->frag_id;
660 	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
661 	ip6_copy_metadata(frag, skb);
662 }
663 EXPORT_SYMBOL(ip6_fraglist_prepare);
664 
/* Prepare @state for copy-based fragmentation of @skb with ip6_frag_next().
 *
 * @hlen is the length of the headers repeated in every fragment, @mtu the
 * upper bound on payload bytes per fragment, @hdr_room / @needed_tailroom
 * the head and tail room to reserve in each new skb, @prevhdr the location
 * of the next-header byte to be replaced by NEXTHDR_FRAGMENT, and @nexthdr /
 * @frag_id the values written into each fragment header.
 */
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
	/* Geometry shared by every fragment. */
	state->hlen = hlen;
	state->mtu = mtu;
	state->hroom = hdr_room;
	state->troom = needed_tailroom;

	/* Values stamped into each fragment. */
	state->prevhdr = prevhdr;
	state->nexthdr = nexthdr;
	state->frag_id = frag_id;

	/* Cursor over the payload still to be copied. */
	state->ptr = hlen;			/* Where to start from */
	state->left = skb->len - hlen;		/* Payload bytes remaining */
	state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);
685 
ip6_frag_next(struct sk_buff * skb,struct ip6_frag_state * state)686 struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
687 {
688 	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
689 	struct sk_buff *frag;
690 	struct frag_hdr *fh;
691 	unsigned int len;
692 
693 	len = state->left;
694 	/* IF: it doesn't fit, use 'mtu' - the data space left */
695 	if (len > state->mtu)
696 		len = state->mtu;
697 	/* IF: we are not sending up to and including the packet end
698 	   then align the next start on an eight byte boundary */
699 	if (len < state->left)
700 		len &= ~7;
701 
702 	/* Allocate buffer */
703 	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
704 			 state->hroom + state->troom, GFP_ATOMIC);
705 	if (!frag)
706 		return ERR_PTR(-ENOMEM);
707 
708 	/*
709 	 *	Set up data on packet
710 	 */
711 
712 	ip6_copy_metadata(frag, skb);
713 	skb_reserve(frag, state->hroom);
714 	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
715 	skb_reset_network_header(frag);
716 	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
717 	frag->transport_header = (frag->network_header + state->hlen +
718 				  sizeof(struct frag_hdr));
719 
720 	/*
721 	 *	Charge the memory for the fragment to any owner
722 	 *	it might possess
723 	 */
724 	if (skb->sk)
725 		skb_set_owner_w(frag, skb->sk);
726 
727 	/*
728 	 *	Copy the packet header into the new buffer.
729 	 */
730 	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
731 
732 	fragnexthdr_offset = skb_network_header(frag);
733 	fragnexthdr_offset += prevhdr - skb_network_header(skb);
734 	*fragnexthdr_offset = NEXTHDR_FRAGMENT;
735 
736 	/*
737 	 *	Build fragment header.
738 	 */
739 	fh->nexthdr = state->nexthdr;
740 	fh->reserved = 0;
741 	fh->identification = state->frag_id;
742 
743 	/*
744 	 *	Copy a block of the IP datagram.
745 	 */
746 	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
747 			     len));
748 	state->left -= len;
749 
750 	fh->frag_off = htons(state->offset);
751 	if (state->left > 0)
752 		fh->frag_off |= htons(IP6_MF);
753 	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
754 
755 	state->ptr += len;
756 	state->offset += len;
757 
758 	return frag;
759 }
760 EXPORT_SYMBOL(ip6_frag_next);
761 
/*
 * ip6_fragment - split an IPv6 packet into fragments and send each of them
 * @net: network namespace, used for statistics and fragment id selection
 * @sk: socket passed through to @output
 * @skb: packet to fragment
 * @output: function called to transmit every fragment
 *
 * Two strategies are used. If @skb has a frag list whose members already
 * have suitable sizes, headroom and ownership, each member is converted in
 * place into one fragment (fast path). Otherwise a new skb is allocated per
 * fragment and the data is copied into it (slow path).
 *
 * Returns 0 on success, -EMSGSIZE if the packet may not be fragmented or
 * the usable MTU is too small (an ICMPv6 Packet Too Big is sent), or the
 * error from header parsing, checksumming, allocation or @output. On the
 * slow path the original @skb is consumed on success; it is freed at the
 * fail labels.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	/* Socket settings are only consulted outside of device recursion. */
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	/* On success err is the length of the headers that precede the
	 * fragment header and prevhdr points at the next-header byte that
	 * will have to announce it.
	 */
	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	/* Remember prevhdr as an offset; the pointer is recomputed below. */
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	/* A non-zero per-socket fragment size below the MTU takes precedence. */
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	/* Each fragment must fit the headers, the fragment header and at
	 * least 8 bytes of payload.
	 */
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	/* From here on mtu is the payload budget per fragment. */
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	/* Resolve a pending checksum before the payload is split up. */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	/* Recompute prevhdr from the saved offset after checksumming. */
	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		/* The head must be a valid first fragment as it stands:
		 * within budget, 8-byte aligned payload, not cloned, and
		 * with headroom for the fragment header.
		 */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			/* Each member becomes a packet of its own: give it
			 * the head's socket and move its truesize out of
			 * the head's accounting.
			 */
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb->tstamp = tstamp;
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		/* Free the fragments that were never handed to output(). */
		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		/* Undo the ownership and truesize changes made to the
		 * members visited before the one that failed the checks.
		 */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 *	Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 *	Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 *	Put this fragment into the sending queue.
		 */
		frag->tstamp = tstamp;
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	/* All data was copied into the fragments; the original is done. */
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
944 
ip6_rt_check(const struct rt6key * rt_key,const struct in6_addr * fl_addr,const struct in6_addr * addr_cache)945 static inline int ip6_rt_check(const struct rt6key *rt_key,
946 			       const struct in6_addr *fl_addr,
947 			       const struct in6_addr *addr_cache)
948 {
949 	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
950 		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
951 }
952 
ip6_sk_dst_check(struct sock * sk,struct dst_entry * dst,const struct flowi6 * fl6)953 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
954 					  struct dst_entry *dst,
955 					  const struct flowi6 *fl6)
956 {
957 	struct ipv6_pinfo *np = inet6_sk(sk);
958 	struct rt6_info *rt;
959 
960 	if (!dst)
961 		goto out;
962 
963 	if (dst->ops->family != AF_INET6) {
964 		dst_release(dst);
965 		return NULL;
966 	}
967 
968 	rt = (struct rt6_info *)dst;
969 	/* Yes, checking route validity in not connected
970 	 * case is not very simple. Take into account,
971 	 * that we do not support routing by source, TOS,
972 	 * and MSG_DONTROUTE		--ANK (980726)
973 	 *
974 	 * 1. ip6_rt_check(): If route was host route,
975 	 *    check that cached destination is current.
976 	 *    If it is network route, we still may
977 	 *    check its validity using saved pointer
978 	 *    to the last used address: daddr_cache.
979 	 *    We do not want to save whole address now,
980 	 *    (because main consumer of this service
981 	 *    is tcp, which has not this problem),
982 	 *    so that the last trick works only on connected
983 	 *    sockets.
984 	 * 2. oif also should be the same.
985 	 */
986 	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
987 #ifdef CONFIG_IPV6_SUBTREES
988 	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
989 #endif
990 	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
991 	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
992 		dst_release(dst);
993 		dst = NULL;
994 	}
995 
996 out:
997 	return dst;
998 }
999 
ip6_dst_lookup_tail(struct net * net,const struct sock * sk,struct dst_entry ** dst,struct flowi6 * fl6)1000 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
1001 			       struct dst_entry **dst, struct flowi6 *fl6)
1002 {
1003 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1004 	struct neighbour *n;
1005 	struct rt6_info *rt;
1006 #endif
1007 	int err;
1008 	int flags = 0;
1009 
1010 	/* The correct way to handle this would be to do
1011 	 * ip6_route_get_saddr, and then ip6_route_output; however,
1012 	 * the route-specific preferred source forces the
1013 	 * ip6_route_output call _before_ ip6_route_get_saddr.
1014 	 *
1015 	 * In source specific routing (no src=any default route),
1016 	 * ip6_route_output will fail given src=any saddr, though, so
1017 	 * that's why we try it again later.
1018 	 */
1019 	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
1020 		struct fib6_info *from;
1021 		struct rt6_info *rt;
1022 		bool had_dst = *dst != NULL;
1023 
1024 		if (!had_dst)
1025 			*dst = ip6_route_output(net, sk, fl6);
1026 		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
1027 
1028 		rcu_read_lock();
1029 		from = rt ? rcu_dereference(rt->from) : NULL;
1030 		err = ip6_route_get_saddr(net, from, &fl6->daddr,
1031 					  sk ? inet6_sk(sk)->srcprefs : 0,
1032 					  &fl6->saddr);
1033 		rcu_read_unlock();
1034 
1035 		if (err)
1036 			goto out_err_release;
1037 
1038 		/* If we had an erroneous initial result, pretend it
1039 		 * never existed and let the SA-enabled version take
1040 		 * over.
1041 		 */
1042 		if (!had_dst && (*dst)->error) {
1043 			dst_release(*dst);
1044 			*dst = NULL;
1045 		}
1046 
1047 		if (fl6->flowi6_oif)
1048 			flags |= RT6_LOOKUP_F_IFACE;
1049 	}
1050 
1051 	if (!*dst)
1052 		*dst = ip6_route_output_flags(net, sk, fl6, flags);
1053 
1054 	err = (*dst)->error;
1055 	if (err)
1056 		goto out_err_release;
1057 
1058 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1059 	/*
1060 	 * Here if the dst entry we've looked up
1061 	 * has a neighbour entry that is in the INCOMPLETE
1062 	 * state and the src address from the flow is
1063 	 * marked as OPTIMISTIC, we release the found
1064 	 * dst entry and replace it instead with the
1065 	 * dst entry of the nexthop router
1066 	 */
1067 	rt = (struct rt6_info *) *dst;
1068 	rcu_read_lock_bh();
1069 	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1070 				      rt6_nexthop(rt, &fl6->daddr));
1071 	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1072 	rcu_read_unlock_bh();
1073 
1074 	if (err) {
1075 		struct inet6_ifaddr *ifp;
1076 		struct flowi6 fl_gw6;
1077 		int redirect;
1078 
1079 		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1080 				      (*dst)->dev, 1);
1081 
1082 		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1083 		if (ifp)
1084 			in6_ifa_put(ifp);
1085 
1086 		if (redirect) {
1087 			/*
1088 			 * We need to get the dst entry for the
1089 			 * default router instead
1090 			 */
1091 			dst_release(*dst);
1092 			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1093 			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1094 			*dst = ip6_route_output(net, sk, &fl_gw6);
1095 			err = (*dst)->error;
1096 			if (err)
1097 				goto out_err_release;
1098 		}
1099 	}
1100 #endif
1101 	if (ipv6_addr_v4mapped(&fl6->saddr) &&
1102 	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1103 		err = -EAFNOSUPPORT;
1104 		goto out_err_release;
1105 	}
1106 
1107 	return 0;
1108 
1109 out_err_release:
1110 	dst_release(*dst);
1111 	*dst = NULL;
1112 
1113 	if (err == -ENETUNREACH)
1114 		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1115 	return err;
1116 }
1117 
1118 /**
1119  *	ip6_dst_lookup - perform route lookup on flow
1120  *	@sk: socket which provides route info
1121  *	@dst: pointer to dst_entry * for result
1122  *	@fl6: flow to lookup
1123  *
1124  *	This function performs a route lookup on the given flow.
1125  *
1126  *	It returns zero on success, or a standard errno code on error.
1127  */
ip6_dst_lookup(struct net * net,struct sock * sk,struct dst_entry ** dst,struct flowi6 * fl6)1128 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1129 		   struct flowi6 *fl6)
1130 {
1131 	*dst = NULL;
1132 	return ip6_dst_lookup_tail(net, sk, dst, fl6);
1133 }
1134 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1135 
1136 /**
1137  *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1138  *	@sk: socket which provides route info
1139  *	@fl6: flow to lookup
1140  *	@final_dst: final destination address for ipsec lookup
1141  *
1142  *	This function performs a route lookup on the given flow.
1143  *
1144  *	It returns a valid dst pointer on success, or a pointer encoded
1145  *	error code.
1146  */
ip6_dst_lookup_flow(struct net * net,const struct sock * sk,struct flowi6 * fl6,const struct in6_addr * final_dst)1147 struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1148 				      const struct in6_addr *final_dst)
1149 {
1150 	struct dst_entry *dst = NULL;
1151 	int err;
1152 
1153 	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1154 	if (err)
1155 		return ERR_PTR(err);
1156 	if (final_dst)
1157 		fl6->daddr = *final_dst;
1158 
1159 	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1160 }
1161 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1162 
1163 /**
1164  *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1165  *	@sk: socket which provides the dst cache and route info
1166  *	@fl6: flow to lookup
1167  *	@final_dst: final destination address for ipsec lookup
1168  *	@connected: whether @sk is connected or not
1169  *
1170  *	This function performs a route lookup on the given flow with the
1171  *	possibility of using the cached route in the socket if it is valid.
1172  *	It will take the socket dst lock when operating on the dst cache.
1173  *	As a result, this function can only be used in process context.
1174  *
1175  *	In addition, for a connected socket, cache the dst in the socket
1176  *	if the current cache is not valid.
1177  *
1178  *	It returns a valid dst pointer on success, or a pointer encoded
1179  *	error code.
1180  */
ip6_sk_dst_lookup_flow(struct sock * sk,struct flowi6 * fl6,const struct in6_addr * final_dst,bool connected)1181 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1182 					 const struct in6_addr *final_dst,
1183 					 bool connected)
1184 {
1185 	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1186 
1187 	dst = ip6_sk_dst_check(sk, dst, fl6);
1188 	if (dst)
1189 		return dst;
1190 
1191 	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
1192 	if (connected && !IS_ERR(dst))
1193 		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1194 
1195 	return dst;
1196 }
1197 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1198 
/*
 * Duplicate an option header with kmemdup().  The copied size is
 * (hdrlen + 1) * 8 bytes.  Returns NULL when @src is NULL or when the
 * allocation fails.
 */
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	if (!src)
		return NULL;

	return kmemdup(src, (src->hdrlen + 1) * 8, gfp);
}
1204 
/*
 * Duplicate a routing header with kmemdup().  The copied size is
 * (hdrlen + 1) * 8 bytes.  Returns NULL when @src is NULL or when the
 * allocation fails.
 */
static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	if (!src)
		return NULL;

	return kmemdup(src, (src->hdrlen + 1) * 8, gfp);
}
1210 
ip6_append_data_mtu(unsigned int * mtu,int * maxfraglen,unsigned int fragheaderlen,struct sk_buff * skb,struct rt6_info * rt,unsigned int orig_mtu)1211 static void ip6_append_data_mtu(unsigned int *mtu,
1212 				int *maxfraglen,
1213 				unsigned int fragheaderlen,
1214 				struct sk_buff *skb,
1215 				struct rt6_info *rt,
1216 				unsigned int orig_mtu)
1217 {
1218 	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1219 		if (!skb) {
1220 			/* first fragment, reserve header_len */
1221 			*mtu = orig_mtu - rt->dst.header_len;
1222 
1223 		} else {
1224 			/*
1225 			 * this fragment is not first, the headers
1226 			 * space is regarded as data space.
1227 			 */
1228 			*mtu = orig_mtu;
1229 		}
1230 		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
1231 			      + fragheaderlen - sizeof(struct frag_hdr);
1232 	}
1233 }
1234 
ip6_setup_cork(struct sock * sk,struct inet_cork_full * cork,struct inet6_cork * v6_cork,struct ipcm6_cookie * ipc6,struct rt6_info * rt,struct flowi6 * fl6)1235 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1236 			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1237 			  struct rt6_info *rt, struct flowi6 *fl6)
1238 {
1239 	struct ipv6_pinfo *np = inet6_sk(sk);
1240 	unsigned int mtu;
1241 	struct ipv6_txoptions *opt = ipc6->opt;
1242 
1243 	/*
1244 	 * setup for corking
1245 	 */
1246 	if (opt) {
1247 		if (WARN_ON(v6_cork->opt))
1248 			return -EINVAL;
1249 
1250 		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1251 		if (unlikely(!v6_cork->opt))
1252 			return -ENOBUFS;
1253 
1254 		v6_cork->opt->tot_len = sizeof(*opt);
1255 		v6_cork->opt->opt_flen = opt->opt_flen;
1256 		v6_cork->opt->opt_nflen = opt->opt_nflen;
1257 
1258 		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1259 						    sk->sk_allocation);
1260 		if (opt->dst0opt && !v6_cork->opt->dst0opt)
1261 			return -ENOBUFS;
1262 
1263 		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1264 						    sk->sk_allocation);
1265 		if (opt->dst1opt && !v6_cork->opt->dst1opt)
1266 			return -ENOBUFS;
1267 
1268 		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1269 						   sk->sk_allocation);
1270 		if (opt->hopopt && !v6_cork->opt->hopopt)
1271 			return -ENOBUFS;
1272 
1273 		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1274 						    sk->sk_allocation);
1275 		if (opt->srcrt && !v6_cork->opt->srcrt)
1276 			return -ENOBUFS;
1277 
1278 		/* need source address above miyazawa*/
1279 	}
1280 	dst_hold(&rt->dst);
1281 	cork->base.dst = &rt->dst;
1282 	cork->fl.u.ip6 = *fl6;
1283 	v6_cork->hop_limit = ipc6->hlimit;
1284 	v6_cork->tclass = ipc6->tclass;
1285 	if (rt->dst.flags & DST_XFRM_TUNNEL)
1286 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1287 		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1288 	else
1289 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1290 			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1291 	if (np->frag_size < mtu) {
1292 		if (np->frag_size)
1293 			mtu = np->frag_size;
1294 	}
1295 	if (mtu < IPV6_MIN_MTU)
1296 		return -EINVAL;
1297 	cork->base.fragsize = mtu;
1298 	cork->base.gso_size = ipc6->gso_size;
1299 	cork->base.tx_flags = 0;
1300 	cork->base.mark = ipc6->sockc.mark;
1301 	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1302 
1303 	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1304 		cork->base.flags |= IPCORK_ALLFRAG;
1305 	cork->base.length = 0;
1306 
1307 	cork->base.transmit_time = ipc6->sockc.transmit_time;
1308 
1309 	return 0;
1310 }
1311 
/*
 * __ip6_append_data - append payload to the pending datagram on @queue
 * @sk: sending socket
 * @fl6: flow of the datagram (used here for local error reporting)
 * @queue: queue of pending skbs; the tail skb is extended while it has
 *	   room, otherwise new skbs are allocated and queued at the tail
 * @cork: generic cork state (route, fragsize, gso_size, length, ...)
 * @v6_cork: IPv6 cork state holding the tx options
 * @pfrag: page fragment used when data is copied into page frags
 * @getfrag: copy callback, invoked as getfrag(from, to, offset, len, odd,
 *	     skb); a negative return value is treated as a copy failure
 * @from: opaque source cookie handed to @getfrag
 * @length: number of bytes to append (transport header included)
 * @transhdrlen: transport header length; reset to 0 once the first skb of
 *		 this call has been built
 * @flags: MSG_* flags (MSG_MORE, MSG_DONTWAIT, MSG_CONFIRM, MSG_ZEROCOPY
 *	   are looked at)
 * @ipc6: per-call parameters; only dontfrag is read here
 *
 * cork->length is increased by @length before any data is copied; on
 * failure the part that was not consumed is subtracted again.
 *
 * Returns 0 on success or a negative errno (-EMSGSIZE, -ENOBUFS, -EINVAL,
 * -EFAULT, -ENOMEM, or whatever the skb allocation / zerocopy helpers
 * report).
 */
__ip6_append_data(struct sock * sk,struct flowi6 * fl6,struct sk_buff_head * queue,struct inet_cork * cork,struct inet6_cork * v6_cork,struct page_frag * pfrag,int getfrag (void * from,char * to,int offset,int len,int odd,struct sk_buff * skb),void * from,int length,int transhdrlen,unsigned int flags,struct ipcm6_cookie * ipc6)1312 static int __ip6_append_data(struct sock *sk,
1313 			     struct flowi6 *fl6,
1314 			     struct sk_buff_head *queue,
1315 			     struct inet_cork *cork,
1316 			     struct inet6_cork *v6_cork,
1317 			     struct page_frag *pfrag,
1318 			     int getfrag(void *from, char *to, int offset,
1319 					 int len, int odd, struct sk_buff *skb),
1320 			     void *from, int length, int transhdrlen,
1321 			     unsigned int flags, struct ipcm6_cookie *ipc6)
1322 {
1323 	struct sk_buff *skb, *skb_prev = NULL;
1324 	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1325 	struct ubuf_info *uarg = NULL;
1326 	int exthdrlen = 0;
1327 	int dst_exthdrlen = 0;
1328 	int hh_len;
1329 	int copy;
1330 	int err;
1331 	int offset = 0;
1332 	u32 tskey = 0;
1333 	struct rt6_info *rt = (struct rt6_info *)cork->dst;
1334 	struct ipv6_txoptions *opt = v6_cork->opt;
1335 	int csummode = CHECKSUM_NONE;
1336 	unsigned int maxnonfragsize, headersize;
1337 	unsigned int wmem_alloc_delta = 0;
1338 	bool paged, extra_uref = false;
1339 
	/*
	 * An empty queue means this call builds the first skb, which is the
	 * only one that accounts for the fragmentable extension headers and
	 * the extra dst header space.
	 */
1340 	skb = skb_peek_tail(queue);
1341 	if (!skb) {
1342 		exthdrlen = opt ? opt->opt_flen : 0;
1343 		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1344 	}
1345 
	/* With a GSO size set, data goes to pages and IP6_MAX_MTU is the limit. */
1346 	paged = !!cork->gso_size;
1347 	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1348 	orig_mtu = mtu;
1349 
1350 	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1351 	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1352 		tskey = sk->sk_tskey++;
1353 
1354 	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1355 
1356 	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1357 			(opt ? opt->opt_nflen : 0);
1358 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1359 		     sizeof(struct frag_hdr);
1360 
1361 	headersize = sizeof(struct ipv6hdr) +
1362 		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1363 		     (dst_allfrag(&rt->dst) ?
1364 		      sizeof(struct frag_hdr) : 0) +
1365 		     rt->rt6i_nfheader_len;
1366 
1367 	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1368 	 * the first fragment
1369 	 */
1370 	if (headersize + transhdrlen > mtu)
1371 		goto emsgsize;
1372 
	/* dontfrag on UDP/RAW sockets: report the usable size and refuse. */
1373 	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1374 	    (sk->sk_protocol == IPPROTO_UDP ||
1375 	     sk->sk_protocol == IPPROTO_RAW)) {
1376 		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1377 				sizeof(struct ipv6hdr));
1378 		goto emsgsize;
1379 	}
1380 
1381 	if (ip6_sk_ignore_df(sk))
1382 		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1383 	else
1384 		maxnonfragsize = mtu;
1385 
1386 	if (cork->length + length > maxnonfragsize - headersize) {
1387 emsgsize:
1388 		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1389 		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1390 		return -EMSGSIZE;
1391 	}
1392 
1393 	/* CHECKSUM_PARTIAL only with no extension headers and when
1394 	 * we are not going to fragment
1395 	 */
1396 	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1397 	    headersize == sizeof(struct ipv6hdr) &&
1398 	    length <= mtu - headersize &&
1399 	    (!(flags & MSG_MORE) || cork->gso_size) &&
1400 	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1401 		csummode = CHECKSUM_PARTIAL;
1402 
	/*
	 * Zerocopy is only kept when the device does scatter-gather and the
	 * checksum is offloaded; otherwise the uarg is attached but marked
	 * as not zerocopy, and the data is copied below.
	 */
1403 	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
1404 		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
1405 		if (!uarg)
1406 			return -ENOBUFS;
1407 		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
1408 		if (rt->dst.dev->features & NETIF_F_SG &&
1409 		    csummode == CHECKSUM_PARTIAL) {
1410 			paged = true;
1411 		} else {
1412 			uarg->zerocopy = 0;
1413 			skb_zcopy_set(skb, uarg, &extra_uref);
1414 		}
1415 	}
1416 
1417 	/*
1418 	 * Let's try using as much space as possible.
1419 	 * Use MTU if total length of the message fits into the MTU.
1420 	 * Otherwise, we need to reserve fragment header and
1421 	 * fragment alignment (= 8-15 octets, in total).
1422 	 *
1423 	 * Note that we may need to "move" the data from the tail
1424 	 * of the buffer to the new fragment when we split
1425 	 * the message.
1426 	 *
1427 	 * FIXME: It may be fragmented into multiple chunks
1428 	 *        at once if non-fragmentable extension headers
1429 	 *        are too large.
1430 	 * --yoshfuji
1431 	 */
1432 
	/* Accounted up front; the error path subtracts what was not consumed. */
1433 	cork->length += length;
1434 	if (!skb)
1435 		goto alloc_new_skb;
1436 
1437 	while (length > 0) {
1438 		/* Check if the remaining data fits into current packet. */
1439 		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1440 		if (copy < length)
1441 			copy = maxfraglen - skb->len;
1442 
1443 		if (copy <= 0) {
1444 			char *data;
1445 			unsigned int datalen;
1446 			unsigned int fraglen;
1447 			unsigned int fraggap;
1448 			unsigned int alloclen;
1449 			unsigned int pagedlen;
1450 alloc_new_skb:
1451 			/* There's no room in the current skb */
			/* fraggap: bytes of the current skb lying beyond maxfraglen */
1452 			if (skb)
1453 				fraggap = skb->len - maxfraglen;
1454 			else
1455 				fraggap = 0;
1456 			/* update mtu and maxfraglen if necessary */
1457 			if (!skb || !skb_prev)
1458 				ip6_append_data_mtu(&mtu, &maxfraglen,
1459 						    fragheaderlen, skb, rt,
1460 						    orig_mtu);
1461 
1462 			skb_prev = skb;
1463 
1464 			/*
1465 			 * If remaining data exceeds the mtu,
1466 			 * we know we need more fragment(s).
1467 			 */
1468 			datalen = length + fraggap;
1469 
1470 			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1471 				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1472 			fraglen = datalen + fragheaderlen;
1473 			pagedlen = 0;
1474 
			/*
			 * Linear allocation size: a full mtu when more data is
			 * announced and the device cannot do scatter-gather,
			 * the exact fragment when not paged, else at most
			 * MAX_HEADER with the remainder going to pages.
			 */
1475 			if ((flags & MSG_MORE) &&
1476 			    !(rt->dst.dev->features&NETIF_F_SG))
1477 				alloclen = mtu;
1478 			else if (!paged)
1479 				alloclen = fraglen;
1480 			else {
1481 				alloclen = min_t(int, fraglen, MAX_HEADER);
1482 				pagedlen = fraglen - alloclen;
1483 			}
1484 
1485 			alloclen += dst_exthdrlen;
1486 
1487 			if (datalen != length + fraggap) {
1488 				/*
1489 				 * this is not the last fragment, the trailer
1490 				 * space is regarded as data space.
1491 				 */
1492 				datalen += rt->dst.trailer_len;
1493 			}
1494 
1495 			alloclen += rt->dst.trailer_len;
1496 			fraglen = datalen + fragheaderlen;
1497 
1498 			/*
1499 			 * We just reserve space for fragment header.
1500 			 * Note: this may be overallocation if the message
1501 			 * (without MSG_MORE) fits into the MTU.
1502 			 */
1503 			alloclen += sizeof(struct frag_hdr);
1504 
1505 			copy = datalen - transhdrlen - fraggap - pagedlen;
1506 			if (copy < 0) {
1507 				err = -EINVAL;
1508 				goto error;
1509 			}
			/*
			 * The skb carrying the transport header is allocated
			 * through sock_alloc_send_skb(); later ones use
			 * alloc_skb() directly, as long as the socket's write
			 * memory stays within twice sk_sndbuf.
			 */
1510 			if (transhdrlen) {
1511 				skb = sock_alloc_send_skb(sk,
1512 						alloclen + hh_len,
1513 						(flags & MSG_DONTWAIT), &err);
1514 			} else {
1515 				skb = NULL;
1516 				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1517 				    2 * sk->sk_sndbuf)
1518 					skb = alloc_skb(alloclen + hh_len,
1519 							sk->sk_allocation);
1520 				if (unlikely(!skb))
1521 					err = -ENOBUFS;
1522 			}
1523 			if (!skb)
1524 				goto error;
1525 			/*
1526 			 *	Fill in the control structures
1527 			 */
1528 			skb->protocol = htons(ETH_P_IPV6);
1529 			skb->ip_summed = csummode;
1530 			skb->csum = 0;
1531 			/* reserve for fragmentation and ipsec header */
1532 			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1533 				    dst_exthdrlen);
1534 
1535 			/*
1536 			 *	Find where to start putting bytes
1537 			 */
1538 			data = skb_put(skb, fraglen - pagedlen);
1539 			skb_set_network_header(skb, exthdrlen);
1540 			data += fragheaderlen;
1541 			skb->transport_header = (skb->network_header +
1542 						 fragheaderlen);
			/* Move the overhanging tail of the previous skb into this one. */
1543 			if (fraggap) {
1544 				skb->csum = skb_copy_and_csum_bits(
1545 					skb_prev, maxfraglen,
1546 					data + transhdrlen, fraggap, 0);
1547 				skb_prev->csum = csum_sub(skb_prev->csum,
1548 							  skb->csum);
1549 				data += fraggap;
1550 				pskb_trim_unique(skb_prev, maxfraglen);
1551 			}
1552 			if (copy > 0 &&
1553 			    getfrag(from, data + transhdrlen, offset,
1554 				    copy, fraggap, skb) < 0) {
1555 				err = -EFAULT;
1556 				kfree_skb(skb);
1557 				goto error;
1558 			}
1559 
			/* Header lengths only apply to the first skb of the datagram. */
1560 			offset += copy;
1561 			length -= copy + transhdrlen;
1562 			transhdrlen = 0;
1563 			exthdrlen = 0;
1564 			dst_exthdrlen = 0;
1565 
1566 			/* Only the initial fragment is time stamped */
1567 			skb_shinfo(skb)->tx_flags = cork->tx_flags;
1568 			cork->tx_flags = 0;
1569 			skb_shinfo(skb)->tskey = tskey;
1570 			tskey = 0;
1571 			skb_zcopy_set(skb, uarg, &extra_uref);
1572 
1573 			if ((flags & MSG_CONFIRM) && !skb_prev)
1574 				skb_set_dst_pending_confirm(skb, 1);
1575 
1576 			/*
1577 			 * Put the packet on the pending queue
1578 			 */
			/* skbs without a destructor are charged to the socket in bulk at the end */
1579 			if (!skb->destructor) {
1580 				skb->destructor = sock_wfree;
1581 				skb->sk = sk;
1582 				wmem_alloc_delta += skb->truesize;
1583 			}
1584 			__skb_queue_tail(queue, skb);
1585 			continue;
1586 		}
1587 
1588 		if (copy > length)
1589 			copy = length;
1590 
		/*
		 * Three ways to add data to the current skb: into its linear
		 * tailroom (device without scatter-gather), into a page
		 * fragment, or by zerocopy.
		 */
1591 		if (!(rt->dst.dev->features&NETIF_F_SG) &&
1592 		    skb_tailroom(skb) >= copy) {
1593 			unsigned int off;
1594 
1595 			off = skb->len;
1596 			if (getfrag(from, skb_put(skb, copy),
1597 						offset, copy, off, skb) < 0) {
1598 				__skb_trim(skb, off);
1599 				err = -EFAULT;
1600 				goto error;
1601 			}
1602 		} else if (!uarg || !uarg->zerocopy) {
1603 			int i = skb_shinfo(skb)->nr_frags;
1604 
1605 			err = -ENOMEM;
1606 			if (!sk_page_frag_refill(sk, pfrag))
1607 				goto error;
1608 
1609 			if (!skb_can_coalesce(skb, i, pfrag->page,
1610 					      pfrag->offset)) {
1611 				err = -EMSGSIZE;
1612 				if (i == MAX_SKB_FRAGS)
1613 					goto error;
1614 
1615 				__skb_fill_page_desc(skb, i, pfrag->page,
1616 						     pfrag->offset, 0);
1617 				skb_shinfo(skb)->nr_frags = ++i;
1618 				get_page(pfrag->page);
1619 			}
1620 			copy = min_t(int, copy, pfrag->size - pfrag->offset);
1621 			if (getfrag(from,
1622 				    page_address(pfrag->page) + pfrag->offset,
1623 				    offset, copy, skb->len, skb) < 0)
1624 				goto error_efault;
1625 
1626 			pfrag->offset += copy;
1627 			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1628 			skb->len += copy;
1629 			skb->data_len += copy;
1630 			skb->truesize += copy;
1631 			wmem_alloc_delta += copy;
1632 		} else {
1633 			err = skb_zerocopy_iter_dgram(skb, from, copy);
1634 			if (err < 0)
1635 				goto error;
1636 		}
1637 		offset += copy;
1638 		length -= copy;
1639 	}
1640 
	/* Charge the socket once for everything attached during this call. */
1641 	if (wmem_alloc_delta)
1642 		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1643 	return 0;
1644 
1645 error_efault:
1646 	err = -EFAULT;
1647 error:
1648 	if (uarg)
1649 		sock_zerocopy_put_abort(uarg, extra_uref);
	/* 'length' now holds only the bytes that were never appended. */
1650 	cork->length -= length;
1651 	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1652 	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1653 	return err;
1654 }
1655 
ip6_append_data(struct sock * sk,int getfrag (void * from,char * to,int offset,int len,int odd,struct sk_buff * skb),void * from,int length,int transhdrlen,struct ipcm6_cookie * ipc6,struct flowi6 * fl6,struct rt6_info * rt,unsigned int flags)1656 int ip6_append_data(struct sock *sk,
1657 		    int getfrag(void *from, char *to, int offset, int len,
1658 				int odd, struct sk_buff *skb),
1659 		    void *from, int length, int transhdrlen,
1660 		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1661 		    struct rt6_info *rt, unsigned int flags)
1662 {
1663 	struct inet_sock *inet = inet_sk(sk);
1664 	struct ipv6_pinfo *np = inet6_sk(sk);
1665 	int exthdrlen;
1666 	int err;
1667 
1668 	if (flags&MSG_PROBE)
1669 		return 0;
1670 	if (skb_queue_empty(&sk->sk_write_queue)) {
1671 		/*
1672 		 * setup for corking
1673 		 */
1674 		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1675 				     ipc6, rt, fl6);
1676 		if (err)
1677 			return err;
1678 
1679 		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1680 		length += exthdrlen;
1681 		transhdrlen += exthdrlen;
1682 	} else {
1683 		fl6 = &inet->cork.fl.u.ip6;
1684 		transhdrlen = 0;
1685 	}
1686 
1687 	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1688 				 &np->cork, sk_page_frag(sk), getfrag,
1689 				 from, length, transhdrlen, flags, ipc6);
1690 }
1691 EXPORT_SYMBOL_GPL(ip6_append_data);
1692 
ip6_cork_release(struct inet_cork_full * cork,struct inet6_cork * v6_cork)1693 static void ip6_cork_release(struct inet_cork_full *cork,
1694 			     struct inet6_cork *v6_cork)
1695 {
1696 	if (v6_cork->opt) {
1697 		kfree(v6_cork->opt->dst0opt);
1698 		kfree(v6_cork->opt->dst1opt);
1699 		kfree(v6_cork->opt->hopopt);
1700 		kfree(v6_cork->opt->srcrt);
1701 		kfree(v6_cork->opt);
1702 		v6_cork->opt = NULL;
1703 	}
1704 
1705 	if (cork->base.dst) {
1706 		dst_release(cork->base.dst);
1707 		cork->base.dst = NULL;
1708 		cork->base.flags &= ~IPCORK_ALLFRAG;
1709 	}
1710 	memset(&cork->fl, 0, sizeof(cork->fl));
1711 }
1712 
__ip6_make_skb(struct sock * sk,struct sk_buff_head * queue,struct inet_cork_full * cork,struct inet6_cork * v6_cork)1713 struct sk_buff *__ip6_make_skb(struct sock *sk,
1714 			       struct sk_buff_head *queue,
1715 			       struct inet_cork_full *cork,
1716 			       struct inet6_cork *v6_cork)
1717 {
1718 	struct sk_buff *skb, *tmp_skb;
1719 	struct sk_buff **tail_skb;
1720 	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1721 	struct ipv6_pinfo *np = inet6_sk(sk);
1722 	struct net *net = sock_net(sk);
1723 	struct ipv6hdr *hdr;
1724 	struct ipv6_txoptions *opt = v6_cork->opt;
1725 	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1726 	struct flowi6 *fl6 = &cork->fl.u.ip6;
1727 	unsigned char proto = fl6->flowi6_proto;
1728 
1729 	skb = __skb_dequeue(queue);
1730 	if (!skb)
1731 		goto out;
1732 	tail_skb = &(skb_shinfo(skb)->frag_list);
1733 
1734 	/* move skb->data to ip header from ext header */
1735 	if (skb->data < skb_network_header(skb))
1736 		__skb_pull(skb, skb_network_offset(skb));
1737 	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1738 		__skb_pull(tmp_skb, skb_network_header_len(skb));
1739 		*tail_skb = tmp_skb;
1740 		tail_skb = &(tmp_skb->next);
1741 		skb->len += tmp_skb->len;
1742 		skb->data_len += tmp_skb->len;
1743 		skb->truesize += tmp_skb->truesize;
1744 		tmp_skb->destructor = NULL;
1745 		tmp_skb->sk = NULL;
1746 	}
1747 
1748 	/* Allow local fragmentation. */
1749 	skb->ignore_df = ip6_sk_ignore_df(sk);
1750 
1751 	*final_dst = fl6->daddr;
1752 	__skb_pull(skb, skb_network_header_len(skb));
1753 	if (opt && opt->opt_flen)
1754 		ipv6_push_frag_opts(skb, opt, &proto);
1755 	if (opt && opt->opt_nflen)
1756 		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1757 
1758 	skb_push(skb, sizeof(struct ipv6hdr));
1759 	skb_reset_network_header(skb);
1760 	hdr = ipv6_hdr(skb);
1761 
1762 	ip6_flow_hdr(hdr, v6_cork->tclass,
1763 		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
1764 					ip6_autoflowlabel(net, np), fl6));
1765 	hdr->hop_limit = v6_cork->hop_limit;
1766 	hdr->nexthdr = proto;
1767 	hdr->saddr = fl6->saddr;
1768 	hdr->daddr = *final_dst;
1769 
1770 	skb->priority = sk->sk_priority;
1771 	skb->mark = cork->base.mark;
1772 
1773 	skb->tstamp = cork->base.transmit_time;
1774 
1775 	skb_dst_set(skb, dst_clone(&rt->dst));
1776 	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1777 	if (proto == IPPROTO_ICMPV6) {
1778 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1779 
1780 		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1781 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1782 	}
1783 
1784 	ip6_cork_release(cork, v6_cork);
1785 out:
1786 	return skb;
1787 }
1788 
ip6_send_skb(struct sk_buff * skb)1789 int ip6_send_skb(struct sk_buff *skb)
1790 {
1791 	struct net *net = sock_net(skb->sk);
1792 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1793 	int err;
1794 
1795 	err = ip6_local_out(net, skb->sk, skb);
1796 	if (err) {
1797 		if (err > 0)
1798 			err = net_xmit_errno(err);
1799 		if (err)
1800 			IP6_INC_STATS(net, rt->rt6i_idev,
1801 				      IPSTATS_MIB_OUTDISCARDS);
1802 	}
1803 
1804 	return err;
1805 }
1806 
/*
 * ip6_push_pending_frames - build and send the socket's pending datagram
 *
 * Returns 0 when ip6_finish_skb() yields no packet, otherwise the result
 * of ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1818 
__ip6_flush_pending_frames(struct sock * sk,struct sk_buff_head * queue,struct inet_cork_full * cork,struct inet6_cork * v6_cork)1819 static void __ip6_flush_pending_frames(struct sock *sk,
1820 				       struct sk_buff_head *queue,
1821 				       struct inet_cork_full *cork,
1822 				       struct inet6_cork *v6_cork)
1823 {
1824 	struct sk_buff *skb;
1825 
1826 	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1827 		if (skb_dst(skb))
1828 			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1829 				      IPSTATS_MIB_OUTDISCARDS);
1830 		kfree_skb(skb);
1831 	}
1832 
1833 	ip6_cork_release(cork, v6_cork);
1834 }
1835 
ip6_flush_pending_frames(struct sock * sk)1836 void ip6_flush_pending_frames(struct sock *sk)
1837 {
1838 	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1839 				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1840 }
1841 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1842 
ip6_make_skb(struct sock * sk,int getfrag (void * from,char * to,int offset,int len,int odd,struct sk_buff * skb),void * from,int length,int transhdrlen,struct ipcm6_cookie * ipc6,struct flowi6 * fl6,struct rt6_info * rt,unsigned int flags,struct inet_cork_full * cork)1843 struct sk_buff *ip6_make_skb(struct sock *sk,
1844 			     int getfrag(void *from, char *to, int offset,
1845 					 int len, int odd, struct sk_buff *skb),
1846 			     void *from, int length, int transhdrlen,
1847 			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1848 			     struct rt6_info *rt, unsigned int flags,
1849 			     struct inet_cork_full *cork)
1850 {
1851 	struct inet6_cork v6_cork;
1852 	struct sk_buff_head queue;
1853 	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1854 	int err;
1855 
1856 	if (flags & MSG_PROBE)
1857 		return NULL;
1858 
1859 	__skb_queue_head_init(&queue);
1860 
1861 	cork->base.flags = 0;
1862 	cork->base.addr = 0;
1863 	cork->base.opt = NULL;
1864 	cork->base.dst = NULL;
1865 	v6_cork.opt = NULL;
1866 	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1867 	if (err) {
1868 		ip6_cork_release(cork, &v6_cork);
1869 		return ERR_PTR(err);
1870 	}
1871 	if (ipc6->dontfrag < 0)
1872 		ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1873 
1874 	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1875 				&current->task_frag, getfrag, from,
1876 				length + exthdrlen, transhdrlen + exthdrlen,
1877 				flags, ipc6);
1878 	if (err) {
1879 		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1880 		return ERR_PTR(err);
1881 	}
1882 
1883 	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1884 }
1885