1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	IPv6 output functions
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on linux/net/ipv4/ip_output.c
10  *
11  *	Changes:
12  *	A.N.Kuznetsov	:	arithmetic in fragmentation.
13  *				extension headers are implemented.
14  *				route changes now work.
15  *				ip6_forward does not confuse sniffers.
16  *				etc.
17  *
18  *      H. von Brand    :       Added missing #include <linux/string.h>
19  *	Imran Patel	:	frag id should be in NBO
20  *      Kazunori MIYAZAWA @USAGI
21  *			:       add ip6_append_data and related functions
22  *				for datagram xmit
23  */
24 
25 #include <linux/errno.h>
26 #include <linux/kernel.h>
27 #include <linux/string.h>
28 #include <linux/socket.h>
29 #include <linux/net.h>
30 #include <linux/netdevice.h>
31 #include <linux/if_arp.h>
32 #include <linux/in6.h>
33 #include <linux/tcp.h>
34 #include <linux/route.h>
35 #include <linux/module.h>
36 #include <linux/slab.h>
37 
38 #include <linux/bpf-cgroup.h>
39 #include <linux/netfilter.h>
40 #include <linux/netfilter_ipv6.h>
41 
42 #include <net/sock.h>
43 #include <net/snmp.h>
44 
45 #include <net/ipv6.h>
46 #include <net/ndisc.h>
47 #include <net/protocol.h>
48 #include <net/ip6_route.h>
49 #include <net/addrconf.h>
50 #include <net/rawv6.h>
51 #include <net/icmp.h>
52 #include <net/xfrm.h>
53 #include <net/checksum.h>
54 #include <linux/mroute6.h>
55 #include <net/l3mdev.h>
56 #include <net/lwtunnel.h>
57 
58 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
59 {
60 	struct dst_entry *dst = skb_dst(skb);
61 	struct net_device *dev = dst->dev;
62 	unsigned int hh_len = LL_RESERVED_SPACE(dev);
63 	int delta = hh_len - skb_headroom(skb);
64 	const struct in6_addr *nexthop;
65 	struct neighbour *neigh;
66 	int ret;
67 
68 	/* Be paranoid, rather than too clever. */
69 	if (unlikely(delta > 0) && dev->header_ops) {
70 		/* pskb_expand_head() might crash if skb is shared */
71 		if (skb_shared(skb)) {
72 			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
73 
74 			if (likely(nskb)) {
75 				if (skb->sk)
76 					skb_set_owner_w(nskb, skb->sk);
77 				consume_skb(skb);
78 			} else {
79 				kfree_skb(skb);
80 			}
81 			skb = nskb;
82 		}
83 		if (skb &&
84 		    pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
85 			kfree_skb(skb);
86 			skb = NULL;
87 		}
88 		if (!skb) {
89 			IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
90 			return -ENOMEM;
91 		}
92 	}
93 
94 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
95 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
96 
97 		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
98 		    ((mroute6_is_socket(net, skb) &&
99 		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
100 		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
101 					 &ipv6_hdr(skb)->saddr))) {
102 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
103 
104 			/* Do not check for IFF_ALLMULTI; multicast routing
105 			   is not supported in any case.
106 			 */
107 			if (newskb)
108 				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
109 					net, sk, newskb, NULL, newskb->dev,
110 					dev_loopback_xmit);
111 
112 			if (ipv6_hdr(skb)->hop_limit == 0) {
113 				IP6_INC_STATS(net, idev,
114 					      IPSTATS_MIB_OUTDISCARDS);
115 				kfree_skb(skb);
116 				return 0;
117 			}
118 		}
119 
120 		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
121 
122 		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
123 		    IPV6_ADDR_SCOPE_NODELOCAL &&
124 		    !(dev->flags & IFF_LOOPBACK)) {
125 			kfree_skb(skb);
126 			return 0;
127 		}
128 	}
129 
130 	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
131 		int res = lwtunnel_xmit(skb);
132 
133 		if (res != LWTUNNEL_XMIT_CONTINUE)
134 			return res;
135 	}
136 
137 	rcu_read_lock_bh();
138 	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
139 	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
140 	if (unlikely(!neigh))
141 		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
142 	if (!IS_ERR(neigh)) {
143 		sock_confirm_neigh(skb, neigh);
144 		ret = neigh_output(neigh, skb, false);
145 		rcu_read_unlock_bh();
146 		return ret;
147 	}
148 	rcu_read_unlock_bh();
149 
150 	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
151 	kfree_skb(skb);
152 	return -EINVAL;
153 }
154 
155 static int
156 ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
157 				    struct sk_buff *skb, unsigned int mtu)
158 {
159 	struct sk_buff *segs, *nskb;
160 	netdev_features_t features;
161 	int ret = 0;
162 
163 	/* Please see corresponding comment in ip_finish_output_gso
164 	 * describing the cases where GSO segment length exceeds the
165 	 * egress MTU.
166 	 */
167 	features = netif_skb_features(skb);
168 	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
169 	if (IS_ERR_OR_NULL(segs)) {
170 		kfree_skb(skb);
171 		return -ENOMEM;
172 	}
173 
174 	consume_skb(skb);
175 
176 	skb_list_walk_safe(segs, segs, nskb) {
177 		int err;
178 
179 		skb_mark_not_on_list(segs);
180 		/* Last GSO segment can be smaller than gso_size (and MTU).
181 		 * Adding a fragment header would produce an "atomic fragment",
182 		 * which is considered harmful (RFC 8021). Avoid that.
183 		 */
184 		err = segs->len > mtu ?
185 			ip6_fragment(net, sk, segs, ip6_finish_output2) :
186 			ip6_finish_output2(net, sk, segs);
187 		if (err && ret == 0)
188 			ret = err;
189 	}
190 
191 	return ret;
192 }
193 
194 static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
195 {
196 	unsigned int mtu;
197 
198 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
199 	/* Policy lookup after SNAT yielded a new policy */
200 	if (skb_dst(skb)->xfrm) {
201 		IP6CB(skb)->flags |= IP6SKB_REROUTED;
202 		return dst_output(net, sk, skb);
203 	}
204 #endif
205 
206 	mtu = ip6_skb_dst_mtu(skb);
207 	if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
208 		return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
209 
210 	if ((skb->len > mtu && !skb_is_gso(skb)) ||
211 	    dst_allfrag(skb_dst(skb)) ||
212 	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
213 		return ip6_fragment(net, sk, skb, ip6_finish_output2);
214 	else
215 		return ip6_finish_output2(net, sk, skb);
216 }
217 
218 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
219 {
220 	int ret;
221 
222 	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
223 	switch (ret) {
224 	case NET_XMIT_SUCCESS:
225 		return __ip6_finish_output(net, sk, skb);
226 	case NET_XMIT_CN:
227 		return __ip6_finish_output(net, sk, skb) ? : ret;
228 	default:
229 		kfree_skb(skb);
230 		return ret;
231 	}
232 }
233 
234 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
235 {
236 	struct net_device *dev = skb_dst(skb)->dev;
237 	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
238 
239 	skb->protocol = htons(ETH_P_IPV6);
240 	skb->dev = dev;
241 
242 	if (unlikely(idev->cnf.disable_ipv6)) {
243 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
244 		kfree_skb(skb);
245 		return 0;
246 	}
247 
248 	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
249 			    net, sk, skb, NULL, dev,
250 			    ip6_finish_output,
251 			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
252 }
253 
254 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
255 {
256 	if (!np->autoflowlabel_set)
257 		return ip6_default_np_autolabel(net);
258 	else
259 		return np->autoflowlabel;
260 }
261 
262 /*
263  * xmit an sk_buff (used by TCP, SCTP and DCCP)
264  * Note : socket lock is not held for SYNACK packets, but might be modified
265  * by calls to skb_set_owner_w() and ipv6_local_error(),
266  * which are using proper atomic operations or spinlocks.
267  */
268 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
269 	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
270 {
271 	struct net *net = sock_net(sk);
272 	const struct ipv6_pinfo *np = inet6_sk(sk);
273 	struct in6_addr *first_hop = &fl6->daddr;
274 	struct dst_entry *dst = skb_dst(skb);
275 	unsigned int head_room;
276 	struct ipv6hdr *hdr;
277 	u8  proto = fl6->flowi6_proto;
278 	int seg_len = skb->len;
279 	int hlimit = -1;
280 	u32 mtu;
281 
282 	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
283 	if (opt)
284 		head_room += opt->opt_nflen + opt->opt_flen;
285 
286 	if (unlikely(skb_headroom(skb) < head_room)) {
287 		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
288 		if (!skb2) {
289 			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
290 				      IPSTATS_MIB_OUTDISCARDS);
291 			kfree_skb(skb);
292 			return -ENOBUFS;
293 		}
294 		if (skb->sk)
295 			skb_set_owner_w(skb2, skb->sk);
296 		consume_skb(skb);
297 		skb = skb2;
298 	}
299 
300 	if (opt) {
301 		seg_len += opt->opt_nflen + opt->opt_flen;
302 
303 		if (opt->opt_flen)
304 			ipv6_push_frag_opts(skb, opt, &proto);
305 
306 		if (opt->opt_nflen)
307 			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
308 					     &fl6->saddr);
309 	}
310 
311 	skb_push(skb, sizeof(struct ipv6hdr));
312 	skb_reset_network_header(skb);
313 	hdr = ipv6_hdr(skb);
314 
315 	/*
316 	 *	Fill in the IPv6 header
317 	 */
318 	if (np)
319 		hlimit = np->hop_limit;
320 	if (hlimit < 0)
321 		hlimit = ip6_dst_hoplimit(dst);
322 
323 	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
324 				ip6_autoflowlabel(net, np), fl6));
325 
326 	hdr->payload_len = htons(seg_len);
327 	hdr->nexthdr = proto;
328 	hdr->hop_limit = hlimit;
329 
330 	hdr->saddr = fl6->saddr;
331 	hdr->daddr = *first_hop;
332 
333 	skb->protocol = htons(ETH_P_IPV6);
334 	skb->priority = priority;
335 	skb->mark = mark;
336 
337 	mtu = dst_mtu(dst);
338 	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
339 		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
340 			      IPSTATS_MIB_OUT, skb->len);
341 
342 		/* if egress device is enslaved to an L3 master device pass the
343 		 * skb to its handler for processing
344 		 */
345 		skb = l3mdev_ip6_out((struct sock *)sk, skb);
346 		if (unlikely(!skb))
347 			return 0;
348 
349 		/* hooks should never assume socket lock is held.
350 		 * we promote our socket to non const
351 		 */
352 		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
353 			       net, (struct sock *)sk, skb, NULL, dst->dev,
354 			       dst_output);
355 	}
356 
357 	skb->dev = dst->dev;
358 	/* ipv6_local_error() does not require socket lock,
359 	 * we promote our socket to non const
360 	 */
361 	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
362 
363 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
364 	kfree_skb(skb);
365 	return -EMSGSIZE;
366 }
367 EXPORT_SYMBOL(ip6_xmit);
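
/*
 * Editorial example, not part of the original file: a minimal sketch of
 * how a transport protocol might hand a fully built segment to
 * ip6_xmit(). The helper name is hypothetical; the call shape loosely
 * follows the TCP SYNACK transmit path, which also passes mark, options,
 * traffic class and priority explicitly.
 */
static int __maybe_unused example_ip6_xmit_one(struct sock *sk,
					       struct sk_buff *skb,
					       struct flowi6 *fl6)
{
	/* The caller must already have attached a dst to the skb;
	 * ip6_xmit() consumes the skb on both success and failure.
	 */
	return ip6_xmit(sk, skb, fl6, sk->sk_mark, NULL /* no ext headers */,
			0 /* default traffic class */, sk->sk_priority);
}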
368 
369 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
370 {
371 	struct ip6_ra_chain *ra;
372 	struct sock *last = NULL;
373 
374 	read_lock(&ip6_ra_lock);
375 	for (ra = ip6_ra_chain; ra; ra = ra->next) {
376 		struct sock *sk = ra->sk;
377 		if (sk && ra->sel == sel &&
378 		    (!sk->sk_bound_dev_if ||
379 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
380 			struct ipv6_pinfo *np = inet6_sk(sk);
381 
382 			if (np && np->rtalert_isolate &&
383 			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
384 				continue;
385 			}
386 			if (last) {
387 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
388 				if (skb2)
389 					rawv6_rcv(last, skb2);
390 			}
391 			last = sk;
392 		}
393 	}
394 
395 	if (last) {
396 		rawv6_rcv(last, skb);
397 		read_unlock(&ip6_ra_lock);
398 		return 1;
399 	}
400 	read_unlock(&ip6_ra_lock);
401 	return 0;
402 }
403 
404 static int ip6_forward_proxy_check(struct sk_buff *skb)
405 {
406 	struct ipv6hdr *hdr = ipv6_hdr(skb);
407 	u8 nexthdr = hdr->nexthdr;
408 	__be16 frag_off;
409 	int offset;
410 
411 	if (ipv6_ext_hdr(nexthdr)) {
412 		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
413 		if (offset < 0)
414 			return 0;
415 	} else
416 		offset = sizeof(struct ipv6hdr);
417 
418 	if (nexthdr == IPPROTO_ICMPV6) {
419 		struct icmp6hdr *icmp6;
420 
421 		if (!pskb_may_pull(skb, (skb_network_header(skb) +
422 					 offset + 1 - skb->data)))
423 			return 0;
424 
425 		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
426 
427 		switch (icmp6->icmp6_type) {
428 		case NDISC_ROUTER_SOLICITATION:
429 		case NDISC_ROUTER_ADVERTISEMENT:
430 		case NDISC_NEIGHBOUR_SOLICITATION:
431 		case NDISC_NEIGHBOUR_ADVERTISEMENT:
432 		case NDISC_REDIRECT:
433 			/* For reactions involving unicast neighbor discovery
434 			 * messages destined to the proxied address, pass them
435 			 * to the input function.
436 			 */
437 			return 1;
438 		default:
439 			break;
440 		}
441 	}
442 
443 	/*
444 	 * The proxying router can't forward traffic sent to a link-local
445 	 * address, so signal the sender and discard the packet. This
446 	 * behavior is clarified by the MIPv6 specification.
447 	 */
448 	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
449 		dst_link_failure(skb);
450 		return -1;
451 	}
452 
453 	return 0;
454 }
455 
456 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
457 				     struct sk_buff *skb)
458 {
459 	struct dst_entry *dst = skb_dst(skb);
460 
461 	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
462 	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
463 
464 #ifdef CONFIG_NET_SWITCHDEV
465 	if (skb->offload_l3_fwd_mark) {
466 		consume_skb(skb);
467 		return 0;
468 	}
469 #endif
470 
471 	skb->tstamp = 0;
472 	return dst_output(net, sk, skb);
473 }
474 
475 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
476 {
477 	if (skb->len <= mtu)
478 		return false;
479 
480 	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
481 	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
482 		return true;
483 
484 	if (skb->ignore_df)
485 		return false;
486 
487 	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
488 		return false;
489 
490 	return true;
491 }
492 
493 int ip6_forward(struct sk_buff *skb)
494 {
495 	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
496 	struct dst_entry *dst = skb_dst(skb);
497 	struct ipv6hdr *hdr = ipv6_hdr(skb);
498 	struct inet6_skb_parm *opt = IP6CB(skb);
499 	struct net *net = dev_net(dst->dev);
500 	u32 mtu;
501 
502 	if (net->ipv6.devconf_all->forwarding == 0)
503 		goto error;
504 
505 	if (skb->pkt_type != PACKET_HOST)
506 		goto drop;
507 
508 	if (unlikely(skb->sk))
509 		goto drop;
510 
511 	if (skb_warn_if_lro(skb))
512 		goto drop;
513 
514 	if (!net->ipv6.devconf_all->disable_policy &&
515 	    (!idev || !idev->cnf.disable_policy) &&
516 	    !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
517 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
518 		goto drop;
519 	}
520 
521 	skb_forward_csum(skb);
522 
523 	/*
524  *	We do NOT do any processing on
525  *	RA packets, pushing them to user level AS IS
526  *	without any WARRANTY that applications will be able
527  *	to interpret them. The reason is that we
528  *	cannot do anything clever here.
529  *
530  *	We are not an end node, so if a packet contains
531  *	AH/ESP we cannot do anything.
532  *	Defragmentation would also be a mistake; RA packets
533  *	cannot be fragmented, because there is no guarantee
534  *	that different fragments will go along one path. --ANK
535 	 */
536 	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
537 		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
538 			return 0;
539 	}
540 
541 	/*
542 	 *	check and decrement hop limit
543 	 */
544 	if (hdr->hop_limit <= 1) {
545 		/* Force OUTPUT device used as source address */
546 		skb->dev = dst->dev;
547 		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
548 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
549 
550 		kfree_skb(skb);
551 		return -ETIMEDOUT;
552 	}
553 
554 	/* XXX: idev->cnf.proxy_ndp? */
555 	if (net->ipv6.devconf_all->proxy_ndp &&
556 	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
557 		int proxied = ip6_forward_proxy_check(skb);
558 		if (proxied > 0)
559 			return ip6_input(skb);
560 		else if (proxied < 0) {
561 			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
562 			goto drop;
563 		}
564 	}
565 
566 	if (!xfrm6_route_forward(skb)) {
567 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
568 		goto drop;
569 	}
570 	dst = skb_dst(skb);
571 
572 	/* IPv6 specs say nothing about it, but it is clear that we cannot
573 	   send redirects to source routed frames.
574 	   We don't send redirects to frames decapsulated from IPsec.
575 	 */
576 	if (IP6CB(skb)->iif == dst->dev->ifindex &&
577 	    opt->srcrt == 0 && !skb_sec_path(skb)) {
578 		struct in6_addr *target = NULL;
579 		struct inet_peer *peer;
580 		struct rt6_info *rt;
581 
582 		/*
583 		 *	incoming and outgoing devices are the same;
584 		 *	send a redirect.
585 		 */
586 
587 		rt = (struct rt6_info *) dst;
588 		if (rt->rt6i_flags & RTF_GATEWAY)
589 			target = &rt->rt6i_gateway;
590 		else
591 			target = &hdr->daddr;
592 
593 		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
594 
595 		/* Limit redirects both by destination (here)
596 		   and by source (inside ndisc_send_redirect)
597 		 */
598 		if (inet_peer_xrlim_allow(peer, 1*HZ))
599 			ndisc_send_redirect(skb, target);
600 		if (peer)
601 			inet_putpeer(peer);
602 	} else {
603 		int addrtype = ipv6_addr_type(&hdr->saddr);
604 
605 		/* This check is security critical. */
606 		if (addrtype == IPV6_ADDR_ANY ||
607 		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
608 			goto error;
609 		if (addrtype & IPV6_ADDR_LINKLOCAL) {
610 			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
611 				    ICMPV6_NOT_NEIGHBOUR, 0);
612 			goto error;
613 		}
614 	}
615 
616 	mtu = ip6_dst_mtu_forward(dst);
617 	if (mtu < IPV6_MIN_MTU)
618 		mtu = IPV6_MIN_MTU;
619 
620 	if (ip6_pkt_too_big(skb, mtu)) {
621 		/* Again, force OUTPUT device used as source address */
622 		skb->dev = dst->dev;
623 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
624 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
625 		__IP6_INC_STATS(net, ip6_dst_idev(dst),
626 				IPSTATS_MIB_FRAGFAILS);
627 		kfree_skb(skb);
628 		return -EMSGSIZE;
629 	}
630 
631 	if (skb_cow(skb, dst->dev->hard_header_len)) {
632 		__IP6_INC_STATS(net, ip6_dst_idev(dst),
633 				IPSTATS_MIB_OUTDISCARDS);
634 		goto drop;
635 	}
636 
637 	hdr = ipv6_hdr(skb);
638 
639 	/* Mangling the hop count is delayed until after the skb COW */
640 
641 	hdr->hop_limit--;
642 
643 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
644 		       net, NULL, skb, skb->dev, dst->dev,
645 		       ip6_forward_finish);
646 
647 error:
648 	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
649 drop:
650 	kfree_skb(skb);
651 	return -EINVAL;
652 }
653 
654 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
655 {
656 	to->pkt_type = from->pkt_type;
657 	to->priority = from->priority;
658 	to->protocol = from->protocol;
659 	skb_dst_drop(to);
660 	skb_dst_set(to, dst_clone(skb_dst(from)));
661 	to->dev = from->dev;
662 	to->mark = from->mark;
663 
664 	skb_copy_hash(to, from);
665 
666 #ifdef CONFIG_NET_SCHED
667 	to->tc_index = from->tc_index;
668 #endif
669 	nf_copy(to, from);
670 	skb_ext_copy(to, from);
671 	skb_copy_secmark(to, from);
672 }
673 
674 int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
675 		      u8 nexthdr, __be32 frag_id,
676 		      struct ip6_fraglist_iter *iter)
677 {
678 	unsigned int first_len;
679 	struct frag_hdr *fh;
680 
681 	/* BUILD HEADER */
682 	*prevhdr = NEXTHDR_FRAGMENT;
683 	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
684 	if (!iter->tmp_hdr)
685 		return -ENOMEM;
686 
687 	iter->frag = skb_shinfo(skb)->frag_list;
688 	skb_frag_list_init(skb);
689 
690 	iter->offset = 0;
691 	iter->hlen = hlen;
692 	iter->frag_id = frag_id;
693 	iter->nexthdr = nexthdr;
694 
695 	__skb_pull(skb, hlen);
696 	fh = __skb_push(skb, sizeof(struct frag_hdr));
697 	__skb_push(skb, hlen);
698 	skb_reset_network_header(skb);
699 	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
700 
701 	fh->nexthdr = nexthdr;
702 	fh->reserved = 0;
703 	fh->frag_off = htons(IP6_MF);
704 	fh->identification = frag_id;
705 
706 	first_len = skb_pagelen(skb);
707 	skb->data_len = first_len - skb_headlen(skb);
708 	skb->len = first_len;
709 	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
710 
711 	return 0;
712 }
713 EXPORT_SYMBOL(ip6_fraglist_init);
714 
715 void ip6_fraglist_prepare(struct sk_buff *skb,
716 			  struct ip6_fraglist_iter *iter)
717 {
718 	struct sk_buff *frag = iter->frag;
719 	unsigned int hlen = iter->hlen;
720 	struct frag_hdr *fh;
721 
722 	frag->ip_summed = CHECKSUM_NONE;
723 	skb_reset_transport_header(frag);
724 	fh = __skb_push(frag, sizeof(struct frag_hdr));
725 	__skb_push(frag, hlen);
726 	skb_reset_network_header(frag);
727 	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
728 	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
729 	fh->nexthdr = iter->nexthdr;
730 	fh->reserved = 0;
731 	fh->frag_off = htons(iter->offset);
732 	if (frag->next)
733 		fh->frag_off |= htons(IP6_MF);
734 	fh->identification = iter->frag_id;
735 	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
736 	ip6_copy_metadata(frag, skb);
737 }
738 EXPORT_SYMBOL(ip6_fraglist_prepare);
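
/*
 * Editorial example, not part of the original file: how a caller drives
 * the exported fraglist iterator above. This mirrors the fast path of
 * ip6_fragment() below; xmit_one() is a hypothetical per-fragment
 * transmit callback.
 */
static int __maybe_unused example_fraglist_xmit(struct net *net, struct sock *sk,
						struct sk_buff *skb,
						unsigned int hlen, u8 *prevhdr,
						u8 nexthdr, __be32 frag_id,
						int (*xmit_one)(struct net *,
								struct sock *,
								struct sk_buff *))
{
	struct ip6_fraglist_iter iter;
	int err;

	err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id, &iter);
	if (err < 0)
		return err;

	for (;;) {
		/* Prepare the next fragment before the current one is sent. */
		if (iter.frag)
			ip6_fraglist_prepare(skb, &iter);

		err = xmit_one(net, sk, skb);
		if (err || !iter.frag)
			break;

		skb = ip6_fraglist_next(&iter);
	}

	kfree(iter.tmp_hdr);
	if (err)
		kfree_skb_list(iter.frag);
	return err;
}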
739 
740 void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
741 		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
742 		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
743 {
744 	state->prevhdr = prevhdr;
745 	state->nexthdr = nexthdr;
746 	state->frag_id = frag_id;
747 
748 	state->hlen = hlen;
749 	state->mtu = mtu;
750 
751 	state->left = skb->len - hlen;	/* Space per frame */
752 	state->ptr = hlen;		/* Where to start from */
753 
754 	state->hroom = hdr_room;
755 	state->troom = needed_tailroom;
756 
757 	state->offset = 0;
758 }
759 EXPORT_SYMBOL(ip6_frag_init);
760 
761 struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
762 {
763 	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
764 	struct sk_buff *frag;
765 	struct frag_hdr *fh;
766 	unsigned int len;
767 
768 	len = state->left;
769 	/* IF: it doesn't fit, use 'mtu' - the data space left */
770 	if (len > state->mtu)
771 		len = state->mtu;
772 	/* IF: we are not sending up to and including the packet end
773 	   then align the next start on an eight byte boundary */
774 	if (len < state->left)
775 		len &= ~7;
776 
777 	/* Allocate buffer */
778 	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
779 			 state->hroom + state->troom, GFP_ATOMIC);
780 	if (!frag)
781 		return ERR_PTR(-ENOMEM);
782 
783 	/*
784 	 *	Set up data on packet
785 	 */
786 
787 	ip6_copy_metadata(frag, skb);
788 	skb_reserve(frag, state->hroom);
789 	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
790 	skb_reset_network_header(frag);
791 	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
792 	frag->transport_header = (frag->network_header + state->hlen +
793 				  sizeof(struct frag_hdr));
794 
795 	/*
796 	 *	Charge the memory for the fragment to any owner
797 	 *	it might possess
798 	 */
799 	if (skb->sk)
800 		skb_set_owner_w(frag, skb->sk);
801 
802 	/*
803 	 *	Copy the packet header into the new buffer.
804 	 */
805 	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
806 
807 	fragnexthdr_offset = skb_network_header(frag);
808 	fragnexthdr_offset += prevhdr - skb_network_header(skb);
809 	*fragnexthdr_offset = NEXTHDR_FRAGMENT;
810 
811 	/*
812 	 *	Build fragment header.
813 	 */
814 	fh->nexthdr = state->nexthdr;
815 	fh->reserved = 0;
816 	fh->identification = state->frag_id;
817 
818 	/*
819 	 *	Copy a block of the IP datagram.
820 	 */
821 	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
822 			     len));
823 	state->left -= len;
824 
825 	fh->frag_off = htons(state->offset);
826 	if (state->left > 0)
827 		fh->frag_off |= htons(IP6_MF);
828 	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
829 
830 	state->ptr += len;
831 	state->offset += len;
832 
833 	return frag;
834 }
835 EXPORT_SYMBOL(ip6_frag_next);
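
/*
 * Editorial example, not part of the original file: the slow-path
 * counterpart to the fraglist iterator, using ip6_frag_init() and
 * ip6_frag_next() the same way ip6_fragment() does below. output_one()
 * is a hypothetical per-fragment transmit callback.
 */
static int __maybe_unused example_frag_xmit(struct net *net, struct sock *sk,
					    struct sk_buff *skb,
					    unsigned int hlen, unsigned int mtu,
					    u8 *prevhdr, u8 nexthdr,
					    __be32 frag_id, struct rt6_info *rt,
					    int (*output_one)(struct net *,
							      struct sock *,
							      struct sk_buff *))
{
	struct ip6_frag_state state;
	struct sk_buff *frag;
	int err = 0;

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr,
		      frag_id, &state);

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			break;
		}
		err = output_one(net, sk, frag);
		if (err)
			break;
	}

	/* The original skb served only as a template; drop it either way. */
	if (err)
		kfree_skb(skb);
	else
		consume_skb(skb);
	return err;
}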
836 
837 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
838 		 int (*output)(struct net *, struct sock *, struct sk_buff *))
839 {
840 	struct sk_buff *frag;
841 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
842 	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
843 				inet6_sk(skb->sk) : NULL;
844 	struct ip6_frag_state state;
845 	unsigned int mtu, hlen, nexthdr_offset;
846 	ktime_t tstamp = skb->tstamp;
847 	int hroom, err = 0;
848 	__be32 frag_id;
849 	u8 *prevhdr, nexthdr = 0;
850 
851 	err = ip6_find_1stfragopt(skb, &prevhdr);
852 	if (err < 0)
853 		goto fail;
854 	hlen = err;
855 	nexthdr = *prevhdr;
856 	nexthdr_offset = prevhdr - skb_network_header(skb);
857 
858 	mtu = ip6_skb_dst_mtu(skb);
859 
860 	/* We must not fragment if the socket is set to force MTU discovery
861 	 * or if the skb is not generated by a local socket.
862 	 */
863 	if (unlikely(!skb->ignore_df && skb->len > mtu))
864 		goto fail_toobig;
865 
866 	if (IP6CB(skb)->frag_max_size) {
867 		if (IP6CB(skb)->frag_max_size > mtu)
868 			goto fail_toobig;
869 
870 		/* don't send fragments larger than what we received */
871 		mtu = IP6CB(skb)->frag_max_size;
872 		if (mtu < IPV6_MIN_MTU)
873 			mtu = IPV6_MIN_MTU;
874 	}
875 
876 	if (np && np->frag_size < mtu) {
877 		if (np->frag_size)
878 			mtu = np->frag_size;
879 	}
880 	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
881 		goto fail_toobig;
882 	mtu -= hlen + sizeof(struct frag_hdr);
883 
884 	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
885 				    &ipv6_hdr(skb)->saddr);
886 
887 	if (skb->ip_summed == CHECKSUM_PARTIAL &&
888 	    (err = skb_checksum_help(skb)))
889 		goto fail;
890 
891 	prevhdr = skb_network_header(skb) + nexthdr_offset;
892 	hroom = LL_RESERVED_SPACE(rt->dst.dev);
893 	if (skb_has_frag_list(skb)) {
894 		unsigned int first_len = skb_pagelen(skb);
895 		struct ip6_fraglist_iter iter;
896 		struct sk_buff *frag2;
897 
898 		if (first_len - hlen > mtu ||
899 		    ((first_len - hlen) & 7) ||
900 		    skb_cloned(skb) ||
901 		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
902 			goto slow_path;
903 
904 		skb_walk_frags(skb, frag) {
905 			/* Correct geometry. */
906 			if (frag->len > mtu ||
907 			    ((frag->len & 7) && frag->next) ||
908 			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
909 				goto slow_path_clean;
910 
911 			/* Partially cloned skb? */
912 			if (skb_shared(frag))
913 				goto slow_path_clean;
914 
915 			BUG_ON(frag->sk);
916 			if (skb->sk) {
917 				frag->sk = skb->sk;
918 				frag->destructor = sock_wfree;
919 			}
920 			skb->truesize -= frag->truesize;
921 		}
922 
923 		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
924 					&iter);
925 		if (err < 0)
926 			goto fail;
927 
928 		/* We prevent @rt from being freed. */
929 		rcu_read_lock();
930 
931 		for (;;) {
932 			/* Prepare the header of the next frame
933 			 * before the previous one goes down. */
934 			if (iter.frag)
935 				ip6_fraglist_prepare(skb, &iter);
936 
937 			skb->tstamp = tstamp;
938 			err = output(net, sk, skb);
939 			if (!err)
940 				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
941 					      IPSTATS_MIB_FRAGCREATES);
942 
943 			if (err || !iter.frag)
944 				break;
945 
946 			skb = ip6_fraglist_next(&iter);
947 		}
948 
949 		kfree(iter.tmp_hdr);
950 
951 		if (err == 0) {
952 			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
953 				      IPSTATS_MIB_FRAGOKS);
954 			rcu_read_unlock();
955 			return 0;
956 		}
957 
958 		kfree_skb_list(iter.frag);
959 
960 		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
961 			      IPSTATS_MIB_FRAGFAILS);
962 		rcu_read_unlock();
963 		return err;
964 
965 slow_path_clean:
966 		skb_walk_frags(skb, frag2) {
967 			if (frag2 == frag)
968 				break;
969 			frag2->sk = NULL;
970 			frag2->destructor = NULL;
971 			skb->truesize += frag2->truesize;
972 		}
973 	}
974 
975 slow_path:
976 	/*
977 	 *	Fragment the datagram.
978 	 */
979 
980 	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
981 		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
982 		      &state);
983 
984 	/*
985 	 *	Keep copying data until we run out.
986 	 */
987 
988 	while (state.left > 0) {
989 		frag = ip6_frag_next(skb, &state);
990 		if (IS_ERR(frag)) {
991 			err = PTR_ERR(frag);
992 			goto fail;
993 		}
994 
995 		/*
996 		 *	Put this fragment into the sending queue.
997 		 */
998 		frag->tstamp = tstamp;
999 		err = output(net, sk, frag);
1000 		if (err)
1001 			goto fail;
1002 
1003 		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1004 			      IPSTATS_MIB_FRAGCREATES);
1005 	}
1006 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1007 		      IPSTATS_MIB_FRAGOKS);
1008 	consume_skb(skb);
1009 	return err;
1010 
1011 fail_toobig:
1012 	if (skb->sk && dst_allfrag(skb_dst(skb)))
1013 		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
1014 
1015 	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1016 	err = -EMSGSIZE;
1017 
1018 fail:
1019 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1020 		      IPSTATS_MIB_FRAGFAILS);
1021 	kfree_skb(skb);
1022 	return err;
1023 }
1024 
1025 static inline int ip6_rt_check(const struct rt6key *rt_key,
1026 			       const struct in6_addr *fl_addr,
1027 			       const struct in6_addr *addr_cache)
1028 {
1029 	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
1030 		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
1031 }
1032 
1033 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
1034 					  struct dst_entry *dst,
1035 					  const struct flowi6 *fl6)
1036 {
1037 	struct ipv6_pinfo *np = inet6_sk(sk);
1038 	struct rt6_info *rt;
1039 
1040 	if (!dst)
1041 		goto out;
1042 
1043 	if (dst->ops->family != AF_INET6) {
1044 		dst_release(dst);
1045 		return NULL;
1046 	}
1047 
1048 	rt = (struct rt6_info *)dst;
1049 	/* Yes, checking route validity in the not-connected
1050 	 * case is not very simple. Take into account
1051 	 * that we do not support routing by source, TOS,
1052 	 * and MSG_DONTROUTE		--ANK (980726)
1053 	 *
1054 	 * 1. ip6_rt_check(): If route was host route,
1055 	 *    check that cached destination is current.
1056 	 *    If it is network route, we still may
1057 	 *    check its validity using saved pointer
1058 	 *    to the last used address: daddr_cache.
1059 	 *    We do not want to save whole address now,
1060 	 *    (because main consumer of this service
1061 	 *    is TCP, which does not have this problem),
1062 	 *    so this last trick works only on connected
1063 	 *    sockets.
1064 	 * 2. oif also should be the same.
1065 	 */
1066 	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
1067 #ifdef CONFIG_IPV6_SUBTREES
1068 	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
1069 #endif
1070 	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
1071 	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
1072 		dst_release(dst);
1073 		dst = NULL;
1074 	}
1075 
1076 out:
1077 	return dst;
1078 }
1079 
1080 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
1081 			       struct dst_entry **dst, struct flowi6 *fl6)
1082 {
1083 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1084 	struct neighbour *n;
1085 	struct rt6_info *rt;
1086 #endif
1087 	int err;
1088 	int flags = 0;
1089 
1090 	/* The correct way to handle this would be to do
1091 	 * ip6_route_get_saddr, and then ip6_route_output; however,
1092 	 * the route-specific preferred source forces the
1093 	 * ip6_route_output call _before_ ip6_route_get_saddr.
1094 	 *
1095 	 * In source specific routing (no src=any default route),
1096 	 * ip6_route_output will fail given src=any saddr, though, so
1097 	 * that's why we try it again later.
1098 	 */
1099 	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
1100 		struct fib6_info *from;
1101 		struct rt6_info *rt;
1102 		bool had_dst = *dst != NULL;
1103 
1104 		if (!had_dst)
1105 			*dst = ip6_route_output(net, sk, fl6);
1106 		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
1107 
1108 		rcu_read_lock();
1109 		from = rt ? rcu_dereference(rt->from) : NULL;
1110 		err = ip6_route_get_saddr(net, from, &fl6->daddr,
1111 					  sk ? inet6_sk(sk)->srcprefs : 0,
1112 					  &fl6->saddr);
1113 		rcu_read_unlock();
1114 
1115 		if (err)
1116 			goto out_err_release;
1117 
1118 		/* If we had an erroneous initial result, pretend it
1119 		 * never existed and let the SA-enabled version take
1120 		 * over.
1121 		 */
1122 		if (!had_dst && (*dst)->error) {
1123 			dst_release(*dst);
1124 			*dst = NULL;
1125 		}
1126 
1127 		if (fl6->flowi6_oif)
1128 			flags |= RT6_LOOKUP_F_IFACE;
1129 	}
1130 
1131 	if (!*dst)
1132 		*dst = ip6_route_output_flags(net, sk, fl6, flags);
1133 
1134 	err = (*dst)->error;
1135 	if (err)
1136 		goto out_err_release;
1137 
1138 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1139 	/*
1140 	 * Here if the dst entry we've looked up
1141 	 * has a neighbour entry that is in the INCOMPLETE
1142 	 * state and the src address from the flow is
1143 	 * marked as OPTIMISTIC, we release the found
1144 	 * dst entry and replace it instead with the
1145 	 * dst entry of the nexthop router
1146 	 */
1147 	rt = (struct rt6_info *) *dst;
1148 	rcu_read_lock_bh();
1149 	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1150 				      rt6_nexthop(rt, &fl6->daddr));
1151 	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1152 	rcu_read_unlock_bh();
1153 
1154 	if (err) {
1155 		struct inet6_ifaddr *ifp;
1156 		struct flowi6 fl_gw6;
1157 		int redirect;
1158 
1159 		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1160 				      (*dst)->dev, 1);
1161 
1162 		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1163 		if (ifp)
1164 			in6_ifa_put(ifp);
1165 
1166 		if (redirect) {
1167 			/*
1168 			 * We need to get the dst entry for the
1169 			 * default router instead
1170 			 */
1171 			dst_release(*dst);
1172 			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1173 			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1174 			*dst = ip6_route_output(net, sk, &fl_gw6);
1175 			err = (*dst)->error;
1176 			if (err)
1177 				goto out_err_release;
1178 		}
1179 	}
1180 #endif
1181 	if (ipv6_addr_v4mapped(&fl6->saddr) &&
1182 	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1183 		err = -EAFNOSUPPORT;
1184 		goto out_err_release;
1185 	}
1186 
1187 	return 0;
1188 
1189 out_err_release:
1190 	dst_release(*dst);
1191 	*dst = NULL;
1192 
1193 	if (err == -ENETUNREACH)
1194 		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1195 	return err;
1196 }
1197 
1198 /**
1199  *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace of the lookup
1200  *	@sk: socket which provides route info
1201  *	@dst: pointer to dst_entry * for result
1202  *	@fl6: flow to lookup
1203  *
1204  *	This function performs a route lookup on the given flow.
1205  *
1206  *	It returns zero on success, or a standard errno code on error.
1207  */
1208 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1209 		   struct flowi6 *fl6)
1210 {
1211 	*dst = NULL;
1212 	return ip6_dst_lookup_tail(net, sk, dst, fl6);
1213 }
1214 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
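
/*
 * Editorial example, not part of the original file: minimal use of
 * ip6_dst_lookup(). Unlike the *_flow variants below, it reports errors
 * through its return value and leaves *dst NULL on failure. The helper
 * name and the choice of UDP are illustrative only.
 */
static __maybe_unused struct dst_entry *example_ip6_route(struct net *net,
							  struct sock *sk,
							  const struct in6_addr *daddr)
{
	struct flowi6 fl6 = {
		.daddr		= *daddr,
		.flowi6_proto	= IPPROTO_UDP,
	};
	struct dst_entry *dst;

	if (ip6_dst_lookup(net, sk, &dst, &fl6))
		return NULL;		/* no route to host */
	return dst;			/* caller must dst_release() it */
}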
1215 
1216 /**
1217  *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@net: network namespace of the lookup
1218  *	@sk: socket which provides route info
1219  *	@fl6: flow to lookup
1220  *	@final_dst: final destination address for ipsec lookup
1221  *
1222  *	This function performs a route lookup on the given flow.
1223  *
1224  *	It returns a valid dst pointer on success, or a pointer encoded
1225  *	error code.
1226  */
1227 struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1228 				      const struct in6_addr *final_dst)
1229 {
1230 	struct dst_entry *dst = NULL;
1231 	int err;
1232 
1233 	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1234 	if (err)
1235 		return ERR_PTR(err);
1236 	if (final_dst)
1237 		fl6->daddr = *final_dst;
1238 
1239 	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1240 }
1241 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
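
/*
 * Editorial example, not part of the original file: ip6_dst_lookup_flow()
 * reports failure as an ERR_PTR()-encoded pointer, not NULL, so callers
 * must test with IS_ERR(). A minimal sketch:
 */
static int __maybe_unused example_lookup_flow(struct net *net, struct sock *sk,
					      struct flowi6 *fl6)
{
	struct dst_entry *dst;

	dst = ip6_dst_lookup_flow(net, sk, fl6, NULL /* no IPsec final dst */);
	if (IS_ERR(dst))
		return PTR_ERR(dst);

	/* A real caller would attach dst to an skb and transmit here. */
	dst_release(dst);
	return 0;
}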
1242 
1243 /**
1244  *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1245  *	@sk: socket which provides the dst cache and route info
1246  *	@fl6: flow to lookup
1247  *	@final_dst: final destination address for ipsec lookup
1248  *	@connected: whether @sk is connected or not
1249  *
1250  *	This function performs a route lookup on the given flow with the
1251  *	possibility of using the cached route in the socket if it is valid.
1252  *	It will take the socket dst lock when operating on the dst cache.
1253  *	As a result, this function can only be used in process context.
1254  *
1255  *	In addition, for a connected socket, cache the dst in the socket
1256  *	if the current cache is not valid.
1257  *
1258  *	It returns a valid dst pointer on success, or a pointer encoded
1259  *	error code.
1260  */
1261 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1262 					 const struct in6_addr *final_dst,
1263 					 bool connected)
1264 {
1265 	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1266 
1267 	dst = ip6_sk_dst_check(sk, dst, fl6);
1268 	if (dst)
1269 		return dst;
1270 
1271 	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
1272 	if (connected && !IS_ERR(dst))
1273 		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1274 
1275 	return dst;
1276 }
1277 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
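
/*
 * Editorial example, not part of the original file: the connected-socket
 * pattern for ip6_sk_dst_lookup_flow(), roughly the shape used by the
 * UDPv6 sendmsg path. With connected == true a freshly looked-up route
 * is cached back into the socket for later sends.
 */
static int __maybe_unused example_sk_lookup_flow(struct sock *sk,
						 struct flowi6 *fl6,
						 bool connected)
{
	struct dst_entry *dst;

	dst = ip6_sk_dst_lookup_flow(sk, fl6, &fl6->daddr, connected);
	if (IS_ERR(dst))
		return PTR_ERR(dst);

	dst_release(dst);
	return 0;
}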
1278 
1279 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1280 					       gfp_t gfp)
1281 {
1282 	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1283 }
1284 
1285 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1286 						gfp_t gfp)
1287 {
1288 	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1289 }
1290 
1291 static void ip6_append_data_mtu(unsigned int *mtu,
1292 				int *maxfraglen,
1293 				unsigned int fragheaderlen,
1294 				struct sk_buff *skb,
1295 				struct rt6_info *rt,
1296 				unsigned int orig_mtu)
1297 {
1298 	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1299 		if (!skb) {
1300 			/* first fragment, reserve header_len */
1301 			*mtu = orig_mtu - rt->dst.header_len;
1302 
1303 		} else {
1304 			/*
1305 			 * this fragment is not first, the headers
1306 			 * space is regarded as data space.
1307 			 */
1308 			*mtu = orig_mtu;
1309 		}
1310 		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
1311 			      + fragheaderlen - sizeof(struct frag_hdr);
1312 	}
1313 }
1314 
1315 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1316 			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1317 			  struct rt6_info *rt, struct flowi6 *fl6)
1318 {
1319 	struct ipv6_pinfo *np = inet6_sk(sk);
1320 	unsigned int mtu;
1321 	struct ipv6_txoptions *opt = ipc6->opt;
1322 
1323 	/*
1324 	 * setup for corking
1325 	 */
1326 	if (opt) {
1327 		if (WARN_ON(v6_cork->opt))
1328 			return -EINVAL;
1329 
1330 		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1331 		if (unlikely(!v6_cork->opt))
1332 			return -ENOBUFS;
1333 
1334 		v6_cork->opt->tot_len = sizeof(*opt);
1335 		v6_cork->opt->opt_flen = opt->opt_flen;
1336 		v6_cork->opt->opt_nflen = opt->opt_nflen;
1337 
1338 		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1339 						    sk->sk_allocation);
1340 		if (opt->dst0opt && !v6_cork->opt->dst0opt)
1341 			return -ENOBUFS;
1342 
1343 		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1344 						    sk->sk_allocation);
1345 		if (opt->dst1opt && !v6_cork->opt->dst1opt)
1346 			return -ENOBUFS;
1347 
1348 		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1349 						   sk->sk_allocation);
1350 		if (opt->hopopt && !v6_cork->opt->hopopt)
1351 			return -ENOBUFS;
1352 
1353 		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1354 						    sk->sk_allocation);
1355 		if (opt->srcrt && !v6_cork->opt->srcrt)
1356 			return -ENOBUFS;
1357 
1358 		/* need source address above --miyazawa */
1359 	}
1360 	dst_hold(&rt->dst);
1361 	cork->base.dst = &rt->dst;
1362 	cork->fl.u.ip6 = *fl6;
1363 	v6_cork->hop_limit = ipc6->hlimit;
1364 	v6_cork->tclass = ipc6->tclass;
1365 	if (rt->dst.flags & DST_XFRM_TUNNEL)
1366 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1367 		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1368 	else
1369 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1370 			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1371 	if (np->frag_size < mtu) {
1372 		if (np->frag_size)
1373 			mtu = np->frag_size;
1374 	}
1375 	cork->base.fragsize = mtu;
1376 	cork->base.gso_size = ipc6->gso_size;
1377 	cork->base.tx_flags = 0;
1378 	cork->base.mark = ipc6->sockc.mark;
1379 	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1380 
1381 	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1382 		cork->base.flags |= IPCORK_ALLFRAG;
1383 	cork->base.length = 0;
1384 
1385 	cork->base.transmit_time = ipc6->sockc.transmit_time;
1386 
1387 	return 0;
1388 }
1389 
1390 static int __ip6_append_data(struct sock *sk,
1391 			     struct flowi6 *fl6,
1392 			     struct sk_buff_head *queue,
1393 			     struct inet_cork *cork,
1394 			     struct inet6_cork *v6_cork,
1395 			     struct page_frag *pfrag,
1396 			     int getfrag(void *from, char *to, int offset,
1397 					 int len, int odd, struct sk_buff *skb),
1398 			     void *from, int length, int transhdrlen,
1399 			     unsigned int flags, struct ipcm6_cookie *ipc6)
1400 {
1401 	struct sk_buff *skb, *skb_prev = NULL;
1402 	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1403 	struct ubuf_info *uarg = NULL;
1404 	int exthdrlen = 0;
1405 	int dst_exthdrlen = 0;
1406 	int hh_len;
1407 	int copy;
1408 	int err;
1409 	int offset = 0;
1410 	u32 tskey = 0;
1411 	struct rt6_info *rt = (struct rt6_info *)cork->dst;
1412 	struct ipv6_txoptions *opt = v6_cork->opt;
1413 	int csummode = CHECKSUM_NONE;
1414 	unsigned int maxnonfragsize, headersize;
1415 	unsigned int wmem_alloc_delta = 0;
1416 	bool paged, extra_uref = false;
1417 
1418 	skb = skb_peek_tail(queue);
1419 	if (!skb) {
1420 		exthdrlen = opt ? opt->opt_flen : 0;
1421 		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1422 	}
1423 
1424 	paged = !!cork->gso_size;
1425 	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1426 	orig_mtu = mtu;
1427 
1428 	if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
1429 	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1430 		tskey = sk->sk_tskey++;
1431 
1432 	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1433 
1434 	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1435 			(opt ? opt->opt_nflen : 0);
1436 
1437 	headersize = sizeof(struct ipv6hdr) +
1438 		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1439 		     (dst_allfrag(&rt->dst) ?
1440 		      sizeof(struct frag_hdr) : 0) +
1441 		     rt->rt6i_nfheader_len;
1442 
1443 	if (mtu <= fragheaderlen ||
1444 	    ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
1445 		goto emsgsize;
1446 
1447 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1448 		     sizeof(struct frag_hdr);
1449 
1450 	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1451 	 * in the first fragment
1452 	 */
1453 	if (headersize + transhdrlen > mtu)
1454 		goto emsgsize;
1455 
1456 	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1457 	    (sk->sk_protocol == IPPROTO_UDP ||
1458 	     sk->sk_protocol == IPPROTO_RAW)) {
1459 		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1460 				sizeof(struct ipv6hdr));
1461 		goto emsgsize;
1462 	}
1463 
1464 	if (ip6_sk_ignore_df(sk))
1465 		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1466 	else
1467 		maxnonfragsize = mtu;
1468 
1469 	if (cork->length + length > maxnonfragsize - headersize) {
1470 emsgsize:
1471 		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1472 		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1473 		return -EMSGSIZE;
1474 	}
1475 
1476 	/* CHECKSUM_PARTIAL only with no extension headers and when
1477 	 * we are not going to fragment
1478 	 */
1479 	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1480 	    headersize == sizeof(struct ipv6hdr) &&
1481 	    length <= mtu - headersize &&
1482 	    (!(flags & MSG_MORE) || cork->gso_size) &&
1483 	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1484 		csummode = CHECKSUM_PARTIAL;
1485 
1486 	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
1487 		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
1488 		if (!uarg)
1489 			return -ENOBUFS;
1490 		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
1491 		if (rt->dst.dev->features & NETIF_F_SG &&
1492 		    csummode == CHECKSUM_PARTIAL) {
1493 			paged = true;
1494 		} else {
1495 			uarg->zerocopy = 0;
1496 			skb_zcopy_set(skb, uarg, &extra_uref);
1497 		}
1498 	}
1499 
1500 	/*
1501 	 * Let's try using as much space as possible.
1502 	 * Use MTU if total length of the message fits into the MTU.
1503 	 * Otherwise, we need to reserve fragment header and
1504 	 * fragment alignment (= 8-15 octets, in total).
1505 	 *
1506 	 * Note that we may need to "move" the data from the tail
1507 	 * of the buffer to the new fragment when we split
1508 	 * the message.
1509 	 *
1510 	 * FIXME: It may be fragmented into multiple chunks
1511 	 *        at once if non-fragmentable extension headers
1512 	 *        are too large.
1513 	 * --yoshfuji
1514 	 */
1515 
1516 	cork->length += length;
1517 	if (!skb)
1518 		goto alloc_new_skb;
1519 
1520 	while (length > 0) {
1521 		/* Check if the remaining data fits into current packet. */
1522 		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1523 		if (copy < length)
1524 			copy = maxfraglen - skb->len;
1525 
1526 		if (copy <= 0) {
1527 			char *data;
1528 			unsigned int datalen;
1529 			unsigned int fraglen;
1530 			unsigned int fraggap;
1531 			unsigned int alloclen, alloc_extra;
1532 			unsigned int pagedlen;
1533 alloc_new_skb:
1534 			/* There's no room in the current skb */
1535 			if (skb)
1536 				fraggap = skb->len - maxfraglen;
1537 			else
1538 				fraggap = 0;
1539 			/* update mtu and maxfraglen if necessary */
1540 			if (!skb || !skb_prev)
1541 				ip6_append_data_mtu(&mtu, &maxfraglen,
1542 						    fragheaderlen, skb, rt,
1543 						    orig_mtu);
1544 
1545 			skb_prev = skb;
1546 
1547 			/*
1548 			 * If remaining data exceeds the mtu,
1549 			 * we know we need more fragment(s).
1550 			 */
1551 			datalen = length + fraggap;
1552 
1553 			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1554 				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1555 			fraglen = datalen + fragheaderlen;
1556 			pagedlen = 0;
1557 
1558 			alloc_extra = hh_len;
1559 			alloc_extra += dst_exthdrlen;
1560 			alloc_extra += rt->dst.trailer_len;
1561 
1562 			/* We just reserve space for fragment header.
1563 			 * Note: this may be an overallocation if the message
1564 			 * (without MSG_MORE) fits into the MTU.
1565 			 */
1566 			alloc_extra += sizeof(struct frag_hdr);
1567 
1568 			if ((flags & MSG_MORE) &&
1569 			    !(rt->dst.dev->features&NETIF_F_SG))
1570 				alloclen = mtu;
1571 			else if (!paged &&
1572 				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
1573 				  !(rt->dst.dev->features & NETIF_F_SG)))
1574 				alloclen = fraglen;
1575 			else {
1576 				alloclen = min_t(int, fraglen, MAX_HEADER);
1577 				pagedlen = fraglen - alloclen;
1578 			}
1579 			alloclen += alloc_extra;
1580 
1581 			if (datalen != length + fraggap) {
1582 				/*
1583 				 * this is not the last fragment, the trailer
1584 				 * space is regarded as data space.
1585 				 */
1586 				datalen += rt->dst.trailer_len;
1587 			}
1588 
1589 			fraglen = datalen + fragheaderlen;
1590 
1591 			copy = datalen - transhdrlen - fraggap - pagedlen;
1592 			if (copy < 0) {
1593 				err = -EINVAL;
1594 				goto error;
1595 			}
1596 			if (transhdrlen) {
1597 				skb = sock_alloc_send_skb(sk, alloclen,
1598 						(flags & MSG_DONTWAIT), &err);
1599 			} else {
1600 				skb = NULL;
1601 				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1602 				    2 * sk->sk_sndbuf)
1603 					skb = alloc_skb(alloclen,
1604 							sk->sk_allocation);
1605 				if (unlikely(!skb))
1606 					err = -ENOBUFS;
1607 			}
1608 			if (!skb)
1609 				goto error;
1610 			/*
1611 			 *	Fill in the control structures
1612 			 */
1613 			skb->protocol = htons(ETH_P_IPV6);
1614 			skb->ip_summed = csummode;
1615 			skb->csum = 0;
1616 			/* reserve for fragmentation and ipsec header */
1617 			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1618 				    dst_exthdrlen);
1619 
1620 			/*
1621 			 *	Find where to start putting bytes
1622 			 */
1623 			data = skb_put(skb, fraglen - pagedlen);
1624 			skb_set_network_header(skb, exthdrlen);
1625 			data += fragheaderlen;
1626 			skb->transport_header = (skb->network_header +
1627 						 fragheaderlen);
1628 			if (fraggap) {
1629 				skb->csum = skb_copy_and_csum_bits(
1630 					skb_prev, maxfraglen,
1631 					data + transhdrlen, fraggap, 0);
1632 				skb_prev->csum = csum_sub(skb_prev->csum,
1633 							  skb->csum);
1634 				data += fraggap;
1635 				pskb_trim_unique(skb_prev, maxfraglen);
1636 			}
1637 			if (copy > 0 &&
1638 			    getfrag(from, data + transhdrlen, offset,
1639 				    copy, fraggap, skb) < 0) {
1640 				err = -EFAULT;
1641 				kfree_skb(skb);
1642 				goto error;
1643 			}
1644 
1645 			offset += copy;
1646 			length -= copy + transhdrlen;
1647 			transhdrlen = 0;
1648 			exthdrlen = 0;
1649 			dst_exthdrlen = 0;
1650 
1651 			/* Only the initial fragment is time stamped */
1652 			skb_shinfo(skb)->tx_flags = cork->tx_flags;
1653 			cork->tx_flags = 0;
1654 			skb_shinfo(skb)->tskey = tskey;
1655 			tskey = 0;
1656 			skb_zcopy_set(skb, uarg, &extra_uref);
1657 
1658 			if ((flags & MSG_CONFIRM) && !skb_prev)
1659 				skb_set_dst_pending_confirm(skb, 1);
1660 
1661 			/*
1662 			 * Put the packet on the pending queue
1663 			 */
1664 			if (!skb->destructor) {
1665 				skb->destructor = sock_wfree;
1666 				skb->sk = sk;
1667 				wmem_alloc_delta += skb->truesize;
1668 			}
1669 			__skb_queue_tail(queue, skb);
1670 			continue;
1671 		}
1672 
1673 		if (copy > length)
1674 			copy = length;
1675 
1676 		if (!(rt->dst.dev->features&NETIF_F_SG) &&
1677 		    skb_tailroom(skb) >= copy) {
1678 			unsigned int off;
1679 
1680 			off = skb->len;
1681 			if (getfrag(from, skb_put(skb, copy),
1682 						offset, copy, off, skb) < 0) {
1683 				__skb_trim(skb, off);
1684 				err = -EFAULT;
1685 				goto error;
1686 			}
1687 		} else if (!uarg || !uarg->zerocopy) {
1688 			int i = skb_shinfo(skb)->nr_frags;
1689 
1690 			err = -ENOMEM;
1691 			if (!sk_page_frag_refill(sk, pfrag))
1692 				goto error;
1693 
1694 			if (!skb_can_coalesce(skb, i, pfrag->page,
1695 					      pfrag->offset)) {
1696 				err = -EMSGSIZE;
1697 				if (i == MAX_SKB_FRAGS)
1698 					goto error;
1699 
1700 				__skb_fill_page_desc(skb, i, pfrag->page,
1701 						     pfrag->offset, 0);
1702 				skb_shinfo(skb)->nr_frags = ++i;
1703 				get_page(pfrag->page);
1704 			}
1705 			copy = min_t(int, copy, pfrag->size - pfrag->offset);
1706 			if (getfrag(from,
1707 				    page_address(pfrag->page) + pfrag->offset,
1708 				    offset, copy, skb->len, skb) < 0)
1709 				goto error_efault;
1710 
1711 			pfrag->offset += copy;
1712 			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1713 			skb->len += copy;
1714 			skb->data_len += copy;
1715 			skb->truesize += copy;
1716 			wmem_alloc_delta += copy;
1717 		} else {
1718 			err = skb_zerocopy_iter_dgram(skb, from, copy);
1719 			if (err < 0)
1720 				goto error;
1721 		}
1722 		offset += copy;
1723 		length -= copy;
1724 	}
1725 
1726 	if (wmem_alloc_delta)
1727 		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1728 	return 0;
1729 
1730 error_efault:
1731 	err = -EFAULT;
1732 error:
1733 	if (uarg)
1734 		sock_zerocopy_put_abort(uarg, extra_uref);
1735 	cork->length -= length;
1736 	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1737 	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1738 	return err;
1739 }
1740 
1741 int ip6_append_data(struct sock *sk,
1742 		    int getfrag(void *from, char *to, int offset, int len,
1743 				int odd, struct sk_buff *skb),
1744 		    void *from, int length, int transhdrlen,
1745 		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1746 		    struct rt6_info *rt, unsigned int flags)
1747 {
1748 	struct inet_sock *inet = inet_sk(sk);
1749 	struct ipv6_pinfo *np = inet6_sk(sk);
1750 	int exthdrlen;
1751 	int err;
1752 
1753 	if (flags&MSG_PROBE)
1754 		return 0;
1755 	if (skb_queue_empty(&sk->sk_write_queue)) {
1756 		/*
1757 		 * setup for corking
1758 		 */
1759 		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1760 				     ipc6, rt, fl6);
1761 		if (err)
1762 			return err;
1763 
1764 		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1765 		length += exthdrlen;
1766 		transhdrlen += exthdrlen;
1767 	} else {
1768 		fl6 = &inet->cork.fl.u.ip6;
1769 		transhdrlen = 0;
1770 	}
1771 
1772 	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1773 				 &np->cork, sk_page_frag(sk), getfrag,
1774 				 from, length, transhdrlen, flags, ipc6);
1775 }
1776 EXPORT_SYMBOL_GPL(ip6_append_data);
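
/*
 * Editorial example, not part of the original file: the cork/append/push
 * sequence that datagram sockets build on ip6_append_data(), roughly the
 * shape of the raw and UDP sendmsg paths. ip_generic_getfrag() is the
 * stock copy-from-msghdr callback (declared in <net/ip.h>, assumed
 * included); flow and route setup are the caller's responsibility.
 */
static int __maybe_unused example_append_send(struct sock *sk,
					      struct msghdr *msg, int len,
					      struct ipcm6_cookie *ipc6,
					      struct flowi6 *fl6,
					      struct rt6_info *rt)
{
	int err;

	lock_sock(sk);
	err = ip6_append_data(sk, ip_generic_getfrag, msg, len,
			      0 /* no transport header */, ipc6, fl6, rt,
			      msg->msg_flags);
	if (err)
		ip6_flush_pending_frames(sk);	/* drop anything queued */
	else if (!(msg->msg_flags & MSG_MORE))
		err = ip6_push_pending_frames(sk);
	release_sock(sk);
	return err;
}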
1777 
1778 static void ip6_cork_release(struct inet_cork_full *cork,
1779 			     struct inet6_cork *v6_cork)
1780 {
1781 	if (v6_cork->opt) {
1782 		kfree(v6_cork->opt->dst0opt);
1783 		kfree(v6_cork->opt->dst1opt);
1784 		kfree(v6_cork->opt->hopopt);
1785 		kfree(v6_cork->opt->srcrt);
1786 		kfree(v6_cork->opt);
1787 		v6_cork->opt = NULL;
1788 	}
1789 
1790 	if (cork->base.dst) {
1791 		dst_release(cork->base.dst);
1792 		cork->base.dst = NULL;
1793 		cork->base.flags &= ~IPCORK_ALLFRAG;
1794 	}
1795 	memset(&cork->fl, 0, sizeof(cork->fl));
1796 }
1797 
1798 struct sk_buff *__ip6_make_skb(struct sock *sk,
1799 			       struct sk_buff_head *queue,
1800 			       struct inet_cork_full *cork,
1801 			       struct inet6_cork *v6_cork)
1802 {
1803 	struct sk_buff *skb, *tmp_skb;
1804 	struct sk_buff **tail_skb;
1805 	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1806 	struct ipv6_pinfo *np = inet6_sk(sk);
1807 	struct net *net = sock_net(sk);
1808 	struct ipv6hdr *hdr;
1809 	struct ipv6_txoptions *opt = v6_cork->opt;
1810 	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1811 	struct flowi6 *fl6 = &cork->fl.u.ip6;
1812 	unsigned char proto = fl6->flowi6_proto;
1813 
1814 	skb = __skb_dequeue(queue);
1815 	if (!skb)
1816 		goto out;
1817 	tail_skb = &(skb_shinfo(skb)->frag_list);
1818 
1819 	/* move skb->data to ip header from ext header */
1820 	if (skb->data < skb_network_header(skb))
1821 		__skb_pull(skb, skb_network_offset(skb));
1822 	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1823 		__skb_pull(tmp_skb, skb_network_header_len(skb));
1824 		*tail_skb = tmp_skb;
1825 		tail_skb = &(tmp_skb->next);
1826 		skb->len += tmp_skb->len;
1827 		skb->data_len += tmp_skb->len;
1828 		skb->truesize += tmp_skb->truesize;
1829 		tmp_skb->destructor = NULL;
1830 		tmp_skb->sk = NULL;
1831 	}
1832 
1833 	/* Allow local fragmentation. */
1834 	skb->ignore_df = ip6_sk_ignore_df(sk);
1835 
1836 	*final_dst = fl6->daddr;
1837 	__skb_pull(skb, skb_network_header_len(skb));
1838 	if (opt && opt->opt_flen)
1839 		ipv6_push_frag_opts(skb, opt, &proto);
1840 	if (opt && opt->opt_nflen)
1841 		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1842 
1843 	skb_push(skb, sizeof(struct ipv6hdr));
1844 	skb_reset_network_header(skb);
1845 	hdr = ipv6_hdr(skb);
1846 
1847 	ip6_flow_hdr(hdr, v6_cork->tclass,
1848 		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
1849 					ip6_autoflowlabel(net, np), fl6));
1850 	hdr->hop_limit = v6_cork->hop_limit;
1851 	hdr->nexthdr = proto;
1852 	hdr->saddr = fl6->saddr;
1853 	hdr->daddr = *final_dst;
1854 
1855 	skb->priority = sk->sk_priority;
1856 	skb->mark = cork->base.mark;
1857 
1858 	skb->tstamp = cork->base.transmit_time;
1859 
1860 	skb_dst_set(skb, dst_clone(&rt->dst));
1861 	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1862 	if (proto == IPPROTO_ICMPV6) {
1863 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1864 		u8 icmp6_type;
1865 
1866 		if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl)
1867 			icmp6_type = fl6->fl6_icmp_type;
1868 		else
1869 			icmp6_type = icmp6_hdr(skb)->icmp6_type;
1870 		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
1871 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1872 	}
1873 
1874 	ip6_cork_release(cork, v6_cork);
1875 out:
1876 	return skb;
1877 }
1878 
1879 int ip6_send_skb(struct sk_buff *skb)
1880 {
1881 	struct net *net = sock_net(skb->sk);
1882 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1883 	int err;
1884 
1885 	err = ip6_local_out(net, skb->sk, skb);
1886 	if (err) {
1887 		if (err > 0)
1888 			err = net_xmit_errno(err);
1889 		if (err)
1890 			IP6_INC_STATS(net, rt->rt6i_idev,
1891 				      IPSTATS_MIB_OUTDISCARDS);
1892 	}
1893 
1894 	return err;
1895 }
1896 
1897 int ip6_push_pending_frames(struct sock *sk)
1898 {
1899 	struct sk_buff *skb;
1900 
1901 	skb = ip6_finish_skb(sk);
1902 	if (!skb)
1903 		return 0;
1904 
1905 	return ip6_send_skb(skb);
1906 }
1907 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1908 
1909 static void __ip6_flush_pending_frames(struct sock *sk,
1910 				       struct sk_buff_head *queue,
1911 				       struct inet_cork_full *cork,
1912 				       struct inet6_cork *v6_cork)
1913 {
1914 	struct sk_buff *skb;
1915 
1916 	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1917 		if (skb_dst(skb))
1918 			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1919 				      IPSTATS_MIB_OUTDISCARDS);
1920 		kfree_skb(skb);
1921 	}
1922 
1923 	ip6_cork_release(cork, v6_cork);
1924 }
1925 
1926 void ip6_flush_pending_frames(struct sock *sk)
1927 {
1928 	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1929 				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1930 }
1931 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1932 
1933 struct sk_buff *ip6_make_skb(struct sock *sk,
1934 			     int getfrag(void *from, char *to, int offset,
1935 					 int len, int odd, struct sk_buff *skb),
1936 			     void *from, int length, int transhdrlen,
1937 			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1938 			     struct rt6_info *rt, unsigned int flags,
1939 			     struct inet_cork_full *cork)
1940 {
1941 	struct inet6_cork v6_cork;
1942 	struct sk_buff_head queue;
1943 	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1944 	int err;
1945 
1946 	if (flags & MSG_PROBE)
1947 		return NULL;
1948 
1949 	__skb_queue_head_init(&queue);
1950 
1951 	cork->base.flags = 0;
1952 	cork->base.addr = 0;
1953 	cork->base.opt = NULL;
1954 	cork->base.dst = NULL;
1955 	v6_cork.opt = NULL;
1956 	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1957 	if (err) {
1958 		ip6_cork_release(cork, &v6_cork);
1959 		return ERR_PTR(err);
1960 	}
1961 	if (ipc6->dontfrag < 0)
1962 		ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1963 
1964 	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1965 				&current->task_frag, getfrag, from,
1966 				length + exthdrlen, transhdrlen + exthdrlen,
1967 				flags, ipc6);
1968 	if (err) {
1969 		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1970 		return ERR_PTR(err);
1971 	}
1972 
1973 	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1974 }
1975