/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *			:       add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

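/* Orientation note (a summary of the code in this file, not additional
 * logic): locally generated packets enter via ip6_xmit() or the corking
 * helpers (ip6_append_data()/ip6_push_pending_frames()), pass the
 * NF_INET_LOCAL_OUT hook, and reach dst_output(), which calls ip6_output().
 * ip6_output() runs NF_INET_POST_ROUTING, ip6_finish_output() fragments
 * oversized packets via ip6_fragment(), and ip6_finish_output2() resolves
 * the neighbour and hands the skb to the link layer.
 */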
static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(dev_net(dst->dev),
		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

static int ip6_finish_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(skb, ip6_finish_output2);
	else
		return ip6_finish_output2(skb);
}

int ip6_output(struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

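/* ip6_output() above is installed as the dst_entry output hook for IPv6
 * routes (see net/ipv6/route.c), so both locally generated and forwarded
 * packets funnel through it; the POST_ROUTING hook is skipped only when
 * the packet was already rerouted (IP6SKB_REROUTED).
 */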
/*
 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
						     np->autoflowlabel));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	skb->dev = dst->dev;
	ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

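/* Sketch of a typical ip6_xmit() caller (illustrative only; the real call
 * sites live in net/ipv6/tcp_ipv6.c and net/dccp/ipv6.c, and the local
 * variables here are assumptions about the caller): the transport builds
 * its own header, attaches a routed dst to the skb, then lets this
 * function prepend the IPv6 header and run LOCAL_OUT:
 *
 *	skb_dst_set(skb, dst);
 *	res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
 */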
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* Unicast neighbour discovery messages destined
			 * to the proxied address must be passed to the
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
	unsigned int mtu;
	struct inet6_dev *idev;

	if (dst_metric_locked(dst, RTAX_MTU)) {
		mtu = dst_metric_raw(dst, RTAX_MTU);
		if (mtu)
			return mtu;
	}

	mtu = IPV6_MIN_MTU;
	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}

static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
		return false;

	return true;
}

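/* Note on ip6_pkt_too_big(): a GSO skb may be longer than the MTU and still
 * be acceptable, because segmentation later splits it into MTU-sized
 * packets; what matters is the per-segment network-layer length, which is
 * why skb_gso_network_seglen() is compared against the MTU instead of
 * skb->len.
 */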
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We do NOT do any processing on RA packets;
	 *	we push them to user level AS IS
	 *	without any warranty that the application will be
	 *	able to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not the end node, so if the packet contains
	 *	AH/ESP, we cannot do anything.
	 *	Defragmentation would also be a mistake; RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
					 IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* The IPv6 specs say nothing about it, but it is clear that we
	   cannot send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Decrementing the hop limit is delayed until after the skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

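/* Forwarding order of checks, for reference (derived from the code above,
 * not additional logic): forwarding sysctl -> not LRO/local -> XFRM FWD
 * policy -> router alert delivery -> hop limit > 1 -> proxy NDP -> XFRM
 * routing -> possible redirect -> source address sanity -> path MTU ->
 * headroom COW -> decrement hop limit and run NF_INET_FORWARD.
 */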
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}

static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
{
	static u32 ip6_idents_hashrnd __read_mostly;
	u32 hash, id;

	net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));

	hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);
	hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash);

	id = ip_idents_reserve(hash, 1);
	fhdr->identification = htonl(id);
}

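/* The fragment ID chosen above is intentionally hard to predict: the
 * route's source and destination keys are hashed with a once-initialized
 * random seed, and ip_idents_reserve() (shared with IPv4) hands out a
 * counter value from the matching bucket, so off-path attackers cannot
 * easily guess the next identification value for a flow.
 */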
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb was not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu) ||
		     (IP6CB(skb)->frag_max_size &&
		      IP6CB(skb)->frag_max_size > mtu)) {
		if (skb->sk && dst_allfrag(skb_dst(skb)))
			sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh, rt);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare the header of the next frame,
			 * before the previous one goes down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			ip6_rt_put(rt);
			return 0;
		}

		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		ip6_rt_put(rt);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
	    skb_checksum_help(skb))
		goto fail;

	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0)	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      hroom + troom, GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh, rt);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

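/* Worked example of the fragment sizing above (assuming a plain 1500-byte
 * MTU and no extension headers, so hlen == 40): after
 * "mtu -= hlen + sizeof(struct frag_hdr)" the per-fragment data budget is
 * 1500 - 40 - 8 = 1452 bytes; every fragment except the last is then
 * rounded down to a multiple of 8 ("len &= ~7"), giving 1448 payload
 * bytes, because the fragment offset field counts 8-byte units.
 */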
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in the unconnected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If the route was a host route,
	 *    check that the cached destination is current.
	 *    If it is a network route, we still may
	 *    check its validity using a saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save the whole address now
	 *    (because the main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (!dst)
		dst = ip6_dst_lookup_flow(sk, fl6, final_dst);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

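/* Minimal usage sketch for ip6_dst_lookup_flow() (illustrative; the flow
 * fields shown are assumptions about the caller, not code from this file).
 * Note the ERR_PTR convention: the return value is either a valid dst or
 * an encoded errno, never NULL:
 *
 *	struct flowi6 fl6;
 *	struct dst_entry *dst;
 *
 *	memset(&fl6, 0, sizeof(fl6));
 *	fl6.flowi6_proto = IPPROTO_UDP;
 *	fl6.daddr = *daddr;
 *	fl6.fl6_dport = dport;
 *	dst = ip6_dst_lookup_flow(sk, &fl6, NULL);
 *	if (IS_ERR(dst))
 *		return PTR_ERR(dst);
 */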
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags,
			struct rt6_info *rt)

{
	struct sk_buff *skb;
	struct frag_hdr fhdr;
	int err;

	/* The network device supports UDP large send offload, so create
	 * one single skb containing the complete udp datagram.
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->protocol = htons(ETH_P_IPV6);
		skb->csum = 0;

		__skb_queue_tail(&sk->sk_write_queue, skb);
	} else if (skb_is_gso(skb)) {
		goto append;
	}

	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Specify the length of each IPv6 datagram fragment.
	 * It has to be a multiple of 8.
	 */
	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
				     sizeof(struct frag_hdr)) & ~7;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	ipv6_select_ident(&fhdr, rt);
	skb_shinfo(skb)->ip6_frag_id = fhdr.identification;

append:
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (skb == NULL) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not the first; the header
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
	int exthdrlen;
	int dst_exthdrlen;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;

	if (flags&MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kzalloc(sizeof(*opt), sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = sizeof(*opt);
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa */
		}
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		if (rt->dst.flags & DST_XFRM_TUNNEL)
			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
			      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
		else
			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
			      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		if (mtu < IPV6_MIN_MTU)
			return -EINVAL;
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	} else {
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		dst_exthdrlen = 0;
		mtu = cork->fragsize;
	}
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		unsigned int maxnonfragsize, headersize;

		headersize = sizeof(struct ipv6hdr) +
			     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
			     (dst_allfrag(&rt->dst) ?
			      sizeof(struct frag_hdr) : 0) +
			     rt->rt6i_nfheader_len;

		if (ip6_sk_ignore_df(sk))
			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
		else
			maxnonfragsize = mtu;

		/* dontfrag active */
		if ((cork->length + length > mtu - headersize) && dontfrag &&
		    (sk->sk_protocol == IPPROTO_UDP ||
		     sk->sk_protocol == IPPROTO_RAW)) {
			ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
						   sizeof(struct ipv6hdr));
			goto emsgsize;
		}

		if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
			ipv6_local_error(sk, EMSGSIZE, fl6,
					 mtu - headersize +
					 sizeof(struct ipv6hdr));
			return -EMSGSIZE;
		}
	}

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	skb = skb_peek_tail(&sk->sk_write_queue);
	cork->length += length;
	if ((skb && skb_is_gso(skb)) ||
	    (((length + fragheaderlen) > mtu) &&
	    (skb_queue_len(&sk->sk_write_queue) <= 1) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO) &&
	    (sk->sk_type == SOCK_DGRAM))) {
		err = ip6_ufo_append_data(sk, getfrag, from, length,
					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, rt);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (skb == NULL || skb_prev == NULL)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			struct page_frag *pfrag = sk_page_frag(sk);

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
EXPORT_SYMBOL_GPL(ip6_append_data);

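/* How the corking API above is used in practice (a hedged summary sketch;
 * the authoritative callers are the datagram sendmsg paths such as
 * net/ipv6/udp.c and net/ipv6/raw.c, and the local variables below are
 * assumptions about the caller): data is accumulated on sk->sk_write_queue
 * with one or more ip6_append_data() calls, then the queue is merged and
 * transmitted when the cork is released:
 *
 *	err = ip6_append_data(sk, getfrag, msg, len, transhdrlen,
 *			      hlimit, tclass, opt, &fl6, rt, flags, dontfrag);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!corkreq)
 *		err = ip6_push_pending_frames(sk);
 */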
static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	if (np->cork.opt) {
		kfree(np->cork.opt->dst0opt);
		kfree(np->cork.opt->dst1opt);
		kfree(np->cork.opt->hopopt);
		kfree(np->cork.opt->srcrt);
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	if (inet->cork.base.dst) {
		dst_release(inet->cork.base.dst);
		inet->cork.base.dst = NULL;
		inet->cork.base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, np->cork.tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					np->autoflowlabel));
	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

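/* ip6_flush_pending_frames() below is the error-path counterpart of
 * ip6_push_pending_frames(): it discards everything still queued on
 * sk->sk_write_queue and releases the cork state without transmitting.
 */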
void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1634