// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>

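/*
 * Transmit path overview: ip6_xmit() and ip6_output() hand packets to
 * the netfilter LOCAL_OUT/POST_ROUTING hooks, which invoke
 * ip6_finish_output().  That in turn fragments oversized packets via
 * ip6_fragment() and finally resolves the next-hop neighbour and
 * queues the skb on the device in ip6_finish_output2().
 */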
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	unsigned int hh_len = LL_RESERVED_SPACE(dev);
	int delta = hh_len - skb_headroom(skb);
	const struct in6_addr *nexthop;
	struct neighbour *neigh;
	int ret;

	/* Be paranoid, rather than too clever. */
	if (unlikely(delta > 0) && dev->header_ops) {
		/* pskb_expand_head() might crash, if skb is shared */
		if (skb_shared(skb)) {
			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);

			if (likely(nskb)) {
				if (skb->sk)
					skb_set_owner_w(nskb, skb->sk);
				consume_skb(skb);
			} else {
				kfree_skb(skb);
			}
			skb = nskb;
		}
		if (skb &&
		    pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
			kfree_skb(skb);
			skb = NULL;
		}
		if (!skb) {
			IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
			return -ENOMEM;
		}
	}

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res != LWTUNNEL_XMIT_CONTINUE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb, false);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
				    struct sk_buff *skb, unsigned int mtu)
{
	struct sk_buff *segs, *nskb;
	netdev_features_t features;
	int ret = 0;

	/* Please see corresponding comment in ip_finish_output_gso
	 * describing the cases where GSO segment length exceeds the
	 * egress MTU.
	 */
	features = netif_skb_features(skb);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	if (IS_ERR_OR_NULL(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	consume_skb(skb);

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		/* Last GSO segment can be smaller than gso_size (and MTU).
		 * Adding a fragment header would produce an "atomic fragment",
		 * which is considered harmful (RFC-8021). Avoid that.
		 */
		err = segs->len > mtu ?
			ip6_fragment(net, sk, segs, ip6_finish_output2) :
			ip6_finish_output2(net, sk, segs);
		if (err && ret == 0)
			ret = err;
	}

	return ret;
}

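/*
 * Fragment the packet when it exceeds the path MTU, when the dst
 * requests fragmentation of all packets (dst_allfrag), or when
 * conntrack defrag recorded a smaller incoming fragment size in
 * frag_max_size; otherwise transmit it as-is.
 */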
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	unsigned int mtu;

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IP6CB(skb)->flags |= IP6SKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	mtu = ip6_skb_dst_mtu(skb);
	if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
		return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);

	if ((skb->len > mtu && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}

static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	switch (ret) {
	case NET_XMIT_SUCCESS:
		return __ip6_finish_output(net, sk, skb);
	case NET_XMIT_CN:
		return __ip6_finish_output(net, sk, skb) ? : ret;
	default:
		kfree_skb(skb);
		return ret;
	}
}

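/*
 * Output entry point installed as the dst output callback for locally
 * generated IPv6 packets: tag the skb, honour disable_ipv6, then run
 * the NF_INET_POST_ROUTING hook unless the packet was already rerouted.
 */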
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 * Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

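/*
 * Deliver a Router Alert packet to every raw socket registered on the
 * ip6_ra_chain with a matching selector.  All but the last matching
 * socket receive a clone; the last one consumes the original skb, in
 * which case 1 is returned and forwarding stops.
 */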
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			if (np && np->rtalert_isolate &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

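/*
 * Decide what to do with a packet destined to a proxied (NDP proxy)
 * address: 1 means hand it to local input (unicast neighbour
 * discovery), -1 means a link-local destination was rejected, and
 * 0 means forward as usual.
 */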
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	skb->tstamp = 0;
	return dst_output(net, sk, skb);
}

static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

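/*
 * Forward one IPv6 packet: validate the forwarding policy and hop
 * limit, honour NDP proxy and router alerts, emit a redirect when the
 * packet leaves through the interface it arrived on, enforce the path
 * MTU, and finally decrement hop_limit before the NF_INET_FORWARD hook.
 */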
int ip6_forward(struct sk_buff *skb)
{
	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!net->ipv6.devconf_all->disable_policy &&
	    (!idev || !idev->cnf.disable_policy) &&
	    !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We do NOT do any processing on RA packets;
	 *	we push them to user level AS IS, without any
	 *	guarantee that the application will be able to
	 *	interpret them. The reason is that we cannot
	 *	do anything clever here.
	 *
	 *	We are not the end node, so if the packet contains
	 *	AH/ESP we cannot do anything with it.
	 *	Defragmentation would also be a mistake; RA packets
	 *	cannot be fragmented, because there is no guarantee
	 *	that different fragments will follow one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

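/*
 * Propagate per-packet metadata (dst, device, mark, hash, tc index,
 * netfilter and security state) from the original skb to a freshly
 * built fragment.
 */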
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_ext_copy(to, from);
	skb_copy_secmark(to, from);
}

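/*
 * Fast-path fragmentation: when the skb already carries a frag_list
 * of correctly sized chunks, ip6_fraglist_init()/ip6_fraglist_prepare()
 * turn each chunk into a fragment in place instead of copying payload.
 */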
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER */
	*prevhdr = NEXTHDR_FRAGMENT;
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);
	fh->identification = frag_id;

	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);

void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);

void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
	state->prevhdr = prevhdr;
	state->nexthdr = nexthdr;
	state->frag_id = frag_id;

	state->hlen = hlen;
	state->mtu = mtu;

	state->left = skb->len - hlen;	/* Space per frame */
	state->ptr = hlen;		/* Where to start from */

	state->hroom = hdr_room;
	state->troom = needed_tailroom;

	state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

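/*
 * Slow-path fragmentation: allocate a new skb for the next fragment,
 * copy the IPv6 header chain and up to state->mtu bytes of payload
 * into it, and fill in the fragment header.  Returns ERR_PTR(-ENOMEM)
 * on allocation failure.
 */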
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	   then align the next start on an eight byte boundary */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 *	Set up data on packet
	 */

	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 *	Charge the memory for the fragment to any owner
	 *	it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 *	Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 *	Build fragment header.
	 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 *	Copy a block of the IP datagram.
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

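/*
 * Split skb into fragments no larger than the path MTU and pass each
 * one to output.  The in-place frag_list fast path is tried first,
 * with a fall back to the copying slow path; packets that must not be
 * fragmented trigger an ICMPV6_PKT_TOOBIG error instead.
 */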
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		/* We prevent @rt from being freed. */
		rcu_read_lock();

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb->tstamp = tstamp;
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			rcu_read_unlock();
			return 0;
		}

		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		rcu_read_unlock();
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 *	Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 *	Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 *	Put this fragment into the sending queue.
		 */
		frag->tstamp = tstamp;
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

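/*
 * Returns nonzero when the cached dst can no longer be trusted for
 * this flow: the route is not a host route matching fl_addr, and the
 * address the socket cached last time does not match either.
 */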
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in the unconnected
	 * case is not very simple. Take into account that
	 * we do not support routing by source, TOS, and
	 * MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save the whole address now,
	 *    (because the main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct fib6_info *from;
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace to perform the lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@net: network namespace to perform the lookup in
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@connected: whether @sk is connected or not
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	In addition, for a connected socket, cache the dst in the socket
 *	if the current cache is not valid.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool connected)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (dst)
		return dst;

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
	if (connected && !IS_ERR(dst))
		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

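/*
 * Initialise the cork for a corked send: duplicate the tx options so
 * they outlive the caller, grab a reference on the route, and record
 * hop limit, traffic class and the MTU that the append path will
 * later fragment against.
 */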
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa */
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}

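/*
 * Core of ip6_append_data()/ip6_make_skb(): append length bytes
 * obtained through getfrag to the queue, growing the tail skb when
 * possible and allocating MTU-sized skbs (plus room for the fragment
 * header) when not, so that later fragmentation can usually take the
 * frag_list fast path without copying payload.
 */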
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	if (mtu <= fragheaderlen ||
	    ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
		goto emsgsize;

	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen, alloc_extra;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			alloc_extra = hh_len;
			alloc_extra += dst_exthdrlen;
			alloc_extra += rt->dst.trailer_len;

			/* We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloc_extra += sizeof(struct frag_hdr);

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged &&
				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
				  !(rt->dst.dev->features & NETIF_F_SG)))
				alloclen = fraglen;
			else {
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}
			alloclen += alloc_extra;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			fraglen = datalen + fragheaderlen;

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk, alloclen,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	if (uarg)
		sock_zerocopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}

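/*
 * Append data to the socket's pending write queue, setting up the
 * cork on the first call.  The queued skbs are later merged into one
 * packet by ip6_push_pending_frames() or discarded by
 * ip6_flush_pending_frames().
 */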
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}

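/*
 * Collapse the queued skbs into a single packet: chain the followers
 * onto the head's frag_list, push the extension headers and the IPv6
 * header, fill in flow label, hop limit and addresses, and release
 * the cork.
 */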
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = cork->base.mark;

	skb->tstamp = cork->base.transmit_time;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
		u8 icmp6_type;

		if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl)
			icmp6_type = fl6->fl6_icmp_type;
		else
			icmp6_type = icmp6_hdr(skb)->icmp6_type;
		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

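/*
 * One-shot variant of ip6_append_data()/ip6_push_pending_frames()
 * built around a private queue and a caller-supplied cork, used by
 * datagram paths that build a packet without corking the socket
 * (UDP, for example).
 */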
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     struct inet_cork_full *cork)
{
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork->base.flags = 0;
	cork->base.addr = 0;
	cork->base.opt = NULL;
	cork->base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
	if (err) {
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}