1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * IPv6 output functions
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on linux/net/ipv4/ip_output.c
10 *
11 * Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
13 * extension headers are implemented.
14 * route changes now work.
15 * ip6_forward does not confuse sniffers.
16 * etc.
17 *
18 * H. von Brand : Added missing #include <linux/string.h>
19 * Imran Patel : frag id should be in NBO
20 * Kazunori MIYAZAWA @USAGI
21 * : add ip6_append_data and related functions
22 * for datagram xmit
23 */
24
25 #include <linux/errno.h>
26 #include <linux/kernel.h>
27 #include <linux/string.h>
28 #include <linux/socket.h>
29 #include <linux/net.h>
30 #include <linux/netdevice.h>
31 #include <linux/if_arp.h>
32 #include <linux/in6.h>
33 #include <linux/tcp.h>
34 #include <linux/route.h>
35 #include <linux/module.h>
36 #include <linux/slab.h>
37
38 #include <linux/bpf-cgroup.h>
39 #include <linux/netfilter.h>
40 #include <linux/netfilter_ipv6.h>
41
42 #include <net/sock.h>
43 #include <net/snmp.h>
44
45 #include <net/ipv6.h>
46 #include <net/ndisc.h>
47 #include <net/protocol.h>
48 #include <net/ip6_route.h>
49 #include <net/addrconf.h>
50 #include <net/rawv6.h>
51 #include <net/icmp.h>
52 #include <net/xfrm.h>
53 #include <net/checksum.h>
54 #include <linux/mroute6.h>
55 #include <net/l3mdev.h>
56 #include <net/lwtunnel.h>
57 #include <net/ip_tunnels.h>
58
/*
 * ip6_finish_output2 - final IPv6 output step before the neighbour layer
 * @net: network namespace used for SNMP counters and the netfilter hook
 * @sk:  sending socket, consulted by sk_mc_loop() and passed to NF_HOOK()
 * @skb: packet to transmit; its dst entry selects the egress device
 *
 * Makes sure the skb has enough headroom for the link-layer header,
 * handles the multicast special cases (loopback copy, hop limit 0,
 * node-local scope), lets a lightweight tunnel take over the packet if
 * the dst asks for it, and finally resolves the next-hop neighbour and
 * calls neigh_output().
 *
 * On the drop paths handled here the skb is freed before returning;
 * otherwise it is passed on to lwtunnel_xmit() or neigh_output().
 *
 * Returns -ENOMEM if headroom could not be obtained, 0 for the deliberate
 * multicast drops, the lwtunnel_xmit() result when it is negative or
 * LWTUNNEL_XMIT_DONE, the neigh_output() result otherwise, and -EINVAL
 * when no neighbour entry could be created.
 */
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	unsigned int hh_len = LL_RESERVED_SPACE(dev);
	/* Signed so that "enough headroom" shows up as delta <= 0. */
	int delta = hh_len - skb_headroom(skb);
	const struct in6_addr *nexthop;
	struct neighbour *neigh;
	int ret;

	/* Be paranoid, rather than too clever. */
	if (unlikely(delta > 0) && dev->header_ops) {
		/* pskb_expand_head() might crash, if skb is shared */
		if (skb_shared(skb)) {
			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);

			if (likely(nskb)) {
				/* Keep the clone charged to the same socket. */
				if (skb->sk)
					skb_set_owner_w(nskb, skb->sk);
				consume_skb(skb);
			} else {
				kfree_skb(skb);
			}
			/* NULL here means the clone failed; handled below. */
			skb = nskb;
		}
		if (skb &&
		    pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
			kfree_skb(skb);
			skb = NULL;
		}
		if (!skb) {
			IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
			return -ENOMEM;
		}
	}

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/*
		 * Loop a copy back locally when the socket allows multicast
		 * loopback and either a multicast-routing socket wants this
		 * (not already forwarded) packet or the egress device has
		 * joined the destination group.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			 * is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* Hop limit 0: the looped-back copy is all there is. */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		/*
		 * Scope at or below node-local is never put on a
		 * non-loopback device.
		 */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		/* Any other result falls through to the normal neighbour path. */
		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	/* The neighbour is looked up without a reference, hence the RCU section. */
	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb, false);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
155
156 static int
ip6_finish_output_gso_slowpath_drop(struct net * net,struct sock * sk,struct sk_buff * skb,unsigned int mtu)157 ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
158 struct sk_buff *skb, unsigned int mtu)
159 {
160 struct sk_buff *segs, *nskb;
161 netdev_features_t features;
162 int ret = 0;
163
164 /* Please see corresponding comment in ip_finish_output_gso
165 * describing the cases where GSO segment length exceeds the
166 * egress MTU.
167 */
168 features = netif_skb_features(skb);
169 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
170 if (IS_ERR_OR_NULL(segs)) {
171 kfree_skb(skb);
172 return -ENOMEM;
173 }
174
175 consume_skb(skb);
176
177 skb_list_walk_safe(segs, segs, nskb) {
178 int err;
179
180 skb_mark_not_on_list(segs);
181 err = ip6_fragment(net, sk, segs, ip6_finish_output2);
182 if (err && ret == 0)
183 ret = err;
184 }
185
186 return ret;
187 }
188
__ip6_finish_output(struct net * net,struct sock * sk,struct sk_buff * skb)189 static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
190 {
191 unsigned int mtu;
192
193 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
194 /* Policy lookup after SNAT yielded a new policy */
195 if (skb_dst(skb)->xfrm) {
196 IP6CB(skb)->flags |= IP6SKB_REROUTED;
197 return dst_output(net, sk, skb);
198 }
199 #endif
200
201 mtu = ip6_skb_dst_mtu(skb);
202 if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
203 return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
204
205 if ((skb->len > mtu && !skb_is_gso(skb)) ||
206 dst_allfrag(skb_dst(skb)) ||
207 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
208 return ip6_fragment(net, sk, skb, ip6_finish_output2);
209 else
210 return ip6_finish_output2(net, sk, skb);
211 }
212
ip6_finish_output(struct net * net,struct sock * sk,struct sk_buff * skb)213 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
214 {
215 int ret;
216
217 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
218 switch (ret) {
219 case NET_XMIT_SUCCESS:
220 return __ip6_finish_output(net, sk, skb);
221 case NET_XMIT_CN:
222 return __ip6_finish_output(net, sk, skb) ? : ret;
223 default:
224 kfree_skb(skb);
225 return ret;
226 }
227 }
228
ip6_output(struct net * net,struct sock * sk,struct sk_buff * skb)229 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
230 {
231 struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
232 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
233
234 skb->protocol = htons(ETH_P_IPV6);
235 skb->dev = dev;
236
237 if (unlikely(idev->cnf.disable_ipv6)) {
238 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
239 kfree_skb(skb);
240 return 0;
241 }
242
243 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
244 net, sk, skb, indev, dev,
245 ip6_finish_output,
246 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
247 }
248
ip6_autoflowlabel(struct net * net,const struct ipv6_pinfo * np)249 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
250 {
251 if (!np->autoflowlabel_set)
252 return ip6_default_np_autolabel(net);
253 else
254 return np->autoflowlabel;
255 }
256
/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 *
 * @sk:       sending socket (const; cast away where helpers need it)
 * @skb:      transport segment with a dst attached and no IPv6 header yet
 * @fl6:      flow describing source, destination, protocol and flow label
 * @mark:     value stored in skb->mark
 * @opt:      optional extension headers to push, may be NULL
 * @tclass:   traffic class for the IPv6 header
 * @priority: value stored in skb->priority
 *
 * Pushes the extension headers and the IPv6 header in front of the
 * payload and sends the packet through the NF_INET_LOCAL_OUT hook to
 * dst_output().
 *
 * Returns -ENOBUFS if headroom could not be reallocated, 0 if
 * l3mdev_ip6_out() returned no skb, the hook result on the normal path,
 * and -EMSGSIZE when the packet is larger than the path MTU and may not
 * be sent (an EMSGSIZE local error is queued on the socket first).  The
 * skb is freed on the error paths taken here.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	/* Room needed in front of the payload: L2 + IPv6 + extension headers. */
	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		/* Move the socket charge to the copy before dropping the original. */
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		/*
		 * Headers are pushed innermost first; each helper receives
		 * &proto so it can update the next-header chain.
		 */
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		/* first_hop is passed by reference and may be replaced. */
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	/* A negative per-socket value means "use the route's hop limit". */
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	/*
	 * NOTE(review): np is NULL-checked above but handed unchecked to
	 * ip6_autoflowlabel(), which dereferences it - confirm that every
	 * caller passes a socket with IPv6 private data.
	 */
	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	/* Too big and not allowed to exceed the MTU: report and drop. */
	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
363
ip6_call_ra_chain(struct sk_buff * skb,int sel)364 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
365 {
366 struct ip6_ra_chain *ra;
367 struct sock *last = NULL;
368
369 read_lock(&ip6_ra_lock);
370 for (ra = ip6_ra_chain; ra; ra = ra->next) {
371 struct sock *sk = ra->sk;
372 if (sk && ra->sel == sel &&
373 (!sk->sk_bound_dev_if ||
374 sk->sk_bound_dev_if == skb->dev->ifindex)) {
375 struct ipv6_pinfo *np = inet6_sk(sk);
376
377 if (np && np->rtalert_isolate &&
378 !net_eq(sock_net(sk), dev_net(skb->dev))) {
379 continue;
380 }
381 if (last) {
382 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
383 if (skb2)
384 rawv6_rcv(last, skb2);
385 }
386 last = sk;
387 }
388 }
389
390 if (last) {
391 rawv6_rcv(last, skb);
392 read_unlock(&ip6_ra_lock);
393 return 1;
394 }
395 read_unlock(&ip6_ra_lock);
396 return 0;
397 }
398
ip6_forward_proxy_check(struct sk_buff * skb)399 static int ip6_forward_proxy_check(struct sk_buff *skb)
400 {
401 struct ipv6hdr *hdr = ipv6_hdr(skb);
402 u8 nexthdr = hdr->nexthdr;
403 __be16 frag_off;
404 int offset;
405
406 if (ipv6_ext_hdr(nexthdr)) {
407 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
408 if (offset < 0)
409 return 0;
410 } else
411 offset = sizeof(struct ipv6hdr);
412
413 if (nexthdr == IPPROTO_ICMPV6) {
414 struct icmp6hdr *icmp6;
415
416 if (!pskb_may_pull(skb, (skb_network_header(skb) +
417 offset + 1 - skb->data)))
418 return 0;
419
420 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
421
422 switch (icmp6->icmp6_type) {
423 case NDISC_ROUTER_SOLICITATION:
424 case NDISC_ROUTER_ADVERTISEMENT:
425 case NDISC_NEIGHBOUR_SOLICITATION:
426 case NDISC_NEIGHBOUR_ADVERTISEMENT:
427 case NDISC_REDIRECT:
428 /* For reaction involving unicast neighbor discovery
429 * message destined to the proxied address, pass it to
430 * input function.
431 */
432 return 1;
433 default:
434 break;
435 }
436 }
437
438 /*
439 * The proxying router can't forward traffic sent to a link-local
440 * address, so signal the sender and discard the packet. This
441 * behavior is clarified by the MIPv6 specification.
442 */
443 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
444 dst_link_failure(skb);
445 return -1;
446 }
447
448 return 0;
449 }
450
ip6_forward_finish(struct net * net,struct sock * sk,struct sk_buff * skb)451 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
452 struct sk_buff *skb)
453 {
454 struct dst_entry *dst = skb_dst(skb);
455
456 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
457 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
458
459 #ifdef CONFIG_NET_SWITCHDEV
460 if (skb->offload_l3_fwd_mark) {
461 consume_skb(skb);
462 return 0;
463 }
464 #endif
465
466 skb->tstamp = 0;
467 return dst_output(net, sk, skb);
468 }
469
ip6_pkt_too_big(const struct sk_buff * skb,unsigned int mtu)470 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
471 {
472 if (skb->len <= mtu)
473 return false;
474
475 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
476 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
477 return true;
478
479 if (skb->ignore_df)
480 return false;
481
482 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
483 return false;
484
485 return true;
486 }
487
/*
 * ip6_forward - forward a received IPv6 packet along its dst
 * @skb: packet with a dst entry already attached
 *
 * Runs the forwarding checks in order: forwarding enabled in the
 * namespace, packet type PACKET_HOST, no owning socket, no LRO, XFRM
 * forward policy, Router Alert delivery, hop limit, NDP proxying, XFRM
 * route, redirect generation or source-address sanity, and finally the
 * forward MTU.  When everything passes, the hop limit is decremented
 * after skb_cow() and the packet goes through the NF_INET_FORWARD hook
 * to ip6_forward_finish().
 *
 * Returns 0 when ip6_call_ra_chain() took the packet, the ip6_input()
 * result for proxied neighbour discovery, the hook result on the normal
 * path, -ETIMEDOUT on hop limit expiry, -EMSGSIZE when the packet is too
 * big, and -EINVAL for all other drops.  The skb is freed on the error
 * paths taken here.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	struct inet6_dev *idev;
	u32 mtu;

	/*
	 * Input-side statistics are charged to the device the packet arrived
	 * on (IP6CB iif).  idev is tested for NULL below before its
	 * configuration is read.
	 */
	idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	/* Packets owned by a local socket are not forwarded. */
	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!net->ipv6.devconf_all->disable_policy &&
	    (!idev || !idev->cnf.disable_policy) &&
	    !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		/* Non-zero means a socket took the skb; nothing left to do. */
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		/* > 0: ND message for the input path, < 0: must be dropped. */
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* Re-read the dst after xfrm6_route_forward(). */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	 * send redirects to source routed frames.
	 * We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		/* Redirect to the gateway if there is one, else to the destination itself. */
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		 * and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	/* Never use a forward MTU below the IPv6 minimum. */
	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow() may have moved the data; reload the header pointer. */
	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	/* "error" additionally counts an address error before dropping. */
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
647
ip6_copy_metadata(struct sk_buff * to,struct sk_buff * from)648 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
649 {
650 to->pkt_type = from->pkt_type;
651 to->priority = from->priority;
652 to->protocol = from->protocol;
653 skb_dst_drop(to);
654 skb_dst_set(to, dst_clone(skb_dst(from)));
655 to->dev = from->dev;
656 to->mark = from->mark;
657
658 skb_copy_hash(to, from);
659
660 #ifdef CONFIG_NET_SCHED
661 to->tc_index = from->tc_index;
662 #endif
663 nf_copy(to, from);
664 skb_ext_copy(to, from);
665 skb_copy_secmark(to, from);
666 }
667
/*
 * ip6_fraglist_init - turn the head of a frag_list skb into fragment one
 * @skb:     head skb whose frag_list holds the remaining fragments
 * @hlen:    length of the headers that every fragment repeats
 * @prevhdr: pointer to the next-header byte that gets NEXTHDR_FRAGMENT
 * @nexthdr: protocol value to store in the fragment header
 * @frag_id: fragment identification, already in network byte order
 * @iter:    iterator state initialised here for ip6_fraglist_prepare()
 *
 * Detaches the frag_list from @skb into @iter, inserts a fragment header
 * (offset 0, More Fragments set) in front of the payload and fixes up the
 * lengths so that @skb describes only its own data.
 *
 * Returns 0 on success or -ENOMEM if the header snapshot could not be
 * allocated; in that case only *prevhdr has been modified.  On success
 * iter->tmp_hdr is an allocation that the caller must kfree().
 */
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER */
	/* Patch the chain first so the snapshot below already contains it. */
	*prevhdr = NEXTHDR_FRAGMENT;
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	/* Take the fragment chain away from the head skb. */
	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	/*
	 * Step over the headers, open room for the fragment header directly
	 * in front of the payload, then re-add @hlen bytes ahead of it and
	 * restore the saved headers there.
	 */
	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);
	fh->identification = frag_id;

	/* The head now accounts only for its linear data and page frags. */
	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);
708
/*
 * ip6_fraglist_prepare - build the headers of the next frag_list fragment
 * @skb:  fragment that is about to be sent (already carries its headers)
 * @iter: iterator set up by ip6_fraglist_init(); iter->frag is the
 *        fragment to prepare and must not be NULL
 *
 * Pushes a fragment header and a copy of the saved headers onto
 * iter->frag, advances the running offset by the payload length of @skb,
 * and copies @skb's metadata to the new fragment.
 */
void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	/* The fragment's current data start becomes its transport header. */
	skb_reset_transport_header(frag);
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	/* Offset of this fragment = previous offset + previous payload. */
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	/* More Fragments is set on all but the last element of the chain. */
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);
733
/*
 * Set up @state for slow-path fragmentation of @skb with
 * ip6_frag_next(): remember the header layout and identification, the
 * per-fragment payload budget (@mtu), the head/tail room every new
 * fragment needs, and start with all data after the first @hlen bytes
 * still to be sent at fragment offset 0.
 */
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
	/* Header description shared by all fragments. */
	state->hlen = hlen;
	state->prevhdr = prevhdr;
	state->nexthdr = nexthdr;
	state->frag_id = frag_id;

	/* Buffer geometry for each newly allocated fragment. */
	state->mtu = mtu;
	state->hroom = hdr_room;
	state->troom = needed_tailroom;

	/* Progress through the original packet. */
	state->ptr = hlen;			/* Where to start from */
	state->left = skb->len - hlen;		/* Bytes still to be copied */
	state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);
754
/*
 * ip6_frag_next - allocate and fill the next slow-path fragment
 * @skb:   original packet being fragmented (not modified here)
 * @state: progress state from ip6_frag_init(); updated on success
 *
 * Allocates a new skb holding a copy of the first state->hlen bytes of
 * @skb's headers, a fragment header and up to state->mtu bytes of
 * payload copied from @skb at state->ptr.
 *
 * Returns the new fragment, or ERR_PTR(-ENOMEM) if allocation failed (in
 * which case @state is unchanged).
 */
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	   then align the next start on an eight byte boundary */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 *	Set up data on packet
	 */

	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	/* Layout: [headers: hlen][fragment header][payload: len] */
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 *	Charge the memory for the fragment to any owner
	 *	it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 *	Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	/*
	 * Rewrite, in the copy only, the next-header byte that sits at the
	 * same offset as @prevhdr does in the original.
	 */
	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 *	Build fragment header.
	 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 *	Copy a block of the IP datagram.
	 */
	/* The copy itself happens inside BUG_ON(); failure is treated as fatal. */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	/* More Fragments is set while payload remains. */
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);
830
/*
 * ip6_fragment - split an IPv6 packet into fragments and send each one
 * @net:    network namespace
 * @sk:     socket passed through to @output
 * @skb:    packet to fragment; always consumed
 * @output: function that transmits one finished fragment
 *
 * Two strategies are used.  If @skb already carries a frag_list whose
 * elements have suitable sizes, alignment and headroom (fast path), each
 * element gets headers pushed in place and is sent without copying.
 * Otherwise (slow path) new skbs are allocated and the payload is copied
 * into them piece by piece.
 *
 * Returns 0 when all fragments were handed to @output successfully,
 * -EMSGSIZE when the packet may not be fragmented or cannot fit (an ICMPv6
 * Packet Too Big is sent), or another negative error from header parsing,
 * checksumming, allocation or @output.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	/* The per-socket frag_size is only consulted outside device recursion. */
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	/* Saved so that every fragment gets the original timestamp. */
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	/* hlen: length of the headers that are repeated in every fragment. */
	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	/* Keep an offset too: prevhdr is recomputed after the checksum step. */
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	/* A non-zero per-socket frag_size below the MTU lowers the limit. */
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	/* Each fragment must carry at least 8 bytes of payload. */
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	/* From here on mtu is the payload budget per fragment. */
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	/* Resolve a pending partial checksum before the payload is split. */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		/* The head must fit, be 8-byte aligned, unshared and roomy. */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			/*
			 * Each fragment becomes its own packet, so move its
			 * share of the socket accounting from the head to it.
			 */
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		/* We prevent @rt from being freed. */
		rcu_read_lock();

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb->tstamp = tstamp;
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			rcu_read_unlock();
			return 0;
		}

		/* Free the fragments that were never handed to @output. */
		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		rcu_read_unlock();
		return err;

slow_path_clean:
		/*
		 * Undo the ownership and truesize changes made to the
		 * fragments visited before the unsuitable one.
		 */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 *	Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 *	Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 *	Put this fragment into the sending queue.
		 */
		frag->tstamp = tstamp;
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	/* All data was copied into fragments; the original is done. */
	consume_skb(skb);
	return err;

fail_toobig:
	/* With an allfrag route, stop the socket from using GSO. */
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
1018
ip6_rt_check(const struct rt6key * rt_key,const struct in6_addr * fl_addr,const struct in6_addr * addr_cache)1019 static inline int ip6_rt_check(const struct rt6key *rt_key,
1020 const struct in6_addr *fl_addr,
1021 const struct in6_addr *addr_cache)
1022 {
1023 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
1024 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
1025 }
1026
ip6_sk_dst_check(struct sock * sk,struct dst_entry * dst,const struct flowi6 * fl6)1027 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
1028 struct dst_entry *dst,
1029 const struct flowi6 *fl6)
1030 {
1031 struct ipv6_pinfo *np = inet6_sk(sk);
1032 struct rt6_info *rt;
1033
1034 if (!dst)
1035 goto out;
1036
1037 if (dst->ops->family != AF_INET6) {
1038 dst_release(dst);
1039 return NULL;
1040 }
1041
1042 rt = (struct rt6_info *)dst;
1043 /* Yes, checking route validity in not connected
1044 * case is not very simple. Take into account,
1045 * that we do not support routing by source, TOS,
1046 * and MSG_DONTROUTE --ANK (980726)
1047 *
1048 * 1. ip6_rt_check(): If route was host route,
1049 * check that cached destination is current.
1050 * If it is network route, we still may
1051 * check its validity using saved pointer
1052 * to the last used address: daddr_cache.
1053 * We do not want to save whole address now,
1054 * (because main consumer of this service
1055 * is tcp, which has not this problem),
1056 * so that the last trick works only on connected
1057 * sockets.
1058 * 2. oif also should be the same.
1059 */
1060 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
1061 #ifdef CONFIG_IPV6_SUBTREES
1062 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
1063 #endif
1064 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
1065 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
1066 dst_release(dst);
1067 dst = NULL;
1068 }
1069
1070 out:
1071 return dst;
1072 }
1073
ip6_dst_lookup_tail(struct net * net,const struct sock * sk,struct dst_entry ** dst,struct flowi6 * fl6)1074 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
1075 struct dst_entry **dst, struct flowi6 *fl6)
1076 {
1077 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1078 struct neighbour *n;
1079 struct rt6_info *rt;
1080 #endif
1081 int err;
1082 int flags = 0;
1083
1084 /* The correct way to handle this would be to do
1085 * ip6_route_get_saddr, and then ip6_route_output; however,
1086 * the route-specific preferred source forces the
1087 * ip6_route_output call _before_ ip6_route_get_saddr.
1088 *
1089 * In source specific routing (no src=any default route),
1090 * ip6_route_output will fail given src=any saddr, though, so
1091 * that's why we try it again later.
1092 */
1093 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
1094 struct fib6_info *from;
1095 struct rt6_info *rt;
1096 bool had_dst = *dst != NULL;
1097
1098 if (!had_dst)
1099 *dst = ip6_route_output(net, sk, fl6);
1100 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
1101
1102 rcu_read_lock();
1103 from = rt ? rcu_dereference(rt->from) : NULL;
1104 err = ip6_route_get_saddr(net, from, &fl6->daddr,
1105 sk ? inet6_sk(sk)->srcprefs : 0,
1106 &fl6->saddr);
1107 rcu_read_unlock();
1108
1109 if (err)
1110 goto out_err_release;
1111
1112 /* If we had an erroneous initial result, pretend it
1113 * never existed and let the SA-enabled version take
1114 * over.
1115 */
1116 if (!had_dst && (*dst)->error) {
1117 dst_release(*dst);
1118 *dst = NULL;
1119 }
1120
1121 if (fl6->flowi6_oif)
1122 flags |= RT6_LOOKUP_F_IFACE;
1123 }
1124
1125 if (!*dst)
1126 *dst = ip6_route_output_flags(net, sk, fl6, flags);
1127
1128 err = (*dst)->error;
1129 if (err)
1130 goto out_err_release;
1131
1132 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1133 /*
1134 * Here if the dst entry we've looked up
1135 * has a neighbour entry that is in the INCOMPLETE
1136 * state and the src address from the flow is
1137 * marked as OPTIMISTIC, we release the found
1138 * dst entry and replace it instead with the
1139 * dst entry of the nexthop router
1140 */
1141 rt = (struct rt6_info *) *dst;
1142 rcu_read_lock_bh();
1143 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1144 rt6_nexthop(rt, &fl6->daddr));
1145 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1146 rcu_read_unlock_bh();
1147
1148 if (err) {
1149 struct inet6_ifaddr *ifp;
1150 struct flowi6 fl_gw6;
1151 int redirect;
1152
1153 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1154 (*dst)->dev, 1);
1155
1156 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1157 if (ifp)
1158 in6_ifa_put(ifp);
1159
1160 if (redirect) {
1161 /*
1162 * We need to get the dst entry for the
1163 * default router instead
1164 */
1165 dst_release(*dst);
1166 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1167 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1168 *dst = ip6_route_output(net, sk, &fl_gw6);
1169 err = (*dst)->error;
1170 if (err)
1171 goto out_err_release;
1172 }
1173 }
1174 #endif
1175 if (ipv6_addr_v4mapped(&fl6->saddr) &&
1176 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1177 err = -EAFNOSUPPORT;
1178 goto out_err_release;
1179 }
1180
1181 return 0;
1182
1183 out_err_release:
1184 dst_release(*dst);
1185 *dst = NULL;
1186
1187 if (err == -ENETUNREACH)
1188 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1189 return err;
1190 }
1191
1192 /**
1193 * ip6_dst_lookup - perform route lookup on flow
1194 * @net: Network namespace to perform lookup in
1195 * @sk: socket which provides route info
1196 * @dst: pointer to dst_entry * for result
1197 * @fl6: flow to lookup
1198 *
1199 * This function performs a route lookup on the given flow.
1200 *
1201 * It returns zero on success, or a standard errno code on error.
1202 */
ip6_dst_lookup(struct net * net,struct sock * sk,struct dst_entry ** dst,struct flowi6 * fl6)1203 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1204 struct flowi6 *fl6)
1205 {
1206 *dst = NULL;
1207 return ip6_dst_lookup_tail(net, sk, dst, fl6);
1208 }
1209 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1210
1211 /**
1212 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1213 * @net: Network namespace to perform lookup in
1214 * @sk: socket which provides route info
1215 * @fl6: flow to lookup
1216 * @final_dst: final destination address for ipsec lookup
1217 *
1218 * This function performs a route lookup on the given flow.
1219 *
1220 * It returns a valid dst pointer on success, or a pointer encoded
1221 * error code.
1222 */
ip6_dst_lookup_flow(struct net * net,const struct sock * sk,struct flowi6 * fl6,const struct in6_addr * final_dst)1223 struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1224 const struct in6_addr *final_dst)
1225 {
1226 struct dst_entry *dst = NULL;
1227 int err;
1228
1229 err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1230 if (err)
1231 return ERR_PTR(err);
1232 if (final_dst)
1233 fl6->daddr = *final_dst;
1234
1235 return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1236 }
1237 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1238
1239 /**
1240 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1241 * @sk: socket which provides the dst cache and route info
1242 * @fl6: flow to lookup
1243 * @final_dst: final destination address for ipsec lookup
1244 * @connected: whether @sk is connected or not
1245 *
1246 * This function performs a route lookup on the given flow with the
1247 * possibility of using the cached route in the socket if it is valid.
1248 * It will take the socket dst lock when operating on the dst cache.
1249 * As a result, this function can only be used in process context.
1250 *
1251 * In addition, for a connected socket, cache the dst in the socket
1252 * if the current cache is not valid.
1253 *
1254 * It returns a valid dst pointer on success, or a pointer encoded
1255 * error code.
1256 */
ip6_sk_dst_lookup_flow(struct sock * sk,struct flowi6 * fl6,const struct in6_addr * final_dst,bool connected)1257 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1258 const struct in6_addr *final_dst,
1259 bool connected)
1260 {
1261 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1262
1263 dst = ip6_sk_dst_check(sk, dst, fl6);
1264 if (dst)
1265 return dst;
1266
1267 dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
1268 if (connected && !IS_ERR(dst))
1269 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1270
1271 return dst;
1272 }
1273 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1274
1275 /**
1276 * ip6_dst_lookup_tunnel - perform route lookup on tunnel
1277 * @skb: Packet for which lookup is done
1278 * @dev: Tunnel device
1279 * @net: Network namespace of tunnel device
1280 * @sock: Socket which provides route info
1281 * @saddr: Memory to store the src ip address
1282 * @info: Tunnel information
1283 * @protocol: IP protocol
1284 * @use_cache: Flag to enable cache usage
1285 * This function performs a route lookup on a tunnel
1286 *
1287 * It returns a valid dst pointer and stores src address to be used in
1288 * tunnel in param saddr on success, else a pointer encoded error code.
1289 */
1290
ip6_dst_lookup_tunnel(struct sk_buff * skb,struct net_device * dev,struct net * net,struct socket * sock,struct in6_addr * saddr,const struct ip_tunnel_info * info,u8 protocol,bool use_cache)1291 struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
1292 struct net_device *dev,
1293 struct net *net,
1294 struct socket *sock,
1295 struct in6_addr *saddr,
1296 const struct ip_tunnel_info *info,
1297 u8 protocol,
1298 bool use_cache)
1299 {
1300 struct dst_entry *dst = NULL;
1301 #ifdef CONFIG_DST_CACHE
1302 struct dst_cache *dst_cache;
1303 #endif
1304 struct flowi6 fl6;
1305 __u8 prio;
1306
1307 #ifdef CONFIG_DST_CACHE
1308 dst_cache = (struct dst_cache *)&info->dst_cache;
1309 if (use_cache) {
1310 dst = dst_cache_get_ip6(dst_cache, saddr);
1311 if (dst)
1312 return dst;
1313 }
1314 #endif
1315 memset(&fl6, 0, sizeof(fl6));
1316 fl6.flowi6_mark = skb->mark;
1317 fl6.flowi6_proto = protocol;
1318 fl6.daddr = info->key.u.ipv6.dst;
1319 fl6.saddr = info->key.u.ipv6.src;
1320 prio = info->key.tos;
1321 fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label);
1322
1323 dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
1324 NULL);
1325 if (IS_ERR(dst)) {
1326 netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
1327 return ERR_PTR(-ENETUNREACH);
1328 }
1329 if (dst->dev == dev) { /* is this necessary? */
1330 netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
1331 dst_release(dst);
1332 return ERR_PTR(-ELOOP);
1333 }
1334 #ifdef CONFIG_DST_CACHE
1335 if (use_cache)
1336 dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
1337 #endif
1338 *saddr = fl6.saddr;
1339 return dst;
1340 }
1341 EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
1342
/*
 * Duplicate an option header with kmemdup(); the copy is
 * (hdrlen + 1) * 8 bytes long.  A NULL @src yields NULL.
 */
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	if (!src)
		return NULL;

	return kmemdup(src, (src->hdrlen + 1) * 8, gfp);
}
1348
/*
 * Duplicate a routing header with kmemdup(); the copy is
 * (hdrlen + 1) * 8 bytes long.  A NULL @src yields NULL.
 */
static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	size_t nbytes;

	if (!src)
		return NULL;

	nbytes = (src->hdrlen + 1) * 8;
	return kmemdup(src, nbytes, gfp);
}
1354
ip6_append_data_mtu(unsigned int * mtu,int * maxfraglen,unsigned int fragheaderlen,struct sk_buff * skb,struct rt6_info * rt,unsigned int orig_mtu)1355 static void ip6_append_data_mtu(unsigned int *mtu,
1356 int *maxfraglen,
1357 unsigned int fragheaderlen,
1358 struct sk_buff *skb,
1359 struct rt6_info *rt,
1360 unsigned int orig_mtu)
1361 {
1362 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1363 if (!skb) {
1364 /* first fragment, reserve header_len */
1365 *mtu = orig_mtu - rt->dst.header_len;
1366
1367 } else {
1368 /*
1369 * this fragment is not first, the headers
1370 * space is regarded as data space.
1371 */
1372 *mtu = orig_mtu;
1373 }
1374 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1375 + fragheaderlen - sizeof(struct frag_hdr);
1376 }
1377 }
1378
ip6_setup_cork(struct sock * sk,struct inet_cork_full * cork,struct inet6_cork * v6_cork,struct ipcm6_cookie * ipc6,struct rt6_info * rt,struct flowi6 * fl6)1379 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1380 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1381 struct rt6_info *rt, struct flowi6 *fl6)
1382 {
1383 struct ipv6_pinfo *np = inet6_sk(sk);
1384 unsigned int mtu;
1385 struct ipv6_txoptions *opt = ipc6->opt;
1386
1387 /*
1388 * setup for corking
1389 */
1390 if (opt) {
1391 if (WARN_ON(v6_cork->opt))
1392 return -EINVAL;
1393
1394 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1395 if (unlikely(!v6_cork->opt))
1396 return -ENOBUFS;
1397
1398 v6_cork->opt->tot_len = sizeof(*opt);
1399 v6_cork->opt->opt_flen = opt->opt_flen;
1400 v6_cork->opt->opt_nflen = opt->opt_nflen;
1401
1402 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1403 sk->sk_allocation);
1404 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1405 return -ENOBUFS;
1406
1407 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1408 sk->sk_allocation);
1409 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1410 return -ENOBUFS;
1411
1412 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1413 sk->sk_allocation);
1414 if (opt->hopopt && !v6_cork->opt->hopopt)
1415 return -ENOBUFS;
1416
1417 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1418 sk->sk_allocation);
1419 if (opt->srcrt && !v6_cork->opt->srcrt)
1420 return -ENOBUFS;
1421
1422 /* need source address above miyazawa*/
1423 }
1424 dst_hold(&rt->dst);
1425 cork->base.dst = &rt->dst;
1426 cork->fl.u.ip6 = *fl6;
1427 v6_cork->hop_limit = ipc6->hlimit;
1428 v6_cork->tclass = ipc6->tclass;
1429 if (rt->dst.flags & DST_XFRM_TUNNEL)
1430 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1431 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1432 else
1433 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1434 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1435 if (np->frag_size < mtu) {
1436 if (np->frag_size)
1437 mtu = np->frag_size;
1438 }
1439 cork->base.fragsize = mtu;
1440 cork->base.gso_size = ipc6->gso_size;
1441 cork->base.tx_flags = 0;
1442 cork->base.mark = ipc6->sockc.mark;
1443 sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1444
1445 if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1446 cork->base.flags |= IPCORK_ALLFRAG;
1447 cork->base.length = 0;
1448
1449 cork->base.transmit_time = ipc6->sockc.transmit_time;
1450
1451 return 0;
1452 }
1453
/*
 * Append @length bytes, fetched through @getfrag, to the skbs pending on
 * @queue, allocating further skbs whenever the tail skb has no room left.
 *
 * @sk:          sending socket
 * @fl6:         flow; only used here when reporting errors back to @sk
 * @queue:       list of pending skbs that is extended
 * @cork:        cork state (route, fragsize, gso_size, length, tx_flags)
 * @v6_cork:     IPv6 cork state; its ->opt supplies the header lengths
 * @pfrag:       page fragment used when data is appended as a paged frag
 * @getfrag:     callback copying @len bytes at @offset of @from to @to
 * @from:        opaque cookie handed to @getfrag
 * @length:      number of bytes to append (transport header included)
 * @transhdrlen: transport header length; non-zero only for the first skb
 * @flags:       MSG_* flags (MSG_MORE, MSG_DONTWAIT, MSG_CONFIRM,
 *               MSG_ZEROCOPY are examined)
 * @ipc6:        per-call control data; only ->dontfrag is read here
 *
 * Returns 0 on success or a negative errno.  The size checks and the
 * zerocopy setup return before cork->length is modified.  Failures after
 * that point go through the "error" label, which takes the not yet
 * appended part of @length back off cork->length, counts an
 * IPSTATS_MIB_OUTDISCARDS and charges the write memory accumulated so far
 * to the socket.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	/* An empty queue means the first skb of the datagram is still to
	 * be built; only that skb accounts for the extension headers.
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	/* With a GSO size set, build against IP6_MAX_MTU and keep the
	 * payload in page frags.
	 */
	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	/* headers that are repeated in every fragment */
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);

	/* all headers of the unfragmented datagram */
	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* reject an mtu for which the maxfraglen computation below would
	 * not leave a positive value
	 */
	if (mtu <= fragheaderlen ||
	    ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
		goto emsgsize;

	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	/* dontfrag on a UDP or RAW socket: report the usable size to the
	 * socket and fail instead of fragmenting
	 */
	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	/* Zerocopy is only kept when the device does scatter-gather and
	 * the checksum is offloaded; otherwise uarg->zerocopy is cleared
	 * and the data is copied as usual.
	 */
	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	/* nothing queued yet: jump straight into the allocation path,
	 * which lives inside the loop body below
	 */
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen, alloc_extra;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			/* fraggap: bytes the current skb holds beyond
			 * maxfraglen; they are moved to the new skb below
			 */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			/* room needed beyond the fragment itself */
			alloc_extra = hh_len;
			alloc_extra += dst_exthdrlen;
			alloc_extra += rt->dst.trailer_len;

			/* We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloc_extra += sizeof(struct frag_hdr);

			/* Linear size of the new skb: a full mtu when more
			 * data is expected and the device has no
			 * scatter-gather, the whole fragment when it may be
			 * linear, otherwise at most MAX_HEADER with the
			 * remainder (pagedlen) left for page frags.
			 */
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged &&
				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
				  !(rt->dst.dev->features & NETIF_F_SG)))
				alloclen = fraglen;
			else {
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}
			alloclen += alloc_extra;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			fraglen = datalen + fragheaderlen;

			/* bytes to fetch through getfrag() into the linear
			 * part of the new skb
			 */
			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			/* The skb carrying the transport header goes through
			 * sock_alloc_send_skb(); later ones are allocated
			 * directly as long as the socket's write memory
			 * (including what this call added) stays within
			 * twice sk_sndbuf.
			 */
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk, alloclen,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			/* move the overhang of the previous skb over, keep
			 * both checksums in step and trim the previous skb
			 * back to maxfraglen
			 */
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			/* header lengths only apply to the first skb */
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			/* An skb without a destructor is charged to the
			 * socket here; its truesize is only added to
			 * sk_wmem_alloc at the end of this function.
			 * NOTE(review): presumably skbs obtained from
			 * sock_alloc_send_skb() already have one - confirm.
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		/* Three ways to add to the tail skb: into its linear
		 * tailroom (no scatter-gather device and enough room), as a
		 * copied page frag, or by zerocopy.
		 */
		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				/* undo the skb_put() above */
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			/* open a new, initially empty frag slot unless the
			 * data can extend the last one
			 */
			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	/* charge everything accumulated above to the socket in one go */
	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	if (uarg)
		sock_zerocopy_put_abort(uarg, extra_uref);
	/* @length now holds what was not appended */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}
1804
ip6_append_data(struct sock * sk,int getfrag (void * from,char * to,int offset,int len,int odd,struct sk_buff * skb),void * from,int length,int transhdrlen,struct ipcm6_cookie * ipc6,struct flowi6 * fl6,struct rt6_info * rt,unsigned int flags)1805 int ip6_append_data(struct sock *sk,
1806 int getfrag(void *from, char *to, int offset, int len,
1807 int odd, struct sk_buff *skb),
1808 void *from, int length, int transhdrlen,
1809 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1810 struct rt6_info *rt, unsigned int flags)
1811 {
1812 struct inet_sock *inet = inet_sk(sk);
1813 struct ipv6_pinfo *np = inet6_sk(sk);
1814 int exthdrlen;
1815 int err;
1816
1817 if (flags&MSG_PROBE)
1818 return 0;
1819 if (skb_queue_empty(&sk->sk_write_queue)) {
1820 /*
1821 * setup for corking
1822 */
1823 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1824 ipc6, rt, fl6);
1825 if (err)
1826 return err;
1827
1828 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1829 length += exthdrlen;
1830 transhdrlen += exthdrlen;
1831 } else {
1832 fl6 = &inet->cork.fl.u.ip6;
1833 transhdrlen = 0;
1834 }
1835
1836 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1837 &np->cork, sk_page_frag(sk), getfrag,
1838 from, length, transhdrlen, flags, ipc6);
1839 }
1840 EXPORT_SYMBOL_GPL(ip6_append_data);
1841
ip6_cork_release(struct inet_cork_full * cork,struct inet6_cork * v6_cork)1842 static void ip6_cork_release(struct inet_cork_full *cork,
1843 struct inet6_cork *v6_cork)
1844 {
1845 if (v6_cork->opt) {
1846 kfree(v6_cork->opt->dst0opt);
1847 kfree(v6_cork->opt->dst1opt);
1848 kfree(v6_cork->opt->hopopt);
1849 kfree(v6_cork->opt->srcrt);
1850 kfree(v6_cork->opt);
1851 v6_cork->opt = NULL;
1852 }
1853
1854 if (cork->base.dst) {
1855 dst_release(cork->base.dst);
1856 cork->base.dst = NULL;
1857 cork->base.flags &= ~IPCORK_ALLFRAG;
1858 }
1859 memset(&cork->fl, 0, sizeof(cork->fl));
1860 }
1861
__ip6_make_skb(struct sock * sk,struct sk_buff_head * queue,struct inet_cork_full * cork,struct inet6_cork * v6_cork)1862 struct sk_buff *__ip6_make_skb(struct sock *sk,
1863 struct sk_buff_head *queue,
1864 struct inet_cork_full *cork,
1865 struct inet6_cork *v6_cork)
1866 {
1867 struct sk_buff *skb, *tmp_skb;
1868 struct sk_buff **tail_skb;
1869 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1870 struct ipv6_pinfo *np = inet6_sk(sk);
1871 struct net *net = sock_net(sk);
1872 struct ipv6hdr *hdr;
1873 struct ipv6_txoptions *opt = v6_cork->opt;
1874 struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1875 struct flowi6 *fl6 = &cork->fl.u.ip6;
1876 unsigned char proto = fl6->flowi6_proto;
1877
1878 skb = __skb_dequeue(queue);
1879 if (!skb)
1880 goto out;
1881 tail_skb = &(skb_shinfo(skb)->frag_list);
1882
1883 /* move skb->data to ip header from ext header */
1884 if (skb->data < skb_network_header(skb))
1885 __skb_pull(skb, skb_network_offset(skb));
1886 while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1887 __skb_pull(tmp_skb, skb_network_header_len(skb));
1888 *tail_skb = tmp_skb;
1889 tail_skb = &(tmp_skb->next);
1890 skb->len += tmp_skb->len;
1891 skb->data_len += tmp_skb->len;
1892 skb->truesize += tmp_skb->truesize;
1893 tmp_skb->destructor = NULL;
1894 tmp_skb->sk = NULL;
1895 }
1896
1897 /* Allow local fragmentation. */
1898 skb->ignore_df = ip6_sk_ignore_df(sk);
1899
1900 *final_dst = fl6->daddr;
1901 __skb_pull(skb, skb_network_header_len(skb));
1902 if (opt && opt->opt_flen)
1903 ipv6_push_frag_opts(skb, opt, &proto);
1904 if (opt && opt->opt_nflen)
1905 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1906
1907 skb_push(skb, sizeof(struct ipv6hdr));
1908 skb_reset_network_header(skb);
1909 hdr = ipv6_hdr(skb);
1910
1911 ip6_flow_hdr(hdr, v6_cork->tclass,
1912 ip6_make_flowlabel(net, skb, fl6->flowlabel,
1913 ip6_autoflowlabel(net, np), fl6));
1914 hdr->hop_limit = v6_cork->hop_limit;
1915 hdr->nexthdr = proto;
1916 hdr->saddr = fl6->saddr;
1917 hdr->daddr = *final_dst;
1918
1919 skb->priority = sk->sk_priority;
1920 skb->mark = cork->base.mark;
1921
1922 skb->tstamp = cork->base.transmit_time;
1923
1924 skb_dst_set(skb, dst_clone(&rt->dst));
1925 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1926 if (proto == IPPROTO_ICMPV6) {
1927 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1928 u8 icmp6_type;
1929
1930 if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl)
1931 icmp6_type = fl6->fl6_icmp_type;
1932 else
1933 icmp6_type = icmp6_hdr(skb)->icmp6_type;
1934 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
1935 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1936 }
1937
1938 ip6_cork_release(cork, v6_cork);
1939 out:
1940 return skb;
1941 }
1942
ip6_send_skb(struct sk_buff * skb)1943 int ip6_send_skb(struct sk_buff *skb)
1944 {
1945 struct net *net = sock_net(skb->sk);
1946 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1947 int err;
1948
1949 err = ip6_local_out(net, skb->sk, skb);
1950 if (err) {
1951 if (err > 0)
1952 err = net_xmit_errno(err);
1953 if (err)
1954 IP6_INC_STATS(net, rt->rt6i_idev,
1955 IPSTATS_MIB_OUTDISCARDS);
1956 }
1957
1958 return err;
1959 }
1960
/*
 * Finish the datagram pending on @sk with ip6_finish_skb() and send it.
 * Returns 0 if there was nothing to send, otherwise the result of
 * ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1972
__ip6_flush_pending_frames(struct sock * sk,struct sk_buff_head * queue,struct inet_cork_full * cork,struct inet6_cork * v6_cork)1973 static void __ip6_flush_pending_frames(struct sock *sk,
1974 struct sk_buff_head *queue,
1975 struct inet_cork_full *cork,
1976 struct inet6_cork *v6_cork)
1977 {
1978 struct sk_buff *skb;
1979
1980 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1981 if (skb_dst(skb))
1982 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1983 IPSTATS_MIB_OUTDISCARDS);
1984 kfree_skb(skb);
1985 }
1986
1987 ip6_cork_release(cork, v6_cork);
1988 }
1989
ip6_flush_pending_frames(struct sock * sk)1990 void ip6_flush_pending_frames(struct sock *sk)
1991 {
1992 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1993 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1994 }
1995 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1996
ip6_make_skb(struct sock * sk,int getfrag (void * from,char * to,int offset,int len,int odd,struct sk_buff * skb),void * from,int length,int transhdrlen,struct ipcm6_cookie * ipc6,struct flowi6 * fl6,struct rt6_info * rt,unsigned int flags,struct inet_cork_full * cork)1997 struct sk_buff *ip6_make_skb(struct sock *sk,
1998 int getfrag(void *from, char *to, int offset,
1999 int len, int odd, struct sk_buff *skb),
2000 void *from, int length, int transhdrlen,
2001 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
2002 struct rt6_info *rt, unsigned int flags,
2003 struct inet_cork_full *cork)
2004 {
2005 struct inet6_cork v6_cork;
2006 struct sk_buff_head queue;
2007 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
2008 int err;
2009
2010 if (flags & MSG_PROBE)
2011 return NULL;
2012
2013 __skb_queue_head_init(&queue);
2014
2015 cork->base.flags = 0;
2016 cork->base.addr = 0;
2017 cork->base.opt = NULL;
2018 cork->base.dst = NULL;
2019 v6_cork.opt = NULL;
2020 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
2021 if (err) {
2022 ip6_cork_release(cork, &v6_cork);
2023 return ERR_PTR(err);
2024 }
2025 if (ipc6->dontfrag < 0)
2026 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
2027
2028 err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
2029 ¤t->task_frag, getfrag, from,
2030 length + exthdrlen, transhdrlen + exthdrlen,
2031 flags, ipc6);
2032 if (err) {
2033 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
2034 return ERR_PTR(err);
2035 }
2036
2037 return __ip6_make_skb(sk, &queue, cork, &v6_cork);
2038 }
2039