// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */
24
25 #include <linux/errno.h>
26 #include <linux/kernel.h>
27 #include <linux/string.h>
28 #include <linux/socket.h>
29 #include <linux/net.h>
30 #include <linux/netdevice.h>
31 #include <linux/if_arp.h>
32 #include <linux/in6.h>
33 #include <linux/tcp.h>
34 #include <linux/route.h>
35 #include <linux/module.h>
36 #include <linux/slab.h>
37
38 #include <linux/bpf-cgroup.h>
39 #include <linux/netfilter.h>
40 #include <linux/netfilter_ipv6.h>
41
42 #include <net/sock.h>
43 #include <net/snmp.h>
44
45 #include <net/ipv6.h>
46 #include <net/ndisc.h>
47 #include <net/protocol.h>
48 #include <net/ip6_route.h>
49 #include <net/addrconf.h>
50 #include <net/rawv6.h>
51 #include <net/icmp.h>
52 #include <net/xfrm.h>
53 #include <net/checksum.h>
54 #include <linux/mroute6.h>
55 #include <net/l3mdev.h>
56 #include <net/lwtunnel.h>
57 #include <net/ip_tunnels.h>
58
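/* Final transmit step for a routed skb: resolve (or create) the neighbour
 * entry for the route's nexthop and hand the packet to neigh_output().
 * Multicast packets are looped back to local listeners when required, and
 * lwtunnel xmit redirects are honoured before the neighbour lookup.
 */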
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct inet6_dev *idev = ip6_dst_idev(dst);
	unsigned int hh_len = LL_RESERVED_SPACE(dev);
	const struct in6_addr *daddr, *nexthop;
	struct ipv6hdr *hdr;
	struct neighbour *neigh;
	int ret;

	/* Be paranoid, rather than too clever. */
	if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
		skb = skb_expand_head(skb, hh_len);
		if (!skb) {
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
			return -ENOMEM;
		}
	}

	hdr = ipv6_hdr(skb);
	daddr = &hdr->daddr;
	if (ipv6_addr_is_multicast(daddr)) {
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			 * is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (hdr->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
		if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res != LWTUNNEL_XMIT_CONTINUE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
	neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb, false);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

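/* Software-segment a GSO packet whose segments would exceed the egress MTU,
 * then transmit or fragment each resulting segment individually.
 */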
static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
				    struct sk_buff *skb, unsigned int mtu)
{
	struct sk_buff *segs, *nskb;
	netdev_features_t features;
	int ret = 0;

	/* Please see corresponding comment in ip_finish_output_gso
	 * describing the cases where GSO segment length exceeds the
	 * egress MTU.
	 */
	features = netif_skb_features(skb);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	if (IS_ERR_OR_NULL(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	consume_skb(skb);

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		/* Last GSO segment can be smaller than gso_size (and MTU).
		 * Adding a fragment header would produce an "atomic fragment",
		 * which is considered harmful (RFC-8021). Avoid that.
		 */
		err = segs->len > mtu ?
			ip6_fragment(net, sk, segs, ip6_finish_output2) :
			ip6_finish_output2(net, sk, segs);
		if (err && ret == 0)
			ret = err;
	}

	return ret;
}

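/* Post-POSTROUTING transmit dispatch: re-run dst_output() if netfilter
 * rerouted the packet into an xfrm policy, segment oversized GSO packets,
 * fragment what needs fragmenting, and send the rest directly.
 */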
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	unsigned int mtu;

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IP6CB(skb)->flags |= IP6SKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	mtu = ip6_skb_dst_mtu(skb);
	if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
		return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);

	if ((skb->len > mtu && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}

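/* Run the cgroup BPF egress hook before the final output steps; a
 * NET_XMIT_CN verdict still transmits but is reported to the caller
 * unless the transmit itself returns an error.
 */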
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	switch (ret) {
	case NET_XMIT_SUCCESS:
		return __ip6_finish_output(net, sk, skb);
	case NET_XMIT_CN:
		return __ip6_finish_output(net, sk, skb) ? : ret;
	default:
		kfree_skb(skb);
		return ret;
	}
}

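/* Output entry point for locally generated IPv6 packets (installed as
 * dst->output): drops if IPv6 is disabled on the egress device, then
 * traverses NF_INET_POST_ROUTING unless the packet was already rerouted.
 */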
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, indev, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
EXPORT_SYMBOL(ip6_output);

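/* Effective auto-flowlabel setting: the per-socket override if one was
 * set, otherwise the per-namespace default.
 */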
bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct inet6_dev *idev = ip6_dst_idev(dst);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(head_room > skb_headroom(skb))) {
		skb = skb_expand_head(skb, head_room);
		if (!skb) {
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
			return -ENOBUFS;
		}
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 * Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dev,
			       dst_output);
	}

	skb->dev = dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

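/* A minimal sketch of how a transport protocol might drive ip6_xmit();
 * the caller and field values here are hypothetical, and the flow is
 * assumed to have been routed already (e.g. via ip6_dst_lookup_flow())
 * with the resulting dst attached to the skb:
 *
 *	struct flowi6 fl6 = { .flowi6_proto = IPPROTO_TCP,
 *			      .daddr = ..., .saddr = ... };
 *
 *	skb_dst_set(skb, dst_clone(dst));
 *	err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, tclass, priority);
 *
 * Note that ip6_xmit() never fragments: an oversized non-GSO packet
 * without ignore_df gets a local EMSGSIZE instead.
 */

/* Deliver a Router Alert packet to every raw socket that registered the
 * matching alert value via IPV6_ROUTER_ALERT; returns 1 if some socket
 * consumed the skb, 0 if the caller should keep forwarding it.
 */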
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;

		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			if (np && np->rtalert_isolate &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

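/* Decide what to do with a packet whose destination we proxy (proxy NDP):
 * returns 1 to deliver locally (NDISC messages), 0 to forward normally,
 * and -1 to drop (link-local destinations cannot be proxied).
 */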
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

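/* Last step of forwarding: bump the forwarding counters, clear any
 * leftover receive timestamp (so it is not mistaken for a transmit or
 * pacing time further down the stack) and hand the skb to dst_output().
 */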
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	skb->tstamp = 0;
	return dst_output(net, sk, skb);
}

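/* Does this packet exceed the egress MTU and thus warrant an
 * ICMPV6_PKT_TOOBIG? Conntrack-defragmented and GSO packets need
 * special-casing here.
 */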
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

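/* Forward a packet received on one interface out another: hop-limit
 * check, proxy-NDP handling, XFRM forward policy, redirect generation
 * and MTU enforcement, then the netfilter FORWARD hook and
 * ip6_forward_finish().
 */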
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	struct inet6_dev *idev;
	u32 mtu;

	idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!net->ipv6.devconf_all->disable_policy &&
	    (!idev || !idev->cnf.disable_policy) &&
	    !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 * We DO NOT make any processing on
	 * RA packets, pushing them to user level AS IS
	 * without any WARRANTY that application will be able
	 * to interpret them. The reason is that we
	 * cannot make anything clever here.
	 *
	 * We are not end-node, so that if packet contains
	 * AH/ESP, we cannot make anything.
	 * Defragmentation also would be mistake, RA packets
	 * cannot be fragmented, because there is no warranty
	 * that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 * check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);

		if (proxied > 0) {
			/* It's tempting to decrease the hop limit
			 * here by 1, as we do at the end of the
			 * function too.
			 *
			 * But that would be incorrect, as proxying is
			 * not forwarding.  The ip6_input function
			 * will handle this packet locally, and it
			 * depends on the hop limit being unchanged.
			 *
			 * One example is the NDP hop limit, that
			 * always has to stay 255, but other would be
			 * similar checks around RA packets, where the
			 * user can even change the desired limit.
			 */
			return ip6_input(skb);
		} else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	 * send redirects to source routed frames.
	 * We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 * incoming and outgoing devices are the same
		 * send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		 * and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_maybe_forward(dst, true);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

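/* Copy routing and classification metadata from the original packet to a
 * freshly allocated fragment so the fragment is handled identically.
 */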
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_ext_copy(to, from);
	skb_copy_secmark(to, from);
}

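/* Fast-path fragmentation setup: when the skb already carries a suitably
 * sized frag_list, each list member becomes one fragment. This copies the
 * unfragmentable header chain aside and inserts the first fragment header.
 */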
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER */
	*prevhdr = NEXTHDR_FRAGMENT;
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);
	fh->identification = frag_id;

	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);

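/* Prepend the per-fragment headers (the copied header chain plus a
 * fragment header carrying the running offset) to the next frag_list
 * member before it is transmitted.
 */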
void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);

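/* Slow-path fragmentation setup: record everything ip6_frag_next() needs
 * to carve the datagram into freshly allocated fragments.
 */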
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
	state->prevhdr = prevhdr;
	state->nexthdr = nexthdr;
	state->frag_id = frag_id;

	state->hlen = hlen;
	state->mtu = mtu;

	state->left = skb->len - hlen;	/* Space per frame */
	state->ptr = hlen;		/* Where to start from */

	state->hroom = hdr_room;
	state->troom = needed_tailroom;

	state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

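/* Allocate and fill the next slow-path fragment: copy the header chain,
 * insert a fragment header, then copy the next chunk of payload. Every
 * fragment but the last is a multiple of 8 bytes, since the fragment
 * offset field is expressed in 8-byte units.
 */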
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	 * then align the next start on an eight byte boundary */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 * Set up data on packet
	 */

	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 * Charge the memory for the fragment to any owner
	 * it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 * Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 * Build fragment header.
	 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 * Copy a block of the IP datagram.
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

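/* Fragment an IPv6 datagram and pass each fragment to @output. Tries the
 * fast path first (reusing an existing frag_list whose geometry already
 * matches), falling back to the slow path that copies the payload into
 * newly allocated fragments.
 */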
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		/* We prevent @rt from being freed. */
		rcu_read_lock();

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb->tstamp = tstamp;
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			rcu_read_unlock();
			return 0;
		}

		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		rcu_read_unlock();
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 * Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 * Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 * Put this fragment into the sending queue.
		 */
		frag->tstamp = tstamp;
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

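/* Returns non-zero when the cached route does NOT cover @fl_addr: neither
 * an exact host route (/128) nor the saved last-used address matches.
 */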
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

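/* Validate a dst cached in the socket against the flow; release it and
 * return NULL when it no longer matches (wrong family, address or oif).
 */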
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

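/* Core of the route lookup: resolve a source address when the flow has
 * none, perform the FIB lookup, and (with optimistic DAD) fall back to
 * the default router's dst while our source address is still tentative.
 */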
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr)) {
		struct fib6_info *from;
		struct rt6_info *rt;

		*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if ((*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: Network namespace to perform lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@net: Network namespace to perform lookup in
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@connected: whether @sk is connected or not
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	In addition, for a connected socket, cache the dst in the socket
 *	if the current cache is not valid.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool connected)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (dst)
		return dst;

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
	if (connected && !IS_ERR(dst))
		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

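/* A minimal, illustrative sketch of a datagram sender using the lookup
 * above; the variable names are hypothetical and error handling is
 * abbreviated:
 *
 *	struct flowi6 fl6 = { .flowi6_proto = sk->sk_protocol,
 *			      .daddr = *daddr, .flowi6_oif = oif };
 *	struct dst_entry *dst;
 *
 *	dst = ip6_sk_dst_lookup_flow(sk, &fl6, NULL, connected);
 *	if (IS_ERR(dst))
 *		return PTR_ERR(dst);
 *	(fl6.saddr now holds the selected source address)
 */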
/**
 *	ip6_dst_lookup_tunnel - perform route lookup on tunnel
 *	@skb: Packet for which lookup is done
 *	@dev: Tunnel device
 *	@net: Network namespace of tunnel device
 *	@sock: Socket which provides route info
 *	@saddr: Memory to store the src ip address
 *	@info: Tunnel information
 *	@protocol: IP protocol
 *	@use_cache: Flag to enable cache usage
 *
 *	This function performs a route lookup on a tunnel.
 *
 *	It returns a valid dst pointer and stores src address to be used in
 *	tunnel in param saddr on success, else a pointer encoded error code.
 */
struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
					struct net_device *dev,
					struct net *net,
					struct socket *sock,
					struct in6_addr *saddr,
					const struct ip_tunnel_info *info,
					u8 protocol,
					bool use_cache)
{
	struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
	struct dst_cache *dst_cache;
#endif
	struct flowi6 fl6;
	__u8 prio;

#ifdef CONFIG_DST_CACHE
	dst_cache = (struct dst_cache *)&info->dst_cache;
	if (use_cache) {
		dst = dst_cache_get_ip6(dst_cache, saddr);
		if (dst)
			return dst;
	}
#endif
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = protocol;
	fl6.daddr = info->key.u.ipv6.dst;
	fl6.saddr = info->key.u.ipv6.src;
	prio = info->key.tos;
	fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label);

	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
					      NULL);
	if (IS_ERR(dst)) {
		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
		return ERR_PTR(-ENETUNREACH);
	}
	if (dst->dev == dev) { /* is this necessary? */
		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
		dst_release(dst);
		return ERR_PTR(-ELOOP);
	}
#ifdef CONFIG_DST_CACHE
	if (use_cache)
		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
	*saddr = fl6.saddr;
	return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

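/* Recompute the effective MTU and maximum fragment length while appending:
 * only the first fragment has to leave room for rt->dst.header_len.
 */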
static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

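/* Prepare the cork for a new corked send: duplicate the extension headers,
 * pin the route, and derive the path MTU that __ip6_append_data() will
 * fragment against.
 */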
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa */
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}

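/* Core of the corked-send engine: append @length bytes (pulled in via
 * @getfrag) to the queue, growing the tail skb where possible and opening
 * new, MTU-sized skbs otherwise, so the queue can later be collapsed into
 * one datagram (plus fragments) by __ip6_make_skb().
 */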
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = atomic_inc_return(&sk->sk_tskey) - 1;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	if (mtu <= fragheaderlen ||
	    ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
		goto emsgsize;

	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				  sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen, alloc_extra;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			alloc_extra = hh_len;
			alloc_extra += dst_exthdrlen;
			alloc_extra += rt->dst.trailer_len;

			/* We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloc_extra += sizeof(struct frag_hdr);

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else if (!paged &&
				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
				  !(rt->dst.dev->features & NETIF_F_SG)))
				alloclen = fraglen;
			else {
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}
			alloclen += alloc_extra;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			fraglen = datalen + fragheaderlen;

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk, alloclen,
							  (flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features & NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	net_zcopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}

int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags & MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

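/* A minimal sketch of the corked-send pattern built on ip6_append_data();
 * the names and error handling are illustrative only. A datagram sender
 * typically appends under the socket lock and then flushes the queue as a
 * single datagram:
 *
 *	lock_sock(sk);
 *	err = ip6_append_data(sk, getfrag, msg, len, sizeof(struct udphdr),
 *			      &ipc6, &fl6, rt, msg_flags);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!(msg_flags & MSG_MORE))
 *		err = ip6_push_pending_frames(sk);
 *	release_sock(sk);
 */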
static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}

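/* Collapse the queue of pending skbs into one datagram: chain the tail
 * skbs onto the head's frag_list, push the extension headers and the
 * IPv6 header, then release the cork.
 */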
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = cork->base.mark;

	skb->tstamp = cork->base.transmit_time;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
		u8 icmp6_type;

		if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl)
			icmp6_type = fl6->fl6_icmp_type;
		else
			icmp6_type = icmp6_hdr(skb)->icmp6_type;
		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

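/* Hand a fully built datagram to ip6_local_out(), mapping positive
 * NET_XMIT verdicts to errno values and accounting real failures.
 */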
int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

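/* One-shot variant of the append/push pair: cork onto a private queue,
 * append everything in a single call and return the finished skb, used
 * e.g. when the whole datagram payload is available at once.
 */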
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     struct inet_cork_full *cork)
{
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork->base.flags = 0;
	cork->base.addr = 0;
	cork->base.opt = NULL;
	cork->base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
	if (err) {
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}