/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/gue.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

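/* Tunnels are hashed on the tunnel key and the remote address; see
 * ip_bucket() and ip_tunnel_lookup() below for how the buckets are used.
 */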
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}

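/* Each tunnel keeps a per-cpu cache of its output route and of the
 * source address chosen for it.  The helpers below install, fetch and
 * invalidate those cached entries.
 */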
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst, __be32 saddr)
{
	struct dst_entry *old_dst;

	dst_clone(dst);
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
	idst->saddr = saddr;
}

static noinline void tunnel_dst_set(struct ip_tunnel *t,
				    struct dst_entry *dst, __be32 saddr)
{
	__tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
}

static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL, 0);
}

void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
	int i;

	for_each_possible_cpu(i)
		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);

static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
					u32 cookie, __be32 *saddr)
{
	struct ip_tunnel_dst *idst;
	struct dst_entry *dst;

	rcu_read_lock();
	idst = raw_cpu_ptr(t->dst_cache);
	dst = rcu_dereference(idst->dst);
	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
		dst = NULL;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, cookie)) {
			*saddr = idst->saddr;
		} else {
			tunnel_dst_reset(t);
			dst_release(dst);
			dst = NULL;
		}
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}

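/* A packet matches a tunnel only if both sides agree on keying: either
 * both carry TUNNEL_KEY and the key values are equal, or neither side
 * uses a key.
 */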
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless
   tunnel, will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for an input packet.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

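/*
 * Illustrative sketch (not part of the original file): how a receive
 * path might resolve the tunnel for an incoming packet, loosely modeled
 * on the GRE receive path.  The function name is hypothetical and the
 * snippet is compiled out.
 */
#if 0
static struct ip_tunnel *example_lookup(struct ip_tunnel_net *itn,
					struct sk_buff *skb,
					const struct tnl_ptk_info *tpi)
{
	const struct iphdr *iph = ip_hdr(skb);

	return ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				iph->saddr, iph->daddr, tpi->key);
}
#endif
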
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}

static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

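/* Build the IPv4 flow key used to route tunnelled packets; the tunnel
 * key is stored in fl4_gre_key so route lookup can take it into account.
 */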
static inline void init_tunnel_flow(struct flowi4 *fl4,
				    int proto,
				    __be32 daddr, __be32 saddr,
				    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
}

static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < IPV4_MIN_MTU)
		mtu = IPV4_MIN_MTU;

	return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}

int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

static int ip_encap_hlen(struct ip_tunnel_encap *e)
{
	switch (e->type) {
	case TUNNEL_ENCAP_NONE:
		return 0;
	case TUNNEL_ENCAP_FOU:
		return sizeof(struct udphdr);
	case TUNNEL_ENCAP_GUE:
		return sizeof(struct udphdr) + sizeof(struct guehdr);
	default:
		return -EINVAL;
	}
}

int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

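/*
 * Illustrative sketch, not part of the original file: how a tunnel
 * driver might enable GUE encapsulation on a tunnel it owns.  The
 * function name and the destination port are arbitrary examples, and
 * the snippet is compiled out.
 */
#if 0
static int example_enable_gue(struct ip_tunnel *t)
{
	struct ip_tunnel_encap e = {
		.type	= TUNNEL_ENCAP_GUE,
		.sport	= 0,		/* 0: derive source port from the flow hash */
		.dport	= htons(6080),	/* example value, not an assigned port */
		.flags	= TUNNEL_ENCAP_FLAG_CSUM,
	};

	return ip_tunnel_encap_setup(t, &e);
}
#endif
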
static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
			    size_t hdr_len, u8 *protocol, struct flowi4 *fl4)
{
	struct udphdr *uh;
	__be16 sport;
	bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
	int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;

	skb = iptunnel_handle_offloads(skb, csum, type);

	if (IS_ERR(skb))
		return PTR_ERR(skb);

	/* Get length and hash before making space in skb */

	sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
					       skb, 0, 0, false);

	skb_push(skb, hdr_len);

	skb_reset_transport_header(skb);
	uh = udp_hdr(skb);

	if (e->type == TUNNEL_ENCAP_GUE) {
		struct guehdr *guehdr = (struct guehdr *)&uh[1];

		guehdr->version = 0;
		guehdr->hlen = 0;
		guehdr->flags = 0;
		guehdr->next_hdr = *protocol;
	}

	uh->dest = e->dport;
	uh->source = sport;
	uh->len = htons(skb->len);
	uh->check = 0;
	udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
		     fl4->saddr, fl4->daddr, skb->len);

	*protocol = IPPROTO_UDP;

	return 0;
}

int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
		    u8 *protocol, struct flowi4 *fl4)
{
	switch (t->encap.type) {
	case TUNNEL_ENCAP_NONE:
		return 0;
	case TUNNEL_ENCAP_FOU:
	case TUNNEL_ENCAP_GUE:
		return fou_build_header(skb, &t->encap, t->encap_hlen,
					protocol, fl4);
	default:
		return -EINVAL;
	}
}
EXPORT_SYMBOL(ip_tunnel_encap);

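/* Compare the packet against the path MTU of the tunnel route and send
 * ICMP "fragmentation needed" (or ICMPv6 "packet too big") back towards
 * the sender when an oversized packet cannot be fragmented.
 */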
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df,
			   const struct iphdr *inner_iph)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
		    mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

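/*
 * Illustrative sketch, not part of the original file: the general shape
 * of a driver ndo_start_xmit() that hands packets to ip_tunnel_xmit(),
 * loosely modeled on the ipip driver.  The function name is hypothetical
 * and the snippet is compiled out.
 */
#if 0
static netdev_tx_t example_start_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tiph = &tunnel->parms.iph;

	if (unlikely(skb->protocol != htons(ETH_P_IP)))
		goto tx_error;

	skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
	if (IS_ERR(skb))
		goto out;	/* iptunnel_handle_offloads() freed the skb */

	skb_set_inner_ipproto(skb, IPPROTO_IPIP);
	ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
	return NETDEV_TX_OK;

tx_error:
	kfree_skb(skb);
out:
	dev->stats.tx_errors++;
	return NETDEV_TX_OK;
}
#endif
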
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}

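/* Handle the legacy SIOC{GET,ADD,CHG,DEL}TUNNEL ioctls.  Add, change and
 * delete require CAP_NET_ADMIN in the tunnel's user namespace.
 */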
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

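/* 68 is the historical IPv4 minimum MTU; 0xFFF8 is the largest 8-byte
 * aligned value that fits in the 16-bit IP total-length field, from
 * which the link and tunnel header sizes are subtracted.
 */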
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

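/* Per-netns setup: initialise the tunnel hash table and, unless ops is
 * NULL, create and register the netns-local fallback device.
 */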
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing it to be moved to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
	if (!tunnel->dst_cache) {
		free_percpu(dev->tstats);
		return -ENOMEM;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		free_percpu(tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in the net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(netdev_priv(dev));

	ip_tunnel_dst_reset_all(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization; the rest of the init is done in the tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");