/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/gue.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
			 IP_TNL_HASH_BITS);
}

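/*
 * Illustrative note (not part of the original file): ip_tunnel_hash()
 * folds the tunnel key XORed with the remote address into
 * IP_TNL_HASH_BITS bits, which directly indexes the per-netns bucket
 * array, so a lookup only ever scans one short chain:
 *
 *	unsigned int h = ip_tunnel_hash(key, remote);
 *	struct hlist_head *head = &itn->tunnels[h];	// one of (1 << IP_TNL_HASH_BITS) buckets
 *
 * Tunnels that collide here are disambiguated by the full compares in
 * ip_tunnel_lookup() below.
 */
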
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst, __be32 saddr)
{
	struct dst_entry *old_dst;

	dst_clone(dst);
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
	idst->saddr = saddr;
}

static noinline void tunnel_dst_set(struct ip_tunnel *t,
			   struct dst_entry *dst, __be32 saddr)
{
	__tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
}

static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL, 0);
}

void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
	int i;

	for_each_possible_cpu(i)
		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);

static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
					u32 cookie, __be32 *saddr)
{
	struct ip_tunnel_dst *idst;
	struct dst_entry *dst;

	rcu_read_lock();
	idst = raw_cpu_ptr(t->dst_cache);
	dst = rcu_dereference(idst->dst);
	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
		dst = NULL;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, cookie)) {
			*saddr = idst->saddr;
		} else {
			tunnel_dst_reset(t);
			dst_release(dst);
			dst = NULL;
		}
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}

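/*
 * Illustrative sketch (not part of the original file) of the per-cpu
 * dst caching pattern above: writers publish with xchg() so the old
 * entry is released exactly once, and readers revalidate under RCU
 * before taking a reference. A typical consumer looks like:
 *
 *	__be32 saddr;
 *	struct rtable *rt = tunnel_rtable_get(t, 0, &saddr);
 *
 *	if (!rt) {
 *		// miss or stale cache entry: do a full route lookup with
 *		// ip_route_output_key() and, for a fixed-destination
 *		// tunnel, repopulate via tunnel_dst_set(t, &rt->dst, saddr)
 *	}
 *
 * ip_tunnel_xmit() below follows this shape for "connected" tunnels.
 */
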
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless
   tunnel, will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for the input packet.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

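/*
 * Illustrative use (hypothetical caller, not part of the original
 * file): a protocol receive handler first parses the outer header into
 * a struct tnl_ptk_info and then resolves the tunnel device, passing
 * the packet's *source* as the tunnel's remote; the GRE receive path
 * does essentially this:
 *
 *	const struct iphdr *iph = ip_hdr(skb);
 *	struct ip_tunnel *t;
 *
 *	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
 *			     iph->saddr, iph->daddr, tpi->key);
 *	if (t)
 *		return ip_tunnel_rcv(t, skb, tpi, log_ecn_error);
 */
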
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}

static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

static inline void init_tunnel_flow(struct flowi4 *fl4,
				    int proto,
				    __be32 daddr, __be32 saddr,
				    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
}

static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < IPV4_MIN_MTU)
		mtu = IPV4_MIN_MTU;

	return mtu;
}

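/*
 * Worked example (illustrative, not part of the original file),
 * assuming the tunnel netdev itself has hard_header_len == 0: for a
 * basic GRE tunnel (tunnel->hlen == 4) routed over an Ethernet device
 * with tdev->mtu == 1500,
 *
 *	t_hlen = 4 + sizeof(struct iphdr) = 24
 *	mtu    = 1500 - (0 + 24)          = 1476
 *
 * which is the classic GRE tunnel MTU. needed_headroom is sized from
 * the underlying device so the outer headers can be pushed without
 * reallocating the skb.
 */
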
static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}

int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

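/*
 * Illustrative note (not part of the original file): the in-order test
 * in ip_tunnel_rcv() uses wrap-safe serial-number arithmetic; casting
 * the unsigned difference to s32 keeps the comparison correct across
 * the 0xffffffff -> 0 wrap of the sequence counter:
 *
 *	u32 expected = 0x00000001;	// i_seqno after accepting seq 0
 *	u32 seq      = 0xffffffff;	// stale packet from before the wrap
 *	// (s32)(seq - expected) == -2 < 0  -> dropped as out of order
 *
 *	expected = 0xfffffffe;
 *	seq      = 0x00000002;		// 4 packets later, across the wrap
 *	// (s32)(seq - expected) == 4 >= 0 -> accepted
 */
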
static int ip_encap_hlen(struct ip_tunnel_encap *e)
{
	switch (e->type) {
	case TUNNEL_ENCAP_NONE:
		return 0;
	case TUNNEL_ENCAP_FOU:
		return sizeof(struct udphdr);
	case TUNNEL_ENCAP_GUE:
		return sizeof(struct udphdr) + sizeof(struct guehdr);
	default:
		return -EINVAL;
	}
}

int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

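/*
 * Illustrative example (not part of the original file; the port number
 * is hypothetical): enabling GUE encapsulation grows the tunnel header
 * by a UDP header plus a GUE header, and ip_tunnel_encap_setup() folds
 * that into t->hlen:
 *
 *	struct ip_tunnel_encap e = {
 *		.type  = TUNNEL_ENCAP_GUE,
 *		.sport = 0,			// 0: derive a flow-based source port
 *		.dport = htons(5555),		// hypothetical GUE port
 *		.flags = TUNNEL_ENCAP_FLAG_CSUM,
 *	};
 *
 *	err = ip_tunnel_encap_setup(t, &e);
 *	// on success: t->encap_hlen == sizeof(struct udphdr) + sizeof(struct guehdr)
 *	//             t->hlen       == t->tun_hlen + t->encap_hlen
 */
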
static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
			    size_t hdr_len, u8 *protocol, struct flowi4 *fl4)
{
	struct udphdr *uh;
	__be16 sport;
	bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
	int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;

	skb = iptunnel_handle_offloads(skb, csum, type);

	if (IS_ERR(skb))
		return PTR_ERR(skb);

	/* Get length and hash before making space in skb */

	sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
					       skb, 0, 0, false);

	skb_push(skb, hdr_len);

	skb_reset_transport_header(skb);
	uh = udp_hdr(skb);

	if (e->type == TUNNEL_ENCAP_GUE) {
		struct guehdr *guehdr = (struct guehdr *)&uh[1];

		guehdr->version = 0;
		guehdr->hlen = 0;
		guehdr->flags = 0;
		guehdr->next_hdr = *protocol;
	}

	uh->dest = e->dport;
	uh->source = sport;
	uh->len = htons(skb->len);
	uh->check = 0;
	udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
		     fl4->saddr, fl4->daddr, skb->len);

	*protocol = IPPROTO_UDP;

	return 0;
}

int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
		    u8 *protocol, struct flowi4 *fl4)
{
	switch (t->encap.type) {
	case TUNNEL_ENCAP_NONE:
		return 0;
	case TUNNEL_ENCAP_FOU:
	case TUNNEL_ENCAP_GUE:
		return fou_build_header(skb, &t->encap, t->encap_hlen,
					protocol, fl4);
	default:
		return -EINVAL;
	}
}
EXPORT_SYMBOL(ip_tunnel_encap);

static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df,
			    const struct iphdr *inner_iph)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

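/*
 * Worked example (illustrative, not part of the original file),
 * assuming hard_header_len == 0: for a plain IPIP tunnel
 * (tunnel->hlen == 0) whose route has a 1500-byte MTU and DF set,
 *
 *	mtu = 1500 - 0 - sizeof(struct iphdr) - 0 = 1480
 *
 * so an inner 1490-byte IPv4 packet carrying IP_DF triggers an
 * ICMP_DEST_UNREACH/ICMP_FRAG_NEEDED quoting 1480, and -E2BIG tells
 * the caller to drop rather than transmit.
 */
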
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

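/*
 * Illustrative note (not part of the original file): in tnl_params->tos
 * the low bit acts as an "inherit" flag (what "ip tunnel ... tos
 * inherit" configures). ip_tunnel_xmit() strips the bit and, when it
 * was set, takes the DSCP/ECN byte from the inner packet instead:
 *
 *	u8 tos = tnl_params->tos;	// e.g. 0x01 == inherit
 *	if (tos & 0x1) {
 *		tos &= ~0x1;
 *		if (skb->protocol == htons(ETH_P_IP))
 *			tos = inner_iph->tos;
 *	}
 *
 * Inheriting also clears "connected", since the cached route may have
 * been resolved with a different TOS.
 */
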
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}

int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

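/*
 * Illustrative example (not part of the original file): the bounds
 * check keeps new_mtu within [68, 0xFFF8 - hard_header_len - t_hlen];
 * 68 is the RFC 791 minimum and 0xFFF8 (65528) the largest 8-byte
 * aligned IP payload. For a GRE device with hard_header_len == 0 and
 * t_hlen == 24:
 *
 *	max = 0xFFF8 - 0 - 24 = 65504
 *
 * so ip_tunnel_change_mtu(dev, 65505) returns -EINVAL while 65504 is
 * accepted and written to dev->mtu.
 */
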
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* The FB netdevice is special: there is one, and only one, per netns.
	 * Allowing it to be moved to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->destructor	= ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
	if (!tunnel->dst_cache) {
		free_percpu(dev->tstats);
		return -ENOMEM;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		free_percpu(tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version		= 4;
	iph->ihl		= 5;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in the net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(netdev_priv(dev));

	ip_tunnel_dst_reset_all(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization here; the rest is done in the
 * tunnel_init call.
 */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");