// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

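/* Hash a tunnel by its key and remote address into one of the
 * IP_TNL_HASH_SIZE buckets of the per-netns table.
 */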
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
			 IP_TNL_HASH_BITS);
}

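/* A tunnel configured with TUNNEL_KEY only matches packets carrying
 * the same key; a keyless tunnel only matches keyless packets.
 */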
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options.

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless
   tunnel, will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
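/* The lookup runs up to four passes of decreasing specificity:
 * (1) exact saddr/daddr match, (2) daddr only, (3) saddr only or
 * multicast daddr, (4) keyed with wildcard addresses.  A match on the
 * same link is returned immediately; a match on another link is kept
 * as a candidate.  Failing all that, the collect_md tunnel and then
 * the fallback device are tried.
 */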
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;
	struct net_device *ndev;
	unsigned int hash;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t && t->dev->flags & IFF_UP)
		return t;

	ndev = READ_ONCE(itn->fb_tunnel_dev);
	if (ndev && ndev->flags & IFF_UP)
		return netdev_priv(ndev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

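/* Map tunnel parameters to the hash bucket they are stored in.  A
 * multicast or unset destination hashes as zero, and VTI tunnels
 * without TUNNEL_KEY ignore i_key, mirroring the lookup rules above.
 */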
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

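/* Insert a tunnel into its hash bucket; a collect_md tunnel is also
 * published as the per-netns collect_md_tun shortcut.
 */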
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}

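/* Unlike ip_tunnel_lookup(), this is an exact-configuration match used
 * on the control path (under RTNL) to find an existing tunnel with the
 * same addresses, link, device type and key semantics.
 */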
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node, lockdep_rtnl_is_held()) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

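/* Allocate and register a tunnel netdevice.  The name is taken from
 * parms if set, otherwise derived from the rtnl_link_ops kind plus a
 * "%d" template for automatic numbering.  Must be called under RTNL.
 */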
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	err = -E2BIG;
	if (parms->name[0]) {
		if (!dev_valid_name(parms->name))
			goto failed;
		strlcpy(name, parms->name, IFNAMSIZ);
	} else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3))
			goto failed;
		strcpy(name, ops->kind);
		strcat(name, "%d");
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

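/* Bind the tunnel to an underlay device and return the MTU the tunnel
 * netdevice should use (at least IPV4_MIN_MTU); also sets
 * needed_headroom from the chosen device's requirements.
 */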
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
				    iph->saddr, tunnel->parms.o_key,
				    RT_TOS(iph->tos), tunnel->parms.link,
				    tunnel->fwmark, 0);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;

		dst_cache_reset(&tunnel->dst_cache);
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = min(tdev->mtu, IP_MAX_MTU);
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);

	if (mtu < IPV4_MIN_MTU)
		mtu = IPV4_MIN_MTU;

	return mtu;
}

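/* Create a new tunnel device from parms, set up its MTU bounds and
 * insert it into the per-netns hash.  Called for SIOCADDTUNNEL when no
 * matching tunnel exists yet.
 */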
static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;
	int t_hlen;
	int mtu;
	int err;

	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	mtu = ip_tunnel_bind_dev(dev);
	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	nt = netdev_priv(dev);
	t_hlen = nt->hlen + sizeof(struct iphdr);
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = IP_MAX_MTU - t_hlen;
	if (dev->type == ARPHRD_ETHER)
		dev->max_mtu -= dev->hard_header_len;

	ip_tunnel_add(itn, nt);
	return nt;

err_dev_set_mtu:
	unregister_netdevice(dev);
	return ERR_PTR(err);
}

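/* Receive path common to IPv4 tunnels: validate checksum/sequence
 * flags against the tunnel configuration, decapsulate ECN, update
 * stats and hand the inner packet to GRO.  Consumes the skb; on error,
 * drops it and releases tun_dst.  Always returns 0.
 */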
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	const struct iphdr *iph = ip_hdr(skb);
	int nh, err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	/* Save offset of outer header relative to skb->head,
	 * because we are going to reset the network header to the inner header
	 * and might change skb->head.
	 */
	nh = skb_network_header(skb) - skb->head;

	skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);

	if (!pskb_inet_may_pull(skb)) {
		DEV_STATS_INC(tunnel->dev, rx_length_errors);
		DEV_STATS_INC(tunnel->dev, rx_errors);
		goto drop;
	}
	iph = (struct iphdr *)(skb->head + nh);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	dev_sw_netstats_rx_add(tunnel->dev, skb->len);
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

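/* Registration of encapsulation handlers (assumed users include FOU).
 * cmpxchg() keeps slot updates atomic; deletion follows up with
 * synchronize_net() so concurrent readers are done before the ops go
 * away.
 */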
int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);

int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

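/* Check the packet against the path MTU of the tunnel route, update
 * the cached dst MTU and send ICMP "fragmentation needed" / "packet
 * too big" back to the sender when the inner packet does not fit.
 * Returns 0, or -E2BIG if the packet must be dropped.
 */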
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df,
			    const struct iphdr *inner_iph,
			    int tunnel_hlen, __be32 dst, bool md)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size;
	int mtu;

	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
	pkt_size = skb->len - tunnel_hlen;
	pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;

	if (df) {
		mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
		mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
	} else {
		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
	}

	if (skb_valid_dst(skb))
		skb_dst_update_pmtu_no_confirm(skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6;
		__be32 daddr;

		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
					   NULL;
		daddr = md ? dst : tunnel->parms.iph.daddr;

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((daddr && !ipv4_is_multicast(daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
{
	/* we must cap headroom to some upperlimit, else pskb_expand_head
	 * will overflow header offsets in skb_headers_offset_update().
	 */
	static const unsigned int max_allowed = 512;

	if (headroom > max_allowed)
		headroom = max_allowed;

	if (headroom > READ_ONCE(dev->needed_headroom))
		WRITE_ONCE(dev->needed_headroom, headroom);
}

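/* Transmit path for metadata-based (collect_md) tunnels: the outer
 * addresses, key, TOS and TTL come from the skb's ip_tunnel_info
 * rather than from the device configuration.
 */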
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		       u8 proto, int tunnel_hlen)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;
	bool use_cache;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto tx_error;
	key = &tun_info->key;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	tos = key->tos;
	if (tos == 1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
			    0, skb->mark, skb_get_hash(skb));
	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
		goto tx_error;

	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);
		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
	}
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
			    key->u.ipv4.dst, true)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = key->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	if (skb_cow_head(skb, headroom)) {
		ip_rt_put(rt);
		goto tx_dropped;
	}

	ip_tunnel_adj_headroom(dev, headroom);

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;
tx_error:
	dev->stats.tx_errors++;
	goto kfree;
tx_dropped:
	dev->stats.tx_dropped++;
kfree:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);

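/* Common transmit path for configured tunnels: resolve the outer
 * destination (including the NBMA case where it comes from the inner
 * headers or a neighbour entry), route it, enforce PMTU, then
 * encapsulate and send via iptunnel_xmit().
 */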
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info = NULL;
	const struct iphdr *inner_iph;
	unsigned int max_headroom;	/* The extra header space needed */
	struct rtable *rt = NULL;		/* Route to the other host */
	bool use_cache = false;
	struct flowi4 fl4;
	bool md = false;
	bool connected;
	u8 tos, ttl;
	__be32 dst;
	__be16 df;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		tun_info = skb_tunnel_info(skb);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
		    ip_tunnel_info_af(tun_info) == AF_INET &&
		    tun_info->key.u.ipv4.dst) {
			dst = tun_info->key.u.ipv4.dst;
			md = true;
			connected = true;
		}
		else if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		if (!md)
			connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
			    tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
			    tunnel->fwmark, skb_get_hash(skb));

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	if (connected && md) {
		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
		if (use_cache)
			rt = dst_cache_get_ip4(&tun_info->dst_cache,
					       &fl4.saddr);
	} else {
		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
						&fl4.saddr) : NULL;
	}

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
		else if (!md && connected)
			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
		df |= (inner_iph->frag_off & htons(IP_DF));

	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);

	if (skb_cow_head(skb, max_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	ip_tunnel_adj_headroom(dev, max_headroom);

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

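/* Re-key an existing tunnel: remove it from the hash, apply the new
 * parameters (rehashing into the right bucket), and refresh link
 * binding, MTU and cached routes as needed.
 */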
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link || t->fwmark != fwmark) {
		int mtu;

		t->parms.link = p->link;
		t->fwmark = fwmark;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}

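/* Handle the SIOC{GET,ADD,CHG,DEL}TUNNEL ioctls.  ADD/CHG/DEL require
 * CAP_NET_ADMIN in the tunnel's user namespace; operations on the
 * fallback device are redirected to the tunnel matching p.
 */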
int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true, 0);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ctl);

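/* Copy the ip_tunnel_parm request in from userspace, dispatch to the
 * driver's ndo_tunnel_ctl and copy the (possibly updated) parameters
 * back out.
 */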
int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	struct ip_tunnel_parm p;
	int err;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
		return -EFAULT;
	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
	if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
		return -EFAULT;
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

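/* Validate a new MTU against the tunnel's header overhead.  MTUs below
 * ETH_MIN_MTU are always rejected; MTUs above the encapsulation limit
 * are rejected when strict is set and clamped otherwise.
 */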
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int max_mtu = IP_MAX_MTU - t_hlen;

	if (dev->type == ARPHRD_ETHER)
		max_mtu -= dev->hard_header_len;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		new_mtu = max_mtu;
	}

	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	dst_cache_destroy(&tunnel->dst_cache);
	free_percpu(dev->tstats);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(itn, netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);

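/* Per-netns initialization: set up the hash table and, where the netns
 * allows fallback tunnels, create and register the fallback device.
 * The fallback device is netns-local and must never be moved.
 */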
int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	itn->rtnl_link_ops = ops;
	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops || !net_has_fallback_tunnels(net)) {
		struct ip_tunnel_net *it_init_net;

		it_init_net = net_generic(&init_net, ip_tnl_net_id);
		itn->type = it_init_net->type;
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
		itn->type = itn->fb_tunnel_dev->type;
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
			      struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
			   struct rtnl_link_ops *ops)
{
	struct ip_tunnel_net *itn;
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		itn = net_generic(net, id);
		ip_tunnel_destroy(net, itn, &list, ops);
	}
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);

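/* rtnetlink path for creating a tunnel (mirroring what SIOCADDTUNNEL
 * does via ioctl): register the device, pick an MTU honouring IFLA_MTU
 * within the encapsulation limits, and hash the tunnel in.
 */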
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;
	err = register_netdevice(dev);
	if (err)
		goto err_register_netdevice;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));

		if (dev->type == ARPHRD_ETHER)
			max -= dev->hard_header_len;

		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
	}

	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	ip_tunnel_add(itn, nt);
	return 0;

err_dev_set_mtu:
	unregister_netdevice(dev);
err_register_netdevice:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->needs_free_netdev = true;
	dev->priv_destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		dst_cache_destroy(&tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version		= 4;
	iph->ihl		= 5;

	if (tunnel->collect_md)
		netif_keep_dst(dev);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	ip_tunnel_del(itn, netdev_priv(dev));
	if (itn->fb_tunnel_dev == dev)
		WRITE_ONCE(itn->fb_tunnel_dev, NULL);

	dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization; the rest is done in the tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");