• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2013 Nicira, Inc.
4  */
5 
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 
8 #include <linux/capability.h>
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/slab.h>
13 #include <linux/uaccess.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/in.h>
17 #include <linux/tcp.h>
18 #include <linux/udp.h>
19 #include <linux/if_arp.h>
20 #include <linux/init.h>
21 #include <linux/in6.h>
22 #include <linux/inetdevice.h>
23 #include <linux/igmp.h>
24 #include <linux/netfilter_ipv4.h>
25 #include <linux/etherdevice.h>
26 #include <linux/if_ether.h>
27 #include <linux/if_vlan.h>
28 #include <linux/rculist.h>
29 #include <linux/err.h>
30 
31 #include <net/sock.h>
32 #include <net/ip.h>
33 #include <net/icmp.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
36 #include <net/arp.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
40 #include <net/xfrm.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/rtnetlink.h>
44 #include <net/udp.h>
45 #include <net/dst_metadata.h>
46 
47 #if IS_ENABLED(CONFIG_IPV6)
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #endif
52 
/* Hash a (key, remote address) pair down to IP_TNL_HASH_BITS bits. */
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	u32 mixed = (__force u32)key ^ (__force u32)remote;

	return hash_32(mixed, IP_TNL_HASH_BITS);
}
58 
ip_tunnel_key_match(const struct ip_tunnel_parm * p,__be16 flags,__be32 key)59 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
60 				__be16 flags, __be32 key)
61 {
62 	if (p->i_flags & TUNNEL_KEY) {
63 		if (flags & TUNNEL_KEY)
64 			return key == p->i_key;
65 		else
66 			/* key expected, none present */
67 			return false;
68 	} else
69 		return !(flags & TUNNEL_KEY);
70 }
71 
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
/* Find the tunnel that should receive a packet described by @link,
 * @flags, @remote, @local and @key.
 *
 * Four passes are made over the hash table, from most to least specific.
 * In every pass a tunnel whose parms.link equals @link is returned
 * immediately; otherwise the tunnel is only remembered as a candidate.
 * If no pass returns, the candidate is used, then the collect_md tunnel,
 * then the fallback device (each only if its device is IFF_UP).
 *
 * Returns the selected tunnel or NULL.
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	struct ip_tunnel *t, *cand = NULL;
	struct net_device *fb_dev;
	struct hlist_head *head;

	head = &itn->tunnels[ip_tunnel_hash(key, remote)];

	/* Pass 1: local and remote address both match. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;

		/* this pass overwrites any earlier candidate */
		cand = t;
	}

	/* Pass 2: remote matches, tunnel has no local address set. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;

		if (!cand)
			cand = t;
	}

	/* The remaining passes use the bucket for a zero remote address. */
	head = &itn->tunnels[ip_tunnel_hash(key, 0)];

	/* Pass 3: local matches a tunnel with no remote, or local is a
	 * multicast address equal to the tunnel's remote.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;

		if (!cand)
			cand = t;
	}

	/* Pass 4: tunnel with neither address set; the key is compared
	 * unless the packet carries TUNNEL_NO_KEY.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;

		if (!cand)
			cand = t;
	}

	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t && t->dev->flags & IFF_UP)
		return t;

	fb_dev = READ_ONCE(itn->fb_tunnel_dev);
	if (fb_dev && fb_dev->flags & IFF_UP)
		return netdev_priv(fb_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
173 
ip_bucket(struct ip_tunnel_net * itn,struct ip_tunnel_parm * parms)174 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
175 				    struct ip_tunnel_parm *parms)
176 {
177 	unsigned int h;
178 	__be32 remote;
179 	__be32 i_key = parms->i_key;
180 
181 	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
182 		remote = parms->iph.daddr;
183 	else
184 		remote = 0;
185 
186 	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
187 		i_key = 0;
188 
189 	h = ip_tunnel_hash(i_key, remote);
190 	return &itn->tunnels[h];
191 }
192 
ip_tunnel_add(struct ip_tunnel_net * itn,struct ip_tunnel * t)193 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
194 {
195 	struct hlist_head *head = ip_bucket(itn, &t->parms);
196 
197 	if (t->collect_md)
198 		rcu_assign_pointer(itn->collect_md_tun, t);
199 	hlist_add_head_rcu(&t->hash_node, head);
200 }
201 
ip_tunnel_del(struct ip_tunnel_net * itn,struct ip_tunnel * t)202 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
203 {
204 	if (t->collect_md)
205 		rcu_assign_pointer(itn->collect_md_tun, NULL);
206 	hlist_del_init_rcu(&t->hash_node);
207 }
208 
ip_tunnel_find(struct ip_tunnel_net * itn,struct ip_tunnel_parm * parms,int type)209 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
210 					struct ip_tunnel_parm *parms,
211 					int type)
212 {
213 	__be32 remote = parms->iph.daddr;
214 	__be32 local = parms->iph.saddr;
215 	__be32 key = parms->i_key;
216 	__be16 flags = parms->i_flags;
217 	int link = parms->link;
218 	struct ip_tunnel *t = NULL;
219 	struct hlist_head *head = ip_bucket(itn, parms);
220 
221 	hlist_for_each_entry_rcu(t, head, hash_node) {
222 		if (local == t->parms.iph.saddr &&
223 		    remote == t->parms.iph.daddr &&
224 		    link == t->parms.link &&
225 		    type == t->dev->type &&
226 		    ip_tunnel_key_match(&t->parms, flags, key))
227 			break;
228 	}
229 	return t;
230 }
231 
__ip_tunnel_create(struct net * net,const struct rtnl_link_ops * ops,struct ip_tunnel_parm * parms)232 static struct net_device *__ip_tunnel_create(struct net *net,
233 					     const struct rtnl_link_ops *ops,
234 					     struct ip_tunnel_parm *parms)
235 {
236 	int err;
237 	struct ip_tunnel *tunnel;
238 	struct net_device *dev;
239 	char name[IFNAMSIZ];
240 
241 	err = -E2BIG;
242 	if (parms->name[0]) {
243 		if (!dev_valid_name(parms->name))
244 			goto failed;
245 		strlcpy(name, parms->name, IFNAMSIZ);
246 	} else {
247 		if (strlen(ops->kind) > (IFNAMSIZ - 3))
248 			goto failed;
249 		strcpy(name, ops->kind);
250 		strcat(name, "%d");
251 	}
252 
253 	ASSERT_RTNL();
254 	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
255 	if (!dev) {
256 		err = -ENOMEM;
257 		goto failed;
258 	}
259 	dev_net_set(dev, net);
260 
261 	dev->rtnl_link_ops = ops;
262 
263 	tunnel = netdev_priv(dev);
264 	tunnel->parms = *parms;
265 	tunnel->net = net;
266 
267 	err = register_netdevice(dev);
268 	if (err)
269 		goto failed_free;
270 
271 	return dev;
272 
273 failed_free:
274 	free_netdev(dev);
275 failed:
276 	return ERR_PTR(err);
277 }
278 
ip_tunnel_bind_dev(struct net_device * dev)279 static int ip_tunnel_bind_dev(struct net_device *dev)
280 {
281 	struct net_device *tdev = NULL;
282 	struct ip_tunnel *tunnel = netdev_priv(dev);
283 	const struct iphdr *iph;
284 	int hlen = LL_MAX_HEADER;
285 	int mtu = ETH_DATA_LEN;
286 	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
287 
288 	iph = &tunnel->parms.iph;
289 
290 	/* Guess output device to choose reasonable mtu and needed_headroom */
291 	if (iph->daddr) {
292 		struct flowi4 fl4;
293 		struct rtable *rt;
294 
295 		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
296 				    iph->saddr, tunnel->parms.o_key,
297 				    RT_TOS(iph->tos), tunnel->parms.link,
298 				    tunnel->fwmark, 0);
299 		rt = ip_route_output_key(tunnel->net, &fl4);
300 
301 		if (!IS_ERR(rt)) {
302 			tdev = rt->dst.dev;
303 			ip_rt_put(rt);
304 		}
305 		if (dev->type != ARPHRD_ETHER)
306 			dev->flags |= IFF_POINTOPOINT;
307 
308 		dst_cache_reset(&tunnel->dst_cache);
309 	}
310 
311 	if (!tdev && tunnel->parms.link)
312 		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
313 
314 	if (tdev) {
315 		hlen = tdev->hard_header_len + tdev->needed_headroom;
316 		mtu = min(tdev->mtu, IP_MAX_MTU);
317 	}
318 
319 	dev->needed_headroom = t_hlen + hlen;
320 	mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);
321 
322 	if (mtu < IPV4_MIN_MTU)
323 		mtu = IPV4_MIN_MTU;
324 
325 	return mtu;
326 }
327 
ip_tunnel_create(struct net * net,struct ip_tunnel_net * itn,struct ip_tunnel_parm * parms)328 static struct ip_tunnel *ip_tunnel_create(struct net *net,
329 					  struct ip_tunnel_net *itn,
330 					  struct ip_tunnel_parm *parms)
331 {
332 	struct ip_tunnel *nt;
333 	struct net_device *dev;
334 	int t_hlen;
335 	int mtu;
336 	int err;
337 
338 	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
339 	if (IS_ERR(dev))
340 		return ERR_CAST(dev);
341 
342 	mtu = ip_tunnel_bind_dev(dev);
343 	err = dev_set_mtu(dev, mtu);
344 	if (err)
345 		goto err_dev_set_mtu;
346 
347 	nt = netdev_priv(dev);
348 	t_hlen = nt->hlen + sizeof(struct iphdr);
349 	dev->min_mtu = ETH_MIN_MTU;
350 	dev->max_mtu = IP_MAX_MTU - t_hlen;
351 	if (dev->type == ARPHRD_ETHER)
352 		dev->max_mtu -= dev->hard_header_len;
353 
354 	ip_tunnel_add(itn, nt);
355 	return nt;
356 
357 err_dev_set_mtu:
358 	unregister_netdevice(dev);
359 	return ERR_PTR(err);
360 }
361 
/* Receive a decapsulated packet on @tunnel.
 *
 * The packet is dropped (and the matching error counters bumped) when
 * its checksum flag disagrees with the tunnel's configuration, when a
 * required sequence number is missing or older than expected, or when
 * IP_ECN_decapsulate() returns a value greater than 1.  Otherwise the
 * skb is scrubbed, attached to the tunnel device, given @tun_dst as its
 * dst if one was supplied, and handed to the tunnel's GRO cells.
 *
 * On the drop path the reference on @tun_dst is released and the skb is
 * freed.  Always returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	/* taken before the network header is moved below */
	const struct iphdr *iph = ip_hdr(skb);
	struct net_device *tdev = tunnel->dev;
	bool pkt_csum, want_csum;
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tdev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* the packet and the tunnel must agree on checksum usage */
	pkt_csum = !!(tpi->flags & TUNNEL_CSUM);
	want_csum = !!(tunnel->parms.i_flags & TUNNEL_CSUM);
	if (pkt_csum != want_csum) {
		tdev->stats.rx_crc_errors++;
		tdev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags & TUNNEL_SEQ) {
		if (!(tpi->flags & TUNNEL_SEQ) ||
		    (tunnel->i_seqno &&
		     (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tdev->stats.rx_fifo_errors++;
			tdev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_set_network_header(skb,
			       (tdev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tdev->stats.rx_frame_errors;
			++tdev->stats.rx_errors;
			goto drop;
		}
	}

	dev_sw_netstats_rx_add(tdev, skb->len);
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tdev)));

	if (tdev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tdev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tdev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
430 
ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops * ops,unsigned int num)431 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
432 			    unsigned int num)
433 {
434 	if (num >= MAX_IPTUN_ENCAP_OPS)
435 		return -ERANGE;
436 
437 	return !cmpxchg((const struct ip_tunnel_encap_ops **)
438 			&iptun_encaps[num],
439 			NULL, ops) ? 0 : -1;
440 }
441 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
442 
ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops * ops,unsigned int num)443 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
444 			    unsigned int num)
445 {
446 	int ret;
447 
448 	if (num >= MAX_IPTUN_ENCAP_OPS)
449 		return -ERANGE;
450 
451 	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
452 		       &iptun_encaps[num],
453 		       ops, NULL) == ops) ? 0 : -1;
454 
455 	synchronize_net();
456 
457 	return ret;
458 }
459 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
460 
ip_tunnel_encap_setup(struct ip_tunnel * t,struct ip_tunnel_encap * ipencap)461 int ip_tunnel_encap_setup(struct ip_tunnel *t,
462 			  struct ip_tunnel_encap *ipencap)
463 {
464 	int hlen;
465 
466 	memset(&t->encap, 0, sizeof(t->encap));
467 
468 	hlen = ip_encap_hlen(ipencap);
469 	if (hlen < 0)
470 		return hlen;
471 
472 	t->encap.type = ipencap->type;
473 	t->encap.sport = ipencap->sport;
474 	t->encap.dport = ipencap->dport;
475 	t->encap.flags = ipencap->flags;
476 
477 	t->encap_hlen = hlen;
478 	t->hlen = t->encap_hlen + t->tun_hlen;
479 
480 	return 0;
481 }
482 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
483 
tnl_update_pmtu(struct net_device * dev,struct sk_buff * skb,struct rtable * rt,__be16 df,const struct iphdr * inner_iph,int tunnel_hlen,__be32 dst,bool md)484 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
485 			    struct rtable *rt, __be16 df,
486 			    const struct iphdr *inner_iph,
487 			    int tunnel_hlen, __be32 dst, bool md)
488 {
489 	struct ip_tunnel *tunnel = netdev_priv(dev);
490 	int pkt_size;
491 	int mtu;
492 
493 	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
494 	pkt_size = skb->len - tunnel_hlen;
495 	pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
496 
497 	if (df) {
498 		mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
499 		mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
500 	} else {
501 		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
502 	}
503 
504 	if (skb_valid_dst(skb))
505 		skb_dst_update_pmtu_no_confirm(skb, mtu);
506 
507 	if (skb->protocol == htons(ETH_P_IP)) {
508 		if (!skb_is_gso(skb) &&
509 		    (inner_iph->frag_off & htons(IP_DF)) &&
510 		    mtu < pkt_size) {
511 			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
512 			return -E2BIG;
513 		}
514 	}
515 #if IS_ENABLED(CONFIG_IPV6)
516 	else if (skb->protocol == htons(ETH_P_IPV6)) {
517 		struct rt6_info *rt6;
518 		__be32 daddr;
519 
520 		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
521 					   NULL;
522 		daddr = md ? dst : tunnel->parms.iph.daddr;
523 
524 		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
525 			   mtu >= IPV6_MIN_MTU) {
526 			if ((daddr && !ipv4_is_multicast(daddr)) ||
527 			    rt6->rt6i_dst.plen == 128) {
528 				rt6->rt6i_flags |= RTF_MODIFIED;
529 				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
530 			}
531 		}
532 
533 		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
534 					mtu < pkt_size) {
535 			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
536 			return -E2BIG;
537 		}
538 	}
539 #endif
540 	return 0;
541 }
542 
/* Transmit @skb on tunnel device @dev using the tunnel info attached to
 * the skb (skb_tunnel_info()) rather than the device's own parameters.
 *
 * @proto is the IP protocol number for the outer header and
 * @tunnel_hlen the tunnel header length used for the PMTU computation.
 *
 * The skb is always consumed: it is either passed to iptunnel_xmit() or
 * freed, with tx_errors or tx_dropped incremented.  Packets are rejected
 * when no usable IPv4 TX tunnel info is attached, when the device has an
 * encapsulation type other than TUNNEL_ENCAP_NONE configured, when no
 * route is found, when the route points back at @dev, or when
 * tnl_update_pmtu() reports the packet as too big.
 */
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		       u8 proto, int tunnel_hlen)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;
	bool use_cache;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto tx_error;
	key = &tun_info->key;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	/* a tos of exactly 1 means: take the value from the inner header */
	tos = key->tos;
	if (tos == 1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
			    0, skb->mark, skb_get_hash(skb));
	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
		goto tx_error;

	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);
		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	/* refuse to send the packet back into this same device */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
			    key->u.ipv4.dst, true)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);

	/* a ttl of 0 means: inherit from the inner header, or fall back
	 * to the route's hop limit for other protocols
	 */
	ttl = key->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;

	/* No lock is taken around this update in this function, so mark
	 * the accesses with READ_ONCE()/WRITE_ONCE() to keep them untorn
	 * should several transmitters run at once.
	 */
	if (headroom > READ_ONCE(dev->needed_headroom))
		WRITE_ONCE(dev->needed_headroom, headroom);

	if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
		ip_rt_put(rt);
		goto tx_dropped;
	}
	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;
tx_error:
	dev->stats.tx_errors++;
	goto kfree;
tx_dropped:
	dev->stats.tx_dropped++;
kfree:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
635 
ip_tunnel_xmit(struct sk_buff * skb,struct net_device * dev,const struct iphdr * tnl_params,u8 protocol)636 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
637 		    const struct iphdr *tnl_params, u8 protocol)
638 {
639 	struct ip_tunnel *tunnel = netdev_priv(dev);
640 	struct ip_tunnel_info *tun_info = NULL;
641 	const struct iphdr *inner_iph;
642 	unsigned int max_headroom;	/* The extra header space needed */
643 	struct rtable *rt = NULL;		/* Route to the other host */
644 	bool use_cache = false;
645 	struct flowi4 fl4;
646 	bool md = false;
647 	bool connected;
648 	u8 tos, ttl;
649 	__be32 dst;
650 	__be16 df;
651 
652 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
653 	connected = (tunnel->parms.iph.daddr != 0);
654 
655 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
656 
657 	dst = tnl_params->daddr;
658 	if (dst == 0) {
659 		/* NBMA tunnel */
660 
661 		if (!skb_dst(skb)) {
662 			dev->stats.tx_fifo_errors++;
663 			goto tx_error;
664 		}
665 
666 		tun_info = skb_tunnel_info(skb);
667 		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
668 		    ip_tunnel_info_af(tun_info) == AF_INET &&
669 		    tun_info->key.u.ipv4.dst) {
670 			dst = tun_info->key.u.ipv4.dst;
671 			md = true;
672 			connected = true;
673 		}
674 		else if (skb->protocol == htons(ETH_P_IP)) {
675 			rt = skb_rtable(skb);
676 			dst = rt_nexthop(rt, inner_iph->daddr);
677 		}
678 #if IS_ENABLED(CONFIG_IPV6)
679 		else if (skb->protocol == htons(ETH_P_IPV6)) {
680 			const struct in6_addr *addr6;
681 			struct neighbour *neigh;
682 			bool do_tx_error_icmp;
683 			int addr_type;
684 
685 			neigh = dst_neigh_lookup(skb_dst(skb),
686 						 &ipv6_hdr(skb)->daddr);
687 			if (!neigh)
688 				goto tx_error;
689 
690 			addr6 = (const struct in6_addr *)&neigh->primary_key;
691 			addr_type = ipv6_addr_type(addr6);
692 
693 			if (addr_type == IPV6_ADDR_ANY) {
694 				addr6 = &ipv6_hdr(skb)->daddr;
695 				addr_type = ipv6_addr_type(addr6);
696 			}
697 
698 			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
699 				do_tx_error_icmp = true;
700 			else {
701 				do_tx_error_icmp = false;
702 				dst = addr6->s6_addr32[3];
703 			}
704 			neigh_release(neigh);
705 			if (do_tx_error_icmp)
706 				goto tx_error_icmp;
707 		}
708 #endif
709 		else
710 			goto tx_error;
711 
712 		if (!md)
713 			connected = false;
714 	}
715 
716 	tos = tnl_params->tos;
717 	if (tos & 0x1) {
718 		tos &= ~0x1;
719 		if (skb->protocol == htons(ETH_P_IP)) {
720 			tos = inner_iph->tos;
721 			connected = false;
722 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
723 			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
724 			connected = false;
725 		}
726 	}
727 
728 	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
729 			    tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
730 			    tunnel->fwmark, skb_get_hash(skb));
731 
732 	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
733 		goto tx_error;
734 
735 	if (connected && md) {
736 		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
737 		if (use_cache)
738 			rt = dst_cache_get_ip4(&tun_info->dst_cache,
739 					       &fl4.saddr);
740 	} else {
741 		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
742 						&fl4.saddr) : NULL;
743 	}
744 
745 	if (!rt) {
746 		rt = ip_route_output_key(tunnel->net, &fl4);
747 
748 		if (IS_ERR(rt)) {
749 			dev->stats.tx_carrier_errors++;
750 			goto tx_error;
751 		}
752 		if (use_cache)
753 			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
754 					  fl4.saddr);
755 		else if (!md && connected)
756 			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
757 					  fl4.saddr);
758 	}
759 
760 	if (rt->dst.dev == dev) {
761 		ip_rt_put(rt);
762 		dev->stats.collisions++;
763 		goto tx_error;
764 	}
765 
766 	df = tnl_params->frag_off;
767 	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
768 		df |= (inner_iph->frag_off & htons(IP_DF));
769 
770 	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
771 		ip_rt_put(rt);
772 		goto tx_error;
773 	}
774 
775 	if (tunnel->err_count > 0) {
776 		if (time_before(jiffies,
777 				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
778 			tunnel->err_count--;
779 
780 			dst_link_failure(skb);
781 		} else
782 			tunnel->err_count = 0;
783 	}
784 
785 	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
786 	ttl = tnl_params->ttl;
787 	if (ttl == 0) {
788 		if (skb->protocol == htons(ETH_P_IP))
789 			ttl = inner_iph->ttl;
790 #if IS_ENABLED(CONFIG_IPV6)
791 		else if (skb->protocol == htons(ETH_P_IPV6))
792 			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
793 #endif
794 		else
795 			ttl = ip4_dst_hoplimit(&rt->dst);
796 	}
797 
798 	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
799 			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
800 	if (max_headroom > dev->needed_headroom)
801 		dev->needed_headroom = max_headroom;
802 
803 	if (skb_cow_head(skb, dev->needed_headroom)) {
804 		ip_rt_put(rt);
805 		dev->stats.tx_dropped++;
806 		kfree_skb(skb);
807 		return;
808 	}
809 
810 	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
811 		      df, !net_eq(tunnel->net, dev_net(dev)));
812 	return;
813 
814 #if IS_ENABLED(CONFIG_IPV6)
815 tx_error_icmp:
816 	dst_link_failure(skb);
817 #endif
818 tx_error:
819 	dev->stats.tx_errors++;
820 	kfree_skb(skb);
821 }
822 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
823 
/* Apply new parameters @p (and @fwmark) to existing tunnel @t.
 *
 * The tunnel is unhashed while the fields that feed the hash (addresses
 * and input key) change, then rehashed.  For non-Ethernet devices the
 * device and broadcast addresses are set from the new endpoints.  If the
 * link or fwmark changed, the underlying device is re-evaluated and,
 * when @set_mtu is true, the device MTU is updated.  Finally the dst
 * cache is reset and a device state change is signalled.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	/* hash inputs are about to change: unhash, update, rehash */
	ip_tunnel_del(itn, t);

	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;

	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}

	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link || t->fwmark != fwmark) {
		int new_mtu;

		t->parms.link = p->link;
		t->fwmark = fwmark;

		new_mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = new_mtu;
	}

	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}
858 
ip_tunnel_ctl(struct net_device * dev,struct ip_tunnel_parm * p,int cmd)859 int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
860 {
861 	int err = 0;
862 	struct ip_tunnel *t = netdev_priv(dev);
863 	struct net *net = t->net;
864 	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
865 
866 	switch (cmd) {
867 	case SIOCGETTUNNEL:
868 		if (dev == itn->fb_tunnel_dev) {
869 			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
870 			if (!t)
871 				t = netdev_priv(dev);
872 		}
873 		memcpy(p, &t->parms, sizeof(*p));
874 		break;
875 
876 	case SIOCADDTUNNEL:
877 	case SIOCCHGTUNNEL:
878 		err = -EPERM;
879 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
880 			goto done;
881 		if (p->iph.ttl)
882 			p->iph.frag_off |= htons(IP_DF);
883 		if (!(p->i_flags & VTI_ISVTI)) {
884 			if (!(p->i_flags & TUNNEL_KEY))
885 				p->i_key = 0;
886 			if (!(p->o_flags & TUNNEL_KEY))
887 				p->o_key = 0;
888 		}
889 
890 		t = ip_tunnel_find(itn, p, itn->type);
891 
892 		if (cmd == SIOCADDTUNNEL) {
893 			if (!t) {
894 				t = ip_tunnel_create(net, itn, p);
895 				err = PTR_ERR_OR_ZERO(t);
896 				break;
897 			}
898 
899 			err = -EEXIST;
900 			break;
901 		}
902 		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
903 			if (t) {
904 				if (t->dev != dev) {
905 					err = -EEXIST;
906 					break;
907 				}
908 			} else {
909 				unsigned int nflags = 0;
910 
911 				if (ipv4_is_multicast(p->iph.daddr))
912 					nflags = IFF_BROADCAST;
913 				else if (p->iph.daddr)
914 					nflags = IFF_POINTOPOINT;
915 
916 				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
917 					err = -EINVAL;
918 					break;
919 				}
920 
921 				t = netdev_priv(dev);
922 			}
923 		}
924 
925 		if (t) {
926 			err = 0;
927 			ip_tunnel_update(itn, t, dev, p, true, 0);
928 		} else {
929 			err = -ENOENT;
930 		}
931 		break;
932 
933 	case SIOCDELTUNNEL:
934 		err = -EPERM;
935 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
936 			goto done;
937 
938 		if (dev == itn->fb_tunnel_dev) {
939 			err = -ENOENT;
940 			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
941 			if (!t)
942 				goto done;
943 			err = -EPERM;
944 			if (t == netdev_priv(itn->fb_tunnel_dev))
945 				goto done;
946 			dev = t->dev;
947 		}
948 		unregister_netdevice(dev);
949 		err = 0;
950 		break;
951 
952 	default:
953 		err = -EINVAL;
954 	}
955 
956 done:
957 	return err;
958 }
959 EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
960 
ip_tunnel_ioctl(struct net_device * dev,struct ifreq * ifr,int cmd)961 int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
962 {
963 	struct ip_tunnel_parm p;
964 	int err;
965 
966 	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
967 		return -EFAULT;
968 	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
969 	if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
970 		return -EFAULT;
971 	return err;
972 }
973 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
974 
/* Set the MTU of tunnel device @dev to @new_mtu.
 *
 * The upper bound is IP_MAX_MTU minus the tunnel overhead (and minus the
 * hard header length for Ethernet-type devices).  A value above that
 * bound is rejected when @strict is true and clamped to the bound
 * otherwise.
 *
 * Returns 0 on success, -EINVAL when @new_mtu is below ETH_MIN_MTU or
 * too large in strict mode.
 */
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int max_mtu = IP_MAX_MTU - t_hlen;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (dev->type == ARPHRD_ETHER)
		max_mtu -= dev->hard_header_len;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		/* non-strict callers get the largest allowed value */
		new_mtu = max_mtu;
	}

	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
998 
ip_tunnel_change_mtu(struct net_device * dev,int new_mtu)999 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1000 {
1001 	return __ip_tunnel_change_mtu(dev, new_mtu, true);
1002 }
1003 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
1004 
ip_tunnel_dev_free(struct net_device * dev)1005 static void ip_tunnel_dev_free(struct net_device *dev)
1006 {
1007 	struct ip_tunnel *tunnel = netdev_priv(dev);
1008 
1009 	gro_cells_destroy(&tunnel->gro_cells);
1010 	dst_cache_destroy(&tunnel->dst_cache);
1011 	free_percpu(dev->tstats);
1012 }
1013 
ip_tunnel_dellink(struct net_device * dev,struct list_head * head)1014 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
1015 {
1016 	struct ip_tunnel *tunnel = netdev_priv(dev);
1017 	struct ip_tunnel_net *itn;
1018 
1019 	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
1020 
1021 	if (itn->fb_tunnel_dev != dev) {
1022 		ip_tunnel_del(itn, netdev_priv(dev));
1023 		unregister_netdevice_queue(dev, head);
1024 	}
1025 }
1026 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
1027 
ip_tunnel_get_link_net(const struct net_device * dev)1028 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
1029 {
1030 	struct ip_tunnel *tunnel = netdev_priv(dev);
1031 
1032 	return tunnel->net;
1033 }
1034 EXPORT_SYMBOL(ip_tunnel_get_link_net);
1035 
ip_tunnel_get_iflink(const struct net_device * dev)1036 int ip_tunnel_get_iflink(const struct net_device *dev)
1037 {
1038 	struct ip_tunnel *tunnel = netdev_priv(dev);
1039 
1040 	return tunnel->parms.link;
1041 }
1042 EXPORT_SYMBOL(ip_tunnel_get_iflink);
1043 
ip_tunnel_init_net(struct net * net,unsigned int ip_tnl_net_id,struct rtnl_link_ops * ops,char * devname)1044 int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
1045 				  struct rtnl_link_ops *ops, char *devname)
1046 {
1047 	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1048 	struct ip_tunnel_parm parms;
1049 	unsigned int i;
1050 
1051 	itn->rtnl_link_ops = ops;
1052 	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1053 		INIT_HLIST_HEAD(&itn->tunnels[i]);
1054 
1055 	if (!ops || !net_has_fallback_tunnels(net)) {
1056 		struct ip_tunnel_net *it_init_net;
1057 
1058 		it_init_net = net_generic(&init_net, ip_tnl_net_id);
1059 		itn->type = it_init_net->type;
1060 		itn->fb_tunnel_dev = NULL;
1061 		return 0;
1062 	}
1063 
1064 	memset(&parms, 0, sizeof(parms));
1065 	if (devname)
1066 		strlcpy(parms.name, devname, IFNAMSIZ);
1067 
1068 	rtnl_lock();
1069 	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1070 	/* FB netdevice is special: we have one, and only one per netns.
1071 	 * Allowing to move it to another netns is clearly unsafe.
1072 	 */
1073 	if (!IS_ERR(itn->fb_tunnel_dev)) {
1074 		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1075 		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1076 		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1077 		itn->type = itn->fb_tunnel_dev->type;
1078 	}
1079 	rtnl_unlock();
1080 
1081 	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1082 }
1083 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1084 
ip_tunnel_destroy(struct net * net,struct ip_tunnel_net * itn,struct list_head * head,struct rtnl_link_ops * ops)1085 static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
1086 			      struct list_head *head,
1087 			      struct rtnl_link_ops *ops)
1088 {
1089 	struct net_device *dev, *aux;
1090 	int h;
1091 
1092 	for_each_netdev_safe(net, dev, aux)
1093 		if (dev->rtnl_link_ops == ops)
1094 			unregister_netdevice_queue(dev, head);
1095 
1096 	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1097 		struct ip_tunnel *t;
1098 		struct hlist_node *n;
1099 		struct hlist_head *thead = &itn->tunnels[h];
1100 
1101 		hlist_for_each_entry_safe(t, n, thead, hash_node)
1102 			/* If dev is in the same netns, it has already
1103 			 * been added to the list by the previous loop.
1104 			 */
1105 			if (!net_eq(dev_net(t->dev), net))
1106 				unregister_netdevice_queue(t->dev, head);
1107 	}
1108 }
1109 
ip_tunnel_delete_nets(struct list_head * net_list,unsigned int id,struct rtnl_link_ops * ops)1110 void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1111 			   struct rtnl_link_ops *ops)
1112 {
1113 	struct ip_tunnel_net *itn;
1114 	struct net *net;
1115 	LIST_HEAD(list);
1116 
1117 	rtnl_lock();
1118 	list_for_each_entry(net, net_list, exit_list) {
1119 		itn = net_generic(net, id);
1120 		ip_tunnel_destroy(net, itn, &list, ops);
1121 	}
1122 	unregister_netdevice_many(&list);
1123 	rtnl_unlock();
1124 }
1125 EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
1126 
/*
 * rtnl "newlink" helper for IP tunnels.
 *
 * @dev:    device being created; its private area is a struct ip_tunnel
 * @tb:     netlink attributes (IFLA_ADDRESS and IFLA_MTU are consulted)
 * @p:      tunnel parameters, copied into the tunnel
 * @fwmark: firewall mark stored in the tunnel
 *
 * Returns -EEXIST when a collect_md tunnel is already set for the
 * namespace (collect_md case) or ip_tunnel_find() reports a match, the
 * error from register_netdevice()/dev_set_mtu() on failure, and 0 once the
 * tunnel has been passed to ip_tunnel_add().
 */
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *nt = netdev_priv(dev);
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn = net_generic(net, nt->ip_tnl_net_id);
	int mtu;
	int err;

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else if (ip_tunnel_find(itn, p, dev->type)) {
		return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;

	err = register_netdevice(dev);
	if (err)
		return err;

	/* Ethernet-type tunnels get a random MAC unless one was supplied. */
	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		/* Caller asked for an MTU: clamp dev->mtu into the range the
		 * tunnel headers leave available.
		 */
		unsigned int limit = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));

		if (dev->type == ARPHRD_ETHER)
			limit -= dev->hard_header_len;

		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, limit);
	}

	err = dev_set_mtu(dev, mtu);
	if (err) {
		/* Undo the registration performed above. */
		unregister_netdevice(dev);
		return err;
	}

	ip_tunnel_add(itn, nt);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1180 
/*
 * rtnl "changelink" helper for IP tunnels.
 *
 * Returns -EINVAL for the fallback device, -EEXIST when the new parameters
 * match a tunnel that belongs to a different device, and -EINVAL when (for
 * non-Ethernet devices with no matching tunnel) the new destination address
 * would require different IFF_POINTOPOINT/IFF_BROADCAST flags than @dev
 * currently has. Otherwise the tunnel is updated through ip_tunnel_update()
 * and 0 is returned.
 */
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *self = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(self->net, self->ip_tnl_net_id);
	struct ip_tunnel *target;

	if (itn->fb_tunnel_dev == dev)
		return -EINVAL;

	target = ip_tunnel_find(itn, p, dev->type);
	if (target) {
		if (target->dev != dev)
			return -EEXIST;
	} else {
		/* No tunnel matches the new parameters: update our own. */
		target = self;
	}

	if (target == self && !ip_tunnel_find(itn, p, dev->type) &&
	    dev->type != ARPHRD_ETHER) {
		unsigned int wanted = 0;

		if (ipv4_is_multicast(p->iph.daddr))
			wanted = IFF_BROADCAST;
		else if (p->iph.daddr)
			wanted = IFF_POINTOPOINT;

		/* The point-to-point/broadcast nature may not change. */
		if ((dev->flags ^ wanted) &
		    (IFF_POINTOPOINT | IFF_BROADCAST))
			return -EINVAL;
	}

	ip_tunnel_update(itn, target, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1218 
ip_tunnel_init(struct net_device * dev)1219 int ip_tunnel_init(struct net_device *dev)
1220 {
1221 	struct ip_tunnel *tunnel = netdev_priv(dev);
1222 	struct iphdr *iph = &tunnel->parms.iph;
1223 	int err;
1224 
1225 	dev->needs_free_netdev = true;
1226 	dev->priv_destructor = ip_tunnel_dev_free;
1227 	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1228 	if (!dev->tstats)
1229 		return -ENOMEM;
1230 
1231 	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1232 	if (err) {
1233 		free_percpu(dev->tstats);
1234 		return err;
1235 	}
1236 
1237 	err = gro_cells_init(&tunnel->gro_cells, dev);
1238 	if (err) {
1239 		dst_cache_destroy(&tunnel->dst_cache);
1240 		free_percpu(dev->tstats);
1241 		return err;
1242 	}
1243 
1244 	tunnel->dev = dev;
1245 	tunnel->net = dev_net(dev);
1246 	strcpy(tunnel->parms.name, dev->name);
1247 	iph->version		= 4;
1248 	iph->ihl		= 5;
1249 
1250 	if (tunnel->collect_md)
1251 		netif_keep_dst(dev);
1252 	return 0;
1253 }
1254 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1255 
ip_tunnel_uninit(struct net_device * dev)1256 void ip_tunnel_uninit(struct net_device *dev)
1257 {
1258 	struct ip_tunnel *tunnel = netdev_priv(dev);
1259 	struct net *net = tunnel->net;
1260 	struct ip_tunnel_net *itn;
1261 
1262 	itn = net_generic(net, tunnel->ip_tnl_net_id);
1263 	ip_tunnel_del(itn, netdev_priv(dev));
1264 	if (itn->fb_tunnel_dev == dev)
1265 		WRITE_ONCE(itn->fb_tunnel_dev, NULL);
1266 
1267 	dst_cache_reset(&tunnel->dst_cache);
1268 }
1269 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1270 
1271 /* Do least required initialization, rest of init is done in tunnel_init call */
ip_tunnel_setup(struct net_device * dev,unsigned int net_id)1272 void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1273 {
1274 	struct ip_tunnel *tunnel = netdev_priv(dev);
1275 	tunnel->ip_tnl_net_id = net_id;
1276 }
1277 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1278 
1279 MODULE_LICENSE("GPL");
1280