// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

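/* Hash a tunnel by its (key, remote address) pair into one of the
 * IP_TNL_HASH_SIZE buckets.  Both inputs are network-endian; the
 * __force casts only strip the sparse annotation before hashing.
 */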
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
			 IP_TNL_HASH_BITS);
}

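/* A keyed tunnel matches only packets that carry TUNNEL_KEY with the
 * same key; a keyless tunnel matches only packets without TUNNEL_KEY.
 */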
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless tunnel,
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
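/* The lookup proceeds in four passes of decreasing specificity:
 * 1) exact (local, remote) match, 2) remote-only match with a wildcard
 * source, 3) local-address or multicast match in the keyless-remote
 * bucket, and 4) fully wildcarded tunnels matched by key alone.  A
 * tunnel bound to the ingress link is returned immediately; otherwise
 * a candidate is remembered and used as a fallback, followed by the
 * collect_md tunnel and finally the fallback device.
 */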
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;
	struct net_device *ndev;
	unsigned int hash;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t && t->dev->flags & IFF_UP)
		return t;

	ndev = READ_ONCE(itn->fb_tunnel_dev);
	if (ndev && ndev->flags & IFF_UP)
		return netdev_priv(ndev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

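/* Insertion and removal are RCU-safe with respect to the lookup above:
 * the hash list and the collect_md shortcut are published with
 * hlist_add_head_rcu()/rcu_assign_pointer() and torn down with their
 * RCU counterparts, so readers never see a half-initialized tunnel.
 */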
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}

static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	err = -E2BIG;
	if (parms->name[0]) {
		if (!dev_valid_name(parms->name))
			goto failed;
		strlcpy(name, parms->name, IFNAMSIZ);
	} else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3))
			goto failed;
		strcpy(name, ops->kind);
		strcat(name, "%d");
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

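/* Resolve the underlying output device for a tunnel (via a route
 * lookup towards the configured destination, or the bound link) and
 * derive needed_headroom and a usable MTU from it.  Returns the MTU,
 * clamped to no less than IPV4_MIN_MTU.
 */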
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
				    iph->saddr, tunnel->parms.o_key,
				    RT_TOS(iph->tos), tunnel->parms.link,
				    tunnel->fwmark, 0);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;

		dst_cache_reset(&tunnel->dst_cache);
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = min(tdev->mtu, IP_MAX_MTU);
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);

	if (mtu < IPV4_MIN_MTU)
		mtu = IPV4_MIN_MTU;

	return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;
	int t_hlen;
	int mtu;
	int err;

	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	mtu = ip_tunnel_bind_dev(dev);
	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	nt = netdev_priv(dev);
	t_hlen = nt->hlen + sizeof(struct iphdr);
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = IP_MAX_MTU - t_hlen;
	if (dev->type == ARPHRD_ETHER)
		dev->max_mtu -= dev->hard_header_len;

	ip_tunnel_add(itn, nt);
	return nt;

err_dev_set_mtu:
	unregister_netdevice(dev);
	return ERR_PTR(err);
}

int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	dev_sw_netstats_rx_add(tunnel->dev, skb->len);
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

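/* Encapsulation handlers (e.g. FOU/GUE) register themselves in the
 * global iptun_encaps[] array, one slot per TUNNEL_ENCAP_* type.  The
 * cmpxchg() makes registration atomic: a slot is claimed only if it is
 * still NULL, and released only by the same ops pointer.
 */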
int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);

int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

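/* Check whether the packet fits the path MTU once the outer IP header
 * (plus any encapsulation) is added.  If not, propagate the reduced
 * MTU to the inner dst and signal the sender with ICMP_FRAG_NEEDED or
 * ICMPV6_PKT_TOOBIG, returning -E2BIG so the caller drops the packet.
 */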
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df,
			    const struct iphdr *inner_iph,
			    int tunnel_hlen, __be32 dst, bool md)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size;
	int mtu;

	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
	pkt_size = skb->len - tunnel_hlen;
	pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;

	if (df) {
		mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
		mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
	} else {
		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
	}

	if (skb_valid_dst(skb))
		skb_dst_update_pmtu_no_confirm(skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6;
		__be32 daddr;

		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
					   NULL;
		daddr = md ? dst : tunnel->parms.iph.daddr;

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((daddr && !ipv4_is_multicast(daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
{
	/* we must cap headroom to some upper limit, else pskb_expand_head
	 * will overflow header offsets in skb_headers_offset_update().
	 */
	static const unsigned int max_allowed = 512;

	if (headroom > max_allowed)
		headroom = max_allowed;

	if (headroom > READ_ONCE(dev->needed_headroom))
		WRITE_ONCE(dev->needed_headroom, headroom);
}

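/* Transmit path for metadata-based (collect_md) tunnels: the outer
 * addresses, key, TOS and TTL come from the per-skb tunnel metadata
 * rather than from the netdevice configuration.
 */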
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		       u8 proto, int tunnel_hlen)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;
	bool use_cache;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto tx_error;
	key = &tun_info->key;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	tos = key->tos;
	if (tos == 1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
			    0, skb->mark, skb_get_hash(skb));
	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
		goto tx_error;

	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);
		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
	}
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
			    key->u.ipv4.dst, true)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = key->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	if (skb_cow_head(skb, headroom)) {
		ip_rt_put(rt);
		goto tx_dropped;
	}

	ip_tunnel_adj_headroom(dev, headroom);

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;
tx_error:
	dev->stats.tx_errors++;
	goto kfree;
tx_dropped:
	dev->stats.tx_dropped++;
kfree:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);

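/* Transmit path for classically configured tunnels.  When the tunnel
 * has no fixed destination (NBMA mode), the outer destination is taken
 * from per-skb metadata, from the inner IPv4 route's nexthop, or from
 * an IPv4-compatible IPv6 destination.
 */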
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info = NULL;
	const struct iphdr *inner_iph;
	unsigned int max_headroom;	/* The extra header space needed */
	struct rtable *rt = NULL;		/* Route to the other host */
	bool use_cache = false;
	struct flowi4 fl4;
	bool md = false;
	bool connected;
	u8 tos, ttl;
	__be32 dst;
	__be16 df;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		tun_info = skb_tunnel_info(skb);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
		    ip_tunnel_info_af(tun_info) == AF_INET &&
		    tun_info->key.u.ipv4.dst) {
			dst = tun_info->key.u.ipv4.dst;
			md = true;
			connected = true;
		}
		else if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		if (!md)
			connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
			    tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
			    tunnel->fwmark, skb_get_hash(skb));

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	if (connected && md) {
		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
		if (use_cache)
			rt = dst_cache_get_ip4(&tun_info->dst_cache,
					       &fl4.saddr);
	} else {
		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
						&fl4.saddr) : NULL;
	}

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
		else if (!md && connected)
			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
		df |= (inner_iph->frag_off & htons(IP_DF));

	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);

	if (skb_cow_head(skb, max_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	ip_tunnel_adj_headroom(dev, max_headroom);

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link || t->fwmark != fwmark) {
		int mtu;

		t->parms.link = p->link;
		t->fwmark = fwmark;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}

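/* Shared SIOCGETTUNNEL/SIOCADDTUNNEL/SIOCCHGTUNNEL/SIOCDELTUNNEL
 * handler.  Add/change/delete require CAP_NET_ADMIN in the tunnel's
 * user namespace; operating on the fallback device redirects the
 * request to the tunnel matching the supplied parameters.
 */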
int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true, 0);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ctl);

int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	struct ip_tunnel_parm p;
	int err;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
		return -EFAULT;
	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
	if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
		return -EFAULT;
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

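/* The largest usable MTU is IP_MAX_MTU minus the tunnel and outer IP
 * headers (and the Ethernet header for ARPHRD_ETHER devices).  With
 * strict=false an oversized request is clamped instead of rejected.
 */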
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int max_mtu = IP_MAX_MTU - t_hlen;

	if (dev->type == ARPHRD_ETHER)
		max_mtu -= dev->hard_header_len;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		new_mtu = max_mtu;
	}

	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	dst_cache_destroy(&tunnel->dst_cache);
	free_percpu(dev->tstats);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(itn, netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);

int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	itn->rtnl_link_ops = ops;
	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops || !net_has_fallback_tunnels(net)) {
		struct ip_tunnel_net *it_init_net;

		it_init_net = net_generic(&init_net, ip_tnl_net_id);
		itn->type = it_init_net->type;
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* The FB netdevice is special: there is one, and only one, per netns.
	 * Allowing it to be moved to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
		itn->type = itn->fb_tunnel_dev->type;
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
			      struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
			   struct rtnl_link_ops *ops)
{
	struct ip_tunnel_net *itn;
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		itn = net_generic(net, id);
		ip_tunnel_destroy(net, itn, &list, ops);
	}
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);

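/* Illustrative sketch of how a tunnel driver typically wires the two
 * pernet helpers above; the "foo" names are placeholders, following
 * the pattern used by the ipip driver:
 *
 *	static int __net_init foo_init_net(struct net *net)
 *	{
 *		return ip_tunnel_init_net(net, foo_net_id,
 *					  &foo_link_ops, "foo0");
 *	}
 *
 *	static void __net_exit foo_exit_batch_net(struct list_head *list_net)
 *	{
 *		ip_tunnel_delete_nets(list_net, foo_net_id, &foo_link_ops);
 *	}
 *
 *	static struct pernet_operations foo_net_ops = {
 *		.init = foo_init_net,
 *		.exit_batch = foo_exit_batch_net,
 *		.id   = &foo_net_id,
 *		.size = sizeof(struct ip_tunnel_net),
 *	};
 */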
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;
	err = register_netdevice(dev);
	if (err)
		goto err_register_netdevice;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));

		if (dev->type == ARPHRD_ETHER)
			max -= dev->hard_header_len;

		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
	}

	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	ip_tunnel_add(itn, nt);
	return 0;

err_dev_set_mtu:
	unregister_netdevice(dev);
err_register_netdevice:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

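/* The ndo_init hook shared by the ip_tunnel-based drivers: allocates
 * per-cpu stats, the dst cache and GRO cells, and seeds the outer IP
 * header template (IPv4, 5-word header).
 */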
int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->needs_free_netdev = true;
	dev->priv_destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		dst_cache_destroy(&tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version		= 4;
	iph->ihl		= 5;

	if (tunnel->collect_md)
		netif_keep_dst(dev);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	ip_tunnel_del(itn, netdev_priv(dev));
	if (itn->fb_tunnel_dev == dev)
		WRITE_ONCE(itn->fb_tunnel_dev, NULL);

	dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization; the rest of the init is done
 * in the tunnel_init call.
 */
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");