// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

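/* Hash a tunnel key and remote address into one of the
 * IP_TNL_HASH_SIZE buckets of the per-netns tunnel table.
 */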
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
			 IP_TNL_HASH_BITS);
}

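/* A tunnel configured with TUNNEL_KEY matches only packets carrying
 * the same key; a keyless tunnel matches only keyless packets.
 */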
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless
   tunnel, will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for an input
   packet.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;
	struct net_device *ndev;
	unsigned int hash;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t && t->dev->flags & IFF_UP)
		return t;

	ndev = READ_ONCE(itn->fb_tunnel_dev);
	if (ndev && ndev->flags & IFF_UP)
		return netdev_priv(ndev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

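/* Select the hash bucket for a tunnel with the given parameters,
 * mirroring the hashing used by ip_tunnel_lookup(): multicast or
 * unset destinations hash with remote == 0, and VTI tunnels without
 * TUNNEL_KEY ignore i_key.
 */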
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

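/* ip_tunnel_add()/ip_tunnel_del() link a tunnel into, or unlink it
 * from, the per-netns hash table, keeping the collect_md_tun shortcut
 * used by the lookup fallback in sync.
 */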
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}

static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

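/* Allocate and register a tunnel net_device, named after parms->name
 * when set, or "<kind>%d" otherwise; the caller must hold RTNL.
 */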
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	err = -E2BIG;
	if (parms->name[0]) {
		if (!dev_valid_name(parms->name))
			goto failed;
		strlcpy(name, parms->name, IFNAMSIZ);
	} else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3))
			goto failed;
		strcpy(name, ops->kind);
		strcat(name, "%d");
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

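/* Look up the underlying output device for this tunnel and derive
 * needed_headroom and a suitable MTU from it.
 */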
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
				    iph->saddr, tunnel->parms.o_key,
				    RT_TOS(iph->tos), tunnel->parms.link,
				    tunnel->fwmark, 0);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;

		dst_cache_reset(&tunnel->dst_cache);
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = min(tdev->mtu, IP_MAX_MTU);
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);

	if (mtu < IPV4_MIN_MTU)
		mtu = IPV4_MIN_MTU;

	return mtu;
}

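/* Create a tunnel device for the given parameters, set its MTU bounds
 * to account for the tunnel header, and hash it.
 */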
static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;
	int t_hlen;
	int mtu;
	int err;

	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	mtu = ip_tunnel_bind_dev(dev);
	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	nt = netdev_priv(dev);
	t_hlen = nt->hlen + sizeof(struct iphdr);
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = IP_MAX_MTU - t_hlen;
	if (dev->type == ARPHRD_ETHER)
		dev->max_mtu -= dev->hard_header_len;

	ip_tunnel_add(itn, nt);
	return nt;

err_dev_set_mtu:
	unregister_netdevice(dev);
	return ERR_PTR(err);
}

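/* Common receive path for IP tunnels: validate the checksum and
 * sequence flags against the tunnel configuration, decapsulate ECN,
 * update stats, then hand the packet to the device's GRO cell.
 */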
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

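/* Encapsulation ops (e.g. FOU/GUE) live in a fixed-size array and are
 * registered atomically with cmpxchg(); deletion waits for in-flight
 * readers via synchronize_net().
 */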
int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);

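/* Record UDP encapsulation parameters on the tunnel and recompute the
 * total header length (encap header + tunnel header).
 */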
int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

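/* Update the path MTU on the inner dst and, when the packet does not
 * fit, send ICMP fragmentation-needed or ICMPv6 packet-too-big back
 * to the sender and return -E2BIG.
 */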
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df,
			    const struct iphdr *inner_iph,
			    int tunnel_hlen, __be32 dst, bool md)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size;
	int mtu;

	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
	pkt_size = skb->len - tunnel_hlen;
	pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;

	if (df) {
		mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
		mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
	} else {
		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
	}

	if (skb_valid_dst(skb))
		skb_dst_update_pmtu_no_confirm(skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6;
		__be32 daddr;

		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
					   NULL;
		daddr = md ? dst : tunnel->parms.iph.daddr;

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((daddr && !ipv4_is_multicast(daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
{
	/* we must cap headroom to some upper limit, else pskb_expand_head
	 * will overflow header offsets in skb_headers_offset_update().
	 */
	static const unsigned int max_allowed = 512;

	if (headroom > max_allowed)
		headroom = max_allowed;

	if (headroom > READ_ONCE(dev->needed_headroom))
		WRITE_ONCE(dev->needed_headroom, headroom);
}

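/* Transmit path for metadata-based (collect_md) tunnels: the route,
 * TOS, TTL and DF bit come from the per-packet tunnel info rather
 * than from the device configuration.
 */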
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		       u8 proto, int tunnel_hlen)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;
	bool use_cache;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto tx_error;
	key = &tun_info->key;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	tos = key->tos;
	if (tos == 1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
			    0, skb->mark, skb_get_hash(skb));
	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
		goto tx_error;

	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);
		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
	}
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
			    key->u.ipv4.dst, true)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = key->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	if (skb_cow_head(skb, headroom)) {
		ip_rt_put(rt);
		goto tx_dropped;
	}

	ip_tunnel_adj_headroom(dev, headroom);

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;
tx_error:
	dev->stats.tx_errors++;
	goto kfree;
tx_dropped:
	dev->stats.tx_dropped++;
kfree:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);

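/* Transmit path for statically configured tunnels. For an NBMA tunnel
 * (daddr == 0) the destination is recovered from per-packet metadata,
 * the inner IPv4 route, or an IPv4-compatible IPv6 destination
 * address.
 */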
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info = NULL;
	const struct iphdr *inner_iph;
	unsigned int max_headroom;	/* The extra header space needed */
	struct rtable *rt = NULL;		/* Route to the other host */
	bool use_cache = false;
	struct flowi4 fl4;
	bool md = false;
	bool connected;
	u8 tos, ttl;
	__be32 dst;
	__be16 df;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		tun_info = skb_tunnel_info(skb);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
		    ip_tunnel_info_af(tun_info) == AF_INET &&
		    tun_info->key.u.ipv4.dst) {
			dst = tun_info->key.u.ipv4.dst;
			md = true;
			connected = true;
		}
		else if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		if (!md)
			connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
			    tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
			    tunnel->fwmark, skb_get_hash(skb));

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	if (connected && md) {
		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
		if (use_cache)
			rt = dst_cache_get_ip4(&tun_info->dst_cache,
					       &fl4.saddr);
	} else {
		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
						&fl4.saddr) : NULL;
	}

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
		else if (!md && connected)
			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
		df |= (inner_iph->frag_off & htons(IP_DF));

	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);

	if (skb_cow_head(skb, max_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	ip_tunnel_adj_headroom(dev, max_headroom);

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

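/* Apply new parameters to an existing tunnel, re-hashing it and
 * rebinding the underlying device when the link or fwmark changed.
 */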
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link || t->fwmark != fwmark) {
		int mtu;

		t->parms.link = p->link;
		t->fwmark = fwmark;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}

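/* Handle the SIOC{GET,ADD,CHG,DEL}TUNNEL ioctls; modifying commands
 * require CAP_NET_ADMIN in the tunnel's user namespace.
 */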
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true, 0);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

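/* Change the device MTU, capped at IP_MAX_MTU minus the tunnel
 * header; with strict == false an oversized request is clamped
 * instead of rejected.
 */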
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int max_mtu = IP_MAX_MTU - t_hlen;

	if (dev->type == ARPHRD_ETHER)
		max_mtu -= dev->hard_header_len;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		new_mtu = max_mtu;
	}

	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	dst_cache_destroy(&tunnel->dst_cache);
	free_percpu(dev->tstats);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(itn, netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);

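/* Per-netns initialization: set up the hash table and, where fallback
 * tunnels are enabled, create the netns-local fallback device.
 */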
int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	itn->rtnl_link_ops = ops;
	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops || !net_has_fallback_tunnels(net)) {
		struct ip_tunnel_net *it_init_net;

		it_init_net = net_generic(&init_net, ip_tnl_net_id);
		itn->type = it_init_net->type;
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing it to be moved to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
		itn->type = itn->fb_tunnel_dev->type;
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
			      struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
			   struct rtnl_link_ops *ops)
{
	struct ip_tunnel_net *itn;
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		itn = net_generic(net, id);
		ip_tunnel_destroy(net, itn, &list, ops);
	}
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);

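/* rtnl newlink handler shared by tunnel drivers: reject duplicates,
 * register the device, set up its MTU and add it to the hash table.
 */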
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;
	err = register_netdevice(dev);
	if (err)
		goto err_register_netdevice;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));

		if (dev->type == ARPHRD_ETHER)
			max -= dev->hard_header_len;

		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
	}

	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	ip_tunnel_add(itn, nt);
	return 0;

err_dev_set_mtu:
	unregister_netdevice(dev);
err_register_netdevice:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->needs_free_netdev = true;
	dev->priv_destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		dst_cache_destroy(&tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version		= 4;
	iph->ihl		= 5;

	if (tunnel->collect_md)
		netif_keep_dst(dev);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	ip_tunnel_del(itn, netdev_priv(dev));
	if (itn->fb_tunnel_dev == dev)
		WRITE_ONCE(itn->fb_tunnel_dev, NULL);

	dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization; the rest is done in the
 * tunnel_init call.
 */
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");