// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause a complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it requires maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter; since cpu migration is forbidden once we enter the first
   ndo_xmit(), we force an exit if this counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to
   the upper header. It is a very good solution, but it introduces
   two problems:

   - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all routers (at least, in my neighbourhood)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect an inner
   encapsulation made by our node. It is difficult or even impossible,
   especially taking fragmentation into account. In short, ttl is not
   a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when the encapsulated packets have
   DF set. But it is not our problem! Nobody could accuse us; we did
   all that we could. Even if it was your gated that injected the
   fatal route to the network, even if it was you who configured the
   fatal static route: you are innocent. :-)

   Alexey Kuznetsov.
 */

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static const struct header_ops ipgre_header_ops;

static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
				u32 id, u32 index,
				bool truncate, bool is_ipv4);

static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;

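/* Handle an ICMP error received for a GRE-encapsulated packet: find
 * the tunnel the inner header belongs to and record the event in
 * t->err_count/t->err_time for the tunnel transmit path to act on.
 */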
static int ipgre_err(struct sk_buff *skb, u32 info,
		     const struct tnl_ptk_info *tpi)
{

	/* All the routers (except for Linux) return only
	   8 bytes of packet payload. It means that precise relaying of
	   ICMP in the real Internet is absolutely infeasible.

	   Moreover, Cisco "wise men" put the GRE key in the third word
	   of the GRE header. That makes it impossible to maintain even
	   soft state for keyed GRE tunnels with checksums enabled. Tell
	   them "thank you".

	   Well, I wonder: rfc1812 was written by a Cisco employee, so
	   why the hell do these idiots break standards established
	   by themselves???
	   */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	unsigned int data_len = 0;
	struct ip_tunnel *t;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else if (tpi->proto == htons(ETH_P_ERSPAN) ||
		 tpi->proto == htons(ETH_P_ERSPAN2))
		itn = net_generic(net, erspan_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
			     iph->daddr, iph->saddr, tpi->key);

	if (!t)
		return -ENOENT;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH;
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;

	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
		break;

	case ICMP_REDIRECT:
		break;
	}

#if IS_ENABLED(CONFIG_IPV6)
	if (tpi->proto == htons(ETH_P_IPV6) &&
	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
					type, data_len))
		return 0;
#endif

	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return 0;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return 0;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;

	return 0;
}

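/* ICMP error handler registered for GREPROTO_CISCO (see ipgre_protocol
 * below): PMTU updates and redirects are applied here directly, all
 * other errors are passed on to ipgre_err().
 */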
static void gre_err(struct sk_buff *skb, u32 info)
{
	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload. It means that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 *
	 * Moreover, Cisco "wise men" put the GRE key in the third word
	 * of the GRE header. That makes it impossible to maintain even
	 * soft state for keyed GRE tunnels with checksums enabled.
	 * Tell them "thank you".
	 *
	 * Well, I wonder: rfc1812 was written by a Cisco employee, so
	 * why the hell do these idiots break standards established
	 * by themselves???
	 */

	const struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct tnl_ptk_info tpi;

	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
			     iph->ihl * 4) < 0)
		return;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 skb->dev->ifindex, IPPROTO_GRE);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
			      IPPROTO_GRE);
		return;
	}

	ipgre_err(skb, info, &tpi);
}

static bool is_erspan_type1(int gre_hdr_len)
{
	/* Both ERSPAN type I (version 0) and type II (version 1) use
	 * protocol 0x88BE, but type I has only a 4-byte GRE header,
	 * while type II has an 8-byte one.
	 */
	return gre_hdr_len == 4;
}

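/* Receive path for ERSPAN packets: look up the tunnel, strip the
 * GRE/ERSPAN headers and, in collect_md mode, attach the ERSPAN
 * metadata to the skb as a tunnel dst before handing the packet to
 * ip_tunnel_rcv().
 */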
static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
		      int gre_hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct metadata_dst *tun_dst = NULL;
	struct erspan_base_hdr *ershdr;
	struct ip_tunnel_net *itn;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;
	struct erspan_md2 *md2;
	int ver;
	int len;

	itn = net_generic(net, erspan_net_id);
	iph = ip_hdr(skb);
	if (is_erspan_type1(gre_hdr_len)) {
		ver = 0;
		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
					  tpi->flags | TUNNEL_NO_KEY,
					  iph->saddr, iph->daddr, 0);
	} else {
		if (unlikely(!pskb_may_pull(skb,
					    gre_hdr_len + sizeof(*ershdr))))
			return PACKET_REJECT;

		ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
		ver = ershdr->ver;
		iph = ip_hdr(skb);
		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
					  tpi->flags | TUNNEL_KEY,
					  iph->saddr, iph->daddr, tpi->key);
	}

	if (tunnel) {
		if (is_erspan_type1(gre_hdr_len))
			len = gre_hdr_len;
		else
			len = gre_hdr_len + erspan_hdr_len(ver);

		if (unlikely(!pskb_may_pull(skb, len)))
			return PACKET_REJECT;

		if (__iptunnel_pull_header(skb,
					   len,
					   htons(ETH_P_TEB),
					   false, false) < 0)
			goto drop;

		if (tunnel->collect_md) {
			struct erspan_metadata *pkt_md, *md;
			struct ip_tunnel_info *info;
			unsigned char *gh;
			__be64 tun_id;
			__be16 flags;

			tpi->flags |= TUNNEL_KEY;
			flags = tpi->flags;
			tun_id = key32_to_tunnel_id(tpi->key);

			tun_dst = ip_tun_rx_dst(skb, flags,
						tun_id, sizeof(*md));
			if (!tun_dst)
				return PACKET_REJECT;

			/* skb can be uncloned in __iptunnel_pull_header, so
			 * old pkt_md is no longer valid and we need to reset
			 * it
			 */
			gh = skb_network_header(skb) +
			     skb_network_header_len(skb);
			pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
							    sizeof(*ershdr));
			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
			md->version = ver;
			md2 = &md->u.md2;
			memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
						       ERSPAN_V2_MDSIZE);

			info = &tun_dst->u.tun_info;
			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
			info->options_len = sizeof(*md);
		}

		skb_reset_mac_header(skb);
		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_REJECT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

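/* Common receive helper: find the tunnel matching the outer addresses
 * and key and deliver the packet to it. Returns PACKET_RCVD when the
 * skb was consumed, PACKET_REJECT on failure and PACKET_NEXT when no
 * tunnel matched, so the caller can try another tunnel table.
 */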
static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
	struct metadata_dst *tun_dst = NULL;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	iph = ip_hdr(skb);
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		const struct iphdr *tnl_params;

		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
					   raw_proto, false) < 0)
			goto drop;

		/* Special case for ipgre_header_parse(), which expects the
		 * mac_header to point to the outer IP header.
		 */
		if (tunnel->dev->header_ops == &ipgre_header_ops)
			skb_pop_mac_header(skb);
		else
			skb_reset_mac_header(skb);

		tnl_params = &tunnel->parms.iph;
		if (tunnel->collect_md || tnl_params->daddr == 0) {
			__be16 flags;
			__be64 tun_id;

			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
			tun_id = key32_to_tunnel_id(tpi->key);
			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
			if (!tun_dst)
				return PACKET_REJECT;
		}

		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_NEXT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		     int hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	int res;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
		/* ipgre tunnels in collect metadata mode should also
		 * receive ETH_P_TEB traffic.
		 */
		itn = net_generic(net, ipgre_net_id);
		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
	}
	return res;
}

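/* Main GRE receive handler (ipgre_protocol.handler below): parse the
 * GRE header and dispatch to the ERSPAN or GRE/TEB receive paths,
 * answering with ICMP port unreachable if no tunnel claims the packet.
 */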
static int gre_rcv(struct sk_buff *skb)
{
	struct tnl_ptk_info tpi;
	bool csum_err = false;
	int hdr_len;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
	}
#endif

	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
	if (hdr_len < 0)
		goto drop;

	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
		     tpi.proto == htons(ETH_P_ERSPAN2))) {
		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
			return 0;
		goto out;
	}

	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
		return 0;

out:
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
	kfree_skb(skb);
	return 0;
}

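/* Push the GRE header described by the tunnel's output flags and hand
 * the skb to ip_tunnel_xmit() for encapsulation in the outer IP
 * header given by tnl_params.
 */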
static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	__be16 flags = tunnel->parms.o_flags;

	/* Push GRE header. */
	gre_build_header(skb, tunnel->tun_hlen,
			 flags, proto, tunnel->parms.o_key,
			 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}

static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}

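/* Flow-based (collect_md) transmit: the outer header parameters come
 * from the per-skb tunnel metadata rather than from the device, as
 * for devices created with iproute2's "external" keyword.
 */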
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			__be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	int tunnel_hlen;
	__be16 flags;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	tunnel_hlen = gre_calc_hlen(key->tun_flags);

	if (skb_cow_head(skb, dev->needed_headroom))
		goto err_free_skb;

	/* Push Tunnel header. */
	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
		goto err_free_skb;

	flags = tun_info->key.tun_flags &
		(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
	gre_build_header(skb, tunnel_hlen, flags, proto,
			 tunnel_id_to_key32(tun_info->key.tun_id),
			 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);

	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);

	return;

err_free_skb:
	kfree_skb(skb);
	DEV_STATS_INC(dev, tx_dropped);
}

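/* collect_md transmit path for ERSPAN: build a version 1 or 2 ERSPAN
 * header from the metadata attached to the skb, then prepend an
 * 8-byte GRE header with the sequence number bit set.
 */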
static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	bool truncate = false;
	__be16 proto;
	int tunnel_hlen;
	int version;
	int nhoff;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
		goto err_free_skb;
	if (tun_info->options_len < sizeof(*md))
		goto err_free_skb;
	md = ip_tunnel_info_opts(tun_info);

	/* ERSPAN has a fixed 8-byte GRE header */
	version = md->version;
	tunnel_hlen = 8 + erspan_hdr_len(version);

	if (skb_cow_head(skb, dev->needed_headroom))
		goto err_free_skb;

	if (gre_handle_offloads(skb, false))
		goto err_free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
			goto err_free_skb;
		truncate = true;
	}

	nhoff = skb_network_offset(skb);
	if (skb->protocol == htons(ETH_P_IP) &&
	    (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
		truncate = true;

	if (skb->protocol == htons(ETH_P_IPV6)) {
		int thoff;

		if (skb_transport_header_was_set(skb))
			thoff = skb_transport_offset(skb);
		else
			thoff = nhoff + sizeof(struct ipv6hdr);
		if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
			truncate = true;
	}

	if (version == 1) {
		erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
				    ntohl(md->u.index), truncate, true);
		proto = htons(ETH_P_ERSPAN);
	} else if (version == 2) {
		erspan_build_header_v2(skb,
				       ntohl(tunnel_id_to_key32(key->tun_id)),
				       md->u.md2.dir,
				       get_hwid(&md->u.md2),
				       truncate, true);
		proto = htons(ETH_P_ERSPAN2);
	} else {
		goto err_free_skb;
	}

	gre_build_header(skb, 8, TUNNEL_SEQ,
			 proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));

	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);

	return;

err_free_skb:
	kfree_skb(skb);
	DEV_STATS_INC(dev, tx_dropped);
}

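/* ndo_fill_metadata_dst: resolve the route the tunnel would use for
 * this skb and record the chosen local source address in its tunnel
 * metadata.
 */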
static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	const struct ip_tunnel_key *key;
	struct rtable *rt;
	struct flowi4 fl4;

	if (ip_tunnel_info_af(info) != AF_INET)
		return -EINVAL;

	key = &info->key;
	ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id),
			    key->tos & ~INET_ECN_MASK, dev_net(dev), 0,
			    skb->mark, skb_get_hash(skb), key->flow_flags);
	rt = ip_route_output_key(dev_net(dev), &fl4);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	ip_rt_put(rt);
	info->key.u.ipv4.src = fl4.saddr;
	return 0;
}

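/* ndo_start_xmit for "gre" devices. The dev->header_ops case covers
 * ARPHRD_IPGRE devices where ipgre_header() already built the outer
 * IP header on the skb; it is used as the template and pulled off
 * before transmission.
 */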
static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (dev->header_ops) {
		int pull_len = tunnel->hlen + sizeof(struct iphdr);

		if (skb_cow_head(skb, 0))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		if (!pskb_network_may_pull(skb, pull_len))
			goto free_skb;

		/* ip_tunnel_xmit() needs skb->data pointing to gre header. */
		skb_pull(skb, pull_len);
		skb_reset_mac_header(skb);

		if (skb->ip_summed == CHECKSUM_PARTIAL &&
		    skb_checksum_start(skb) < skb->data)
			goto free_skb;
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	__gre_xmit(skb, dev, tnl_params, skb->protocol);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	DEV_STATS_INC(dev, tx_dropped);
	return NETDEV_TX_OK;
}

static netdev_tx_t erspan_xmit(struct sk_buff *skb,
			       struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	bool truncate = false;
	__be16 proto;

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		erspan_fb_xmit(skb, dev);
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, false))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
			goto free_skb;
		truncate = true;
	}

	/* Push ERSPAN header */
	if (tunnel->erspan_ver == 0) {
		proto = htons(ETH_P_ERSPAN);
		tunnel->parms.o_flags &= ~TUNNEL_SEQ;
	} else if (tunnel->erspan_ver == 1) {
		erspan_build_header(skb, ntohl(tunnel->parms.o_key),
				    tunnel->index,
				    truncate, true);
		proto = htons(ETH_P_ERSPAN);
	} else if (tunnel->erspan_ver == 2) {
		erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
				       tunnel->dir, tunnel->hwid,
				       truncate, true);
		proto = htons(ETH_P_ERSPAN2);
	} else {
		goto free_skb;
	}

	tunnel->parms.o_flags &= ~TUNNEL_KEY;
	__gre_xmit(skb, dev, &tunnel->parms.iph, proto);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	DEV_STATS_INC(dev, tx_dropped);
	return NETDEV_TX_OK;
}

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	DEV_STATS_INC(dev, tx_dropped);
	return NETDEV_TX_OK;
}

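/* Recompute header sizes, the device MTU and the software GSO
 * features after the tunnel's output flags (TUNNEL_SEQ/TUNNEL_CSUM)
 * have changed.
 */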
static void ipgre_link_update(struct net_device *dev, bool set_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	__be16 flags;
	int len;

	len = tunnel->tun_hlen;
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	len = tunnel->tun_hlen - len;
	tunnel->hlen = tunnel->hlen + len;

	if (dev->header_ops)
		dev->hard_header_len += len;
	else
		dev->needed_headroom += len;

	if (set_mtu)
		dev->mtu = max_t(int, dev->mtu - len, 68);

	flags = tunnel->parms.o_flags;

	if (flags & TUNNEL_SEQ ||
	    (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
		dev->features &= ~NETIF_F_GSO_SOFTWARE;
		dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
	} else {
		dev->features |= NETIF_F_GSO_SOFTWARE;
		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
	}
}

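/* ioctl backend (SIOCADDTUNNEL and friends): validate the request,
 * translate between GRE wire flags and tunnel flags, and defer the
 * actual work to ip_tunnel_ctl().
 */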
static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p,
			    int cmd)
{
	int err;

	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
		if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
		    p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
		    ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING)))
			return -EINVAL;
	}

	p->i_flags = gre_flags_to_tnl_flags(p->i_flags);
	p->o_flags = gre_flags_to_tnl_flags(p->o_flags);

	err = ip_tunnel_ctl(dev, p, cmd);
	if (err)
		return err;

	if (cmd == SIOCCHGTUNNEL) {
		struct ip_tunnel *t = netdev_priv(dev);

		t->parms.i_flags = p->i_flags;
		t->parms.o_flags = p->o_flags;

		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
			ipgre_link_update(dev, true);
	}

	p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
	p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
	return 0;
}

/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could have made something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */
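/* header_ops->create for broadcast-capable ARPHRD_IPGRE devices: build
 * the outer IP and GRE headers in front of the payload, taking the
 * tunnel endpoint from daddr so the multicast "LAN" above works.
 */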
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

	return -(t->hlen + sizeof(*iph));
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *)skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(t->net, &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (!__in_dev_get_rtnl(dev))
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(t->net, t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_siocdevprivate	= ip_tunnel_siocdevprivate,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= dev_get_tstats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_tunnel_ctl		= ipgre_tunnel_ctl,
};

#define GRE_FEATURES (NETIF_F_SG |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->type		= ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}

static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	__be16 flags;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
	dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);

	dev->features		|= GRE_FEATURES | NETIF_F_LLTX;
	dev->hw_features	|= GRE_FEATURES;

	flags = tunnel->parms.o_flags;

	/* TCP offload with GRE SEQ is not supported, nor can we support 2
	 * levels of outer headers requiring an update.
	 */
	if (flags & TUNNEL_SEQ)
		return;
	if (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)
		return;

	dev->features |= NETIF_F_GSO_SOFTWARE;
	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	__dev_addr_set(dev, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags		= IFF_NOARP;
	netif_keep_dst(dev);
	dev->addr_len		= 4;

	if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
			dev->hard_header_len = tunnel->hlen + sizeof(*iph);
			dev->needed_headroom = 0;
		}
#endif
	} else if (!tunnel->collect_md) {
		dev->header_ops = &ipgre_header_ops;
		dev->hard_header_len = tunnel->hlen + sizeof(*iph);
		dev->needed_headroom = 0;
	}

	return ip_tunnel_init(dev);
}

static const struct gre_protocol ipgre_protocol = {
	.handler     = gre_rcv,
	.err_handler = gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit_batch = ipgre_exit_batch_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
				 struct netlink_ext_ack *extack)
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION|GRE_ROUTING))
		return -EINVAL;

	if (data[IFLA_GRE_COLLECT_METADATA] &&
	    data[IFLA_GRE_ENCAP_TYPE] &&
	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
			      struct netlink_ext_ack *extack)
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data, extack);
}

static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	__be16 flags = 0;
	int ret;

	if (!data)
		return 0;

	ret = ipgre_tap_validate(tb, data, extack);
	if (ret)
		return ret;

	if (data[IFLA_GRE_ERSPAN_VER] &&
	    nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
		return 0;

	/* ERSPAN type II/III should only have the GRE sequence and key flags */
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (!data[IFLA_GRE_COLLECT_METADATA] &&
	    flags != (GRE_SEQ | GRE_KEY))
		return -EINVAL;

	/* The ERSPAN session ID is only 10 bits. Since we reuse the
	 * 32-bit key field as the ID, check its range.
	 */
	if (data[IFLA_GRE_IKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
		return -EINVAL;

	if (data[IFLA_GRE_OKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
		return -EINVAL;

	return 0;
}

static int ipgre_netlink_parms(struct net_device *dev,
				struct nlattr *data[],
				struct nlattr *tb[],
				struct ip_tunnel_parm *parms,
				__u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);

	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return 0;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
		if (t->ignore_df)
			return -EINVAL;
		parms->iph.frag_off = htons(IP_DF);
	}

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		t->collect_md = true;
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;
	}

	if (data[IFLA_GRE_IGNORE_DF]) {
		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
		  && (parms->iph.frag_off & htons(IP_DF)))
			return -EINVAL;
		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
	}

	if (data[IFLA_GRE_FWMARK])
		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

	return 0;
}

static int erspan_netlink_parms(struct net_device *dev,
				struct nlattr *data[],
				struct nlattr *tb[],
				struct ip_tunnel_parm *parms,
				__u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);
	int err;

	err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
	if (err)
		return err;
	if (!data)
		return 0;

	if (data[IFLA_GRE_ERSPAN_VER]) {
		t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);

		if (t->erspan_ver > 2)
			return -EINVAL;
	}

	if (t->erspan_ver == 1) {
		if (data[IFLA_GRE_ERSPAN_INDEX]) {
			t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
			if (t->index & ~INDEX_MASK)
				return -EINVAL;
		}
	} else if (t->erspan_ver == 2) {
		if (data[IFLA_GRE_ERSPAN_DIR]) {
			t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
			if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
				return -EINVAL;
		}
		if (data[IFLA_GRE_ERSPAN_HWID]) {
			t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
			if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
				return -EINVAL;
		}
	}

	return 0;
}

/* This function returns true when ENCAP attributes are present in the nl msg */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)
{
	bool ret = false;

	memset(ipencap, 0, sizeof(*ipencap));

	if (!data)
		return ret;

	if (data[IFLA_GRE_ENCAP_TYPE]) {
		ret = true;
		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
	}

	if (data[IFLA_GRE_ENCAP_FLAGS]) {
		ret = true;
		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
	}

	if (data[IFLA_GRE_ENCAP_SPORT]) {
		ret = true;
		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
	}

	if (data[IFLA_GRE_ENCAP_DPORT]) {
		ret = true;
		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
	}

	return ret;
}

static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= dev_get_tstats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static int erspan_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->erspan_ver == 0)
		tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */
	else
		tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */

	tunnel->parms.iph.protocol = IPPROTO_GRE;
	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
		       erspan_hdr_len(tunnel->erspan_ver);

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;
	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops erspan_netdev_ops = {
	.ndo_init		= erspan_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= erspan_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= dev_get_tstats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->max_mtu = 0;
	dev->netdev_ops	= &gre_tap_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
}

static int
ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
{
	struct ip_tunnel_encap ipencap;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);
		int err = ip_tunnel_encap_setup(t, &ipencap);

		if (err < 0)
			return err;
	}

	return 0;
}

static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	struct ip_tunnel_parm p;
	__u32 fwmark = 0;
	int err;

	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;
	return ip_tunnel_newlink(dev, tb, &p, fwmark);
}

static int erspan_newlink(struct net *src_net, struct net_device *dev,
			  struct nlattr *tb[], struct nlattr *data[],
			  struct netlink_ext_ack *extack)
{
	struct ip_tunnel_parm p;
	__u32 fwmark = 0;
	int err;

	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err)
		return err;
	return ip_tunnel_newlink(dev, tb, &p, fwmark);
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[],
			    struct netlink_ext_ack *extack)
{
	struct ip_tunnel *t = netdev_priv(dev);
	__u32 fwmark = t->fwmark;
	struct ip_tunnel_parm p;
	int err;

	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;

	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
	if (err < 0)
		return err;

	t->parms.i_flags = p.i_flags;
	t->parms.o_flags = p.o_flags;

	ipgre_link_update(dev, !tb[IFLA_MTU]);

	return 0;
}

static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
			     struct nlattr *data[],
			     struct netlink_ext_ack *extack)
{
	struct ip_tunnel *t = netdev_priv(dev);
	__u32 fwmark = t->fwmark;
	struct ip_tunnel_parm p;
	int err;

	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;

	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
	if (err < 0)
		return err;

	t->parms.i_flags = p.i_flags;
	t->parms.o_flags = p.o_flags;

	return 0;
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_DPORT */
		nla_total_size(2) +
		/* IFLA_GRE_COLLECT_METADATA */
		nla_total_size(0) +
		/* IFLA_GRE_IGNORE_DF */
		nla_total_size(1) +
		/* IFLA_GRE_FWMARK */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_INDEX */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_VER */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_DIR */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_HWID */
		nla_total_size(2) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;
	__be16 o_flags = p->o_flags;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
			 gre_tnl_flags_to_gre_flags(o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))) ||
	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
			t->encap.type) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
			 t->encap.sport) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
			 t->encap.dport) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
			t->encap.flags))
		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
		goto nla_put_failure;

	if (t->collect_md) {
		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
			goto nla_put_failure;
	}

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (t->erspan_ver <= 2) {
		if (t->erspan_ver != 0 && !t->collect_md)
			t->parms.o_flags |= TUNNEL_KEY;

		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
			goto nla_put_failure;

		if (t->erspan_ver == 1) {
			if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
				goto nla_put_failure;
		} else if (t->erspan_ver == 2) {
			if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
				goto nla_put_failure;
			if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
				goto nla_put_failure;
		}
	}

	return ipgre_fill_info(skb, dev);

nla_put_failure:
	return -EMSGSIZE;
}

static void erspan_setup(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	ether_setup(dev);
	dev->max_mtu = 0;
	dev->netdev_ops = &erspan_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, erspan_net_id);
	t->erspan_ver = 1;
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = sizeof_field(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = sizeof_field(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops erspan_link_ops __read_mostly = {
	.kind		= "erspan",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= erspan_setup,
	.validate	= erspan_validate,
	.newlink	= erspan_newlink,
	.changelink	= erspan_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= erspan_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

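/* Create a flow-based (collect_md) gretap device. Used by openvswitch;
 * the MTU is raised to IP_MAX_MTU because such users expect packet
 * sizes to be unrestricted (see the comment in the body).
 */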
struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, name, name_assign_type,
			       &ipgre_tap_ops, tb, NULL);
	if (IS_ERR(dev))
		return dev;

	/* Configure flow based GRE device. */
	t = netdev_priv(dev);
	t->collect_md = true;

	err = ipgre_newlink(net, dev, tb, NULL, NULL);
	if (err < 0) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	err = rtnl_configure_link(dev, NULL, 0, NULL);
	if (err < 0)
		goto out;

	return dev;
out:
	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);

static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit_batch = ipgre_tap_exit_batch_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __net_init erspan_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, erspan_net_id,
				  &erspan_link_ops, "erspan0");
}

static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
{
	ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
}

static struct pernet_operations erspan_net_ops = {
	.init = erspan_init_net,
	.exit_batch = erspan_exit_batch_net,
	.id   = &erspan_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = register_pernet_device(&erspan_net_ops);
	if (err < 0)
		goto pnet_erspan_failed;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	err = rtnl_link_register(&erspan_link_ops);
	if (err < 0)
		goto erspan_link_failed;

	return 0;

erspan_link_failed:
	rtnl_link_unregister(&ipgre_tap_ops);
tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&erspan_net_ops);
pnet_erspan_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	unregister_pernet_device(&ipgre_net_ops);
	return err;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	rtnl_link_unregister(&erspan_link_ops);
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
	unregister_pernet_device(&erspan_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
MODULE_ALIAS_NETDEV("erspan0");