// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter: since cpu migration is forbidden once we enter the first
   ndo_xmit(), a plain percpu counter suffices. We force an exit when
   this counter reaches RECURSION_LIMIT (see the sketch below).

   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to
   the upper header. It is a very good solution, but it introduces
   two problems:

   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all routers (at least, in my neighbourhood)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect an inner
   encapsulation made by our node. It is difficult or even impossible,
   especially taking fragmentation into account. To be short, ttl is
   no solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when the packets being encapsulated
   have DF set. But it is not our problem! Nobody could accuse us:
   we did all that we could. Even if it was your gated that injected
   a fatal route into the network, even if it was you who configured
   a fatal static route: you are innocent. :-)

   Alexey Kuznetsov.
 */
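
/* For illustration only: a minimal sketch of the percpu recursion guard
 * described above, under the assumption that cpu migration is disabled
 * across the whole call. The identifiers below are hypothetical; recent
 * mainline kernels implement this pattern with the dev_xmit_recursion*()
 * helpers around ndo_start_xmit().
 *
 *	#define RECURSION_LIMIT	4
 *	static DEFINE_PER_CPU(unsigned int, xmit_recursion);
 *
 *	static netdev_tx_t guarded_xmit(struct sk_buff *skb,
 *					struct net_device *dev)
 *	{
 *		netdev_tx_t ret;
 *
 *		if (__this_cpu_read(xmit_recursion) >= RECURSION_LIMIT) {
 *			kfree_skb(skb);		// dead loop: drop, don't recurse
 *			return NETDEV_TX_OK;
 *		}
 *		__this_cpu_inc(xmit_recursion);
 *		ret = dev->netdev_ops->ndo_start_xmit(skb, dev);
 *		__this_cpu_dec(xmit_recursion);
 *		return ret;
 *	}
 */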

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
				u32 id, u32 index,
				bool truncate, bool is_ipv4);

static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;

static int ipgre_err(struct sk_buff *skb, u32 info,
		     const struct tnl_ptk_info *tpi)
{

	/* All the routers (except for Linux) return only
	   8 bytes of packet payload. It means that precise relaying of
	   ICMP in the real Internet is absolutely infeasible.

	   Moreover, Cisco "wise men" put the GRE key in the third word
	   of the GRE header. That makes it impossible to maintain even
	   soft state for keyed GRE tunnels with checksums enabled. Tell
	   them "thank you".

	   Well, I wonder: rfc1812 was written by a Cisco employee,
	   so why the hell do these idiots break standards established
	   by themselves???
	   */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	unsigned int data_len = 0;
	struct ip_tunnel *t;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else if (tpi->proto == htons(ETH_P_ERSPAN) ||
		 tpi->proto == htons(ETH_P_ERSPAN2))
		itn = net_generic(net, erspan_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
			     iph->daddr, iph->saddr, tpi->key);

	if (!t)
		return -ENOENT;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH;
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;

	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
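		/* RFC 4884 4.1: the length field below counts 32-bit words
		 * of the original datagram, hence the multiplication by 4.
		 */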
		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
		break;

	case ICMP_REDIRECT:
		break;
	}

#if IS_ENABLED(CONFIG_IPV6)
	if (tpi->proto == htons(ETH_P_IPV6) &&
	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
					type, data_len))
		return 0;
#endif

	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return 0;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return 0;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;

	return 0;
}

static void gre_err(struct sk_buff *skb, u32 info)
{
	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload. It means that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 *
	 * Moreover, Cisco "wise men" put the GRE key in the third word
	 * of the GRE header. That makes it impossible to maintain even
	 * soft state for keyed GRE tunnels with checksums enabled. Tell
	 * them "thank you".
	 *
	 * Well, I wonder: rfc1812 was written by a Cisco employee,
	 * so why the hell do these idiots break standards established
	 * by themselves???
	 */

	const struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct tnl_ptk_info tpi;

	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
			     iph->ihl * 4) < 0)
		return;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 skb->dev->ifindex, IPPROTO_GRE);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
			      IPPROTO_GRE);
		return;
	}

	ipgre_err(skb, info, &tpi);
}

static bool is_erspan_type1(int gre_hdr_len)
{
	/* Both ERSPAN type I (version 0) and type II (version 1) use
	 * protocol 0x88BE, but type I has only a 4-byte GRE header,
	 * while type II has an 8-byte one.
	 */
	return gre_hdr_len == 4;
}
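
/* For reference, the resulting on-the-wire GRE headers (type II sets the
 * S flag and therefore carries a 4-byte sequence number):
 *
 *	type I:  | flags/ver (2) | proto 0x88BE (2) |		      = 4 bytes
 *	type II: | flags/ver (2) | proto 0x88BE (2) | sequence (4) | = 8 bytes
 */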

static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
		      int gre_hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct metadata_dst *tun_dst = NULL;
	struct erspan_base_hdr *ershdr;
	struct ip_tunnel_net *itn;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;
	struct erspan_md2 *md2;
	int ver;
	int len;

	itn = net_generic(net, erspan_net_id);
	iph = ip_hdr(skb);
	if (is_erspan_type1(gre_hdr_len)) {
		ver = 0;
		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
					  tpi->flags | TUNNEL_NO_KEY,
					  iph->saddr, iph->daddr, 0);
	} else {
		ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
		ver = ershdr->ver;
		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
					  tpi->flags | TUNNEL_KEY,
					  iph->saddr, iph->daddr, tpi->key);
	}

	if (tunnel) {
		if (is_erspan_type1(gre_hdr_len))
			len = gre_hdr_len;
		else
			len = gre_hdr_len + erspan_hdr_len(ver);

		if (unlikely(!pskb_may_pull(skb, len)))
			return PACKET_REJECT;

		if (__iptunnel_pull_header(skb,
					   len,
					   htons(ETH_P_TEB),
					   false, false) < 0)
			goto drop;

		if (tunnel->collect_md) {
			struct erspan_metadata *pkt_md, *md;
			struct ip_tunnel_info *info;
			unsigned char *gh;
			__be64 tun_id;
			__be16 flags;

			tpi->flags |= TUNNEL_KEY;
			flags = tpi->flags;
			tun_id = key32_to_tunnel_id(tpi->key);

			tun_dst = ip_tun_rx_dst(skb, flags,
						tun_id, sizeof(*md));
			if (!tun_dst)
				return PACKET_REJECT;

			/* skb can be uncloned in __iptunnel_pull_header, so
			 * the old pkt_md is no longer valid and we need to
			 * reset it
			 */
			gh = skb_network_header(skb) +
			     skb_network_header_len(skb);
			pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
							    sizeof(*ershdr));
			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
			md->version = ver;
			md2 = &md->u.md2;
			memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
						       ERSPAN_V2_MDSIZE);

			info = &tun_dst->u.tun_info;
			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
			info->options_len = sizeof(*md);
		}

		skb_reset_mac_header(skb);
		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_REJECT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
	struct metadata_dst *tun_dst = NULL;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	iph = ip_hdr(skb);
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		const struct iphdr *tnl_params;

		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
					   raw_proto, false) < 0)
			goto drop;

		if (tunnel->dev->type != ARPHRD_NONE)
			skb_pop_mac_header(skb);
		else
			skb_reset_mac_header(skb);

		tnl_params = &tunnel->parms.iph;
		if (tunnel->collect_md || tnl_params->daddr == 0) {
			__be16 flags;
			__be64 tun_id;

			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
			tun_id = key32_to_tunnel_id(tpi->key);
			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
			if (!tun_dst)
				return PACKET_REJECT;
		}

		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_NEXT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		     int hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	int res;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
		/* ipgre tunnels in collect metadata mode should also
		 * receive ETH_P_TEB traffic.
		 */
		itn = net_generic(net, ipgre_net_id);
		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
	}
	return res;
}

static int gre_rcv(struct sk_buff *skb)
{
	struct tnl_ptk_info tpi;
	bool csum_err = false;
	int hdr_len;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
	}
#endif

	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
	if (hdr_len < 0)
		goto drop;

	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
		     tpi.proto == htons(ETH_P_ERSPAN2))) {
		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
			return 0;
		goto out;
	}

	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
		return 0;

out:
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
	kfree_skb(skb);
	return 0;
}

static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	__be16 flags = tunnel->parms.o_flags;

	/* Push GRE header. */
	gre_build_header(skb, tunnel->tun_hlen,
			 flags, proto, tunnel->parms.o_key,
			 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}

static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}

static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			__be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	int tunnel_hlen;
	__be16 flags;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	tunnel_hlen = gre_calc_hlen(key->tun_flags);

	if (skb_cow_head(skb, dev->needed_headroom))
		goto err_free_skb;

	/* Push Tunnel header. */
	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
		goto err_free_skb;

	flags = tun_info->key.tun_flags &
		(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
	gre_build_header(skb, tunnel_hlen, flags, proto,
			 tunnel_id_to_key32(tun_info->key.tun_id),
			 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);

	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);

	return;

err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	bool truncate = false;
	__be16 proto;
	int tunnel_hlen;
	int version;
	int nhoff;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
		goto err_free_skb;
	if (tun_info->options_len < sizeof(*md))
		goto err_free_skb;
	md = ip_tunnel_info_opts(tun_info);

	/* ERSPAN has a fixed 8-byte GRE header */
	version = md->version;
	tunnel_hlen = 8 + erspan_hdr_len(version);

	if (skb_cow_head(skb, dev->needed_headroom))
		goto err_free_skb;

	if (gre_handle_offloads(skb, false))
		goto err_free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	nhoff = skb_network_header(skb) - skb_mac_header(skb);
	if (skb->protocol == htons(ETH_P_IP) &&
	    (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
		truncate = true;

	if (skb->protocol == htons(ETH_P_IPV6)) {
		int thoff;

		if (skb_transport_header_was_set(skb))
			thoff = skb_transport_header(skb) - skb_mac_header(skb);
		else
			thoff = nhoff + sizeof(struct ipv6hdr);
		if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
			truncate = true;
	}

	if (version == 1) {
		erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
				    ntohl(md->u.index), truncate, true);
		proto = htons(ETH_P_ERSPAN);
	} else if (version == 2) {
		erspan_build_header_v2(skb,
				       ntohl(tunnel_id_to_key32(key->tun_id)),
				       md->u.md2.dir,
				       get_hwid(&md->u.md2),
				       truncate, true);
		proto = htons(ETH_P_ERSPAN2);
	} else {
		goto err_free_skb;
	}

	gre_build_header(skb, 8, TUNNEL_SEQ,
			 proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));

	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);

	return;

err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

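/* ndo_fill_metadata_dst: route the packet for a collect-metadata tunnel
 * and record the source address that will be used on transmit.
 */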
static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	const struct ip_tunnel_key *key;
	struct rtable *rt;
	struct flowi4 fl4;

	if (ip_tunnel_info_af(info) != AF_INET)
		return -EINVAL;

	key = &info->key;
	ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id),
			    key->tos & ~INET_ECN_MASK, 0, skb->mark,
			    skb_get_hash(skb));
	rt = ip_route_output_key(dev_net(dev), &fl4);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	ip_rt_put(rt);
	info->key.u.ipv4.src = fl4.saddr;
	return 0;
}

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (dev->header_ops) {
		if (skb_cow_head(skb, 0))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
		 * to the gre header.
		 */
		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
		skb_reset_mac_header(skb);

		if (skb->ip_summed == CHECKSUM_PARTIAL &&
		    skb_checksum_start(skb) < skb->data)
			goto free_skb;
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	__gre_xmit(skb, dev, tnl_params, skb->protocol);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t erspan_xmit(struct sk_buff *skb,
			       struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	bool truncate = false;
	__be16 proto;

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		erspan_fb_xmit(skb, dev);
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, false))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	/* Push ERSPAN header */
	if (tunnel->erspan_ver == 0) {
		proto = htons(ETH_P_ERSPAN);
		tunnel->parms.o_flags &= ~TUNNEL_SEQ;
	} else if (tunnel->erspan_ver == 1) {
		erspan_build_header(skb, ntohl(tunnel->parms.o_key),
				    tunnel->index,
				    truncate, true);
		proto = htons(ETH_P_ERSPAN);
	} else if (tunnel->erspan_ver == 2) {
		erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
				       tunnel->dir, tunnel->hwid,
				       truncate, true);
		proto = htons(ETH_P_ERSPAN2);
	} else {
		goto free_skb;
	}

	tunnel->parms.o_flags &= ~TUNNEL_KEY;
	__gre_xmit(skb, dev, &tunnel->parms.iph, proto);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

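/* The output flags changed: recompute the GRE header length and propagate
 * the delta to the device's headroom, MTU and GSO/LLTX features.
 */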
static void ipgre_link_update(struct net_device *dev, bool set_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int len;

	len = tunnel->tun_hlen;
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	len = tunnel->tun_hlen - len;
	tunnel->hlen = tunnel->hlen + len;

	if (dev->header_ops)
		dev->hard_header_len += len;
	else
		dev->needed_headroom += len;

	if (set_mtu)
		dev->mtu = max_t(int, dev->mtu - len, 68);

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    tunnel->encap.type == TUNNEL_ENCAP_NONE) {
			dev->features |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		} else {
			dev->features &= ~NETIF_F_GSO_SOFTWARE;
			dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
		}
		dev->features |= NETIF_F_LLTX;
	} else {
		dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
		dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
	}
}

static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p,
			    int cmd)
{
	int err;

	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
		if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
		    p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
		    ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING)))
			return -EINVAL;
	}

	p->i_flags = gre_flags_to_tnl_flags(p->i_flags);
	p->o_flags = gre_flags_to_tnl_flags(p->o_flags);

	err = ip_tunnel_ctl(dev, p, cmd);
	if (err)
		return err;

	if (cmd == SIOCCHGTUNNEL) {
		struct ip_tunnel *t = netdev_priv(dev);

		t->parms.i_flags = p->i_flags;
		t->parms.o_flags = p->o_flags;

		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
			ipgre_link_update(dev, true);
	}

	p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
	p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
	return 0;
}

/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.

   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have an impression that Cisco could have made something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */
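
/* For reference, iproute2 equivalents of the ifconfig lines above:
 *
 *	ip link set Universe up
 *	ip addr add fe80::<Your_real_addr>/10 dev Universe
 *	ip addr add fec0:6666:6666::<Your_real_addr>/96 dev Universe
 */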
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

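	/* No destination yet: per the header_ops convention, return a
	 * negative length to tell the caller the header is incomplete.
	 */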
	return -(t->hlen + sizeof(*iph));
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *)skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(t->net, &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (!__in_dev_get_rtnl(dev))
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(t->net, t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_do_ioctl		= ip_tunnel_ioctl,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_tunnel_ctl		= ipgre_tunnel_ctl,
};

#define GRE_FEATURES (NETIF_F_SG |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->type		= ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}

static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
	dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		/* TCP offload with GRE SEQ is not supported, nor can we
		 * support 2 levels of outer headers requiring an update.
		 */
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
			dev->features    |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		}

		/* Can use a lockless transmit, unless we generate
		 * output sequences
		 */
		dev->features |= NETIF_F_LLTX;
	}
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	memcpy(dev->dev_addr, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags		= IFF_NOARP;
	netif_keep_dst(dev);
	dev->addr_len		= 4;

	if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
			dev->hard_header_len = tunnel->hlen + sizeof(*iph);
			dev->needed_headroom = 0;
		}
#endif
	} else if (!tunnel->collect_md) {
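		/* NBMA mode (daddr == 0): keep header_ops so the outer
		 * destination can be supplied per packet.
		 */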
		dev->header_ops = &ipgre_header_ops;
		dev->hard_header_len = tunnel->hlen + sizeof(*iph);
		dev->needed_headroom = 0;
	}

	return ip_tunnel_init(dev);
}

static const struct gre_protocol ipgre_protocol = {
	.handler     = gre_rcv,
	.err_handler = gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit_batch = ipgre_exit_batch_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
				 struct netlink_ext_ack *extack)
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION|GRE_ROUTING))
		return -EINVAL;

	if (data[IFLA_GRE_COLLECT_METADATA] &&
	    data[IFLA_GRE_ENCAP_TYPE] &&
	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
			      struct netlink_ext_ack *extack)
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data, extack);
}

static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	__be16 flags = 0;
	int ret;

	if (!data)
		return 0;

	ret = ipgre_tap_validate(tb, data, extack);
	if (ret)
		return ret;

	if (data[IFLA_GRE_ERSPAN_VER] &&
	    nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
		return 0;

	/* ERSPAN type II/III should only have the GRE sequence and key flags */
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (!data[IFLA_GRE_COLLECT_METADATA] &&
	    flags != (GRE_SEQ | GRE_KEY))
		return -EINVAL;

	/* The ERSPAN session ID is only 10 bits. Since we reuse the
	 * 32-bit key field as the ID, check its range.
	 */
	if (data[IFLA_GRE_IKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
		return -EINVAL;

	if (data[IFLA_GRE_OKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
		return -EINVAL;

	return 0;
}

static int ipgre_netlink_parms(struct net_device *dev,
			       struct nlattr *data[],
			       struct nlattr *tb[],
			       struct ip_tunnel_parm *parms,
			       __u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);

	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return 0;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
		if (t->ignore_df)
			return -EINVAL;
		parms->iph.frag_off = htons(IP_DF);
	}

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		t->collect_md = true;
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;
	}

	if (data[IFLA_GRE_IGNORE_DF]) {
		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
		  && (parms->iph.frag_off & htons(IP_DF)))
			return -EINVAL;
		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
	}

	if (data[IFLA_GRE_FWMARK])
		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

	return 0;
}

static int erspan_netlink_parms(struct net_device *dev,
				struct nlattr *data[],
				struct nlattr *tb[],
				struct ip_tunnel_parm *parms,
				__u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);
	int err;

	err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
	if (err)
		return err;
	if (!data)
		return 0;

	if (data[IFLA_GRE_ERSPAN_VER]) {
		t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);

		if (t->erspan_ver > 2)
			return -EINVAL;
	}

	if (t->erspan_ver == 1) {
		if (data[IFLA_GRE_ERSPAN_INDEX]) {
			t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
			if (t->index & ~INDEX_MASK)
				return -EINVAL;
		}
	} else if (t->erspan_ver == 2) {
		if (data[IFLA_GRE_ERSPAN_DIR]) {
			t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
			if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
				return -EINVAL;
		}
		if (data[IFLA_GRE_ERSPAN_HWID]) {
			t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
			if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
				return -EINVAL;
		}
	}

	return 0;
}

/* This function returns true when ENCAP attributes are present in the nl msg */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)
{
	bool ret = false;

	memset(ipencap, 0, sizeof(*ipencap));

	if (!data)
		return ret;

	if (data[IFLA_GRE_ENCAP_TYPE]) {
		ret = true;
		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
	}

	if (data[IFLA_GRE_ENCAP_FLAGS]) {
		ret = true;
		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
	}

	if (data[IFLA_GRE_ENCAP_SPORT]) {
		ret = true;
		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
	}

	if (data[IFLA_GRE_ENCAP_DPORT]) {
		ret = true;
		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
	}

	return ret;
}

static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static int erspan_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->erspan_ver == 0)
		tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */
	else
		tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */

	tunnel->parms.iph.protocol = IPPROTO_GRE;
	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
		       erspan_hdr_len(tunnel->erspan_ver);

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;
	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops erspan_netdev_ops = {
	.ndo_init		= erspan_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= erspan_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->max_mtu = 0;
	dev->netdev_ops	= &gre_tap_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
}

static int
ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
{
	struct ip_tunnel_encap ipencap;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);
		int err = ip_tunnel_encap_setup(t, &ipencap);

		if (err < 0)
			return err;
	}

	return 0;
}

static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	struct ip_tunnel_parm p;
	__u32 fwmark = 0;
	int err;

	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;
	return ip_tunnel_newlink(dev, tb, &p, fwmark);
}

static int erspan_newlink(struct net *src_net, struct net_device *dev,
			  struct nlattr *tb[], struct nlattr *data[],
			  struct netlink_ext_ack *extack)
{
	struct ip_tunnel_parm p;
	__u32 fwmark = 0;
	int err;

	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err)
		return err;
	return ip_tunnel_newlink(dev, tb, &p, fwmark);
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[],
			    struct netlink_ext_ack *extack)
{
	struct ip_tunnel *t = netdev_priv(dev);
	__u32 fwmark = t->fwmark;
	struct ip_tunnel_parm p;
	int err;

	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;

	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
	if (err < 0)
		return err;

	t->parms.i_flags = p.i_flags;
	t->parms.o_flags = p.o_flags;

	ipgre_link_update(dev, !tb[IFLA_MTU]);

	return 0;
}

static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
			     struct nlattr *data[],
			     struct netlink_ext_ack *extack)
{
	struct ip_tunnel *t = netdev_priv(dev);
	__u32 fwmark = t->fwmark;
	struct ip_tunnel_parm p;
	int err;

	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;

	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
	if (err < 0)
		return err;

	t->parms.i_flags = p.i_flags;
	t->parms.o_flags = p.o_flags;

	return 0;
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_DPORT */
		nla_total_size(2) +
		/* IFLA_GRE_COLLECT_METADATA */
		nla_total_size(0) +
		/* IFLA_GRE_IGNORE_DF */
		nla_total_size(1) +
		/* IFLA_GRE_FWMARK */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_INDEX */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_VER */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_DIR */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_HWID */
		nla_total_size(2) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;
	__be16 o_flags = p->o_flags;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
			 gre_tnl_flags_to_gre_flags(o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))) ||
	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
			t->encap.type) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
			 t->encap.sport) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
			 t->encap.dport) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
			t->encap.flags))
		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
		goto nla_put_failure;

	if (t->collect_md) {
		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
			goto nla_put_failure;
	}

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (t->erspan_ver <= 2) {
		if (t->erspan_ver != 0 && !t->collect_md)
			t->parms.o_flags |= TUNNEL_KEY;

		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
			goto nla_put_failure;

		if (t->erspan_ver == 1) {
			if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
				goto nla_put_failure;
		} else if (t->erspan_ver == 2) {
			if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
				goto nla_put_failure;
			if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
				goto nla_put_failure;
		}
	}

	return ipgre_fill_info(skb, dev);

nla_put_failure:
	return -EMSGSIZE;
}

static void erspan_setup(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	ether_setup(dev);
	dev->max_mtu = 0;
	dev->netdev_ops = &erspan_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, erspan_net_id);
	t->erspan_ver = 1;
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = sizeof_field(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = sizeof_field(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops erspan_link_ops __read_mostly = {
	.kind		= "erspan",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= erspan_setup,
	.validate	= erspan_validate,
	.newlink	= erspan_newlink,
	.changelink	= erspan_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= erspan_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

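/* Create a flow-based (collect_md) gretap device. The main user is
 * openvswitch, which expects unrestricted packet sizes (hence the MTU
 * handling below).
 */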
struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, name, name_assign_type,
			       &ipgre_tap_ops, tb, NULL);
	if (IS_ERR(dev))
		return dev;

	/* Configure flow based GRE device. */
	t = netdev_priv(dev);
	t->collect_md = true;

	err = ipgre_newlink(net, dev, tb, NULL, NULL);
	if (err < 0) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	err = rtnl_configure_link(dev, NULL);
	if (err < 0)
		goto out;

	return dev;
out:
	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);

static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit_batch = ipgre_tap_exit_batch_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __net_init erspan_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, erspan_net_id,
				  &erspan_link_ops, "erspan0");
}

static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
{
	ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
}

static struct pernet_operations erspan_net_ops = {
	.init = erspan_init_net,
	.exit_batch = erspan_exit_batch_net,
	.id   = &erspan_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = register_pernet_device(&erspan_net_ops);
	if (err < 0)
		goto pnet_erspan_failed;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	err = rtnl_link_register(&erspan_link_ops);
	if (err < 0)
		goto erspan_link_failed;

	return 0;

erspan_link_failed:
	rtnl_link_unregister(&ipgre_tap_ops);
tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&erspan_net_ops);
pnet_erspan_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	unregister_pernet_device(&ipgre_net_ops);
	return err;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	rtnl_link_unregister(&erspan_link_ops);
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
	unregister_pernet_device(&erspan_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
MODULE_ALIAS_NETDEV("erspan0");