1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Linux NET3:	GRE over IP protocol decoder.
4  *
5  *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6  */
7 
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9 
10 #include <linux/capability.h>
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/uaccess.h>
16 #include <linux/skbuff.h>
17 #include <linux/netdevice.h>
18 #include <linux/in.h>
19 #include <linux/tcp.h>
20 #include <linux/udp.h>
21 #include <linux/if_arp.h>
22 #include <linux/if_vlan.h>
23 #include <linux/init.h>
24 #include <linux/in6.h>
25 #include <linux/inetdevice.h>
26 #include <linux/igmp.h>
27 #include <linux/netfilter_ipv4.h>
28 #include <linux/etherdevice.h>
29 #include <linux/if_ether.h>
30 
31 #include <net/sock.h>
32 #include <net/ip.h>
33 #include <net/icmp.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
36 #include <net/arp.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
40 #include <net/xfrm.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/rtnetlink.h>
44 #include <net/gre.h>
45 #include <net/dst_metadata.h>
46 #include <net/erspan.h>
47 #include <net/inet_dscp.h>
48 
49 /*
50    Problems & solutions
51    --------------------
52 
53    1. The most important issue is detecting local dead loops.
54    They would cause complete host lockup in transmit, which
55    would be "resolved" by stack overflow or, if queueing is enabled,
56    with infinite looping in net_bh.
57 
58    We cannot track such dead loops during route installation,
   it is an infeasible task. The most general solution would be
60    to keep skb->encapsulation counter (sort of local ttl),
61    and silently drop packet when it expires. It is a good
62    solution, but it supposes maintaining new variable in ALL
63    skb, even if no tunneling is used.
64 
65    Current solution: xmit_recursion breaks dead loops. This is a percpu
66    counter, since when we enter the first ndo_xmit(), cpu migration is
67    forbidden. We force an exit if this counter reaches RECURSION_LIMIT
68 
69    2. Networking dead loops would not kill routers, but would really
70    kill network. IP hop limit plays role of "t->recursion" in this case,
71    if we copy it from packet being encapsulated to upper header.
72    It is very good solution, but it introduces two problems:
73 
74    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
75      do not work over tunnels.
76    - traceroute does not work. I planned to relay ICMP from tunnel,
77      so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
79      only Linux complies to rfc1812 now (yes, guys, Linux is the only
80      true router now :-)), all routers (at least, in neighbourhood of mine)
81      return only 8 bytes of payload. It is the end.
82 
83    Hence, if we want that OSPF worked or traceroute said something reasonable,
84    we should search for another solution.
85 
86    One of them is to parse packet trying to detect inner encapsulation
87    made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, ttl is not a solution at all.
89 
90    Current solution: The solution was UNEXPECTEDLY SIMPLE.
91    We force DF flag on tunnels with preconfigured hop limit,
92    that is ALL. :-) Well, it does not remove the problem completely,
93    but exponential growth of network traffic is changed to linear
94    (branches, that exceed pmtu are pruned) and tunnel mtu
95    rapidly degrades to value <68, where looping stops.
96    Yes, it is not good if there exists a router in the loop,
97    which does not force DF, even when encapsulating packets have DF set.
98    But it is not our problem! Nobody could accuse us, we made
99    all that we could make. Even if it is your gated who injected
100    fatal route to network, even if it were you who configured
101    fatal static route: you are innocent. :-)
102 
103    Alexey Kuznetsov.
104  */
105 
106 static bool log_ecn_error = true;
107 module_param(log_ecn_error, bool, 0644);
108 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
109 
110 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
111 static const struct header_ops ipgre_header_ops;
112 
113 static int ipgre_tunnel_init(struct net_device *dev);
114 static void erspan_build_header(struct sk_buff *skb,
115 				u32 id, u32 index,
116 				bool truncate, bool is_ipv4);
117 
118 static unsigned int ipgre_net_id __read_mostly;
119 static unsigned int gre_tap_net_id __read_mostly;
120 static unsigned int erspan_net_id __read_mostly;
121 
ipgre_err(struct sk_buff * skb,u32 info,const struct tnl_ptk_info * tpi)122 static int ipgre_err(struct sk_buff *skb, u32 info,
123 		     const struct tnl_ptk_info *tpi)
124 {
125 
126 	/* All the routers (except for Linux) return only
127 	   8 bytes of packet payload. It means, that precise relaying of
128 	   ICMP in the real Internet is absolutely infeasible.
129 
130 	   Moreover, Cisco "wise men" put GRE key to the third word
131 	   in GRE header. It makes impossible maintaining even soft
132 	   state for keyed GRE tunnels with enabled checksum. Tell
133 	   them "thank you".
134 
135 	   Well, I wonder, rfc1812 was written by Cisco employee,
136 	   what the hell these idiots break standards established
137 	   by themselves???
138 	   */
139 	struct net *net = dev_net(skb->dev);
140 	struct ip_tunnel_net *itn;
141 	const struct iphdr *iph;
142 	const int type = icmp_hdr(skb)->type;
143 	const int code = icmp_hdr(skb)->code;
144 	struct ip_tunnel *t;
145 
146 	if (tpi->proto == htons(ETH_P_TEB))
147 		itn = net_generic(net, gre_tap_net_id);
148 	else if (tpi->proto == htons(ETH_P_ERSPAN) ||
149 		 tpi->proto == htons(ETH_P_ERSPAN2))
150 		itn = net_generic(net, erspan_net_id);
151 	else
152 		itn = net_generic(net, ipgre_net_id);
153 
154 	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
155 	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
156 			     iph->daddr, iph->saddr, tpi->key);
157 
158 	if (!t)
159 		return -ENOENT;
160 
161 	switch (type) {
162 	default:
163 	case ICMP_PARAMETERPROB:
164 		return 0;
165 
166 	case ICMP_DEST_UNREACH:
167 		switch (code) {
168 		case ICMP_SR_FAILED:
169 		case ICMP_PORT_UNREACH:
170 			/* Impossible event. */
171 			return 0;
172 		default:
173 			/* All others are translated to HOST_UNREACH.
174 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
175 			   I believe they are just ether pollution. --ANK
176 			 */
177 			break;
178 		}
179 		break;
180 
181 	case ICMP_TIME_EXCEEDED:
182 		if (code != ICMP_EXC_TTL)
183 			return 0;
184 		break;
185 
186 	case ICMP_REDIRECT:
187 		break;
188 	}
189 
190 #if IS_ENABLED(CONFIG_IPV6)
191 	if (tpi->proto == htons(ETH_P_IPV6)) {
192 		unsigned int data_len = 0;
193 
194 		if (type == ICMP_TIME_EXCEEDED)
195 			data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
196 
197 		if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
198 						type, data_len))
199 			return 0;
200 	}
201 #endif
202 
203 	if (t->parms.iph.daddr == 0 ||
204 	    ipv4_is_multicast(t->parms.iph.daddr))
205 		return 0;
206 
207 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
208 		return 0;
209 
210 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
211 		t->err_count++;
212 	else
213 		t->err_count = 1;
214 	t->err_time = jiffies;
215 
216 	return 0;
217 }
218 
/*
 * ICMP error entry point for GRE.
 *
 * Parses the GRE header quoted in the ICMP error, then either updates the
 * path MTU (fragmentation needed), processes a redirect, or passes the error
 * on to ipgre_err() for per-tunnel accounting.
 *
 * As noted by the original author, most routers quote only 8 bytes of the
 * offending payload, so precise relaying of ICMP errors is not feasible.
 */
static void gre_err(struct sk_buff *skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	/* Sample type/code before the header parser touches the skb. */
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	const int outer_hlen = iph->ihl * 4;
	struct tnl_ptk_info tpi;

	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP), outer_hlen) < 0)
		return;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 skb->dev->ifindex, IPPROTO_GRE);
	else if (type == ICMP_REDIRECT)
		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
			      IPPROTO_GRE);
	else
		ipgre_err(skb, info, &tpi);
}
257 
/*
 * Tell ERSPAN type I apart from type II by GRE header length.
 *
 * Both type I (version 0) and type II (version 1) use protocol 0x88BE;
 * type I carries only a 4-byte GRE header whereas type II carries 8 bytes.
 */
static bool is_erspan_type1(int gre_hdr_len)
{
	const int type1_gre_hdr_len = 4;

	return gre_hdr_len == type1_gre_hdr_len;
}
266 
erspan_rcv(struct sk_buff * skb,struct tnl_ptk_info * tpi,int gre_hdr_len)267 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
268 		      int gre_hdr_len)
269 {
270 	struct net *net = dev_net(skb->dev);
271 	struct metadata_dst *tun_dst = NULL;
272 	struct erspan_base_hdr *ershdr;
273 	IP_TUNNEL_DECLARE_FLAGS(flags);
274 	struct ip_tunnel_net *itn;
275 	struct ip_tunnel *tunnel;
276 	const struct iphdr *iph;
277 	struct erspan_md2 *md2;
278 	int ver;
279 	int len;
280 
281 	ip_tunnel_flags_copy(flags, tpi->flags);
282 
283 	itn = net_generic(net, erspan_net_id);
284 	iph = ip_hdr(skb);
285 	if (is_erspan_type1(gre_hdr_len)) {
286 		ver = 0;
287 		__set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
288 		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
289 					  iph->saddr, iph->daddr, 0);
290 	} else {
291 		if (unlikely(!pskb_may_pull(skb,
292 					    gre_hdr_len + sizeof(*ershdr))))
293 			return PACKET_REJECT;
294 
295 		ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
296 		ver = ershdr->ver;
297 		iph = ip_hdr(skb);
298 		__set_bit(IP_TUNNEL_KEY_BIT, flags);
299 		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
300 					  iph->saddr, iph->daddr, tpi->key);
301 	}
302 
303 	if (tunnel) {
304 		if (is_erspan_type1(gre_hdr_len))
305 			len = gre_hdr_len;
306 		else
307 			len = gre_hdr_len + erspan_hdr_len(ver);
308 
309 		if (unlikely(!pskb_may_pull(skb, len)))
310 			return PACKET_REJECT;
311 
312 		if (__iptunnel_pull_header(skb,
313 					   len,
314 					   htons(ETH_P_TEB),
315 					   false, false) < 0)
316 			goto drop;
317 
318 		if (tunnel->collect_md) {
319 			struct erspan_metadata *pkt_md, *md;
320 			struct ip_tunnel_info *info;
321 			unsigned char *gh;
322 			__be64 tun_id;
323 
324 			__set_bit(IP_TUNNEL_KEY_BIT, tpi->flags);
325 			ip_tunnel_flags_copy(flags, tpi->flags);
326 			tun_id = key32_to_tunnel_id(tpi->key);
327 
328 			tun_dst = ip_tun_rx_dst(skb, flags,
329 						tun_id, sizeof(*md));
330 			if (!tun_dst)
331 				return PACKET_REJECT;
332 
333 			/* skb can be uncloned in __iptunnel_pull_header, so
334 			 * old pkt_md is no longer valid and we need to reset
335 			 * it
336 			 */
337 			gh = skb_network_header(skb) +
338 			     skb_network_header_len(skb);
339 			pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
340 							    sizeof(*ershdr));
341 			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
342 			md->version = ver;
343 			md2 = &md->u.md2;
344 			memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
345 						       ERSPAN_V2_MDSIZE);
346 
347 			info = &tun_dst->u.tun_info;
348 			__set_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
349 				  info->key.tun_flags);
350 			info->options_len = sizeof(*md);
351 		}
352 
353 		skb_reset_mac_header(skb);
354 		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
355 		return PACKET_RCVD;
356 	}
357 	return PACKET_REJECT;
358 
359 drop:
360 	kfree_skb(skb);
361 	return PACKET_RCVD;
362 }
363 
/*
 * Try to deliver a GRE packet to a tunnel found in @itn.
 *
 * @skb:       received packet
 * @tpi:       parsed GRE header info
 * @itn:       per-netns tunnel table to search
 * @hdr_len:   number of GRE header bytes to strip
 * @raw_proto: forwarded to __iptunnel_pull_header()
 *
 * Returns PACKET_NEXT if no tunnel matches, PACKET_REJECT if metadata
 * allocation fails, and PACKET_RCVD once the skb was consumed.
 */
static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
	const struct iphdr *outer = ip_hdr(skb);
	struct metadata_dst *tun_dst = NULL;
	struct ip_tunnel *tunnel;

	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  outer->saddr, outer->daddr, tpi->key);
	if (!tunnel)
		return PACKET_NEXT;

	if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
				   raw_proto, false) < 0) {
		kfree_skb(skb);
		return PACKET_RCVD;
	}

	/* ipgre_header_parse() expects the mac_header to point at the outer
	 * IP header, so devices using ipgre_header_ops are special-cased.
	 */
	if (tunnel->dev->header_ops == &ipgre_header_ops)
		skb_pop_mac_header(skb);
	else
		skb_reset_mac_header(skb);

	if (tunnel->collect_md || !tunnel->parms.iph.daddr) {
		IP_TUNNEL_DECLARE_FLAGS(flags) = { };
		__be64 tun_id;

		/* Keep only the CSUM and KEY bits of the received flags. */
		__set_bit(IP_TUNNEL_CSUM_BIT, flags);
		__set_bit(IP_TUNNEL_KEY_BIT, flags);
		ip_tunnel_flags_and(flags, tpi->flags, flags);

		tun_id = key32_to_tunnel_id(tpi->key);
		tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
		if (!tun_dst)
			return PACKET_REJECT;
	}

	ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
	return PACKET_RCVD;
}
414 
ipgre_rcv(struct sk_buff * skb,const struct tnl_ptk_info * tpi,int hdr_len)415 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
416 		     int hdr_len)
417 {
418 	struct net *net = dev_net(skb->dev);
419 	struct ip_tunnel_net *itn;
420 	int res;
421 
422 	if (tpi->proto == htons(ETH_P_TEB))
423 		itn = net_generic(net, gre_tap_net_id);
424 	else
425 		itn = net_generic(net, ipgre_net_id);
426 
427 	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
428 	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
429 		/* ipgre tunnels in collect metadata mode should receive
430 		 * also ETH_P_TEB traffic.
431 		 */
432 		itn = net_generic(net, ipgre_net_id);
433 		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
434 	}
435 	return res;
436 }
437 
gre_rcv(struct sk_buff * skb)438 static int gre_rcv(struct sk_buff *skb)
439 {
440 	struct tnl_ptk_info tpi;
441 	bool csum_err = false;
442 	int hdr_len;
443 
444 #ifdef CONFIG_NET_IPGRE_BROADCAST
445 	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
446 		/* Looped back packet, drop it! */
447 		if (rt_is_output_route(skb_rtable(skb)))
448 			goto drop;
449 	}
450 #endif
451 
452 	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
453 	if (hdr_len < 0)
454 		goto drop;
455 
456 	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
457 		     tpi.proto == htons(ETH_P_ERSPAN2))) {
458 		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
459 			return 0;
460 		goto out;
461 	}
462 
463 	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
464 		return 0;
465 
466 out:
467 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
468 drop:
469 	kfree_skb(skb);
470 	return 0;
471 }
472 
/*
 * Prepend the GRE header described by the tunnel's output flags and pass
 * the packet on to ip_tunnel_xmit().
 *
 * @skb:        packet to encapsulate
 * @dev:        GRE tunnel device
 * @tnl_params: outer IP header template
 * @proto:      value for the GRE protocol field
 */
static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	IP_TUNNEL_DECLARE_FLAGS(flags);
	__be32 seq = 0;

	ip_tunnel_flags_copy(flags, tunnel->parms.o_flags);

	/* A sequence number is consumed only when the SEQ bit is set. */
	if (test_bit(IP_TUNNEL_SEQ_BIT, flags))
		seq = htonl(atomic_fetch_inc(&tunnel->o_seqno));

	/* Push GRE header. */
	gre_build_header(skb, tunnel->tun_hlen, flags, proto,
			 tunnel->parms.o_key, seq);

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
490 
/*
 * Set up tunnel offloads for a GRE packet, selecting the GSO type
 * according to whether a GRE checksum is requested.
 * Returns the result of iptunnel_handle_offloads().
 */
static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
	int gso_type = SKB_GSO_GRE;

	if (csum)
		gso_type = SKB_GSO_GRE_CSUM;

	return iptunnel_handle_offloads(skb, gso_type);
}
495 
/*
 * Transmit path for collect-metadata GRE devices: the GRE header is built
 * from the tunnel info attached to the skb.
 *
 * @skb:   packet to send; must carry IPv4 TX tunnel info
 * @dev:   tunnel device
 * @proto: value for the GRE protocol field
 *
 * On any failure the skb is freed and tx_dropped is incremented.
 */
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			__be16 proto)
{
	struct ip_tunnel_info *tun_info = skb_tunnel_info(skb);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	IP_TUNNEL_DECLARE_FLAGS(flags) = { };
	__be32 seq = 0;
	int tunnel_hlen;
	bool csum;

	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	tunnel_hlen = gre_calc_hlen(tun_info->key.tun_flags);

	if (skb_cow_head(skb, dev->needed_headroom))
		goto err_free_skb;

	/* Push Tunnel header. */
	csum = test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags);
	if (gre_handle_offloads(skb, csum))
		goto err_free_skb;

	/* Only CSUM, KEY and SEQ are taken over from the metadata flags. */
	__set_bit(IP_TUNNEL_CSUM_BIT, flags);
	__set_bit(IP_TUNNEL_KEY_BIT, flags);
	__set_bit(IP_TUNNEL_SEQ_BIT, flags);
	ip_tunnel_flags_and(flags, tun_info->key.tun_flags, flags);

	if (test_bit(IP_TUNNEL_SEQ_BIT, flags))
		seq = htonl(atomic_fetch_inc(&tunnel->o_seqno));

	gre_build_header(skb, tunnel_hlen, flags, proto,
			 tunnel_id_to_key32(tun_info->key.tun_id), seq);

	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
	return;

err_free_skb:
	kfree_skb(skb);
	DEV_STATS_INC(dev, tx_dropped);
}
539 
erspan_fb_xmit(struct sk_buff * skb,struct net_device * dev)540 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
541 {
542 	struct ip_tunnel *tunnel = netdev_priv(dev);
543 	IP_TUNNEL_DECLARE_FLAGS(flags) = { };
544 	struct ip_tunnel_info *tun_info;
545 	const struct ip_tunnel_key *key;
546 	struct erspan_metadata *md;
547 	bool truncate = false;
548 	__be16 proto;
549 	int tunnel_hlen;
550 	int version;
551 	int nhoff;
552 
553 	tun_info = skb_tunnel_info(skb);
554 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
555 		     ip_tunnel_info_af(tun_info) != AF_INET))
556 		goto err_free_skb;
557 
558 	key = &tun_info->key;
559 	if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags))
560 		goto err_free_skb;
561 	if (tun_info->options_len < sizeof(*md))
562 		goto err_free_skb;
563 	md = ip_tunnel_info_opts(tun_info);
564 
565 	/* ERSPAN has fixed 8 byte GRE header */
566 	version = md->version;
567 	tunnel_hlen = 8 + erspan_hdr_len(version);
568 
569 	if (skb_cow_head(skb, dev->needed_headroom))
570 		goto err_free_skb;
571 
572 	if (gre_handle_offloads(skb, false))
573 		goto err_free_skb;
574 
575 	if (skb->len > dev->mtu + dev->hard_header_len) {
576 		if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
577 			goto err_free_skb;
578 		truncate = true;
579 	}
580 
581 	nhoff = skb_network_offset(skb);
582 	if (skb->protocol == htons(ETH_P_IP) &&
583 	    (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
584 		truncate = true;
585 
586 	if (skb->protocol == htons(ETH_P_IPV6)) {
587 		int thoff;
588 
589 		if (skb_transport_header_was_set(skb))
590 			thoff = skb_transport_offset(skb);
591 		else
592 			thoff = nhoff + sizeof(struct ipv6hdr);
593 		if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
594 			truncate = true;
595 	}
596 
597 	if (version == 1) {
598 		erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
599 				    ntohl(md->u.index), truncate, true);
600 		proto = htons(ETH_P_ERSPAN);
601 	} else if (version == 2) {
602 		erspan_build_header_v2(skb,
603 				       ntohl(tunnel_id_to_key32(key->tun_id)),
604 				       md->u.md2.dir,
605 				       get_hwid(&md->u.md2),
606 				       truncate, true);
607 		proto = htons(ETH_P_ERSPAN2);
608 	} else {
609 		goto err_free_skb;
610 	}
611 
612 	__set_bit(IP_TUNNEL_SEQ_BIT, flags);
613 	gre_build_header(skb, 8, flags, proto, 0,
614 			 htonl(atomic_fetch_inc(&tunnel->o_seqno)));
615 
616 	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
617 
618 	return;
619 
620 err_free_skb:
621 	kfree_skb(skb);
622 	DEV_STATS_INC(dev, tx_dropped);
623 }
624 
gre_fill_metadata_dst(struct net_device * dev,struct sk_buff * skb)625 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
626 {
627 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
628 	const struct ip_tunnel_key *key;
629 	struct rtable *rt;
630 	struct flowi4 fl4;
631 
632 	if (ip_tunnel_info_af(info) != AF_INET)
633 		return -EINVAL;
634 
635 	key = &info->key;
636 	ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
637 			    tunnel_id_to_key32(key->tun_id),
638 			    key->tos & ~INET_ECN_MASK, dev_net(dev), 0,
639 			    skb->mark, skb_get_hash(skb), key->flow_flags);
640 	rt = ip_route_output_key(dev_net(dev), &fl4);
641 	if (IS_ERR(rt))
642 		return PTR_ERR(rt);
643 
644 	ip_rt_put(rt);
645 	info->key.u.ipv4.src = fl4.saddr;
646 	return 0;
647 }
648 
ipgre_xmit(struct sk_buff * skb,struct net_device * dev)649 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
650 			      struct net_device *dev)
651 {
652 	struct ip_tunnel *tunnel = netdev_priv(dev);
653 	const struct iphdr *tnl_params;
654 
655 	if (!pskb_inet_may_pull(skb))
656 		goto free_skb;
657 
658 	if (tunnel->collect_md) {
659 		gre_fb_xmit(skb, dev, skb->protocol);
660 		return NETDEV_TX_OK;
661 	}
662 
663 	if (dev->header_ops) {
664 		int pull_len = tunnel->hlen + sizeof(struct iphdr);
665 
666 		if (skb_cow_head(skb, 0))
667 			goto free_skb;
668 
669 		if (!pskb_may_pull(skb, pull_len))
670 			goto free_skb;
671 
672 		tnl_params = (const struct iphdr *)skb->data;
673 
674 		/* ip_tunnel_xmit() needs skb->data pointing to gre header. */
675 		skb_pull(skb, pull_len);
676 		skb_reset_mac_header(skb);
677 
678 		if (skb->ip_summed == CHECKSUM_PARTIAL &&
679 		    skb_checksum_start(skb) < skb->data)
680 			goto free_skb;
681 	} else {
682 		if (skb_cow_head(skb, dev->needed_headroom))
683 			goto free_skb;
684 
685 		tnl_params = &tunnel->parms.iph;
686 	}
687 
688 	if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
689 					      tunnel->parms.o_flags)))
690 		goto free_skb;
691 
692 	__gre_xmit(skb, dev, tnl_params, skb->protocol);
693 	return NETDEV_TX_OK;
694 
695 free_skb:
696 	kfree_skb(skb);
697 	DEV_STATS_INC(dev, tx_dropped);
698 	return NETDEV_TX_OK;
699 }
700 
erspan_xmit(struct sk_buff * skb,struct net_device * dev)701 static netdev_tx_t erspan_xmit(struct sk_buff *skb,
702 			       struct net_device *dev)
703 {
704 	struct ip_tunnel *tunnel = netdev_priv(dev);
705 	bool truncate = false;
706 	__be16 proto;
707 
708 	if (!pskb_inet_may_pull(skb))
709 		goto free_skb;
710 
711 	if (tunnel->collect_md) {
712 		erspan_fb_xmit(skb, dev);
713 		return NETDEV_TX_OK;
714 	}
715 
716 	if (gre_handle_offloads(skb, false))
717 		goto free_skb;
718 
719 	if (skb_cow_head(skb, dev->needed_headroom))
720 		goto free_skb;
721 
722 	if (skb->len > dev->mtu + dev->hard_header_len) {
723 		if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
724 			goto free_skb;
725 		truncate = true;
726 	}
727 
728 	/* Push ERSPAN header */
729 	if (tunnel->erspan_ver == 0) {
730 		proto = htons(ETH_P_ERSPAN);
731 		__clear_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags);
732 	} else if (tunnel->erspan_ver == 1) {
733 		erspan_build_header(skb, ntohl(tunnel->parms.o_key),
734 				    tunnel->index,
735 				    truncate, true);
736 		proto = htons(ETH_P_ERSPAN);
737 	} else if (tunnel->erspan_ver == 2) {
738 		erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
739 				       tunnel->dir, tunnel->hwid,
740 				       truncate, true);
741 		proto = htons(ETH_P_ERSPAN2);
742 	} else {
743 		goto free_skb;
744 	}
745 
746 	__clear_bit(IP_TUNNEL_KEY_BIT, tunnel->parms.o_flags);
747 	__gre_xmit(skb, dev, &tunnel->parms.iph, proto);
748 	return NETDEV_TX_OK;
749 
750 free_skb:
751 	kfree_skb(skb);
752 	DEV_STATS_INC(dev, tx_dropped);
753 	return NETDEV_TX_OK;
754 }
755 
gre_tap_xmit(struct sk_buff * skb,struct net_device * dev)756 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
757 				struct net_device *dev)
758 {
759 	struct ip_tunnel *tunnel = netdev_priv(dev);
760 
761 	if (!pskb_inet_may_pull(skb))
762 		goto free_skb;
763 
764 	if (tunnel->collect_md) {
765 		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
766 		return NETDEV_TX_OK;
767 	}
768 
769 	if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
770 					      tunnel->parms.o_flags)))
771 		goto free_skb;
772 
773 	if (skb_cow_head(skb, dev->needed_headroom))
774 		goto free_skb;
775 
776 	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
777 	return NETDEV_TX_OK;
778 
779 free_skb:
780 	kfree_skb(skb);
781 	DEV_STATS_INC(dev, tx_dropped);
782 	return NETDEV_TX_OK;
783 }
784 
/*
 * Re-derive header lengths and GSO features after the tunnel's output
 * flags have changed.
 *
 * @dev:     tunnel device
 * @set_mtu: when true, shrink or grow dev->mtu by the header-length change
 *           (never below 68)
 */
static void ipgre_link_update(struct net_device *dev, bool set_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const int old_tun_hlen = tunnel->tun_hlen;
	bool gso_unsupported;
	int delta;

	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	delta = tunnel->tun_hlen - old_tun_hlen;
	tunnel->hlen += delta;

	/* The change lands in hard_header_len for devices with header_ops
	 * and in needed_headroom for all others.
	 */
	if (dev->header_ops)
		dev->hard_header_len += delta;
	else
		dev->needed_headroom += delta;

	if (set_mtu)
		WRITE_ONCE(dev->mtu, max_t(int, dev->mtu - delta, 68));

	/* Software GSO is turned off with GRE SEQ, or with GRE CSUM combined
	 * with an additional encapsulation.
	 */
	gso_unsupported = test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags) ||
			  (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
			   tunnel->encap.type != TUNNEL_ENCAP_NONE);

	if (gso_unsupported) {
		dev->features &= ~NETIF_F_GSO_SOFTWARE;
		dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
	} else {
		dev->features |= NETIF_F_GSO_SOFTWARE;
		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
	}
}
813 
ipgre_tunnel_ctl(struct net_device * dev,struct ip_tunnel_parm_kern * p,int cmd)814 static int ipgre_tunnel_ctl(struct net_device *dev,
815 			    struct ip_tunnel_parm_kern *p,
816 			    int cmd)
817 {
818 	__be16 i_flags, o_flags;
819 	int err;
820 
821 	if (!ip_tunnel_flags_is_be16_compat(p->i_flags) ||
822 	    !ip_tunnel_flags_is_be16_compat(p->o_flags))
823 		return -EOVERFLOW;
824 
825 	i_flags = ip_tunnel_flags_to_be16(p->i_flags);
826 	o_flags = ip_tunnel_flags_to_be16(p->o_flags);
827 
828 	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
829 		if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
830 		    p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
831 		    ((i_flags | o_flags) & (GRE_VERSION | GRE_ROUTING)))
832 			return -EINVAL;
833 	}
834 
835 	gre_flags_to_tnl_flags(p->i_flags, i_flags);
836 	gre_flags_to_tnl_flags(p->o_flags, o_flags);
837 
838 	err = ip_tunnel_ctl(dev, p, cmd);
839 	if (err)
840 		return err;
841 
842 	if (cmd == SIOCCHGTUNNEL) {
843 		struct ip_tunnel *t = netdev_priv(dev);
844 
845 		ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags);
846 		ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags);
847 
848 		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
849 			ipgre_link_update(dev, true);
850 	}
851 
852 	i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
853 	ip_tunnel_flags_from_be16(p->i_flags, i_flags);
854 	o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
855 	ip_tunnel_flags_from_be16(p->o_flags, o_flags);
856 
857 	return 0;
858 }
859 
860 /* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
862    over the Internet, provided multicast routing is tuned.
863 
864 
   I have no idea whether this bicycle was invented before me,
866    so that I had to set ARPHRD_IPGRE to a random value.
867    I have an impression, that Cisco could make something similar,
868    but this feature is apparently missing in IOS<=11.2(8).
869 
870    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
871    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
872 
873    ping -t 255 224.66.66.66
874 
875    If nobody answers, mbone does not work.
876 
877    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
878    ip addr add 10.66.66.<somewhat>/24 dev Universe
879    ifconfig Universe up
880    ifconfig Universe add fe80::<Your_real_addr>/10
881    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
882    ftp 10.66.66.66
883    ...
884    ftp fec0:6666:6666::193.233.7.65
885    ...
886  */
ipgre_header(struct sk_buff * skb,struct net_device * dev,unsigned short type,const void * daddr,const void * saddr,unsigned int len)887 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
888 			unsigned short type,
889 			const void *daddr, const void *saddr, unsigned int len)
890 {
891 	struct ip_tunnel *t = netdev_priv(dev);
892 	struct iphdr *iph;
893 	struct gre_base_hdr *greh;
894 
895 	iph = skb_push(skb, t->hlen + sizeof(*iph));
896 	greh = (struct gre_base_hdr *)(iph+1);
897 	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
898 	greh->protocol = htons(type);
899 
900 	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
901 
902 	/* Set the source hardware address. */
903 	if (saddr)
904 		memcpy(&iph->saddr, saddr, 4);
905 	if (daddr)
906 		memcpy(&iph->daddr, daddr, 4);
907 	if (iph->daddr)
908 		return t->hlen + sizeof(*iph);
909 
910 	return -(t->hlen + sizeof(*iph));
911 }
912 
ipgre_header_parse(const struct sk_buff * skb,unsigned char * haddr)913 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
914 {
915 	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
916 	memcpy(haddr, &iph->saddr, 4);
917 	return 4;
918 }
919 
920 static const struct header_ops ipgre_header_ops = {
921 	.create	= ipgre_header,
922 	.parse	= ipgre_header_parse,
923 };
924 
925 #ifdef CONFIG_NET_IPGRE_BROADCAST
ipgre_open(struct net_device * dev)926 static int ipgre_open(struct net_device *dev)
927 {
928 	struct ip_tunnel *t = netdev_priv(dev);
929 
930 	if (ipv4_is_multicast(t->parms.iph.daddr)) {
931 		struct flowi4 fl4;
932 		struct rtable *rt;
933 
934 		rt = ip_route_output_gre(t->net, &fl4,
935 					 t->parms.iph.daddr,
936 					 t->parms.iph.saddr,
937 					 t->parms.o_key,
938 					 t->parms.iph.tos & INET_DSCP_MASK,
939 					 t->parms.link);
940 		if (IS_ERR(rt))
941 			return -EADDRNOTAVAIL;
942 		dev = rt->dst.dev;
943 		ip_rt_put(rt);
944 		if (!__in_dev_get_rtnl(dev))
945 			return -EADDRNOTAVAIL;
946 		t->mlink = dev->ifindex;
947 		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
948 	}
949 	return 0;
950 }
951 
ipgre_close(struct net_device * dev)952 static int ipgre_close(struct net_device *dev)
953 {
954 	struct ip_tunnel *t = netdev_priv(dev);
955 
956 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
957 		struct in_device *in_dev;
958 		in_dev = inetdev_by_index(t->net, t->mlink);
959 		if (in_dev)
960 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
961 	}
962 	return 0;
963 }
964 #endif
965 
966 static const struct net_device_ops ipgre_netdev_ops = {
967 	.ndo_init		= ipgre_tunnel_init,
968 	.ndo_uninit		= ip_tunnel_uninit,
969 #ifdef CONFIG_NET_IPGRE_BROADCAST
970 	.ndo_open		= ipgre_open,
971 	.ndo_stop		= ipgre_close,
972 #endif
973 	.ndo_start_xmit		= ipgre_xmit,
974 	.ndo_siocdevprivate	= ip_tunnel_siocdevprivate,
975 	.ndo_change_mtu		= ip_tunnel_change_mtu,
976 	.ndo_get_stats64	= dev_get_tstats64,
977 	.ndo_get_iflink		= ip_tunnel_get_iflink,
978 	.ndo_tunnel_ctl		= ipgre_tunnel_ctl,
979 };
980 
/* Feature bits that __gre_tunnel_init() always enables on a GRE device. */
#define GRE_FEATURES \
	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
985 
ipgre_tunnel_setup(struct net_device * dev)986 static void ipgre_tunnel_setup(struct net_device *dev)
987 {
988 	dev->netdev_ops		= &ipgre_netdev_ops;
989 	dev->type		= ARPHRD_IPGRE;
990 	ip_tunnel_setup(dev, ipgre_net_id);
991 }
992 
__gre_tunnel_init(struct net_device * dev)993 static void __gre_tunnel_init(struct net_device *dev)
994 {
995 	struct ip_tunnel *tunnel;
996 
997 	tunnel = netdev_priv(dev);
998 	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
999 	tunnel->parms.iph.protocol = IPPROTO_GRE;
1000 
1001 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
1002 	dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
1003 
1004 	dev->features		|= GRE_FEATURES;
1005 	dev->hw_features	|= GRE_FEATURES;
1006 
1007 	/* TCP offload with GRE SEQ is not supported, nor can we support 2
1008 	 * levels of outer headers requiring an update.
1009 	 */
1010 	if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags))
1011 		return;
1012 	if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
1013 	    tunnel->encap.type != TUNNEL_ENCAP_NONE)
1014 		return;
1015 
1016 	dev->features |= NETIF_F_GSO_SOFTWARE;
1017 	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1018 
1019 	dev->lltx = true;
1020 }
1021 
ipgre_tunnel_init(struct net_device * dev)1022 static int ipgre_tunnel_init(struct net_device *dev)
1023 {
1024 	struct ip_tunnel *tunnel = netdev_priv(dev);
1025 	struct iphdr *iph = &tunnel->parms.iph;
1026 
1027 	__gre_tunnel_init(dev);
1028 
1029 	__dev_addr_set(dev, &iph->saddr, 4);
1030 	memcpy(dev->broadcast, &iph->daddr, 4);
1031 
1032 	dev->flags		= IFF_NOARP;
1033 	netif_keep_dst(dev);
1034 	dev->addr_len		= 4;
1035 
1036 	if (iph->daddr && !tunnel->collect_md) {
1037 #ifdef CONFIG_NET_IPGRE_BROADCAST
1038 		if (ipv4_is_multicast(iph->daddr)) {
1039 			if (!iph->saddr)
1040 				return -EINVAL;
1041 			dev->flags = IFF_BROADCAST;
1042 			dev->header_ops = &ipgre_header_ops;
1043 			dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1044 			dev->needed_headroom = 0;
1045 		}
1046 #endif
1047 	} else if (!tunnel->collect_md) {
1048 		dev->header_ops = &ipgre_header_ops;
1049 		dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1050 		dev->needed_headroom = 0;
1051 	}
1052 
1053 	return ip_tunnel_init(dev);
1054 }
1055 
1056 static const struct gre_protocol ipgre_protocol = {
1057 	.handler     = gre_rcv,
1058 	.err_handler = gre_err,
1059 };
1060 
ipgre_init_net(struct net * net)1061 static int __net_init ipgre_init_net(struct net *net)
1062 {
1063 	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
1064 }
1065 
ipgre_exit_batch_rtnl(struct list_head * list_net,struct list_head * dev_to_kill)1066 static void __net_exit ipgre_exit_batch_rtnl(struct list_head *list_net,
1067 					     struct list_head *dev_to_kill)
1068 {
1069 	ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops,
1070 			      dev_to_kill);
1071 }
1072 
1073 static struct pernet_operations ipgre_net_ops = {
1074 	.init = ipgre_init_net,
1075 	.exit_batch_rtnl = ipgre_exit_batch_rtnl,
1076 	.id   = &ipgre_net_id,
1077 	.size = sizeof(struct ip_tunnel_net),
1078 };
1079 
ipgre_tunnel_validate(struct nlattr * tb[],struct nlattr * data[],struct netlink_ext_ack * extack)1080 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1081 				 struct netlink_ext_ack *extack)
1082 {
1083 	__be16 flags;
1084 
1085 	if (!data)
1086 		return 0;
1087 
1088 	flags = 0;
1089 	if (data[IFLA_GRE_IFLAGS])
1090 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1091 	if (data[IFLA_GRE_OFLAGS])
1092 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1093 	if (flags & (GRE_VERSION|GRE_ROUTING))
1094 		return -EINVAL;
1095 
1096 	if (data[IFLA_GRE_COLLECT_METADATA] &&
1097 	    data[IFLA_GRE_ENCAP_TYPE] &&
1098 	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
1099 		return -EINVAL;
1100 
1101 	return 0;
1102 }
1103 
ipgre_tap_validate(struct nlattr * tb[],struct nlattr * data[],struct netlink_ext_ack * extack)1104 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1105 			      struct netlink_ext_ack *extack)
1106 {
1107 	__be32 daddr;
1108 
1109 	if (tb[IFLA_ADDRESS]) {
1110 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1111 			return -EINVAL;
1112 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1113 			return -EADDRNOTAVAIL;
1114 	}
1115 
1116 	if (!data)
1117 		goto out;
1118 
1119 	if (data[IFLA_GRE_REMOTE]) {
1120 		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1121 		if (!daddr)
1122 			return -EINVAL;
1123 	}
1124 
1125 out:
1126 	return ipgre_tunnel_validate(tb, data, extack);
1127 }
1128 
erspan_validate(struct nlattr * tb[],struct nlattr * data[],struct netlink_ext_ack * extack)1129 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1130 			   struct netlink_ext_ack *extack)
1131 {
1132 	__be16 flags = 0;
1133 	int ret;
1134 
1135 	if (!data)
1136 		return 0;
1137 
1138 	ret = ipgre_tap_validate(tb, data, extack);
1139 	if (ret)
1140 		return ret;
1141 
1142 	if (data[IFLA_GRE_ERSPAN_VER] &&
1143 	    nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
1144 		return 0;
1145 
1146 	/* ERSPAN type II/III should only have GRE sequence and key flag */
1147 	if (data[IFLA_GRE_OFLAGS])
1148 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1149 	if (data[IFLA_GRE_IFLAGS])
1150 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1151 	if (!data[IFLA_GRE_COLLECT_METADATA] &&
1152 	    flags != (GRE_SEQ | GRE_KEY))
1153 		return -EINVAL;
1154 
1155 	/* ERSPAN Session ID only has 10-bit. Since we reuse
1156 	 * 32-bit key field as ID, check it's range.
1157 	 */
1158 	if (data[IFLA_GRE_IKEY] &&
1159 	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1160 		return -EINVAL;
1161 
1162 	if (data[IFLA_GRE_OKEY] &&
1163 	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
1164 		return -EINVAL;
1165 
1166 	return 0;
1167 }
1168 
/* Translate IFLA_GRE_* attributes into tunnel parameters.
 *
 * @dev:    device being created or changed
 * @data:   IFLA_INFO_DATA attributes, may be NULL
 * @tb:     top-level link attributes (unused here)
 * @parms:  output; zeroed first, then filled from @data
 * @fwmark: output; only written when IFLA_GRE_FWMARK is present
 *
 * Besides @parms, this updates ->collect_md and ->ignore_df of the tunnel and
 * may switch dev->type from ARPHRD_IPGRE to ARPHRD_NONE.
 *
 * Returns 0, or -EINVAL when "ignore DF" and path MTU discovery (DF set) are
 * requested together.
 */
static int ipgre_netlink_parms(struct net_device *dev,
				struct nlattr *data[],
				struct nlattr *tb[],
				struct ip_tunnel_parm_kern *parms,
				__u32 *fwmark)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *outer = &parms->iph;

	memset(parms, 0, sizeof(*parms));
	outer->protocol = IPPROTO_GRE;

	if (!data)
		return 0;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	/* GRE flags are converted to the internal tunnel flag bitmaps. */
	if (data[IFLA_GRE_IFLAGS])
		gre_flags_to_tnl_flags(parms->i_flags,
				       nla_get_be16(data[IFLA_GRE_IFLAGS]));
	if (data[IFLA_GRE_OFLAGS])
		gre_flags_to_tnl_flags(parms->o_flags,
				       nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		outer->saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
	if (data[IFLA_GRE_REMOTE])
		outer->daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		outer->ttl = nla_get_u8(data[IFLA_GRE_TTL]);
	if (data[IFLA_GRE_TOS])
		outer->tos = nla_get_u8(data[IFLA_GRE_TOS]);

	/* DF is set unless IFLA_GRE_PMTUDISC is present and zero. */
	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
		if (tunnel->ignore_df)
			return -EINVAL;
		outer->frag_off = htons(IP_DF);
	}

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		tunnel->collect_md = true;
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;
	}

	if (data[IFLA_GRE_IGNORE_DF]) {
		bool ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);

		if (ignore_df && (outer->frag_off & htons(IP_DF)))
			return -EINVAL;
		tunnel->ignore_df = ignore_df;
	}

	if (data[IFLA_GRE_FWMARK])
		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

	return 0;
}
1237 
/* Parse ERSPAN attributes on top of the common GRE ones.
 *
 * @dev:    device being created or changed
 * @data:   IFLA_INFO_DATA attributes, may be NULL
 * @tb:     top-level link attributes, passed through to ipgre_netlink_parms()
 * @parms:  output tunnel parameters, filled by ipgre_netlink_parms()
 * @fwmark: output fwmark, filled by ipgre_netlink_parms()
 *
 * The ERSPAN version, index, direction and hardware ID are parsed into locals
 * and written to the tunnel only after every range check has passed, so a
 * rejected request leaves the tunnel's ERSPAN settings as they were.  (State
 * already updated by ipgre_netlink_parms() is not rolled back.)
 *
 * Returns 0 on success, the error from ipgre_netlink_parms(), or -EINVAL for
 * an out-of-range ERSPAN attribute.
 */
static int erspan_netlink_parms(struct net_device *dev,
				struct nlattr *data[],
				struct nlattr *tb[],
				struct ip_tunnel_parm_kern *parms,
				__u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);
	/* Start from the current settings; absent attributes keep them. */
	u8 ver = t->erspan_ver;
	u32 index = t->index;
	u16 hwid = t->hwid;
	u8 dir = t->dir;
	int err;

	err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
	if (err)
		return err;
	if (!data)
		return 0;

	if (data[IFLA_GRE_ERSPAN_VER]) {
		ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
		if (ver > 2)
			return -EINVAL;
	}

	if (ver == 1) {
		if (data[IFLA_GRE_ERSPAN_INDEX]) {
			index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
			if (index & ~INDEX_MASK)
				return -EINVAL;
		}
	} else if (ver == 2) {
		if (data[IFLA_GRE_ERSPAN_DIR]) {
			dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
			if (dir & ~(DIR_MASK >> DIR_OFFSET))
				return -EINVAL;
		}
		if (data[IFLA_GRE_ERSPAN_HWID]) {
			hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
			if (hwid & ~(HWID_MASK >> HWID_OFFSET))
				return -EINVAL;
		}
	}

	/* Everything validated: commit. */
	t->erspan_ver = ver;
	t->index = index;
	t->dir = dir;
	t->hwid = hwid;

	return 0;
}
1281 
1282 /* This function returns true when ENCAP attributes are present in the nl msg */
ipgre_netlink_encap_parms(struct nlattr * data[],struct ip_tunnel_encap * ipencap)1283 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1284 				      struct ip_tunnel_encap *ipencap)
1285 {
1286 	bool ret = false;
1287 
1288 	memset(ipencap, 0, sizeof(*ipencap));
1289 
1290 	if (!data)
1291 		return ret;
1292 
1293 	if (data[IFLA_GRE_ENCAP_TYPE]) {
1294 		ret = true;
1295 		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1296 	}
1297 
1298 	if (data[IFLA_GRE_ENCAP_FLAGS]) {
1299 		ret = true;
1300 		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1301 	}
1302 
1303 	if (data[IFLA_GRE_ENCAP_SPORT]) {
1304 		ret = true;
1305 		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1306 	}
1307 
1308 	if (data[IFLA_GRE_ENCAP_DPORT]) {
1309 		ret = true;
1310 		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1311 	}
1312 
1313 	return ret;
1314 }
1315 
gre_tap_init(struct net_device * dev)1316 static int gre_tap_init(struct net_device *dev)
1317 {
1318 	__gre_tunnel_init(dev);
1319 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1320 	netif_keep_dst(dev);
1321 
1322 	return ip_tunnel_init(dev);
1323 }
1324 
1325 static const struct net_device_ops gre_tap_netdev_ops = {
1326 	.ndo_init		= gre_tap_init,
1327 	.ndo_uninit		= ip_tunnel_uninit,
1328 	.ndo_start_xmit		= gre_tap_xmit,
1329 	.ndo_set_mac_address 	= eth_mac_addr,
1330 	.ndo_validate_addr	= eth_validate_addr,
1331 	.ndo_change_mtu		= ip_tunnel_change_mtu,
1332 	.ndo_get_stats64	= dev_get_tstats64,
1333 	.ndo_get_iflink		= ip_tunnel_get_iflink,
1334 	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
1335 };
1336 
erspan_tunnel_init(struct net_device * dev)1337 static int erspan_tunnel_init(struct net_device *dev)
1338 {
1339 	struct ip_tunnel *tunnel = netdev_priv(dev);
1340 
1341 	if (tunnel->erspan_ver == 0)
1342 		tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */
1343 	else
1344 		tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */
1345 
1346 	tunnel->parms.iph.protocol = IPPROTO_GRE;
1347 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1348 		       erspan_hdr_len(tunnel->erspan_ver);
1349 
1350 	dev->features		|= GRE_FEATURES;
1351 	dev->hw_features	|= GRE_FEATURES;
1352 	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
1353 	netif_keep_dst(dev);
1354 
1355 	return ip_tunnel_init(dev);
1356 }
1357 
1358 static const struct net_device_ops erspan_netdev_ops = {
1359 	.ndo_init		= erspan_tunnel_init,
1360 	.ndo_uninit		= ip_tunnel_uninit,
1361 	.ndo_start_xmit		= erspan_xmit,
1362 	.ndo_set_mac_address	= eth_mac_addr,
1363 	.ndo_validate_addr	= eth_validate_addr,
1364 	.ndo_change_mtu		= ip_tunnel_change_mtu,
1365 	.ndo_get_stats64	= dev_get_tstats64,
1366 	.ndo_get_iflink		= ip_tunnel_get_iflink,
1367 	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
1368 };
1369 
ipgre_tap_setup(struct net_device * dev)1370 static void ipgre_tap_setup(struct net_device *dev)
1371 {
1372 	ether_setup(dev);
1373 	dev->max_mtu = 0;
1374 	dev->netdev_ops	= &gre_tap_netdev_ops;
1375 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1376 	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
1377 	ip_tunnel_setup(dev, gre_tap_net_id);
1378 }
1379 
1380 static int
ipgre_newlink_encap_setup(struct net_device * dev,struct nlattr * data[])1381 ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
1382 {
1383 	struct ip_tunnel_encap ipencap;
1384 
1385 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1386 		struct ip_tunnel *t = netdev_priv(dev);
1387 		int err = ip_tunnel_encap_setup(t, &ipencap);
1388 
1389 		if (err < 0)
1390 			return err;
1391 	}
1392 
1393 	return 0;
1394 }
1395 
ipgre_newlink(struct net * src_net,struct net_device * dev,struct nlattr * tb[],struct nlattr * data[],struct netlink_ext_ack * extack)1396 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1397 			 struct nlattr *tb[], struct nlattr *data[],
1398 			 struct netlink_ext_ack *extack)
1399 {
1400 	struct ip_tunnel_parm_kern p;
1401 	__u32 fwmark = 0;
1402 	int err;
1403 
1404 	err = ipgre_newlink_encap_setup(dev, data);
1405 	if (err)
1406 		return err;
1407 
1408 	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1409 	if (err < 0)
1410 		return err;
1411 	return ip_tunnel_newlink(dev, tb, &p, fwmark);
1412 }
1413 
erspan_newlink(struct net * src_net,struct net_device * dev,struct nlattr * tb[],struct nlattr * data[],struct netlink_ext_ack * extack)1414 static int erspan_newlink(struct net *src_net, struct net_device *dev,
1415 			  struct nlattr *tb[], struct nlattr *data[],
1416 			  struct netlink_ext_ack *extack)
1417 {
1418 	struct ip_tunnel_parm_kern p;
1419 	__u32 fwmark = 0;
1420 	int err;
1421 
1422 	err = ipgre_newlink_encap_setup(dev, data);
1423 	if (err)
1424 		return err;
1425 
1426 	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1427 	if (err)
1428 		return err;
1429 	return ip_tunnel_newlink(dev, tb, &p, fwmark);
1430 }
1431 
ipgre_changelink(struct net_device * dev,struct nlattr * tb[],struct nlattr * data[],struct netlink_ext_ack * extack)1432 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1433 			    struct nlattr *data[],
1434 			    struct netlink_ext_ack *extack)
1435 {
1436 	struct ip_tunnel *t = netdev_priv(dev);
1437 	struct ip_tunnel_parm_kern p;
1438 	__u32 fwmark = t->fwmark;
1439 	int err;
1440 
1441 	err = ipgre_newlink_encap_setup(dev, data);
1442 	if (err)
1443 		return err;
1444 
1445 	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1446 	if (err < 0)
1447 		return err;
1448 
1449 	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1450 	if (err < 0)
1451 		return err;
1452 
1453 	ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
1454 	ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);
1455 
1456 	ipgre_link_update(dev, !tb[IFLA_MTU]);
1457 
1458 	return 0;
1459 }
1460 
erspan_changelink(struct net_device * dev,struct nlattr * tb[],struct nlattr * data[],struct netlink_ext_ack * extack)1461 static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
1462 			     struct nlattr *data[],
1463 			     struct netlink_ext_ack *extack)
1464 {
1465 	struct ip_tunnel *t = netdev_priv(dev);
1466 	struct ip_tunnel_parm_kern p;
1467 	__u32 fwmark = t->fwmark;
1468 	int err;
1469 
1470 	err = ipgre_newlink_encap_setup(dev, data);
1471 	if (err)
1472 		return err;
1473 
1474 	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1475 	if (err < 0)
1476 		return err;
1477 
1478 	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1479 	if (err < 0)
1480 		return err;
1481 
1482 	ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
1483 	ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);
1484 
1485 	return 0;
1486 }
1487 
ipgre_get_size(const struct net_device * dev)1488 static size_t ipgre_get_size(const struct net_device *dev)
1489 {
1490 	return
1491 		/* IFLA_GRE_LINK */
1492 		nla_total_size(4) +
1493 		/* IFLA_GRE_IFLAGS */
1494 		nla_total_size(2) +
1495 		/* IFLA_GRE_OFLAGS */
1496 		nla_total_size(2) +
1497 		/* IFLA_GRE_IKEY */
1498 		nla_total_size(4) +
1499 		/* IFLA_GRE_OKEY */
1500 		nla_total_size(4) +
1501 		/* IFLA_GRE_LOCAL */
1502 		nla_total_size(4) +
1503 		/* IFLA_GRE_REMOTE */
1504 		nla_total_size(4) +
1505 		/* IFLA_GRE_TTL */
1506 		nla_total_size(1) +
1507 		/* IFLA_GRE_TOS */
1508 		nla_total_size(1) +
1509 		/* IFLA_GRE_PMTUDISC */
1510 		nla_total_size(1) +
1511 		/* IFLA_GRE_ENCAP_TYPE */
1512 		nla_total_size(2) +
1513 		/* IFLA_GRE_ENCAP_FLAGS */
1514 		nla_total_size(2) +
1515 		/* IFLA_GRE_ENCAP_SPORT */
1516 		nla_total_size(2) +
1517 		/* IFLA_GRE_ENCAP_DPORT */
1518 		nla_total_size(2) +
1519 		/* IFLA_GRE_COLLECT_METADATA */
1520 		nla_total_size(0) +
1521 		/* IFLA_GRE_IGNORE_DF */
1522 		nla_total_size(1) +
1523 		/* IFLA_GRE_FWMARK */
1524 		nla_total_size(4) +
1525 		/* IFLA_GRE_ERSPAN_INDEX */
1526 		nla_total_size(4) +
1527 		/* IFLA_GRE_ERSPAN_VER */
1528 		nla_total_size(1) +
1529 		/* IFLA_GRE_ERSPAN_DIR */
1530 		nla_total_size(1) +
1531 		/* IFLA_GRE_ERSPAN_HWID */
1532 		nla_total_size(2) +
1533 		0;
1534 }
1535 
ipgre_fill_info(struct sk_buff * skb,const struct net_device * dev)1536 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1537 {
1538 	struct ip_tunnel *t = netdev_priv(dev);
1539 	struct ip_tunnel_parm_kern *p = &t->parms;
1540 	IP_TUNNEL_DECLARE_FLAGS(o_flags);
1541 
1542 	ip_tunnel_flags_copy(o_flags, p->o_flags);
1543 
1544 	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1545 	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
1546 			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1547 	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
1548 			 gre_tnl_flags_to_gre_flags(o_flags)) ||
1549 	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1550 	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1551 	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1552 	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1553 	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1554 	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1555 	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1556 		       !!(p->iph.frag_off & htons(IP_DF))) ||
1557 	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
1558 		goto nla_put_failure;
1559 
1560 	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1561 			t->encap.type) ||
1562 	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1563 			 t->encap.sport) ||
1564 	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1565 			 t->encap.dport) ||
1566 	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1567 			t->encap.flags))
1568 		goto nla_put_failure;
1569 
1570 	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1571 		goto nla_put_failure;
1572 
1573 	if (t->collect_md) {
1574 		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1575 			goto nla_put_failure;
1576 	}
1577 
1578 	return 0;
1579 
1580 nla_put_failure:
1581 	return -EMSGSIZE;
1582 }
1583 
erspan_fill_info(struct sk_buff * skb,const struct net_device * dev)1584 static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
1585 {
1586 	struct ip_tunnel *t = netdev_priv(dev);
1587 
1588 	if (t->erspan_ver <= 2) {
1589 		if (t->erspan_ver != 0 && !t->collect_md)
1590 			__set_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags);
1591 
1592 		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
1593 			goto nla_put_failure;
1594 
1595 		if (t->erspan_ver == 1) {
1596 			if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1597 				goto nla_put_failure;
1598 		} else if (t->erspan_ver == 2) {
1599 			if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
1600 				goto nla_put_failure;
1601 			if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
1602 				goto nla_put_failure;
1603 		}
1604 	}
1605 
1606 	return ipgre_fill_info(skb, dev);
1607 
1608 nla_put_failure:
1609 	return -EMSGSIZE;
1610 }
1611 
erspan_setup(struct net_device * dev)1612 static void erspan_setup(struct net_device *dev)
1613 {
1614 	struct ip_tunnel *t = netdev_priv(dev);
1615 
1616 	ether_setup(dev);
1617 	dev->max_mtu = 0;
1618 	dev->netdev_ops = &erspan_netdev_ops;
1619 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1620 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1621 	ip_tunnel_setup(dev, erspan_net_id);
1622 	t->erspan_ver = 1;
1623 }
1624 
1625 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1626 	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
1627 	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
1628 	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
1629 	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
1630 	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
1631 	[IFLA_GRE_LOCAL]	= { .len = sizeof_field(struct iphdr, saddr) },
1632 	[IFLA_GRE_REMOTE]	= { .len = sizeof_field(struct iphdr, daddr) },
1633 	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
1634 	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
1635 	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
1636 	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
1637 	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
1638 	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
1639 	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
1640 	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
1641 	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
1642 	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
1643 	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
1644 	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
1645 	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
1646 	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
1647 };
1648 
1649 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1650 	.kind		= "gre",
1651 	.maxtype	= IFLA_GRE_MAX,
1652 	.policy		= ipgre_policy,
1653 	.priv_size	= sizeof(struct ip_tunnel),
1654 	.setup		= ipgre_tunnel_setup,
1655 	.validate	= ipgre_tunnel_validate,
1656 	.newlink	= ipgre_newlink,
1657 	.changelink	= ipgre_changelink,
1658 	.dellink	= ip_tunnel_dellink,
1659 	.get_size	= ipgre_get_size,
1660 	.fill_info	= ipgre_fill_info,
1661 	.get_link_net	= ip_tunnel_get_link_net,
1662 };
1663 
1664 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1665 	.kind		= "gretap",
1666 	.maxtype	= IFLA_GRE_MAX,
1667 	.policy		= ipgre_policy,
1668 	.priv_size	= sizeof(struct ip_tunnel),
1669 	.setup		= ipgre_tap_setup,
1670 	.validate	= ipgre_tap_validate,
1671 	.newlink	= ipgre_newlink,
1672 	.changelink	= ipgre_changelink,
1673 	.dellink	= ip_tunnel_dellink,
1674 	.get_size	= ipgre_get_size,
1675 	.fill_info	= ipgre_fill_info,
1676 	.get_link_net	= ip_tunnel_get_link_net,
1677 };
1678 
1679 static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1680 	.kind		= "erspan",
1681 	.maxtype	= IFLA_GRE_MAX,
1682 	.policy		= ipgre_policy,
1683 	.priv_size	= sizeof(struct ip_tunnel),
1684 	.setup		= erspan_setup,
1685 	.validate	= erspan_validate,
1686 	.newlink	= erspan_newlink,
1687 	.changelink	= erspan_changelink,
1688 	.dellink	= ip_tunnel_dellink,
1689 	.get_size	= ipgre_get_size,
1690 	.fill_info	= erspan_fill_info,
1691 	.get_link_net	= ip_tunnel_get_link_net,
1692 };
1693 
/* Create a collect_md ("flow based") gretap device.
 *
 * @net:              namespace to create the device in
 * @name:             device name, passed to rtnl_create_link()
 * @name_assign_type: name assignment type, passed to rtnl_create_link()
 *
 * The device is created with an empty attribute table, marked collect_md,
 * given the largest MTU (IP_MAX_MTU) and configured.
 *
 * Returns the new device, or an ERR_PTR() on failure.  A device whose
 * ipgre_newlink() call failed is freed directly; later failures tear it down
 * through ip_tunnel_dellink() and unregister_netdevice_many().
 */
struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	LIST_HEAD(kill_list);
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, name, name_assign_type,
			       &ipgre_tap_ops, tb, NULL);
	if (IS_ERR(dev))
		return dev;

	/* Configure flow based GRE device. */
	tunnel = netdev_priv(dev);
	tunnel->collect_md = true;

	err = ipgre_newlink(net, dev, tb, NULL, NULL);
	if (err < 0) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto err_dellink;

	err = rtnl_configure_link(dev, NULL, 0, NULL);
	if (err < 0)
		goto err_dellink;

	return dev;

err_dellink:
	ip_tunnel_dellink(dev, &kill_list);
	unregister_netdevice_many(&kill_list);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1738 
ipgre_tap_init_net(struct net * net)1739 static int __net_init ipgre_tap_init_net(struct net *net)
1740 {
1741 	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1742 }
1743 
ipgre_tap_exit_batch_rtnl(struct list_head * list_net,struct list_head * dev_to_kill)1744 static void __net_exit ipgre_tap_exit_batch_rtnl(struct list_head *list_net,
1745 						 struct list_head *dev_to_kill)
1746 {
1747 	ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops,
1748 			      dev_to_kill);
1749 }
1750 
1751 static struct pernet_operations ipgre_tap_net_ops = {
1752 	.init = ipgre_tap_init_net,
1753 	.exit_batch_rtnl = ipgre_tap_exit_batch_rtnl,
1754 	.id   = &gre_tap_net_id,
1755 	.size = sizeof(struct ip_tunnel_net),
1756 };
1757 
erspan_init_net(struct net * net)1758 static int __net_init erspan_init_net(struct net *net)
1759 {
1760 	return ip_tunnel_init_net(net, erspan_net_id,
1761 				  &erspan_link_ops, "erspan0");
1762 }
1763 
erspan_exit_batch_rtnl(struct list_head * net_list,struct list_head * dev_to_kill)1764 static void __net_exit erspan_exit_batch_rtnl(struct list_head *net_list,
1765 					      struct list_head *dev_to_kill)
1766 {
1767 	ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops,
1768 			      dev_to_kill);
1769 }
1770 
1771 static struct pernet_operations erspan_net_ops = {
1772 	.init = erspan_init_net,
1773 	.exit_batch_rtnl = erspan_exit_batch_rtnl,
1774 	.id   = &erspan_net_id,
1775 	.size = sizeof(struct ip_tunnel_net),
1776 };
1777 
ipgre_init(void)1778 static int __init ipgre_init(void)
1779 {
1780 	int err;
1781 
1782 	pr_info("GRE over IPv4 tunneling driver\n");
1783 
1784 	err = register_pernet_device(&ipgre_net_ops);
1785 	if (err < 0)
1786 		return err;
1787 
1788 	err = register_pernet_device(&ipgre_tap_net_ops);
1789 	if (err < 0)
1790 		goto pnet_tap_failed;
1791 
1792 	err = register_pernet_device(&erspan_net_ops);
1793 	if (err < 0)
1794 		goto pnet_erspan_failed;
1795 
1796 	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1797 	if (err < 0) {
1798 		pr_info("%s: can't add protocol\n", __func__);
1799 		goto add_proto_failed;
1800 	}
1801 
1802 	err = rtnl_link_register(&ipgre_link_ops);
1803 	if (err < 0)
1804 		goto rtnl_link_failed;
1805 
1806 	err = rtnl_link_register(&ipgre_tap_ops);
1807 	if (err < 0)
1808 		goto tap_ops_failed;
1809 
1810 	err = rtnl_link_register(&erspan_link_ops);
1811 	if (err < 0)
1812 		goto erspan_link_failed;
1813 
1814 	return 0;
1815 
1816 erspan_link_failed:
1817 	rtnl_link_unregister(&ipgre_tap_ops);
1818 tap_ops_failed:
1819 	rtnl_link_unregister(&ipgre_link_ops);
1820 rtnl_link_failed:
1821 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1822 add_proto_failed:
1823 	unregister_pernet_device(&erspan_net_ops);
1824 pnet_erspan_failed:
1825 	unregister_pernet_device(&ipgre_tap_net_ops);
1826 pnet_tap_failed:
1827 	unregister_pernet_device(&ipgre_net_ops);
1828 	return err;
1829 }
1830 
ipgre_fini(void)1831 static void __exit ipgre_fini(void)
1832 {
1833 	rtnl_link_unregister(&ipgre_tap_ops);
1834 	rtnl_link_unregister(&ipgre_link_ops);
1835 	rtnl_link_unregister(&erspan_link_ops);
1836 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1837 	unregister_pernet_device(&ipgre_tap_net_ops);
1838 	unregister_pernet_device(&ipgre_net_ops);
1839 	unregister_pernet_device(&erspan_net_ops);
1840 }
1841 
1842 module_init(ipgre_init);
1843 module_exit(ipgre_fini);
1844 MODULE_DESCRIPTION("IPv4 GRE tunnels over IP library");
1845 MODULE_LICENSE("GPL");
1846 MODULE_ALIAS_RTNL_LINK("gre");
1847 MODULE_ALIAS_RTNL_LINK("gretap");
1848 MODULE_ALIAS_RTNL_LINK("erspan");
1849 MODULE_ALIAS_NETDEV("gre0");
1850 MODULE_ALIAS_NETDEV("gretap0");
1851 MODULE_ALIAS_NETDEV("erspan0");
1852