/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 *
 */

#include "ipvlan.h"

static u32 ipvlan_jhash_secret __read_mostly;

void ipvlan_init_secret(void)
{
	net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret));
}

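/* Update the per-cpu RX counters for a slave. On success the packet,
 * byte and (optional) multicast counts are bumped inside a
 * u64_stats_update_begin/end section; on failure only rx_errs is
 * incremented, which needs no syncp protection.
 */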
void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
			    unsigned int len, bool success, bool mcast)
{
	if (likely(success)) {
		struct ipvl_pcpu_stats *pcptr;

		pcptr = this_cpu_ptr(ipvlan->pcpu_stats);
		u64_stats_update_begin(&pcptr->syncp);
		pcptr->rx_pkts++;
		pcptr->rx_bytes += len;
		if (mcast)
			pcptr->rx_mcast++;
		u64_stats_update_end(&pcptr->syncp);
	} else {
		this_cpu_inc(ipvlan->pcpu_stats->rx_errs);
	}
}
EXPORT_SYMBOL_GPL(ipvlan_count_rx);

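/* Address-hashing helpers: fold an IPv4/IPv6 address into one of the
 * IPVLAN_HASH_MASK + 1 buckets of the port's address table, keyed by
 * the boot-time random ipvlan_jhash_secret. The IPv6 variant becomes a
 * stub returning 0 when CONFIG_IPV6 is disabled.
 */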
#if IS_ENABLED(CONFIG_IPV6)
static u8 ipvlan_get_v6_hash(const void *iaddr)
{
	const struct in6_addr *ip6_addr = iaddr;

	return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) &
	       IPVLAN_HASH_MASK;
}
#else
static u8 ipvlan_get_v6_hash(const void *iaddr)
{
	return 0;
}
#endif

static u8 ipvlan_get_v4_hash(const void *iaddr)
{
	const struct in_addr *ip4_addr = iaddr;

	return jhash_1word(ip4_addr->s_addr, ipvlan_jhash_secret) &
	       IPVLAN_HASH_MASK;
}

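/* Compare an ipvl_addr entry against a raw in_addr/in6_addr. The entry
 * only matches when its address type agrees with the requested family.
 */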
static bool addr_equal(bool is_v6, struct ipvl_addr *addr, const void *iaddr)
{
	if (!is_v6 && addr->atype == IPVL_IPV4) {
		struct in_addr *i4addr = (struct in_addr *)iaddr;

		return addr->ip4addr.s_addr == i4addr->s_addr;
#if IS_ENABLED(CONFIG_IPV6)
	} else if (is_v6 && addr->atype == IPVL_IPV6) {
		struct in6_addr *i6addr = (struct in6_addr *)iaddr;

		return ipv6_addr_equal(&addr->ip6addr, i6addr);
#endif
	}

	return false;
}

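/* Look up an address in the port-wide hash table. The bucket is walked
 * with hlist_for_each_entry_rcu(), so callers presumably run under
 * rcu_read_lock().
 */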
static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
					       const void *iaddr, bool is_v6)
{
	struct ipvl_addr *addr;
	u8 hash;

	hash = is_v6 ? ipvlan_get_v6_hash(iaddr) :
	       ipvlan_get_v4_hash(iaddr);
	hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode)
		if (addr_equal(is_v6, addr, iaddr))
			return addr;
	return NULL;
}

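/* Publish an address in the port hash table. The hlist_unhashed()
 * check keeps a second add of an already-hashed entry from corrupting
 * the bucket list.
 */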
void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr)
{
	struct ipvl_port *port = ipvlan->port;
	u8 hash;

	hash = (addr->atype == IPVL_IPV6) ?
	       ipvlan_get_v6_hash(&addr->ip6addr) :
	       ipvlan_get_v4_hash(&addr->ip4addr);
	if (hlist_unhashed(&addr->hlnode))
		hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]);
}

void ipvlan_ht_addr_del(struct ipvl_addr *addr)
{
	hlist_del_init_rcu(&addr->hlnode);
}

struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
				   const void *iaddr, bool is_v6)
{
	struct ipvl_addr *addr, *ret = NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) {
		if (addr_equal(is_v6, addr, iaddr)) {
			ret = addr;
			break;
		}
	}
	rcu_read_unlock();
	return ret;
}

bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
{
	struct ipvl_dev *ipvlan;
	bool ret = false;

	rcu_read_lock();
	list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
		if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	return ret;
}

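/* Parse and validate the L3 header of an skb (ARP, IPv4 or IPv6,
 * including IPv6 neighbour solicitations) and classify it into *type.
 * Returns a pointer into the packet on success, or NULL when the frame
 * is malformed or of an unsupported protocol.
 */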
static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
{
	void *lyr3h = NULL;

	switch (skb->protocol) {
	case htons(ETH_P_ARP): {
		struct arphdr *arph;

		if (unlikely(!pskb_may_pull(skb, arp_hdr_len(port->dev))))
			return NULL;

		arph = arp_hdr(skb);
		*type = IPVL_ARP;
		lyr3h = arph;
		break;
	}
	case htons(ETH_P_IP): {
		u32 pktlen;
		struct iphdr *ip4h;

		if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h))))
			return NULL;

		ip4h = ip_hdr(skb);
		pktlen = ntohs(ip4h->tot_len);
		if (ip4h->ihl < 5 || ip4h->version != 4)
			return NULL;
		if (skb->len < pktlen || pktlen < (ip4h->ihl * 4))
			return NULL;

		*type = IPVL_IPV4;
		lyr3h = ip4h;
		break;
	}
#if IS_ENABLED(CONFIG_IPV6)
	case htons(ETH_P_IPV6): {
		struct ipv6hdr *ip6h;

		if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h))))
			return NULL;

		ip6h = ipv6_hdr(skb);
		if (ip6h->version != 6)
			return NULL;

		*type = IPVL_IPV6;
		lyr3h = ip6h;
		/* Only Neighbour Solicitation pkts need different treatment */
		if (ipv6_addr_any(&ip6h->saddr) &&
		    ip6h->nexthdr == NEXTHDR_ICMP) {
			struct icmp6hdr	*icmph;

			if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph))))
				return NULL;

			ip6h = ipv6_hdr(skb);
			icmph = (struct icmp6hdr *)(ip6h + 1);

			if (icmph->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
				/* Need to access the ipv6 address in body */
				if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph)
						+ sizeof(struct in6_addr))))
					return NULL;

				ip6h = ipv6_hdr(skb);
				icmph = (struct icmp6hdr *)(ip6h + 1);
			}

			*type = IPVL_ICMPV6;
			lyr3h = icmph;
		}
		break;
	}
#endif
	default:
		return NULL;
	}

	return lyr3h;
}

unsigned int ipvlan_mac_hash(const unsigned char *addr)
{
	u32 hash = jhash_1word(__get_unaligned_cpu32(addr+2),
			       ipvlan_jhash_secret);

	return hash & IPVLAN_MAC_FILTER_MASK;
}

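/* Work-queue handler that drains the port backlog of deferred
 * multicast/broadcast frames. Every eligible slave gets a clone;
 * TX-originated frames skip the slave they came from and are finally
 * sent out through the master device.
 */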
void ipvlan_process_multicast(struct work_struct *work)
{
	struct ipvl_port *port = container_of(work, struct ipvl_port, wq);
	struct ethhdr *ethh;
	struct ipvl_dev *ipvlan;
	struct sk_buff *skb, *nskb;
	struct sk_buff_head list;
	unsigned int len;
	unsigned int mac_hash;
	int ret;
	u8 pkt_type;
	bool tx_pkt;

	__skb_queue_head_init(&list);

	spin_lock_bh(&port->backlog.lock);
	skb_queue_splice_tail_init(&port->backlog, &list);
	spin_unlock_bh(&port->backlog.lock);

	while ((skb = __skb_dequeue(&list)) != NULL) {
		struct net_device *dev = skb->dev;
		bool consumed = false;

		ethh = eth_hdr(skb);
		tx_pkt = IPVL_SKB_CB(skb)->tx_pkt;
		mac_hash = ipvlan_mac_hash(ethh->h_dest);

		if (ether_addr_equal(ethh->h_dest, port->dev->broadcast))
			pkt_type = PACKET_BROADCAST;
		else
			pkt_type = PACKET_MULTICAST;

		rcu_read_lock();
		list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
			if (tx_pkt && (ipvlan->dev == skb->dev))
				continue;
			if (!test_bit(mac_hash, ipvlan->mac_filters))
				continue;
			if (!(ipvlan->dev->flags & IFF_UP))
				continue;
			ret = NET_RX_DROP;
			len = skb->len + ETH_HLEN;
			nskb = skb_clone(skb, GFP_ATOMIC);
			local_bh_disable();
			if (nskb) {
				consumed = true;
				nskb->pkt_type = pkt_type;
				nskb->dev = ipvlan->dev;
				if (tx_pkt)
					ret = dev_forward_skb(ipvlan->dev, nskb);
				else
					ret = netif_rx(nskb);
			}
			ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
			local_bh_enable();
		}
		rcu_read_unlock();

		if (tx_pkt) {
			/* If the packet originated here, send it out. */
			skb->dev = port->dev;
			skb->pkt_type = pkt_type;
			dev_queue_xmit(skb);
		} else {
			if (consumed)
				consume_skb(skb);
			else
				kfree_skb(skb);
		}
		if (dev)
			dev_put(dev);
		cond_resched();
	}
}

static void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev)
{
	bool xnet = true;

	if (dev)
		xnet = !net_eq(dev_net(skb->dev), dev_net(dev));

	skb_scrub_packet(skb, xnet);
	if (dev)
		skb->dev = dev;
}

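/* Deliver an skb to the slave owning the matched address. Local
 * slave-to-slave traffic is handed over with dev_forward_skb(), while
 * frames arriving from the wire are re-targeted at the slave and
 * requeued by returning RX_HANDLER_ANOTHER.
 */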
static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
			    bool local)
{
	struct ipvl_dev *ipvlan = addr->master;
	struct net_device *dev = ipvlan->dev;
	unsigned int len;
	rx_handler_result_t ret = RX_HANDLER_CONSUMED;
	bool success = false;
	struct sk_buff *skb = *pskb;

	len = skb->len + ETH_HLEN;
	/* Only packets exchanged between two local slaves need the
	 * device-up check as well as the skb-share check.
	 */
	if (local) {
		if (unlikely(!(dev->flags & IFF_UP))) {
			kfree_skb(skb);
			goto out;
		}

		skb = skb_share_check(skb, GFP_ATOMIC);
		if (!skb)
			goto out;

		*pskb = skb;
	}

	if (local) {
		skb->pkt_type = PACKET_HOST;
		if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS)
			success = true;
	} else {
		skb->dev = dev;
		ret = RX_HANDLER_ANOTHER;
		success = true;
	}

out:
	ipvlan_count_rx(ipvlan, len, success, false);
	return ret;
}

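/* Map a parsed L3 header onto a slave address, matching either the
 * destination or the source address depending on use_dest. Covers
 * IPv4, IPv6, ARP payload addresses and IPv6 neighbour-solicitation
 * targets.
 */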
static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
					    void *lyr3h, int addr_type,
					    bool use_dest)
{
	struct ipvl_addr *addr = NULL;

	switch (addr_type) {
#if IS_ENABLED(CONFIG_IPV6)
	case IPVL_IPV6: {
		struct ipv6hdr *ip6h;
		struct in6_addr *i6addr;

		ip6h = (struct ipv6hdr *)lyr3h;
		i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr;
		addr = ipvlan_ht_addr_lookup(port, i6addr, true);
		break;
	}
	case IPVL_ICMPV6: {
		struct nd_msg *ndmh;
		struct in6_addr *i6addr;

		/* Make sure that Neighbour Solicitation ICMPv6 packets
		 * are handled to avoid DAD issues.
		 */
		ndmh = (struct nd_msg *)lyr3h;
		if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
			i6addr = &ndmh->target;
			addr = ipvlan_ht_addr_lookup(port, i6addr, true);
		}
		break;
	}
#endif
	case IPVL_IPV4: {
		struct iphdr *ip4h;
		__be32 *i4addr;

		ip4h = (struct iphdr *)lyr3h;
		i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr;
		addr = ipvlan_ht_addr_lookup(port, i4addr, false);
		break;
	}
	case IPVL_ARP: {
		struct arphdr *arph;
		unsigned char *arp_ptr;
		__be32 dip;

		arph = (struct arphdr *)lyr3h;
		arp_ptr = (unsigned char *)(arph + 1);
		if (use_dest)
			arp_ptr += (2 * port->dev->addr_len) + 4;
		else
			arp_ptr += port->dev->addr_len;

		memcpy(&dip, arp_ptr, 4);
		addr = ipvlan_ht_addr_lookup(port, &dip, false);
		break;
	}
	}

	return addr;
}

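/* Route and transmit an IPv4 packet in the namespace of the master
 * device: a fresh output route is looked up for the packet and only
 * unicast/local route types are accepted before ip_local_out().
 */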
static int ipvlan_process_v4_outbound(struct sk_buff *skb)
{
	const struct iphdr *ip4h = ip_hdr(skb);
	struct net_device *dev = skb->dev;
	struct net *net = dev_net(dev);
	struct rtable *rt;
	int err, ret = NET_XMIT_DROP;
	struct flowi4 fl4 = {
		.flowi4_oif = dev->ifindex,
		.flowi4_tos = RT_TOS(ip4h->tos),
		.flowi4_flags = FLOWI_FLAG_ANYSRC,
		.flowi4_mark = skb->mark,
		.daddr = ip4h->daddr,
		.saddr = ip4h->saddr,
	};

	rt = ip_route_output_flow(net, &fl4, NULL);
	if (IS_ERR(rt))
		goto err;

	if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
		ip_rt_put(rt);
		goto err;
	}
	skb_dst_set(skb, &rt->dst);
	err = ip_local_out(net, skb->sk, skb);
	if (unlikely(net_xmit_eval(err)))
		dev->stats.tx_errors++;
	else
		ret = NET_XMIT_SUCCESS;
	goto out;
err:
	dev->stats.tx_errors++;
	kfree_skb(skb);
out:
	return ret;
}

#if IS_ENABLED(CONFIG_IPV6)
static int ipvlan_process_v6_outbound(struct sk_buff *skb)
{
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	struct net_device *dev = skb->dev;
	struct net *net = dev_net(dev);
	struct dst_entry *dst;
	int err, ret = NET_XMIT_DROP;
	struct flowi6 fl6 = {
		.flowi6_oif = dev->ifindex,
		.daddr = ip6h->daddr,
		.saddr = ip6h->saddr,
		.flowi6_flags = FLOWI_FLAG_ANYSRC,
		.flowlabel = ip6_flowinfo(ip6h),
		.flowi6_mark = skb->mark,
		.flowi6_proto = ip6h->nexthdr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		ret = dst->error;
		dst_release(dst);
		goto err;
	}
	skb_dst_set(skb, dst);
	err = ip6_local_out(net, skb->sk, skb);
	if (unlikely(net_xmit_eval(err)))
		dev->stats.tx_errors++;
	else
		ret = NET_XMIT_SUCCESS;
	goto out;
err:
	dev->stats.tx_errors++;
	kfree_skb(skb);
out:
	return ret;
}
#else
static int ipvlan_process_v6_outbound(struct sk_buff *skb)
{
	return NET_XMIT_DROP;
}
#endif

static int ipvlan_process_outbound(struct sk_buff *skb)
{
	struct ethhdr *ethh = eth_hdr(skb);
	int ret = NET_XMIT_DROP;

	/* The ipvlan is a pseudo-L2 device, so the packets that we receive
	 * will have an L2 header, which needs to be discarded before the
	 * packet is processed further in the net-ns of the main device.
	 */
	if (skb_mac_header_was_set(skb)) {
		/* In this mode we don't care about
		 * multicast and broadcast traffic.
		 */
		if (is_multicast_ether_addr(ethh->h_dest)) {
			pr_debug_ratelimited(
				"Dropped {multi|broad}cast of type=[%x]\n",
				ntohs(skb->protocol));
			kfree_skb(skb);
			goto out;
		}

		skb_pull(skb, sizeof(*ethh));
		skb->mac_header = (typeof(skb->mac_header))~0U;
		skb_reset_network_header(skb);
	}

	if (skb->protocol == htons(ETH_P_IPV6))
		ret = ipvlan_process_v6_outbound(skb);
	else if (skb->protocol == htons(ETH_P_IP))
		ret = ipvlan_process_v4_outbound(skb);
	else {
		pr_warn_ratelimited("Dropped outbound packet type=%x\n",
				    ntohs(skb->protocol));
		kfree_skb(skb);
	}
out:
	return ret;
}

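/* Defer a multicast/broadcast frame to the per-port work queue,
 * holding a reference on skb->dev until ipvlan_process_multicast()
 * drops it. The backlog is bounded by IPVLAN_QBACKLOG_LIMIT; on
 * overflow the frame is dropped and accounted as rx_dropped.
 */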
static void ipvlan_multicast_enqueue(struct ipvl_port *port,
				     struct sk_buff *skb, bool tx_pkt)
{
	if (skb->protocol == htons(ETH_P_PAUSE)) {
		kfree_skb(skb);
		return;
	}

	/* Record whether the deferred packet came from the TX or the RX
	 * path; inferring this from the packet's MAC addresses would lead
	 * to erroneous decisions. (This would be the case for a
	 * loopback-mode master device or a hair-pin mode of the switch.)
	 */
	IPVL_SKB_CB(skb)->tx_pkt = tx_pkt;

	spin_lock(&port->backlog.lock);
	if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) {
		if (skb->dev)
			dev_hold(skb->dev);
		__skb_queue_tail(&port->backlog, skb);
		spin_unlock(&port->backlog.lock);
		schedule_work(&port->wq);
	} else {
		spin_unlock(&port->backlog.lock);
		atomic_long_inc(&skb->dev->rx_dropped);
		kfree_skb(skb);
	}
}

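/* TX path for L3/L3S mode: when the destination belongs to another
 * slave on the same port (and VEPA is off), deliver locally, honouring
 * private mode; otherwise scrub the skb across the namespace boundary
 * and route it out through the master device.
 */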
static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
{
	const struct ipvl_dev *ipvlan = netdev_priv(dev);
	void *lyr3h;
	struct ipvl_addr *addr;
	int addr_type;

	lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type);
	if (!lyr3h)
		goto out;

	if (!ipvlan_is_vepa(ipvlan->port)) {
		addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
		if (addr) {
			if (ipvlan_is_private(ipvlan->port)) {
				consume_skb(skb);
				return NET_XMIT_DROP;
			}
			return ipvlan_rcv_frame(addr, &skb, true);
		}
	}
out:
	ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev);
	return ipvlan_process_outbound(skb);
}

static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
{
	const struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct ethhdr *eth = eth_hdr(skb);
	struct ipvl_addr *addr;
	void *lyr3h;
	int addr_type;

	if (!ipvlan_is_vepa(ipvlan->port) &&
	    ether_addr_equal(eth->h_dest, eth->h_source)) {
		lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type);
		if (lyr3h) {
			addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
			if (addr) {
				if (ipvlan_is_private(ipvlan->port)) {
					consume_skb(skb);
					return NET_XMIT_DROP;
				}
				return ipvlan_rcv_frame(addr, &skb, true);
			}
		}
		skb = skb_share_check(skb, GFP_ATOMIC);
		if (!skb)
			return NET_XMIT_DROP;

		/* The packet definitely does not belong to any of the
		 * virtual devices, but the destination is local. So forward
		 * the skb to the main-dev. At the RX side we just return
		 * RX_HANDLER_PASS for it to be processed further up the stack.
		 */
		return dev_forward_skb(ipvlan->phy_dev, skb);

	} else if (is_multicast_ether_addr(eth->h_dest)) {
		ipvlan_skb_crossing_ns(skb, NULL);
		ipvlan_multicast_enqueue(ipvlan->port, skb, true);
		return NET_XMIT_SUCCESS;
	}

	skb->dev = ipvlan->phy_dev;
	return dev_queue_xmit(skb);
}

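/* Transmit entry point for slave devices (presumably invoked from the
 * slave's ndo_start_xmit): dispatch to the L2 or L3 handler according
 * to the port mode, dropping frames that arrive before the port is set
 * up or that are too short to hold an Ethernet header.
 */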
int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct ipvl_port *port = ipvlan_port_get_rcu_bh(ipvlan->phy_dev);

	if (!port)
		goto out;

	if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
		goto out;

	switch (port->mode) {
	case IPVLAN_MODE_L2:
		return ipvlan_xmit_mode_l2(skb, dev);
	case IPVLAN_MODE_L3:
	case IPVLAN_MODE_L3S:
		return ipvlan_xmit_mode_l3(skb, dev);
	}

	/* Should not reach here */
	WARN_ONCE(true, "ipvlan_queue_xmit() called for mode = [%hx]\n",
			  port->mode);
out:
	kfree_skb(skb);
	return NET_XMIT_DROP;
}

static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port)
{
	struct ethhdr *eth = eth_hdr(skb);
	struct ipvl_addr *addr;
	void *lyr3h;
	int addr_type;

	if (ether_addr_equal(eth->h_source, skb->dev->dev_addr)) {
		lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
		if (!lyr3h)
			return true;

		addr = ipvlan_addr_lookup(port, lyr3h, addr_type, false);
		if (addr)
			return false;
	}

	return true;
}

static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb,
						 struct ipvl_port *port)
{
	void *lyr3h;
	int addr_type;
	struct ipvl_addr *addr;
	struct sk_buff *skb = *pskb;
	rx_handler_result_t ret = RX_HANDLER_PASS;

	lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
	if (!lyr3h)
		goto out;

	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
	if (addr)
		ret = ipvlan_rcv_frame(addr, pskb, false);

out:
	return ret;
}

static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
						 struct ipvl_port *port)
{
	struct sk_buff *skb = *pskb;
	struct ethhdr *eth = eth_hdr(skb);
	rx_handler_result_t ret = RX_HANDLER_PASS;

	if (is_multicast_ether_addr(eth->h_dest)) {
		if (ipvlan_external_frame(skb, port)) {
			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);

			/* External frames are queued for device local
			 * distribution, but a copy is given to master
			 * straight away to avoid sending duplicates later
			 * when work-queue processes this frame. This is
			 * achieved by returning RX_HANDLER_PASS.
			 */
			if (nskb) {
				ipvlan_skb_crossing_ns(nskb, NULL);
				ipvlan_multicast_enqueue(port, nskb, false);
			}
		}
	} else {
		/* Handle non-multicast packets as in L3 mode. */
		ret = ipvlan_handle_mode_l3(pskb, port);
	}

	return ret;
}

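/* rx_handler attached to the master device: steer ingress frames to
 * the L2 or L3 handler based on the port mode. In L3S mode delivery
 * happens later via ipvlan_l3_rcv()/ipvlan_nf_input(), so the frame is
 * passed up unchanged here.
 */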
rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;
	struct ipvl_port *port = ipvlan_port_get_rcu(skb->dev);

	if (!port)
		return RX_HANDLER_PASS;

	switch (port->mode) {
	case IPVLAN_MODE_L2:
		return ipvlan_handle_mode_l2(pskb, port);
	case IPVLAN_MODE_L3:
		return ipvlan_handle_mode_l3(pskb, port);
	case IPVLAN_MODE_L3S:
		return RX_HANDLER_PASS;
	}

	/* Should not reach here */
	WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n",
			  port->mode);
	kfree_skb(skb);
	return RX_HANDLER_CONSUMED;
}

static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb,
					    struct net_device *dev)
{
	struct ipvl_addr *addr = NULL;
	struct ipvl_port *port;
	void *lyr3h;
	int addr_type;

	if (!dev || !netif_is_ipvlan_port(dev))
		goto out;

	port = ipvlan_port_get_rcu(dev);
	if (!port || port->mode != IPVLAN_MODE_L3S)
		goto out;

	lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
	if (!lyr3h)
		goto out;

	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
out:
	return addr;
}

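/* Receive hook used in L3S mode (presumably via the l3mdev ops): redo
 * the input route lookup against the slave that owns the destination
 * address, so further processing happens in the slave's context.
 */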
struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
			      u16 proto)
{
	struct ipvl_addr *addr;
	struct net_device *sdev;

	addr = ipvlan_skb_to_addr(skb, dev);
	if (!addr)
		goto out;

	sdev = addr->master->dev;
	switch (proto) {
	case AF_INET:
	{
		int err;
		struct iphdr *ip4h = ip_hdr(skb);

		err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr,
					   ip4h->tos, sdev);
		if (unlikely(err))
			goto out;
		break;
	}
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
	{
		struct dst_entry *dst;
		struct ipv6hdr *ip6h = ipv6_hdr(skb);
		int flags = RT6_LOOKUP_F_HAS_SADDR;
		struct flowi6 fl6 = {
			.flowi6_iif   = sdev->ifindex,
			.daddr        = ip6h->daddr,
			.saddr        = ip6h->saddr,
			.flowlabel    = ip6_flowinfo(ip6h),
			.flowi6_mark  = skb->mark,
			.flowi6_proto = ip6h->nexthdr,
		};

		skb_dst_drop(skb);
		dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6,
					     skb, flags);
		skb_dst_set(skb, dst);
		break;
	}
#endif
	default:
		break;
	}

out:
	return skb;
}

unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
			     const struct nf_hook_state *state)
{
	struct ipvl_addr *addr;
	unsigned int len;

	addr = ipvlan_skb_to_addr(skb, skb->dev);
	if (!addr)
		goto out;

	skb->dev = addr->master->dev;
	len = skb->len + ETH_HLEN;
	ipvlan_count_rx(addr->master, len, true, false);
out:
	return NF_ACCEPT;
}