• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 #include <trace/events/fib6.h>
66 
67 #include <asm/uaccess.h>
68 
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72 
/* Outcome of the nexthop reachability check used when scoring routes.
 * Negative values are failures; see rt6_check_neigh() for where each is
 * produced and rt6_score_route()/find_match() for how they are handled.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* route cannot be used */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour is in NUD_FAILED */
	RT6_NUD_FAIL_DO_RR = -1,	/* no neighbour entry: round-robin */
	RT6_NUD_SUCCEED = 1
};
79 
80 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
81 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
82 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
83 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
84 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85 static void		ip6_dst_destroy(struct dst_entry *);
86 static void		ip6_dst_ifdown(struct dst_entry *,
87 				       struct net_device *dev, int how);
88 static int		 ip6_dst_gc(struct dst_ops *ops);
89 
90 static int		ip6_pkt_discard(struct sk_buff *skb);
91 static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
92 static int		ip6_pkt_prohibit(struct sk_buff *skb);
93 static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
94 static void		ip6_link_failure(struct sk_buff *skb);
95 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 					   struct sk_buff *skb, u32 mtu);
97 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 					struct sk_buff *skb);
99 static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
100 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
101 
102 #ifdef CONFIG_IPV6_ROUTE_INFO
103 static struct rt6_info *rt6_add_route_info(struct net *net,
104 					   const struct in6_addr *prefix, int prefixlen,
105 					   const struct in6_addr *gwaddr,
106 					   struct net_device *dev,
107 					   unsigned int pref);
108 static struct rt6_info *rt6_get_route_info(struct net *net,
109 					   const struct in6_addr *prefix, int prefixlen,
110 					   const struct in6_addr *gwaddr,
111 					   struct net_device *dev);
112 #endif
113 
/* Per-cpu list of uncached (DST_NOCACHE) rt6_info entries.  @lock
 * protects @head against concurrent add/del/flush.
 */
struct uncached_list {
	spinlock_t		lock;
	struct list_head	head;
};
118 
119 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
120 
/* Link @rt onto this cpu's uncached list so its device references can be
 * retargeted when a netdevice goes away (rt6_uncached_list_flush_dev()).
 */
static void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->dst.flags |= DST_NOCACHE;
	rt->rt6i_uncached_list = ul;	/* remembered for later removal */

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}
132 
/* Unlink @rt from the uncached list it was added to, if any.  Safe for
 * routes that were never added: rt6_info_init() leaves rt6i_uncached as
 * an empty list head.
 */
static void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		spin_unlock_bh(&ul->lock);
	}
}
143 
/* @dev is going away: walk every cpu's uncached list and move any route
 * still referencing @dev over to the namespace loopback device so the
 * last reference on @dev can be released.  Nothing to do when @dev is
 * the loopback device itself.
 */
static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			/* retarget the inet6_dev reference */
			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			/* retarget the netdevice reference */
			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}
175 
/* RTF_PCPU clones share metrics with their parent route: write through
 * to the dst.from entry's metrics.
 */
static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
	return dst_metrics_write_ptr(rt->dst.from);
}
180 
ipv6_cow_metrics(struct dst_entry * dst,unsigned long old)181 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
182 {
183 	struct rt6_info *rt = (struct rt6_info *)dst;
184 
185 	if (rt->rt6i_flags & RTF_PCPU)
186 		return rt6_pcpu_cow_metrics(rt);
187 	else if (rt->rt6i_flags & RTF_CACHE)
188 		return NULL;
189 	else
190 		return dst_cow_metrics_generic(dst, old);
191 }
192 
choose_neigh_daddr(struct rt6_info * rt,struct sk_buff * skb,const void * daddr)193 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
194 					     struct sk_buff *skb,
195 					     const void *daddr)
196 {
197 	struct in6_addr *p = &rt->rt6i_gateway;
198 
199 	if (!ipv6_addr_any(p))
200 		return (const void *) p;
201 	else if (skb)
202 		return &ipv6_hdr(skb)->daddr;
203 	return daddr;
204 }
205 
/* dst_ops neigh_lookup hook: resolve the nexthop address for @dst (see
 * choose_neigh_daddr()) in the IPv6 neighbour table, creating a new
 * neighbour entry if none exists yet.
 */
static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
					  struct sk_buff *skb,
					  const void *daddr)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct neighbour *n;

	daddr = choose_neigh_daddr(rt, skb, daddr);
	n = __ipv6_neigh_lookup(dst->dev, daddr);
	if (n)
		return n;
	return neigh_create(&nd_tbl, daddr, dst->dev);
}
219 
/* Template for the per-namespace dst_ops used by regular IPv6 routes
 * (allocation goes through net->ipv6.ip6_dst_ops, see __ip6_dst_alloc()).
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
237 
ip6_blackhole_mtu(const struct dst_entry * dst)238 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
239 {
240 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
241 
242 	return mtu ? : dst->dev->mtu;
243 }
244 
/* Blackhole dsts deliberately ignore PMTU updates. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
249 
/* Blackhole dsts deliberately ignore ICMPv6 redirects. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
254 
/* dst_ops for blackhole dst entries: no gc, and PMTU updates/redirects
 * are silently dropped via the stubs above.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_neigh_lookup,
};
266 
/* Metrics table for the template routes below; all zero, with the
 * hop-limit slot spelled out explicitly.
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
270 
/* Template for the per-namespace "no route" entry: rejects traffic with
 * -ENETUNREACH via the ip6_pkt_discard* handlers.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* worst possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};
285 
286 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
287 
/* Template for the "prohibited" policy-routing entry: rejects traffic
 * with -EACCES via the ip6_pkt_prohibit* handlers.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* worst possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};
302 
/* Template for the "blackhole" policy-routing entry: silently discards
 * traffic (dst error -EINVAL, dst_discard handlers).
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* worst possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};
317 
318 #endif
319 
/* Initialize the rt6_info-specific part of a freshly allocated route:
 * zero everything past the embedded dst_entry (which dst_alloc() has
 * already set up) and prime the list heads.
 */
static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	/* clear the tail of the structure that follows the dst_entry */
	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}
328 
/* Allocate a dst with this namespace's ip6_dst_ops and initialize the
 * rt6_info part; returns NULL on allocation failure.
 */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					0, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt)
		rt6_info_init(rt);

	return rt;
}
342 
/* Allocate a rt6_info together with its per-cpu route-clone cache.
 * Returns NULL if either allocation fails (the partially built route is
 * destroyed in that case).
 */
struct rt6_info *ip6_dst_alloc(struct net *net,
			       struct net_device *dev,
			       int flags)
{
	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);

	if (rt) {
		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
		if (rt->rt6i_pcpu) {
			int cpu;

			/* start with an empty clone cache on every cpu */
			for_each_possible_cpu(cpu) {
				struct rt6_info **p;

				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
				/* no one shares rt */
				*p =  NULL;
			}
		} else {
			dst_destroy((struct dst_entry *)rt);
			return NULL;
		}
	}

	return rt;
}
EXPORT_SYMBOL(ip6_dst_alloc);
370 
/* dst_ops destroy hook: release everything a rt6_info holds - generic
 * metrics, the per-cpu clone cache, membership on the uncached list,
 * the inet6_dev reference and the dst.from parent reference.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	/* clear the pointer before dropping the reference */
	dst->from = NULL;
	dst_release(from);
}
390 
/* dst_ops ifdown hook: @dev is being taken down, so repoint the route's
 * inet6_dev reference at the namespace loopback device, allowing @dev
 * to be released.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev) {
		if (idev && idev->dev == dev) {
			struct inet6_dev *loopback_idev =
				in6_dev_get(loopback_dev);
			if (loopback_idev) {
				rt->rt6i_idev = loopback_idev;
				in6_dev_put(idev);
			}
		}
	}
}
410 
__rt6_check_expired(const struct rt6_info * rt)411 static bool __rt6_check_expired(const struct rt6_info *rt)
412 {
413 	if (rt->rt6i_flags & RTF_EXPIRES)
414 		return time_after(jiffies, rt->dst.expires);
415 	else
416 		return false;
417 }
418 
/* Expiry test that also follows dst.from: a clone without RTF_EXPIRES
 * counts as expired when the route it was cloned from has expired.
 */
static bool rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
			return true;
	} else if (rt->dst.from) {
		/* recurse into the parent route */
		return rt6_check_expired((struct rt6_info *) rt->dst.from);
	}
	return false;
}
429 
/* Multipath route selection:
 *   Hash based function using packet header and flowlabel.
 * Adapted from fib_info_hashfn()
 *
 * Returns a nexthop index in [0, candidate_count).
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	return get_hash_from_flowi6(fl6) % candidate_count;
}
439 
rt6_multipath_select(struct rt6_info * match,struct flowi6 * fl6,int oif,int strict)440 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
441 					     struct flowi6 *fl6, int oif,
442 					     int strict)
443 {
444 	struct rt6_info *sibling, *next_sibling;
445 	int route_choosen;
446 
447 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
448 	/* Don't change the route, if route_choosen == 0
449 	 * (siblings does not include ourself)
450 	 */
451 	if (route_choosen)
452 		list_for_each_entry_safe(sibling, next_sibling,
453 				&match->rt6i_siblings, rt6i_siblings) {
454 			route_choosen--;
455 			if (route_choosen == 0) {
456 				if (rt6_score_route(sibling, oif, strict) < 0)
457 					break;
458 				match = sibling;
459 				break;
460 			}
461 		}
462 	return match;
463 }
464 
465 /*
466  *	Route lookup. Any table->tb6_lock is implied.
467  */
468 
/* Walk the leaf's list of routes and pick the one matching the requested
 * output interface (@oif) and/or source address (@saddr).
 *
 * With an oif: an exact device match wins immediately; a loopback route
 * is remembered as a weaker fallback.  If nothing matches and
 * RT6_LOOKUP_F_IFACE makes the oif binding strict, the null entry is
 * returned.  Without an oif, match on the source address instead.
 * Falls through to @rt itself when there is nothing to match on.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
515 
516 #ifdef CONFIG_IPV6_ROUTER_PREF
/* Work item carrying a deferred router-reachability probe: the target
 * address to solicit and the device (held) to send on.
 */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};
522 
/* Workqueue handler for rt6_probe(): send a Neighbour Solicitation to
 * the target's solicited-node multicast address, then drop the device
 * reference taken when the work was queued and free the work item.
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
	dev_put(work->dev);
	kfree(work);
}
534 
/* Router Reachability Probing: when the gateway's neighbour entry is
 * missing, or exists but is stale beyond rtr_probe_interval, queue a
 * deferred Neighbour Solicitation (rt6_probe_deferred) towards it.
 * Only meaningful for RTF_GATEWAY routes.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		/* re-check under the lock, and rate-limit the probe */
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);	/* dropped in rt6_probe_deferred() */
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
581 #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
585 #endif
586 
587 /*
588  * Default Router Selection (RFC 2461 6.3.6)
589  */
rt6_check_dev(struct rt6_info * rt,int oif)590 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
591 {
592 	struct net_device *dev = rt->dst.dev;
593 	if (!oif || dev->ifindex == oif)
594 		return 2;
595 	if ((dev->flags & IFF_LOOPBACK) &&
596 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
597 		return 1;
598 	return 0;
599 }
600 
/* Judge the reachability of the route's nexthop neighbour.  Routes
 * without a gateway trivially succeed.  With CONFIG_IPV6_ROUTER_PREF,
 * anything not NUD_FAILED passes and a missing entry also passes (a
 * probe will be scheduled); without it, a missing entry requests
 * round-robin fallback (RT6_NUD_FAIL_DO_RR).
 */
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
631 
/* Score @rt for route selection.  The device match contributes the low
 * bits; with CONFIG_IPV6_ROUTER_PREF the decoded router preference is
 * added above them.  Returns a negative rt6_nud_state value when the
 * route must be rejected (device mismatch under strict oif, or an
 * unreachable nexthop when RT6_LOOKUP_F_REACHABLE is requested).
 */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}
650 
/* Compare @rt against the best candidate so far (@match with score
 * *@mpri) and return the better of the two.  Expired routes and routes
 * on carrier-less devices (when linkdown routes are ignored) are
 * skipped.  *@do_rr is set when the winning route asked for round-robin
 * rotation (RT6_NUD_FAIL_DO_RR).
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown &&
	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}
688 
/* Pick the best route of the given @metric from the leaf's list,
 * starting at the round-robin head @rr_head and wrapping around via
 * fn->leaf.  Routes with a different metric terminate each scan; if no
 * route of @metric matched, fall back to scoring the remainder (@cont).
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	/* first pass: from the round-robin head to the end */
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	/* second pass: from the list head up to the round-robin head */
	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	/* nothing usable at @metric: consider the worse-metric routes */
	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}
725 
/* Default Router Selection for a fib6 node: score the routes starting at
 * the node's round-robin pointer and, when the winner requested it,
 * advance the pointer to the next same-metric route.  Returns the null
 * entry when nothing matched.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
753 
rt6_is_gw_or_nonexthop(const struct rt6_info * rt)754 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
755 {
756 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
757 }
758 
759 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Process a Route Information option (RFC 4191) received in a Router
 * Advertisement on @dev from gateway @gwaddr: add, refresh or (on zero
 * lifetime) delete the corresponding RTF_ROUTEINFO route.  Returns 0 on
 * success or -EINVAL for a malformed option.
 *
 * NOTE(review): @len is only validated against sizeof(struct route_info)
 * (the 8-byte header); reading a full 16-byte prefix for
 * rinfo->length == 3 relies on the caller passing the complete option -
 * verify at the call site.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* a zero prefix_len means the default route */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	/* zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
833 #endif
834 
/* Back out of a failed lookup: climb towards the tree root, descending
 * into a parent's source-routing subtree (FIB6_SUBTREE) when coming up
 * from outside it, until a node carrying routes (RTN_RTINFO) is found.
 * Returns NULL once the root (RTN_TL_ROOT) is reached.
 */
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = fn->parent;
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}
851 
/* Basic table lookup (no RTF_CACHE handling): find the fib6 node for the
 * flow, apply device/source matching and multipath selection, and
 * backtrack towards the root for as long as only the null entry matches.
 * Takes a reference on the returned route via dst_use().
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	/* nexthop resolution must not be pinned to the flow's oif */
	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		flags &= ~RT6_LOOKUP_F_IFACE;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);

	return rt;
}
882 
/* Public lookup entry point: run ip6_pol_route_lookup() through the
 * fib-rules policy engine.
 */
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				    int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
889 
rt6_lookup(struct net * net,const struct in6_addr * daddr,const struct in6_addr * saddr,int oif,int strict)890 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
891 			    const struct in6_addr *saddr, int oif, int strict)
892 {
893 	struct flowi6 fl6 = {
894 		.flowi6_oif = oif,
895 		.daddr = *daddr,
896 	};
897 	struct dst_entry *dst;
898 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
899 
900 	if (saddr) {
901 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
902 		flags |= RT6_LOOKUP_F_HAS_SADDR;
903 	}
904 
905 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
906 	if (dst->error == 0)
907 		return (struct rt6_info *) dst;
908 
909 	dst_release(dst);
910 
911 	return NULL;
912 }
913 EXPORT_SYMBOL(rt6_lookup);
914 
/* ip6_ins_rt is called with the table->tb6_lock NOT held (it takes the
 * lock itself).  It takes ownership of the new route entry: if the
 * addition fails for any reason, the route is freed.  In any case, if
 * the caller does not hold a reference, the route may be destroyed.
 */
920 
/* Insert @rt into its fib6 table under the table write lock; returns
 * the fib6_add() result (on failure the route has been freed, see the
 * comment above).
 */
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc);
	write_unlock_bh(&table->tb6_lock);

	return err;
}
934 
/* Insert @rt with default netlink info (the route's own netns) and no
 * extra metrics.
 */
int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
	struct mx6_config mxc = { .mx = NULL, };

	return __ip6_ins_rt(rt, &info, &mxc);
}
942 
/* Create an RTF_CACHE clone of @ort pinned to the exact destination
 * @daddr (/128) and, under CONFIG_IPV6_SUBTREES, optionally to @saddr
 * (/128).  When @ort is itself a clone (RTF_CACHE/RTF_PCPU), the clone
 * is based on its parent (ort->dst.from) instead.  Returns NULL on
 * allocation failure.
 */
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);

	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}
982 
/* Clone @rt into an RTF_PCPU copy to be cached on a single cpu; returns
 * NULL on allocation failure.
 */
static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt;

	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
				  rt->dst.dev, rt->dst.flags);

	if (!pcpu_rt)
		return NULL;
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}
997 
/* It should be called with read_lock_bh(&tb6_lock) acquired */
/* Return this cpu's cached clone of @rt with a dst reference taken, or
 * NULL if no clone has been installed yet.
 */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt) {
		dst_hold(&pcpu_rt->dst);
		rt6_dst_from_metrics_check(pcpu_rt);
	}
	return pcpu_rt;
}
1012 
/* Create and install this cpu's clone of @rt.  cmpxchg() resolves the
 * race with a concurrent installer (the earlier clone wins); if @rt has
 * been unlinked from the tree meanwhile, @rt itself is returned instead.
 * Always returns a route with a dst reference held; on allocation
 * failure that is the namespace null entry.
 */
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct fib6_table *table = rt->rt6i_table;
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	read_lock_bh(&table->tb6_lock);
	if (rt->rt6i_pcpu) {
		p = this_cpu_ptr(rt->rt6i_pcpu);
		prev = cmpxchg(p, NULL, pcpu_rt);
		if (prev) {
			/* If someone did it before us, return prev instead */
			dst_destroy(&pcpu_rt->dst);
			pcpu_rt = prev;
		}
	} else {
		/* rt has been removed from the fib6 tree
		 * before we have a chance to acquire the read_lock.
		 * In this case, don't brother to create a pcpu rt
		 * since rt is going away anyway.  The next
		 * dst_check() will trigger a re-lookup.
		 */
		dst_destroy(&pcpu_rt->dst);
		pcpu_rt = rt;
	}
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	read_unlock_bh(&table->tb6_lock);
	return pcpu_rt;
}
1050 
/* Core policy-routing lookup: find the best route for @fl6 in @table.
 *
 * Returns a dst with a reference held.  Depending on the match this is
 * the fib entry itself (null entry or an RTF_CACHE clone), a fresh
 * uncached clone (FLOWI_FLAG_KNOWN_NH without a gateway), or this
 * cpu's per-cpu copy of the matched route.
 */
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
			       int oif, struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
	/* Hosts (forwarding disabled) prefer reachable routers first */
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		/* No match in this node: climb the tree, and as a last
		 * resort retry from the original node without the
		 * reachability requirement.
		 */
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		/* Null entry or cached clone: usable as-is */
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);

		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);

		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
		return pcpu_rt;

	}
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
1150 
ip6_pol_route_input(struct net * net,struct fib6_table * table,struct flowi6 * fl6,int flags)1151 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1152 					    struct flowi6 *fl6, int flags)
1153 {
1154 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1155 }
1156 
ip6_route_input_lookup(struct net * net,struct net_device * dev,struct flowi6 * fl6,int flags)1157 struct dst_entry *ip6_route_input_lookup(struct net *net,
1158 					 struct net_device *dev,
1159 					 struct flowi6 *fl6, int flags)
1160 {
1161 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1162 		flags |= RT6_LOOKUP_F_IFACE;
1163 
1164 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1165 }
1166 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1167 
ip6_route_input(struct sk_buff * skb)1168 void ip6_route_input(struct sk_buff *skb)
1169 {
1170 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1171 	struct net *net = dev_net(skb->dev);
1172 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1173 	struct ip_tunnel_info *tun_info;
1174 	struct flowi6 fl6 = {
1175 		.flowi6_iif = skb->dev->ifindex,
1176 		.daddr = iph->daddr,
1177 		.saddr = iph->saddr,
1178 		.flowlabel = ip6_flowinfo(iph),
1179 		.flowi6_mark = skb->mark,
1180 		.flowi6_proto = iph->nexthdr,
1181 	};
1182 
1183 	tun_info = skb_tunnel_info(skb);
1184 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1185 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1186 	skb_dst_drop(skb);
1187 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1188 }
1189 
ip6_pol_route_output(struct net * net,struct fib6_table * table,struct flowi6 * fl6,int flags)1190 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1191 					     struct flowi6 *fl6, int flags)
1192 {
1193 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1194 }
1195 
ip6_route_output_flags(struct net * net,const struct sock * sk,struct flowi6 * fl6,int flags)1196 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1197 					 struct flowi6 *fl6, int flags)
1198 {
1199 	bool any_src;
1200 
1201 	if (rt6_need_strict(&fl6->daddr)) {
1202 		struct dst_entry *dst;
1203 
1204 		dst = l3mdev_link_scope_lookup(net, fl6);
1205 		if (dst)
1206 			return dst;
1207 	}
1208 
1209 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1210 
1211 	any_src = ipv6_addr_any(&fl6->saddr);
1212 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1213 	    (fl6->flowi6_oif && any_src))
1214 		flags |= RT6_LOOKUP_F_IFACE;
1215 
1216 	if (!any_src)
1217 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1218 	else if (sk)
1219 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1220 
1221 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1222 }
1223 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1224 
/* Clone @dst_orig into a "blackhole" dst whose input/output handlers
 * silently discard packets.  Consumes the reference on @dst_orig and
 * returns the new dst, or ERR_PTR(-ENOMEM) on allocation failure.
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		rt6_info_init(rt);

		new = &rt->dst;
		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_out;

		/* Copy identity from the original so the clone looks up
		 * the same destination.
		 */
		dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		/* RTF_PCPU is owned by the original's per-cpu machinery
		 * and must not leak into the clone.
		 */
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		/* NOTE(review): dst_free() on a dst that still holds a
		 * reference appears to arm deferred release rather than
		 * free it now — confirm against the dst core before
		 * changing this lifecycle.
		 */
		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
1259 
1260 /*
1261  *	Destination cache support functions
1262  */
1263 
rt6_dst_from_metrics_check(struct rt6_info * rt)1264 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1265 {
1266 	if (rt->dst.from &&
1267 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1268 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1269 }
1270 
/* Validate @rt against the caller's cached @cookie: the route is usable
 * only if its fib cookie still matches and it has not expired.
 */
static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
{
	u32 current_cookie = 0;

	if (rt6_get_cookie_safe(rt, &current_cookie) &&
	    current_cookie == cookie &&
	    !rt6_check_expired(rt))
		return &rt->dst;

	return NULL;
}
1283 
/* Validate a clone by checking both its own expiry and the route it
 * was copied from (rt->dst.from).
 */
static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
{
	if (__rt6_check_expired(rt))
		return NULL;
	if (rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK)
		return NULL;
	if (!rt6_check((struct rt6_info *)(rt->dst.from), cookie))
		return NULL;

	return &rt->dst;
}
1293 
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt = (struct rt6_info *)dst;

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */
	rt6_dst_from_metrics_check(rt);

	/* Per-cpu copies and uncached clones are validated against the
	 * route they were copied from; everything else validates its
	 * own fib state.
	 */
	if ((rt->rt6i_flags & RTF_PCPU) ||
	    (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
		return rt6_dst_from_check(rt, cookie);

	return rt6_check(rt, cookie);
}
1313 
ip6_negative_advice(struct dst_entry * dst)1314 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1315 {
1316 	struct rt6_info *rt = (struct rt6_info *) dst;
1317 
1318 	if (rt) {
1319 		if (rt->rt6i_flags & RTF_CACHE) {
1320 			if (rt6_check_expired(rt)) {
1321 				ip6_del_rt(rt);
1322 				dst = NULL;
1323 			}
1324 		} else {
1325 			dst_release(dst);
1326 			dst = NULL;
1327 		}
1328 	}
1329 	return dst;
1330 }
1331 
/* dst_ops->link_failure hook: tell the sender the address is
 * unreachable and invalidate the route the packet was using.
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* ip6_del_rt() consumes a reference; take one first */
			dst_hold(&rt->dst);
			ip6_del_rt(rt);
		} else {
			struct fib6_node *fn;

			/* Poison the node's serial number so cached users
			 * of a default route fail their next cookie check
			 * and re-look-up.
			 */
			rcu_read_lock();
			fn = rcu_dereference(rt->rt6i_node);
			if (fn && (rt->rt6i_flags & RTF_DEFAULT))
				fn->fn_sernum = -1;
			rcu_read_unlock();
		}
	}
}
1354 
/* Record a new path MTU on @rt and arm the PMTU expiry timer */
static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
{
	struct net *net = dev_net(rt->dst.dev);
	int expires = net->ipv6.sysctl.ip6_rt_mtu_expires;

	rt->rt6i_flags |= RTF_MODIFIED;
	rt->rt6i_pmtu = mtu;
	rt6_update_expires(rt, expires);
}
1363 
rt6_cache_allowed_for_pmtu(const struct rt6_info * rt)1364 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1365 {
1366 	return !(rt->rt6i_flags & RTF_CACHE) &&
1367 		(rt->rt6i_flags & RTF_PCPU ||
1368 		 rcu_access_pointer(rt->rt6i_node));
1369 }
1370 
/* Apply a new path MTU to @dst: either update it in place, or create
 * and insert an RTF_CACHE clone carrying the new MTU.  The destination
 * and source addresses for the clone come from @iph when present,
 * otherwise from @sk.
 */
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	/* Local routes never need PMTU state */
	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	dst_confirm(dst);
	/* Never go below the IPv6 minimum MTU, and only ever shrink */
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (!rt6_cache_allowed_for_pmtu(rt6)) {
		rt6_do_update_pmtu(rt6, mtu);
	} else {
		const struct in6_addr *daddr, *saddr;
		struct rt6_info *nrt6;

		if (iph) {
			daddr = &iph->daddr;
			saddr = &iph->saddr;
		} else if (sk) {
			daddr = &sk->sk_v6_daddr;
			saddr = &inet6_sk(sk)->saddr;
		} else {
			/* No addresses to key the clone on: give up */
			return;
		}
		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
			ip6_ins_rt(nrt6);
		}
	}
}
1415 
/* dst_ops->update_pmtu hook: extract the IPv6 header from @skb (when
 * one is supplied) and forward to the common PMTU helper.
 */
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	const struct ipv6hdr *iph = skb ? ipv6_hdr(skb) : NULL;

	__ip6_rt_update_pmtu(dst, sk, iph, mtu);
}
1421 
/* Look up the route for the packet embedded in @skb and apply the
 * advertised path MTU to it.
 */
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark, kuid_t uid)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_uid = uid,
	};
	struct dst_entry *dst;

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1443 
/* Apply a PMTU update on behalf of socket @sk, then refresh the
 * socket's cached dst if the update just invalidated it.
 */
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	struct dst_entry *dst;

	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);

	/* Nothing to do if there is no cached dst or it is still valid */
	dst = __sk_dst_get(sk);
	if (!dst || !dst->obsolete ||
	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
		return;

	/* Only re-route here if the socket is not busy in process
	 * context; otherwise the owner will re-validate on next use.
	 */
	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		ip6_datagram_dst_update(sk, false);
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1462 
/* Handle redirects */
struct ip6rd_flowi {
	struct flowi6 fl6;	 /* flow key used for the route lookup */
	struct in6_addr gateway; /* router the Redirect message came from */
};
1468 
/* Find the route an ICMPv6 Redirect applies to: the route currently in
 * use for fl6->daddr whose gateway matches the redirecting router.
 * Returns a held dst; the null entry when no acceptable route exists.
 */
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/* Get the "current" route for this destination and
	 * check if the redirect has come from approriate router.
	 *
	 * RFC 4861 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		/* Skip unusable candidates; stop on error sentinels */
		if (rt6_check_expired(rt))
			continue;
		if (rt->dst.error)
			break;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	else if (rt->dst.error) {
		/* Error routes end the search without backtracking */
		rt = net->ipv6.ip6_null_entry;
		goto out;
	}

	if (rt == net->ipv6.ip6_null_entry) {
		/* No match at this node: retry with less-specific prefixes */
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
	return rt;
};
1526 
ip6_route_redirect(struct net * net,const struct flowi6 * fl6,const struct in6_addr * gateway)1527 static struct dst_entry *ip6_route_redirect(struct net *net,
1528 					const struct flowi6 *fl6,
1529 					const struct in6_addr *gateway)
1530 {
1531 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1532 	struct ip6rd_flowi rdfl;
1533 
1534 	rdfl.fl6 = *fl6;
1535 	rdfl.gateway = *gateway;
1536 
1537 	return fib6_rule_lookup(net, &rdfl.fl6,
1538 				flags, __ip6_route_redirect);
1539 }
1540 
/* Process an ICMPv6 Redirect for the packet carried in @skb */
void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
		  kuid_t uid)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct flowi6 fl6 = {
		.flowi6_iif = LOOPBACK_IFINDEX,
		.flowi6_oif = oif,
		.flowi6_mark = mark,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_uid = uid,
	};
	struct dst_entry *dst;

	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);
1562 
/* Redirect variant for messages whose payload lacks the offending IPv6
 * header: take the target from the rd_msg instead.
 */
void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
			    u32 mark)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
	struct flowi6 fl6 = {
		.flowi6_iif = LOOPBACK_IFINDEX,
		.flowi6_oif = oif,
		.flowi6_mark = mark,
		.daddr = msg->dest,
		.saddr = iph->daddr,
		.flowi6_uid = sock_net_uid(net, NULL),
	};
	struct dst_entry *dst;

	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
1583 
ip6_sk_redirect(struct sk_buff * skb,struct sock * sk)1584 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1585 {
1586 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1587 		     sk->sk_uid);
1588 }
1589 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1590 
ip6_default_advmss(const struct dst_entry * dst)1591 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1592 {
1593 	struct net_device *dev = dst->dev;
1594 	unsigned int mtu = dst_mtu(dst);
1595 	struct net *net = dev_net(dev);
1596 
1597 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1598 
1599 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1600 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1601 
1602 	/*
1603 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1604 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1605 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1606 	 * rely only on pmtu discovery"
1607 	 */
1608 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1609 		mtu = IPV6_MAXPLEN;
1610 	return mtu;
1611 }
1612 
ip6_mtu(const struct dst_entry * dst)1613 static unsigned int ip6_mtu(const struct dst_entry *dst)
1614 {
1615 	const struct rt6_info *rt = (const struct rt6_info *)dst;
1616 	unsigned int mtu = rt->rt6i_pmtu;
1617 	struct inet6_dev *idev;
1618 
1619 	if (mtu)
1620 		goto out;
1621 
1622 	mtu = dst_metric_raw(dst, RTAX_MTU);
1623 	if (mtu)
1624 		goto out;
1625 
1626 	mtu = IPV6_MIN_MTU;
1627 
1628 	rcu_read_lock();
1629 	idev = __in6_dev_get(dst->dev);
1630 	if (idev)
1631 		mtu = idev->cnf.mtu6;
1632 	rcu_read_unlock();
1633 
1634 out:
1635 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1636 
1637 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
1638 }
1639 
/* Singly-linked list of dsts created by icmp6_dst_alloc(), reclaimed by
 * icmp6_dst_gc()/icmp6_clean_all(); icmp6_dst_lock protects the list.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1642 
/* Allocate a standalone (not fib-owned) host dst for sending an ICMPv6
 * packet along @fl6 via @dev.  The entry is chained on the
 * icmp6_dst_gc_list and reclaimed by icmp6_dst_gc() once released.
 * Returns the dst (possibly transformed by xfrm) or an ERR_PTR().
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output  = ip6_output;
	/* Caller's reference; icmp6_dst_gc() frees once it drops to 0 */
	atomic_set(&rt->dst.__refcnt, 1);
	rt->rt6i_gateway  = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev     = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	/* Chain on the ICMPv6 GC list under the list lock */
	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
1683 
icmp6_dst_gc(void)1684 int icmp6_dst_gc(void)
1685 {
1686 	struct dst_entry *dst, **pprev;
1687 	int more = 0;
1688 
1689 	spin_lock_bh(&icmp6_dst_lock);
1690 	pprev = &icmp6_dst_gc_list;
1691 
1692 	while ((dst = *pprev) != NULL) {
1693 		if (!atomic_read(&dst->__refcnt)) {
1694 			*pprev = dst->next;
1695 			dst_free(dst);
1696 		} else {
1697 			pprev = &dst->next;
1698 			++more;
1699 		}
1700 	}
1701 
1702 	spin_unlock_bh(&icmp6_dst_lock);
1703 
1704 	return more;
1705 }
1706 
icmp6_clean_all(int (* func)(struct rt6_info * rt,void * arg),void * arg)1707 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1708 			    void *arg)
1709 {
1710 	struct dst_entry *dst, **pprev;
1711 
1712 	spin_lock_bh(&icmp6_dst_lock);
1713 	pprev = &icmp6_dst_gc_list;
1714 	while ((dst = *pprev) != NULL) {
1715 		struct rt6_info *rt = (struct rt6_info *) dst;
1716 		if (func(rt, arg)) {
1717 			*pprev = dst->next;
1718 			dst_free(dst);
1719 		} else {
1720 			pprev = &dst->next;
1721 		}
1722 	}
1723 	spin_unlock_bh(&icmp6_dst_lock);
1724 }
1725 
/* dst_ops->gc hook: trigger fib6 garbage collection when the dst pool
 * is over ip6_rt_max_size or the minimum GC interval has elapsed.
 * Returns non-zero when the pool is still over the limit (the dst
 * allocator treats that as "refuse new entries").
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	/* ip6_rt_gc_expire grows under sustained pressure so each pass
	 * expires progressively more aggressively ...
	 */
	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	/* ... and decays by 1/2^elasticity on every invocation */
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
1750 
ip6_convert_metrics(struct mx6_config * mxc,const struct fib6_config * cfg)1751 static int ip6_convert_metrics(struct mx6_config *mxc,
1752 			       const struct fib6_config *cfg)
1753 {
1754 	bool ecn_ca = false;
1755 	struct nlattr *nla;
1756 	int remaining;
1757 	u32 *mp;
1758 
1759 	if (!cfg->fc_mx)
1760 		return 0;
1761 
1762 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1763 	if (unlikely(!mp))
1764 		return -ENOMEM;
1765 
1766 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1767 		int type = nla_type(nla);
1768 		u32 val;
1769 
1770 		if (!type)
1771 			continue;
1772 		if (unlikely(type > RTAX_MAX))
1773 			goto err;
1774 
1775 		if (type == RTAX_CC_ALGO) {
1776 			char tmp[TCP_CA_NAME_MAX];
1777 
1778 			nla_strlcpy(tmp, nla, sizeof(tmp));
1779 			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1780 			if (val == TCP_CA_UNSPEC)
1781 				goto err;
1782 		} else {
1783 			val = nla_get_u32(nla);
1784 		}
1785 		if (type == RTAX_HOPLIMIT && val > 255)
1786 			val = 255;
1787 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1788 			goto err;
1789 
1790 		mp[type - 1] = val;
1791 		__set_bit(type - 1, mxc->mx_valid);
1792 	}
1793 
1794 	if (ecn_ca) {
1795 		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1796 		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1797 	}
1798 
1799 	mxc->mx = mp;
1800 	return 0;
1801  err:
1802 	kfree(mp);
1803 	return -EINVAL;
1804 }
1805 
ip6_nh_lookup_table(struct net * net,struct fib6_config * cfg,const struct in6_addr * gw_addr)1806 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1807 					    struct fib6_config *cfg,
1808 					    const struct in6_addr *gw_addr)
1809 {
1810 	struct flowi6 fl6 = {
1811 		.flowi6_oif = cfg->fc_ifindex,
1812 		.daddr = *gw_addr,
1813 		.saddr = cfg->fc_prefsrc,
1814 	};
1815 	struct fib6_table *table;
1816 	struct rt6_info *rt;
1817 	int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
1818 
1819 	table = fib6_get_table(net, cfg->fc_table);
1820 	if (!table)
1821 		return NULL;
1822 
1823 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
1824 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1825 
1826 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1827 
1828 	/* if table lookup failed, fall back to full lookup */
1829 	if (rt == net->ipv6.ip6_null_entry) {
1830 		ip6_rt_put(rt);
1831 		rt = NULL;
1832 	}
1833 
1834 	return rt;
1835 }
1836 
/* Build (but do not insert) an rt6_info from a route configuration.
 *
 * On success the returned route holds references on its device and
 * inet6_dev, and rt->rt6i_table is set; the caller is expected to
 * insert it or dst_free() it.  Returns ERR_PTR() on error.
 */
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	/* RTF_PCPU is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_PCPU)
		goto out;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		goto out;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-specific routes require subtree support */
	if (cfg->fc_src_len)
		goto out;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination/route type */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		/* Lightweight tunnel encapsulation may wrap the default
		 * input/output handlers.
		 */
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* Error code and handlers depend on the reject flavor */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_out;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0))
			goto out;

		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt = NULL;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			/* Prefer resolving the gateway within the route's
			 * own table before a full lookup.
			 */
			if (cfg->fc_table) {
				grt = ip6_nh_lookup_table(net, cfg, gw_addr);

				if (grt) {
					if (grt->rt6i_flags & RTF_GATEWAY ||
					    (dev && dev != grt->dst.dev)) {
						ip6_rt_put(grt);
						grt = NULL;
					}
				}
			}

			if (!grt)
				grt = rt6_lookup(net, gw_addr, NULL,
						 cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* Adopt the device of the nexthop route */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* Preferred source must be an address on the egress device */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return rt;
out:
	/* Error path: release whatever was acquired so far */
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);

	return ERR_PTR(err);
}
2100 
ip6_route_add(struct fib6_config * cfg)2101 int ip6_route_add(struct fib6_config *cfg)
2102 {
2103 	struct mx6_config mxc = { .mx = NULL, };
2104 	struct rt6_info *rt;
2105 	int err;
2106 
2107 	rt = ip6_route_info_create(cfg);
2108 	if (IS_ERR(rt)) {
2109 		err = PTR_ERR(rt);
2110 		rt = NULL;
2111 		goto out;
2112 	}
2113 
2114 	err = ip6_convert_metrics(&mxc, cfg);
2115 	if (err)
2116 		goto out;
2117 
2118 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2119 
2120 	kfree(mxc.mx);
2121 
2122 	return err;
2123 out:
2124 	if (rt)
2125 		dst_free(&rt->dst);
2126 
2127 	return err;
2128 }
2129 
__ip6_del_rt(struct rt6_info * rt,struct nl_info * info)2130 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2131 {
2132 	int err;
2133 	struct fib6_table *table;
2134 	struct net *net = dev_net(rt->dst.dev);
2135 
2136 	if (rt == net->ipv6.ip6_null_entry ||
2137 	    rt->dst.flags & DST_NOCACHE) {
2138 		err = -ENOENT;
2139 		goto out;
2140 	}
2141 
2142 	table = rt->rt6i_table;
2143 	write_lock_bh(&table->tb6_lock);
2144 	err = fib6_del(rt, info);
2145 	write_unlock_bh(&table->tb6_lock);
2146 
2147 out:
2148 	ip6_rt_put(rt);
2149 	return err;
2150 }
2151 
ip6_del_rt(struct rt6_info * rt)2152 int ip6_del_rt(struct rt6_info *rt)
2153 {
2154 	struct nl_info info = {
2155 		.nl_net = dev_net(rt->dst.dev),
2156 	};
2157 	return __ip6_del_rt(rt, &info);
2158 }
2159 
/* Delete the first route matching @cfg from its table.
 * Returns 0 on success, -ESRCH when no route matches.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			/* cached clones are only removed when the request
			 * explicitly asks for them (RTM_F_CLONED)
			 */
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
				continue;
			/* hold a reference before dropping the lock;
			 * __ip6_del_rt() will consume it
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
2203 
/* Handle a received ICMPv6 Redirect for the path described by @dst:
 * validate the message per RFC 4861, update the neighbour cache for the
 * new first hop, and install a cached route towards it.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	/* length of the ND options trailing the fixed redirect header */
	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* dest == target means the destination itself is on-link;
	 * otherwise the target must be a link-local unicast router
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* routers must not accept redirects; hosts may opt out */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt->rt6i_flags & RTF_REJECT) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER)),
		     NDISC_REDIRECT, &ndopts);

	/* create a host route to msg->dest via the new first hop */
	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* the superseded cached route is removed from the tree */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
2319 
2320 /*
2321  *	Misc support functions
2322  */
2323 
/* Tie @rt to its parent route @from: take a reference on the parent and
 * share its metrics.  @from must not itself be derived from another
 * route (enforced by the BUG_ON).
 */
static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
	BUG_ON(from->dst.from);

	/* lifetime is now governed by the parent route */
	rt->rt6i_flags &= ~RTF_EXPIRES;
	dst_hold(&from->dst);
	rt->dst.from = &from->dst;
	/* read-only view of the parent's metrics */
	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}
2333 
ip6_rt_copy_init(struct rt6_info * rt,struct rt6_info * ort)2334 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2335 {
2336 	rt->dst.input = ort->dst.input;
2337 	rt->dst.output = ort->dst.output;
2338 	rt->rt6i_dst = ort->rt6i_dst;
2339 	rt->dst.error = ort->dst.error;
2340 	rt->rt6i_idev = ort->rt6i_idev;
2341 	if (rt->rt6i_idev)
2342 		in6_dev_hold(rt->rt6i_idev);
2343 	rt->dst.lastuse = jiffies;
2344 	rt->rt6i_gateway = ort->rt6i_gateway;
2345 	rt->rt6i_flags = ort->rt6i_flags;
2346 	rt6_set_from(rt, ort);
2347 	rt->rt6i_metric = ort->rt6i_metric;
2348 #ifdef CONFIG_IPV6_SUBTREES
2349 	rt->rt6i_src = ort->rt6i_src;
2350 #endif
2351 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2352 	rt->rt6i_table = ort->rt6i_table;
2353 	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2354 }
2355 
2356 #ifdef CONFIG_IPV6_ROUTE_INFO
rt6_get_route_info(struct net * net,const struct in6_addr * prefix,int prefixlen,const struct in6_addr * gwaddr,struct net_device * dev)2357 static struct rt6_info *rt6_get_route_info(struct net *net,
2358 					   const struct in6_addr *prefix, int prefixlen,
2359 					   const struct in6_addr *gwaddr,
2360 					   struct net_device *dev)
2361 {
2362 	u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO);
2363 	struct fib6_node *fn;
2364 	struct rt6_info *rt = NULL;
2365 	struct fib6_table *table;
2366 
2367 	table = fib6_get_table(net, tb_id);
2368 	if (!table)
2369 		return NULL;
2370 
2371 	read_lock_bh(&table->tb6_lock);
2372 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2373 	if (!fn)
2374 		goto out;
2375 
2376 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2377 		if (rt->dst.dev->ifindex != dev->ifindex)
2378 			continue;
2379 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2380 			continue;
2381 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2382 			continue;
2383 		dst_hold(&rt->dst);
2384 		break;
2385 	}
2386 out:
2387 	read_unlock_bh(&table->tb6_lock);
2388 	return rt;
2389 }
2390 
rt6_add_route_info(struct net * net,const struct in6_addr * prefix,int prefixlen,const struct in6_addr * gwaddr,struct net_device * dev,unsigned int pref)2391 static struct rt6_info *rt6_add_route_info(struct net *net,
2392 					   const struct in6_addr *prefix, int prefixlen,
2393 					   const struct in6_addr *gwaddr,
2394 					   struct net_device *dev,
2395 					   unsigned int pref)
2396 {
2397 	struct fib6_config cfg = {
2398 		.fc_metric	= IP6_RT_PRIO_USER,
2399 		.fc_ifindex	= dev->ifindex,
2400 		.fc_dst_len	= prefixlen,
2401 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2402 				  RTF_UP | RTF_PREF(pref),
2403 		.fc_nlinfo.portid = 0,
2404 		.fc_nlinfo.nlh = NULL,
2405 		.fc_nlinfo.nl_net = net,
2406 	};
2407 
2408 	cfg.fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO),
2409 	cfg.fc_dst = *prefix;
2410 	cfg.fc_gateway = *gwaddr;
2411 
2412 	/* We should treat it as a default route if prefix length is 0. */
2413 	if (!prefixlen)
2414 		cfg.fc_flags |= RTF_DEFAULT;
2415 
2416 	ip6_route_add(&cfg);
2417 
2418 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
2419 }
2420 #endif
2421 
rt6_get_dflt_router(const struct in6_addr * addr,struct net_device * dev)2422 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2423 {
2424 	u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_MAIN);
2425 	struct rt6_info *rt;
2426 	struct fib6_table *table;
2427 
2428 	table = fib6_get_table(dev_net(dev), tb_id);
2429 	if (!table)
2430 		return NULL;
2431 
2432 	read_lock_bh(&table->tb6_lock);
2433 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2434 		if (dev == rt->dst.dev &&
2435 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2436 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2437 			break;
2438 	}
2439 	if (rt)
2440 		dst_hold(&rt->dst);
2441 	read_unlock_bh(&table->tb6_lock);
2442 	return rt;
2443 }
2444 
rt6_add_dflt_router(const struct in6_addr * gwaddr,struct net_device * dev,unsigned int pref)2445 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2446 				     struct net_device *dev,
2447 				     unsigned int pref)
2448 {
2449 	struct fib6_config cfg = {
2450 		.fc_table	= l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_DFLT),
2451 		.fc_metric	= IP6_RT_PRIO_USER,
2452 		.fc_ifindex	= dev->ifindex,
2453 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2454 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2455 		.fc_nlinfo.portid = 0,
2456 		.fc_nlinfo.nlh = NULL,
2457 		.fc_nlinfo.nl_net = dev_net(dev),
2458 	};
2459 
2460 	cfg.fc_gateway = *gwaddr;
2461 
2462 	if (!ip6_route_add(&cfg)) {
2463 		struct fib6_table *table;
2464 
2465 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
2466 		if (table)
2467 			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2468 	}
2469 
2470 	return rt6_get_dflt_router(gwaddr, dev);
2471 }
2472 
rt6_addrconf_purge(struct rt6_info * rt,void * arg)2473 int rt6_addrconf_purge(struct rt6_info *rt, void *arg) {
2474 	if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2475 	    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2))
2476 		return -1;
2477 	return 0;
2478 }
2479 
/* Remove all RA-learned default/addrconf routes in @net (e.g. when
 * forwarding is enabled); see rt6_addrconf_purge() for the filter.
 */
void rt6_purge_dflt_routers(struct net *net)
{
	fib6_clean_all(net, rt6_addrconf_purge, NULL);
}
2484 
rtmsg_to_fib6_config(struct net * net,struct in6_rtmsg * rtmsg,struct fib6_config * cfg)2485 static void rtmsg_to_fib6_config(struct net *net,
2486 				 struct in6_rtmsg *rtmsg,
2487 				 struct fib6_config *cfg)
2488 {
2489 	memset(cfg, 0, sizeof(*cfg));
2490 
2491 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2492 			 : RT6_TABLE_MAIN;
2493 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2494 	cfg->fc_metric = rtmsg->rtmsg_metric;
2495 	cfg->fc_expires = rtmsg->rtmsg_info;
2496 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2497 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2498 	cfg->fc_flags = rtmsg->rtmsg_flags;
2499 
2500 	cfg->fc_nlinfo.nl_net = net;
2501 
2502 	cfg->fc_dst = rtmsg->rtmsg_dst;
2503 	cfg->fc_src = rtmsg->rtmsg_src;
2504 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2505 }
2506 
/* SIOCADDRT/SIOCDELRT ioctl entry point for IPv6 routes.
 * Requires CAP_NET_ADMIN; any other command yields -EINVAL.
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct in6_rtmsg rtmsg;
	struct fib6_config cfg;
	int err;

	if (cmd != SIOCADDRT && cmd != SIOCDELRT)
		return -EINVAL;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (copy_from_user(&rtmsg, arg, sizeof(struct in6_rtmsg)))
		return -EFAULT;

	rtmsg_to_fib6_config(net, &rtmsg, &cfg);

	rtnl_lock();
	if (cmd == SIOCADDRT)
		err = ip6_route_add(&cfg);
	else
		err = ip6_route_del(&cfg);
	rtnl_unlock();

	return err;
}
2543 
2544 /*
2545  *	Drop the packet on the floor
2546  */
2547 
/* Drop @skb, bump the appropriate SNMP no-route counter and send an
 * ICMPv6 destination-unreachable with @code back to the sender.
 * Always returns 0.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	struct dst_entry *dst = skb_dst(skb);

	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		/* an unspecified destination is an address error,
		 * not a routing failure
		 */
		if (ipv6_addr_type(&ipv6_hdr(skb)->daddr) == IPV6_ADDR_ANY) {
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* fall through */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
2570 
/* dst input handler for blackhole routes: drop with "no route". */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2575 
/* dst output handler for blackhole routes: drop with "no route". */
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
2581 
/* dst input handler for prohibit routes: drop as administratively
 * prohibited.
 */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2586 
/* dst output handler for prohibit routes: drop as administratively
 * prohibited.
 */
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2592 
2593 /*
2594  *	Allocate a dst for local (unicast / anycast) address.
2595  */
2596 
/* Allocate a host route for a local unicast or anycast address @addr
 * owned by @idev.  The route lives in the local table and is returned
 * with one reference; on allocation failure, ERR_PTR(-ENOMEM).
 */
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	struct net *net = dev_net(idev->dev);
	struct net_device *dev = net->loopback_dev;
	struct rt6_info *rt;
	u32 tb_id;

	/* use L3 Master device as loopback for host routes if device
	 * is enslaved and address is not link local or multicast
	 */
	if (!rt6_need_strict(addr))
		dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;

	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
	if (!rt)
		return ERR_PTR(-ENOMEM);

	in6_dev_hold(idev);

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP |
			 (anycast ? RTF_ANYCAST : RTF_LOCAL);

	rt->rt6i_gateway  = *addr;
	rt->rt6i_dst.addr = *addr;
	rt->rt6i_dst.plen = 128;
	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
	rt->rt6i_table = fib6_get_table(net, tb_id);
	rt->dst.flags |= DST_NOCACHE;

	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}
2640 
2641 /* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to this device; NULL matches any */
	struct net *net;
	struct in6_addr *addr;	/* preferred source address being removed */
};
2647 
fib6_remove_prefsrc(struct rt6_info * rt,void * arg)2648 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2649 {
2650 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2651 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2652 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2653 
2654 	if (((void *)rt->dst.dev == dev || !dev) &&
2655 	    rt != net->ipv6.ip6_null_entry &&
2656 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2657 		/* remove prefsrc entry */
2658 		rt->rt6i_prefsrc.plen = 0;
2659 	}
2660 	return 0;
2661 }
2662 
rt6_remove_prefsrc(struct inet6_ifaddr * ifp)2663 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2664 {
2665 	struct net *net = dev_net(ifp->idev->dev);
2666 	struct arg_dev_net_ip adni = {
2667 		.dev = ifp->idev->dev,
2668 		.net = net,
2669 		.addr = &ifp->addr,
2670 	};
2671 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2672 }
2673 
2674 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2675 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2676 
2677 /* Remove routers and update dst entries when gateway turn into host. */
fib6_clean_tohost(struct rt6_info * rt,void * arg)2678 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2679 {
2680 	struct in6_addr *gateway = (struct in6_addr *)arg;
2681 
2682 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2683 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2684 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2685 		return -1;
2686 	}
2687 	return 0;
2688 }
2689 
/* A neighbour stopped being a router: drop routes using it as gateway
 * (see fib6_clean_tohost() for the matching rules).
 */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
2694 
/* Argument bundle for fib6_ifdown(): device going away and its netns. */
struct arg_dev_net {
	struct net_device *dev;	/* NULL means flush routes on all devices */
	struct net *net;
};
2699 
fib6_ifdown(struct rt6_info * rt,void * arg)2700 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2701 {
2702 	const struct arg_dev_net *adn = arg;
2703 	const struct net_device *dev = adn->dev;
2704 
2705 	if ((rt->dst.dev == dev || !dev) &&
2706 	    rt != adn->net->ipv6.ip6_null_entry)
2707 		return -1;
2708 
2709 	return 0;
2710 }
2711 
rt6_ifdown(struct net * net,struct net_device * dev)2712 void rt6_ifdown(struct net *net, struct net_device *dev)
2713 {
2714 	struct arg_dev_net adn = {
2715 		.dev = dev,
2716 		.net = net,
2717 	};
2718 
2719 	fib6_clean_all(net, fib6_ifdown, &adn);
2720 	icmp6_clean_all(fib6_ifdown, &adn);
2721 	if (dev)
2722 		rt6_uncached_list_flush_dev(net, dev);
2723 }
2724 
/* Argument bundle for the rt6_mtu_change_route() FIB walk. */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2729 
/* fib6_clean_all() callback: propagate a device MTU change into the
 * PMTU stored on each affected route.  Never deletes routes.
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discovery.
	 */
	if (rt->dst.dev == arg->dev &&
	    dst_metric_raw(&rt->dst, RTAX_MTU) &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* For RTF_CACHE with rt6i_pmtu == 0
			 * (i.e. a redirected route),
			 * the metrics of its rt->dst.from has already
			 * been updated.
			 */
			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
				rt->rt6i_pmtu = arg->mtu;
		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
			   (dst_mtu(&rt->dst) < arg->mtu &&
			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
	}
	return 0;
}
2778 
rt6_mtu_change(struct net_device * dev,unsigned int mtu)2779 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2780 {
2781 	struct rt6_mtu_change_arg arg = {
2782 		.dev = dev,
2783 		.mtu = mtu,
2784 	};
2785 
2786 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2787 }
2788 
/* Netlink attribute policy for RTM_{NEW,DEL,GET}ROUTE requests. */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
	[RTA_EXPIRES]		= { .type = NLA_U32 },
	[RTA_UID]		= { .type = NLA_U32 },
	[RTA_TABLE]		= { .type = NLA_U32 },
};
2804 
/* Parse an RTM_NEWROUTE/RTM_DELROUTE netlink message into @cfg.
 * Returns 0 on success or a negative errno on malformed attributes.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	unsigned int pref;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	/* these route types all install non-forwarding (reject) entries */
	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	if (rtm->rtm_flags & RTM_F_CLONED)
		cfg->fc_flags |= RTF_CACHE;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		/* the attribute may carry only prefix-length bytes */
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);

		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
						     cfg->fc_mp_len);
		if (err < 0)
			goto errout;
	}

	if (tb[RTA_PREF]) {
		/* unknown preference values fall back to medium */
		pref = nla_get_u8(tb[RTA_PREF]);
		if (pref != ICMPV6_ROUTER_PREF_LOW &&
		    pref != ICMPV6_ROUTER_PREF_HIGH)
			pref = ICMPV6_ROUTER_PREF_MEDIUM;
		cfg->fc_flags |= RTF_PREF(pref);
	}

	if (tb[RTA_ENCAP])
		cfg->fc_encap = tb[RTA_ENCAP];

	if (tb[RTA_ENCAP_TYPE]) {
		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);

		err = lwtunnel_valid_encap_type(cfg->fc_encap_type);
		if (err < 0)
			goto errout;
	}

	if (tb[RTA_EXPIRES]) {
		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);

		if (addrconf_finite_timeout(timeout)) {
			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
			cfg->fc_flags |= RTF_EXPIRES;
		}
	}

	err = 0;
errout:
	return err;
}
2926 
/* Per-nexthop bookkeeping while building a multipath route. */
struct rt6_nh {
	struct rt6_info *rt6_info;	/* route created for this nexthop */
	struct fib6_config r_cfg;	/* config used; kept for rollback */
	struct mx6_config mxc;		/* converted metrics */
	struct list_head next;
};
2933 
ip6_print_replace_route_err(struct list_head * rt6_nh_list)2934 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2935 {
2936 	struct rt6_nh *nh;
2937 
2938 	list_for_each_entry(nh, rt6_nh_list, next) {
2939 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2940 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2941 		        nh->r_cfg.fc_ifindex);
2942 	}
2943 }
2944 
ip6_route_info_append(struct list_head * rt6_nh_list,struct rt6_info * rt,struct fib6_config * r_cfg)2945 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2946 				 struct rt6_info *rt, struct fib6_config *r_cfg)
2947 {
2948 	struct rt6_nh *nh;
2949 	int err = -EEXIST;
2950 
2951 	list_for_each_entry(nh, rt6_nh_list, next) {
2952 		/* check if rt6_info already exists */
2953 		if (rt6_duplicate_nexthop(nh->rt6_info, rt))
2954 			return err;
2955 	}
2956 
2957 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2958 	if (!nh)
2959 		return -ENOMEM;
2960 	nh->rt6_info = rt;
2961 	err = ip6_convert_metrics(&nh->mxc, r_cfg);
2962 	if (err) {
2963 		kfree(nh);
2964 		return err;
2965 	}
2966 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2967 	list_add_tail(&nh->next, rt6_nh_list);
2968 
2969 	return 0;
2970 }
2971 
/* Add a multipath route: build one rt6_info per RTA_MULTIPATH nexthop,
 * then insert them one by one.  On a mid-insertion failure, the already
 * installed nexthops are deleted again.
 */
static int ip6_route_multipath_add(struct fib6_config *cfg)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	struct rt6_info *rt;
	struct rt6_nh *err_nh;
	struct rt6_nh *nh, *nh_safe;
	int remaining;
	int attrlen;
	int err = 1;
	int nhn = 0;
	int replace = (cfg->fc_nlinfo.nlh &&
		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
	LIST_HEAD(rt6_nh_list);

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
	 * rt6_info structs per nexthop
	 */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				r_cfg.fc_gateway = nla_get_in6_addr(nla);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				r_cfg.fc_encap_type = nla_get_u16(nla);
		}

		rt = ip6_route_info_create(&r_cfg);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			rt = NULL;
			goto cleanup;
		}

		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
		if (err) {
			/* the list did not take ownership; free here */
			dst_free(&rt->dst);
			goto cleanup;
		}

		rtnh = rtnh_next(rtnh, &remaining);
	}

	err_nh = NULL;
	list_for_each_entry(nh, &rt6_nh_list, next) {
		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
		/* nh->rt6_info is used or freed at this point, reset to NULL*/
		nh->rt6_info = NULL;
		if (err) {
			if (replace && nhn)
				ip6_print_replace_route_err(&rt6_nh_list);
			err_nh = nh;
			goto add_errout;
		}

		/* Because each route is added like a single route we remove
		 * these flags after the first nexthop: if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it; when replacing, old
		 * nexthops have been replaced by first new, the rest should
		 * be added to it.
		 */
		/* NOTE(review): nlh is dereferenced unconditionally here;
		 * presumably this path is only reached via netlink where
		 * nlh is non-NULL — confirm against callers.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		nhn++;
	}

	goto cleanup;

add_errout:
	/* Delete routes that were already added */
	list_for_each_entry(nh, &rt6_nh_list, next) {
		if (err_nh == nh)
			break;
		ip6_route_del(&nh->r_cfg);
	}

cleanup:
	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
		if (nh->rt6_info)
			dst_free(&nh->rt6_info->dst);
		kfree(nh->mxc.mx);
		list_del(&nh->next);
		kfree(nh);
	}

	return err;
}
3074 
ip6_route_multipath_del(struct fib6_config * cfg)3075 static int ip6_route_multipath_del(struct fib6_config *cfg)
3076 {
3077 	struct fib6_config r_cfg;
3078 	struct rtnexthop *rtnh;
3079 	int remaining;
3080 	int attrlen;
3081 	int err = 1, last_err = 0;
3082 
3083 	remaining = cfg->fc_mp_len;
3084 	rtnh = (struct rtnexthop *)cfg->fc_mp;
3085 
3086 	/* Parse a Multipath Entry */
3087 	while (rtnh_ok(rtnh, remaining)) {
3088 		memcpy(&r_cfg, cfg, sizeof(*cfg));
3089 		if (rtnh->rtnh_ifindex)
3090 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3091 
3092 		attrlen = rtnh_attrlen(rtnh);
3093 		if (attrlen > 0) {
3094 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3095 
3096 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3097 			if (nla) {
3098 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3099 				r_cfg.fc_flags |= RTF_GATEWAY;
3100 			}
3101 		}
3102 		err = ip6_route_del(&r_cfg);
3103 		if (err)
3104 			last_err = err;
3105 
3106 		rtnh = rtnh_next(rtnh, &remaining);
3107 	}
3108 
3109 	return last_err;
3110 }
3111 
inet6_rtm_delroute(struct sk_buff * skb,struct nlmsghdr * nlh)3112 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3113 {
3114 	struct fib6_config cfg;
3115 	int err;
3116 
3117 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3118 	if (err < 0)
3119 		return err;
3120 
3121 	if (cfg.fc_mp)
3122 		return ip6_route_multipath_del(&cfg);
3123 	else
3124 		return ip6_route_del(&cfg);
3125 }
3126 
inet6_rtm_newroute(struct sk_buff * skb,struct nlmsghdr * nlh)3127 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3128 {
3129 	struct fib6_config cfg;
3130 	int err;
3131 
3132 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3133 	if (err < 0)
3134 		return err;
3135 
3136 	if (cfg.fc_mp)
3137 		return ip6_route_multipath_add(&cfg);
3138 	else
3139 		return ip6_route_add(&cfg);
3140 }
3141 
rt6_nlmsg_size(struct rt6_info * rt)3142 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3143 {
3144 	return NLMSG_ALIGN(sizeof(struct rtmsg))
3145 	       + nla_total_size(16) /* RTA_SRC */
3146 	       + nla_total_size(16) /* RTA_DST */
3147 	       + nla_total_size(16) /* RTA_GATEWAY */
3148 	       + nla_total_size(16) /* RTA_PREFSRC */
3149 	       + nla_total_size(4) /* RTA_TABLE */
3150 	       + nla_total_size(4) /* RTA_IIF */
3151 	       + nla_total_size(4) /* RTA_OIF */
3152 	       + nla_total_size(4) /* RTA_PRIORITY */
3153 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3154 	       + nla_total_size(sizeof(struct rta_cacheinfo))
3155 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3156 	       + nla_total_size(1) /* RTA_PREF */
3157 	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
3158 }
3159 
/* Fill one RTM_* netlink message describing @rt into @skb.
 *
 * @dst/@src: when non-NULL (route-get replies), report these exact
 *	queried addresses as /128 instead of the route's own prefixes.
 * @iif: input interface index of a lookup reply; 0 for dumps/notifies.
 * @prefix: non-zero means the caller only wants RTF_PREFIX_RT routes;
 *	anything else is skipped with return value 1.
 * @nowait: forwarded to ip6mr_get_route() for multicast destinations.
 *
 * Returns 0 on success, 1 when skipped by the @prefix filter, and
 * -EMSGSIZE when @skb has no room (message is cancelled).
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	u32 metrics[RTAX_MAX];
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	if (rt->rt6i_flags & RTF_REJECT) {
		/* Reject routes encode their flavor in dst.error; map it
		 * back onto the rtnetlink route type.
		 */
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	/* Report link state; RTNH_F_DEAD only when this netns ignores
	 * routes on link-down interfaces.
	 */
	if (!netif_carrier_ok(rt->dst.dev)) {
		rtm->rtm_flags |= RTNH_F_LINKDOWN;
		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
			rtm->rtm_flags |= RTNH_F_DEAD;
	}
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	/* Redirect- and addrconf-learned routes override the stored
	 * protocol with the conventional RTPROT_* value.
	 */
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		/* Caller asked about a specific destination: report it
		 * as a host route.
		 */
		if (nla_put_in6_addr(skb, RTA_DST, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		/* Multicast destinations are resolved through the
		 * multicast routing engine instead of RTA_IIF.
		 */
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait,
						  portid);

			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		/* Output lookup: advertise the source address the stack
		 * would pick for this destination.
		 */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* Metrics come from the dst, but a cached path MTU wins. */
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt6i_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* Remaining lifetime in jiffies, 0 for permanent routes. */
	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
		goto nla_put_failure;

	if (lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
3322 
rt6_dump_route(struct rt6_info * rt,void * p_arg)3323 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3324 {
3325 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3326 	int prefix;
3327 
3328 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3329 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3330 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3331 	} else
3332 		prefix = 0;
3333 
3334 	return rt6_fill_node(arg->net,
3335 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3336 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3337 		     prefix, 0, NLM_F_MULTI);
3338 }
3339 
/* RTM_GETROUTE handler: perform a route lookup for the addresses and
 * selectors carried in @nlh, then unicast an RTM_NEWROUTE reply built
 * by rt6_fill_node() back to the requesting socket.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	int err, iif = 0, oif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	/* Default error for malformed (too-short) address attributes. */
	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));
	rtm = nlmsg_data(nlh);
	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_MARK])
		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);

	/* RTA_UID lets the caller ask "as which uid?"; otherwise use the
	 * caller's uid for output lookups, none for input lookups.
	 */
	if (tb[RTA_UID])
		fl6.flowi6_uid = make_kuid(current_user_ns(),
					   nla_get_u32(tb[RTA_UID]));
	else
		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();

	if (iif) {
		/* Input-side lookup: simulate a packet arriving on iif. */
		struct net_device *dev;
		int flags = 0;

		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
							       flags);
	} else {
		/* Output-side lookup. */
		fl6.flowi6_oif = oif;

		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* skb takes over the route reference; freed with the skb. */
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
3438 
inet6_rt_notify(int event,struct rt6_info * rt,struct nl_info * info,unsigned int nlm_flags)3439 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3440 		     unsigned int nlm_flags)
3441 {
3442 	struct sk_buff *skb;
3443 	struct net *net = info->nl_net;
3444 	u32 seq;
3445 	int err;
3446 
3447 	err = -ENOBUFS;
3448 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3449 
3450 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3451 	if (!skb)
3452 		goto errout;
3453 
3454 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3455 				event, info->portid, seq, 0, 0, nlm_flags);
3456 	if (err < 0) {
3457 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3458 		WARN_ON(err == -EMSGSIZE);
3459 		kfree_skb(skb);
3460 		goto errout;
3461 	}
3462 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3463 		    info->nlh, gfp_any());
3464 	return;
3465 errout:
3466 	if (err < 0)
3467 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3468 }
3469 
ip6_route_dev_notify(struct notifier_block * this,unsigned long event,void * ptr)3470 static int ip6_route_dev_notify(struct notifier_block *this,
3471 				unsigned long event, void *ptr)
3472 {
3473 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3474 	struct net *net = dev_net(dev);
3475 
3476 	if (!(dev->flags & IFF_LOOPBACK))
3477 		return NOTIFY_OK;
3478 
3479 	if (event == NETDEV_REGISTER) {
3480 		net->ipv6.ip6_null_entry->dst.dev = dev;
3481 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3482 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3483 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3484 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3485 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3486 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3487 #endif
3488 	 } else if (event == NETDEV_UNREGISTER &&
3489 		    dev->reg_state != NETREG_UNREGISTERED) {
3490 		/* NETDEV_UNREGISTER could be fired for multiple times by
3491 		 * netdev_wait_allrefs(). Make sure we only call this once.
3492 		 */
3493 		in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
3494 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3495 		in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
3496 		in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev);
3497 #endif
3498 	}
3499 
3500 	return NOTIFY_OK;
3501 }
3502 
3503 /*
3504  *	/proc
3505  */
3506 
3507 #ifdef CONFIG_PROC_FS
3508 
/* /proc/net/ipv6_route: per-net seq_file of the routing table.
 * The open handler (ipv6_route_open) is defined earlier in this file.
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
3516 
/* /proc/net/rt6_stats: one line of seven hex words — fib node counts,
 * route entry counts, cache size, active dst entries, and the number
 * of discarded routes for this network namespace.
 */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
3531 
/* open() for /proc/net/rt6_stats: single-shot, namespace-aware. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
3536 
/* File operations for /proc/net/rt6_stats. */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
3544 #endif	/* CONFIG_PROC_FS */
3545 
3546 #ifdef CONFIG_SYSCTL
3547 
3548 static
ipv6_sysctl_rtcache_flush(struct ctl_table * ctl,int write,void __user * buffer,size_t * lenp,loff_t * ppos)3549 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3550 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3551 {
3552 	struct net *net;
3553 	int delay;
3554 	if (!write)
3555 		return -EINVAL;
3556 
3557 	net = (struct net *)ctl->extra1;
3558 	delay = net->ipv6.sysctl.flush_delay;
3559 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3560 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3561 	return 0;
3562 }
3563 
/* Template for the per-namespace net.ipv6.route.* sysctl table.
 *
 * NOTE: entry order is load-bearing — ipv6_route_sysctl_init() patches
 * the .data pointers by index (table[0]..table[9]); keep both in sync.
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		/* write-only trigger, see ipv6_sysctl_rtcache_flush() */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* same variable as gc_min_interval, in milliseconds */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
3637 
ipv6_route_sysctl_init(struct net * net)3638 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3639 {
3640 	struct ctl_table *table;
3641 
3642 	table = kmemdup(ipv6_route_table_template,
3643 			sizeof(ipv6_route_table_template),
3644 			GFP_KERNEL);
3645 
3646 	if (table) {
3647 		table[0].data = &net->ipv6.sysctl.flush_delay;
3648 		table[0].extra1 = net;
3649 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3650 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3651 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3652 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3653 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3654 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3655 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3656 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3657 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3658 
3659 		/* Don't export sysctls to unprivileged users */
3660 		if (net->user_ns != &init_user_ns)
3661 			table[0].procname = NULL;
3662 	}
3663 
3664 	return table;
3665 }
3666 #endif
3667 
/* Per-namespace setup: clone the dst ops and the template routes
 * (null entry; plus prohibit/blackhole with multiple tables), then
 * seed the namespace's routing sysctls with their defaults.
 * Unwinds all partial allocations on failure via the goto chain.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	/* Each template route gets its own per-net copy whose dst path
	 * points back at itself and whose ops are this net's dst ops.
	 */
	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* Default values for the net.ipv6.route.* sysctls. */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
3739 
/* Per-namespace teardown: free the template routes allocated by
 * ip6_route_net_init() and destroy the dst entry counter.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
3749 
ip6_route_net_init_late(struct net * net)3750 static int __net_init ip6_route_net_init_late(struct net *net)
3751 {
3752 #ifdef CONFIG_PROC_FS
3753 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3754 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3755 #endif
3756 	return 0;
3757 }
3758 
/* Late per-namespace teardown: remove the /proc/net entries created by
 * ip6_route_net_init_late().
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
3766 
/* Main per-network-namespace lifecycle of the IPv6 routing state. */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3771 
ipv6_inetpeer_init(struct net * net)3772 static int __net_init ipv6_inetpeer_init(struct net *net)
3773 {
3774 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3775 
3776 	if (!bp)
3777 		return -ENOMEM;
3778 	inet_peer_base_init(bp);
3779 	net->ipv6.peers = bp;
3780 	return 0;
3781 }
3782 
/* Tear down this namespace's inetpeer base: detach it first, then
 * invalidate all peer entries before freeing the base itself.
 */
static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}
3791 
/* Per-net inetpeer storage used by the IPv6 routing code. */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};

/* Late pernet ops (/proc entries), registered after fib6/rules init. */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};

/* Priority sits just below addrconf's so addrconf's notifier handles
 * each device event first.
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};
3806 
/* Attach init_net's template routes to its loopback device.  Normal
 * namespaces get this via the NETDEV_REGISTER notifier; init_net's
 * loopback registers before the notifier exists, so do it by hand.
 */
void __init ip6_route_init_special_entries(void)
{
	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
}
3821 
/* Boot-time initialization of the IPv6 routing subsystem: slab cache,
 * dst entry counters, pernet ops, fib6 core, xfrm6, policy rules,
 * rtnetlink handlers, device notifier, and the per-cpu uncached-route
 * lists.  Failures unwind in reverse registration order via gotos.
 */
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* Blackhole dsts share the same slab as regular rt6_info. */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	/* Hook the RTM_{NEW,DEL,GET}ROUTE message handlers. */
	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	/* Per-cpu lists of routes not attached to the fib tree. */
	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
3902 
/* Module teardown: unwind everything ip6_route_init() set up, in the
 * exact reverse order of registration.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3915