• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61 
62 #include <asm/uaccess.h>
63 
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67 
/* Neighbour-reachability verdicts returned by rt6_check_neigh() and
 * propagated through rt6_score_route() into find_match().  Negative
 * values are distinct failure modes; positive means usable.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* reject this route outright */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour is in NUD_FAILED state */
	RT6_NUD_FAIL_DO_RR = -1,	/* no neighbour entry: round-robin */
	RT6_NUD_SUCCEED = 1		/* (probably) reachable */
};
74 
/* Forward declarations for the dst_ops callbacks and lookup helpers
 * defined later in this file.
 */
static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
				    const struct in6_addr *dest);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
static int		ip6_pkt_prohibit(struct sk_buff *skb);
static int		ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net_device *dev,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net_device *dev,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr);
#endif
105 
rt6_bind_peer(struct rt6_info * rt,int create)106 static void rt6_bind_peer(struct rt6_info *rt, int create)
107 {
108 	struct inet_peer_base *base;
109 	struct inet_peer *peer;
110 
111 	base = inetpeer_base_ptr(rt->_rt6i_peer);
112 	if (!base)
113 		return;
114 
115 	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
116 	if (peer) {
117 		if (!rt6_set_peer(rt, peer))
118 			inet_putpeer(peer);
119 	}
120 }
121 
__rt6_get_peer(struct rt6_info * rt,int create)122 static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
123 {
124 	if (rt6_has_peer(rt))
125 		return rt6_peer_ptr(rt);
126 
127 	rt6_bind_peer(rt, create);
128 	return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
129 }
130 
/* Convenience wrapper: get @rt's peer, creating one if necessary. */
static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
{
	return __rt6_get_peer(rt, 1);
}
135 
/* dst_ops->cow_metrics: copy-on-write the route metrics on first write.
 * Host routes keep their metrics in the shared inetpeer entry; every
 * other route falls back to the generic dst COW helper.
 * Returns a writable metrics array, or NULL if none can be provided.
 */
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct inet_peer *peer;
	u32 *p = NULL;

	if (!(rt->dst.flags & DST_HOST))
		return dst_cow_metrics_generic(dst, old);

	peer = rt6_get_peer_create(rt);
	if (peer) {
		u32 *old_p = __DST_METRICS_PTR(old);
		unsigned long prev, new;

		p = peer->metrics;
		/* Seed the peer's metrics from the previous values on
		 * first use, or when an overwrite is explicitly forced.
		 */
		if (inet_metrics_new(peer) ||
		    (old & DST_METRICS_FORCE_OVERWRITE))
			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);

		new = (unsigned long) p;
		prev = cmpxchg(&dst->_metrics, old, new);

		if (prev != old) {
			/* Lost the race: somebody else installed a
			 * metrics pointer.  Use theirs unless they are
			 * read-only.
			 */
			p = __DST_METRICS_PTR(prev);
			if (prev & DST_METRICS_READ_ONLY)
				p = NULL;
		}
	}
	return p;
}
166 
choose_neigh_daddr(struct rt6_info * rt,struct sk_buff * skb,const void * daddr)167 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
168 					     struct sk_buff *skb,
169 					     const void *daddr)
170 {
171 	struct in6_addr *p = &rt->rt6i_gateway;
172 
173 	if (!ipv6_addr_any(p))
174 		return (const void *) p;
175 	else if (skb)
176 		return &ipv6_hdr(skb)->daddr;
177 	return daddr;
178 }
179 
ip6_neigh_lookup(const struct dst_entry * dst,struct sk_buff * skb,const void * daddr)180 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
181 					  struct sk_buff *skb,
182 					  const void *daddr)
183 {
184 	struct rt6_info *rt = (struct rt6_info *) dst;
185 	struct neighbour *n;
186 
187 	daddr = choose_neigh_daddr(rt, skb, daddr);
188 	n = __ipv6_neigh_lookup(dst->dev, daddr);
189 	if (n)
190 		return n;
191 	return neigh_create(&nd_tbl, daddr, dst->dev);
192 }
193 
/* dst_ops template for regular IPv6 routes. */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
212 
ip6_blackhole_mtu(const struct dst_entry * dst)213 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
214 {
215 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
216 
217 	return mtu ? : dst->dev->mtu;
218 }
219 
/* Blackhole dsts never adjust their PMTU: intentional no-op. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
224 
/* Blackhole dsts ignore ICMP redirects: intentional no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
229 
/* Blackhole metrics are immutable: refuse the copy-on-write. */
static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
					 unsigned long old)
{
	return NULL;
}
235 
/* dst_ops for blackhole copies made by ip6_blackhole_route(); the
 * mutating operations (PMTU, redirect, metrics COW) are no-ops.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ip6_neigh_lookup,
};
248 
/* Explicitly zeroed metrics template (hop limit left at 0). */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
252 
/* Template for the per-namespace null route: packets matching it are
 * discarded with -ENETUNREACH.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* worst possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};
267 
268 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
269 
/* Template for the prohibit route: packets matching it are rejected
 * with -EACCES (administratively prohibited).
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* worst possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};
284 
/* Template for the blackhole route: packets matching it are silently
 * discarded (dst_discard) with error -EINVAL.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_sk,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* worst possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};
299 
300 #endif
301 
/* allocate dst with ip6_dst_ops */
static inline struct rt6_info *ip6_dst_alloc(struct net *net,
					     struct net_device *dev,
					     int flags,
					     struct fib6_table *table)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					0, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt) {
		struct dst_entry *dst = &rt->dst;

		/* Zero only the rt6_info tail that follows the embedded
		 * dst_entry; dst_alloc() already initialized the dst.
		 */
		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
		/* Peer base comes from the table when one is given. */
		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
		INIT_LIST_HEAD(&rt->rt6i_siblings);
	}
	return rt;
}
320 
/* dst_ops->destroy: release everything the rt6_info holds a reference
 * to (inet6_dev, the "from" dst, the inetpeer) plus its own metrics.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct dst_entry *from = dst->from;

	/* Non-host routes own their metrics; host routes share the
	 * inetpeer's (see ipv6_cow_metrics()).
	 */
	if (!(rt->dst.flags & DST_HOST))
		dst_destroy_metrics_generic(dst);

	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	dst->from = NULL;
	dst_release(from);

	if (rt6_has_peer(rt)) {
		struct inet_peer *peer = rt6_peer_ptr(rt);
		inet_putpeer(peer);
	}
}
343 
ip6_dst_ifdown(struct dst_entry * dst,struct net_device * dev,int how)344 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
345 			   int how)
346 {
347 	struct rt6_info *rt = (struct rt6_info *)dst;
348 	struct inet6_dev *idev = rt->rt6i_idev;
349 	struct net_device *loopback_dev =
350 		dev_net(dev)->loopback_dev;
351 
352 	if (dev != loopback_dev) {
353 		if (idev && idev->dev == dev) {
354 			struct inet6_dev *loopback_idev =
355 				in6_dev_get(loopback_dev);
356 			if (loopback_idev) {
357 				rt->rt6i_idev = loopback_idev;
358 				in6_dev_put(idev);
359 			}
360 		}
361 	}
362 }
363 
rt6_check_expired(const struct rt6_info * rt)364 static bool rt6_check_expired(const struct rt6_info *rt)
365 {
366 	if (rt->rt6i_flags & RTF_EXPIRES) {
367 		if (time_after(jiffies, rt->dst.expires))
368 			return true;
369 	} else if (rt->dst.from) {
370 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
371 	}
372 	return false;
373 }
374 
/* Multipath route selection:
 *   Hash based function using packet header and flowlabel.
 * Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	unsigned int val = fl6->flowi6_proto;

	val ^= ipv6_addr_hash(&fl6->daddr);
	val ^= ipv6_addr_hash(&fl6->saddr);

	/* Work only if this not encapsulated */
	switch (fl6->flowi6_proto) {
	case IPPROTO_UDP:
	case IPPROTO_TCP:
	case IPPROTO_SCTP:
		val ^= (__force u16)fl6->fl6_sport;
		val ^= (__force u16)fl6->fl6_dport;
		break;

	case IPPROTO_ICMPV6:
		val ^= (__force u16)fl6->fl6_icmp_type;
		val ^= (__force u16)fl6->fl6_icmp_code;
		break;
	}
	/* RFC6438 recommends using the flowlabel */
	val ^= (__force u32)fl6->flowlabel;

	/* Perhaps, we need to tune, this function? */
	val = val ^ (val >> 7) ^ (val >> 12);
	/* Result selects one of candidate_count next hops. */
	return val % candidate_count;
}
408 
rt6_multipath_select(struct rt6_info * match,struct flowi6 * fl6,int oif,int strict)409 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
410 					     struct flowi6 *fl6, int oif,
411 					     int strict)
412 {
413 	struct rt6_info *sibling, *next_sibling;
414 	int route_choosen;
415 
416 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
417 	/* Don't change the route, if route_choosen == 0
418 	 * (siblings does not include ourself)
419 	 */
420 	if (route_choosen)
421 		list_for_each_entry_safe(sibling, next_sibling,
422 				&match->rt6i_siblings, rt6i_siblings) {
423 			route_choosen--;
424 			if (route_choosen == 0) {
425 				if (rt6_score_route(sibling, oif, strict) < 0)
426 					break;
427 				match = sibling;
428 				break;
429 			}
430 		}
431 	return match;
432 }
433 
434 /*
435  *	Route lookup. Any table->tb6_lock is implied.
436  */
437 
/* Walk the metric-ordered list starting at @rt and return the entry
 * matching the output interface @oif (or, with no oif, the entry whose
 * device owns @saddr).  A route on the loopback device is remembered
 * as a fallback; under strict interface matching the null entry is
 * returned when nothing matched.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* Nothing to match against: keep the list head. */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					/* Prefer a loopback route whose
					 * idev does match @oif.
					 */
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
484 
485 #ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred neighbour-solicitation request queued by rt6_probe() and
 * executed by rt6_probe_deferred().
 */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;		/* router address to solicit */
	struct net_device *dev;		/* held via dev_hold() by the queuer */
};
491 
/* Workqueue handler for rt6_probe(): send a Neighbour Solicitation to
 * the recorded target, then drop the device reference and free the
 * work item.
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
	dev_put(work->dev);	/* pairs with dev_hold() in rt6_probe() */
	kfree(w);
}
503 
/* Probe the reachability of @rt's gateway router, rate-limited by the
 * per-device rtr_probe_interval.  The actual NS transmission is
 * deferred to a workqueue.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		write_lock(&neigh->lock);
		/* Already known reachable: nothing to probe.
		 * (The "out" label below releases neigh->lock.)
		 */
		if (neigh->nud_state & NUD_VALID)
			goto out;
	}

	/* Probe when there is no neighbour entry yet, or the existing
	 * entry has not been updated within the probe interval.
	 */
	if (!neigh ||
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct __rt6_probe_work *work;

		work = kmalloc(sizeof(*work), GFP_ATOMIC);

		/* Mark the entry as probed only if we can also queue
		 * the work; otherwise retry on the next lookup.
		 */
		if (neigh && work)
			__neigh_set_probe_once(neigh);

		if (neigh)
			write_unlock(&neigh->lock);

		if (work) {
			INIT_WORK(&work->work, rt6_probe_deferred);
			work->target = rt->rt6i_gateway;
			dev_hold(rt->dst.dev);
			work->dev = rt->dst.dev;
			schedule_work(&work->work);
		}
	} else {
out:
		write_unlock(&neigh->lock);
	}
	rcu_read_unlock_bh();
}
550 #else
/* Router probing is only built with CONFIG_IPV6_ROUTER_PREF: no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
554 #endif
555 
556 /*
557  * Default Router Selection (RFC 2461 6.3.6)
558  */
rt6_check_dev(struct rt6_info * rt,int oif)559 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
560 {
561 	struct net_device *dev = rt->dst.dev;
562 	if (!oif || dev->ifindex == oif)
563 		return 2;
564 	if ((dev->flags & IFF_LOOPBACK) &&
565 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
566 		return 1;
567 	return 0;
568 }
569 
/* Classify the reachability of @rt's gateway neighbour; see
 * enum rt6_nud_state for the return values.
 */
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	/* Routes without a real next hop have nothing to check. */
	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		/* With router preferences, a not-yet-failed entry is
		 * accepted; a failed one asks for a probe instead.
		 */
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		/* No neighbour entry at all. */
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
600 
rt6_score_route(struct rt6_info * rt,int oif,int strict)601 static int rt6_score_route(struct rt6_info *rt, int oif,
602 			   int strict)
603 {
604 	int m;
605 
606 	m = rt6_check_dev(rt, oif);
607 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
608 		return RT6_NUD_FAIL_HARD;
609 #ifdef CONFIG_IPV6_ROUTER_PREF
610 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
611 #endif
612 	if (strict & RT6_LOOKUP_F_REACHABLE) {
613 		int n = rt6_check_neigh(rt);
614 		if (n < 0)
615 			return n;
616 	}
617 	return m;
618 }
619 
/* Score @rt and fold it into the running best-match selection.
 * @mpri: best score seen so far, updated in place.
 * @do_rr: set when the winning route requests round-robin rotation.
 * Returns the better of @match and @rt.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;

	/* Expired routes never participate. */
	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}
650 
/* Find the best route among all routes with metric @metric at @fn:
 * scan from the round-robin head @rr_head to the end of the metric run,
 * then wrap around from the leaf's first entry back to @rr_head.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match;
	int mpri = -1;

	match = NULL;
	for (rt = rr_head; rt && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}
669 
/* Select the preferred route at node @fn among the routes sharing the
 * head metric, rotating fn->rr_ptr for round-robin when requested.
 * Returns the namespace null entry when nothing matched.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
697 
698 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Process a Route Information option (@opt/@len) advertised by the
 * router @gwaddr on @dev: add, refresh or withdraw the corresponding
 * route.  Returns 0 on success, -EINVAL on a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		/* prefixes longer than 64 bits need the full field */
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* A zero-length prefix denotes a default route. */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(dev, prefix, rinfo->prefix_len,	gwaddr);

	/* Zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(dev, prefix, rinfo->prefix_len, gwaddr, pref);
	else if (rt)
		/* Refresh the preference of the existing route. */
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
769 #endif
770 
/* Backtrack out of an unsuccessful fib6 lookup: if rt is the null
 * entry, walk back up towards the tree root (consulting source-routed
 * subtrees on the way) and jump to the caller's "restart" label at the
 * first node carrying route info, or to its "out" label at the top of
 * the tree.  Relies on the variables rt/fn and the labels restart/out
 * existing in the calling scope.
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == __net->ipv6.ip6_null_entry) {	\
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
788 
/* Simple (non-cloning) table lookup: return the best matching route in
 * @table with its use counters bumped.  Takes tb6_lock itself; the
 * "restart"/"out" labels are targets of the BACKTRACK macro.
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	/* Spread over ECMP siblings only when no interface was forced. */
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	BACKTRACK(net, &fl6->saddr);
out:
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
810 
/* Policy-rule aware wrapper around ip6_pol_route_lookup(). */
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				    int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
817 
rt6_lookup(struct net * net,const struct in6_addr * daddr,const struct in6_addr * saddr,int oif,int strict)818 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
819 			    const struct in6_addr *saddr, int oif, int strict)
820 {
821 	struct flowi6 fl6 = {
822 		.flowi6_oif = oif,
823 		.daddr = *daddr,
824 	};
825 	struct dst_entry *dst;
826 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
827 
828 	if (saddr) {
829 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
830 		flags |= RT6_LOOKUP_F_HAS_SADDR;
831 	}
832 
833 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
834 	if (dst->error == 0)
835 		return (struct rt6_info *) dst;
836 
837 	dst_release(dst);
838 
839 	return NULL;
840 }
841 EXPORT_SYMBOL(rt6_lookup);
842 
/* ip6_ins_rt is called with FREE table->tb6_lock.
 * It takes a new route entry; if the addition fails for any reason, the
 * route is freed.  In any case, if the caller does not hold a reference
 * to the route, it may be destroyed.
 */
848 
__ip6_ins_rt(struct rt6_info * rt,struct nl_info * info,struct nlattr * mx,int mx_len)849 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
850 			struct nlattr *mx, int mx_len)
851 {
852 	int err;
853 	struct fib6_table *table;
854 
855 	table = rt->rt6i_table;
856 	write_lock_bh(&table->tb6_lock);
857 	err = fib6_add(&table->tb6_root, rt, info, mx, mx_len);
858 	write_unlock_bh(&table->tb6_lock);
859 
860 	return err;
861 }
862 
/* Insert @rt into its FIB table with a default netlink info block
 * taken from the route's own namespace.
 */
int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->dst.dev),
	};
	return __ip6_ins_rt(rt, &info, NULL, 0);
}
870 
/* Make a cached (RTF_CACHE) per-destination copy of @ort for
 * @daddr/@saddr.  Returns the clone, or NULL on allocation failure.
 */
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
				      const struct in6_addr *daddr,
				      const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort, daddr);

	if (rt) {
		/* A host clone of a non-host route whose prefix equals
		 * the destination addresses an anycast address.
		 */
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;

		rt->rt6i_flags |= RTF_CACHE;

#ifdef CONFIG_IPV6_SUBTREES
		/* Narrow the clone's source prefix to the exact source
		 * address used for this lookup.
		 */
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}
900 
rt6_alloc_clone(struct rt6_info * ort,const struct in6_addr * daddr)901 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
902 					const struct in6_addr *daddr)
903 {
904 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
905 
906 	if (rt)
907 		rt->rt6i_flags |= RTF_CACHE;
908 	return rt;
909 }
910 
/* Core route resolver: find the best route for @fl6 in @table, cloning
 * it into a cached (RTF_CACHE) entry when needed.  Returns the route
 * with a reference held; on failure the namespace null entry is
 * returned instead of NULL.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;
	/* Hosts (non-forwarding) prefer reachable routers first. */
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);

restart:
	rt = rt6_select(fn, oif, strict | reachable);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
	BACKTRACK(net, &fl6->saddr);
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	/* Not yet cached: clone outside the table lock. */
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
	else if (!(rt->dst.flags & DST_HOST))
		nrt = rt6_alloc_clone(rt, &fl6->daddr);
	else
		goto out2;

	ip6_rt_put(rt);
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	ip6_rt_put(rt);
	goto relookup;

out:
	if (reachable) {
		/* Nothing reachable found: retry without requiring
		 * neighbour reachability.
		 */
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}
981 
/* Input-path adapter for ip6_pol_route(): match on the input iface. */
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}
987 
ip6_route_input_lookup(struct net * net,struct net_device * dev,struct flowi6 * fl6,int flags)988 static struct dst_entry *ip6_route_input_lookup(struct net *net,
989 						struct net_device *dev,
990 						struct flowi6 *fl6, int flags)
991 {
992 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
993 		flags |= RT6_LOOKUP_F_IFACE;
994 
995 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
996 }
997 
/* Route an incoming packet: build a flow key from the IPv6 header and
 * attach the resulting dst to @skb.
 */
void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
1014 
/* Output-path adapter for ip6_pol_route(): match on the output iface. */
static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}
1020 
/* Resolve the output route for @fl6 on behalf of @sk (may be NULL).
 * Strict interface matching applies when the socket is bound to a
 * device or rt6_need_strict() flags the destination.
 */
struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
				    struct flowi6 *fl6)
{
	int flags = 0;

	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!ipv6_addr_any(&fl6->saddr))
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		/* Let the socket's source-address preferences steer
		 * source selection.
		 */
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL(ip6_route_output);
1039 
/* Produce a non-cached copy of @dst_orig whose input/output handlers
 * discard every packet (used e.g. by xfrm for blocked flows).
 * Consumes the caller's reference on @dst_orig.  Returns the new dst
 * or ERR_PTR(-ENOMEM).
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		new = &rt->dst;

		/* Zero everything past the generic dst header, then
		 * copy the relevant state over from the original.
		 */
		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
		rt6_init_peer(rt, net->ipv6.peers);

		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_sk;

		if (dst_metrics_read_only(&ort->dst))
			new->_metrics = ort->dst._metrics;
		else
			dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		/* Hand the entry to the dst garbage collector; the
		 * initial reference from dst_alloc() is still held for
		 * the caller.
		 */
		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
1079 
1080 /*
1081  *	Destination cache support functions
1082  */
1083 
/* dst_ops->check handler: a cached dst stays usable only while its
 * fib node exists, the tree's serial number still matches @cookie,
 * and the route has not expired.
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	/* All IPV6 dsts are created with ->obsolete set to
	 * DST_OBSOLETE_FORCE_CHK, which forces validation calls down
	 * into this function always.
	 */
	if (rt->rt6i_node && rt->rt6i_node->fn_sernum == cookie &&
	    !rt6_check_expired(rt))
		return dst;

	return NULL;
}
1102 
ip6_negative_advice(struct dst_entry * dst)1103 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1104 {
1105 	struct rt6_info *rt = (struct rt6_info *) dst;
1106 
1107 	if (rt) {
1108 		if (rt->rt6i_flags & RTF_CACHE) {
1109 			if (rt6_check_expired(rt)) {
1110 				ip6_del_rt(rt);
1111 				dst = NULL;
1112 			}
1113 		} else {
1114 			dst_release(dst);
1115 			dst = NULL;
1116 		}
1117 	}
1118 	return dst;
1119 }
1120 
/* dst_ops->link_failure handler: report address-unreachable to the
 * sender and drop or invalidate the route that went bad.
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* Take a ref for ip6_del_rt(); if deletion
			 * fails (entry already unlinked), free it
			 * ourselves instead.
			 */
			dst_hold(&rt->dst);
			if (ip6_del_rt(rt))
				dst_free(&rt->dst);
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
			/* Poison the fib node's serial number so dsts
			 * cached from it fail ip6_dst_check().
			 */
			rt->rt6i_node->fn_sernum = -1;
		}
	}
}
1138 
/* dst_ops->update_pmtu handler: record a learned path MTU on a host
 * cache entry and arm its expiry timer.
 */
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;
	struct net *net;

	dst_confirm(dst);

	/* Only host (/128) entries carry a learned PMTU, and only a
	 * decrease is ever accepted.
	 */
	if (rt6->rt6i_dst.plen != 128 || mtu >= dst_mtu(dst))
		return;

	net = dev_net(dst->dev);
	rt6->rt6i_flags |= RTF_MODIFIED;
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	dst_metric_set(dst, RTAX_MTU, mtu);
	rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
}
1156 
/* Update the path MTU for the flow that triggered the packet in
 * @skb (whose payload starts with the inner IPv6 header).  @mtu is
 * in network byte order.
 */
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark, kuid_t uid)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_uid = uid,
	};
	struct dst_entry *dst;

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1178 
/* Convenience wrapper: update the PMTU using the socket's netns,
 * device binding, mark and uid.
 */
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1185 
/* Handle redirects */
/* Extended flow key for redirect validation: fib6_rule_lookup() only
 * passes a struct flowi6, so the announcing router's address rides
 * behind it (flowi6 must stay the first member; see the cast in
 * __ip6_route_redirect()).
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1191 
__ip6_route_redirect(struct net * net,struct fib6_table * table,struct flowi6 * fl6,int flags)1192 static struct rt6_info *__ip6_route_redirect(struct net *net,
1193 					     struct fib6_table *table,
1194 					     struct flowi6 *fl6,
1195 					     int flags)
1196 {
1197 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1198 	struct rt6_info *rt;
1199 	struct fib6_node *fn;
1200 
1201 	/* Get the "current" route for this destination and
1202 	 * check if the redirect has come from approriate router.
1203 	 *
1204 	 * RFC 4861 specifies that redirects should only be
1205 	 * accepted if they come from the nexthop to the target.
1206 	 * Due to the way the routes are chosen, this notion
1207 	 * is a bit fuzzy and one might need to check all possible
1208 	 * routes.
1209 	 */
1210 
1211 	read_lock_bh(&table->tb6_lock);
1212 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1213 restart:
1214 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1215 		if (rt6_check_expired(rt))
1216 			continue;
1217 		if (rt->dst.error)
1218 			break;
1219 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1220 			continue;
1221 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1222 			continue;
1223 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1224 			continue;
1225 		break;
1226 	}
1227 
1228 	if (!rt)
1229 		rt = net->ipv6.ip6_null_entry;
1230 	else if (rt->dst.error) {
1231 		rt = net->ipv6.ip6_null_entry;
1232 		goto out;
1233 	}
1234 	BACKTRACK(net, &fl6->saddr);
1235 out:
1236 	dst_hold(&rt->dst);
1237 
1238 	read_unlock_bh(&table->tb6_lock);
1239 
1240 	return rt;
1241 };
1242 
ip6_route_redirect(struct net * net,const struct flowi6 * fl6,const struct in6_addr * gateway)1243 static struct dst_entry *ip6_route_redirect(struct net *net,
1244 					const struct flowi6 *fl6,
1245 					const struct in6_addr *gateway)
1246 {
1247 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1248 	struct ip6rd_flowi rdfl;
1249 
1250 	rdfl.fl6 = *fl6;
1251 	rdfl.gateway = *gateway;
1252 
1253 	return fib6_rule_lookup(net, &rdfl.fl6,
1254 				flags, __ip6_route_redirect);
1255 }
1256 
/* Apply a received redirect to the flow described by the inner IPv6
 * header at skb->data; the announcing router is the outer packet's
 * source address.
 */
void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
		  kuid_t uid)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct flowi6 fl6 = {
		.flowi6_iif = LOOPBACK_IFINDEX,
		.flowi6_oif = oif,
		.flowi6_mark = mark,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_uid = uid,
	};
	struct dst_entry *dst;

	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);
1278 
/* As ip6_redirect(), but for redirects whose ICMPv6 payload carries
 * no inner header: the flow is reconstructed from the rd_msg
 * destination and the outer IPv6 header.
 */
void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
			    u32 mark)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
	struct flowi6 fl6 = {
		.flowi6_iif = LOOPBACK_IFINDEX,
		.flowi6_oif = oif,
		.flowi6_mark = mark,
		.daddr = msg->dest,
		.saddr = iph->daddr,
		.flowi6_uid = sock_net_uid(net, NULL),
	};
	struct dst_entry *dst;

	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
1299 
/* Convenience wrapper: process a redirect using the socket's netns,
 * device binding, mark and uid.
 */
void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
		     sk->sk_uid);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1306 
ip6_default_advmss(const struct dst_entry * dst)1307 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1308 {
1309 	struct net_device *dev = dst->dev;
1310 	unsigned int mtu = dst_mtu(dst);
1311 	struct net *net = dev_net(dev);
1312 
1313 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1314 
1315 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1316 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1317 
1318 	/*
1319 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1320 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1321 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1322 	 * rely only on pmtu discovery"
1323 	 */
1324 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1325 		mtu = IPV6_MAXPLEN;
1326 	return mtu;
1327 }
1328 
ip6_mtu(const struct dst_entry * dst)1329 static unsigned int ip6_mtu(const struct dst_entry *dst)
1330 {
1331 	struct inet6_dev *idev;
1332 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1333 
1334 	if (mtu)
1335 		goto out;
1336 
1337 	mtu = IPV6_MIN_MTU;
1338 
1339 	rcu_read_lock();
1340 	idev = __in6_dev_get(dst->dev);
1341 	if (idev)
1342 		mtu = idev->cnf.mtu6;
1343 	rcu_read_unlock();
1344 
1345 out:
1346 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1347 }
1348 
/* Singly linked list (via dst->next) of dsts handed out by
 * icmp6_dst_alloc(); reaped by icmp6_dst_gc() and icmp6_clean_all().
 */
static struct dst_entry *icmp6_dst_gc_list;
/* Protects icmp6_dst_gc_list. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
1351 
/* Allocate a standalone (not fib-inserted) host route for an
 * outgoing ICMPv6/ND packet on @dev.  The entry is chained on
 * icmp6_dst_gc_list and reclaimed by icmp6_dst_gc() once its
 * refcount drops to zero.  Returns the dst (possibly xfrm-wrapped)
 * or an ERR_PTR on failure.
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0, NULL);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output  = ip6_output;
	atomic_set(&rt->dst.__refcnt, 1);
	rt->rt6i_gateway  = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev     = idev;	/* takes over the in6_dev_get() ref */
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	/* Not in any fib tree, so track it on the private GC list. */
	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
1391 
icmp6_dst_gc(void)1392 int icmp6_dst_gc(void)
1393 {
1394 	struct dst_entry *dst, **pprev;
1395 	int more = 0;
1396 
1397 	spin_lock_bh(&icmp6_dst_lock);
1398 	pprev = &icmp6_dst_gc_list;
1399 
1400 	while ((dst = *pprev) != NULL) {
1401 		if (!atomic_read(&dst->__refcnt)) {
1402 			*pprev = dst->next;
1403 			dst_free(dst);
1404 		} else {
1405 			pprev = &dst->next;
1406 			++more;
1407 		}
1408 	}
1409 
1410 	spin_unlock_bh(&icmp6_dst_lock);
1411 
1412 	return more;
1413 }
1414 
icmp6_clean_all(int (* func)(struct rt6_info * rt,void * arg),void * arg)1415 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1416 			    void *arg)
1417 {
1418 	struct dst_entry *dst, **pprev;
1419 
1420 	spin_lock_bh(&icmp6_dst_lock);
1421 	pprev = &icmp6_dst_gc_list;
1422 	while ((dst = *pprev) != NULL) {
1423 		struct rt6_info *rt = (struct rt6_info *) dst;
1424 		if (func(rt, arg)) {
1425 			*pprev = dst->next;
1426 			dst_free(dst);
1427 		} else {
1428 			pprev = &dst->next;
1429 		}
1430 	}
1431 	spin_unlock_bh(&icmp6_dst_lock);
1432 }
1433 
/* dst_ops->gc callback: run fib6 garbage collection when the dst
 * entry count exceeds the sysctl budget.  Returns non-zero while
 * still over budget (tells the dst layer allocation pressure
 * remains).
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	/* Rate-limit: skip the walk if we ran recently and are within
	 * the size budget.
	 */
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	/* Consecutive passes bump ip6_rt_gc_expire, making
	 * fib6_run_gc() progressively more aggressive; reset it once
	 * we drop below the threshold.
	 */
	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	/* Exponentially decay the aggressiveness between passes. */
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
1458 
/*
 *	Route addition and removal
 */
1462 
/* Create and insert the route described by @cfg into the appropriate
 * fib table.  Returns 0 on success or a negative errno; on failure
 * every acquired reference (dev, idev, rt) is dropped.
 */
int ip6_route_add(struct fib6_config *cfg)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-specific routing requires subtree support. */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	/* Without NLM_F_CREATE only an existing table may be used;
	 * tolerate (but warn about) old userspace that omits it.
	 */
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	/* Addrconf-generated routes are counted against the dst
	 * budget; everything else is allocated DST_NOCOUNT.
	 */
	rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler by destination class. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128) {
		rt->dst.flags |= DST_HOST;
		dst_metrics_set_force_overwrite(&rt->dst);
	}

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* Choose the error and drop handlers by reject type. */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_sk;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		rt->rt6i_gateway = *gw_addr;
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			/* The gateway itself must already be reachable
			 * by a non-gateway route on the chosen device.
			 */
			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* No device given: inherit it (and its
				 * idev) from the gateway route.
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* The preferred source must be an address on @dev. */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	/* NOTE(review): overrides the caller-supplied netns with the
	 * device's for notification purposes — verify for cross-netns
	 * devices.
	 */
	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len);

out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);
	return err;
}
1677 
/* Unlink @rt from its fib table under the table write lock and send
 * the netlink notification described by @info.  Consumes the
 * caller's reference on @rt.  Returns 0 or a negative errno
 * (-ENOENT for the null entry, which must never be deleted).
 */
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->dst.dev);

	if (rt == net->ipv6.ip6_null_entry) {
		err = -ENOENT;
		goto out;
	}

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_del(rt, info);
	write_unlock_bh(&table->tb6_lock);

out:
	ip6_rt_put(rt);
	return err;
}
1698 
ip6_del_rt(struct rt6_info * rt)1699 int ip6_del_rt(struct rt6_info *rt)
1700 {
1701 	struct nl_info info = {
1702 		.nl_net = dev_net(rt->dst.dev),
1703 	};
1704 	return __ip6_del_rt(rt, &info);
1705 }
1706 
/* Delete the first route matching @cfg (dst/src prefix plus, when
 * given, ifindex, gateway, metric and protocol).  Returns 0 on
 * success or -ESRCH when nothing matched.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
				continue;
			/* Hold a ref across the unlock; __ip6_del_rt()
			 * consumes it.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
1747 
/* Process an ICMPv6 redirect carried in @skb against the route @dst:
 * validate the message (RFC 4861 section 8), update the neighbour
 * cache for the new first hop, and install an RTF_CACHE clone of the
 * route pointing at it.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* Destination == target means the destination is on-link;
	 * otherwise the new first hop must be link-local unicast.
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* Forwarding interfaces and interfaces configured to ignore
	 * redirects do not honour them.
	 */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	nrt = ip6_rt_copy(rt, &msg->dest);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* The new clone supersedes an existing cached entry; take a
	 * ref for ip6_del_rt() and drop the old one.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
1864 
1865 /*
1866  *	Misc support functions
1867  */
1868 
/* Clone @ort into a new host (/128) route to @dest, copying handlers,
 * metrics, device/idev, flags and keys.  Used to create per-host
 * (e.g. RTF_CACHE) clones.  Returns NULL on allocation failure.
 */
static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
				    const struct in6_addr *dest)
{
	struct net *net = dev_net(ort->dst.dev);
	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
					    ort->rt6i_table);

	if (rt) {
		rt->dst.input = ort->dst.input;
		rt->dst.output = ort->dst.output;
		rt->dst.flags |= DST_HOST;

		rt->rt6i_dst.addr = *dest;
		rt->rt6i_dst.plen = 128;
		dst_copy_metrics(&rt->dst, &ort->dst);
		rt->dst.error = ort->dst.error;
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->dst.lastuse = jiffies;

		/* Non-gateway routes deliver directly, so the clone's
		 * gateway is the destination itself.
		 */
		if (ort->rt6i_flags & RTF_GATEWAY)
			rt->rt6i_gateway = ort->rt6i_gateway;
		else
			rt->rt6i_gateway = *dest;
		rt->rt6i_flags = ort->rt6i_flags;
		rt6_set_from(rt, ort);
		rt->rt6i_metric = 0;

#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
		rt->rt6i_table = ort->rt6i_table;
	}
	return rt;
}
1906 
1907 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Find an existing route-information (RTF_ROUTEINFO) route for
 * @prefix/@prefixlen via gateway @gwaddr on @dev.  Returns the route
 * with a reference held, or NULL.
 */
static struct rt6_info *rt6_get_route_info(struct net_device *dev,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev),
			       addrconf_rt_table(dev, RT6_TABLE_INFO));
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != dev->ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
1940 
rt6_add_route_info(struct net_device * dev,const struct in6_addr * prefix,int prefixlen,const struct in6_addr * gwaddr,unsigned int pref)1941 static struct rt6_info *rt6_add_route_info(struct net_device *dev,
1942 					   const struct in6_addr *prefix, int prefixlen,
1943 					   const struct in6_addr *gwaddr, unsigned int pref)
1944 {
1945 	struct fib6_config cfg = {
1946 		.fc_table	= addrconf_rt_table(dev, RT6_TABLE_INFO),
1947 		.fc_metric	= IP6_RT_PRIO_USER,
1948 		.fc_ifindex	= dev->ifindex,
1949 		.fc_dst_len	= prefixlen,
1950 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1951 				  RTF_UP | RTF_PREF(pref),
1952 		.fc_nlinfo.portid = 0,
1953 		.fc_nlinfo.nlh = NULL,
1954 		.fc_nlinfo.nl_net = dev_net(dev),
1955 	};
1956 
1957 	cfg.fc_dst = *prefix;
1958 	cfg.fc_gateway = *gwaddr;
1959 
1960 	/* We should treat it as a default route if prefix length is 0. */
1961 	if (!prefixlen)
1962 		cfg.fc_flags |= RTF_DEFAULT;
1963 
1964 	ip6_route_add(&cfg);
1965 
1966 	return rt6_get_route_info(dev, prefix, prefixlen, gwaddr);
1967 }
1968 #endif
1969 
/* Look up the RA-installed (RTF_ADDRCONF|RTF_DEFAULT) default route
 * via gateway @addr on @dev.  Returns it with a reference held, or
 * NULL.
 */
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev),
			       addrconf_rt_table(dev, RT6_TABLE_MAIN));
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	/* Default routes hang off the table root's leaf chain. */
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (dev == rt->dst.dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
1992 
rt6_add_dflt_router(const struct in6_addr * gwaddr,struct net_device * dev,unsigned int pref)1993 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1994 				     struct net_device *dev,
1995 				     unsigned int pref)
1996 {
1997 	struct fib6_config cfg = {
1998 		.fc_table	= addrconf_rt_table(dev, RT6_TABLE_DFLT),
1999 		.fc_metric	= IP6_RT_PRIO_USER,
2000 		.fc_ifindex	= dev->ifindex,
2001 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2002 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2003 		.fc_nlinfo.portid = 0,
2004 		.fc_nlinfo.nlh = NULL,
2005 		.fc_nlinfo.nl_net = dev_net(dev),
2006 	};
2007 
2008 	cfg.fc_gateway = *gwaddr;
2009 
2010 	ip6_route_add(&cfg);
2011 
2012 	return rt6_get_dflt_router(gwaddr, dev);
2013 }
2014 
2015 
rt6_addrconf_purge(struct rt6_info * rt,void * arg)2016 int rt6_addrconf_purge(struct rt6_info *rt, void *arg) {
2017 	if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2018 	    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2))
2019 		return -1;
2020 	return 0;
2021 }
2022 
/* Remove all RA-learned routes in @net; rt6_addrconf_purge() is the
 * per-route predicate.
 */
void rt6_purge_dflt_routers(struct net *net)
{
	fib6_clean_all(net, rt6_addrconf_purge, NULL);
}
2027 
rtmsg_to_fib6_config(struct net * net,struct in6_rtmsg * rtmsg,struct fib6_config * cfg)2028 static void rtmsg_to_fib6_config(struct net *net,
2029 				 struct in6_rtmsg *rtmsg,
2030 				 struct fib6_config *cfg)
2031 {
2032 	memset(cfg, 0, sizeof(*cfg));
2033 
2034 	cfg->fc_table = RT6_TABLE_MAIN;
2035 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2036 	cfg->fc_metric = rtmsg->rtmsg_metric;
2037 	cfg->fc_expires = rtmsg->rtmsg_info;
2038 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2039 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2040 	cfg->fc_flags = rtmsg->rtmsg_flags;
2041 
2042 	cfg->fc_nlinfo.nl_net = net;
2043 
2044 	cfg->fc_dst = rtmsg->rtmsg_dst;
2045 	cfg->fc_src = rtmsg->rtmsg_src;
2046 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2047 }
2048 
/* Handle the legacy SIOCADDRT/SIOCDELRT route ioctls.  Requires
 * CAP_NET_ADMIN in @net's user namespace.  Returns 0 or a negative
 * errno (-EINVAL for any other command).
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct in6_rtmsg rtmsg;
	struct fib6_config cfg;
	int err;

	if (cmd != SIOCADDRT && cmd != SIOCDELRT)
		return -EINVAL;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (copy_from_user(&rtmsg, arg, sizeof(struct in6_rtmsg)))
		return -EFAULT;

	rtmsg_to_fib6_config(net, &rtmsg, &cfg);

	rtnl_lock();
	if (cmd == SIOCADDRT)
		err = ip6_route_add(&cfg);
	else
		err = ip6_route_del(&cfg);
	rtnl_unlock();

	return err;
}
2085 
2086 /*
2087  *	Drop the packet on the floor
2088  */
2089 
/*
 * Common helper for the REJECT-style dst handlers below: bump the
 * appropriate SNMP counter, send an ICMPv6 destination-unreachable
 * with @code, and free the packet. Always returns 0.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			/* unspecified destination: an address error, not
			 * a routing failure */
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
2112 
/* dst input handler for the null (blackhole-on-input) route entry. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2117 
/* dst output handler for the null route entry. */
static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
{
	/* point skb->dev at the output device so ICMP/stats use it */
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
2123 
/* dst input handler for the prohibit route entry (admin prohibited). */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2128 
/* dst output handler for the prohibit route entry. */
static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
{
	/* point skb->dev at the output device so ICMP/stats use it */
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2134 
2135 /*
2136  *	Allocate a dst for local (unicast / anycast) address.
2137  */
2138 
/*
 * Allocate a local-table rt6_info for a local unicast or anycast @addr
 * on @idev. The dst is bound to the loopback device, flagged DST_HOST /
 * RTF_NONEXTHOP, and returned with one reference held. Returns
 * ERR_PTR(-ENOMEM) on allocation failure.
 */
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	struct net *net = dev_net(idev->dev);
	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
					    DST_NOCOUNT, NULL);
	if (!rt)
		return ERR_PTR(-ENOMEM);

	/* the route keeps a reference on idev until it is destroyed */
	in6_dev_hold(idev);

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;

	rt->rt6i_gateway  = *addr;
	rt->rt6i_dst.addr = *addr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);

	/* hand the caller an initial reference */
	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}
2171 
ip6_route_get_saddr(struct net * net,struct rt6_info * rt,const struct in6_addr * daddr,unsigned int prefs,struct in6_addr * saddr)2172 int ip6_route_get_saddr(struct net *net,
2173 			struct rt6_info *rt,
2174 			const struct in6_addr *daddr,
2175 			unsigned int prefs,
2176 			struct in6_addr *saddr)
2177 {
2178 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
2179 	int err = 0;
2180 	if (rt->rt6i_prefsrc.plen)
2181 		*saddr = rt->rt6i_prefsrc.addr;
2182 	else
2183 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2184 					 daddr, prefs, saddr);
2185 	return err;
2186 }
2187 
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict walk to this device; NULL = any */
	struct net *net;	/* namespace being walked */
	struct in6_addr *addr;	/* the address that was removed */
};
2194 
fib6_remove_prefsrc(struct rt6_info * rt,void * arg)2195 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2196 {
2197 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2198 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2199 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2200 
2201 	if (((void *)rt->dst.dev == dev || !dev) &&
2202 	    rt != net->ipv6.ip6_null_entry &&
2203 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2204 		/* remove prefsrc entry */
2205 		rt->rt6i_prefsrc.plen = 0;
2206 	}
2207 	return 0;
2208 }
2209 
/*
 * Called when the address @ifp is deleted: walk all routing tables and
 * drop any prefsrc references to it (see fib6_remove_prefsrc()).
 */
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
2220 
/* flag combinations identifying RA default routers and cached
 * gateway routes, respectively */
#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)

/* Remove routers and update dst entries when gateway turn into host. */
static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
{
	struct in6_addr *gateway = (struct in6_addr *)arg;

	/* return -1 (delete) when @rt is an RA router route or a cached
	 * gateway route whose gateway is the address that became a host */
	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
		return -1;
	}
	return 0;
}
2236 
/* Purge routes through @gateway after it stopped acting as a router. */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
2241 
/* Argument bundle for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL = match all */
	struct net *net;	/* namespace being walked */
};
2246 
fib6_ifdown(struct rt6_info * rt,void * arg)2247 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2248 {
2249 	const struct arg_dev_net *adn = arg;
2250 	const struct net_device *dev = adn->dev;
2251 
2252 	if ((rt->dst.dev == dev || !dev) &&
2253 	    rt != adn->net->ipv6.ip6_null_entry)
2254 		return -1;
2255 
2256 	return 0;
2257 }
2258 
/*
 * Remove all routes (fib and ICMP-cloned) that go through @dev; called
 * when the device goes down or unregisters.
 */
void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, &adn);
	icmp6_clean_all(fib6_ifdown, &adn);
}
2269 
/* Argument bundle for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2274 
/*
 * fib6_clean_all() callback: propagate a device MTU change into the
 * RTAX_MTU metric of routes using that device. Never deletes routes.
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discovery.
	 */
	if (rt->dst.dev == arg->dev &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
	    (dst_mtu(&rt->dst) >= arg->mtu ||
	     (dst_mtu(&rt->dst) < arg->mtu &&
	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
	}
	return 0;
}
2313 
/* Walk all routing tables of @dev's namespace applying the new @mtu. */
void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
{
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};

	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
}
2323 
/* Netlink attribute validation policy for IPv6 RTM_*ROUTE requests. */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_UID]		= { .type = NLA_U32 },
};
2333 
/*
 * Parse an RTM_NEWROUTE/RTM_DELROUTE netlink message into @cfg.
 * Returns 0 on success, negative errno on malformed requests.
 * Note: fc_mx/fc_mp keep pointers into the nlmsg payload; the message
 * must stay valid while @cfg is in use.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	/* all reject-like route types share RTF_REJECT; the specific
	 * type is kept in fc_type */
	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		/* only rtm_dst_len bits worth of address need be present */
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
	}

	err = 0;
errout:
	return err;
}
2418 
/*
 * Add (@add != 0) or delete every nexthop of an RTA_MULTIPATH request.
 * If an add fails part-way, the walk restarts in delete mode to roll
 * back the nexthops already installed. Returns the last error seen.
 *
 * NOTE(review): dereferences cfg->fc_nlinfo.nlh unconditionally below;
 * assumes this is only reached from the netlink handlers, where nlh is
 * always set -- confirm before adding new callers.
 */
static int ip6_route_multipath(struct fib6_config *cfg, int add)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	int remaining;
	int attrlen;
	int err = 0, last_err = 0;

beginning:
	rtnh = (struct rtnexthop *)cfg->fc_mp;
	remaining = cfg->fc_mp_len;

	/* Parse a Multipath Entry */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
		}
		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
		if (err) {
			last_err = err;
			/* If we are trying to remove a route, do not stop the
			 * loop when ip6_route_del() fails (because next hop is
			 * already gone), we should try to remove all next hops.
			 */
			if (add) {
				/* If add fails, we should try to delete all
				 * next hops that have been already added.
				 */
				add = 0;
				goto beginning;
			}
		}
		/* Because each route is added like a single route we remove
		 * this flag after the first nexthop (if there is a collision,
		 * we have already fail to add the first nexthop:
		 * fib6_add_rt2node() has reject it).
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
		rtnh = rtnh_next(rtnh, &remaining);
	}

	return last_err;
}
2473 
inet6_rtm_delroute(struct sk_buff * skb,struct nlmsghdr * nlh)2474 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2475 {
2476 	struct fib6_config cfg;
2477 	int err;
2478 
2479 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2480 	if (err < 0)
2481 		return err;
2482 
2483 	if (cfg.fc_mp)
2484 		return ip6_route_multipath(&cfg, 0);
2485 	else
2486 		return ip6_route_del(&cfg);
2487 }
2488 
inet6_rtm_newroute(struct sk_buff * skb,struct nlmsghdr * nlh)2489 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2490 {
2491 	struct fib6_config cfg;
2492 	int err;
2493 
2494 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2495 	if (err < 0)
2496 		return err;
2497 
2498 	if (cfg.fc_mp)
2499 		return ip6_route_multipath(&cfg, 1);
2500 	else
2501 		return ip6_route_add(&cfg);
2502 }
2503 
rt6_nlmsg_size(void)2504 static inline size_t rt6_nlmsg_size(void)
2505 {
2506 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2507 	       + nla_total_size(16) /* RTA_SRC */
2508 	       + nla_total_size(16) /* RTA_DST */
2509 	       + nla_total_size(16) /* RTA_GATEWAY */
2510 	       + nla_total_size(16) /* RTA_PREFSRC */
2511 	       + nla_total_size(4) /* RTA_TABLE */
2512 	       + nla_total_size(4) /* RTA_IIF */
2513 	       + nla_total_size(4) /* RTA_OIF */
2514 	       + nla_total_size(4) /* RTA_PRIORITY */
2515 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2516 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2517 }
2518 
/*
 * Fill one routing netlink message describing @rt into @skb.
 *
 * @dst/@src: when non-NULL (getroute replies), the looked-up addresses
 *	are reported with /128 prefix lengths instead of the route's own.
 * @iif: input interface for input-path lookups; emitted as RTA_IIF, or
 *	handed to ip6mr for multicast destinations.
 * @prefix: caller wants RTF_PREFIX_RT routes only; others are skipped
 *	with return value 1.
 * @nowait: passed through to ip6mr_get_route() in the multicast case.
 *
 * Returns nlmsg_end()'s value on success, 1 for an intentionally
 * skipped route, 0 from the pending-mroute path, or -EMSGSIZE.
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	if (rt->rt6i_flags & RTF_REJECT) {
		/* map the dst error back onto the netlink route type */
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		if (nla_put(skb, RTA_DST, 16, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put(skb, RTA_SRC, 16, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait,
						  portid);

			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		/* output lookup: report the source address that would
		 * be selected for this destination */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2665 
/*
 * fib6 dump callback: emit one RTM_NEWROUTE per route into the dump
 * skb, honouring the RTM_F_PREFIX filter when the request carried a
 * full rtmsg header.
 */
int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
	int prefix;

	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
	} else
		prefix = 0;

	return rt6_fill_node(arg->net,
		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
		     prefix, 0, NLM_F_MULTI);
}
2682 
/*
 * RTM_GETROUTE handler: build a flow from the request attributes,
 * perform an input- or output-path route lookup, and unicast the
 * resulting RTM_NEWROUTE back to the requester.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	int err, iif = 0, oif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_MARK])
		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);

	if (tb[RTA_UID])
		fl6.flowi6_uid = make_kuid(current_user_ns(),
					   nla_get_u32(tb[RTA_UID]));
	else
		/* input lookups are not tied to any user */
		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();

	if (iif) {
		/* input-path lookup as if the packet arrived on @iif */
		struct net_device *dev;
		int flags = 0;

		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
							       flags);
	} else {
		fl6.flowi6_oif = oif;

		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* the skb now owns the route reference */
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
2779 
/*
 * Broadcast an RTM_NEWROUTE/RTM_DELROUTE (@event) notification for @rt
 * to RTNLGRP_IPV6_ROUTE listeners. Allocation/build failures are
 * reported to the group via rtnl_set_sk_err().
 */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;

	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
				event, info->portid, seq, 0, 0, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
2809 
/*
 * netdev notifier: bind the namespace's template routes (null and, with
 * multiple tables, prohibit/blackhole) to the loopback device when it
 * registers, and drop those idev references on unregister.
 */
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (!(dev->flags & IFF_LOOPBACK))
		return NOTIFY_OK;

	if (event == NETDEV_REGISTER) {
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	 } else if (event == NETDEV_UNREGISTER &&
		    dev->reg_state != NETREG_UNREGISTERED) {
		/* NETDEV_UNREGISTER could be fired for multiple times by
		 * netdev_wait_allrefs(). Make sure we only call this once.
		 */
		in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
		in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev);
#endif
	}

	return NOTIFY_OK;
}
2842 
2843 /*
2844  *	/proc
2845  */
2846 
2847 #ifdef CONFIG_PROC_FS
2848 
/* /proc/net/ipv6_route: seq_file dump of the routing tables. */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
2856 
/*
 * /proc/net/rt6_stats: one line of hex counters -- fib nodes, route
 * nodes, rt allocs, rt entries, cache entries, live dst entries,
 * discarded routes.
 */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
2871 
/* open() for /proc/net/rt6_stats (single-shot, per-net). */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
2876 
/* file_operations for /proc/net/rt6_stats. */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
2884 #endif	/* CONFIG_PROC_FS */
2885 
2886 #ifdef CONFIG_SYSCTL
2887 
2888 static
ipv6_sysctl_rtcache_flush(struct ctl_table * ctl,int write,void __user * buffer,size_t * lenp,loff_t * ppos)2889 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2890 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2891 {
2892 	struct net *net;
2893 	int delay;
2894 	if (!write)
2895 		return -EINVAL;
2896 
2897 	net = (struct net *)ctl->extra1;
2898 	delay = net->ipv6.sysctl.flush_delay;
2899 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2900 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2901 	return 0;
2902 }
2903 
/*
 * sysctl template for /proc/sys/net/ipv6/route/. Cloned per-namespace
 * by ipv6_route_sysctl_init(), which rewrites the .data pointers BY
 * INDEX -- keep the entry order in sync with that function.
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
2977 
/*
 * Clone the route sysctl template for @net and point each entry's
 * .data at the namespace's own fields. Indices must match the entry
 * order in ipv6_route_table_template. Returns NULL on allocation
 * failure (callers treat that as "no sysctls").
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[0].extra1 = net;	/* flush handler needs the netns */
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}

	return table;
}
3006 #endif
3007 
/*
 * Per-namespace routing init: clone the dst ops template, allocate the
 * namespace's null (and, with multiple tables, prohibit/blackhole)
 * template routes, and seed the GC tunables. Unwinds via the goto
 * chain on failure; returns 0 or -ENOMEM.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* default GC tunables; overridable via the route sysctls */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
3079 
ip6_route_net_exit(struct net * net)3080 static void __net_exit ip6_route_net_exit(struct net *net)
3081 {
3082 	kfree(net->ipv6.ip6_null_entry);
3083 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3084 	kfree(net->ipv6.ip6_prohibit_entry);
3085 	kfree(net->ipv6.ip6_blk_hole_entry);
3086 #endif
3087 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3088 }
3089 
ip6_route_net_init_late(struct net * net)3090 static int __net_init ip6_route_net_init_late(struct net *net)
3091 {
3092 #ifdef CONFIG_PROC_FS
3093 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3094 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3095 #endif
3096 	return 0;
3097 }
3098 
ip6_route_net_exit_late(struct net * net)3099 static void __net_exit ip6_route_net_exit_late(struct net *net)
3100 {
3101 #ifdef CONFIG_PROC_FS
3102 	remove_proc_entry("ipv6_route", net->proc_net);
3103 	remove_proc_entry("rt6_stats", net->proc_net);
3104 #endif
3105 }
3106 
3107 static struct pernet_operations ip6_route_net_ops = {
3108 	.init = ip6_route_net_init,
3109 	.exit = ip6_route_net_exit,
3110 };
3111 
ipv6_inetpeer_init(struct net * net)3112 static int __net_init ipv6_inetpeer_init(struct net *net)
3113 {
3114 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3115 
3116 	if (!bp)
3117 		return -ENOMEM;
3118 	inet_peer_base_init(bp);
3119 	net->ipv6.peers = bp;
3120 	return 0;
3121 }
3122 
ipv6_inetpeer_exit(struct net * net)3123 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3124 {
3125 	struct inet_peer_base *bp = net->ipv6.peers;
3126 
3127 	net->ipv6.peers = NULL;
3128 	inetpeer_invalidate_tree(bp);
3129 	kfree(bp);
3130 }
3131 
3132 static struct pernet_operations ipv6_inetpeer_ops = {
3133 	.init	=	ipv6_inetpeer_init,
3134 	.exit	=	ipv6_inetpeer_exit,
3135 };
3136 
3137 static struct pernet_operations ip6_route_net_late_ops = {
3138 	.init = ip6_route_net_init_late,
3139 	.exit = ip6_route_net_exit_late,
3140 };
3141 
3142 static struct notifier_block ip6_route_dev_notifier = {
3143 	.notifier_call = ip6_route_dev_notify,
3144 	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
3145 };
3146 
ip6_route_init_special_entries(void)3147 void __init ip6_route_init_special_entries(void)
3148 {
3149 	/* Registering of the loopback is done before this portion of code,
3150 	 * the loopback reference in rt6_info will not be taken, do it
3151 	 * manually for init_net */
3152 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3153 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3154   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3155 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3156 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3157 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3158 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3159   #endif
3160 }
3161 
/*
 * One-time boot initialisation of the IPv6 routing layer: creates the
 * rt6_info slab cache, registers the per-netns subsystems, brings up
 * the FIB, xfrm and policy-rule machinery, installs the rtnetlink
 * route handlers and the netdevice notifier.
 *
 * Returns 0 on success or a negative errno; on failure everything
 * registered up to the failing step is unwound via the goto chain at
 * the bottom, in reverse order of registration.
 */
int __init ip6_route_init(void)
{
	int ret;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* The blackhole ops share the slab cache created above. */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	/* NOTE(review): if the second or third __rtnl_register() fails,
	 * the earlier successful registrations are not unregistered here;
	 * verify whether leaving them in place is intentional.
	 */
	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

out:
	return ret;

	/* Error unwinding: each label cleans up the step that succeeded
	 * immediately before the failure, then falls through to the
	 * earlier cleanups.  (The "xfrm6_init"/"fib6_rules_init" labels
	 * deliberately reuse the names of the functions whose failure
	 * jumps to them.)
	 */
out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
3234 
/*
 * Undo ip6_route_init(): tear down the notifier, pernet subsystems,
 * rule/xfrm/FIB machinery, dst accounting and slab cache, in the
 * reverse of their registration order.  The sequence mirrors the
 * error-unwind chain in ip6_route_init() and must not be reordered.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3247