1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61 
62 #include <asm/uaccess.h>
63 
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67 
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69 				    const struct in6_addr *dest);
70 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void		ip6_dst_destroy(struct dst_entry *);
75 static void		ip6_dst_ifdown(struct dst_entry *,
76 				       struct net_device *dev, int how);
77 static int		 ip6_dst_gc(struct dst_ops *ops);
78 
79 static int		ip6_pkt_discard(struct sk_buff *skb);
80 static int		ip6_pkt_discard_out(struct sk_buff *skb);
81 static void		ip6_link_failure(struct sk_buff *skb);
82 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83 					   struct sk_buff *skb, u32 mtu);
84 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85 					struct sk_buff *skb);
86 
87 #ifdef CONFIG_IPV6_ROUTE_INFO
88 static struct rt6_info *rt6_add_route_info(struct net_device *dev,
89 					   const struct in6_addr *prefix, int prefixlen,
90 					   const struct in6_addr *gwaddr, unsigned int pref);
91 static struct rt6_info *rt6_get_route_info(struct net_device *dev,
92 					   const struct in6_addr *prefix, int prefixlen,
93 					   const struct in6_addr *gwaddr);
94 #endif
95 
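/* Copy-on-write metrics for host (DST_HOST) routes: on the first metric
 * write, the template metrics are copied into the route's inet_peer and
 * dst->_metrics is switched over to the peer's copy with cmpxchg().
 */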
96 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
97 {
98 	struct rt6_info *rt = (struct rt6_info *) dst;
99 	struct inet_peer *peer;
100 	u32 *p = NULL;
101 
102 	if (!(rt->dst.flags & DST_HOST))
103 		return NULL;
104 
105 	peer = rt6_get_peer_create(rt);
106 	if (peer) {
107 		u32 *old_p = __DST_METRICS_PTR(old);
108 		unsigned long prev, new;
109 
110 		p = peer->metrics;
111 		if (inet_metrics_new(peer))
112 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113 
114 		new = (unsigned long) p;
115 		prev = cmpxchg(&dst->_metrics, old, new);
116 
117 		if (prev != old) {
118 			p = __DST_METRICS_PTR(prev);
119 			if (prev & DST_METRICS_READ_ONLY)
120 				p = NULL;
121 		}
122 	}
123 	return p;
124 }
125 
126 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127 					     struct sk_buff *skb,
128 					     const void *daddr)
129 {
130 	struct in6_addr *p = &rt->rt6i_gateway;
131 
132 	if (!ipv6_addr_any(p))
133 		return (const void *) p;
134 	else if (skb)
135 		return &ipv6_hdr(skb)->daddr;
136 	return daddr;
137 }
138 
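/* Resolve the neighbour entry for this dst: prefer the route's gateway,
 * fall back to the packet's destination address, and create a fresh
 * ndisc entry if nothing is cached yet.
 */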
139 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140 					  struct sk_buff *skb,
141 					  const void *daddr)
142 {
143 	struct rt6_info *rt = (struct rt6_info *) dst;
144 	struct neighbour *n;
145 
146 	daddr = choose_neigh_daddr(rt, skb, daddr);
147 	n = __ipv6_neigh_lookup(dst->dev, daddr);
148 	if (n)
149 		return n;
150 	return neigh_create(&nd_tbl, daddr, dst->dev);
151 }
152 
153 static struct dst_ops ip6_dst_ops_template = {
154 	.family			=	AF_INET6,
155 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
156 	.gc			=	ip6_dst_gc,
157 	.gc_thresh		=	1024,
158 	.check			=	ip6_dst_check,
159 	.default_advmss		=	ip6_default_advmss,
160 	.mtu			=	ip6_mtu,
161 	.cow_metrics		=	ipv6_cow_metrics,
162 	.destroy		=	ip6_dst_destroy,
163 	.ifdown			=	ip6_dst_ifdown,
164 	.negative_advice	=	ip6_negative_advice,
165 	.link_failure		=	ip6_link_failure,
166 	.update_pmtu		=	ip6_rt_update_pmtu,
167 	.redirect		=	rt6_do_redirect,
168 	.local_out		=	__ip6_local_out,
169 	.neigh_lookup		=	ip6_neigh_lookup,
170 };
171 
172 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
173 {
174 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
175 
176 	return mtu ? : dst->dev->mtu;
177 }
178 
179 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
180 					 struct sk_buff *skb, u32 mtu)
181 {
182 }
183 
184 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
185 				      struct sk_buff *skb)
186 {
187 }
188 
189 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
190 					 unsigned long old)
191 {
192 	return NULL;
193 }
194 
195 static struct dst_ops ip6_dst_blackhole_ops = {
196 	.family			=	AF_INET6,
197 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
198 	.destroy		=	ip6_dst_destroy,
199 	.check			=	ip6_dst_check,
200 	.mtu			=	ip6_blackhole_mtu,
201 	.default_advmss		=	ip6_default_advmss,
202 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
203 	.redirect		=	ip6_rt_blackhole_redirect,
204 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
205 	.neigh_lookup		=	ip6_neigh_lookup,
206 };
207 
208 static const u32 ip6_template_metrics[RTAX_MAX] = {
209 	[RTAX_HOPLIMIT - 1] = 0,
210 };
211 
212 static const struct rt6_info ip6_null_entry_template = {
213 	.dst = {
214 		.__refcnt	= ATOMIC_INIT(1),
215 		.__use		= 1,
216 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
217 		.error		= -ENETUNREACH,
218 		.input		= ip6_pkt_discard,
219 		.output		= ip6_pkt_discard_out,
220 	},
221 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
222 	.rt6i_protocol  = RTPROT_KERNEL,
223 	.rt6i_metric	= ~(u32) 0,
224 	.rt6i_ref	= ATOMIC_INIT(1),
225 };
226 
227 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
228 
229 static int ip6_pkt_prohibit(struct sk_buff *skb);
230 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
231 
232 static const struct rt6_info ip6_prohibit_entry_template = {
233 	.dst = {
234 		.__refcnt	= ATOMIC_INIT(1),
235 		.__use		= 1,
236 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
237 		.error		= -EACCES,
238 		.input		= ip6_pkt_prohibit,
239 		.output		= ip6_pkt_prohibit_out,
240 	},
241 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
242 	.rt6i_protocol  = RTPROT_KERNEL,
243 	.rt6i_metric	= ~(u32) 0,
244 	.rt6i_ref	= ATOMIC_INIT(1),
245 };
246 
247 static const struct rt6_info ip6_blk_hole_entry_template = {
248 	.dst = {
249 		.__refcnt	= ATOMIC_INIT(1),
250 		.__use		= 1,
251 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
252 		.error		= -EINVAL,
253 		.input		= dst_discard,
254 		.output		= dst_discard,
255 	},
256 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
257 	.rt6i_protocol  = RTPROT_KERNEL,
258 	.rt6i_metric	= ~(u32) 0,
259 	.rt6i_ref	= ATOMIC_INIT(1),
260 };
261 
262 #endif
263 
264 /* allocate dst with ip6_dst_ops */
265 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
266 					     struct net_device *dev,
267 					     int flags,
268 					     struct fib6_table *table)
269 {
270 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
271 					0, DST_OBSOLETE_FORCE_CHK, flags);
272 
273 	if (rt) {
274 		struct dst_entry *dst = &rt->dst;
275 
276 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
277 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
278 		rt->rt6i_genid = rt_genid(net);
279 		INIT_LIST_HEAD(&rt->rt6i_siblings);
280 		rt->rt6i_nsiblings = 0;
281 	}
282 	return rt;
283 }
284 
285 static void ip6_dst_destroy(struct dst_entry *dst)
286 {
287 	struct rt6_info *rt = (struct rt6_info *)dst;
288 	struct inet6_dev *idev = rt->rt6i_idev;
289 	struct dst_entry *from = dst->from;
290 
291 	if (!(rt->dst.flags & DST_HOST))
292 		dst_destroy_metrics_generic(dst);
293 
294 	if (idev) {
295 		rt->rt6i_idev = NULL;
296 		in6_dev_put(idev);
297 	}
298 
299 	dst->from = NULL;
300 	dst_release(from);
301 
302 	if (rt6_has_peer(rt)) {
303 		struct inet_peer *peer = rt6_peer_ptr(rt);
304 		inet_putpeer(peer);
305 	}
306 }
307 
308 void rt6_bind_peer(struct rt6_info *rt, int create)
309 {
310 	struct inet_peer_base *base;
311 	struct inet_peer *peer;
312 
313 	base = inetpeer_base_ptr(rt->_rt6i_peer);
314 	if (!base)
315 		return;
316 
317 	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
318 	if (peer) {
319 		if (!rt6_set_peer(rt, peer))
320 			inet_putpeer(peer);
321 	}
322 }
323 
324 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
325 			   int how)
326 {
327 	struct rt6_info *rt = (struct rt6_info *)dst;
328 	struct inet6_dev *idev = rt->rt6i_idev;
329 	struct net_device *loopback_dev =
330 		dev_net(dev)->loopback_dev;
331 
332 	if (dev != loopback_dev) {
333 		if (idev && idev->dev == dev) {
334 			struct inet6_dev *loopback_idev =
335 				in6_dev_get(loopback_dev);
336 			if (loopback_idev) {
337 				rt->rt6i_idev = loopback_idev;
338 				in6_dev_put(idev);
339 			}
340 		}
341 	}
342 }
343 
344 static bool rt6_check_expired(const struct rt6_info *rt)
345 {
346 	if (rt->rt6i_flags & RTF_EXPIRES) {
347 		if (time_after(jiffies, rt->dst.expires))
348 			return true;
349 	} else if (rt->dst.from) {
350 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
351 	}
352 	return false;
353 }
354 
355 static bool rt6_need_strict(const struct in6_addr *daddr)
356 {
357 	return ipv6_addr_type(daddr) &
358 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
359 }
360 
361 /* Multipath route selection:
362  *   Hash based function using packet header and flowlabel.
363  * Adapted from fib_info_hashfn()
364  */
365 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
366 			       const struct flowi6 *fl6)
367 {
368 	unsigned int val = fl6->flowi6_proto;
369 
370 	val ^= ipv6_addr_hash(&fl6->daddr);
371 	val ^= ipv6_addr_hash(&fl6->saddr);
372 
373 	/* Works only if this is not encapsulated */
374 	switch (fl6->flowi6_proto) {
375 	case IPPROTO_UDP:
376 	case IPPROTO_TCP:
377 	case IPPROTO_SCTP:
378 		val ^= (__force u16)fl6->fl6_sport;
379 		val ^= (__force u16)fl6->fl6_dport;
380 		break;
381 
382 	case IPPROTO_ICMPV6:
383 		val ^= (__force u16)fl6->fl6_icmp_type;
384 		val ^= (__force u16)fl6->fl6_icmp_code;
385 		break;
386 	}
387 	/* RFC6438 recommends using the flow label */
388 	val ^= (__force u32)fl6->flowlabel;
389 
390 	/* Perhaps this function needs to be tuned? */
391 	val = val ^ (val >> 7) ^ (val >> 12);
392 	return val % candidate_count;
393 }
394 
395 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
396 					     struct flowi6 *fl6)
397 {
398 	struct rt6_info *sibling, *next_sibling;
399 	int route_choosen;
400 
401 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
402 	/* Don't change the route if route_choosen == 0;
403 	 * the sibling list does not include this route itself.
404 	 */
405 	if (route_choosen)
406 		list_for_each_entry_safe(sibling, next_sibling,
407 				&match->rt6i_siblings, rt6i_siblings) {
408 			route_choosen--;
409 			if (route_choosen == 0) {
410 				match = sibling;
411 				break;
412 			}
413 		}
414 	return match;
415 }
416 
417 /*
418  *	Route lookup. Any table->tb6_lock is implied.
419  */
420 
421 static inline struct rt6_info *rt6_device_match(struct net *net,
422 						    struct rt6_info *rt,
423 						    const struct in6_addr *saddr,
424 						    int oif,
425 						    int flags)
426 {
427 	struct rt6_info *local = NULL;
428 	struct rt6_info *sprt;
429 
430 	if (!oif && ipv6_addr_any(saddr))
431 		goto out;
432 
433 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
434 		struct net_device *dev = sprt->dst.dev;
435 
436 		if (oif) {
437 			if (dev->ifindex == oif)
438 				return sprt;
439 			if (dev->flags & IFF_LOOPBACK) {
440 				if (!sprt->rt6i_idev ||
441 				    sprt->rt6i_idev->dev->ifindex != oif) {
442 					if (flags & RT6_LOOKUP_F_IFACE && oif)
443 						continue;
444 					if (local && (!oif ||
445 						      local->rt6i_idev->dev->ifindex == oif))
446 						continue;
447 				}
448 				local = sprt;
449 			}
450 		} else {
451 			if (ipv6_chk_addr(net, saddr, dev,
452 					  flags & RT6_LOOKUP_F_IFACE))
453 				return sprt;
454 		}
455 	}
456 
457 	if (oif) {
458 		if (local)
459 			return local;
460 
461 		if (flags & RT6_LOOKUP_F_IFACE)
462 			return net->ipv6.ip6_null_entry;
463 	}
464 out:
465 	return rt;
466 }
467 
468 #ifdef CONFIG_IPV6_ROUTER_PREF
469 static void rt6_probe(struct rt6_info *rt)
470 {
471 	struct neighbour *neigh;
472 	/*
473 	 * Okay, this does not seem to be appropriate
474 	 * for now, however, we need to check if it
475 	 * is really so; aka Router Reachability Probing.
476 	 *
477 	 * Router Reachability Probe MUST be rate-limited
478 	 * to no more than one per minute.
479 	 */
480 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
481 		return;
482 	rcu_read_lock_bh();
483 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
484 	if (neigh) {
485 		write_lock(&neigh->lock);
486 		if (neigh->nud_state & NUD_VALID)
487 			goto out;
488 	}
489 
490 	if (!neigh ||
491 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
492 		struct in6_addr mcaddr;
493 		struct in6_addr *target;
494 
495 		if (neigh) {
496 			neigh->updated = jiffies;
497 			write_unlock(&neigh->lock);
498 		}
499 
500 		target = (struct in6_addr *)&rt->rt6i_gateway;
501 		addrconf_addr_solict_mult(target, &mcaddr);
502 		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
503 	} else {
504 out:
505 		write_unlock(&neigh->lock);
506 	}
507 	rcu_read_unlock_bh();
508 }
509 #else
510 static inline void rt6_probe(struct rt6_info *rt)
511 {
512 }
513 #endif
514 
515 /*
516  * Default Router Selection (RFC 2461 6.3.6)
517  */
518 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
519 {
520 	struct net_device *dev = rt->dst.dev;
521 	if (!oif || dev->ifindex == oif)
522 		return 2;
523 	if ((dev->flags & IFF_LOOPBACK) &&
524 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
525 		return 1;
526 	return 0;
527 }
528 
529 static inline bool rt6_check_neigh(struct rt6_info *rt)
530 {
531 	struct neighbour *neigh;
532 	bool ret = false;
533 
534 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
535 	    !(rt->rt6i_flags & RTF_GATEWAY))
536 		return true;
537 
538 	rcu_read_lock_bh();
539 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
540 	if (neigh) {
541 		read_lock(&neigh->lock);
542 		if (neigh->nud_state & NUD_VALID)
543 			ret = true;
544 #ifdef CONFIG_IPV6_ROUTER_PREF
545 		else if (!(neigh->nud_state & NUD_FAILED))
546 			ret = true;
547 #endif
548 		read_unlock(&neigh->lock);
549 	}
550 	rcu_read_unlock_bh();
551 
552 	return ret;
553 }
554 
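/* Score a candidate route: 2 for a match on the requested interface,
 * 1 for a matching loopback device, plus the decoded router-preference
 * bits.  A negative score means the route must be skipped under the
 * given strictness flags.
 */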
555 static int rt6_score_route(struct rt6_info *rt, int oif,
556 			   int strict)
557 {
558 	int m;
559 
560 	m = rt6_check_dev(rt, oif);
561 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
562 		return -1;
563 #ifdef CONFIG_IPV6_ROUTER_PREF
564 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
565 #endif
566 	if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
567 		return -1;
568 	return m;
569 }
570 
571 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
572 				   int *mpri, struct rt6_info *match)
573 {
574 	int m;
575 
576 	if (rt6_check_expired(rt))
577 		goto out;
578 
579 	m = rt6_score_route(rt, oif, strict);
580 	if (m < 0)
581 		goto out;
582 
583 	if (m > *mpri) {
584 		if (strict & RT6_LOOKUP_F_REACHABLE)
585 			rt6_probe(match);
586 		*mpri = m;
587 		match = rt;
588 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
589 		rt6_probe(rt);
590 	}
591 
592 out:
593 	return match;
594 }
595 
596 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
597 				     struct rt6_info *rr_head,
598 				     u32 metric, int oif, int strict)
599 {
600 	struct rt6_info *rt, *match;
601 	int mpri = -1;
602 
603 	match = NULL;
604 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
605 	     rt = rt->dst.rt6_next)
606 		match = find_match(rt, oif, strict, &mpri, match);
607 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
608 	     rt = rt->dst.rt6_next)
609 		match = find_match(rt, oif, strict, &mpri, match);
610 
611 	return match;
612 }
613 
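/* Pick the best route of a fib6 node, starting the scan at the node's
 * round-robin pointer.  If nothing (probably) reachable is found, advance
 * rr_ptr so the next lookup tries the following route of equal metric.
 */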
614 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
615 {
616 	struct rt6_info *match, *rt0;
617 	struct net *net;
618 
619 	rt0 = fn->rr_ptr;
620 	if (!rt0)
621 		fn->rr_ptr = rt0 = fn->leaf;
622 
623 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
624 
625 	if (!match &&
626 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
627 		struct rt6_info *next = rt0->dst.rt6_next;
628 
629 		/* no entries matched; do round-robin */
630 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
631 			next = fn->leaf;
632 
633 		if (next != rt0)
634 			fn->rr_ptr = next;
635 	}
636 
637 	net = dev_net(rt0->dst.dev);
638 	return match ? match : net->ipv6.ip6_null_entry;
639 }
640 
641 #ifdef CONFIG_IPV6_ROUTE_INFO
642 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
643 		  const struct in6_addr *gwaddr)
644 {
645 	struct route_info *rinfo = (struct route_info *) opt;
646 	struct in6_addr prefix_buf, *prefix;
647 	unsigned int pref;
648 	unsigned long lifetime;
649 	struct rt6_info *rt;
650 
651 	if (len < sizeof(struct route_info)) {
652 		return -EINVAL;
653 	}
654 
655 	/* Sanity check for prefix_len and length */
656 	if (rinfo->length > 3) {
657 		return -EINVAL;
658 	} else if (rinfo->prefix_len > 128) {
659 		return -EINVAL;
660 	} else if (rinfo->prefix_len > 64) {
661 		if (rinfo->length < 2) {
662 			return -EINVAL;
663 		}
664 	} else if (rinfo->prefix_len > 0) {
665 		if (rinfo->length < 1) {
666 			return -EINVAL;
667 		}
668 	}
669 
670 	pref = rinfo->route_pref;
671 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
672 		return -EINVAL;
673 
674 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
675 
676 	if (rinfo->length == 3)
677 		prefix = (struct in6_addr *)rinfo->prefix;
678 	else {
679 		/* this function is safe */
680 		ipv6_addr_prefix(&prefix_buf,
681 				 (struct in6_addr *)rinfo->prefix,
682 				 rinfo->prefix_len);
683 		prefix = &prefix_buf;
684 	}
685 
686 	if (rinfo->prefix_len == 0)
687 		rt = rt6_get_dflt_router(gwaddr, dev);
688 	else
689 		rt = rt6_get_route_info(dev, prefix, rinfo->prefix_len, gwaddr);
690 
691 	if (rt && !lifetime) {
692 		ip6_del_rt(rt);
693 		rt = NULL;
694 	}
695 
696 	if (!rt && lifetime)
697 		rt = rt6_add_route_info(dev, prefix, rinfo->prefix_len, gwaddr, pref);
698 	else if (rt)
699 		rt->rt6i_flags = RTF_ROUTEINFO |
700 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
701 
702 	if (rt) {
703 		if (!addrconf_finite_timeout(lifetime))
704 			rt6_clean_expires(rt);
705 		else
706 			rt6_set_expires(rt, jiffies + HZ * lifetime);
707 
708 		ip6_rt_put(rt);
709 	}
710 	return 0;
711 }
712 #endif
713 
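/* If the lookup ended on the null entry, walk back up the fib6 tree
 * (descending into source-address subtrees where present) until a node
 * carrying route information is found, then restart the match there.
 */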
714 #define BACKTRACK(__net, saddr)			\
715 do { \
716 	if (rt == __net->ipv6.ip6_null_entry) {	\
717 		struct fib6_node *pn; \
718 		while (1) { \
719 			if (fn->fn_flags & RTN_TL_ROOT) \
720 				goto out; \
721 			pn = fn->parent; \
722 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
723 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
724 			else \
725 				fn = pn; \
726 			if (fn->fn_flags & RTN_RTINFO) \
727 				goto restart; \
728 		} \
729 	} \
730 } while (0)
731 
732 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
733 					     struct fib6_table *table,
734 					     struct flowi6 *fl6, int flags)
735 {
736 	struct fib6_node *fn;
737 	struct rt6_info *rt;
738 
739 	read_lock_bh(&table->tb6_lock);
740 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
741 restart:
742 	rt = fn->leaf;
743 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
744 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
745 		rt = rt6_multipath_select(rt, fl6);
746 	BACKTRACK(net, &fl6->saddr);
747 out:
748 	dst_use(&rt->dst, jiffies);
749 	read_unlock_bh(&table->tb6_lock);
750 	return rt;
751 
752 }
753 
754 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
755 				    int flags)
756 {
757 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
758 }
759 EXPORT_SYMBOL_GPL(ip6_route_lookup);
760 
761 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
762 			    const struct in6_addr *saddr, int oif, int strict)
763 {
764 	struct flowi6 fl6 = {
765 		.flowi6_oif = oif,
766 		.daddr = *daddr,
767 	};
768 	struct dst_entry *dst;
769 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
770 
771 	if (saddr) {
772 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
773 		flags |= RT6_LOOKUP_F_HAS_SADDR;
774 	}
775 
776 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
777 	if (dst->error == 0)
778 		return (struct rt6_info *) dst;
779 
780 	dst_release(dst);
781 
782 	return NULL;
783 }
784 
785 EXPORT_SYMBOL(rt6_lookup);
786 
787 /* ip6_ins_rt is called with table->tb6_lock NOT held.
788    It takes a new route entry; if the addition fails for any reason
789    the route is freed. In any case, if the caller does not hold a
790    reference on it, it may be destroyed.
791  */
792 
793 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
794 {
795 	int err;
796 	struct fib6_table *table;
797 
798 	table = rt->rt6i_table;
799 	write_lock_bh(&table->tb6_lock);
800 	err = fib6_add(&table->tb6_root, rt, info);
801 	write_unlock_bh(&table->tb6_lock);
802 
803 	return err;
804 }
805 
806 int ip6_ins_rt(struct rt6_info *rt)
807 {
808 	struct nl_info info = {
809 		.nl_net = dev_net(rt->dst.dev),
810 	};
811 	return __ip6_ins_rt(rt, &info);
812 }
813 
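/* rt6_alloc_cow() clones a route into a host (/128) RTF_CACHE entry; for
 * non-gateway routes the destination itself becomes the next hop, and
 * RTF_ANYCAST is set when the destination equals the prefix address of
 * the original route.  rt6_alloc_clone() below only marks the copy as
 * RTF_CACHE.
 */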
814 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
815 				      const struct in6_addr *daddr,
816 				      const struct in6_addr *saddr)
817 {
818 	struct rt6_info *rt;
819 
820 	/*
821 	 *	Clone the route.
822 	 */
823 
824 	rt = ip6_rt_copy(ort, daddr);
825 
826 	if (rt) {
827 		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
828 			if (ort->rt6i_dst.plen != 128 &&
829 			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
830 				rt->rt6i_flags |= RTF_ANYCAST;
831 			rt->rt6i_gateway = *daddr;
832 		}
833 
834 		rt->rt6i_flags |= RTF_CACHE;
835 
836 #ifdef CONFIG_IPV6_SUBTREES
837 		if (rt->rt6i_src.plen && saddr) {
838 			rt->rt6i_src.addr = *saddr;
839 			rt->rt6i_src.plen = 128;
840 		}
841 #endif
842 	}
843 
844 	return rt;
845 }
846 
847 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
848 					const struct in6_addr *daddr)
849 {
850 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
851 
852 	if (rt)
853 		rt->rt6i_flags |= RTF_CACHE;
854 	return rt;
855 }
856 
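/* Core policy routing for the input and output paths: select a route
 * (preferring probably-reachable routers), clone it into an RTF_CACHE
 * entry and insert the clone, retrying a few times if another CPU wins
 * the insertion race.
 */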
857 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
858 				      struct flowi6 *fl6, int flags)
859 {
860 	struct fib6_node *fn;
861 	struct rt6_info *rt, *nrt;
862 	int strict = 0;
863 	int attempts = 3;
864 	int err;
865 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
866 
867 	strict |= flags & RT6_LOOKUP_F_IFACE;
868 
869 relookup:
870 	read_lock_bh(&table->tb6_lock);
871 
872 restart_2:
873 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
874 
875 restart:
876 	rt = rt6_select(fn, oif, strict | reachable);
877 	if (rt->rt6i_nsiblings && oif == 0)
878 		rt = rt6_multipath_select(rt, fl6);
879 	BACKTRACK(net, &fl6->saddr);
880 	if (rt == net->ipv6.ip6_null_entry ||
881 	    rt->rt6i_flags & RTF_CACHE)
882 		goto out;
883 
884 	dst_hold(&rt->dst);
885 	read_unlock_bh(&table->tb6_lock);
886 
887 	if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
888 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
889 	else if (!(rt->dst.flags & DST_HOST))
890 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
891 	else
892 		goto out2;
893 
894 	ip6_rt_put(rt);
895 	rt = nrt ? : net->ipv6.ip6_null_entry;
896 
897 	dst_hold(&rt->dst);
898 	if (nrt) {
899 		err = ip6_ins_rt(nrt);
900 		if (!err)
901 			goto out2;
902 	}
903 
904 	if (--attempts <= 0)
905 		goto out2;
906 
907 	/*
908 	 * Race condition! In the gap, when table->tb6_lock was
909 	 * released someone could insert this route.  Relookup.
910 	 */
911 	ip6_rt_put(rt);
912 	goto relookup;
913 
914 out:
915 	if (reachable) {
916 		reachable = 0;
917 		goto restart_2;
918 	}
919 	dst_hold(&rt->dst);
920 	read_unlock_bh(&table->tb6_lock);
921 out2:
922 	rt->dst.lastuse = jiffies;
923 	rt->dst.__use++;
924 
925 	return rt;
926 }
927 
928 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
929 					    struct flowi6 *fl6, int flags)
930 {
931 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
932 }
933 
934 static struct dst_entry *ip6_route_input_lookup(struct net *net,
935 						struct net_device *dev,
936 						struct flowi6 *fl6, int flags)
937 {
938 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
939 		flags |= RT6_LOOKUP_F_IFACE;
940 
941 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
942 }
943 
944 void ip6_route_input(struct sk_buff *skb)
945 {
946 	const struct ipv6hdr *iph = ipv6_hdr(skb);
947 	struct net *net = dev_net(skb->dev);
948 	int flags = RT6_LOOKUP_F_HAS_SADDR;
949 	struct flowi6 fl6 = {
950 		.flowi6_iif = skb->dev->ifindex,
951 		.daddr = iph->daddr,
952 		.saddr = iph->saddr,
953 		.flowlabel = ip6_flowinfo(iph),
954 		.flowi6_mark = skb->mark,
955 		.flowi6_proto = iph->nexthdr,
956 	};
957 
958 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
959 }
960 
961 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
962 					     struct flowi6 *fl6, int flags)
963 {
964 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
965 }
966 
967 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
968 				    struct flowi6 *fl6)
969 {
970 	int flags = 0;
971 
972 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
973 
974 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
975 		flags |= RT6_LOOKUP_F_IFACE;
976 
977 	if (!ipv6_addr_any(&fl6->saddr))
978 		flags |= RT6_LOOKUP_F_HAS_SADDR;
979 	else if (sk)
980 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
981 
982 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
983 }
984 
985 EXPORT_SYMBOL(ip6_route_output);
986 
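/* Build a blackhole copy of @dst_orig: it keeps the device, metrics and
 * addresses of the original route but silently discards any traffic
 * sent through it.
 */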
987 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
988 {
989 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
990 	struct dst_entry *new = NULL;
991 
992 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
993 	if (rt) {
994 		new = &rt->dst;
995 
996 		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
997 		rt6_init_peer(rt, net->ipv6.peers);
998 
999 		new->__use = 1;
1000 		new->input = dst_discard;
1001 		new->output = dst_discard;
1002 
1003 		if (dst_metrics_read_only(&ort->dst))
1004 			new->_metrics = ort->dst._metrics;
1005 		else
1006 			dst_copy_metrics(new, &ort->dst);
1007 		rt->rt6i_idev = ort->rt6i_idev;
1008 		if (rt->rt6i_idev)
1009 			in6_dev_hold(rt->rt6i_idev);
1010 
1011 		rt->rt6i_gateway = ort->rt6i_gateway;
1012 		rt->rt6i_flags = ort->rt6i_flags;
1013 		rt->rt6i_metric = 0;
1014 
1015 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1016 #ifdef CONFIG_IPV6_SUBTREES
1017 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1018 #endif
1019 
1020 		dst_free(new);
1021 	}
1022 
1023 	dst_release(dst_orig);
1024 	return new ? new : ERR_PTR(-ENOMEM);
1025 }
1026 
1027 /*
1028  *	Destination cache support functions
1029  */
1030 
1031 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1032 {
1033 	struct rt6_info *rt;
1034 
1035 	rt = (struct rt6_info *) dst;
1036 
1037 	/* All IPV6 dsts are created with ->obsolete set to the value
1038 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1039 	 * into this function always.
1040 	 */
1041 	if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1042 		return NULL;
1043 
1044 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1045 		return dst;
1046 
1047 	return NULL;
1048 }
1049 
1050 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1051 {
1052 	struct rt6_info *rt = (struct rt6_info *) dst;
1053 
1054 	if (rt) {
1055 		if (rt->rt6i_flags & RTF_CACHE) {
1056 			if (rt6_check_expired(rt)) {
1057 				ip6_del_rt(rt);
1058 				dst = NULL;
1059 			}
1060 		} else {
1061 			dst_release(dst);
1062 			dst = NULL;
1063 		}
1064 	}
1065 	return dst;
1066 }
1067 
1068 static void ip6_link_failure(struct sk_buff *skb)
1069 {
1070 	struct rt6_info *rt;
1071 
1072 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1073 
1074 	rt = (struct rt6_info *) skb_dst(skb);
1075 	if (rt) {
1076 		if (rt->rt6i_flags & RTF_CACHE)
1077 			rt6_update_expires(rt, 0);
1078 		else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1079 			rt->rt6i_node->fn_sernum = -1;
1080 	}
1081 }
1082 
1083 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1084 			       struct sk_buff *skb, u32 mtu)
1085 {
1086 	struct rt6_info *rt6 = (struct rt6_info*)dst;
1087 
1088 	dst_confirm(dst);
1089 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1090 		struct net *net = dev_net(dst->dev);
1091 
1092 		rt6->rt6i_flags |= RTF_MODIFIED;
1093 		if (mtu < IPV6_MIN_MTU) {
1094 			u32 features = dst_metric(dst, RTAX_FEATURES);
1095 			mtu = IPV6_MIN_MTU;
1096 			features |= RTAX_FEATURE_ALLFRAG;
1097 			dst_metric_set(dst, RTAX_FEATURES, features);
1098 		}
1099 		dst_metric_set(dst, RTAX_MTU, mtu);
1100 		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1101 	}
1102 }
1103 
1104 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1105 		     int oif, u32 mark, kuid_t uid)
1106 {
1107 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1108 	struct dst_entry *dst;
1109 	struct flowi6 fl6;
1110 
1111 	memset(&fl6, 0, sizeof(fl6));
1112 	fl6.flowi6_oif = oif;
1113 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1114 	fl6.flowi6_flags = 0;
1115 	fl6.daddr = iph->daddr;
1116 	fl6.saddr = iph->saddr;
1117 	fl6.flowlabel = ip6_flowinfo(iph);
1118 	fl6.flowi6_uid = uid;
1119 
1120 	dst = ip6_route_output(net, NULL, &fl6);
1121 	if (!dst->error)
1122 		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1123 	dst_release(dst);
1124 }
1125 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1126 
1127 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1128 {
1129 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1130 			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
1131 }
1132 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1133 
1134 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1135 		  kuid_t uid)
1136 {
1137 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1138 	struct dst_entry *dst;
1139 	struct flowi6 fl6;
1140 
1141 	memset(&fl6, 0, sizeof(fl6));
1142 	fl6.flowi6_oif = oif;
1143 	fl6.flowi6_mark = mark;
1144 	fl6.flowi6_flags = 0;
1145 	fl6.daddr = iph->daddr;
1146 	fl6.saddr = iph->saddr;
1147 	fl6.flowlabel = ip6_flowinfo(iph);
1148 	fl6.flowi6_uid = uid;
1149 
1150 	dst = ip6_route_output(net, NULL, &fl6);
1151 	if (!dst->error)
1152 		rt6_do_redirect(dst, NULL, skb);
1153 	dst_release(dst);
1154 }
1155 EXPORT_SYMBOL_GPL(ip6_redirect);
1156 
1157 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1158 {
1159 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1160 		     sk->sk_uid);
1161 }
1162 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1163 
1164 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1165 {
1166 	struct net_device *dev = dst->dev;
1167 	unsigned int mtu = dst_mtu(dst);
1168 	struct net *net = dev_net(dev);
1169 
1170 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1171 
1172 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1173 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1174 
1175 	/*
1176 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1177 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1178 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1179 	 * rely only on pmtu discovery"
1180 	 */
1181 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1182 		mtu = IPV6_MAXPLEN;
1183 	return mtu;
1184 }
1185 
1186 static unsigned int ip6_mtu(const struct dst_entry *dst)
1187 {
1188 	struct inet6_dev *idev;
1189 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1190 
1191 	if (mtu)
1192 		return mtu;
1193 
1194 	mtu = IPV6_MIN_MTU;
1195 
1196 	rcu_read_lock();
1197 	idev = __in6_dev_get(dst->dev);
1198 	if (idev)
1199 		mtu = idev->cnf.mtu6;
1200 	rcu_read_unlock();
1201 
1202 	return mtu;
1203 }
1204 
1205 static struct dst_entry *icmp6_dst_gc_list;
1206 static DEFINE_SPINLOCK(icmp6_dst_lock);
1207 
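/* Allocate a throw-away host route for an outgoing ICMPv6/ndisc packet.
 * These dsts are never inserted into the FIB; they are chained on
 * icmp6_dst_gc_list and reclaimed by icmp6_dst_gc() once unreferenced.
 */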
1208 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1209 				  struct flowi6 *fl6)
1210 {
1211 	struct dst_entry *dst;
1212 	struct rt6_info *rt;
1213 	struct inet6_dev *idev = in6_dev_get(dev);
1214 	struct net *net = dev_net(dev);
1215 
1216 	if (unlikely(!idev))
1217 		return ERR_PTR(-ENODEV);
1218 
1219 	rt = ip6_dst_alloc(net, dev, 0, NULL);
1220 	if (unlikely(!rt)) {
1221 		in6_dev_put(idev);
1222 		dst = ERR_PTR(-ENOMEM);
1223 		goto out;
1224 	}
1225 
1226 	rt->dst.flags |= DST_HOST;
1227 	rt->dst.output  = ip6_output;
1228 	atomic_set(&rt->dst.__refcnt, 1);
1229 	rt->rt6i_dst.addr = fl6->daddr;
1230 	rt->rt6i_dst.plen = 128;
1231 	rt->rt6i_idev     = idev;
1232 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1233 
1234 	spin_lock_bh(&icmp6_dst_lock);
1235 	rt->dst.next = icmp6_dst_gc_list;
1236 	icmp6_dst_gc_list = &rt->dst;
1237 	spin_unlock_bh(&icmp6_dst_lock);
1238 
1239 	fib6_force_start_gc(net);
1240 
1241 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1242 
1243 out:
1244 	return dst;
1245 }
1246 
1247 int icmp6_dst_gc(void)
1248 {
1249 	struct dst_entry *dst, **pprev;
1250 	int more = 0;
1251 
1252 	spin_lock_bh(&icmp6_dst_lock);
1253 	pprev = &icmp6_dst_gc_list;
1254 
1255 	while ((dst = *pprev) != NULL) {
1256 		if (!atomic_read(&dst->__refcnt)) {
1257 			*pprev = dst->next;
1258 			dst_free(dst);
1259 		} else {
1260 			pprev = &dst->next;
1261 			++more;
1262 		}
1263 	}
1264 
1265 	spin_unlock_bh(&icmp6_dst_lock);
1266 
1267 	return more;
1268 }
1269 
1270 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1271 			    void *arg)
1272 {
1273 	struct dst_entry *dst, **pprev;
1274 
1275 	spin_lock_bh(&icmp6_dst_lock);
1276 	pprev = &icmp6_dst_gc_list;
1277 	while ((dst = *pprev) != NULL) {
1278 		struct rt6_info *rt = (struct rt6_info *) dst;
1279 		if (func(rt, arg)) {
1280 			*pprev = dst->next;
1281 			dst_free(dst);
1282 		} else {
1283 			pprev = &dst->next;
1284 		}
1285 	}
1286 	spin_unlock_bh(&icmp6_dst_lock);
1287 }
1288 
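/* dst garbage collection: run the fib6 GC when the cache has grown past
 * ip6_rt_max_size or the minimum interval has elapsed, and adapt the GC
 * aggressiveness (ip6_rt_gc_expire) to how full the cache remains.
 */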
1289 static int ip6_dst_gc(struct dst_ops *ops)
1290 {
1291 	unsigned long now = jiffies;
1292 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1293 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1294 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1295 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1296 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1297 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1298 	int entries;
1299 
1300 	entries = dst_entries_get_fast(ops);
1301 	if (time_after(rt_last_gc + rt_min_interval, now) &&
1302 	    entries <= rt_max_size)
1303 		goto out;
1304 
1305 	net->ipv6.ip6_rt_gc_expire++;
1306 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1307 	net->ipv6.ip6_rt_last_gc = now;
1308 	entries = dst_entries_get_slow(ops);
1309 	if (entries < ops->gc_thresh)
1310 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1311 out:
1312 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1313 	return entries > rt_max_size;
1314 }
1315 
1316 int ip6_dst_hoplimit(struct dst_entry *dst)
1317 {
1318 	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1319 	if (hoplimit == 0) {
1320 		struct net_device *dev = dst->dev;
1321 		struct inet6_dev *idev;
1322 
1323 		rcu_read_lock();
1324 		idev = __in6_dev_get(dev);
1325 		if (idev)
1326 			hoplimit = idev->cnf.hop_limit;
1327 		else
1328 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1329 		rcu_read_unlock();
1330 	}
1331 	return hoplimit;
1332 }
1333 EXPORT_SYMBOL(ip6_dst_hoplimit);
1334 
1335 /*
1336  *	Route manipulation: add and delete FIB entries.
1337  */
1338 
1339 int ip6_route_add(struct fib6_config *cfg)
1340 {
1341 	int err;
1342 	struct net *net = cfg->fc_nlinfo.nl_net;
1343 	struct rt6_info *rt = NULL;
1344 	struct net_device *dev = NULL;
1345 	struct inet6_dev *idev = NULL;
1346 	struct fib6_table *table;
1347 	int addr_type;
1348 
1349 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1350 		return -EINVAL;
1351 #ifndef CONFIG_IPV6_SUBTREES
1352 	if (cfg->fc_src_len)
1353 		return -EINVAL;
1354 #endif
1355 	if (cfg->fc_ifindex) {
1356 		err = -ENODEV;
1357 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1358 		if (!dev)
1359 			goto out;
1360 		idev = in6_dev_get(dev);
1361 		if (!idev)
1362 			goto out;
1363 	}
1364 
1365 	if (cfg->fc_metric == 0)
1366 		cfg->fc_metric = IP6_RT_PRIO_USER;
1367 
1368 	err = -ENOBUFS;
1369 	if (cfg->fc_nlinfo.nlh &&
1370 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1371 		table = fib6_get_table(net, cfg->fc_table);
1372 		if (!table) {
1373 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1374 			table = fib6_new_table(net, cfg->fc_table);
1375 		}
1376 	} else {
1377 		table = fib6_new_table(net, cfg->fc_table);
1378 	}
1379 
1380 	if (!table)
1381 		goto out;
1382 
1383 	rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1384 
1385 	if (!rt) {
1386 		err = -ENOMEM;
1387 		goto out;
1388 	}
1389 
1390 	if (cfg->fc_flags & RTF_EXPIRES)
1391 		rt6_set_expires(rt, jiffies +
1392 				clock_t_to_jiffies(cfg->fc_expires));
1393 	else
1394 		rt6_clean_expires(rt);
1395 
1396 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1397 		cfg->fc_protocol = RTPROT_BOOT;
1398 	rt->rt6i_protocol = cfg->fc_protocol;
1399 
1400 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1401 
1402 	if (addr_type & IPV6_ADDR_MULTICAST)
1403 		rt->dst.input = ip6_mc_input;
1404 	else if (cfg->fc_flags & RTF_LOCAL)
1405 		rt->dst.input = ip6_input;
1406 	else
1407 		rt->dst.input = ip6_forward;
1408 
1409 	rt->dst.output = ip6_output;
1410 
1411 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1412 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1413 	if (rt->rt6i_dst.plen == 128)
1414 	       rt->dst.flags |= DST_HOST;
1415 
1416 	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1417 		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1418 		if (!metrics) {
1419 			err = -ENOMEM;
1420 			goto out;
1421 		}
1422 		dst_init_metrics(&rt->dst, metrics, 0);
1423 	}
1424 #ifdef CONFIG_IPV6_SUBTREES
1425 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1426 	rt->rt6i_src.plen = cfg->fc_src_len;
1427 #endif
1428 
1429 	rt->rt6i_metric = cfg->fc_metric;
1430 
1431 	/* We cannot add true routes via loopback here,
1432 	   they would result in kernel looping; promote them to reject routes
1433 	 */
1434 	if ((cfg->fc_flags & RTF_REJECT) ||
1435 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1436 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1437 	     !(cfg->fc_flags & RTF_LOCAL))) {
1438 		/* hold loopback dev/idev if we haven't done so. */
1439 		if (dev != net->loopback_dev) {
1440 			if (dev) {
1441 				dev_put(dev);
1442 				in6_dev_put(idev);
1443 			}
1444 			dev = net->loopback_dev;
1445 			dev_hold(dev);
1446 			idev = in6_dev_get(dev);
1447 			if (!idev) {
1448 				err = -ENODEV;
1449 				goto out;
1450 			}
1451 		}
1452 		rt->dst.output = ip6_pkt_discard_out;
1453 		rt->dst.input = ip6_pkt_discard;
1454 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1455 		switch (cfg->fc_type) {
1456 		case RTN_BLACKHOLE:
1457 			rt->dst.error = -EINVAL;
1458 			break;
1459 		case RTN_PROHIBIT:
1460 			rt->dst.error = -EACCES;
1461 			break;
1462 		case RTN_THROW:
1463 			rt->dst.error = -EAGAIN;
1464 			break;
1465 		default:
1466 			rt->dst.error = -ENETUNREACH;
1467 			break;
1468 		}
1469 		goto install_route;
1470 	}
1471 
1472 	if (cfg->fc_flags & RTF_GATEWAY) {
1473 		const struct in6_addr *gw_addr;
1474 		int gwa_type;
1475 
1476 		gw_addr = &cfg->fc_gateway;
1477 		rt->rt6i_gateway = *gw_addr;
1478 		gwa_type = ipv6_addr_type(gw_addr);
1479 
1480 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1481 			struct rt6_info *grt;
1482 
1483 			/* IPv6 strictly forbids using non-link-local
1484 			   addresses as the nexthop address; otherwise the
1485 			   router would not be able to send redirects.
1486 			   That is generally right, but in some (rare!)
1487 			   circumstances (SIT, PtP, NBMA NOARP links) it is
1488 			   handy to allow some exceptions. --ANK
1489 			 */
1490 			err = -EINVAL;
1491 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1492 				goto out;
1493 
1494 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1495 
1496 			err = -EHOSTUNREACH;
1497 			if (!grt)
1498 				goto out;
1499 			if (dev) {
1500 				if (dev != grt->dst.dev) {
1501 					ip6_rt_put(grt);
1502 					goto out;
1503 				}
1504 			} else {
1505 				dev = grt->dst.dev;
1506 				idev = grt->rt6i_idev;
1507 				dev_hold(dev);
1508 				in6_dev_hold(grt->rt6i_idev);
1509 			}
1510 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1511 				err = 0;
1512 			ip6_rt_put(grt);
1513 
1514 			if (err)
1515 				goto out;
1516 		}
1517 		err = -EINVAL;
1518 		if (!dev || (dev->flags & IFF_LOOPBACK))
1519 			goto out;
1520 	}
1521 
1522 	err = -ENODEV;
1523 	if (!dev)
1524 		goto out;
1525 
1526 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1527 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1528 			err = -EINVAL;
1529 			goto out;
1530 		}
1531 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1532 		rt->rt6i_prefsrc.plen = 128;
1533 	} else
1534 		rt->rt6i_prefsrc.plen = 0;
1535 
1536 	rt->rt6i_flags = cfg->fc_flags;
1537 
1538 install_route:
1539 	if (cfg->fc_mx) {
1540 		struct nlattr *nla;
1541 		int remaining;
1542 
1543 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1544 			int type = nla_type(nla);
1545 
1546 			if (type) {
1547 				if (type > RTAX_MAX) {
1548 					err = -EINVAL;
1549 					goto out;
1550 				}
1551 
1552 				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1553 			}
1554 		}
1555 	}
1556 
1557 	rt->dst.dev = dev;
1558 	rt->rt6i_idev = idev;
1559 	rt->rt6i_table = table;
1560 
1561 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1562 
1563 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1564 
1565 out:
1566 	if (dev)
1567 		dev_put(dev);
1568 	if (idev)
1569 		in6_dev_put(idev);
1570 	if (rt)
1571 		dst_free(&rt->dst);
1572 	return err;
1573 }
1574 
1575 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1576 {
1577 	int err;
1578 	struct fib6_table *table;
1579 	struct net *net = dev_net(rt->dst.dev);
1580 
1581 	if (rt == net->ipv6.ip6_null_entry) {
1582 		err = -ENOENT;
1583 		goto out;
1584 	}
1585 
1586 	table = rt->rt6i_table;
1587 	write_lock_bh(&table->tb6_lock);
1588 	err = fib6_del(rt, info);
1589 	write_unlock_bh(&table->tb6_lock);
1590 
1591 out:
1592 	ip6_rt_put(rt);
1593 	return err;
1594 }
1595 
1596 int ip6_del_rt(struct rt6_info *rt)
1597 {
1598 	struct nl_info info = {
1599 		.nl_net = dev_net(rt->dst.dev),
1600 	};
1601 	return __ip6_del_rt(rt, &info);
1602 }
1603 
1604 static int ip6_route_del(struct fib6_config *cfg)
1605 {
1606 	struct fib6_table *table;
1607 	struct fib6_node *fn;
1608 	struct rt6_info *rt;
1609 	int err = -ESRCH;
1610 
1611 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1612 	if (!table)
1613 		return err;
1614 
1615 	read_lock_bh(&table->tb6_lock);
1616 
1617 	fn = fib6_locate(&table->tb6_root,
1618 			 &cfg->fc_dst, cfg->fc_dst_len,
1619 			 &cfg->fc_src, cfg->fc_src_len);
1620 
1621 	if (fn) {
1622 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1623 			if (cfg->fc_ifindex &&
1624 			    (!rt->dst.dev ||
1625 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1626 				continue;
1627 			if (cfg->fc_flags & RTF_GATEWAY &&
1628 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1629 				continue;
1630 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1631 				continue;
1632 			dst_hold(&rt->dst);
1633 			read_unlock_bh(&table->tb6_lock);
1634 
1635 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1636 		}
1637 	}
1638 	read_unlock_bh(&table->tb6_lock);
1639 
1640 	return err;
1641 }
1642 
1643 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1644 {
1645 	struct net *net = dev_net(skb->dev);
1646 	struct netevent_redirect netevent;
1647 	struct rt6_info *rt, *nrt = NULL;
1648 	struct ndisc_options ndopts;
1649 	struct inet6_dev *in6_dev;
1650 	struct neighbour *neigh;
1651 	struct rd_msg *msg;
1652 	int optlen, on_link;
1653 	u8 *lladdr;
1654 
1655 	optlen = skb->tail - skb->transport_header;
1656 	optlen -= sizeof(*msg);
1657 
1658 	if (optlen < 0) {
1659 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1660 		return;
1661 	}
1662 
1663 	msg = (struct rd_msg *)icmp6_hdr(skb);
1664 
1665 	if (ipv6_addr_is_multicast(&msg->dest)) {
1666 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1667 		return;
1668 	}
1669 
1670 	on_link = 0;
1671 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1672 		on_link = 1;
1673 	} else if (ipv6_addr_type(&msg->target) !=
1674 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1675 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1676 		return;
1677 	}
1678 
1679 	in6_dev = __in6_dev_get(skb->dev);
1680 	if (!in6_dev)
1681 		return;
1682 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1683 		return;
1684 
1685 	/* RFC2461 8.1:
1686 	 *	The IP source address of the Redirect MUST be the same as the current
1687 	 *	first-hop router for the specified ICMP Destination Address.
1688 	 */
1689 
1690 	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1691 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1692 		return;
1693 	}
1694 
1695 	lladdr = NULL;
1696 	if (ndopts.nd_opts_tgt_lladdr) {
1697 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1698 					     skb->dev);
1699 		if (!lladdr) {
1700 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1701 			return;
1702 		}
1703 	}
1704 
1705 	rt = (struct rt6_info *) dst;
1706 	if (rt == net->ipv6.ip6_null_entry) {
1707 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1708 		return;
1709 	}
1710 
1711 	/* Redirect received -> path was valid.
1712 	 * Look, redirects are sent only in response to data packets,
1713 	 * so that this nexthop apparently is reachable. --ANK
1714 	 */
1715 	dst_confirm(&rt->dst);
1716 
1717 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1718 	if (!neigh)
1719 		return;
1720 
1721 	/*
1722 	 *	We have finally decided to accept it.
1723 	 */
1724 
1725 	neigh_update(neigh, lladdr, NUD_STALE,
1726 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1727 		     NEIGH_UPDATE_F_OVERRIDE|
1728 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1729 				     NEIGH_UPDATE_F_ISROUTER))
1730 		     );
1731 
1732 	nrt = ip6_rt_copy(rt, &msg->dest);
1733 	if (!nrt)
1734 		goto out;
1735 
1736 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1737 	if (on_link)
1738 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1739 
1740 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1741 
1742 	if (ip6_ins_rt(nrt))
1743 		goto out;
1744 
1745 	netevent.old = &rt->dst;
1746 	netevent.new = &nrt->dst;
1747 	netevent.daddr = &msg->dest;
1748 	netevent.neigh = neigh;
1749 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1750 
1751 	if (rt->rt6i_flags & RTF_CACHE) {
1752 		rt = (struct rt6_info *) dst_clone(&rt->dst);
1753 		ip6_del_rt(rt);
1754 	}
1755 
1756 out:
1757 	neigh_release(neigh);
1758 }
1759 
1760 /*
1761  *	Misc support functions
1762  */
1763 
1764 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1765 				    const struct in6_addr *dest)
1766 {
1767 	struct net *net = dev_net(ort->dst.dev);
1768 	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1769 					    ort->rt6i_table);
1770 
1771 	if (rt) {
1772 		rt->dst.input = ort->dst.input;
1773 		rt->dst.output = ort->dst.output;
1774 		rt->dst.flags |= DST_HOST;
1775 
1776 		rt->rt6i_dst.addr = *dest;
1777 		rt->rt6i_dst.plen = 128;
1778 		dst_copy_metrics(&rt->dst, &ort->dst);
1779 		rt->dst.error = ort->dst.error;
1780 		rt->rt6i_idev = ort->rt6i_idev;
1781 		if (rt->rt6i_idev)
1782 			in6_dev_hold(rt->rt6i_idev);
1783 		rt->dst.lastuse = jiffies;
1784 
1785 		rt->rt6i_gateway = ort->rt6i_gateway;
1786 		rt->rt6i_flags = ort->rt6i_flags;
1787 		if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1788 		    (RTF_DEFAULT | RTF_ADDRCONF))
1789 			rt6_set_from(rt, ort);
1790 		rt->rt6i_metric = 0;
1791 
1792 #ifdef CONFIG_IPV6_SUBTREES
1793 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1794 #endif
1795 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1796 		rt->rt6i_table = ort->rt6i_table;
1797 	}
1798 	return rt;
1799 }
1800 
1801 #ifdef CONFIG_IPV6_ROUTE_INFO
1802 static struct rt6_info *rt6_get_route_info(struct net_device *dev,
1803 					   const struct in6_addr *prefix, int prefixlen,
1804 					   const struct in6_addr *gwaddr)
1805 {
1806 	struct fib6_node *fn;
1807 	struct rt6_info *rt = NULL;
1808 	struct fib6_table *table;
1809 
1810 	table = fib6_get_table(dev_net(dev),
1811 			       addrconf_rt_table(dev, RT6_TABLE_INFO));
1812 	if (!table)
1813 		return NULL;
1814 
1815 	read_lock_bh(&table->tb6_lock);
1816 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1817 	if (!fn)
1818 		goto out;
1819 
1820 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1821 		if (rt->dst.dev->ifindex != dev->ifindex)
1822 			continue;
1823 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1824 			continue;
1825 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1826 			continue;
1827 		dst_hold(&rt->dst);
1828 		break;
1829 	}
1830 out:
1831 	read_unlock_bh(&table->tb6_lock);
1832 	return rt;
1833 }
1834 
1835 static struct rt6_info *rt6_add_route_info(struct net_device *dev,
1836 					   const struct in6_addr *prefix, int prefixlen,
1837 					   const struct in6_addr *gwaddr, unsigned int pref)
1838 {
1839 	struct fib6_config cfg = {
1840 		.fc_table	= addrconf_rt_table(dev, RT6_TABLE_INFO),
1841 		.fc_metric	= IP6_RT_PRIO_USER,
1842 		.fc_ifindex	= dev->ifindex,
1843 		.fc_dst_len	= prefixlen,
1844 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1845 				  RTF_UP | RTF_PREF(pref),
1846 		.fc_nlinfo.portid = 0,
1847 		.fc_nlinfo.nlh = NULL,
1848 		.fc_nlinfo.nl_net = dev_net(dev),
1849 	};
1850 
1851 	cfg.fc_dst = *prefix;
1852 	cfg.fc_gateway = *gwaddr;
1853 
1854 	/* We should treat it as a default route if prefix length is 0. */
1855 	if (!prefixlen)
1856 		cfg.fc_flags |= RTF_DEFAULT;
1857 
1858 	ip6_route_add(&cfg);
1859 
1860 	return rt6_get_route_info(dev, prefix, prefixlen, gwaddr);
1861 }
1862 #endif
1863 
1864 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1865 {
1866 	struct rt6_info *rt;
1867 	struct fib6_table *table;
1868 
1869 	table = fib6_get_table(dev_net(dev),
1870 			       addrconf_rt_table(dev, RT6_TABLE_MAIN));
1871 	if (!table)
1872 		return NULL;
1873 
1874 	read_lock_bh(&table->tb6_lock);
1875 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1876 		if (dev == rt->dst.dev &&
1877 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1878 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1879 			break;
1880 	}
1881 	if (rt)
1882 		dst_hold(&rt->dst);
1883 	read_unlock_bh(&table->tb6_lock);
1884 	return rt;
1885 }
1886 
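/*
 * Add a default route towards a router learnt from a Router Advertisement.
 * The route is flagged RTF_ADDRCONF | RTF_DEFAULT | RTF_EXPIRES so that it
 * is recognisable as autoconfigured, expiring state; as above, the inserted
 * entry is fetched back with rt6_get_dflt_router().
 */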
1887 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1888 				     struct net_device *dev,
1889 				     unsigned int pref)
1890 {
1891 	struct fib6_config cfg = {
1892 		.fc_table	= addrconf_rt_table(dev, RT6_TABLE_DFLT),
1893 		.fc_metric	= IP6_RT_PRIO_USER,
1894 		.fc_ifindex	= dev->ifindex,
1895 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1896 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1897 		.fc_nlinfo.portid = 0,
1898 		.fc_nlinfo.nlh = NULL,
1899 		.fc_nlinfo.nl_net = dev_net(dev),
1900 	};
1901 
1902 	cfg.fc_gateway = *gwaddr;
1903 
1904 	ip6_route_add(&cfg);
1905 
1906 	return rt6_get_dflt_router(gwaddr, dev);
1907 }
1908 
1909 
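/*
 * fib6_clean_all() callback used by rt6_purge_dflt_routers() below:
 * returning -1 tells the tree walker to delete the entry.  Every route
 * carrying RTF_DEFAULT or RTF_ADDRCONF is purged unless its device still
 * has accept_ra == 2 (accept RAs even when forwarding is enabled).
 */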
1910 int rt6_addrconf_purge(struct rt6_info *rt, void *arg) {
1911 	if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
1912 	    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2))
1913 		return -1;
1914 	return 0;
1915 }
1916 
1917 void rt6_purge_dflt_routers(struct net *net)
1918 {
1919 	fib6_clean_all(net, rt6_addrconf_purge, 0, NULL);
1920 }
1921 
1922 static void rtmsg_to_fib6_config(struct net *net,
1923 				 struct in6_rtmsg *rtmsg,
1924 				 struct fib6_config *cfg)
1925 {
1926 	memset(cfg, 0, sizeof(*cfg));
1927 
1928 	cfg->fc_table = RT6_TABLE_MAIN;
1929 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1930 	cfg->fc_metric = rtmsg->rtmsg_metric;
1931 	cfg->fc_expires = rtmsg->rtmsg_info;
1932 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1933 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1934 	cfg->fc_flags = rtmsg->rtmsg_flags;
1935 
1936 	cfg->fc_nlinfo.nl_net = net;
1937 
1938 	cfg->fc_dst = rtmsg->rtmsg_dst;
1939 	cfg->fc_src = rtmsg->rtmsg_src;
1940 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
1941 }
1942 
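/*
 * Legacy SIOCADDRT/SIOCDELRT handler backing the old route(8)-style ioctl
 * interface.  A rough userspace sketch (illustrative only; struct in6_rtmsg
 * comes from <linux/ipv6_route.h>, error handling omitted):
 *
 *	struct in6_rtmsg rtm = { 0 };
 *	rtm.rtmsg_dst = prefix;			// some struct in6_addr
 *	rtm.rtmsg_dst_len = 64;			// prefix length
 *	rtm.rtmsg_ifindex = if_nametoindex("eth0");
 *	rtm.rtmsg_metric = 1;
 *	rtm.rtmsg_flags = RTF_UP;
 *	ioctl(fd, SIOCADDRT, &rtm);		// fd: an AF_INET6 socket
 */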
1943 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1944 {
1945 	struct fib6_config cfg;
1946 	struct in6_rtmsg rtmsg;
1947 	int err;
1948 
1949 	switch (cmd) {
1950 	case SIOCADDRT:		/* Add a route */
1951 	case SIOCDELRT:		/* Delete a route */
1952 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1953 			return -EPERM;
1954 		err = copy_from_user(&rtmsg, arg,
1955 				     sizeof(struct in6_rtmsg));
1956 		if (err)
1957 			return -EFAULT;
1958 
1959 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1960 
1961 		rtnl_lock();
1962 		switch (cmd) {
1963 		case SIOCADDRT:
1964 			err = ip6_route_add(&cfg);
1965 			break;
1966 		case SIOCDELRT:
1967 			err = ip6_route_del(&cfg);
1968 			break;
1969 		default:
1970 			err = -EINVAL;
1971 		}
1972 		rtnl_unlock();
1973 
1974 		return err;
1975 	}
1976 
1977 	return -EINVAL;
1978 }
1979 
1980 /*
1981  *	Drop the packet on the floor
1982  */
1983 
1984 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1985 {
1986 	int type;
1987 	struct dst_entry *dst = skb_dst(skb);
1988 	switch (ipstats_mib_noroutes) {
1989 	case IPSTATS_MIB_INNOROUTES:
1990 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1991 		if (type == IPV6_ADDR_ANY) {
1992 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1993 				      IPSTATS_MIB_INADDRERRORS);
1994 			break;
1995 		}
1996 		/* FALLTHROUGH */
1997 	case IPSTATS_MIB_OUTNOROUTES:
1998 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1999 			      ipstats_mib_noroutes);
2000 		break;
2001 	}
2002 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2003 	kfree_skb(skb);
2004 	return 0;
2005 }
2006 
2007 static int ip6_pkt_discard(struct sk_buff *skb)
2008 {
2009 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2010 }
2011 
2012 static int ip6_pkt_discard_out(struct sk_buff *skb)
2013 {
2014 	skb->dev = skb_dst(skb)->dev;
2015 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2016 }
2017 
2018 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2019 
2020 static int ip6_pkt_prohibit(struct sk_buff *skb)
2021 {
2022 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2023 }
2024 
2025 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2026 {
2027 	skb->dev = skb_dst(skb)->dev;
2028 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2029 }
2030 
2031 #endif
2032 
2033 /*
2034  *	Allocate a dst for local (unicast / anycast) address.
2035  */
2036 
2037 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2038 				    const struct in6_addr *addr,
2039 				    bool anycast)
2040 {
2041 	struct net *net = dev_net(idev->dev);
2042 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2043 
2044 	if (!rt) {
2045 		net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2046 		return ERR_PTR(-ENOMEM);
2047 	}
2048 
2049 	in6_dev_hold(idev);
2050 
2051 	rt->dst.flags |= DST_HOST;
2052 	rt->dst.input = ip6_input;
2053 	rt->dst.output = ip6_output;
2054 	rt->rt6i_idev = idev;
2055 
2056 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2057 	if (anycast)
2058 		rt->rt6i_flags |= RTF_ANYCAST;
2059 	else
2060 		rt->rt6i_flags |= RTF_LOCAL;
2061 
2062 	rt->rt6i_dst.addr = *addr;
2063 	rt->rt6i_dst.plen = 128;
2064 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2065 
2066 	atomic_set(&rt->dst.__refcnt, 1);
2067 
2068 	return rt;
2069 }
2070 
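/*
 * Choose a source address for @daddr: prefer the route's configured
 * preferred source (rt6i_prefsrc) when one is set, otherwise fall back to
 * normal source address selection on the route's device.
 */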
2071 int ip6_route_get_saddr(struct net *net,
2072 			struct rt6_info *rt,
2073 			const struct in6_addr *daddr,
2074 			unsigned int prefs,
2075 			struct in6_addr *saddr)
2076 {
2077 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2078 	int err = 0;
2079 	if (rt->rt6i_prefsrc.plen)
2080 		*saddr = rt->rt6i_prefsrc.addr;
2081 	else
2082 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2083 					 daddr, prefs, saddr);
2084 	return err;
2085 }
2086 
2087 /* remove a deleted IP address from prefsrc entries */
2088 struct arg_dev_net_ip {
2089 	struct net_device *dev;
2090 	struct net *net;
2091 	struct in6_addr *addr;
2092 };
2093 
2094 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2095 {
2096 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2097 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2098 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2099 
2100 	if (((void *)rt->dst.dev == dev || !dev) &&
2101 	    rt != net->ipv6.ip6_null_entry &&
2102 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2103 		/* remove prefsrc entry */
2104 		rt->rt6i_prefsrc.plen = 0;
2105 	}
2106 	return 0;
2107 }
2108 
2109 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2110 {
2111 	struct net *net = dev_net(ifp->idev->dev);
2112 	struct arg_dev_net_ip adni = {
2113 		.dev = ifp->idev->dev,
2114 		.net = net,
2115 		.addr = &ifp->addr,
2116 	};
2117 	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2118 }
2119 
2120 struct arg_dev_net {
2121 	struct net_device *dev;
2122 	struct net *net;
2123 };
2124 
2125 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2126 {
2127 	const struct arg_dev_net *adn = arg;
2128 	const struct net_device *dev = adn->dev;
2129 
2130 	if ((rt->dst.dev == dev || !dev) &&
2131 	    rt != adn->net->ipv6.ip6_null_entry)
2132 		return -1;
2133 
2134 	return 0;
2135 }
2136 
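/*
 * Flush routing state when a device goes down: fib6_ifdown() above flags for
 * deletion every route whose device is @dev (or every route when @dev is
 * NULL), except the null entry, and the same predicate is also handed to
 * icmp6_clean_all().
 */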
2137 void rt6_ifdown(struct net *net, struct net_device *dev)
2138 {
2139 	struct arg_dev_net adn = {
2140 		.dev = dev,
2141 		.net = net,
2142 	};
2143 
2144 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2145 	icmp6_clean_all(fib6_ifdown, &adn);
2146 }
2147 
2148 struct rt6_mtu_change_arg {
2149 	struct net_device *dev;
2150 	unsigned int mtu;
2151 };
2152 
2153 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2154 {
2155 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2156 	struct inet6_dev *idev;
2157 
2158 	/* In IPv6 pmtu discovery is not optional,
2159 	   so the RTAX_MTU lock cannot disable it.
2160 	   We still use this lock to block changes
2161 	   caused by addrconf/ndisc.
2162 	*/
2163 
2164 	idev = __in6_dev_get(arg->dev);
2165 	if (!idev)
2166 		return 0;
2167 
2168 	/* For an administrative MTU increase, there is no way to discover
2169 	   an IPv6 PMTU increase, so the PMTU increase should be applied here.
2170 	   Since RFC 1981 doesn't cover administrative MTU increases,
2171 	   updating the PMTU on such an increase is a MUST (e.g. jumbo frames).
2172 	 */
2173 	/*
2174 	   If the new MTU is less than the route PMTU, the new MTU will be the
2175 	   lowest MTU in the path; update the route PMTU to reflect the
2176 	   decrease.  If the new MTU is greater than the route PMTU, and the
2177 	   old MTU was the lowest MTU in the path, update the route PMTU to
2178 	   reflect the increase.  In that case, if another node's MTU is now
2179 	   the lowest in the path, a Packet Too Big message will trigger
2180 	   PMTU discovery again.
2181 	 */
2182 	if (rt->dst.dev == arg->dev &&
2183 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2184 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2185 	     (dst_mtu(&rt->dst) < arg->mtu &&
2186 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2187 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2188 	}
2189 	return 0;
2190 }
2191 
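/*
 * Device MTU change: walk the whole FIB and let rt6_mtu_change_route() above
 * adjust the cached RTAX_MTU metric of routes through @dev, subject to the
 * conditions spelled out in its comments.
 */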
2192 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2193 {
2194 	struct rt6_mtu_change_arg arg = {
2195 		.dev = dev,
2196 		.mtu = mtu,
2197 	};
2198 
2199 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2200 }
2201 
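/*
 * Netlink attribute policy used by nlmsg_parse() for RTM_NEWROUTE,
 * RTM_DELROUTE and RTM_GETROUTE requests; attributes without an entry here
 * are accepted but not validated.
 */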
2202 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2203 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2204 	[RTA_OIF]               = { .type = NLA_U32 },
2205 	[RTA_IIF]		= { .type = NLA_U32 },
2206 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2207 	[RTA_METRICS]           = { .type = NLA_NESTED },
2208 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2209 	[RTA_UID]		= { .type = NLA_U32 },
2210 };
2211 
2212 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2213 			      struct fib6_config *cfg)
2214 {
2215 	struct rtmsg *rtm;
2216 	struct nlattr *tb[RTA_MAX+1];
2217 	int err;
2218 
2219 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2220 	if (err < 0)
2221 		goto errout;
2222 
2223 	err = -EINVAL;
2224 	rtm = nlmsg_data(nlh);
2225 	memset(cfg, 0, sizeof(*cfg));
2226 
2227 	cfg->fc_table = rtm->rtm_table;
2228 	cfg->fc_dst_len = rtm->rtm_dst_len;
2229 	cfg->fc_src_len = rtm->rtm_src_len;
2230 	cfg->fc_flags = RTF_UP;
2231 	cfg->fc_protocol = rtm->rtm_protocol;
2232 	cfg->fc_type = rtm->rtm_type;
2233 
2234 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2235 	    rtm->rtm_type == RTN_BLACKHOLE ||
2236 	    rtm->rtm_type == RTN_PROHIBIT ||
2237 	    rtm->rtm_type == RTN_THROW)
2238 		cfg->fc_flags |= RTF_REJECT;
2239 
2240 	if (rtm->rtm_type == RTN_LOCAL)
2241 		cfg->fc_flags |= RTF_LOCAL;
2242 
2243 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2244 	cfg->fc_nlinfo.nlh = nlh;
2245 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2246 
2247 	if (tb[RTA_GATEWAY]) {
2248 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2249 		cfg->fc_flags |= RTF_GATEWAY;
2250 	}
2251 
2252 	if (tb[RTA_DST]) {
2253 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2254 
2255 		if (nla_len(tb[RTA_DST]) < plen)
2256 			goto errout;
2257 
2258 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2259 	}
2260 
2261 	if (tb[RTA_SRC]) {
2262 		int plen = (rtm->rtm_src_len + 7) >> 3;
2263 
2264 		if (nla_len(tb[RTA_SRC]) < plen)
2265 			goto errout;
2266 
2267 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2268 	}
2269 
2270 	if (tb[RTA_PREFSRC])
2271 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2272 
2273 	if (tb[RTA_OIF])
2274 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2275 
2276 	if (tb[RTA_PRIORITY])
2277 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2278 
2279 	if (tb[RTA_METRICS]) {
2280 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2281 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2282 	}
2283 
2284 	if (tb[RTA_TABLE])
2285 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2286 
2287 	if (tb[RTA_MULTIPATH]) {
2288 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2289 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2290 	}
2291 
2292 	err = 0;
2293 errout:
2294 	return err;
2295 }
2296 
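/*
 * RTA_MULTIPATH handling: each rtnexthop in the attribute becomes its own
 * fib6_config (a copy of @cfg with the per-nexthop ifindex and gateway
 * applied) and is added or deleted as an independent route.  If an add
 * fails, the loop restarts in delete mode to roll back the nexthops already
 * installed; on delete, failures are remembered but the remaining nexthops
 * are still processed.
 */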
2297 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2298 {
2299 	struct fib6_config r_cfg;
2300 	struct rtnexthop *rtnh;
2301 	int remaining;
2302 	int attrlen;
2303 	int err = 0, last_err = 0;
2304 
2305 beginning:
2306 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2307 	remaining = cfg->fc_mp_len;
2308 
2309 	/* Parse a Multipath Entry */
2310 	while (rtnh_ok(rtnh, remaining)) {
2311 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2312 		if (rtnh->rtnh_ifindex)
2313 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2314 
2315 		attrlen = rtnh_attrlen(rtnh);
2316 		if (attrlen > 0) {
2317 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2318 
2319 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2320 			if (nla) {
2321 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2322 				r_cfg.fc_flags |= RTF_GATEWAY;
2323 			}
2324 		}
2325 		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2326 		if (err) {
2327 			last_err = err;
2328 			/* If we are trying to remove a route, do not stop the
2329 			 * loop when ip6_route_del() fails (because the next hop
2330 			 * is already gone); we should try to remove all next hops.
2331 			 */
2332 			if (add) {
2333 				/* If add fails, we should try to delete all
2334 				 * next hops that have been already added.
2335 				 */
2336 				add = 0;
2337 				goto beginning;
2338 			}
2339 		}
2340 		/* Because each route is added as a single route, we remove
2341 		 * this flag after the first nexthop (if there is a collision,
2342 		 * we have already failed to add the first nexthop:
2343 		 * fib6_add_rt2node() has rejected it).
2344 		 */
2345 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2346 		rtnh = rtnh_next(rtnh, &remaining);
2347 	}
2348 
2349 	return last_err;
2350 }
2351 
2352 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2353 {
2354 	struct fib6_config cfg;
2355 	int err;
2356 
2357 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2358 	if (err < 0)
2359 		return err;
2360 
2361 	if (cfg.fc_mp)
2362 		return ip6_route_multipath(&cfg, 0);
2363 	else
2364 		return ip6_route_del(&cfg);
2365 }
2366 
2367 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2368 {
2369 	struct fib6_config cfg;
2370 	int err;
2371 
2372 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2373 	if (err < 0)
2374 		return err;
2375 
2376 	if (cfg.fc_mp)
2377 		return ip6_route_multipath(&cfg, 1);
2378 	else
2379 		return ip6_route_add(&cfg);
2380 }
2381 
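/*
 * Worst-case payload size of a route notification, used to size the skb in
 * inet6_rt_notify() so that rt6_fill_node() cannot run out of room there.
 */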
2382 static inline size_t rt6_nlmsg_size(void)
2383 {
2384 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2385 	       + nla_total_size(16) /* RTA_SRC */
2386 	       + nla_total_size(16) /* RTA_DST */
2387 	       + nla_total_size(16) /* RTA_GATEWAY */
2388 	       + nla_total_size(16) /* RTA_PREFSRC */
2389 	       + nla_total_size(4) /* RTA_TABLE */
2390 	       + nla_total_size(4) /* RTA_IIF */
2391 	       + nla_total_size(4) /* RTA_OIF */
2392 	       + nla_total_size(4) /* RTA_PRIORITY */
2393 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2394 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2395 }
2396 
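/*
 * Translate a rt6_info into a netlink route message.  @prefix restricts
 * dumps to RTF_PREFIX_RT routes (other routes are skipped with a positive
 * return value), and -EMSGSIZE means @skb was too small for the message.
 */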
2397 static int rt6_fill_node(struct net *net,
2398 			 struct sk_buff *skb, struct rt6_info *rt,
2399 			 struct in6_addr *dst, struct in6_addr *src,
2400 			 int iif, int type, u32 portid, u32 seq,
2401 			 int prefix, int nowait, unsigned int flags)
2402 {
2403 	struct rtmsg *rtm;
2404 	struct nlmsghdr *nlh;
2405 	long expires;
2406 	u32 table;
2407 
2408 	if (prefix) {	/* user wants prefix routes only */
2409 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2410 			/* success since this is not a prefix route */
2411 			return 1;
2412 		}
2413 	}
2414 
2415 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2416 	if (!nlh)
2417 		return -EMSGSIZE;
2418 
2419 	rtm = nlmsg_data(nlh);
2420 	rtm->rtm_family = AF_INET6;
2421 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2422 	rtm->rtm_src_len = rt->rt6i_src.plen;
2423 	rtm->rtm_tos = 0;
2424 	if (rt->rt6i_table)
2425 		table = rt->rt6i_table->tb6_id;
2426 	else
2427 		table = RT6_TABLE_UNSPEC;
2428 	rtm->rtm_table = table;
2429 	if (nla_put_u32(skb, RTA_TABLE, table))
2430 		goto nla_put_failure;
2431 	if (rt->rt6i_flags & RTF_REJECT) {
2432 		switch (rt->dst.error) {
2433 		case -EINVAL:
2434 			rtm->rtm_type = RTN_BLACKHOLE;
2435 			break;
2436 		case -EACCES:
2437 			rtm->rtm_type = RTN_PROHIBIT;
2438 			break;
2439 		case -EAGAIN:
2440 			rtm->rtm_type = RTN_THROW;
2441 			break;
2442 		default:
2443 			rtm->rtm_type = RTN_UNREACHABLE;
2444 			break;
2445 		}
2446 	}
2447 	else if (rt->rt6i_flags & RTF_LOCAL)
2448 		rtm->rtm_type = RTN_LOCAL;
2449 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2450 		rtm->rtm_type = RTN_LOCAL;
2451 	else
2452 		rtm->rtm_type = RTN_UNICAST;
2453 	rtm->rtm_flags = 0;
2454 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2455 	rtm->rtm_protocol = rt->rt6i_protocol;
2456 	if (rt->rt6i_flags & RTF_DYNAMIC)
2457 		rtm->rtm_protocol = RTPROT_REDIRECT;
2458 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
2459 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2460 			rtm->rtm_protocol = RTPROT_RA;
2461 		else
2462 			rtm->rtm_protocol = RTPROT_KERNEL;
2463 	}
2464 
2465 	if (rt->rt6i_flags & RTF_CACHE)
2466 		rtm->rtm_flags |= RTM_F_CLONED;
2467 
2468 	if (dst) {
2469 		if (nla_put(skb, RTA_DST, 16, dst))
2470 			goto nla_put_failure;
2471 		rtm->rtm_dst_len = 128;
2472 	} else if (rtm->rtm_dst_len)
2473 		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2474 			goto nla_put_failure;
2475 #ifdef CONFIG_IPV6_SUBTREES
2476 	if (src) {
2477 		if (nla_put(skb, RTA_SRC, 16, src))
2478 			goto nla_put_failure;
2479 		rtm->rtm_src_len = 128;
2480 	} else if (rtm->rtm_src_len &&
2481 		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2482 		goto nla_put_failure;
2483 #endif
2484 	if (iif) {
2485 #ifdef CONFIG_IPV6_MROUTE
2486 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2487 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2488 			if (err <= 0) {
2489 				if (!nowait) {
2490 					if (err == 0)
2491 						return 0;
2492 					goto nla_put_failure;
2493 				} else {
2494 					if (err == -EMSGSIZE)
2495 						goto nla_put_failure;
2496 				}
2497 			}
2498 		} else
2499 #endif
2500 			if (nla_put_u32(skb, RTA_IIF, iif))
2501 				goto nla_put_failure;
2502 	} else if (dst) {
2503 		struct in6_addr saddr_buf;
2504 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2505 		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2506 			goto nla_put_failure;
2507 	}
2508 
2509 	if (rt->rt6i_prefsrc.plen) {
2510 		struct in6_addr saddr_buf;
2511 		saddr_buf = rt->rt6i_prefsrc.addr;
2512 		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2513 			goto nla_put_failure;
2514 	}
2515 
2516 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2517 		goto nla_put_failure;
2518 
2519 	if (rt->rt6i_flags & RTF_GATEWAY) {
2520 		if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2521 			goto nla_put_failure;
2522 	}
2523 
2524 	if (rt->dst.dev &&
2525 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2526 		goto nla_put_failure;
2527 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2528 		goto nla_put_failure;
2529 
2530 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2531 
2532 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2533 		goto nla_put_failure;
2534 
2535 	return nlmsg_end(skb, nlh);
2536 
2537 nla_put_failure:
2538 	nlmsg_cancel(skb, nlh);
2539 	return -EMSGSIZE;
2540 }
2541 
2542 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2543 {
2544 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2545 	int prefix;
2546 
2547 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2548 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2549 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2550 	} else
2551 		prefix = 0;
2552 
2553 	return rt6_fill_node(arg->net,
2554 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2555 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2556 		     prefix, 0, NLM_F_MULTI);
2557 }
2558 
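/*
 * RTM_GETROUTE handler: resolve a single destination (this is what
 * "ip -6 route get" exercises), either as a forwarding lookup when RTA_IIF
 * is supplied or as an output route lookup otherwise, and answer the
 * requester with a single RTM_NEWROUTE message.
 */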
2559 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2560 {
2561 	struct net *net = sock_net(in_skb->sk);
2562 	struct nlattr *tb[RTA_MAX+1];
2563 	struct rt6_info *rt;
2564 	struct sk_buff *skb;
2565 	struct rtmsg *rtm;
2566 	struct flowi6 fl6;
2567 	int err, iif = 0, oif = 0;
2568 
2569 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2570 	if (err < 0)
2571 		goto errout;
2572 
2573 	err = -EINVAL;
2574 	memset(&fl6, 0, sizeof(fl6));
2575 
2576 	if (tb[RTA_SRC]) {
2577 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2578 			goto errout;
2579 
2580 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2581 	}
2582 
2583 	if (tb[RTA_DST]) {
2584 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2585 			goto errout;
2586 
2587 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2588 	}
2589 
2590 	if (tb[RTA_IIF])
2591 		iif = nla_get_u32(tb[RTA_IIF]);
2592 
2593 	if (tb[RTA_OIF])
2594 		oif = nla_get_u32(tb[RTA_OIF]);
2595 
2596 	if (tb[RTA_MARK])
2597 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2598 
2599 	if (tb[RTA_UID])
2600 		fl6.flowi6_uid = make_kuid(current_user_ns(),
2601 					   nla_get_u32(tb[RTA_UID]));
2602 	else
2603 		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
2604 
2605 	if (iif) {
2606 		struct net_device *dev;
2607 		int flags = 0;
2608 
2609 		dev = __dev_get_by_index(net, iif);
2610 		if (!dev) {
2611 			err = -ENODEV;
2612 			goto errout;
2613 		}
2614 
2615 		fl6.flowi6_iif = iif;
2616 
2617 		if (!ipv6_addr_any(&fl6.saddr))
2618 			flags |= RT6_LOOKUP_F_HAS_SADDR;
2619 
2620 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2621 							       flags);
2622 	} else {
2623 		fl6.flowi6_oif = oif;
2624 
2625 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2626 	}
2627 
2628 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2629 	if (!skb) {
2630 		ip6_rt_put(rt);
2631 		err = -ENOBUFS;
2632 		goto errout;
2633 	}
2634 
2635 	/* Reserve room for dummy headers; this skb can pass
2636 	   through a good chunk of the routing engine.
2637 	 */
2638 	skb_reset_mac_header(skb);
2639 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2640 
2641 	skb_dst_set(skb, &rt->dst);
2642 
2643 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2644 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2645 			    nlh->nlmsg_seq, 0, 0, 0);
2646 	if (err < 0) {
2647 		kfree_skb(skb);
2648 		goto errout;
2649 	}
2650 
2651 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2652 errout:
2653 	return err;
2654 }
2655 
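/*
 * Broadcast a route change to RTNLGRP_IPV6_ROUTE listeners.  The skb is
 * sized with rt6_nlmsg_size(), so -EMSGSIZE from rt6_fill_node() here would
 * indicate a bug in that estimate.
 */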
2656 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2657 {
2658 	struct sk_buff *skb;
2659 	struct net *net = info->nl_net;
2660 	u32 seq;
2661 	int err;
2662 
2663 	err = -ENOBUFS;
2664 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2665 
2666 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2667 	if (!skb)
2668 		goto errout;
2669 
2670 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2671 				event, info->portid, seq, 0, 0, 0);
2672 	if (err < 0) {
2673 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2674 		WARN_ON(err == -EMSGSIZE);
2675 		kfree_skb(skb);
2676 		goto errout;
2677 	}
2678 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2679 		    info->nlh, gfp_any());
2680 	return;
2681 errout:
2682 	if (err < 0)
2683 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2684 }
2685 
2686 static int ip6_route_dev_notify(struct notifier_block *this,
2687 				unsigned long event, void *data)
2688 {
2689 	struct net_device *dev = (struct net_device *)data;
2690 	struct net *net = dev_net(dev);
2691 
2692 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2693 		net->ipv6.ip6_null_entry->dst.dev = dev;
2694 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2695 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2696 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2697 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2698 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2699 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2700 #endif
2701 	}
2702 
2703 	return NOTIFY_OK;
2704 }
2705 
2706 /*
2707  *	/proc
2708  */
2709 
2710 #ifdef CONFIG_PROC_FS
2711 
2712 struct rt6_proc_arg
2713 {
2714 	char *buffer;
2715 	int offset;
2716 	int length;
2717 	int skip;
2718 	int len;
2719 };
2720 
2721 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2722 {
2723 	struct seq_file *m = p_arg;
2724 
2725 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2726 
2727 #ifdef CONFIG_IPV6_SUBTREES
2728 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2729 #else
2730 	seq_puts(m, "00000000000000000000000000000000 00 ");
2731 #endif
2732 	if (rt->rt6i_flags & RTF_GATEWAY) {
2733 		seq_printf(m, "%pi6", &rt->rt6i_gateway);
2734 	} else {
2735 		seq_puts(m, "00000000000000000000000000000000");
2736 	}
2737 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2738 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2739 		   rt->dst.__use, rt->rt6i_flags,
2740 		   rt->dst.dev ? rt->dst.dev->name : "");
2741 	return 0;
2742 }
2743 
2744 static int ipv6_route_show(struct seq_file *m, void *v)
2745 {
2746 	struct net *net = (struct net *)m->private;
2747 	fib6_clean_all_ro(net, rt6_info_route, 0, m);
2748 	return 0;
2749 }
2750 
2751 static int ipv6_route_open(struct inode *inode, struct file *file)
2752 {
2753 	return single_open_net(inode, file, ipv6_route_show);
2754 }
2755 
2756 static const struct file_operations ipv6_route_proc_fops = {
2757 	.owner		= THIS_MODULE,
2758 	.open		= ipv6_route_open,
2759 	.read		= seq_read,
2760 	.llseek		= seq_lseek,
2761 	.release	= single_release_net,
2762 };
2763 
2764 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2765 {
2766 	struct net *net = (struct net *)seq->private;
2767 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2768 		   net->ipv6.rt6_stats->fib_nodes,
2769 		   net->ipv6.rt6_stats->fib_route_nodes,
2770 		   net->ipv6.rt6_stats->fib_rt_alloc,
2771 		   net->ipv6.rt6_stats->fib_rt_entries,
2772 		   net->ipv6.rt6_stats->fib_rt_cache,
2773 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2774 		   net->ipv6.rt6_stats->fib_discarded_routes);
2775 
2776 	return 0;
2777 }
2778 
2779 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2780 {
2781 	return single_open_net(inode, file, rt6_stats_seq_show);
2782 }
2783 
2784 static const struct file_operations rt6_stats_seq_fops = {
2785 	.owner	 = THIS_MODULE,
2786 	.open	 = rt6_stats_seq_open,
2787 	.read	 = seq_read,
2788 	.llseek	 = seq_lseek,
2789 	.release = single_release_net,
2790 };
2791 #endif	/* CONFIG_PROC_FS */
2792 
2793 #ifdef CONFIG_SYSCTL
2794 
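/*
 * Write-only "flush" sysctl: a write triggers a garbage collection pass
 * over the FIB.  Typically exercised as something like (path assuming the
 * usual net.ipv6.route sysctl hierarchy):
 *
 *	echo 1 > /proc/sys/net/ipv6/route/flush
 */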
2795 static
2796 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2797 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2798 {
2799 	struct net *net;
2800 	int delay;
2801 	if (!write)
2802 		return -EINVAL;
2803 
2804 	net = (struct net *)ctl->extra1;
2805 	delay = net->ipv6.sysctl.flush_delay;
2806 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2807 	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2808 	return 0;
2809 }
2810 
2811 ctl_table ipv6_route_table_template[] = {
2812 	{
2813 		.procname	=	"flush",
2814 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2815 		.maxlen		=	sizeof(int),
2816 		.mode		=	0200,
2817 		.proc_handler	=	ipv6_sysctl_rtcache_flush
2818 	},
2819 	{
2820 		.procname	=	"gc_thresh",
2821 		.data		=	&ip6_dst_ops_template.gc_thresh,
2822 		.maxlen		=	sizeof(int),
2823 		.mode		=	0644,
2824 		.proc_handler	=	proc_dointvec,
2825 	},
2826 	{
2827 		.procname	=	"max_size",
2828 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2829 		.maxlen		=	sizeof(int),
2830 		.mode		=	0644,
2831 		.proc_handler	=	proc_dointvec,
2832 	},
2833 	{
2834 		.procname	=	"gc_min_interval",
2835 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2836 		.maxlen		=	sizeof(int),
2837 		.mode		=	0644,
2838 		.proc_handler	=	proc_dointvec_jiffies,
2839 	},
2840 	{
2841 		.procname	=	"gc_timeout",
2842 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2843 		.maxlen		=	sizeof(int),
2844 		.mode		=	0644,
2845 		.proc_handler	=	proc_dointvec_jiffies,
2846 	},
2847 	{
2848 		.procname	=	"gc_interval",
2849 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2850 		.maxlen		=	sizeof(int),
2851 		.mode		=	0644,
2852 		.proc_handler	=	proc_dointvec_jiffies,
2853 	},
2854 	{
2855 		.procname	=	"gc_elasticity",
2856 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2857 		.maxlen		=	sizeof(int),
2858 		.mode		=	0644,
2859 		.proc_handler	=	proc_dointvec,
2860 	},
2861 	{
2862 		.procname	=	"mtu_expires",
2863 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2864 		.maxlen		=	sizeof(int),
2865 		.mode		=	0644,
2866 		.proc_handler	=	proc_dointvec_jiffies,
2867 	},
2868 	{
2869 		.procname	=	"min_adv_mss",
2870 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2871 		.maxlen		=	sizeof(int),
2872 		.mode		=	0644,
2873 		.proc_handler	=	proc_dointvec,
2874 	},
2875 	{
2876 		.procname	=	"gc_min_interval_ms",
2877 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2878 		.maxlen		=	sizeof(int),
2879 		.mode		=	0644,
2880 		.proc_handler	=	proc_dointvec_ms_jiffies,
2881 	},
2882 	{ }
2883 };
2884 
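/*
 * Per-namespace copy of the template above.  The table[N].data fixups below
 * must stay in the same order as the template entries; clearing
 * table[0].procname for namespaces not owned by init_user_ns keeps these
 * sysctls from being exported to unprivileged users.
 */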
2885 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2886 {
2887 	struct ctl_table *table;
2888 
2889 	table = kmemdup(ipv6_route_table_template,
2890 			sizeof(ipv6_route_table_template),
2891 			GFP_KERNEL);
2892 
2893 	if (table) {
2894 		table[0].data = &net->ipv6.sysctl.flush_delay;
2895 		table[0].extra1 = net;
2896 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2897 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2898 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2899 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2900 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2901 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2902 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2903 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2904 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2905 
2906 		/* Don't export sysctls to unprivileged users */
2907 		if (net->user_ns != &init_user_ns)
2908 			table[0].procname = NULL;
2909 	}
2910 
2911 	return table;
2912 }
2913 #endif
2914 
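/*
 * Per-namespace setup: clone the dst_ops template and the special
 * null / prohibit / blackhole route templates, then seed this namespace's
 * routing sysctl defaults.  Failure paths free whatever was allocated
 * before the error.
 */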
2915 static int __net_init ip6_route_net_init(struct net *net)
2916 {
2917 	int ret = -ENOMEM;
2918 
2919 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2920 	       sizeof(net->ipv6.ip6_dst_ops));
2921 
2922 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2923 		goto out_ip6_dst_ops;
2924 
2925 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2926 					   sizeof(*net->ipv6.ip6_null_entry),
2927 					   GFP_KERNEL);
2928 	if (!net->ipv6.ip6_null_entry)
2929 		goto out_ip6_dst_entries;
2930 	net->ipv6.ip6_null_entry->dst.path =
2931 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2932 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2933 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2934 			 ip6_template_metrics, true);
2935 
2936 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2937 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2938 					       sizeof(*net->ipv6.ip6_prohibit_entry),
2939 					       GFP_KERNEL);
2940 	if (!net->ipv6.ip6_prohibit_entry)
2941 		goto out_ip6_null_entry;
2942 	net->ipv6.ip6_prohibit_entry->dst.path =
2943 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2944 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2945 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2946 			 ip6_template_metrics, true);
2947 
2948 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2949 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2950 					       GFP_KERNEL);
2951 	if (!net->ipv6.ip6_blk_hole_entry)
2952 		goto out_ip6_prohibit_entry;
2953 	net->ipv6.ip6_blk_hole_entry->dst.path =
2954 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2955 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2956 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2957 			 ip6_template_metrics, true);
2958 #endif
2959 
2960 	net->ipv6.sysctl.flush_delay = 0;
2961 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2962 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2963 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2964 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2965 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2966 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2967 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2968 
2969 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2970 
2971 	ret = 0;
2972 out:
2973 	return ret;
2974 
2975 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2976 out_ip6_prohibit_entry:
2977 	kfree(net->ipv6.ip6_prohibit_entry);
2978 out_ip6_null_entry:
2979 	kfree(net->ipv6.ip6_null_entry);
2980 #endif
2981 out_ip6_dst_entries:
2982 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2983 out_ip6_dst_ops:
2984 	goto out;
2985 }
2986 
2987 static void __net_exit ip6_route_net_exit(struct net *net)
2988 {
2989 	kfree(net->ipv6.ip6_null_entry);
2990 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2991 	kfree(net->ipv6.ip6_prohibit_entry);
2992 	kfree(net->ipv6.ip6_blk_hole_entry);
2993 #endif
2994 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2995 }
2996 
2997 static int __net_init ip6_route_net_init_late(struct net *net)
2998 {
2999 #ifdef CONFIG_PROC_FS
3000 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3001 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3002 #endif
3003 	return 0;
3004 }
3005 
3006 static void __net_exit ip6_route_net_exit_late(struct net *net)
3007 {
3008 #ifdef CONFIG_PROC_FS
3009 	remove_proc_entry("ipv6_route", net->proc_net);
3010 	remove_proc_entry("rt6_stats", net->proc_net);
3011 #endif
3012 }
3013 
3014 static struct pernet_operations ip6_route_net_ops = {
3015 	.init = ip6_route_net_init,
3016 	.exit = ip6_route_net_exit,
3017 };
3018 
3019 static int __net_init ipv6_inetpeer_init(struct net *net)
3020 {
3021 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3022 
3023 	if (!bp)
3024 		return -ENOMEM;
3025 	inet_peer_base_init(bp);
3026 	net->ipv6.peers = bp;
3027 	return 0;
3028 }
3029 
3030 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3031 {
3032 	struct inet_peer_base *bp = net->ipv6.peers;
3033 
3034 	net->ipv6.peers = NULL;
3035 	inetpeer_invalidate_tree(bp);
3036 	kfree(bp);
3037 }
3038 
3039 static struct pernet_operations ipv6_inetpeer_ops = {
3040 	.init	=	ipv6_inetpeer_init,
3041 	.exit	=	ipv6_inetpeer_exit,
3042 };
3043 
3044 static struct pernet_operations ip6_route_net_late_ops = {
3045 	.init = ip6_route_net_init_late,
3046 	.exit = ip6_route_net_exit_late,
3047 };
3048 
3049 static struct notifier_block ip6_route_dev_notifier = {
3050 	.notifier_call = ip6_route_dev_notify,
3051 	.priority = 0,
3052 };
3053 
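/*
 * Module init: the dst kmem cache and the pernet subsystems are set up
 * before fib6, xfrm6 and fib6_rules; the rtnetlink handlers and the
 * netdevice notifier are registered last.  Each error label below unwinds
 * exactly what was initialised before it.
 */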
3054 int __init ip6_route_init(void)
3055 {
3056 	int ret;
3057 
3058 	ret = -ENOMEM;
3059 	ip6_dst_ops_template.kmem_cachep =
3060 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3061 				  SLAB_HWCACHE_ALIGN, NULL);
3062 	if (!ip6_dst_ops_template.kmem_cachep)
3063 		goto out;
3064 
3065 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3066 	if (ret)
3067 		goto out_kmem_cache;
3068 
3069 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3070 	if (ret)
3071 		goto out_dst_entries;
3072 
3073 	ret = register_pernet_subsys(&ip6_route_net_ops);
3074 	if (ret)
3075 		goto out_register_inetpeer;
3076 
3077 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3078 
3079 	/* The loopback device is registered before this portion of code,
3080 	 * so the loopback reference in rt6_info is not taken automatically;
3081 	 * do it manually for init_net */
3082 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3083 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3084   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3085 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3086 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3087 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3088 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3089   #endif
3090 	ret = fib6_init();
3091 	if (ret)
3092 		goto out_register_subsys;
3093 
3094 	ret = xfrm6_init();
3095 	if (ret)
3096 		goto out_fib6_init;
3097 
3098 	ret = fib6_rules_init();
3099 	if (ret)
3100 		goto xfrm6_init;
3101 
3102 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3103 	if (ret)
3104 		goto fib6_rules_init;
3105 
3106 	ret = -ENOBUFS;
3107 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3108 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3109 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3110 		goto out_register_late_subsys;
3111 
3112 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3113 	if (ret)
3114 		goto out_register_late_subsys;
3115 
3116 out:
3117 	return ret;
3118 
3119 out_register_late_subsys:
3120 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3121 fib6_rules_init:
3122 	fib6_rules_cleanup();
3123 xfrm6_init:
3124 	xfrm6_fini();
3125 out_fib6_init:
3126 	fib6_gc_cleanup();
3127 out_register_subsys:
3128 	unregister_pernet_subsys(&ip6_route_net_ops);
3129 out_register_inetpeer:
3130 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3131 out_dst_entries:
3132 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3133 out_kmem_cache:
3134 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3135 	goto out;
3136 }
3137 
3138 void ip6_route_cleanup(void)
3139 {
3140 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3141 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3142 	fib6_rules_cleanup();
3143 	xfrm6_fini();
3144 	fib6_gc_cleanup();
3145 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3146 	unregister_pernet_subsys(&ip6_route_net_ops);
3147 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3148 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3149 }
3150