1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69 const struct in6_addr *dest);
70 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void ip6_dst_destroy(struct dst_entry *);
75 static void ip6_dst_ifdown(struct dst_entry *,
76 struct net_device *dev, int how);
77 static int ip6_dst_gc(struct dst_ops *ops);
78
79 static int ip6_pkt_discard(struct sk_buff *skb);
80 static int ip6_pkt_discard_out(struct sk_buff *skb);
81 static void ip6_link_failure(struct sk_buff *skb);
82 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83 struct sk_buff *skb, u32 mtu);
84 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85 struct sk_buff *skb);
86
87 #ifdef CONFIG_IPV6_ROUTE_INFO
88 static struct rt6_info *rt6_add_route_info(struct net_device *dev,
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, unsigned int pref);
91 static struct rt6_info *rt6_get_route_info(struct net_device *dev,
92 const struct in6_addr *prefix, int prefixlen,
93 const struct in6_addr *gwaddr);
94 #endif
95
ipv6_cow_metrics(struct dst_entry * dst,unsigned long old)96 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
97 {
98 struct rt6_info *rt = (struct rt6_info *) dst;
99 struct inet_peer *peer;
100 u32 *p = NULL;
101
102 if (!(rt->dst.flags & DST_HOST))
103 return NULL;
104
105 peer = rt6_get_peer_create(rt);
106 if (peer) {
107 u32 *old_p = __DST_METRICS_PTR(old);
108 unsigned long prev, new;
109
110 p = peer->metrics;
111 if (inet_metrics_new(peer))
112 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113
114 new = (unsigned long) p;
115 prev = cmpxchg(&dst->_metrics, old, new);
116
117 if (prev != old) {
118 p = __DST_METRICS_PTR(prev);
119 if (prev & DST_METRICS_READ_ONLY)
120 p = NULL;
121 }
122 }
123 return p;
124 }
125
choose_neigh_daddr(struct rt6_info * rt,struct sk_buff * skb,const void * daddr)126 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127 struct sk_buff *skb,
128 const void *daddr)
129 {
130 struct in6_addr *p = &rt->rt6i_gateway;
131
132 if (!ipv6_addr_any(p))
133 return (const void *) p;
134 else if (skb)
135 return &ipv6_hdr(skb)->daddr;
136 return daddr;
137 }
138
ip6_neigh_lookup(const struct dst_entry * dst,struct sk_buff * skb,const void * daddr)139 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140 struct sk_buff *skb,
141 const void *daddr)
142 {
143 struct rt6_info *rt = (struct rt6_info *) dst;
144 struct neighbour *n;
145
146 daddr = choose_neigh_daddr(rt, skb, daddr);
147 n = __ipv6_neigh_lookup(dst->dev, daddr);
148 if (n)
149 return n;
150 return neigh_create(&nd_tbl, daddr, dst->dev);
151 }
152
153 static struct dst_ops ip6_dst_ops_template = {
154 .family = AF_INET6,
155 .protocol = cpu_to_be16(ETH_P_IPV6),
156 .gc = ip6_dst_gc,
157 .gc_thresh = 1024,
158 .check = ip6_dst_check,
159 .default_advmss = ip6_default_advmss,
160 .mtu = ip6_mtu,
161 .cow_metrics = ipv6_cow_metrics,
162 .destroy = ip6_dst_destroy,
163 .ifdown = ip6_dst_ifdown,
164 .negative_advice = ip6_negative_advice,
165 .link_failure = ip6_link_failure,
166 .update_pmtu = ip6_rt_update_pmtu,
167 .redirect = rt6_do_redirect,
168 .local_out = __ip6_local_out,
169 .neigh_lookup = ip6_neigh_lookup,
170 };
171
ip6_blackhole_mtu(const struct dst_entry * dst)172 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
173 {
174 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
175
176 return mtu ? : dst->dev->mtu;
177 }
178
/* .update_pmtu callback of ip6_dst_blackhole_ops: deliberately a no-op. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
	/* nothing to do */
}
183
/* .redirect callback of ip6_dst_blackhole_ops: deliberately a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
188
ip6_rt_blackhole_cow_metrics(struct dst_entry * dst,unsigned long old)189 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
190 unsigned long old)
191 {
192 return NULL;
193 }
194
195 static struct dst_ops ip6_dst_blackhole_ops = {
196 .family = AF_INET6,
197 .protocol = cpu_to_be16(ETH_P_IPV6),
198 .destroy = ip6_dst_destroy,
199 .check = ip6_dst_check,
200 .mtu = ip6_blackhole_mtu,
201 .default_advmss = ip6_default_advmss,
202 .update_pmtu = ip6_rt_blackhole_update_pmtu,
203 .redirect = ip6_rt_blackhole_redirect,
204 .cow_metrics = ip6_rt_blackhole_cow_metrics,
205 .neigh_lookup = ip6_neigh_lookup,
206 };
207
208 static const u32 ip6_template_metrics[RTAX_MAX] = {
209 [RTAX_HOPLIMIT - 1] = 0,
210 };
211
212 static const struct rt6_info ip6_null_entry_template = {
213 .dst = {
214 .__refcnt = ATOMIC_INIT(1),
215 .__use = 1,
216 .obsolete = DST_OBSOLETE_FORCE_CHK,
217 .error = -ENETUNREACH,
218 .input = ip6_pkt_discard,
219 .output = ip6_pkt_discard_out,
220 },
221 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
222 .rt6i_protocol = RTPROT_KERNEL,
223 .rt6i_metric = ~(u32) 0,
224 .rt6i_ref = ATOMIC_INIT(1),
225 };
226
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route entry: a reject route whose dst reports
 * -EACCES and hands packets to the ip6_pkt_prohibit pair.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

/*
 * Template for the "blackhole" route entry: a reject route whose dst reports
 * -EINVAL and uses dst_discard for both input and output.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

#endif
263
264 /* allocate dst with ip6_dst_ops */
ip6_dst_alloc(struct net * net,struct net_device * dev,int flags,struct fib6_table * table)265 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
266 struct net_device *dev,
267 int flags,
268 struct fib6_table *table)
269 {
270 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
271 0, DST_OBSOLETE_FORCE_CHK, flags);
272
273 if (rt) {
274 struct dst_entry *dst = &rt->dst;
275
276 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
277 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
278 rt->rt6i_genid = rt_genid(net);
279 INIT_LIST_HEAD(&rt->rt6i_siblings);
280 rt->rt6i_nsiblings = 0;
281 }
282 return rt;
283 }
284
ip6_dst_destroy(struct dst_entry * dst)285 static void ip6_dst_destroy(struct dst_entry *dst)
286 {
287 struct rt6_info *rt = (struct rt6_info *)dst;
288 struct inet6_dev *idev = rt->rt6i_idev;
289 struct dst_entry *from = dst->from;
290
291 if (!(rt->dst.flags & DST_HOST))
292 dst_destroy_metrics_generic(dst);
293
294 if (idev) {
295 rt->rt6i_idev = NULL;
296 in6_dev_put(idev);
297 }
298
299 dst->from = NULL;
300 dst_release(from);
301
302 if (rt6_has_peer(rt)) {
303 struct inet_peer *peer = rt6_peer_ptr(rt);
304 inet_putpeer(peer);
305 }
306 }
307
rt6_bind_peer(struct rt6_info * rt,int create)308 void rt6_bind_peer(struct rt6_info *rt, int create)
309 {
310 struct inet_peer_base *base;
311 struct inet_peer *peer;
312
313 base = inetpeer_base_ptr(rt->_rt6i_peer);
314 if (!base)
315 return;
316
317 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
318 if (peer) {
319 if (!rt6_set_peer(rt, peer))
320 inet_putpeer(peer);
321 }
322 }
323
ip6_dst_ifdown(struct dst_entry * dst,struct net_device * dev,int how)324 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
325 int how)
326 {
327 struct rt6_info *rt = (struct rt6_info *)dst;
328 struct inet6_dev *idev = rt->rt6i_idev;
329 struct net_device *loopback_dev =
330 dev_net(dev)->loopback_dev;
331
332 if (dev != loopback_dev) {
333 if (idev && idev->dev == dev) {
334 struct inet6_dev *loopback_idev =
335 in6_dev_get(loopback_dev);
336 if (loopback_idev) {
337 rt->rt6i_idev = loopback_idev;
338 in6_dev_put(idev);
339 }
340 }
341 }
342 }
343
rt6_check_expired(const struct rt6_info * rt)344 static bool rt6_check_expired(const struct rt6_info *rt)
345 {
346 if (rt->rt6i_flags & RTF_EXPIRES) {
347 if (time_after(jiffies, rt->dst.expires))
348 return true;
349 } else if (rt->dst.from) {
350 return rt6_check_expired((struct rt6_info *) rt->dst.from);
351 }
352 return false;
353 }
354
rt6_need_strict(const struct in6_addr * daddr)355 static bool rt6_need_strict(const struct in6_addr *daddr)
356 {
357 return ipv6_addr_type(daddr) &
358 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
359 }
360
361 /* Multipath route selection:
362 * Hash based function using packet header and flowlabel.
363 * Adapted from fib_info_hashfn()
364 */
rt6_info_hash_nhsfn(unsigned int candidate_count,const struct flowi6 * fl6)365 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
366 const struct flowi6 *fl6)
367 {
368 unsigned int val = fl6->flowi6_proto;
369
370 val ^= ipv6_addr_hash(&fl6->daddr);
371 val ^= ipv6_addr_hash(&fl6->saddr);
372
373 /* Work only if this not encapsulated */
374 switch (fl6->flowi6_proto) {
375 case IPPROTO_UDP:
376 case IPPROTO_TCP:
377 case IPPROTO_SCTP:
378 val ^= (__force u16)fl6->fl6_sport;
379 val ^= (__force u16)fl6->fl6_dport;
380 break;
381
382 case IPPROTO_ICMPV6:
383 val ^= (__force u16)fl6->fl6_icmp_type;
384 val ^= (__force u16)fl6->fl6_icmp_code;
385 break;
386 }
387 /* RFC6438 recommands to use flowlabel */
388 val ^= (__force u32)fl6->flowlabel;
389
390 /* Perhaps, we need to tune, this function? */
391 val = val ^ (val >> 7) ^ (val >> 12);
392 return val % candidate_count;
393 }
394
rt6_multipath_select(struct rt6_info * match,struct flowi6 * fl6)395 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
396 struct flowi6 *fl6)
397 {
398 struct rt6_info *sibling, *next_sibling;
399 int route_choosen;
400
401 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
402 /* Don't change the route, if route_choosen == 0
403 * (siblings does not include ourself)
404 */
405 if (route_choosen)
406 list_for_each_entry_safe(sibling, next_sibling,
407 &match->rt6i_siblings, rt6i_siblings) {
408 route_choosen--;
409 if (route_choosen == 0) {
410 match = sibling;
411 break;
412 }
413 }
414 return match;
415 }
416
417 /*
418 * Route lookup. Any table->tb6_lock is implied.
419 */
420
rt6_device_match(struct net * net,struct rt6_info * rt,const struct in6_addr * saddr,int oif,int flags)421 static inline struct rt6_info *rt6_device_match(struct net *net,
422 struct rt6_info *rt,
423 const struct in6_addr *saddr,
424 int oif,
425 int flags)
426 {
427 struct rt6_info *local = NULL;
428 struct rt6_info *sprt;
429
430 if (!oif && ipv6_addr_any(saddr))
431 goto out;
432
433 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
434 struct net_device *dev = sprt->dst.dev;
435
436 if (oif) {
437 if (dev->ifindex == oif)
438 return sprt;
439 if (dev->flags & IFF_LOOPBACK) {
440 if (!sprt->rt6i_idev ||
441 sprt->rt6i_idev->dev->ifindex != oif) {
442 if (flags & RT6_LOOKUP_F_IFACE && oif)
443 continue;
444 if (local && (!oif ||
445 local->rt6i_idev->dev->ifindex == oif))
446 continue;
447 }
448 local = sprt;
449 }
450 } else {
451 if (ipv6_chk_addr(net, saddr, dev,
452 flags & RT6_LOOKUP_F_IFACE))
453 return sprt;
454 }
455 }
456
457 if (oif) {
458 if (local)
459 return local;
460
461 if (flags & RT6_LOOKUP_F_IFACE)
462 return net->ipv6.ip6_null_entry;
463 }
464 out:
465 return rt;
466 }
467
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation to the
 * gateway of @rt unless its neighbour entry is already NUD_VALID or was
 * updated within the device's rtr_probe_interval.
 *
 * Router Reachability Probe MUST be rate-limited
 * to no more than one per minute.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr *target;
	struct in6_addr mcaddr;

	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		write_lock(&neigh->lock);
		if ((neigh->nud_state & NUD_VALID) ||
		    !time_after(jiffies, neigh->updated +
				rt->rt6i_idev->cnf.rtr_probe_interval)) {
			/* Known reachable, or probed too recently. */
			write_unlock(&neigh->lock);
			goto unlock;
		}
		/* Stamp the entry so the next probe is rate-limited. */
		neigh->updated = jiffies;
		write_unlock(&neigh->lock);
	}

	target = (struct in6_addr *)&rt->rt6i_gateway;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
unlock:
	rcu_read_unlock_bh();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
514
515 /*
516 * Default Router Selection (RFC 2461 6.3.6)
517 */
rt6_check_dev(struct rt6_info * rt,int oif)518 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
519 {
520 struct net_device *dev = rt->dst.dev;
521 if (!oif || dev->ifindex == oif)
522 return 2;
523 if ((dev->flags & IFF_LOOPBACK) &&
524 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
525 return 1;
526 return 0;
527 }
528
rt6_check_neigh(struct rt6_info * rt)529 static inline bool rt6_check_neigh(struct rt6_info *rt)
530 {
531 struct neighbour *neigh;
532 bool ret = false;
533
534 if (rt->rt6i_flags & RTF_NONEXTHOP ||
535 !(rt->rt6i_flags & RTF_GATEWAY))
536 return true;
537
538 rcu_read_lock_bh();
539 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
540 if (neigh) {
541 read_lock(&neigh->lock);
542 if (neigh->nud_state & NUD_VALID)
543 ret = true;
544 #ifdef CONFIG_IPV6_ROUTER_PREF
545 else if (!(neigh->nud_state & NUD_FAILED))
546 ret = true;
547 #endif
548 read_unlock(&neigh->lock);
549 }
550 rcu_read_unlock_bh();
551
552 return ret;
553 }
554
rt6_score_route(struct rt6_info * rt,int oif,int strict)555 static int rt6_score_route(struct rt6_info *rt, int oif,
556 int strict)
557 {
558 int m;
559
560 m = rt6_check_dev(rt, oif);
561 if (!m && (strict & RT6_LOOKUP_F_IFACE))
562 return -1;
563 #ifdef CONFIG_IPV6_ROUTER_PREF
564 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
565 #endif
566 if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
567 return -1;
568 return m;
569 }
570
find_match(struct rt6_info * rt,int oif,int strict,int * mpri,struct rt6_info * match)571 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
572 int *mpri, struct rt6_info *match)
573 {
574 int m;
575
576 if (rt6_check_expired(rt))
577 goto out;
578
579 m = rt6_score_route(rt, oif, strict);
580 if (m < 0)
581 goto out;
582
583 if (m > *mpri) {
584 if (strict & RT6_LOOKUP_F_REACHABLE)
585 rt6_probe(match);
586 *mpri = m;
587 match = rt;
588 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
589 rt6_probe(rt);
590 }
591
592 out:
593 return match;
594 }
595
/*
 * Find the best route among the entries of @fn that share @metric.  The
 * scan starts at the round-robin head @rr_head and runs to the end of the
 * equal-metric run, then wraps around from fn->leaf up to (but excluding)
 * @rr_head.  Returns NULL when no candidate qualifies.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict)
{
	struct rt6_info *best = NULL;
	struct rt6_info *cur;
	int best_score = -1;

	cur = rr_head;
	while (cur && cur->rt6i_metric == metric) {
		best = find_match(cur, oif, strict, &best_score, best);
		cur = cur->dst.rt6_next;
	}

	cur = fn->leaf;
	while (cur && cur != rr_head && cur->rt6i_metric == metric) {
		best = find_match(cur, oif, strict, &best_score, best);
		cur = cur->dst.rt6_next;
	}

	return best;
}
613
rt6_select(struct fib6_node * fn,int oif,int strict)614 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
615 {
616 struct rt6_info *match, *rt0;
617 struct net *net;
618
619 rt0 = fn->rr_ptr;
620 if (!rt0)
621 fn->rr_ptr = rt0 = fn->leaf;
622
623 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
624
625 if (!match &&
626 (strict & RT6_LOOKUP_F_REACHABLE)) {
627 struct rt6_info *next = rt0->dst.rt6_next;
628
629 /* no entries matched; do round-robin */
630 if (!next || next->rt6i_metric != rt0->rt6i_metric)
631 next = fn->leaf;
632
633 if (next != rt0)
634 fn->rr_ptr = next;
635 }
636
637 net = dev_net(rt0->dst.dev);
638 return match ? match : net->ipv6.ip6_null_entry;
639 }
640
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option @opt of @len bytes received on @dev
 * from gateway @gwaddr.
 *
 * A zero lifetime deletes an existing matching route; a non-zero lifetime
 * adds the route when it is missing, or refreshes the preference bits of an
 * existing one, and then sets or clears its expiry.
 *
 * Returns 0 on success, -EINVAL when the option is malformed.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* A zero-length prefix refers to the default router entry. */
	rt = rinfo->prefix_len == 0 ?
		rt6_get_dflt_router(gwaddr, dev) :
		rt6_get_route_info(dev, prefix, rinfo->prefix_len, gwaddr);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(dev, prefix, rinfo->prefix_len,
					gwaddr, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	ip6_rt_put(rt);
	return 0;
}
#endif
713
/*
 * Backtrack through the fib6 tree when the current lookup produced the null
 * entry.  Relies on the expanding function providing:
 *   - variables `rt` (current result) and `fn` (current node),
 *   - a label `restart` to retry selection on the new `fn`,
 *   - a label `out` taken when the top-level root is reached.
 * Each step moves to the parent node, or looks @saddr up in the parent's
 * subtree when that subtree exists and is not the node just left.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *bt_parent;				\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			bt_parent = fn->parent;				\
			if (!FIB6_SUBTREE(bt_parent) ||			\
			    FIB6_SUBTREE(bt_parent) == fn)		\
				fn = bt_parent;				\
			else						\
				fn = fib6_lookup(FIB6_SUBTREE(bt_parent), \
						 NULL, saddr);		\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
731
ip6_pol_route_lookup(struct net * net,struct fib6_table * table,struct flowi6 * fl6,int flags)732 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
733 struct fib6_table *table,
734 struct flowi6 *fl6, int flags)
735 {
736 struct fib6_node *fn;
737 struct rt6_info *rt;
738
739 read_lock_bh(&table->tb6_lock);
740 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
741 restart:
742 rt = fn->leaf;
743 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
744 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
745 rt = rt6_multipath_select(rt, fl6);
746 BACKTRACK(net, &fl6->saddr);
747 out:
748 dst_use(&rt->dst, jiffies);
749 read_unlock_bh(&table->tb6_lock);
750 return rt;
751
752 }
753
ip6_route_lookup(struct net * net,struct flowi6 * fl6,int flags)754 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
755 int flags)
756 {
757 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
758 }
759 EXPORT_SYMBOL_GPL(ip6_route_lookup);
760
rt6_lookup(struct net * net,const struct in6_addr * daddr,const struct in6_addr * saddr,int oif,int strict)761 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
762 const struct in6_addr *saddr, int oif, int strict)
763 {
764 struct flowi6 fl6 = {
765 .flowi6_oif = oif,
766 .daddr = *daddr,
767 };
768 struct dst_entry *dst;
769 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
770
771 if (saddr) {
772 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
773 flags |= RT6_LOOKUP_F_HAS_SADDR;
774 }
775
776 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
777 if (dst->error == 0)
778 return (struct rt6_info *) dst;
779
780 dst_release(dst);
781
782 return NULL;
783 }
784
785 EXPORT_SYMBOL(rt6_lookup);
786
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold it, it may
   be destroyed.
 */
792
__ip6_ins_rt(struct rt6_info * rt,struct nl_info * info)793 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
794 {
795 int err;
796 struct fib6_table *table;
797
798 table = rt->rt6i_table;
799 write_lock_bh(&table->tb6_lock);
800 err = fib6_add(&table->tb6_root, rt, info);
801 write_unlock_bh(&table->tb6_lock);
802
803 return err;
804 }
805
ip6_ins_rt(struct rt6_info * rt)806 int ip6_ins_rt(struct rt6_info *rt)
807 {
808 struct nl_info info = {
809 .nl_net = dev_net(rt->dst.dev),
810 };
811 return __ip6_ins_rt(rt, &info);
812 }
813
rt6_alloc_cow(struct rt6_info * ort,const struct in6_addr * daddr,const struct in6_addr * saddr)814 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
815 const struct in6_addr *daddr,
816 const struct in6_addr *saddr)
817 {
818 struct rt6_info *rt;
819
820 /*
821 * Clone the route.
822 */
823
824 rt = ip6_rt_copy(ort, daddr);
825
826 if (rt) {
827 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
828 if (ort->rt6i_dst.plen != 128 &&
829 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
830 rt->rt6i_flags |= RTF_ANYCAST;
831 rt->rt6i_gateway = *daddr;
832 }
833
834 rt->rt6i_flags |= RTF_CACHE;
835
836 #ifdef CONFIG_IPV6_SUBTREES
837 if (rt->rt6i_src.plen && saddr) {
838 rt->rt6i_src.addr = *saddr;
839 rt->rt6i_src.plen = 128;
840 }
841 #endif
842 }
843
844 return rt;
845 }
846
rt6_alloc_clone(struct rt6_info * ort,const struct in6_addr * daddr)847 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
848 const struct in6_addr *daddr)
849 {
850 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
851
852 if (rt)
853 rt->rt6i_flags |= RTF_CACHE;
854 return rt;
855 }
856
ip6_pol_route(struct net * net,struct fib6_table * table,int oif,struct flowi6 * fl6,int flags)857 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
858 struct flowi6 *fl6, int flags)
859 {
860 struct fib6_node *fn;
861 struct rt6_info *rt, *nrt;
862 int strict = 0;
863 int attempts = 3;
864 int err;
865 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
866
867 strict |= flags & RT6_LOOKUP_F_IFACE;
868
869 relookup:
870 read_lock_bh(&table->tb6_lock);
871
872 restart_2:
873 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
874
875 restart:
876 rt = rt6_select(fn, oif, strict | reachable);
877 if (rt->rt6i_nsiblings && oif == 0)
878 rt = rt6_multipath_select(rt, fl6);
879 BACKTRACK(net, &fl6->saddr);
880 if (rt == net->ipv6.ip6_null_entry ||
881 rt->rt6i_flags & RTF_CACHE)
882 goto out;
883
884 dst_hold(&rt->dst);
885 read_unlock_bh(&table->tb6_lock);
886
887 if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
888 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
889 else if (!(rt->dst.flags & DST_HOST))
890 nrt = rt6_alloc_clone(rt, &fl6->daddr);
891 else
892 goto out2;
893
894 ip6_rt_put(rt);
895 rt = nrt ? : net->ipv6.ip6_null_entry;
896
897 dst_hold(&rt->dst);
898 if (nrt) {
899 err = ip6_ins_rt(nrt);
900 if (!err)
901 goto out2;
902 }
903
904 if (--attempts <= 0)
905 goto out2;
906
907 /*
908 * Race condition! In the gap, when table->tb6_lock was
909 * released someone could insert this route. Relookup.
910 */
911 ip6_rt_put(rt);
912 goto relookup;
913
914 out:
915 if (reachable) {
916 reachable = 0;
917 goto restart_2;
918 }
919 dst_hold(&rt->dst);
920 read_unlock_bh(&table->tb6_lock);
921 out2:
922 rt->dst.lastuse = jiffies;
923 rt->dst.__use++;
924
925 return rt;
926 }
927
ip6_pol_route_input(struct net * net,struct fib6_table * table,struct flowi6 * fl6,int flags)928 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
929 struct flowi6 *fl6, int flags)
930 {
931 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
932 }
933
ip6_route_input_lookup(struct net * net,struct net_device * dev,struct flowi6 * fl6,int flags)934 static struct dst_entry *ip6_route_input_lookup(struct net *net,
935 struct net_device *dev,
936 struct flowi6 *fl6, int flags)
937 {
938 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
939 flags |= RT6_LOOKUP_F_IFACE;
940
941 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
942 }
943
ip6_route_input(struct sk_buff * skb)944 void ip6_route_input(struct sk_buff *skb)
945 {
946 const struct ipv6hdr *iph = ipv6_hdr(skb);
947 struct net *net = dev_net(skb->dev);
948 int flags = RT6_LOOKUP_F_HAS_SADDR;
949 struct flowi6 fl6 = {
950 .flowi6_iif = skb->dev->ifindex,
951 .daddr = iph->daddr,
952 .saddr = iph->saddr,
953 .flowlabel = ip6_flowinfo(iph),
954 .flowi6_mark = skb->mark,
955 .flowi6_proto = iph->nexthdr,
956 };
957
958 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
959 }
960
ip6_pol_route_output(struct net * net,struct fib6_table * table,struct flowi6 * fl6,int flags)961 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
962 struct flowi6 *fl6, int flags)
963 {
964 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
965 }
966
ip6_route_output(struct net * net,const struct sock * sk,struct flowi6 * fl6)967 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
968 struct flowi6 *fl6)
969 {
970 int flags = 0;
971
972 fl6->flowi6_iif = LOOPBACK_IFINDEX;
973
974 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
975 flags |= RT6_LOOKUP_F_IFACE;
976
977 if (!ipv6_addr_any(&fl6->saddr))
978 flags |= RT6_LOOKUP_F_HAS_SADDR;
979 else if (sk)
980 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
981
982 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
983 }
984
985 EXPORT_SYMBOL(ip6_route_output);
986
ip6_blackhole_route(struct net * net,struct dst_entry * dst_orig)987 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
988 {
989 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
990 struct dst_entry *new = NULL;
991
992 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
993 if (rt) {
994 new = &rt->dst;
995
996 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
997 rt6_init_peer(rt, net->ipv6.peers);
998
999 new->__use = 1;
1000 new->input = dst_discard;
1001 new->output = dst_discard;
1002
1003 if (dst_metrics_read_only(&ort->dst))
1004 new->_metrics = ort->dst._metrics;
1005 else
1006 dst_copy_metrics(new, &ort->dst);
1007 rt->rt6i_idev = ort->rt6i_idev;
1008 if (rt->rt6i_idev)
1009 in6_dev_hold(rt->rt6i_idev);
1010
1011 rt->rt6i_gateway = ort->rt6i_gateway;
1012 rt->rt6i_flags = ort->rt6i_flags;
1013 rt->rt6i_metric = 0;
1014
1015 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1016 #ifdef CONFIG_IPV6_SUBTREES
1017 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1018 #endif
1019
1020 dst_free(new);
1021 }
1022
1023 dst_release(dst_orig);
1024 return new ? new : ERR_PTR(-ENOMEM);
1025 }
1026
1027 /*
1028 * Destination cache support functions
1029 */
1030
/*
 * dst_ops->check hook.  Returns @dst when it is still valid, i.e. its
 * generation id matches the namespace's and the serial number of its fib
 * node equals @cookie; returns NULL otherwise.
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */
	if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
		return NULL;

	/* A route that is not linked to a fib node cannot be validated. */
	if (!rt->rt6i_node)
		return NULL;

	return rt->rt6i_node->fn_sernum == cookie ? dst : NULL;
}
1049
ip6_negative_advice(struct dst_entry * dst)1050 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1051 {
1052 struct rt6_info *rt = (struct rt6_info *) dst;
1053
1054 if (rt) {
1055 if (rt->rt6i_flags & RTF_CACHE) {
1056 if (rt6_check_expired(rt)) {
1057 ip6_del_rt(rt);
1058 dst = NULL;
1059 }
1060 } else {
1061 dst_release(dst);
1062 dst = NULL;
1063 }
1064 }
1065 return dst;
1066 }
1067
ip6_link_failure(struct sk_buff * skb)1068 static void ip6_link_failure(struct sk_buff *skb)
1069 {
1070 struct rt6_info *rt;
1071
1072 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1073
1074 rt = (struct rt6_info *) skb_dst(skb);
1075 if (rt) {
1076 if (rt->rt6i_flags & RTF_CACHE)
1077 rt6_update_expires(rt, 0);
1078 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1079 rt->rt6i_node->fn_sernum = -1;
1080 }
1081 }
1082
/*
 * dst_ops->update_pmtu hook.  Confirms the dst and, for host routes
 * (plen == 128) whose current MTU is larger than @mtu, records the new path
 * MTU.  Values below IPV6_MIN_MTU are raised to it and
 * RTAX_FEATURE_ALLFRAG is set.  The route is marked RTF_MODIFIED and its
 * expiry set from the ip6_rt_mtu_expires sysctl.
 */
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;
	struct net *net;

	dst_confirm(dst);

	if (mtu >= dst_mtu(dst) || rt6->rt6i_dst.plen != 128)
		return;

	net = dev_net(dst->dev);
	rt6->rt6i_flags |= RTF_MODIFIED;

	if (mtu < IPV6_MIN_MTU) {
		u32 features = dst_metric(dst, RTAX_FEATURES);

		dst_metric_set(dst, RTAX_FEATURES,
			       features | RTAX_FEATURE_ALLFRAG);
		mtu = IPV6_MIN_MTU;
	}

	dst_metric_set(dst, RTAX_MTU, mtu);
	rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
}
1103
/*
 * Apply path MTU @mtu (network byte order) to the route used for the flow
 * described by the IPv6 header at skb->data.  When @mark is zero the mark
 * comes from IP6_REPLY_MARK().  Nothing is updated if the route lookup
 * reports an error.
 */
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark, kuid_t uid)
{
	const struct ipv6hdr *hdr = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!mark)
		mark = IP6_REPLY_MARK(net, skb->mark);

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.flowi6_flags = 0;
	fl6.flowi6_uid = uid;
	fl6.daddr = hdr->daddr;
	fl6.saddr = hdr->saddr;
	fl6.flowlabel = ip6_flowinfo(hdr);

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error == 0)
		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1126
/*
 * Socket flavour of ip6_update_pmtu(): namespace, bound interface, mark and
 * uid are all taken from @sk.
 */
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	struct net *net = sock_net(sk);
	int oif = sk->sk_bound_dev_if;

	ip6_update_pmtu(skb, net, mtu, oif, sk->sk_mark, sk->sk_uid);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1133
/*
 * Process a redirect for the flow described by the IPv6 header found at
 * skb->data.
 *
 * The route is looked up with ip6_route_output(); rt6_do_redirect() is only
 * invoked when the lookup did not return an error dst.  The looked-up dst
 * is always released.
 */
void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
		  kuid_t uid)
{
	const struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
	struct dst_entry *route;
	struct flowi6 flow;

	/* memset leaves every field we do not set below (incl. flags) at 0 */
	memset(&flow, 0, sizeof(flow));
	flow.daddr = hdr->daddr;
	flow.saddr = hdr->saddr;
	flow.flowlabel = ip6_flowinfo(hdr);
	flow.flowi6_oif = oif;
	flow.flowi6_mark = mark;
	flow.flowi6_uid = uid;

	route = ip6_route_output(net, NULL, &flow);
	if (!route->error)
		rt6_do_redirect(route, NULL, skb);
	dst_release(route);
}
EXPORT_SYMBOL_GPL(ip6_redirect);
1156
ip6_sk_redirect(struct sk_buff * skb,struct sock * sk)1157 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1158 {
1159 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1160 sk->sk_uid);
1161 }
1162 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1163
ip6_default_advmss(const struct dst_entry * dst)1164 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1165 {
1166 struct net_device *dev = dst->dev;
1167 unsigned int mtu = dst_mtu(dst);
1168 struct net *net = dev_net(dev);
1169
1170 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1171
1172 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1173 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1174
1175 /*
1176 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1177 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1178 * IPV6_MAXPLEN is also valid and means: "any MSS,
1179 * rely only on pmtu discovery"
1180 */
1181 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1182 mtu = IPV6_MAXPLEN;
1183 return mtu;
1184 }
1185
ip6_mtu(const struct dst_entry * dst)1186 static unsigned int ip6_mtu(const struct dst_entry *dst)
1187 {
1188 struct inet6_dev *idev;
1189 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1190
1191 if (mtu)
1192 return mtu;
1193
1194 mtu = IPV6_MIN_MTU;
1195
1196 rcu_read_lock();
1197 idev = __in6_dev_get(dst->dev);
1198 if (idev)
1199 mtu = idev->cnf.mtu6;
1200 rcu_read_unlock();
1201
1202 return mtu;
1203 }
1204
/*
 * Singly linked list (chained through dst->next) of the dst entries created
 * by icmp6_dst_alloc().  Entries are reaped by icmp6_dst_gc() once their
 * reference count reaches zero, or by icmp6_clean_all().
 */
static struct dst_entry *icmp6_dst_gc_list;
/* Guards icmp6_dst_gc_list; always taken with the _bh variants. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
1207
icmp6_dst_alloc(struct net_device * dev,struct flowi6 * fl6)1208 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1209 struct flowi6 *fl6)
1210 {
1211 struct dst_entry *dst;
1212 struct rt6_info *rt;
1213 struct inet6_dev *idev = in6_dev_get(dev);
1214 struct net *net = dev_net(dev);
1215
1216 if (unlikely(!idev))
1217 return ERR_PTR(-ENODEV);
1218
1219 rt = ip6_dst_alloc(net, dev, 0, NULL);
1220 if (unlikely(!rt)) {
1221 in6_dev_put(idev);
1222 dst = ERR_PTR(-ENOMEM);
1223 goto out;
1224 }
1225
1226 rt->dst.flags |= DST_HOST;
1227 rt->dst.output = ip6_output;
1228 atomic_set(&rt->dst.__refcnt, 1);
1229 rt->rt6i_dst.addr = fl6->daddr;
1230 rt->rt6i_dst.plen = 128;
1231 rt->rt6i_idev = idev;
1232 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1233
1234 spin_lock_bh(&icmp6_dst_lock);
1235 rt->dst.next = icmp6_dst_gc_list;
1236 icmp6_dst_gc_list = &rt->dst;
1237 spin_unlock_bh(&icmp6_dst_lock);
1238
1239 fib6_force_start_gc(net);
1240
1241 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1242
1243 out:
1244 return dst;
1245 }
1246
icmp6_dst_gc(void)1247 int icmp6_dst_gc(void)
1248 {
1249 struct dst_entry *dst, **pprev;
1250 int more = 0;
1251
1252 spin_lock_bh(&icmp6_dst_lock);
1253 pprev = &icmp6_dst_gc_list;
1254
1255 while ((dst = *pprev) != NULL) {
1256 if (!atomic_read(&dst->__refcnt)) {
1257 *pprev = dst->next;
1258 dst_free(dst);
1259 } else {
1260 pprev = &dst->next;
1261 ++more;
1262 }
1263 }
1264
1265 spin_unlock_bh(&icmp6_dst_lock);
1266
1267 return more;
1268 }
1269
icmp6_clean_all(int (* func)(struct rt6_info * rt,void * arg),void * arg)1270 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1271 void *arg)
1272 {
1273 struct dst_entry *dst, **pprev;
1274
1275 spin_lock_bh(&icmp6_dst_lock);
1276 pprev = &icmp6_dst_gc_list;
1277 while ((dst = *pprev) != NULL) {
1278 struct rt6_info *rt = (struct rt6_info *) dst;
1279 if (func(rt, arg)) {
1280 *pprev = dst->next;
1281 dst_free(dst);
1282 } else {
1283 pprev = &dst->next;
1284 }
1285 }
1286 spin_unlock_bh(&icmp6_dst_lock);
1287 }
1288
ip6_dst_gc(struct dst_ops * ops)1289 static int ip6_dst_gc(struct dst_ops *ops)
1290 {
1291 unsigned long now = jiffies;
1292 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1293 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1294 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1295 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1296 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1297 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1298 int entries;
1299
1300 entries = dst_entries_get_fast(ops);
1301 if (time_after(rt_last_gc + rt_min_interval, now) &&
1302 entries <= rt_max_size)
1303 goto out;
1304
1305 net->ipv6.ip6_rt_gc_expire++;
1306 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1307 net->ipv6.ip6_rt_last_gc = now;
1308 entries = dst_entries_get_slow(ops);
1309 if (entries < ops->gc_thresh)
1310 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1311 out:
1312 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1313 return entries > rt_max_size;
1314 }
1315
ip6_dst_hoplimit(struct dst_entry * dst)1316 int ip6_dst_hoplimit(struct dst_entry *dst)
1317 {
1318 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1319 if (hoplimit == 0) {
1320 struct net_device *dev = dst->dev;
1321 struct inet6_dev *idev;
1322
1323 rcu_read_lock();
1324 idev = __in6_dev_get(dev);
1325 if (idev)
1326 hoplimit = idev->cnf.hop_limit;
1327 else
1328 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1329 rcu_read_unlock();
1330 }
1331 return hoplimit;
1332 }
1333 EXPORT_SYMBOL(ip6_dst_hoplimit);
1334
1335 /*
1336 *
1337 */
1338
ip6_route_add(struct fib6_config * cfg)1339 int ip6_route_add(struct fib6_config *cfg)
1340 {
1341 int err;
1342 struct net *net = cfg->fc_nlinfo.nl_net;
1343 struct rt6_info *rt = NULL;
1344 struct net_device *dev = NULL;
1345 struct inet6_dev *idev = NULL;
1346 struct fib6_table *table;
1347 int addr_type;
1348
1349 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1350 return -EINVAL;
1351 #ifndef CONFIG_IPV6_SUBTREES
1352 if (cfg->fc_src_len)
1353 return -EINVAL;
1354 #endif
1355 if (cfg->fc_ifindex) {
1356 err = -ENODEV;
1357 dev = dev_get_by_index(net, cfg->fc_ifindex);
1358 if (!dev)
1359 goto out;
1360 idev = in6_dev_get(dev);
1361 if (!idev)
1362 goto out;
1363 }
1364
1365 if (cfg->fc_metric == 0)
1366 cfg->fc_metric = IP6_RT_PRIO_USER;
1367
1368 err = -ENOBUFS;
1369 if (cfg->fc_nlinfo.nlh &&
1370 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1371 table = fib6_get_table(net, cfg->fc_table);
1372 if (!table) {
1373 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1374 table = fib6_new_table(net, cfg->fc_table);
1375 }
1376 } else {
1377 table = fib6_new_table(net, cfg->fc_table);
1378 }
1379
1380 if (!table)
1381 goto out;
1382
1383 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1384
1385 if (!rt) {
1386 err = -ENOMEM;
1387 goto out;
1388 }
1389
1390 if (cfg->fc_flags & RTF_EXPIRES)
1391 rt6_set_expires(rt, jiffies +
1392 clock_t_to_jiffies(cfg->fc_expires));
1393 else
1394 rt6_clean_expires(rt);
1395
1396 if (cfg->fc_protocol == RTPROT_UNSPEC)
1397 cfg->fc_protocol = RTPROT_BOOT;
1398 rt->rt6i_protocol = cfg->fc_protocol;
1399
1400 addr_type = ipv6_addr_type(&cfg->fc_dst);
1401
1402 if (addr_type & IPV6_ADDR_MULTICAST)
1403 rt->dst.input = ip6_mc_input;
1404 else if (cfg->fc_flags & RTF_LOCAL)
1405 rt->dst.input = ip6_input;
1406 else
1407 rt->dst.input = ip6_forward;
1408
1409 rt->dst.output = ip6_output;
1410
1411 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1412 rt->rt6i_dst.plen = cfg->fc_dst_len;
1413 if (rt->rt6i_dst.plen == 128)
1414 rt->dst.flags |= DST_HOST;
1415
1416 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1417 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1418 if (!metrics) {
1419 err = -ENOMEM;
1420 goto out;
1421 }
1422 dst_init_metrics(&rt->dst, metrics, 0);
1423 }
1424 #ifdef CONFIG_IPV6_SUBTREES
1425 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1426 rt->rt6i_src.plen = cfg->fc_src_len;
1427 #endif
1428
1429 rt->rt6i_metric = cfg->fc_metric;
1430
1431 /* We cannot add true routes via loopback here,
1432 they would result in kernel looping; promote them to reject routes
1433 */
1434 if ((cfg->fc_flags & RTF_REJECT) ||
1435 (dev && (dev->flags & IFF_LOOPBACK) &&
1436 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1437 !(cfg->fc_flags & RTF_LOCAL))) {
1438 /* hold loopback dev/idev if we haven't done so. */
1439 if (dev != net->loopback_dev) {
1440 if (dev) {
1441 dev_put(dev);
1442 in6_dev_put(idev);
1443 }
1444 dev = net->loopback_dev;
1445 dev_hold(dev);
1446 idev = in6_dev_get(dev);
1447 if (!idev) {
1448 err = -ENODEV;
1449 goto out;
1450 }
1451 }
1452 rt->dst.output = ip6_pkt_discard_out;
1453 rt->dst.input = ip6_pkt_discard;
1454 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1455 switch (cfg->fc_type) {
1456 case RTN_BLACKHOLE:
1457 rt->dst.error = -EINVAL;
1458 break;
1459 case RTN_PROHIBIT:
1460 rt->dst.error = -EACCES;
1461 break;
1462 case RTN_THROW:
1463 rt->dst.error = -EAGAIN;
1464 break;
1465 default:
1466 rt->dst.error = -ENETUNREACH;
1467 break;
1468 }
1469 goto install_route;
1470 }
1471
1472 if (cfg->fc_flags & RTF_GATEWAY) {
1473 const struct in6_addr *gw_addr;
1474 int gwa_type;
1475
1476 gw_addr = &cfg->fc_gateway;
1477 rt->rt6i_gateway = *gw_addr;
1478 gwa_type = ipv6_addr_type(gw_addr);
1479
1480 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1481 struct rt6_info *grt;
1482
1483 /* IPv6 strictly inhibits using not link-local
1484 addresses as nexthop address.
1485 Otherwise, router will not able to send redirects.
1486 It is very good, but in some (rare!) circumstances
1487 (SIT, PtP, NBMA NOARP links) it is handy to allow
1488 some exceptions. --ANK
1489 */
1490 err = -EINVAL;
1491 if (!(gwa_type & IPV6_ADDR_UNICAST))
1492 goto out;
1493
1494 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1495
1496 err = -EHOSTUNREACH;
1497 if (!grt)
1498 goto out;
1499 if (dev) {
1500 if (dev != grt->dst.dev) {
1501 ip6_rt_put(grt);
1502 goto out;
1503 }
1504 } else {
1505 dev = grt->dst.dev;
1506 idev = grt->rt6i_idev;
1507 dev_hold(dev);
1508 in6_dev_hold(grt->rt6i_idev);
1509 }
1510 if (!(grt->rt6i_flags & RTF_GATEWAY))
1511 err = 0;
1512 ip6_rt_put(grt);
1513
1514 if (err)
1515 goto out;
1516 }
1517 err = -EINVAL;
1518 if (!dev || (dev->flags & IFF_LOOPBACK))
1519 goto out;
1520 }
1521
1522 err = -ENODEV;
1523 if (!dev)
1524 goto out;
1525
1526 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1527 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1528 err = -EINVAL;
1529 goto out;
1530 }
1531 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1532 rt->rt6i_prefsrc.plen = 128;
1533 } else
1534 rt->rt6i_prefsrc.plen = 0;
1535
1536 rt->rt6i_flags = cfg->fc_flags;
1537
1538 install_route:
1539 if (cfg->fc_mx) {
1540 struct nlattr *nla;
1541 int remaining;
1542
1543 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1544 int type = nla_type(nla);
1545
1546 if (type) {
1547 if (type > RTAX_MAX) {
1548 err = -EINVAL;
1549 goto out;
1550 }
1551
1552 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1553 }
1554 }
1555 }
1556
1557 rt->dst.dev = dev;
1558 rt->rt6i_idev = idev;
1559 rt->rt6i_table = table;
1560
1561 cfg->fc_nlinfo.nl_net = dev_net(dev);
1562
1563 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1564
1565 out:
1566 if (dev)
1567 dev_put(dev);
1568 if (idev)
1569 in6_dev_put(idev);
1570 if (rt)
1571 dst_free(&rt->dst);
1572 return err;
1573 }
1574
__ip6_del_rt(struct rt6_info * rt,struct nl_info * info)1575 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1576 {
1577 int err;
1578 struct fib6_table *table;
1579 struct net *net = dev_net(rt->dst.dev);
1580
1581 if (rt == net->ipv6.ip6_null_entry) {
1582 err = -ENOENT;
1583 goto out;
1584 }
1585
1586 table = rt->rt6i_table;
1587 write_lock_bh(&table->tb6_lock);
1588 err = fib6_del(rt, info);
1589 write_unlock_bh(&table->tb6_lock);
1590
1591 out:
1592 ip6_rt_put(rt);
1593 return err;
1594 }
1595
ip6_del_rt(struct rt6_info * rt)1596 int ip6_del_rt(struct rt6_info *rt)
1597 {
1598 struct nl_info info = {
1599 .nl_net = dev_net(rt->dst.dev),
1600 };
1601 return __ip6_del_rt(rt, &info);
1602 }
1603
ip6_route_del(struct fib6_config * cfg)1604 static int ip6_route_del(struct fib6_config *cfg)
1605 {
1606 struct fib6_table *table;
1607 struct fib6_node *fn;
1608 struct rt6_info *rt;
1609 int err = -ESRCH;
1610
1611 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1612 if (!table)
1613 return err;
1614
1615 read_lock_bh(&table->tb6_lock);
1616
1617 fn = fib6_locate(&table->tb6_root,
1618 &cfg->fc_dst, cfg->fc_dst_len,
1619 &cfg->fc_src, cfg->fc_src_len);
1620
1621 if (fn) {
1622 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1623 if (cfg->fc_ifindex &&
1624 (!rt->dst.dev ||
1625 rt->dst.dev->ifindex != cfg->fc_ifindex))
1626 continue;
1627 if (cfg->fc_flags & RTF_GATEWAY &&
1628 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1629 continue;
1630 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1631 continue;
1632 dst_hold(&rt->dst);
1633 read_unlock_bh(&table->tb6_lock);
1634
1635 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1636 }
1637 }
1638 read_unlock_bh(&table->tb6_lock);
1639
1640 return err;
1641 }
1642
rt6_do_redirect(struct dst_entry * dst,struct sock * sk,struct sk_buff * skb)1643 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1644 {
1645 struct net *net = dev_net(skb->dev);
1646 struct netevent_redirect netevent;
1647 struct rt6_info *rt, *nrt = NULL;
1648 struct ndisc_options ndopts;
1649 struct inet6_dev *in6_dev;
1650 struct neighbour *neigh;
1651 struct rd_msg *msg;
1652 int optlen, on_link;
1653 u8 *lladdr;
1654
1655 optlen = skb->tail - skb->transport_header;
1656 optlen -= sizeof(*msg);
1657
1658 if (optlen < 0) {
1659 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1660 return;
1661 }
1662
1663 msg = (struct rd_msg *)icmp6_hdr(skb);
1664
1665 if (ipv6_addr_is_multicast(&msg->dest)) {
1666 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1667 return;
1668 }
1669
1670 on_link = 0;
1671 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1672 on_link = 1;
1673 } else if (ipv6_addr_type(&msg->target) !=
1674 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1675 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1676 return;
1677 }
1678
1679 in6_dev = __in6_dev_get(skb->dev);
1680 if (!in6_dev)
1681 return;
1682 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1683 return;
1684
1685 /* RFC2461 8.1:
1686 * The IP source address of the Redirect MUST be the same as the current
1687 * first-hop router for the specified ICMP Destination Address.
1688 */
1689
1690 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1691 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1692 return;
1693 }
1694
1695 lladdr = NULL;
1696 if (ndopts.nd_opts_tgt_lladdr) {
1697 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1698 skb->dev);
1699 if (!lladdr) {
1700 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1701 return;
1702 }
1703 }
1704
1705 rt = (struct rt6_info *) dst;
1706 if (rt == net->ipv6.ip6_null_entry) {
1707 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1708 return;
1709 }
1710
1711 /* Redirect received -> path was valid.
1712 * Look, redirects are sent only in response to data packets,
1713 * so that this nexthop apparently is reachable. --ANK
1714 */
1715 dst_confirm(&rt->dst);
1716
1717 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1718 if (!neigh)
1719 return;
1720
1721 /*
1722 * We have finally decided to accept it.
1723 */
1724
1725 neigh_update(neigh, lladdr, NUD_STALE,
1726 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1727 NEIGH_UPDATE_F_OVERRIDE|
1728 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1729 NEIGH_UPDATE_F_ISROUTER))
1730 );
1731
1732 nrt = ip6_rt_copy(rt, &msg->dest);
1733 if (!nrt)
1734 goto out;
1735
1736 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1737 if (on_link)
1738 nrt->rt6i_flags &= ~RTF_GATEWAY;
1739
1740 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1741
1742 if (ip6_ins_rt(nrt))
1743 goto out;
1744
1745 netevent.old = &rt->dst;
1746 netevent.new = &nrt->dst;
1747 netevent.daddr = &msg->dest;
1748 netevent.neigh = neigh;
1749 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1750
1751 if (rt->rt6i_flags & RTF_CACHE) {
1752 rt = (struct rt6_info *) dst_clone(&rt->dst);
1753 ip6_del_rt(rt);
1754 }
1755
1756 out:
1757 neigh_release(neigh);
1758 }
1759
1760 /*
1761 * Misc support functions
1762 */
1763
ip6_rt_copy(struct rt6_info * ort,const struct in6_addr * dest)1764 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1765 const struct in6_addr *dest)
1766 {
1767 struct net *net = dev_net(ort->dst.dev);
1768 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1769 ort->rt6i_table);
1770
1771 if (rt) {
1772 rt->dst.input = ort->dst.input;
1773 rt->dst.output = ort->dst.output;
1774 rt->dst.flags |= DST_HOST;
1775
1776 rt->rt6i_dst.addr = *dest;
1777 rt->rt6i_dst.plen = 128;
1778 dst_copy_metrics(&rt->dst, &ort->dst);
1779 rt->dst.error = ort->dst.error;
1780 rt->rt6i_idev = ort->rt6i_idev;
1781 if (rt->rt6i_idev)
1782 in6_dev_hold(rt->rt6i_idev);
1783 rt->dst.lastuse = jiffies;
1784
1785 rt->rt6i_gateway = ort->rt6i_gateway;
1786 rt->rt6i_flags = ort->rt6i_flags;
1787 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1788 (RTF_DEFAULT | RTF_ADDRCONF))
1789 rt6_set_from(rt, ort);
1790 rt->rt6i_metric = 0;
1791
1792 #ifdef CONFIG_IPV6_SUBTREES
1793 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1794 #endif
1795 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1796 rt->rt6i_table = ort->rt6i_table;
1797 }
1798 return rt;
1799 }
1800
1801 #ifdef CONFIG_IPV6_ROUTE_INFO
rt6_get_route_info(struct net_device * dev,const struct in6_addr * prefix,int prefixlen,const struct in6_addr * gwaddr)1802 static struct rt6_info *rt6_get_route_info(struct net_device *dev,
1803 const struct in6_addr *prefix, int prefixlen,
1804 const struct in6_addr *gwaddr)
1805 {
1806 struct fib6_node *fn;
1807 struct rt6_info *rt = NULL;
1808 struct fib6_table *table;
1809
1810 table = fib6_get_table(dev_net(dev),
1811 addrconf_rt_table(dev, RT6_TABLE_INFO));
1812 if (!table)
1813 return NULL;
1814
1815 read_lock_bh(&table->tb6_lock);
1816 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1817 if (!fn)
1818 goto out;
1819
1820 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1821 if (rt->dst.dev->ifindex != dev->ifindex)
1822 continue;
1823 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1824 continue;
1825 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1826 continue;
1827 dst_hold(&rt->dst);
1828 break;
1829 }
1830 out:
1831 read_unlock_bh(&table->tb6_lock);
1832 return rt;
1833 }
1834
rt6_add_route_info(struct net_device * dev,const struct in6_addr * prefix,int prefixlen,const struct in6_addr * gwaddr,unsigned int pref)1835 static struct rt6_info *rt6_add_route_info(struct net_device *dev,
1836 const struct in6_addr *prefix, int prefixlen,
1837 const struct in6_addr *gwaddr, unsigned int pref)
1838 {
1839 struct fib6_config cfg = {
1840 .fc_table = addrconf_rt_table(dev, RT6_TABLE_INFO),
1841 .fc_metric = IP6_RT_PRIO_USER,
1842 .fc_ifindex = dev->ifindex,
1843 .fc_dst_len = prefixlen,
1844 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1845 RTF_UP | RTF_PREF(pref),
1846 .fc_nlinfo.portid = 0,
1847 .fc_nlinfo.nlh = NULL,
1848 .fc_nlinfo.nl_net = dev_net(dev),
1849 };
1850
1851 cfg.fc_dst = *prefix;
1852 cfg.fc_gateway = *gwaddr;
1853
1854 /* We should treat it as a default route if prefix length is 0. */
1855 if (!prefixlen)
1856 cfg.fc_flags |= RTF_DEFAULT;
1857
1858 ip6_route_add(&cfg);
1859
1860 return rt6_get_route_info(dev, prefix, prefixlen, gwaddr);
1861 }
1862 #endif
1863
rt6_get_dflt_router(const struct in6_addr * addr,struct net_device * dev)1864 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1865 {
1866 struct rt6_info *rt;
1867 struct fib6_table *table;
1868
1869 table = fib6_get_table(dev_net(dev),
1870 addrconf_rt_table(dev, RT6_TABLE_MAIN));
1871 if (!table)
1872 return NULL;
1873
1874 read_lock_bh(&table->tb6_lock);
1875 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1876 if (dev == rt->dst.dev &&
1877 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1878 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1879 break;
1880 }
1881 if (rt)
1882 dst_hold(&rt->dst);
1883 read_unlock_bh(&table->tb6_lock);
1884 return rt;
1885 }
1886
rt6_add_dflt_router(const struct in6_addr * gwaddr,struct net_device * dev,unsigned int pref)1887 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1888 struct net_device *dev,
1889 unsigned int pref)
1890 {
1891 struct fib6_config cfg = {
1892 .fc_table = addrconf_rt_table(dev, RT6_TABLE_DFLT),
1893 .fc_metric = IP6_RT_PRIO_USER,
1894 .fc_ifindex = dev->ifindex,
1895 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1896 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1897 .fc_nlinfo.portid = 0,
1898 .fc_nlinfo.nlh = NULL,
1899 .fc_nlinfo.nl_net = dev_net(dev),
1900 };
1901
1902 cfg.fc_gateway = *gwaddr;
1903
1904 ip6_route_add(&cfg);
1905
1906 return rt6_get_dflt_router(gwaddr, dev);
1907 }
1908
1909
rt6_addrconf_purge(struct rt6_info * rt,void * arg)1910 int rt6_addrconf_purge(struct rt6_info *rt, void *arg) {
1911 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
1912 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2))
1913 return -1;
1914 return 0;
1915 }
1916
rt6_purge_dflt_routers(struct net * net)1917 void rt6_purge_dflt_routers(struct net *net)
1918 {
1919 fib6_clean_all(net, rt6_addrconf_purge, 0, NULL);
1920 }
1921
rtmsg_to_fib6_config(struct net * net,struct in6_rtmsg * rtmsg,struct fib6_config * cfg)1922 static void rtmsg_to_fib6_config(struct net *net,
1923 struct in6_rtmsg *rtmsg,
1924 struct fib6_config *cfg)
1925 {
1926 memset(cfg, 0, sizeof(*cfg));
1927
1928 cfg->fc_table = RT6_TABLE_MAIN;
1929 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1930 cfg->fc_metric = rtmsg->rtmsg_metric;
1931 cfg->fc_expires = rtmsg->rtmsg_info;
1932 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1933 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1934 cfg->fc_flags = rtmsg->rtmsg_flags;
1935
1936 cfg->fc_nlinfo.nl_net = net;
1937
1938 cfg->fc_dst = rtmsg->rtmsg_dst;
1939 cfg->fc_src = rtmsg->rtmsg_src;
1940 cfg->fc_gateway = rtmsg->rtmsg_gateway;
1941 }
1942
/*
 * SIOCADDRT / SIOCDELRT handler.
 *
 * Requires CAP_NET_ADMIN in the namespace's user namespace.  The in6_rtmsg
 * is copied from user space, converted to a fib6_config, and the route is
 * added or deleted under the rtnl lock.
 *
 * Returns -EINVAL for any other @cmd, -EPERM without the capability,
 * -EFAULT if the user copy fails, otherwise the result of the add/delete.
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct in6_rtmsg rtmsg;
	struct fib6_config cfg;
	int err;

	if (cmd != SIOCADDRT && cmd != SIOCDELRT)
		return -EINVAL;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (copy_from_user(&rtmsg, arg, sizeof(rtmsg)))
		return -EFAULT;

	rtmsg_to_fib6_config(net, &rtmsg, &cfg);

	rtnl_lock();
	if (cmd == SIOCADDRT)
		err = ip6_route_add(&cfg);	/* Add a route */
	else
		err = ip6_route_del(&cfg);	/* Delete a route */
	rtnl_unlock();

	return err;
}
1979
1980 /*
1981 * Drop the packet on the floor
1982 */
1983
/*
 * Account for, answer and free a packet that cannot be routed.
 *
 * For IPSTATS_MIB_INNOROUTES with an unspecified (any) destination address
 * IPSTATS_MIB_INADDRERRORS is bumped instead; otherwise, for the two
 * known counters, @ipstats_mib_noroutes itself is bumped.  Any other
 * counter value is not accounted.  A destination-unreachable ICMPv6 error
 * with @code is then sent and the skb is freed.
 *
 * Always returns 0.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	struct dst_entry *dst = skb_dst(skb);

	if (ipstats_mib_noroutes == IPSTATS_MIB_INNOROUTES &&
	    ipv6_addr_type(&ipv6_hdr(skb)->daddr) == IPV6_ADDR_ANY) {
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      IPSTATS_MIB_INADDRERRORS);
	} else if (ipstats_mib_noroutes == IPSTATS_MIB_INNOROUTES ||
		   ipstats_mib_noroutes == IPSTATS_MIB_OUTNOROUTES) {
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
	}

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
2006
ip6_pkt_discard(struct sk_buff * skb)2007 static int ip6_pkt_discard(struct sk_buff *skb)
2008 {
2009 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2010 }
2011
ip6_pkt_discard_out(struct sk_buff * skb)2012 static int ip6_pkt_discard_out(struct sk_buff *skb)
2013 {
2014 skb->dev = skb_dst(skb)->dev;
2015 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2016 }
2017
2018 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2019
ip6_pkt_prohibit(struct sk_buff * skb)2020 static int ip6_pkt_prohibit(struct sk_buff *skb)
2021 {
2022 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2023 }
2024
ip6_pkt_prohibit_out(struct sk_buff * skb)2025 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2026 {
2027 skb->dev = skb_dst(skb)->dev;
2028 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2029 }
2030
2031 #endif
2032
2033 /*
2034 * Allocate a dst for local (unicast / anycast) address.
2035 */
2036
/*
 * Build the /128 host route for a local address @addr owned by @idev.
 *
 * The dst is allocated on the namespace's loopback device, takes a
 * reference on @idev, is assigned to RT6_TABLE_LOCAL and starts with a
 * reference count of 1.  @anycast selects RTF_ANYCAST instead of
 * RTF_LOCAL.
 *
 * Returns the route, or ERR_PTR(-ENOMEM) if allocation failed.
 */
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	struct net *net = dev_net(idev->dev);
	struct rt6_info *route;

	route = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
	if (!route) {
		net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
		return ERR_PTR(-ENOMEM);
	}

	in6_dev_hold(idev);
	route->rt6i_idev = idev;

	route->dst.flags |= DST_HOST;
	route->dst.input = ip6_input;
	route->dst.output = ip6_output;

	route->rt6i_flags = RTF_UP | RTF_NONEXTHOP |
			    (anycast ? RTF_ANYCAST : RTF_LOCAL);

	route->rt6i_dst.addr = *addr;
	route->rt6i_dst.plen = 128;
	route->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);

	atomic_set(&route->dst.__refcnt, 1);

	return route;
}
2070
ip6_route_get_saddr(struct net * net,struct rt6_info * rt,const struct in6_addr * daddr,unsigned int prefs,struct in6_addr * saddr)2071 int ip6_route_get_saddr(struct net *net,
2072 struct rt6_info *rt,
2073 const struct in6_addr *daddr,
2074 unsigned int prefs,
2075 struct in6_addr *saddr)
2076 {
2077 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2078 int err = 0;
2079 if (rt->rt6i_prefsrc.plen)
2080 *saddr = rt->rt6i_prefsrc.addr;
2081 else
2082 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2083 daddr, prefs, saddr);
2084 return err;
2085 }
2086
2087 /* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* only routes on this device; NULL matches any */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* address being removed as preferred source */
};
2093
fib6_remove_prefsrc(struct rt6_info * rt,void * arg)2094 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2095 {
2096 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2097 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2098 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2099
2100 if (((void *)rt->dst.dev == dev || !dev) &&
2101 rt != net->ipv6.ip6_null_entry &&
2102 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2103 /* remove prefsrc entry */
2104 rt->rt6i_prefsrc.plen = 0;
2105 }
2106 return 0;
2107 }
2108
rt6_remove_prefsrc(struct inet6_ifaddr * ifp)2109 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2110 {
2111 struct net *net = dev_net(ifp->idev->dev);
2112 struct arg_dev_net_ip adni = {
2113 .dev = ifp->idev->dev,
2114 .net = net,
2115 .addr = &ifp->addr,
2116 };
2117 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2118 }
2119
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any device */
	struct net *net;	/* namespace whose null entry must be kept */
};
2124
fib6_ifdown(struct rt6_info * rt,void * arg)2125 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2126 {
2127 const struct arg_dev_net *adn = arg;
2128 const struct net_device *dev = adn->dev;
2129
2130 if ((rt->dst.dev == dev || !dev) &&
2131 rt != adn->net->ipv6.ip6_null_entry)
2132 return -1;
2133
2134 return 0;
2135 }
2136
rt6_ifdown(struct net * net,struct net_device * dev)2137 void rt6_ifdown(struct net *net, struct net_device *dev)
2138 {
2139 struct arg_dev_net adn = {
2140 .dev = dev,
2141 .net = net,
2142 };
2143
2144 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2145 icmp6_clean_all(fib6_ifdown, &adn);
2146 }
2147
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2152
rt6_mtu_change_route(struct rt6_info * rt,void * p_arg)2153 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2154 {
2155 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2156 struct inet6_dev *idev;
2157
2158 /* In IPv6 pmtu discovery is not optional,
2159 so that RTAX_MTU lock cannot disable it.
2160 We still use this lock to block changes
2161 caused by addrconf/ndisc.
2162 */
2163
2164 idev = __in6_dev_get(arg->dev);
2165 if (!idev)
2166 return 0;
2167
2168 /* For administrative MTU increase, there is no way to discover
2169 IPv6 PMTU increase, so PMTU increase should be updated here.
2170 Since RFC 1981 doesn't include administrative MTU increase
2171 update PMTU increase is a MUST. (i.e. jumbo frame)
2172 */
2173 /*
2174 If new MTU is less than route PMTU, this new MTU will be the
2175 lowest MTU in the path, update the route PMTU to reflect PMTU
2176 decreases; if new MTU is greater than route PMTU, and the
2177 old MTU is the lowest MTU in the path, update the route PMTU
2178 to reflect the increase. In this case if the other nodes' MTU
2179 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2180 PMTU discouvery.
2181 */
2182 if (rt->dst.dev == arg->dev &&
2183 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2184 (dst_mtu(&rt->dst) >= arg->mtu ||
2185 (dst_mtu(&rt->dst) < arg->mtu &&
2186 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2187 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2188 }
2189 return 0;
2190 }
2191
rt6_mtu_change(struct net_device * dev,unsigned int mtu)2192 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2193 {
2194 struct rt6_mtu_change_arg arg = {
2195 .dev = dev,
2196 .mtu = mtu,
2197 };
2198
2199 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2200 }
2201
/*
 * Netlink attribute policy passed to nlmsg_parse() when decoding IPv6 route
 * messages (see rtm_to_fib6_config()).  Attributes without an entry here
 * are not constrained by this table.
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_UID]		= { .type = NLA_U32 },
};
2211
rtm_to_fib6_config(struct sk_buff * skb,struct nlmsghdr * nlh,struct fib6_config * cfg)2212 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2213 struct fib6_config *cfg)
2214 {
2215 struct rtmsg *rtm;
2216 struct nlattr *tb[RTA_MAX+1];
2217 int err;
2218
2219 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2220 if (err < 0)
2221 goto errout;
2222
2223 err = -EINVAL;
2224 rtm = nlmsg_data(nlh);
2225 memset(cfg, 0, sizeof(*cfg));
2226
2227 cfg->fc_table = rtm->rtm_table;
2228 cfg->fc_dst_len = rtm->rtm_dst_len;
2229 cfg->fc_src_len = rtm->rtm_src_len;
2230 cfg->fc_flags = RTF_UP;
2231 cfg->fc_protocol = rtm->rtm_protocol;
2232 cfg->fc_type = rtm->rtm_type;
2233
2234 if (rtm->rtm_type == RTN_UNREACHABLE ||
2235 rtm->rtm_type == RTN_BLACKHOLE ||
2236 rtm->rtm_type == RTN_PROHIBIT ||
2237 rtm->rtm_type == RTN_THROW)
2238 cfg->fc_flags |= RTF_REJECT;
2239
2240 if (rtm->rtm_type == RTN_LOCAL)
2241 cfg->fc_flags |= RTF_LOCAL;
2242
2243 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2244 cfg->fc_nlinfo.nlh = nlh;
2245 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2246
2247 if (tb[RTA_GATEWAY]) {
2248 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2249 cfg->fc_flags |= RTF_GATEWAY;
2250 }
2251
2252 if (tb[RTA_DST]) {
2253 int plen = (rtm->rtm_dst_len + 7) >> 3;
2254
2255 if (nla_len(tb[RTA_DST]) < plen)
2256 goto errout;
2257
2258 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2259 }
2260
2261 if (tb[RTA_SRC]) {
2262 int plen = (rtm->rtm_src_len + 7) >> 3;
2263
2264 if (nla_len(tb[RTA_SRC]) < plen)
2265 goto errout;
2266
2267 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2268 }
2269
2270 if (tb[RTA_PREFSRC])
2271 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2272
2273 if (tb[RTA_OIF])
2274 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2275
2276 if (tb[RTA_PRIORITY])
2277 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2278
2279 if (tb[RTA_METRICS]) {
2280 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2281 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2282 }
2283
2284 if (tb[RTA_TABLE])
2285 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2286
2287 if (tb[RTA_MULTIPATH]) {
2288 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2289 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2290 }
2291
2292 err = 0;
2293 errout:
2294 return err;
2295 }
2296
ip6_route_multipath(struct fib6_config * cfg,int add)2297 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2298 {
2299 struct fib6_config r_cfg;
2300 struct rtnexthop *rtnh;
2301 int remaining;
2302 int attrlen;
2303 int err = 0, last_err = 0;
2304
2305 beginning:
2306 rtnh = (struct rtnexthop *)cfg->fc_mp;
2307 remaining = cfg->fc_mp_len;
2308
2309 /* Parse a Multipath Entry */
2310 while (rtnh_ok(rtnh, remaining)) {
2311 memcpy(&r_cfg, cfg, sizeof(*cfg));
2312 if (rtnh->rtnh_ifindex)
2313 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2314
2315 attrlen = rtnh_attrlen(rtnh);
2316 if (attrlen > 0) {
2317 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2318
2319 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2320 if (nla) {
2321 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2322 r_cfg.fc_flags |= RTF_GATEWAY;
2323 }
2324 }
2325 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2326 if (err) {
2327 last_err = err;
2328 /* If we are trying to remove a route, do not stop the
2329 * loop when ip6_route_del() fails (because next hop is
2330 * already gone), we should try to remove all next hops.
2331 */
2332 if (add) {
2333 /* If add fails, we should try to delete all
2334 * next hops that have been already added.
2335 */
2336 add = 0;
2337 goto beginning;
2338 }
2339 }
2340 /* Because each route is added like a single route we remove
2341 * this flag after the first nexthop (if there is a collision,
2342 * we have already fail to add the first nexthop:
2343 * fib6_add_rt2node() has reject it).
2344 */
2345 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2346 rtnh = rtnh_next(rtnh, &remaining);
2347 }
2348
2349 return last_err;
2350 }
2351
inet6_rtm_delroute(struct sk_buff * skb,struct nlmsghdr * nlh)2352 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2353 {
2354 struct fib6_config cfg;
2355 int err;
2356
2357 err = rtm_to_fib6_config(skb, nlh, &cfg);
2358 if (err < 0)
2359 return err;
2360
2361 if (cfg.fc_mp)
2362 return ip6_route_multipath(&cfg, 0);
2363 else
2364 return ip6_route_del(&cfg);
2365 }
2366
inet6_rtm_newroute(struct sk_buff * skb,struct nlmsghdr * nlh)2367 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2368 {
2369 struct fib6_config cfg;
2370 int err;
2371
2372 err = rtm_to_fib6_config(skb, nlh, &cfg);
2373 if (err < 0)
2374 return err;
2375
2376 if (cfg.fc_mp)
2377 return ip6_route_multipath(&cfg, 1);
2378 else
2379 return ip6_route_add(&cfg);
2380 }
2381
rt6_nlmsg_size(void)2382 static inline size_t rt6_nlmsg_size(void)
2383 {
2384 return NLMSG_ALIGN(sizeof(struct rtmsg))
2385 + nla_total_size(16) /* RTA_SRC */
2386 + nla_total_size(16) /* RTA_DST */
2387 + nla_total_size(16) /* RTA_GATEWAY */
2388 + nla_total_size(16) /* RTA_PREFSRC */
2389 + nla_total_size(4) /* RTA_TABLE */
2390 + nla_total_size(4) /* RTA_IIF */
2391 + nla_total_size(4) /* RTA_OIF */
2392 + nla_total_size(4) /* RTA_PRIORITY */
2393 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2394 + nla_total_size(sizeof(struct rta_cacheinfo));
2395 }
2396
rt6_fill_node(struct net * net,struct sk_buff * skb,struct rt6_info * rt,struct in6_addr * dst,struct in6_addr * src,int iif,int type,u32 portid,u32 seq,int prefix,int nowait,unsigned int flags)2397 static int rt6_fill_node(struct net *net,
2398 struct sk_buff *skb, struct rt6_info *rt,
2399 struct in6_addr *dst, struct in6_addr *src,
2400 int iif, int type, u32 portid, u32 seq,
2401 int prefix, int nowait, unsigned int flags)
2402 {
2403 struct rtmsg *rtm;
2404 struct nlmsghdr *nlh;
2405 long expires;
2406 u32 table;
2407
2408 if (prefix) { /* user wants prefix routes only */
2409 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2410 /* success since this is not a prefix route */
2411 return 1;
2412 }
2413 }
2414
2415 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2416 if (!nlh)
2417 return -EMSGSIZE;
2418
2419 rtm = nlmsg_data(nlh);
2420 rtm->rtm_family = AF_INET6;
2421 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2422 rtm->rtm_src_len = rt->rt6i_src.plen;
2423 rtm->rtm_tos = 0;
2424 if (rt->rt6i_table)
2425 table = rt->rt6i_table->tb6_id;
2426 else
2427 table = RT6_TABLE_UNSPEC;
2428 rtm->rtm_table = table;
2429 if (nla_put_u32(skb, RTA_TABLE, table))
2430 goto nla_put_failure;
2431 if (rt->rt6i_flags & RTF_REJECT) {
2432 switch (rt->dst.error) {
2433 case -EINVAL:
2434 rtm->rtm_type = RTN_BLACKHOLE;
2435 break;
2436 case -EACCES:
2437 rtm->rtm_type = RTN_PROHIBIT;
2438 break;
2439 case -EAGAIN:
2440 rtm->rtm_type = RTN_THROW;
2441 break;
2442 default:
2443 rtm->rtm_type = RTN_UNREACHABLE;
2444 break;
2445 }
2446 }
2447 else if (rt->rt6i_flags & RTF_LOCAL)
2448 rtm->rtm_type = RTN_LOCAL;
2449 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2450 rtm->rtm_type = RTN_LOCAL;
2451 else
2452 rtm->rtm_type = RTN_UNICAST;
2453 rtm->rtm_flags = 0;
2454 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2455 rtm->rtm_protocol = rt->rt6i_protocol;
2456 if (rt->rt6i_flags & RTF_DYNAMIC)
2457 rtm->rtm_protocol = RTPROT_REDIRECT;
2458 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2459 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2460 rtm->rtm_protocol = RTPROT_RA;
2461 else
2462 rtm->rtm_protocol = RTPROT_KERNEL;
2463 }
2464
2465 if (rt->rt6i_flags & RTF_CACHE)
2466 rtm->rtm_flags |= RTM_F_CLONED;
2467
2468 if (dst) {
2469 if (nla_put(skb, RTA_DST, 16, dst))
2470 goto nla_put_failure;
2471 rtm->rtm_dst_len = 128;
2472 } else if (rtm->rtm_dst_len)
2473 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2474 goto nla_put_failure;
2475 #ifdef CONFIG_IPV6_SUBTREES
2476 if (src) {
2477 if (nla_put(skb, RTA_SRC, 16, src))
2478 goto nla_put_failure;
2479 rtm->rtm_src_len = 128;
2480 } else if (rtm->rtm_src_len &&
2481 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2482 goto nla_put_failure;
2483 #endif
2484 if (iif) {
2485 #ifdef CONFIG_IPV6_MROUTE
2486 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2487 int err = ip6mr_get_route(net, skb, rtm, nowait);
2488 if (err <= 0) {
2489 if (!nowait) {
2490 if (err == 0)
2491 return 0;
2492 goto nla_put_failure;
2493 } else {
2494 if (err == -EMSGSIZE)
2495 goto nla_put_failure;
2496 }
2497 }
2498 } else
2499 #endif
2500 if (nla_put_u32(skb, RTA_IIF, iif))
2501 goto nla_put_failure;
2502 } else if (dst) {
2503 struct in6_addr saddr_buf;
2504 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2505 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2506 goto nla_put_failure;
2507 }
2508
2509 if (rt->rt6i_prefsrc.plen) {
2510 struct in6_addr saddr_buf;
2511 saddr_buf = rt->rt6i_prefsrc.addr;
2512 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2513 goto nla_put_failure;
2514 }
2515
2516 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2517 goto nla_put_failure;
2518
2519 if (rt->rt6i_flags & RTF_GATEWAY) {
2520 if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2521 goto nla_put_failure;
2522 }
2523
2524 if (rt->dst.dev &&
2525 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2526 goto nla_put_failure;
2527 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2528 goto nla_put_failure;
2529
2530 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2531
2532 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2533 goto nla_put_failure;
2534
2535 return nlmsg_end(skb, nlh);
2536
2537 nla_put_failure:
2538 nlmsg_cancel(skb, nlh);
2539 return -EMSGSIZE;
2540 }
2541
rt6_dump_route(struct rt6_info * rt,void * p_arg)2542 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2543 {
2544 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2545 int prefix;
2546
2547 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2548 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2549 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2550 } else
2551 prefix = 0;
2552
2553 return rt6_fill_node(arg->net,
2554 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2555 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2556 prefix, 0, NLM_F_MULTI);
2557 }
2558
inet6_rtm_getroute(struct sk_buff * in_skb,struct nlmsghdr * nlh)2559 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2560 {
2561 struct net *net = sock_net(in_skb->sk);
2562 struct nlattr *tb[RTA_MAX+1];
2563 struct rt6_info *rt;
2564 struct sk_buff *skb;
2565 struct rtmsg *rtm;
2566 struct flowi6 fl6;
2567 int err, iif = 0, oif = 0;
2568
2569 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2570 if (err < 0)
2571 goto errout;
2572
2573 err = -EINVAL;
2574 memset(&fl6, 0, sizeof(fl6));
2575
2576 if (tb[RTA_SRC]) {
2577 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2578 goto errout;
2579
2580 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2581 }
2582
2583 if (tb[RTA_DST]) {
2584 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2585 goto errout;
2586
2587 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2588 }
2589
2590 if (tb[RTA_IIF])
2591 iif = nla_get_u32(tb[RTA_IIF]);
2592
2593 if (tb[RTA_OIF])
2594 oif = nla_get_u32(tb[RTA_OIF]);
2595
2596 if (tb[RTA_MARK])
2597 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2598
2599 if (tb[RTA_UID])
2600 fl6.flowi6_uid = make_kuid(current_user_ns(),
2601 nla_get_u32(tb[RTA_UID]));
2602 else
2603 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
2604
2605 if (iif) {
2606 struct net_device *dev;
2607 int flags = 0;
2608
2609 dev = __dev_get_by_index(net, iif);
2610 if (!dev) {
2611 err = -ENODEV;
2612 goto errout;
2613 }
2614
2615 fl6.flowi6_iif = iif;
2616
2617 if (!ipv6_addr_any(&fl6.saddr))
2618 flags |= RT6_LOOKUP_F_HAS_SADDR;
2619
2620 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2621 flags);
2622 } else {
2623 fl6.flowi6_oif = oif;
2624
2625 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2626 }
2627
2628 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2629 if (!skb) {
2630 ip6_rt_put(rt);
2631 err = -ENOBUFS;
2632 goto errout;
2633 }
2634
2635 /* Reserve room for dummy headers, this skb can pass
2636 through good chunk of routing engine.
2637 */
2638 skb_reset_mac_header(skb);
2639 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2640
2641 skb_dst_set(skb, &rt->dst);
2642
2643 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2644 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2645 nlh->nlmsg_seq, 0, 0, 0);
2646 if (err < 0) {
2647 kfree_skb(skb);
2648 goto errout;
2649 }
2650
2651 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2652 errout:
2653 return err;
2654 }
2655
inet6_rt_notify(int event,struct rt6_info * rt,struct nl_info * info)2656 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2657 {
2658 struct sk_buff *skb;
2659 struct net *net = info->nl_net;
2660 u32 seq;
2661 int err;
2662
2663 err = -ENOBUFS;
2664 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2665
2666 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2667 if (!skb)
2668 goto errout;
2669
2670 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2671 event, info->portid, seq, 0, 0, 0);
2672 if (err < 0) {
2673 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2674 WARN_ON(err == -EMSGSIZE);
2675 kfree_skb(skb);
2676 goto errout;
2677 }
2678 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2679 info->nlh, gfp_any());
2680 return;
2681 errout:
2682 if (err < 0)
2683 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2684 }
2685
ip6_route_dev_notify(struct notifier_block * this,unsigned long event,void * data)2686 static int ip6_route_dev_notify(struct notifier_block *this,
2687 unsigned long event, void *data)
2688 {
2689 struct net_device *dev = (struct net_device *)data;
2690 struct net *net = dev_net(dev);
2691
2692 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2693 net->ipv6.ip6_null_entry->dst.dev = dev;
2694 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2695 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2696 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2697 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2698 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2699 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2700 #endif
2701 }
2702
2703 return NOTIFY_OK;
2704 }
2705
2706 /*
2707 * /proc
2708 */
2709
2710 #ifdef CONFIG_PROC_FS
2711
/*
 * Buffer bookkeeping record for /proc output.
 * NOTE(review): nothing in the visible part of this file uses it; the
 * field meanings are not established here -- confirm before relying on
 * them or removing the struct.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2720
rt6_info_route(struct rt6_info * rt,void * p_arg)2721 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2722 {
2723 struct seq_file *m = p_arg;
2724
2725 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2726
2727 #ifdef CONFIG_IPV6_SUBTREES
2728 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2729 #else
2730 seq_puts(m, "00000000000000000000000000000000 00 ");
2731 #endif
2732 if (rt->rt6i_flags & RTF_GATEWAY) {
2733 seq_printf(m, "%pi6", &rt->rt6i_gateway);
2734 } else {
2735 seq_puts(m, "00000000000000000000000000000000");
2736 }
2737 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2738 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2739 rt->dst.__use, rt->rt6i_flags,
2740 rt->dst.dev ? rt->dst.dev->name : "");
2741 return 0;
2742 }
2743
ipv6_route_show(struct seq_file * m,void * v)2744 static int ipv6_route_show(struct seq_file *m, void *v)
2745 {
2746 struct net *net = (struct net *)m->private;
2747 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2748 return 0;
2749 }
2750
ipv6_route_open(struct inode * inode,struct file * file)2751 static int ipv6_route_open(struct inode *inode, struct file *file)
2752 {
2753 return single_open_net(inode, file, ipv6_route_show);
2754 }
2755
2756 static const struct file_operations ipv6_route_proc_fops = {
2757 .owner = THIS_MODULE,
2758 .open = ipv6_route_open,
2759 .read = seq_read,
2760 .llseek = seq_lseek,
2761 .release = single_release_net,
2762 };
2763
rt6_stats_seq_show(struct seq_file * seq,void * v)2764 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2765 {
2766 struct net *net = (struct net *)seq->private;
2767 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2768 net->ipv6.rt6_stats->fib_nodes,
2769 net->ipv6.rt6_stats->fib_route_nodes,
2770 net->ipv6.rt6_stats->fib_rt_alloc,
2771 net->ipv6.rt6_stats->fib_rt_entries,
2772 net->ipv6.rt6_stats->fib_rt_cache,
2773 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2774 net->ipv6.rt6_stats->fib_discarded_routes);
2775
2776 return 0;
2777 }
2778
rt6_stats_seq_open(struct inode * inode,struct file * file)2779 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2780 {
2781 return single_open_net(inode, file, rt6_stats_seq_show);
2782 }
2783
2784 static const struct file_operations rt6_stats_seq_fops = {
2785 .owner = THIS_MODULE,
2786 .open = rt6_stats_seq_open,
2787 .read = seq_read,
2788 .llseek = seq_lseek,
2789 .release = single_release_net,
2790 };
2791 #endif /* CONFIG_PROC_FS */
2792
2793 #ifdef CONFIG_SYSCTL
2794
2795 static
ipv6_sysctl_rtcache_flush(ctl_table * ctl,int write,void __user * buffer,size_t * lenp,loff_t * ppos)2796 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2797 void __user *buffer, size_t *lenp, loff_t *ppos)
2798 {
2799 struct net *net;
2800 int delay;
2801 if (!write)
2802 return -EINVAL;
2803
2804 net = (struct net *)ctl->extra1;
2805 delay = net->ipv6.sysctl.flush_delay;
2806 proc_dointvec(ctl, write, buffer, lenp, ppos);
2807 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2808 return 0;
2809 }
2810
2811 ctl_table ipv6_route_table_template[] = {
2812 {
2813 .procname = "flush",
2814 .data = &init_net.ipv6.sysctl.flush_delay,
2815 .maxlen = sizeof(int),
2816 .mode = 0200,
2817 .proc_handler = ipv6_sysctl_rtcache_flush
2818 },
2819 {
2820 .procname = "gc_thresh",
2821 .data = &ip6_dst_ops_template.gc_thresh,
2822 .maxlen = sizeof(int),
2823 .mode = 0644,
2824 .proc_handler = proc_dointvec,
2825 },
2826 {
2827 .procname = "max_size",
2828 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2829 .maxlen = sizeof(int),
2830 .mode = 0644,
2831 .proc_handler = proc_dointvec,
2832 },
2833 {
2834 .procname = "gc_min_interval",
2835 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2836 .maxlen = sizeof(int),
2837 .mode = 0644,
2838 .proc_handler = proc_dointvec_jiffies,
2839 },
2840 {
2841 .procname = "gc_timeout",
2842 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2843 .maxlen = sizeof(int),
2844 .mode = 0644,
2845 .proc_handler = proc_dointvec_jiffies,
2846 },
2847 {
2848 .procname = "gc_interval",
2849 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2850 .maxlen = sizeof(int),
2851 .mode = 0644,
2852 .proc_handler = proc_dointvec_jiffies,
2853 },
2854 {
2855 .procname = "gc_elasticity",
2856 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2857 .maxlen = sizeof(int),
2858 .mode = 0644,
2859 .proc_handler = proc_dointvec,
2860 },
2861 {
2862 .procname = "mtu_expires",
2863 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2864 .maxlen = sizeof(int),
2865 .mode = 0644,
2866 .proc_handler = proc_dointvec_jiffies,
2867 },
2868 {
2869 .procname = "min_adv_mss",
2870 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2871 .maxlen = sizeof(int),
2872 .mode = 0644,
2873 .proc_handler = proc_dointvec,
2874 },
2875 {
2876 .procname = "gc_min_interval_ms",
2877 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2878 .maxlen = sizeof(int),
2879 .mode = 0644,
2880 .proc_handler = proc_dointvec_ms_jiffies,
2881 },
2882 { }
2883 };
2884
ipv6_route_sysctl_init(struct net * net)2885 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2886 {
2887 struct ctl_table *table;
2888
2889 table = kmemdup(ipv6_route_table_template,
2890 sizeof(ipv6_route_table_template),
2891 GFP_KERNEL);
2892
2893 if (table) {
2894 table[0].data = &net->ipv6.sysctl.flush_delay;
2895 table[0].extra1 = net;
2896 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2897 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2898 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2899 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2900 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2901 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2902 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2903 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2904 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2905
2906 /* Don't export sysctls to unprivileged users */
2907 if (net->user_ns != &init_user_ns)
2908 table[0].procname = NULL;
2909 }
2910
2911 return table;
2912 }
2913 #endif
2914
ip6_route_net_init(struct net * net)2915 static int __net_init ip6_route_net_init(struct net *net)
2916 {
2917 int ret = -ENOMEM;
2918
2919 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2920 sizeof(net->ipv6.ip6_dst_ops));
2921
2922 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2923 goto out_ip6_dst_ops;
2924
2925 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2926 sizeof(*net->ipv6.ip6_null_entry),
2927 GFP_KERNEL);
2928 if (!net->ipv6.ip6_null_entry)
2929 goto out_ip6_dst_entries;
2930 net->ipv6.ip6_null_entry->dst.path =
2931 (struct dst_entry *)net->ipv6.ip6_null_entry;
2932 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2933 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2934 ip6_template_metrics, true);
2935
2936 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2937 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2938 sizeof(*net->ipv6.ip6_prohibit_entry),
2939 GFP_KERNEL);
2940 if (!net->ipv6.ip6_prohibit_entry)
2941 goto out_ip6_null_entry;
2942 net->ipv6.ip6_prohibit_entry->dst.path =
2943 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2944 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2945 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2946 ip6_template_metrics, true);
2947
2948 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2949 sizeof(*net->ipv6.ip6_blk_hole_entry),
2950 GFP_KERNEL);
2951 if (!net->ipv6.ip6_blk_hole_entry)
2952 goto out_ip6_prohibit_entry;
2953 net->ipv6.ip6_blk_hole_entry->dst.path =
2954 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2955 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2956 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2957 ip6_template_metrics, true);
2958 #endif
2959
2960 net->ipv6.sysctl.flush_delay = 0;
2961 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2962 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2963 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2964 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2965 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2966 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2967 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2968
2969 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2970
2971 ret = 0;
2972 out:
2973 return ret;
2974
2975 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2976 out_ip6_prohibit_entry:
2977 kfree(net->ipv6.ip6_prohibit_entry);
2978 out_ip6_null_entry:
2979 kfree(net->ipv6.ip6_null_entry);
2980 #endif
2981 out_ip6_dst_entries:
2982 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2983 out_ip6_dst_ops:
2984 goto out;
2985 }
2986
ip6_route_net_exit(struct net * net)2987 static void __net_exit ip6_route_net_exit(struct net *net)
2988 {
2989 kfree(net->ipv6.ip6_null_entry);
2990 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2991 kfree(net->ipv6.ip6_prohibit_entry);
2992 kfree(net->ipv6.ip6_blk_hole_entry);
2993 #endif
2994 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2995 }
2996
ip6_route_net_init_late(struct net * net)2997 static int __net_init ip6_route_net_init_late(struct net *net)
2998 {
2999 #ifdef CONFIG_PROC_FS
3000 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3001 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3002 #endif
3003 return 0;
3004 }
3005
ip6_route_net_exit_late(struct net * net)3006 static void __net_exit ip6_route_net_exit_late(struct net *net)
3007 {
3008 #ifdef CONFIG_PROC_FS
3009 remove_proc_entry("ipv6_route", net->proc_net);
3010 remove_proc_entry("rt6_stats", net->proc_net);
3011 #endif
3012 }
3013
3014 static struct pernet_operations ip6_route_net_ops = {
3015 .init = ip6_route_net_init,
3016 .exit = ip6_route_net_exit,
3017 };
3018
ipv6_inetpeer_init(struct net * net)3019 static int __net_init ipv6_inetpeer_init(struct net *net)
3020 {
3021 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3022
3023 if (!bp)
3024 return -ENOMEM;
3025 inet_peer_base_init(bp);
3026 net->ipv6.peers = bp;
3027 return 0;
3028 }
3029
ipv6_inetpeer_exit(struct net * net)3030 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3031 {
3032 struct inet_peer_base *bp = net->ipv6.peers;
3033
3034 net->ipv6.peers = NULL;
3035 inetpeer_invalidate_tree(bp);
3036 kfree(bp);
3037 }
3038
3039 static struct pernet_operations ipv6_inetpeer_ops = {
3040 .init = ipv6_inetpeer_init,
3041 .exit = ipv6_inetpeer_exit,
3042 };
3043
3044 static struct pernet_operations ip6_route_net_late_ops = {
3045 .init = ip6_route_net_init_late,
3046 .exit = ip6_route_net_exit_late,
3047 };
3048
3049 static struct notifier_block ip6_route_dev_notifier = {
3050 .notifier_call = ip6_route_dev_notify,
3051 .priority = 0,
3052 };
3053
ip6_route_init(void)3054 int __init ip6_route_init(void)
3055 {
3056 int ret;
3057
3058 ret = -ENOMEM;
3059 ip6_dst_ops_template.kmem_cachep =
3060 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3061 SLAB_HWCACHE_ALIGN, NULL);
3062 if (!ip6_dst_ops_template.kmem_cachep)
3063 goto out;
3064
3065 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3066 if (ret)
3067 goto out_kmem_cache;
3068
3069 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3070 if (ret)
3071 goto out_dst_entries;
3072
3073 ret = register_pernet_subsys(&ip6_route_net_ops);
3074 if (ret)
3075 goto out_register_inetpeer;
3076
3077 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3078
3079 /* Registering of the loopback is done before this portion of code,
3080 * the loopback reference in rt6_info will not be taken, do it
3081 * manually for init_net */
3082 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3083 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3084 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3085 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3086 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3087 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3088 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3089 #endif
3090 ret = fib6_init();
3091 if (ret)
3092 goto out_register_subsys;
3093
3094 ret = xfrm6_init();
3095 if (ret)
3096 goto out_fib6_init;
3097
3098 ret = fib6_rules_init();
3099 if (ret)
3100 goto xfrm6_init;
3101
3102 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3103 if (ret)
3104 goto fib6_rules_init;
3105
3106 ret = -ENOBUFS;
3107 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3108 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3109 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3110 goto out_register_late_subsys;
3111
3112 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3113 if (ret)
3114 goto out_register_late_subsys;
3115
3116 out:
3117 return ret;
3118
3119 out_register_late_subsys:
3120 unregister_pernet_subsys(&ip6_route_net_late_ops);
3121 fib6_rules_init:
3122 fib6_rules_cleanup();
3123 xfrm6_init:
3124 xfrm6_fini();
3125 out_fib6_init:
3126 fib6_gc_cleanup();
3127 out_register_subsys:
3128 unregister_pernet_subsys(&ip6_route_net_ops);
3129 out_register_inetpeer:
3130 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3131 out_dst_entries:
3132 dst_entries_destroy(&ip6_dst_blackhole_ops);
3133 out_kmem_cache:
3134 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3135 goto out;
3136 }
3137
ip6_route_cleanup(void)3138 void ip6_route_cleanup(void)
3139 {
3140 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3141 unregister_pernet_subsys(&ip6_route_net_late_ops);
3142 fib6_rules_cleanup();
3143 xfrm6_fini();
3144 fib6_gc_cleanup();
3145 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3146 unregister_pernet_subsys(&ip6_route_net_ops);
3147 dst_entries_destroy(&ip6_dst_blackhole_ops);
3148 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3149 }
3150