1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
68 enum rt6_nud_state {
69 RT6_NUD_FAIL_HARD = -3,
70 RT6_NUD_FAIL_PROBE = -2,
71 RT6_NUD_FAIL_DO_RR = -1,
72 RT6_NUD_SUCCEED = 1
73 };
74
75 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76 const struct in6_addr *dest);
77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int ip6_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void ip6_dst_destroy(struct dst_entry *);
82 static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
84 static int ip6_dst_gc(struct dst_ops *ops);
85
86 static int ip6_pkt_discard(struct sk_buff *skb);
87 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
88 static int ip6_pkt_prohibit(struct sk_buff *skb);
89 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
90 static void ip6_link_failure(struct sk_buff *skb);
91 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 struct sk_buff *skb, u32 mtu);
93 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94 struct sk_buff *skb);
95 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
96
97 #ifdef CONFIG_IPV6_ROUTE_INFO
98 static struct rt6_info *rt6_add_route_info(struct net_device *dev,
99 const struct in6_addr *prefix, int prefixlen,
100 const struct in6_addr *gwaddr, unsigned int pref);
101 static struct rt6_info *rt6_get_route_info(struct net_device *dev,
102 const struct in6_addr *prefix, int prefixlen,
103 const struct in6_addr *gwaddr);
104 #endif
105
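/*
 * Look up (optionally creating) the inet_peer entry for this route's
 * destination and attach it to the route.  If another CPU raced us and
 * already set the peer, drop the extra reference.
 */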
106 static void rt6_bind_peer(struct rt6_info *rt, int create)
107 {
108 struct inet_peer_base *base;
109 struct inet_peer *peer;
110
111 base = inetpeer_base_ptr(rt->_rt6i_peer);
112 if (!base)
113 return;
114
115 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
116 if (peer) {
117 if (!rt6_set_peer(rt, peer))
118 inet_putpeer(peer);
119 }
120 }
121
122 static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
123 {
124 if (rt6_has_peer(rt))
125 return rt6_peer_ptr(rt);
126
127 rt6_bind_peer(rt, create);
128 return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
129 }
130
131 static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
132 {
133 return __rt6_get_peer(rt, 1);
134 }
135
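/*
 * Copy-on-write metrics.  Host routes (DST_HOST) keep their writable
 * metrics in the shared inet_peer entry; all other routes fall back to
 * the generic dst COW helper.
 */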
136 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
137 {
138 struct rt6_info *rt = (struct rt6_info *) dst;
139 struct inet_peer *peer;
140 u32 *p = NULL;
141
142 if (!(rt->dst.flags & DST_HOST))
143 return dst_cow_metrics_generic(dst, old);
144
145 peer = rt6_get_peer_create(rt);
146 if (peer) {
147 u32 *old_p = __DST_METRICS_PTR(old);
148 unsigned long prev, new;
149
150 p = peer->metrics;
151 if (inet_metrics_new(peer) ||
152 (old & DST_METRICS_FORCE_OVERWRITE))
153 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
154
155 new = (unsigned long) p;
156 prev = cmpxchg(&dst->_metrics, old, new);
157
158 if (prev != old) {
159 p = __DST_METRICS_PTR(prev);
160 if (prev & DST_METRICS_READ_ONLY)
161 p = NULL;
162 }
163 }
164 return p;
165 }
166
167 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
168 struct sk_buff *skb,
169 const void *daddr)
170 {
171 struct in6_addr *p = &rt->rt6i_gateway;
172
173 if (!ipv6_addr_any(p))
174 return (const void *) p;
175 else if (skb)
176 return &ipv6_hdr(skb)->daddr;
177 return daddr;
178 }
179
180 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
181 struct sk_buff *skb,
182 const void *daddr)
183 {
184 struct rt6_info *rt = (struct rt6_info *) dst;
185 struct neighbour *n;
186
187 daddr = choose_neigh_daddr(rt, skb, daddr);
188 n = __ipv6_neigh_lookup(dst->dev, daddr);
189 if (n)
190 return n;
191 return neigh_create(&nd_tbl, daddr, dst->dev);
192 }
193
194 static struct dst_ops ip6_dst_ops_template = {
195 .family = AF_INET6,
196 .protocol = cpu_to_be16(ETH_P_IPV6),
197 .gc = ip6_dst_gc,
198 .gc_thresh = 1024,
199 .check = ip6_dst_check,
200 .default_advmss = ip6_default_advmss,
201 .mtu = ip6_mtu,
202 .cow_metrics = ipv6_cow_metrics,
203 .destroy = ip6_dst_destroy,
204 .ifdown = ip6_dst_ifdown,
205 .negative_advice = ip6_negative_advice,
206 .link_failure = ip6_link_failure,
207 .update_pmtu = ip6_rt_update_pmtu,
208 .redirect = rt6_do_redirect,
209 .local_out = __ip6_local_out,
210 .neigh_lookup = ip6_neigh_lookup,
211 };
212
213 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
214 {
215 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
216
217 return mtu ? : dst->dev->mtu;
218 }
219
220 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
221 struct sk_buff *skb, u32 mtu)
222 {
223 }
224
225 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
226 struct sk_buff *skb)
227 {
228 }
229
230 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
231 unsigned long old)
232 {
233 return NULL;
234 }
235
236 static struct dst_ops ip6_dst_blackhole_ops = {
237 .family = AF_INET6,
238 .protocol = cpu_to_be16(ETH_P_IPV6),
239 .destroy = ip6_dst_destroy,
240 .check = ip6_dst_check,
241 .mtu = ip6_blackhole_mtu,
242 .default_advmss = ip6_default_advmss,
243 .update_pmtu = ip6_rt_blackhole_update_pmtu,
244 .redirect = ip6_rt_blackhole_redirect,
245 .cow_metrics = ip6_rt_blackhole_cow_metrics,
246 .neigh_lookup = ip6_neigh_lookup,
247 };
248
249 static const u32 ip6_template_metrics[RTAX_MAX] = {
250 [RTAX_HOPLIMIT - 1] = 0,
251 };
252
253 static const struct rt6_info ip6_null_entry_template = {
254 .dst = {
255 .__refcnt = ATOMIC_INIT(1),
256 .__use = 1,
257 .obsolete = DST_OBSOLETE_FORCE_CHK,
258 .error = -ENETUNREACH,
259 .input = ip6_pkt_discard,
260 .output = ip6_pkt_discard_out,
261 },
262 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
263 .rt6i_protocol = RTPROT_KERNEL,
264 .rt6i_metric = ~(u32) 0,
265 .rt6i_ref = ATOMIC_INIT(1),
266 };
267
268 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
269
270 static const struct rt6_info ip6_prohibit_entry_template = {
271 .dst = {
272 .__refcnt = ATOMIC_INIT(1),
273 .__use = 1,
274 .obsolete = DST_OBSOLETE_FORCE_CHK,
275 .error = -EACCES,
276 .input = ip6_pkt_prohibit,
277 .output = ip6_pkt_prohibit_out,
278 },
279 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
280 .rt6i_protocol = RTPROT_KERNEL,
281 .rt6i_metric = ~(u32) 0,
282 .rt6i_ref = ATOMIC_INIT(1),
283 };
284
285 static const struct rt6_info ip6_blk_hole_entry_template = {
286 .dst = {
287 .__refcnt = ATOMIC_INIT(1),
288 .__use = 1,
289 .obsolete = DST_OBSOLETE_FORCE_CHK,
290 .error = -EINVAL,
291 .input = dst_discard,
292 .output = dst_discard_sk,
293 },
294 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
295 .rt6i_protocol = RTPROT_KERNEL,
296 .rt6i_metric = ~(u32) 0,
297 .rt6i_ref = ATOMIC_INIT(1),
298 };
299
300 #endif
301
302 /* allocate dst with ip6_dst_ops */
303 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
304 struct net_device *dev,
305 int flags,
306 struct fib6_table *table)
307 {
308 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
309 0, DST_OBSOLETE_FORCE_CHK, flags);
310
311 if (rt) {
312 struct dst_entry *dst = &rt->dst;
313
314 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
315 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
316 INIT_LIST_HEAD(&rt->rt6i_siblings);
317 }
318 return rt;
319 }
320
321 static void ip6_dst_destroy(struct dst_entry *dst)
322 {
323 struct rt6_info *rt = (struct rt6_info *)dst;
324 struct inet6_dev *idev = rt->rt6i_idev;
325 struct dst_entry *from = dst->from;
326
327 if (!(rt->dst.flags & DST_HOST))
328 dst_destroy_metrics_generic(dst);
329
330 if (idev) {
331 rt->rt6i_idev = NULL;
332 in6_dev_put(idev);
333 }
334
335 dst->from = NULL;
336 dst_release(from);
337
338 if (rt6_has_peer(rt)) {
339 struct inet_peer *peer = rt6_peer_ptr(rt);
340 inet_putpeer(peer);
341 }
342 }
343
344 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
345 int how)
346 {
347 struct rt6_info *rt = (struct rt6_info *)dst;
348 struct inet6_dev *idev = rt->rt6i_idev;
349 struct net_device *loopback_dev =
350 dev_net(dev)->loopback_dev;
351
352 if (dev != loopback_dev) {
353 if (idev && idev->dev == dev) {
354 struct inet6_dev *loopback_idev =
355 in6_dev_get(loopback_dev);
356 if (loopback_idev) {
357 rt->rt6i_idev = loopback_idev;
358 in6_dev_put(idev);
359 }
360 }
361 }
362 }
363
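/*
 * A route counts as expired when its own RTF_EXPIRES timer has run out,
 * or, for cloned routes, when the route it was copied from has expired.
 */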
364 static bool rt6_check_expired(const struct rt6_info *rt)
365 {
366 if (rt->rt6i_flags & RTF_EXPIRES) {
367 if (time_after(jiffies, rt->dst.expires))
368 return true;
369 } else if (rt->dst.from) {
370 return rt6_check_expired((struct rt6_info *) rt->dst.from);
371 }
372 return false;
373 }
374
375 /* Multipath route selection:
376 * Hash based function using packet header and flowlabel.
377 * Adapted from fib_info_hashfn()
378 */
379 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
380 const struct flowi6 *fl6)
381 {
382 unsigned int val = fl6->flowi6_proto;
383
384 val ^= ipv6_addr_hash(&fl6->daddr);
385 val ^= ipv6_addr_hash(&fl6->saddr);
386
387 /* Works only if this is not encapsulated */
388 switch (fl6->flowi6_proto) {
389 case IPPROTO_UDP:
390 case IPPROTO_TCP:
391 case IPPROTO_SCTP:
392 val ^= (__force u16)fl6->fl6_sport;
393 val ^= (__force u16)fl6->fl6_dport;
394 break;
395
396 case IPPROTO_ICMPV6:
397 val ^= (__force u16)fl6->fl6_icmp_type;
398 val ^= (__force u16)fl6->fl6_icmp_code;
399 break;
400 }
401 /* RFC6438 recommends using the flow label */
402 val ^= (__force u32)fl6->flowlabel;
403
404 /* Perhaps we need to tune this function? */
405 val = val ^ (val >> 7) ^ (val >> 12);
406 return val % candidate_count;
407 }
408
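/*
 * Pick one of the ECMP siblings of @match based on the flow hash.
 * A hash of 0 keeps the original route; otherwise the selected sibling
 * is used, unless it scores negatively, in which case @match is kept.
 */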
409 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
410 struct flowi6 *fl6, int oif,
411 int strict)
412 {
413 struct rt6_info *sibling, *next_sibling;
414 int route_choosen;
415
416 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
417 /* Don't change the route if route_choosen == 0
418 * (siblings do not include ourself)
419 */
420 if (route_choosen)
421 list_for_each_entry_safe(sibling, next_sibling,
422 &match->rt6i_siblings, rt6i_siblings) {
423 route_choosen--;
424 if (route_choosen == 0) {
425 if (rt6_score_route(sibling, oif, strict) < 0)
426 break;
427 match = sibling;
428 break;
429 }
430 }
431 return match;
432 }
433
434 /*
435 * Route lookup. Any table->tb6_lock is implied.
436 */
437
438 static inline struct rt6_info *rt6_device_match(struct net *net,
439 struct rt6_info *rt,
440 const struct in6_addr *saddr,
441 int oif,
442 int flags)
443 {
444 struct rt6_info *local = NULL;
445 struct rt6_info *sprt;
446
447 if (!oif && ipv6_addr_any(saddr))
448 goto out;
449
450 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
451 struct net_device *dev = sprt->dst.dev;
452
453 if (oif) {
454 if (dev->ifindex == oif)
455 return sprt;
456 if (dev->flags & IFF_LOOPBACK) {
457 if (!sprt->rt6i_idev ||
458 sprt->rt6i_idev->dev->ifindex != oif) {
459 if (flags & RT6_LOOKUP_F_IFACE && oif)
460 continue;
461 if (local && (!oif ||
462 local->rt6i_idev->dev->ifindex == oif))
463 continue;
464 }
465 local = sprt;
466 }
467 } else {
468 if (ipv6_chk_addr(net, saddr, dev,
469 flags & RT6_LOOKUP_F_IFACE))
470 return sprt;
471 }
472 }
473
474 if (oif) {
475 if (local)
476 return local;
477
478 if (flags & RT6_LOOKUP_F_IFACE)
479 return net->ipv6.ip6_null_entry;
480 }
481 out:
482 return rt;
483 }
484
485 #ifdef CONFIG_IPV6_ROUTER_PREF
486 struct __rt6_probe_work {
487 struct work_struct work;
488 struct in6_addr target;
489 struct net_device *dev;
490 };
491
492 static void rt6_probe_deferred(struct work_struct *w)
493 {
494 struct in6_addr mcaddr;
495 struct __rt6_probe_work *work =
496 container_of(w, struct __rt6_probe_work, work);
497
498 addrconf_addr_solict_mult(&work->target, &mcaddr);
499 ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
500 dev_put(work->dev);
501 kfree(w);
502 }
503
504 static void rt6_probe(struct rt6_info *rt)
505 {
506 struct neighbour *neigh;
507 /*
508 * Okay, this does not seem to be appropriate
509 * for now, however, we need to check if it
510 * is really so; aka Router Reachability Probing.
511 *
512 * Router Reachability Probe MUST be rate-limited
513 * to no more than one per minute.
514 */
515 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
516 return;
517 rcu_read_lock_bh();
518 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
519 if (neigh) {
520 write_lock(&neigh->lock);
521 if (neigh->nud_state & NUD_VALID)
522 goto out;
523 }
524
525 if (!neigh ||
526 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
527 struct __rt6_probe_work *work;
528
529 work = kmalloc(sizeof(*work), GFP_ATOMIC);
530
531 if (neigh && work)
532 __neigh_set_probe_once(neigh);
533
534 if (neigh)
535 write_unlock(&neigh->lock);
536
537 if (work) {
538 INIT_WORK(&work->work, rt6_probe_deferred);
539 work->target = rt->rt6i_gateway;
540 dev_hold(rt->dst.dev);
541 work->dev = rt->dst.dev;
542 schedule_work(&work->work);
543 }
544 } else {
545 out:
546 write_unlock(&neigh->lock);
547 }
548 rcu_read_unlock_bh();
549 }
550 #else
551 static inline void rt6_probe(struct rt6_info *rt)
552 {
553 }
554 #endif
555
556 /*
557 * Default Router Selection (RFC 2461 6.3.6)
558 */
559 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
560 {
561 struct net_device *dev = rt->dst.dev;
562 if (!oif || dev->ifindex == oif)
563 return 2;
564 if ((dev->flags & IFF_LOOPBACK) &&
565 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
566 return 1;
567 return 0;
568 }
569
570 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
571 {
572 struct neighbour *neigh;
573 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
574
575 if (rt->rt6i_flags & RTF_NONEXTHOP ||
576 !(rt->rt6i_flags & RTF_GATEWAY))
577 return RT6_NUD_SUCCEED;
578
579 rcu_read_lock_bh();
580 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
581 if (neigh) {
582 read_lock(&neigh->lock);
583 if (neigh->nud_state & NUD_VALID)
584 ret = RT6_NUD_SUCCEED;
585 #ifdef CONFIG_IPV6_ROUTER_PREF
586 else if (!(neigh->nud_state & NUD_FAILED))
587 ret = RT6_NUD_SUCCEED;
588 else
589 ret = RT6_NUD_FAIL_PROBE;
590 #endif
591 read_unlock(&neigh->lock);
592 } else {
593 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
594 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
595 }
596 rcu_read_unlock_bh();
597
598 return ret;
599 }
600
601 static int rt6_score_route(struct rt6_info *rt, int oif,
602 int strict)
603 {
604 int m;
605
606 m = rt6_check_dev(rt, oif);
607 if (!m && (strict & RT6_LOOKUP_F_IFACE))
608 return RT6_NUD_FAIL_HARD;
609 #ifdef CONFIG_IPV6_ROUTER_PREF
610 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
611 #endif
612 if (strict & RT6_LOOKUP_F_REACHABLE) {
613 int n = rt6_check_neigh(rt);
614 if (n < 0)
615 return n;
616 }
617 return m;
618 }
619
620 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
621 int *mpri, struct rt6_info *match,
622 bool *do_rr)
623 {
624 int m;
625 bool match_do_rr = false;
626
627 if (rt6_check_expired(rt))
628 goto out;
629
630 m = rt6_score_route(rt, oif, strict);
631 if (m == RT6_NUD_FAIL_DO_RR) {
632 match_do_rr = true;
633 m = 0; /* lowest valid score */
634 } else if (m == RT6_NUD_FAIL_HARD) {
635 goto out;
636 }
637
638 if (strict & RT6_LOOKUP_F_REACHABLE)
639 rt6_probe(rt);
640
641 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
642 if (m > *mpri) {
643 *do_rr = match_do_rr;
644 *mpri = m;
645 match = rt;
646 }
647 out:
648 return match;
649 }
650
651 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
652 struct rt6_info *rr_head,
653 u32 metric, int oif, int strict,
654 bool *do_rr)
655 {
656 struct rt6_info *rt, *match;
657 int mpri = -1;
658
659 match = NULL;
660 for (rt = rr_head; rt && rt->rt6i_metric == metric;
661 rt = rt->dst.rt6_next)
662 match = find_match(rt, oif, strict, &mpri, match, do_rr);
663 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
664 rt = rt->dst.rt6_next)
665 match = find_match(rt, oif, strict, &mpri, match, do_rr);
666
667 return match;
668 }
669
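/*
 * Select the best route from the node's leaf list, starting the scan at
 * the round-robin pointer.  When scoring asks for round-robin
 * (RT6_NUD_FAIL_DO_RR), advance rr_ptr so equal-metric routes take turns.
 */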
670 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
671 {
672 struct rt6_info *match, *rt0;
673 struct net *net;
674 bool do_rr = false;
675
676 rt0 = fn->rr_ptr;
677 if (!rt0)
678 fn->rr_ptr = rt0 = fn->leaf;
679
680 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
681 &do_rr);
682
683 if (do_rr) {
684 struct rt6_info *next = rt0->dst.rt6_next;
685
686 /* no entries matched; do round-robin */
687 if (!next || next->rt6i_metric != rt0->rt6i_metric)
688 next = fn->leaf;
689
690 if (next != rt0)
691 fn->rr_ptr = next;
692 }
693
694 net = dev_net(rt0->dst.dev);
695 return match ? match : net->ipv6.ip6_null_entry;
696 }
697
698 #ifdef CONFIG_IPV6_ROUTE_INFO
699 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
700 const struct in6_addr *gwaddr)
701 {
702 struct route_info *rinfo = (struct route_info *) opt;
703 struct in6_addr prefix_buf, *prefix;
704 unsigned int pref;
705 unsigned long lifetime;
706 struct rt6_info *rt;
707
708 if (len < sizeof(struct route_info)) {
709 return -EINVAL;
710 }
711
712 /* Sanity check for prefix_len and length */
713 if (rinfo->length > 3) {
714 return -EINVAL;
715 } else if (rinfo->prefix_len > 128) {
716 return -EINVAL;
717 } else if (rinfo->prefix_len > 64) {
718 if (rinfo->length < 2) {
719 return -EINVAL;
720 }
721 } else if (rinfo->prefix_len > 0) {
722 if (rinfo->length < 1) {
723 return -EINVAL;
724 }
725 }
726
727 pref = rinfo->route_pref;
728 if (pref == ICMPV6_ROUTER_PREF_INVALID)
729 return -EINVAL;
730
731 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
732
733 if (rinfo->length == 3)
734 prefix = (struct in6_addr *)rinfo->prefix;
735 else {
736 /* this function is safe */
737 ipv6_addr_prefix(&prefix_buf,
738 (struct in6_addr *)rinfo->prefix,
739 rinfo->prefix_len);
740 prefix = &prefix_buf;
741 }
742
743 if (rinfo->prefix_len == 0)
744 rt = rt6_get_dflt_router(gwaddr, dev);
745 else
746 rt = rt6_get_route_info(dev, prefix, rinfo->prefix_len, gwaddr);
747
748 if (rt && !lifetime) {
749 ip6_del_rt(rt);
750 rt = NULL;
751 }
752
753 if (!rt && lifetime)
754 rt = rt6_add_route_info(dev, prefix, rinfo->prefix_len, gwaddr, pref);
755 else if (rt)
756 rt->rt6i_flags = RTF_ROUTEINFO |
757 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
758
759 if (rt) {
760 if (!addrconf_finite_timeout(lifetime))
761 rt6_clean_expires(rt);
762 else
763 rt6_set_expires(rt, jiffies + HZ * lifetime);
764
765 ip6_rt_put(rt);
766 }
767 return 0;
768 }
769 #endif
770
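/*
 * If the lookup resolved to the null entry, walk back up the fib6 tree
 * (descending into source-routing subtrees where present) until a node
 * carrying route information is found, then retry from the "restart" label.
 */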
771 #define BACKTRACK(__net, saddr) \
772 do { \
773 if (rt == __net->ipv6.ip6_null_entry) { \
774 struct fib6_node *pn; \
775 while (1) { \
776 if (fn->fn_flags & RTN_TL_ROOT) \
777 goto out; \
778 pn = fn->parent; \
779 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
780 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
781 else \
782 fn = pn; \
783 if (fn->fn_flags & RTN_RTINFO) \
784 goto restart; \
785 } \
786 } \
787 } while (0)
788
789 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
790 struct fib6_table *table,
791 struct flowi6 *fl6, int flags)
792 {
793 struct fib6_node *fn;
794 struct rt6_info *rt;
795
796 read_lock_bh(&table->tb6_lock);
797 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
798 restart:
799 rt = fn->leaf;
800 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
801 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
802 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
803 BACKTRACK(net, &fl6->saddr);
804 out:
805 dst_use(&rt->dst, jiffies);
806 read_unlock_bh(&table->tb6_lock);
807 return rt;
808
809 }
810
811 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
812 int flags)
813 {
814 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
815 }
816 EXPORT_SYMBOL_GPL(ip6_route_lookup);
817
818 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
819 const struct in6_addr *saddr, int oif, int strict)
820 {
821 struct flowi6 fl6 = {
822 .flowi6_oif = oif,
823 .daddr = *daddr,
824 };
825 struct dst_entry *dst;
826 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
827
828 if (saddr) {
829 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
830 flags |= RT6_LOOKUP_F_HAS_SADDR;
831 }
832
833 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
834 if (dst->error == 0)
835 return (struct rt6_info *) dst;
836
837 dst_release(dst);
838
839 return NULL;
840 }
841 EXPORT_SYMBOL(rt6_lookup);
842
843 /* ip6_ins_rt is called with FREE table->tb6_lock.
844 * It takes the new route entry; if the addition fails for any reason,
845 * the route is freed. In any case, if the caller does not hold a
846 * reference, it may be destroyed.
847 */
848
849 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
850 struct nlattr *mx, int mx_len)
851 {
852 int err;
853 struct fib6_table *table;
854
855 table = rt->rt6i_table;
856 write_lock_bh(&table->tb6_lock);
857 err = fib6_add(&table->tb6_root, rt, info, mx, mx_len);
858 write_unlock_bh(&table->tb6_lock);
859
860 return err;
861 }
862
863 int ip6_ins_rt(struct rt6_info *rt)
864 {
865 struct nl_info info = {
866 .nl_net = dev_net(rt->dst.dev),
867 };
868 return __ip6_ins_rt(rt, &info, NULL, 0);
869 }
870
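/*
 * Create a cached (RTF_CACHE) per-destination clone of @ort.  If the
 * destination equals the prefix address of a non-host route it is the
 * subnet anycast address, so the clone is also marked RTF_ANYCAST.
 */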
871 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
872 const struct in6_addr *daddr,
873 const struct in6_addr *saddr)
874 {
875 struct rt6_info *rt;
876
877 /*
878 * Clone the route.
879 */
880
881 rt = ip6_rt_copy(ort, daddr);
882
883 if (rt) {
884 if (ort->rt6i_dst.plen != 128 &&
885 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
886 rt->rt6i_flags |= RTF_ANYCAST;
887
888 rt->rt6i_flags |= RTF_CACHE;
889
890 #ifdef CONFIG_IPV6_SUBTREES
891 if (rt->rt6i_src.plen && saddr) {
892 rt->rt6i_src.addr = *saddr;
893 rt->rt6i_src.plen = 128;
894 }
895 #endif
896 }
897
898 return rt;
899 }
900
901 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
902 const struct in6_addr *daddr)
903 {
904 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
905
906 if (rt)
907 rt->rt6i_flags |= RTF_CACHE;
908 return rt;
909 }
910
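/*
 * Core policy-routing lookup used by both the input and output paths:
 * find the best route in @table and, unless it is already a cached or
 * null entry, insert a per-destination clone (COW or plain) into the tree.
 */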
911 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
912 struct flowi6 *fl6, int flags)
913 {
914 struct fib6_node *fn;
915 struct rt6_info *rt, *nrt;
916 int strict = 0;
917 int attempts = 3;
918 int err;
919 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
920
921 strict |= flags & RT6_LOOKUP_F_IFACE;
922
923 relookup:
924 read_lock_bh(&table->tb6_lock);
925
926 restart_2:
927 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
928
929 restart:
930 rt = rt6_select(fn, oif, strict | reachable);
931 if (rt->rt6i_nsiblings)
932 rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
933 BACKTRACK(net, &fl6->saddr);
934 if (rt == net->ipv6.ip6_null_entry ||
935 rt->rt6i_flags & RTF_CACHE)
936 goto out;
937
938 dst_hold(&rt->dst);
939 read_unlock_bh(&table->tb6_lock);
940
941 if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
942 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
943 else if (!(rt->dst.flags & DST_HOST))
944 nrt = rt6_alloc_clone(rt, &fl6->daddr);
945 else
946 goto out2;
947
948 ip6_rt_put(rt);
949 rt = nrt ? : net->ipv6.ip6_null_entry;
950
951 dst_hold(&rt->dst);
952 if (nrt) {
953 err = ip6_ins_rt(nrt);
954 if (!err)
955 goto out2;
956 }
957
958 if (--attempts <= 0)
959 goto out2;
960
961 /*
962 * Race condition! In the gap, when table->tb6_lock was
963 * released, someone could insert this route. Relookup.
964 */
965 ip6_rt_put(rt);
966 goto relookup;
967
968 out:
969 if (reachable) {
970 reachable = 0;
971 goto restart_2;
972 }
973 dst_hold(&rt->dst);
974 read_unlock_bh(&table->tb6_lock);
975 out2:
976 rt->dst.lastuse = jiffies;
977 rt->dst.__use++;
978
979 return rt;
980 }
981
982 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
983 struct flowi6 *fl6, int flags)
984 {
985 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
986 }
987
988 static struct dst_entry *ip6_route_input_lookup(struct net *net,
989 struct net_device *dev,
990 struct flowi6 *fl6, int flags)
991 {
992 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
993 flags |= RT6_LOOKUP_F_IFACE;
994
995 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
996 }
997
998 void ip6_route_input(struct sk_buff *skb)
999 {
1000 const struct ipv6hdr *iph = ipv6_hdr(skb);
1001 struct net *net = dev_net(skb->dev);
1002 int flags = RT6_LOOKUP_F_HAS_SADDR;
1003 struct flowi6 fl6 = {
1004 .flowi6_iif = skb->dev->ifindex,
1005 .daddr = iph->daddr,
1006 .saddr = iph->saddr,
1007 .flowlabel = ip6_flowinfo(iph),
1008 .flowi6_mark = skb->mark,
1009 .flowi6_proto = iph->nexthdr,
1010 };
1011
1012 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1013 }
1014
1015 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1016 struct flowi6 *fl6, int flags)
1017 {
1018 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1019 }
1020
1021 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1022 struct flowi6 *fl6)
1023 {
1024 int flags = 0;
1025
1026 fl6->flowi6_iif = LOOPBACK_IFINDEX;
1027
1028 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1029 flags |= RT6_LOOKUP_F_IFACE;
1030
1031 if (!ipv6_addr_any(&fl6->saddr))
1032 flags |= RT6_LOOKUP_F_HAS_SADDR;
1033 else if (sk)
1034 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1035
1036 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1037 }
1038 EXPORT_SYMBOL(ip6_route_output);
1039
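/*
 * Produce a "blackhole" copy of @dst_orig whose input and output handlers
 * silently discard packets; used on paths (e.g. xfrm) that need a dst
 * which drops traffic instead of forwarding it.
 */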
1040 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1041 {
1042 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1043 struct dst_entry *new = NULL;
1044
1045 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1046 if (rt) {
1047 new = &rt->dst;
1048
1049 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1050 rt6_init_peer(rt, net->ipv6.peers);
1051
1052 new->__use = 1;
1053 new->input = dst_discard;
1054 new->output = dst_discard_sk;
1055
1056 if (dst_metrics_read_only(&ort->dst))
1057 new->_metrics = ort->dst._metrics;
1058 else
1059 dst_copy_metrics(new, &ort->dst);
1060 rt->rt6i_idev = ort->rt6i_idev;
1061 if (rt->rt6i_idev)
1062 in6_dev_hold(rt->rt6i_idev);
1063
1064 rt->rt6i_gateway = ort->rt6i_gateway;
1065 rt->rt6i_flags = ort->rt6i_flags;
1066 rt->rt6i_metric = 0;
1067
1068 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1069 #ifdef CONFIG_IPV6_SUBTREES
1070 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1071 #endif
1072
1073 dst_free(new);
1074 }
1075
1076 dst_release(dst_orig);
1077 return new ? new : ERR_PTR(-ENOMEM);
1078 }
1079
1080 /*
1081 * Destination cache support functions
1082 */
1083
1084 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1085 {
1086 struct rt6_info *rt;
1087
1088 rt = (struct rt6_info *) dst;
1089
1090 /* All IPV6 dsts are created with ->obsolete set to the value
1091 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1092 * into this function always.
1093 */
1094 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1095 return NULL;
1096
1097 if (rt6_check_expired(rt))
1098 return NULL;
1099
1100 return dst;
1101 }
1102
1103 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1104 {
1105 struct rt6_info *rt = (struct rt6_info *) dst;
1106
1107 if (rt) {
1108 if (rt->rt6i_flags & RTF_CACHE) {
1109 if (rt6_check_expired(rt)) {
1110 ip6_del_rt(rt);
1111 dst = NULL;
1112 }
1113 } else {
1114 dst_release(dst);
1115 dst = NULL;
1116 }
1117 }
1118 return dst;
1119 }
1120
1121 static void ip6_link_failure(struct sk_buff *skb)
1122 {
1123 struct rt6_info *rt;
1124
1125 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1126
1127 rt = (struct rt6_info *) skb_dst(skb);
1128 if (rt) {
1129 if (rt->rt6i_flags & RTF_CACHE) {
1130 dst_hold(&rt->dst);
1131 if (ip6_del_rt(rt))
1132 dst_free(&rt->dst);
1133 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1134 rt->rt6i_node->fn_sernum = -1;
1135 }
1136 }
1137 }
1138
1139 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1140 struct sk_buff *skb, u32 mtu)
1141 {
1142 struct rt6_info *rt6 = (struct rt6_info *)dst;
1143
1144 dst_confirm(dst);
1145 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1146 struct net *net = dev_net(dst->dev);
1147
1148 rt6->rt6i_flags |= RTF_MODIFIED;
1149 if (mtu < IPV6_MIN_MTU)
1150 mtu = IPV6_MIN_MTU;
1151
1152 dst_metric_set(dst, RTAX_MTU, mtu);
1153 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1154 }
1155 }
1156
1157 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1158 int oif, u32 mark, kuid_t uid)
1159 {
1160 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1161 struct dst_entry *dst;
1162 struct flowi6 fl6;
1163
1164 memset(&fl6, 0, sizeof(fl6));
1165 fl6.flowi6_oif = oif;
1166 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1167 fl6.daddr = iph->daddr;
1168 fl6.saddr = iph->saddr;
1169 fl6.flowlabel = ip6_flowinfo(iph);
1170 fl6.flowi6_uid = uid;
1171
1172 dst = ip6_route_output(net, NULL, &fl6);
1173 if (!dst->error)
1174 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1175 dst_release(dst);
1176 }
1177 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1178
1179 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1180 {
1181 ip6_update_pmtu(skb, sock_net(sk), mtu,
1182 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
1183 }
1184 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1185
1186 /* Handle redirects */
1187 struct ip6rd_flowi {
1188 struct flowi6 fl6;
1189 struct in6_addr gateway;
1190 };
1191
1192 static struct rt6_info *__ip6_route_redirect(struct net *net,
1193 struct fib6_table *table,
1194 struct flowi6 *fl6,
1195 int flags)
1196 {
1197 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1198 struct rt6_info *rt;
1199 struct fib6_node *fn;
1200
1201 /* Get the "current" route for this destination and
1202 * check if the redirect has come from the appropriate router.
1203 *
1204 * RFC 4861 specifies that redirects should only be
1205 * accepted if they come from the nexthop to the target.
1206 * Due to the way the routes are chosen, this notion
1207 * is a bit fuzzy and one might need to check all possible
1208 * routes.
1209 */
1210
1211 read_lock_bh(&table->tb6_lock);
1212 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1213 restart:
1214 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1215 if (rt6_check_expired(rt))
1216 continue;
1217 if (rt->dst.error)
1218 break;
1219 if (!(rt->rt6i_flags & RTF_GATEWAY))
1220 continue;
1221 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1222 continue;
1223 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1224 continue;
1225 break;
1226 }
1227
1228 if (!rt)
1229 rt = net->ipv6.ip6_null_entry;
1230 else if (rt->dst.error) {
1231 rt = net->ipv6.ip6_null_entry;
1232 goto out;
1233 }
1234 BACKTRACK(net, &fl6->saddr);
1235 out:
1236 dst_hold(&rt->dst);
1237
1238 read_unlock_bh(&table->tb6_lock);
1239
1240 return rt;
1241 };
1242
1243 static struct dst_entry *ip6_route_redirect(struct net *net,
1244 const struct flowi6 *fl6,
1245 const struct in6_addr *gateway)
1246 {
1247 int flags = RT6_LOOKUP_F_HAS_SADDR;
1248 struct ip6rd_flowi rdfl;
1249
1250 rdfl.fl6 = *fl6;
1251 rdfl.gateway = *gateway;
1252
1253 return fib6_rule_lookup(net, &rdfl.fl6,
1254 flags, __ip6_route_redirect);
1255 }
1256
1257 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1258 kuid_t uid)
1259 {
1260 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1261 struct dst_entry *dst;
1262 struct flowi6 fl6;
1263
1264 memset(&fl6, 0, sizeof(fl6));
1265 fl6.flowi6_iif = LOOPBACK_IFINDEX;
1266 fl6.flowi6_oif = oif;
1267 fl6.flowi6_mark = mark;
1268 fl6.daddr = iph->daddr;
1269 fl6.saddr = iph->saddr;
1270 fl6.flowlabel = ip6_flowinfo(iph);
1271 fl6.flowi6_uid = uid;
1272
1273 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1274 rt6_do_redirect(dst, NULL, skb);
1275 dst_release(dst);
1276 }
1277 EXPORT_SYMBOL_GPL(ip6_redirect);
1278
1279 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1280 u32 mark)
1281 {
1282 const struct ipv6hdr *iph = ipv6_hdr(skb);
1283 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1284 struct dst_entry *dst;
1285 struct flowi6 fl6;
1286
1287 memset(&fl6, 0, sizeof(fl6));
1288 fl6.flowi6_iif = LOOPBACK_IFINDEX;
1289 fl6.flowi6_oif = oif;
1290 fl6.flowi6_mark = mark;
1291 fl6.daddr = msg->dest;
1292 fl6.saddr = iph->daddr;
1293 fl6.flowi6_uid = sock_net_uid(net, NULL);
1294
1295 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1296 rt6_do_redirect(dst, NULL, skb);
1297 dst_release(dst);
1298 }
1299
1300 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1301 {
1302 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1303 sk->sk_uid);
1304 }
1305 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1306
1307 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1308 {
1309 struct net_device *dev = dst->dev;
1310 unsigned int mtu = dst_mtu(dst);
1311 struct net *net = dev_net(dev);
1312
1313 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1314
1315 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1316 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1317
1318 /*
1319 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1320 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1321 * IPV6_MAXPLEN is also valid and means: "any MSS,
1322 * rely only on pmtu discovery"
1323 */
1324 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1325 mtu = IPV6_MAXPLEN;
1326 return mtu;
1327 }
1328
1329 static unsigned int ip6_mtu(const struct dst_entry *dst)
1330 {
1331 struct inet6_dev *idev;
1332 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1333
1334 if (mtu)
1335 goto out;
1336
1337 mtu = IPV6_MIN_MTU;
1338
1339 rcu_read_lock();
1340 idev = __in6_dev_get(dst->dev);
1341 if (idev)
1342 mtu = idev->cnf.mtu6;
1343 rcu_read_unlock();
1344
1345 out:
1346 return min_t(unsigned int, mtu, IP6_MAX_MTU);
1347 }
1348
1349 static struct dst_entry *icmp6_dst_gc_list;
1350 static DEFINE_SPINLOCK(icmp6_dst_lock);
1351
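/*
 * Allocate a standalone dst for an outgoing ICMPv6 packet.  The entry is
 * never inserted into the FIB; it is chained on icmp6_dst_gc_list and
 * reclaimed by icmp6_dst_gc() once its refcount drops to zero.
 */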
1352 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1353 struct flowi6 *fl6)
1354 {
1355 struct dst_entry *dst;
1356 struct rt6_info *rt;
1357 struct inet6_dev *idev = in6_dev_get(dev);
1358 struct net *net = dev_net(dev);
1359
1360 if (unlikely(!idev))
1361 return ERR_PTR(-ENODEV);
1362
1363 rt = ip6_dst_alloc(net, dev, 0, NULL);
1364 if (unlikely(!rt)) {
1365 in6_dev_put(idev);
1366 dst = ERR_PTR(-ENOMEM);
1367 goto out;
1368 }
1369
1370 rt->dst.flags |= DST_HOST;
1371 rt->dst.output = ip6_output;
1372 atomic_set(&rt->dst.__refcnt, 1);
1373 rt->rt6i_gateway = fl6->daddr;
1374 rt->rt6i_dst.addr = fl6->daddr;
1375 rt->rt6i_dst.plen = 128;
1376 rt->rt6i_idev = idev;
1377 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1378
1379 spin_lock_bh(&icmp6_dst_lock);
1380 rt->dst.next = icmp6_dst_gc_list;
1381 icmp6_dst_gc_list = &rt->dst;
1382 spin_unlock_bh(&icmp6_dst_lock);
1383
1384 fib6_force_start_gc(net);
1385
1386 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1387
1388 out:
1389 return dst;
1390 }
1391
1392 int icmp6_dst_gc(void)
1393 {
1394 struct dst_entry *dst, **pprev;
1395 int more = 0;
1396
1397 spin_lock_bh(&icmp6_dst_lock);
1398 pprev = &icmp6_dst_gc_list;
1399
1400 while ((dst = *pprev) != NULL) {
1401 if (!atomic_read(&dst->__refcnt)) {
1402 *pprev = dst->next;
1403 dst_free(dst);
1404 } else {
1405 pprev = &dst->next;
1406 ++more;
1407 }
1408 }
1409
1410 spin_unlock_bh(&icmp6_dst_lock);
1411
1412 return more;
1413 }
1414
1415 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1416 void *arg)
1417 {
1418 struct dst_entry *dst, **pprev;
1419
1420 spin_lock_bh(&icmp6_dst_lock);
1421 pprev = &icmp6_dst_gc_list;
1422 while ((dst = *pprev) != NULL) {
1423 struct rt6_info *rt = (struct rt6_info *) dst;
1424 if (func(rt, arg)) {
1425 *pprev = dst->next;
1426 dst_free(dst);
1427 } else {
1428 pprev = &dst->next;
1429 }
1430 }
1431 spin_unlock_bh(&icmp6_dst_lock);
1432 }
1433
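/*
 * dst_ops garbage collector: run the fib6 GC when the minimum interval
 * has elapsed or the cache has grown past ip6_rt_max_size, adapting the
 * expiry aggressiveness (ip6_rt_gc_expire) to the observed pressure.
 */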
1434 static int ip6_dst_gc(struct dst_ops *ops)
1435 {
1436 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1437 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1438 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1439 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1440 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1441 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1442 int entries;
1443
1444 entries = dst_entries_get_fast(ops);
1445 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1446 entries <= rt_max_size)
1447 goto out;
1448
1449 net->ipv6.ip6_rt_gc_expire++;
1450 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1451 entries = dst_entries_get_slow(ops);
1452 if (entries < ops->gc_thresh)
1453 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1454 out:
1455 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1456 return entries > rt_max_size;
1457 }
1458
1459 /*
1460 * Add a route described by a fib6_config to the FIB.
1461 */
1462
1463 int ip6_route_add(struct fib6_config *cfg)
1464 {
1465 int err;
1466 struct net *net = cfg->fc_nlinfo.nl_net;
1467 struct rt6_info *rt = NULL;
1468 struct net_device *dev = NULL;
1469 struct inet6_dev *idev = NULL;
1470 struct fib6_table *table;
1471 int addr_type;
1472
1473 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1474 return -EINVAL;
1475 #ifndef CONFIG_IPV6_SUBTREES
1476 if (cfg->fc_src_len)
1477 return -EINVAL;
1478 #endif
1479 if (cfg->fc_ifindex) {
1480 err = -ENODEV;
1481 dev = dev_get_by_index(net, cfg->fc_ifindex);
1482 if (!dev)
1483 goto out;
1484 idev = in6_dev_get(dev);
1485 if (!idev)
1486 goto out;
1487 }
1488
1489 if (cfg->fc_metric == 0)
1490 cfg->fc_metric = IP6_RT_PRIO_USER;
1491
1492 err = -ENOBUFS;
1493 if (cfg->fc_nlinfo.nlh &&
1494 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1495 table = fib6_get_table(net, cfg->fc_table);
1496 if (!table) {
1497 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1498 table = fib6_new_table(net, cfg->fc_table);
1499 }
1500 } else {
1501 table = fib6_new_table(net, cfg->fc_table);
1502 }
1503
1504 if (!table)
1505 goto out;
1506
1507 rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1508
1509 if (!rt) {
1510 err = -ENOMEM;
1511 goto out;
1512 }
1513
1514 if (cfg->fc_flags & RTF_EXPIRES)
1515 rt6_set_expires(rt, jiffies +
1516 clock_t_to_jiffies(cfg->fc_expires));
1517 else
1518 rt6_clean_expires(rt);
1519
1520 if (cfg->fc_protocol == RTPROT_UNSPEC)
1521 cfg->fc_protocol = RTPROT_BOOT;
1522 rt->rt6i_protocol = cfg->fc_protocol;
1523
1524 addr_type = ipv6_addr_type(&cfg->fc_dst);
1525
1526 if (addr_type & IPV6_ADDR_MULTICAST)
1527 rt->dst.input = ip6_mc_input;
1528 else if (cfg->fc_flags & RTF_LOCAL)
1529 rt->dst.input = ip6_input;
1530 else
1531 rt->dst.input = ip6_forward;
1532
1533 rt->dst.output = ip6_output;
1534
1535 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1536 rt->rt6i_dst.plen = cfg->fc_dst_len;
1537 if (rt->rt6i_dst.plen == 128) {
1538 rt->dst.flags |= DST_HOST;
1539 dst_metrics_set_force_overwrite(&rt->dst);
1540 }
1541
1542 #ifdef CONFIG_IPV6_SUBTREES
1543 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1544 rt->rt6i_src.plen = cfg->fc_src_len;
1545 #endif
1546
1547 rt->rt6i_metric = cfg->fc_metric;
1548
1549 /* We cannot add true routes via loopback here;
1550 they would result in kernel looping. Promote them to reject routes.
1551 */
1552 if ((cfg->fc_flags & RTF_REJECT) ||
1553 (dev && (dev->flags & IFF_LOOPBACK) &&
1554 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1555 !(cfg->fc_flags & RTF_LOCAL))) {
1556 /* hold loopback dev/idev if we haven't done so. */
1557 if (dev != net->loopback_dev) {
1558 if (dev) {
1559 dev_put(dev);
1560 in6_dev_put(idev);
1561 }
1562 dev = net->loopback_dev;
1563 dev_hold(dev);
1564 idev = in6_dev_get(dev);
1565 if (!idev) {
1566 err = -ENODEV;
1567 goto out;
1568 }
1569 }
1570 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1571 switch (cfg->fc_type) {
1572 case RTN_BLACKHOLE:
1573 rt->dst.error = -EINVAL;
1574 rt->dst.output = dst_discard_sk;
1575 rt->dst.input = dst_discard;
1576 break;
1577 case RTN_PROHIBIT:
1578 rt->dst.error = -EACCES;
1579 rt->dst.output = ip6_pkt_prohibit_out;
1580 rt->dst.input = ip6_pkt_prohibit;
1581 break;
1582 case RTN_THROW:
1583 default:
1584 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1585 : -ENETUNREACH;
1586 rt->dst.output = ip6_pkt_discard_out;
1587 rt->dst.input = ip6_pkt_discard;
1588 break;
1589 }
1590 goto install_route;
1591 }
1592
1593 if (cfg->fc_flags & RTF_GATEWAY) {
1594 const struct in6_addr *gw_addr;
1595 int gwa_type;
1596
1597 gw_addr = &cfg->fc_gateway;
1598 rt->rt6i_gateway = *gw_addr;
1599 gwa_type = ipv6_addr_type(gw_addr);
1600
1601 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1602 struct rt6_info *grt;
1603
1604 /* IPv6 strictly forbids using non-link-local
1605 addresses as the nexthop address.
1606 Otherwise, the router will not be able to send redirects.
1607 That is a good thing, but in some (rare!) circumstances
1608 (SIT, PtP, NBMA NOARP links) it is handy to allow
1609 some exceptions. --ANK
1610 */
1611 err = -EINVAL;
1612 if (!(gwa_type & IPV6_ADDR_UNICAST))
1613 goto out;
1614
1615 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1616
1617 err = -EHOSTUNREACH;
1618 if (!grt)
1619 goto out;
1620 if (dev) {
1621 if (dev != grt->dst.dev) {
1622 ip6_rt_put(grt);
1623 goto out;
1624 }
1625 } else {
1626 dev = grt->dst.dev;
1627 idev = grt->rt6i_idev;
1628 dev_hold(dev);
1629 in6_dev_hold(grt->rt6i_idev);
1630 }
1631 if (!(grt->rt6i_flags & RTF_GATEWAY))
1632 err = 0;
1633 ip6_rt_put(grt);
1634
1635 if (err)
1636 goto out;
1637 }
1638 err = -EINVAL;
1639 if (!dev || (dev->flags & IFF_LOOPBACK))
1640 goto out;
1641 }
1642
1643 err = -ENODEV;
1644 if (!dev)
1645 goto out;
1646
1647 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1648 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1649 err = -EINVAL;
1650 goto out;
1651 }
1652 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1653 rt->rt6i_prefsrc.plen = 128;
1654 } else
1655 rt->rt6i_prefsrc.plen = 0;
1656
1657 rt->rt6i_flags = cfg->fc_flags;
1658
1659 install_route:
1660 rt->dst.dev = dev;
1661 rt->rt6i_idev = idev;
1662 rt->rt6i_table = table;
1663
1664 cfg->fc_nlinfo.nl_net = dev_net(dev);
1665
1666 return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len);
1667
1668 out:
1669 if (dev)
1670 dev_put(dev);
1671 if (idev)
1672 in6_dev_put(idev);
1673 if (rt)
1674 dst_free(&rt->dst);
1675 return err;
1676 }
1677
1678 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1679 {
1680 int err;
1681 struct fib6_table *table;
1682 struct net *net = dev_net(rt->dst.dev);
1683
1684 if (rt == net->ipv6.ip6_null_entry) {
1685 err = -ENOENT;
1686 goto out;
1687 }
1688
1689 table = rt->rt6i_table;
1690 write_lock_bh(&table->tb6_lock);
1691 err = fib6_del(rt, info);
1692 write_unlock_bh(&table->tb6_lock);
1693
1694 out:
1695 ip6_rt_put(rt);
1696 return err;
1697 }
1698
1699 int ip6_del_rt(struct rt6_info *rt)
1700 {
1701 struct nl_info info = {
1702 .nl_net = dev_net(rt->dst.dev),
1703 };
1704 return __ip6_del_rt(rt, &info);
1705 }
1706
1707 static int ip6_route_del(struct fib6_config *cfg)
1708 {
1709 struct fib6_table *table;
1710 struct fib6_node *fn;
1711 struct rt6_info *rt;
1712 int err = -ESRCH;
1713
1714 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1715 if (!table)
1716 return err;
1717
1718 read_lock_bh(&table->tb6_lock);
1719
1720 fn = fib6_locate(&table->tb6_root,
1721 &cfg->fc_dst, cfg->fc_dst_len,
1722 &cfg->fc_src, cfg->fc_src_len);
1723
1724 if (fn) {
1725 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1726 if (cfg->fc_ifindex &&
1727 (!rt->dst.dev ||
1728 rt->dst.dev->ifindex != cfg->fc_ifindex))
1729 continue;
1730 if (cfg->fc_flags & RTF_GATEWAY &&
1731 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1732 continue;
1733 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1734 continue;
1735 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
1736 continue;
1737 dst_hold(&rt->dst);
1738 read_unlock_bh(&table->tb6_lock);
1739
1740 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1741 }
1742 }
1743 read_unlock_bh(&table->tb6_lock);
1744
1745 return err;
1746 }
1747
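/*
 * Handle an ICMPv6 Redirect: validate the message and its ND options,
 * update the neighbour cache for the new first hop, install a cached
 * route towards it, and remove the old RTF_CACHE entry if there was one.
 */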
1748 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1749 {
1750 struct net *net = dev_net(skb->dev);
1751 struct netevent_redirect netevent;
1752 struct rt6_info *rt, *nrt = NULL;
1753 struct ndisc_options ndopts;
1754 struct inet6_dev *in6_dev;
1755 struct neighbour *neigh;
1756 struct rd_msg *msg;
1757 int optlen, on_link;
1758 u8 *lladdr;
1759
1760 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1761 optlen -= sizeof(*msg);
1762
1763 if (optlen < 0) {
1764 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1765 return;
1766 }
1767
1768 msg = (struct rd_msg *)icmp6_hdr(skb);
1769
1770 if (ipv6_addr_is_multicast(&msg->dest)) {
1771 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1772 return;
1773 }
1774
1775 on_link = 0;
1776 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1777 on_link = 1;
1778 } else if (ipv6_addr_type(&msg->target) !=
1779 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1780 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1781 return;
1782 }
1783
1784 in6_dev = __in6_dev_get(skb->dev);
1785 if (!in6_dev)
1786 return;
1787 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1788 return;
1789
1790 /* RFC2461 8.1:
1791 * The IP source address of the Redirect MUST be the same as the current
1792 * first-hop router for the specified ICMP Destination Address.
1793 */
1794
1795 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1796 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1797 return;
1798 }
1799
1800 lladdr = NULL;
1801 if (ndopts.nd_opts_tgt_lladdr) {
1802 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1803 skb->dev);
1804 if (!lladdr) {
1805 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1806 return;
1807 }
1808 }
1809
1810 rt = (struct rt6_info *) dst;
1811 if (rt == net->ipv6.ip6_null_entry) {
1812 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1813 return;
1814 }
1815
1816 /* Redirect received -> path was valid.
1817 * Look, redirects are sent only in response to data packets,
1818 * so that this nexthop apparently is reachable. --ANK
1819 */
1820 dst_confirm(&rt->dst);
1821
1822 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1823 if (!neigh)
1824 return;
1825
1826 /*
1827 * We have finally decided to accept it.
1828 */
1829
1830 neigh_update(neigh, lladdr, NUD_STALE,
1831 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1832 NEIGH_UPDATE_F_OVERRIDE|
1833 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1834 NEIGH_UPDATE_F_ISROUTER))
1835 );
1836
1837 nrt = ip6_rt_copy(rt, &msg->dest);
1838 if (!nrt)
1839 goto out;
1840
1841 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1842 if (on_link)
1843 nrt->rt6i_flags &= ~RTF_GATEWAY;
1844
1845 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1846
1847 if (ip6_ins_rt(nrt))
1848 goto out;
1849
1850 netevent.old = &rt->dst;
1851 netevent.new = &nrt->dst;
1852 netevent.daddr = &msg->dest;
1853 netevent.neigh = neigh;
1854 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1855
1856 if (rt->rt6i_flags & RTF_CACHE) {
1857 rt = (struct rt6_info *) dst_clone(&rt->dst);
1858 ip6_del_rt(rt);
1859 }
1860
1861 out:
1862 neigh_release(neigh);
1863 }
1864
1865 /*
1866 * Misc support functions
1867 */
1868
1869 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1870 const struct in6_addr *dest)
1871 {
1872 struct net *net = dev_net(ort->dst.dev);
1873 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1874 ort->rt6i_table);
1875
1876 if (rt) {
1877 rt->dst.input = ort->dst.input;
1878 rt->dst.output = ort->dst.output;
1879 rt->dst.flags |= DST_HOST;
1880
1881 rt->rt6i_dst.addr = *dest;
1882 rt->rt6i_dst.plen = 128;
1883 dst_copy_metrics(&rt->dst, &ort->dst);
1884 rt->dst.error = ort->dst.error;
1885 rt->rt6i_idev = ort->rt6i_idev;
1886 if (rt->rt6i_idev)
1887 in6_dev_hold(rt->rt6i_idev);
1888 rt->dst.lastuse = jiffies;
1889
1890 if (ort->rt6i_flags & RTF_GATEWAY)
1891 rt->rt6i_gateway = ort->rt6i_gateway;
1892 else
1893 rt->rt6i_gateway = *dest;
1894 rt->rt6i_flags = ort->rt6i_flags;
1895 rt6_set_from(rt, ort);
1896 rt->rt6i_metric = 0;
1897
1898 #ifdef CONFIG_IPV6_SUBTREES
1899 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1900 #endif
1901 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1902 rt->rt6i_table = ort->rt6i_table;
1903 }
1904 return rt;
1905 }
1906
1907 #ifdef CONFIG_IPV6_ROUTE_INFO
1908 static struct rt6_info *rt6_get_route_info(struct net_device *dev,
1909 const struct in6_addr *prefix, int prefixlen,
1910 const struct in6_addr *gwaddr)
1911 {
1912 struct fib6_node *fn;
1913 struct rt6_info *rt = NULL;
1914 struct fib6_table *table;
1915
1916 table = fib6_get_table(dev_net(dev),
1917 addrconf_rt_table(dev, RT6_TABLE_INFO));
1918 if (!table)
1919 return NULL;
1920
1921 read_lock_bh(&table->tb6_lock);
1922 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1923 if (!fn)
1924 goto out;
1925
1926 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1927 if (rt->dst.dev->ifindex != dev->ifindex)
1928 continue;
1929 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1930 continue;
1931 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1932 continue;
1933 dst_hold(&rt->dst);
1934 break;
1935 }
1936 out:
1937 read_unlock_bh(&table->tb6_lock);
1938 return rt;
1939 }
1940
1941 static struct rt6_info *rt6_add_route_info(struct net_device *dev,
1942 const struct in6_addr *prefix, int prefixlen,
1943 const struct in6_addr *gwaddr, unsigned int pref)
1944 {
1945 struct fib6_config cfg = {
1946 .fc_table = addrconf_rt_table(dev, RT6_TABLE_INFO),
1947 .fc_metric = IP6_RT_PRIO_USER,
1948 .fc_ifindex = dev->ifindex,
1949 .fc_dst_len = prefixlen,
1950 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1951 RTF_UP | RTF_PREF(pref),
1952 .fc_nlinfo.portid = 0,
1953 .fc_nlinfo.nlh = NULL,
1954 .fc_nlinfo.nl_net = dev_net(dev),
1955 };
1956
1957 cfg.fc_dst = *prefix;
1958 cfg.fc_gateway = *gwaddr;
1959
1960 /* We should treat it as a default route if the prefix length is 0. */
1961 if (!prefixlen)
1962 cfg.fc_flags |= RTF_DEFAULT;
1963
1964 ip6_route_add(&cfg);
1965
1966 return rt6_get_route_info(dev, prefix, prefixlen, gwaddr);
1967 }
1968 #endif
1969
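/*
 * Look up a default route learned from a Router Advertisement
 * (RTF_ADDRCONF | RTF_DEFAULT) for the given gateway and device.
 */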
1970 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1971 {
1972 struct rt6_info *rt;
1973 struct fib6_table *table;
1974
1975 table = fib6_get_table(dev_net(dev),
1976 addrconf_rt_table(dev, RT6_TABLE_MAIN));
1977 if (!table)
1978 return NULL;
1979
1980 read_lock_bh(&table->tb6_lock);
1981 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1982 if (dev == rt->dst.dev &&
1983 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1984 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1985 break;
1986 }
1987 if (rt)
1988 dst_hold(&rt->dst);
1989 read_unlock_bh(&table->tb6_lock);
1990 return rt;
1991 }
1992
1993 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1994 struct net_device *dev,
1995 unsigned int pref)
1996 {
1997 struct fib6_config cfg = {
1998 .fc_table = addrconf_rt_table(dev, RT6_TABLE_DFLT),
1999 .fc_metric = IP6_RT_PRIO_USER,
2000 .fc_ifindex = dev->ifindex,
2001 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2002 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2003 .fc_nlinfo.portid = 0,
2004 .fc_nlinfo.nlh = NULL,
2005 .fc_nlinfo.nl_net = dev_net(dev),
2006 };
2007
2008 cfg.fc_gateway = *gwaddr;
2009
2010 ip6_route_add(&cfg);
2011
2012 return rt6_get_dflt_router(gwaddr, dev);
2013 }
2014
2015
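/*
 * fib6_clean_all() callback: returning -1 asks the tree walker to
 * delete the route.  RA-learned routes are purged unless the
 * interface is still configured with accept_ra == 2.
 */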
2016 int rt6_addrconf_purge(struct rt6_info *rt, void *arg) {
2017 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2018 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2))
2019 return -1;
2020 return 0;
2021 }
2022
2023 void rt6_purge_dflt_routers(struct net *net)
2024 {
2025 fib6_clean_all(net, rt6_addrconf_purge, NULL);
2026 }
2027
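/* Convert the legacy in6_rtmsg (ioctl) representation into a fib6_config. */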
2028 static void rtmsg_to_fib6_config(struct net *net,
2029 struct in6_rtmsg *rtmsg,
2030 struct fib6_config *cfg)
2031 {
2032 memset(cfg, 0, sizeof(*cfg));
2033
2034 cfg->fc_table = RT6_TABLE_MAIN;
2035 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2036 cfg->fc_metric = rtmsg->rtmsg_metric;
2037 cfg->fc_expires = rtmsg->rtmsg_info;
2038 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2039 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2040 cfg->fc_flags = rtmsg->rtmsg_flags;
2041
2042 cfg->fc_nlinfo.nl_net = net;
2043
2044 cfg->fc_dst = rtmsg->rtmsg_dst;
2045 cfg->fc_src = rtmsg->rtmsg_src;
2046 cfg->fc_gateway = rtmsg->rtmsg_gateway;
2047 }
2048
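/*
 * SIOCADDRT/SIOCDELRT handler for the legacy route ioctls; requires
 * CAP_NET_ADMIN in the owning user namespace and always operates on
 * the main table.
 */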
2049 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2050 {
2051 struct fib6_config cfg;
2052 struct in6_rtmsg rtmsg;
2053 int err;
2054
2055 switch (cmd) {
2056 case SIOCADDRT: /* Add a route */
2057 case SIOCDELRT: /* Delete a route */
2058 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2059 return -EPERM;
2060 err = copy_from_user(&rtmsg, arg,
2061 sizeof(struct in6_rtmsg));
2062 if (err)
2063 return -EFAULT;
2064
2065 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2066
2067 rtnl_lock();
2068 switch (cmd) {
2069 case SIOCADDRT:
2070 err = ip6_route_add(&cfg);
2071 break;
2072 case SIOCDELRT:
2073 err = ip6_route_del(&cfg);
2074 break;
2075 default:
2076 err = -EINVAL;
2077 }
2078 rtnl_unlock();
2079
2080 return err;
2081 }
2082
2083 return -EINVAL;
2084 }
2085
2086 /*
2087 * Drop the packet on the floor
2088 */
2089
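/*
 * Account the undeliverable packet in the appropriate SNMP counter
 * (IPSTATS_MIB_INADDRERRORS for an unspecified destination), send an
 * ICMPv6 Destination Unreachable with the given code and free the skb.
 */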
2090 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2091 {
2092 int type;
2093 struct dst_entry *dst = skb_dst(skb);
2094 switch (ipstats_mib_noroutes) {
2095 case IPSTATS_MIB_INNOROUTES:
2096 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2097 if (type == IPV6_ADDR_ANY) {
2098 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2099 IPSTATS_MIB_INADDRERRORS);
2100 break;
2101 }
2102 /* FALLTHROUGH */
2103 case IPSTATS_MIB_OUTNOROUTES:
2104 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2105 ipstats_mib_noroutes);
2106 break;
2107 }
2108 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2109 kfree_skb(skb);
2110 return 0;
2111 }
2112
2113 static int ip6_pkt_discard(struct sk_buff *skb)
2114 {
2115 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2116 }
2117
2118 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2119 {
2120 skb->dev = skb_dst(skb)->dev;
2121 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2122 }
2123
2124 static int ip6_pkt_prohibit(struct sk_buff *skb)
2125 {
2126 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2127 }
2128
2129 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2130 {
2131 skb->dev = skb_dst(skb)->dev;
2132 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2133 }
2134
2135 /*
2136 * Allocate a dst for local (unicast / anycast) address.
2137 */
2138
2139 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2140 const struct in6_addr *addr,
2141 bool anycast)
2142 {
2143 struct net *net = dev_net(idev->dev);
2144 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2145 DST_NOCOUNT, NULL);
2146 if (!rt)
2147 return ERR_PTR(-ENOMEM);
2148
2149 in6_dev_hold(idev);
2150
2151 rt->dst.flags |= DST_HOST;
2152 rt->dst.input = ip6_input;
2153 rt->dst.output = ip6_output;
2154 rt->rt6i_idev = idev;
2155
2156 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2157 if (anycast)
2158 rt->rt6i_flags |= RTF_ANYCAST;
2159 else
2160 rt->rt6i_flags |= RTF_LOCAL;
2161
2162 rt->rt6i_gateway = *addr;
2163 rt->rt6i_dst.addr = *addr;
2164 rt->rt6i_dst.plen = 128;
2165 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2166
2167 atomic_set(&rt->dst.__refcnt, 1);
2168
2169 return rt;
2170 }
2171
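/*
 * Pick a source address for @daddr: use the route's preferred source
 * address if one is configured, otherwise fall back to ordinary
 * source address selection on the outgoing device.
 */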
2172 int ip6_route_get_saddr(struct net *net,
2173 struct rt6_info *rt,
2174 const struct in6_addr *daddr,
2175 unsigned int prefs,
2176 struct in6_addr *saddr)
2177 {
2178 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
2179 int err = 0;
2180 if (rt->rt6i_prefsrc.plen)
2181 *saddr = rt->rt6i_prefsrc.addr;
2182 else
2183 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2184 daddr, prefs, saddr);
2185 return err;
2186 }
2187
2188 /* Remove a deleted IP address from prefsrc entries. */
2189 struct arg_dev_net_ip {
2190 struct net_device *dev;
2191 struct net *net;
2192 struct in6_addr *addr;
2193 };
2194
2195 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2196 {
2197 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2198 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2199 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2200
2201 if (((void *)rt->dst.dev == dev || !dev) &&
2202 rt != net->ipv6.ip6_null_entry &&
2203 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2204 /* remove prefsrc entry */
2205 rt->rt6i_prefsrc.plen = 0;
2206 }
2207 return 0;
2208 }
2209
2210 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2211 {
2212 struct net *net = dev_net(ifp->idev->dev);
2213 struct arg_dev_net_ip adni = {
2214 .dev = ifp->idev->dev,
2215 .net = net,
2216 .addr = &ifp->addr,
2217 };
2218 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2219 }
2220
2221 #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2222 #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2223
2224 /* Remove routers and update dst entries when a gateway turns into a host. */
2225 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2226 {
2227 struct in6_addr *gateway = (struct in6_addr *)arg;
2228
2229 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2230 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2231 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2232 return -1;
2233 }
2234 return 0;
2235 }
2236
2237 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2238 {
2239 fib6_clean_all(net, fib6_clean_tohost, gateway);
2240 }
2241
2242 struct arg_dev_net {
2243 struct net_device *dev;
2244 struct net *net;
2245 };
2246
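/*
 * fib6_clean_all() callback used when a device goes down: delete every
 * route through @dev (or through any device when @dev is NULL), except
 * the per-namespace null entry.
 */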
2247 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2248 {
2249 const struct arg_dev_net *adn = arg;
2250 const struct net_device *dev = adn->dev;
2251
2252 if ((rt->dst.dev == dev || !dev) &&
2253 rt != adn->net->ipv6.ip6_null_entry)
2254 return -1;
2255
2256 return 0;
2257 }
2258
2259 void rt6_ifdown(struct net *net, struct net_device *dev)
2260 {
2261 struct arg_dev_net adn = {
2262 .dev = dev,
2263 .net = net,
2264 };
2265
2266 fib6_clean_all(net, fib6_ifdown, &adn);
2267 icmp6_clean_all(fib6_ifdown, &adn);
2268 }
2269
2270 struct rt6_mtu_change_arg {
2271 struct net_device *dev;
2272 unsigned int mtu;
2273 };
2274
2275 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2276 {
2277 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2278 struct inet6_dev *idev;
2279
2280 /* In IPv6, PMTU discovery is not optional,
2281 so the RTAX_MTU lock cannot disable it.
2282 We still use this lock to block changes
2283 caused by addrconf/ndisc.
2284 */
2285
2286 idev = __in6_dev_get(arg->dev);
2287 if (!idev)
2288 return 0;
2289
2290 /* For an administrative MTU increase, there is no way to discover
2291 an IPv6 PMTU increase, so the PMTU increase must be applied here.
2292 Since RFC 1981 doesn't cover administrative MTU increases,
2293 updating the PMTU on increase is a MUST (e.g. jumbo frames).
2294 */
2295 /*
2296 If the new MTU is less than the route PMTU, the new MTU will be the
2297 lowest MTU in the path; update the route PMTU to reflect the
2298 decrease. If the new MTU is greater than the route PMTU, and the
2299 old MTU was the lowest MTU in the path, update the route PMTU
2300 to reflect the increase. In this case, if another node's MTU
2301 is now the lowest in the path, a Packet Too Big message will
2302 trigger PMTU discovery again.
2303 */
2304 if (rt->dst.dev == arg->dev &&
2305 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2306 (dst_mtu(&rt->dst) >= arg->mtu ||
2307 (dst_mtu(&rt->dst) < arg->mtu &&
2308 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2309 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2310 }
2311 return 0;
2312 }
2313
2314 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2315 {
2316 struct rt6_mtu_change_arg arg = {
2317 .dev = dev,
2318 .mtu = mtu,
2319 };
2320
2321 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2322 }
2323
2324 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2325 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2326 [RTA_OIF] = { .type = NLA_U32 },
2327 [RTA_IIF] = { .type = NLA_U32 },
2328 [RTA_PRIORITY] = { .type = NLA_U32 },
2329 [RTA_METRICS] = { .type = NLA_NESTED },
2330 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
2331 [RTA_UID] = { .type = NLA_U32 },
2332 };
2333
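/*
 * Parse an RTM_NEWROUTE/RTM_DELROUTE netlink message into a
 * fib6_config, validating attribute lengths as we go.
 */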
2334 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2335 struct fib6_config *cfg)
2336 {
2337 struct rtmsg *rtm;
2338 struct nlattr *tb[RTA_MAX+1];
2339 int err;
2340
2341 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2342 if (err < 0)
2343 goto errout;
2344
2345 err = -EINVAL;
2346 rtm = nlmsg_data(nlh);
2347 memset(cfg, 0, sizeof(*cfg));
2348
2349 cfg->fc_table = rtm->rtm_table;
2350 cfg->fc_dst_len = rtm->rtm_dst_len;
2351 cfg->fc_src_len = rtm->rtm_src_len;
2352 cfg->fc_flags = RTF_UP;
2353 cfg->fc_protocol = rtm->rtm_protocol;
2354 cfg->fc_type = rtm->rtm_type;
2355
2356 if (rtm->rtm_type == RTN_UNREACHABLE ||
2357 rtm->rtm_type == RTN_BLACKHOLE ||
2358 rtm->rtm_type == RTN_PROHIBIT ||
2359 rtm->rtm_type == RTN_THROW)
2360 cfg->fc_flags |= RTF_REJECT;
2361
2362 if (rtm->rtm_type == RTN_LOCAL)
2363 cfg->fc_flags |= RTF_LOCAL;
2364
2365 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2366 cfg->fc_nlinfo.nlh = nlh;
2367 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2368
2369 if (tb[RTA_GATEWAY]) {
2370 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2371 cfg->fc_flags |= RTF_GATEWAY;
2372 }
2373
2374 if (tb[RTA_DST]) {
2375 int plen = (rtm->rtm_dst_len + 7) >> 3;
2376
2377 if (nla_len(tb[RTA_DST]) < plen)
2378 goto errout;
2379
2380 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2381 }
2382
2383 if (tb[RTA_SRC]) {
2384 int plen = (rtm->rtm_src_len + 7) >> 3;
2385
2386 if (nla_len(tb[RTA_SRC]) < plen)
2387 goto errout;
2388
2389 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2390 }
2391
2392 if (tb[RTA_PREFSRC])
2393 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2394
2395 if (tb[RTA_OIF])
2396 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2397
2398 if (tb[RTA_PRIORITY])
2399 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2400
2401 if (tb[RTA_METRICS]) {
2402 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2403 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2404 }
2405
2406 if (tb[RTA_TABLE])
2407 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2408
2409 if (tb[RTA_MULTIPATH]) {
2410 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2411 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2412 }
2413
2414 err = 0;
2415 errout:
2416 return err;
2417 }
2418
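/*
 * Walk the RTA_MULTIPATH nexthop list and add or delete one route per
 * nexthop.  If an add fails part way through, restart the loop in
 * delete mode so the nexthops already installed are rolled back.
 * Userspace typically reaches this path with something like (sketch):
 *
 *   ip -6 route add 2001:db8::/64 nexthop via fe80::1 dev eth0 \
 *                                 nexthop via fe80::2 dev eth0
 */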
2419 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2420 {
2421 struct fib6_config r_cfg;
2422 struct rtnexthop *rtnh;
2423 int remaining;
2424 int attrlen;
2425 int err = 0, last_err = 0;
2426
2427 beginning:
2428 rtnh = (struct rtnexthop *)cfg->fc_mp;
2429 remaining = cfg->fc_mp_len;
2430
2431 /* Parse a Multipath Entry */
2432 while (rtnh_ok(rtnh, remaining)) {
2433 memcpy(&r_cfg, cfg, sizeof(*cfg));
2434 if (rtnh->rtnh_ifindex)
2435 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2436
2437 attrlen = rtnh_attrlen(rtnh);
2438 if (attrlen > 0) {
2439 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2440
2441 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2442 if (nla) {
2443 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2444 r_cfg.fc_flags |= RTF_GATEWAY;
2445 }
2446 }
2447 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2448 if (err) {
2449 last_err = err;
2450 /* If we are trying to remove a route, do not stop the
2451 * loop when ip6_route_del() fails (because the next hop
2452 * is already gone); we should try to remove all next hops.
2453 */
2454 if (add) {
2455 /* If add fails, we should try to delete all
2456 * next hops that have been already added.
2457 */
2458 add = 0;
2459 goto beginning;
2460 }
2461 }
2462 /* Because each route is added like a single route, we remove
2463 * this flag after the first nexthop (if there is a collision,
2464 * we have already failed to add the first nexthop:
2465 * fib6_add_rt2node() has rejected it).
2466 */
2467 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2468 rtnh = rtnh_next(rtnh, &remaining);
2469 }
2470
2471 return last_err;
2472 }
2473
2474 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2475 {
2476 struct fib6_config cfg;
2477 int err;
2478
2479 err = rtm_to_fib6_config(skb, nlh, &cfg);
2480 if (err < 0)
2481 return err;
2482
2483 if (cfg.fc_mp)
2484 return ip6_route_multipath(&cfg, 0);
2485 else
2486 return ip6_route_del(&cfg);
2487 }
2488
2489 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2490 {
2491 struct fib6_config cfg;
2492 int err;
2493
2494 err = rtm_to_fib6_config(skb, nlh, &cfg);
2495 if (err < 0)
2496 return err;
2497
2498 if (cfg.fc_mp)
2499 return ip6_route_multipath(&cfg, 1);
2500 else
2501 return ip6_route_add(&cfg);
2502 }
2503
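/*
 * Worst-case size of a single-route netlink message; used to size the
 * skb allocated for route change notifications.
 */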
2504 static inline size_t rt6_nlmsg_size(void)
2505 {
2506 return NLMSG_ALIGN(sizeof(struct rtmsg))
2507 + nla_total_size(16) /* RTA_SRC */
2508 + nla_total_size(16) /* RTA_DST */
2509 + nla_total_size(16) /* RTA_GATEWAY */
2510 + nla_total_size(16) /* RTA_PREFSRC */
2511 + nla_total_size(4) /* RTA_TABLE */
2512 + nla_total_size(4) /* RTA_IIF */
2513 + nla_total_size(4) /* RTA_OIF */
2514 + nla_total_size(4) /* RTA_PRIORITY */
2515 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2516 + nla_total_size(sizeof(struct rta_cacheinfo));
2517 }
2518
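/*
 * Serialize one rt6_info into an rtmsg plus attributes.  When @prefix
 * is set, routes without RTF_PREFIX_RT are skipped (return 1); if the
 * skb runs out of room, the message is cancelled and -EMSGSIZE returned.
 */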
2519 static int rt6_fill_node(struct net *net,
2520 struct sk_buff *skb, struct rt6_info *rt,
2521 struct in6_addr *dst, struct in6_addr *src,
2522 int iif, int type, u32 portid, u32 seq,
2523 int prefix, int nowait, unsigned int flags)
2524 {
2525 struct rtmsg *rtm;
2526 struct nlmsghdr *nlh;
2527 long expires;
2528 u32 table;
2529
2530 if (prefix) { /* user wants prefix routes only */
2531 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2532 /* success since this is not a prefix route */
2533 return 1;
2534 }
2535 }
2536
2537 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2538 if (!nlh)
2539 return -EMSGSIZE;
2540
2541 rtm = nlmsg_data(nlh);
2542 rtm->rtm_family = AF_INET6;
2543 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2544 rtm->rtm_src_len = rt->rt6i_src.plen;
2545 rtm->rtm_tos = 0;
2546 if (rt->rt6i_table)
2547 table = rt->rt6i_table->tb6_id;
2548 else
2549 table = RT6_TABLE_UNSPEC;
2550 rtm->rtm_table = table;
2551 if (nla_put_u32(skb, RTA_TABLE, table))
2552 goto nla_put_failure;
2553 if (rt->rt6i_flags & RTF_REJECT) {
2554 switch (rt->dst.error) {
2555 case -EINVAL:
2556 rtm->rtm_type = RTN_BLACKHOLE;
2557 break;
2558 case -EACCES:
2559 rtm->rtm_type = RTN_PROHIBIT;
2560 break;
2561 case -EAGAIN:
2562 rtm->rtm_type = RTN_THROW;
2563 break;
2564 default:
2565 rtm->rtm_type = RTN_UNREACHABLE;
2566 break;
2567 }
2568 }
2569 else if (rt->rt6i_flags & RTF_LOCAL)
2570 rtm->rtm_type = RTN_LOCAL;
2571 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2572 rtm->rtm_type = RTN_LOCAL;
2573 else
2574 rtm->rtm_type = RTN_UNICAST;
2575 rtm->rtm_flags = 0;
2576 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2577 rtm->rtm_protocol = rt->rt6i_protocol;
2578 if (rt->rt6i_flags & RTF_DYNAMIC)
2579 rtm->rtm_protocol = RTPROT_REDIRECT;
2580 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2581 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2582 rtm->rtm_protocol = RTPROT_RA;
2583 else
2584 rtm->rtm_protocol = RTPROT_KERNEL;
2585 }
2586
2587 if (rt->rt6i_flags & RTF_CACHE)
2588 rtm->rtm_flags |= RTM_F_CLONED;
2589
2590 if (dst) {
2591 if (nla_put(skb, RTA_DST, 16, dst))
2592 goto nla_put_failure;
2593 rtm->rtm_dst_len = 128;
2594 } else if (rtm->rtm_dst_len)
2595 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2596 goto nla_put_failure;
2597 #ifdef CONFIG_IPV6_SUBTREES
2598 if (src) {
2599 if (nla_put(skb, RTA_SRC, 16, src))
2600 goto nla_put_failure;
2601 rtm->rtm_src_len = 128;
2602 } else if (rtm->rtm_src_len &&
2603 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2604 goto nla_put_failure;
2605 #endif
2606 if (iif) {
2607 #ifdef CONFIG_IPV6_MROUTE
2608 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2609 int err = ip6mr_get_route(net, skb, rtm, nowait,
2610 portid);
2611
2612 if (err <= 0) {
2613 if (!nowait) {
2614 if (err == 0)
2615 return 0;
2616 goto nla_put_failure;
2617 } else {
2618 if (err == -EMSGSIZE)
2619 goto nla_put_failure;
2620 }
2621 }
2622 } else
2623 #endif
2624 if (nla_put_u32(skb, RTA_IIF, iif))
2625 goto nla_put_failure;
2626 } else if (dst) {
2627 struct in6_addr saddr_buf;
2628 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2629 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2630 goto nla_put_failure;
2631 }
2632
2633 if (rt->rt6i_prefsrc.plen) {
2634 struct in6_addr saddr_buf;
2635 saddr_buf = rt->rt6i_prefsrc.addr;
2636 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2637 goto nla_put_failure;
2638 }
2639
2640 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2641 goto nla_put_failure;
2642
2643 if (rt->rt6i_flags & RTF_GATEWAY) {
2644 if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2645 goto nla_put_failure;
2646 }
2647
2648 if (rt->dst.dev &&
2649 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2650 goto nla_put_failure;
2651 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2652 goto nla_put_failure;
2653
2654 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2655
2656 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2657 goto nla_put_failure;
2658
2659 return nlmsg_end(skb, nlh);
2660
2661 nla_put_failure:
2662 nlmsg_cancel(skb, nlh);
2663 return -EMSGSIZE;
2664 }
2665
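/* Per-route callback for RTM_GETROUTE dumps; honours RTM_F_PREFIX. */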
2666 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2667 {
2668 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2669 int prefix;
2670
2671 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2672 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2673 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2674 } else
2675 prefix = 0;
2676
2677 return rt6_fill_node(arg->net,
2678 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2679 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2680 prefix, 0, NLM_F_MULTI);
2681 }
2682
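/*
 * Handle a targeted RTM_GETROUTE request: perform an input-side lookup
 * when RTA_IIF is given, otherwise an output lookup, then unicast a
 * single RTM_NEWROUTE answer back to the requester.
 */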
2683 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2684 {
2685 struct net *net = sock_net(in_skb->sk);
2686 struct nlattr *tb[RTA_MAX+1];
2687 struct rt6_info *rt;
2688 struct sk_buff *skb;
2689 struct rtmsg *rtm;
2690 struct flowi6 fl6;
2691 int err, iif = 0, oif = 0;
2692
2693 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2694 if (err < 0)
2695 goto errout;
2696
2697 err = -EINVAL;
2698 memset(&fl6, 0, sizeof(fl6));
2699
2700 if (tb[RTA_SRC]) {
2701 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2702 goto errout;
2703
2704 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2705 }
2706
2707 if (tb[RTA_DST]) {
2708 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2709 goto errout;
2710
2711 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2712 }
2713
2714 if (tb[RTA_IIF])
2715 iif = nla_get_u32(tb[RTA_IIF]);
2716
2717 if (tb[RTA_OIF])
2718 oif = nla_get_u32(tb[RTA_OIF]);
2719
2720 if (tb[RTA_MARK])
2721 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2722
2723 if (tb[RTA_UID])
2724 fl6.flowi6_uid = make_kuid(current_user_ns(),
2725 nla_get_u32(tb[RTA_UID]));
2726 else
2727 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
2728
2729 if (iif) {
2730 struct net_device *dev;
2731 int flags = 0;
2732
2733 dev = __dev_get_by_index(net, iif);
2734 if (!dev) {
2735 err = -ENODEV;
2736 goto errout;
2737 }
2738
2739 fl6.flowi6_iif = iif;
2740
2741 if (!ipv6_addr_any(&fl6.saddr))
2742 flags |= RT6_LOOKUP_F_HAS_SADDR;
2743
2744 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2745 flags);
2746 } else {
2747 fl6.flowi6_oif = oif;
2748
2749 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2750 }
2751
2752 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2753 if (!skb) {
2754 ip6_rt_put(rt);
2755 err = -ENOBUFS;
2756 goto errout;
2757 }
2758
2759 /* Reserve room for dummy headers; this skb can pass
2760 through a good chunk of the routing engine.
2761 */
2762 skb_reset_mac_header(skb);
2763 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2764
2765 skb_dst_set(skb, &rt->dst);
2766
2767 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2768 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2769 nlh->nlmsg_seq, 0, 0, 0);
2770 if (err < 0) {
2771 kfree_skb(skb);
2772 goto errout;
2773 }
2774
2775 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2776 errout:
2777 return err;
2778 }
2779
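/* Broadcast a route add/delete to RTNLGRP_IPV6_ROUTE listeners. */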
2780 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2781 {
2782 struct sk_buff *skb;
2783 struct net *net = info->nl_net;
2784 u32 seq;
2785 int err;
2786
2787 err = -ENOBUFS;
2788 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2789
2790 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2791 if (!skb)
2792 goto errout;
2793
2794 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2795 event, info->portid, seq, 0, 0, 0);
2796 if (err < 0) {
2797 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2798 WARN_ON(err == -EMSGSIZE);
2799 kfree_skb(skb);
2800 goto errout;
2801 }
2802 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2803 info->nlh, gfp_any());
2804 return;
2805 errout:
2806 if (err < 0)
2807 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2808 }
2809
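/*
 * Netdevice notifier: attach the per-namespace null (and, with multiple
 * tables, prohibit/blackhole) dst entries to the loopback device on
 * NETDEV_REGISTER and drop the references on NETDEV_UNREGISTER.
 */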
2810 static int ip6_route_dev_notify(struct notifier_block *this,
2811 unsigned long event, void *ptr)
2812 {
2813 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2814 struct net *net = dev_net(dev);
2815
2816 if (!(dev->flags & IFF_LOOPBACK))
2817 return NOTIFY_OK;
2818
2819 if (event == NETDEV_REGISTER) {
2820 net->ipv6.ip6_null_entry->dst.dev = dev;
2821 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2822 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2823 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2824 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2825 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2826 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2827 #endif
2828 } else if (event == NETDEV_UNREGISTER &&
2829 dev->reg_state != NETREG_UNREGISTERED) {
2830 /* NETDEV_UNREGISTER can be fired multiple times by
2831 * netdev_wait_allrefs(). Make sure we only do this once.
2832 */
2833 in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
2834 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2835 in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
2836 in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev);
2837 #endif
2838 }
2839
2840 return NOTIFY_OK;
2841 }
2842
2843 /*
2844 * /proc
2845 */
2846
2847 #ifdef CONFIG_PROC_FS
2848
2849 static const struct file_operations ipv6_route_proc_fops = {
2850 .owner = THIS_MODULE,
2851 .open = ipv6_route_open,
2852 .read = seq_read,
2853 .llseek = seq_lseek,
2854 .release = seq_release_net,
2855 };
2856
2857 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2858 {
2859 struct net *net = (struct net *)seq->private;
2860 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2861 net->ipv6.rt6_stats->fib_nodes,
2862 net->ipv6.rt6_stats->fib_route_nodes,
2863 net->ipv6.rt6_stats->fib_rt_alloc,
2864 net->ipv6.rt6_stats->fib_rt_entries,
2865 net->ipv6.rt6_stats->fib_rt_cache,
2866 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2867 net->ipv6.rt6_stats->fib_discarded_routes);
2868
2869 return 0;
2870 }
2871
2872 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2873 {
2874 return single_open_net(inode, file, rt6_stats_seq_show);
2875 }
2876
2877 static const struct file_operations rt6_stats_seq_fops = {
2878 .owner = THIS_MODULE,
2879 .open = rt6_stats_seq_open,
2880 .read = seq_read,
2881 .llseek = seq_lseek,
2882 .release = single_release_net,
2883 };
2884 #endif /* CONFIG_PROC_FS */
2885
2886 #ifdef CONFIG_SYSCTL
2887
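/*
 * Writing to this sysctl triggers an immediate FIB garbage-collection
 * run; a sketch of the usual invocation (assuming the standard
 * net.ipv6.route hierarchy):
 *
 *   sysctl -w net.ipv6.route.flush=1
 */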
2888 static
2889 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2890 void __user *buffer, size_t *lenp, loff_t *ppos)
2891 {
2892 struct net *net;
2893 int delay;
2894 if (!write)
2895 return -EINVAL;
2896
2897 net = (struct net *)ctl->extra1;
2898 delay = net->ipv6.sysctl.flush_delay;
2899 proc_dointvec(ctl, write, buffer, lenp, ppos);
2900 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2901 return 0;
2902 }
2903
2904 struct ctl_table ipv6_route_table_template[] = {
2905 {
2906 .procname = "flush",
2907 .data = &init_net.ipv6.sysctl.flush_delay,
2908 .maxlen = sizeof(int),
2909 .mode = 0200,
2910 .proc_handler = ipv6_sysctl_rtcache_flush
2911 },
2912 {
2913 .procname = "gc_thresh",
2914 .data = &ip6_dst_ops_template.gc_thresh,
2915 .maxlen = sizeof(int),
2916 .mode = 0644,
2917 .proc_handler = proc_dointvec,
2918 },
2919 {
2920 .procname = "max_size",
2921 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2922 .maxlen = sizeof(int),
2923 .mode = 0644,
2924 .proc_handler = proc_dointvec,
2925 },
2926 {
2927 .procname = "gc_min_interval",
2928 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2929 .maxlen = sizeof(int),
2930 .mode = 0644,
2931 .proc_handler = proc_dointvec_jiffies,
2932 },
2933 {
2934 .procname = "gc_timeout",
2935 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2936 .maxlen = sizeof(int),
2937 .mode = 0644,
2938 .proc_handler = proc_dointvec_jiffies,
2939 },
2940 {
2941 .procname = "gc_interval",
2942 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2943 .maxlen = sizeof(int),
2944 .mode = 0644,
2945 .proc_handler = proc_dointvec_jiffies,
2946 },
2947 {
2948 .procname = "gc_elasticity",
2949 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2950 .maxlen = sizeof(int),
2951 .mode = 0644,
2952 .proc_handler = proc_dointvec,
2953 },
2954 {
2955 .procname = "mtu_expires",
2956 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2957 .maxlen = sizeof(int),
2958 .mode = 0644,
2959 .proc_handler = proc_dointvec_jiffies,
2960 },
2961 {
2962 .procname = "min_adv_mss",
2963 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2964 .maxlen = sizeof(int),
2965 .mode = 0644,
2966 .proc_handler = proc_dointvec,
2967 },
2968 {
2969 .procname = "gc_min_interval_ms",
2970 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2971 .maxlen = sizeof(int),
2972 .mode = 0644,
2973 .proc_handler = proc_dointvec_ms_jiffies,
2974 },
2975 { }
2976 };
2977
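/*
 * Duplicate the sysctl template for a namespace and point each entry's
 * .data at the per-namespace field; the indices below must match the
 * ordering of the template above.
 */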
2978 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2979 {
2980 struct ctl_table *table;
2981
2982 table = kmemdup(ipv6_route_table_template,
2983 sizeof(ipv6_route_table_template),
2984 GFP_KERNEL);
2985
2986 if (table) {
2987 table[0].data = &net->ipv6.sysctl.flush_delay;
2988 table[0].extra1 = net;
2989 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2990 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2991 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2992 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2993 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2994 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2995 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2996 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2997 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2998
2999 /* Don't export sysctls to unprivileged users */
3000 if (net->user_ns != &init_user_ns)
3001 table[0].procname = NULL;
3002 }
3003
3004 return table;
3005 }
3006 #endif
3007
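/*
 * Per-namespace init: copy the dst_ops template, allocate the special
 * null/prohibit/blackhole route entries and set the routing sysctl
 * defaults.
 */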
3008 static int __net_init ip6_route_net_init(struct net *net)
3009 {
3010 int ret = -ENOMEM;
3011
3012 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3013 sizeof(net->ipv6.ip6_dst_ops));
3014
3015 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3016 goto out_ip6_dst_ops;
3017
3018 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3019 sizeof(*net->ipv6.ip6_null_entry),
3020 GFP_KERNEL);
3021 if (!net->ipv6.ip6_null_entry)
3022 goto out_ip6_dst_entries;
3023 net->ipv6.ip6_null_entry->dst.path =
3024 (struct dst_entry *)net->ipv6.ip6_null_entry;
3025 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3026 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3027 ip6_template_metrics, true);
3028
3029 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3030 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3031 sizeof(*net->ipv6.ip6_prohibit_entry),
3032 GFP_KERNEL);
3033 if (!net->ipv6.ip6_prohibit_entry)
3034 goto out_ip6_null_entry;
3035 net->ipv6.ip6_prohibit_entry->dst.path =
3036 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3037 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3038 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3039 ip6_template_metrics, true);
3040
3041 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3042 sizeof(*net->ipv6.ip6_blk_hole_entry),
3043 GFP_KERNEL);
3044 if (!net->ipv6.ip6_blk_hole_entry)
3045 goto out_ip6_prohibit_entry;
3046 net->ipv6.ip6_blk_hole_entry->dst.path =
3047 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3048 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3049 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3050 ip6_template_metrics, true);
3051 #endif
3052
3053 net->ipv6.sysctl.flush_delay = 0;
3054 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3055 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3056 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3057 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3058 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3059 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3060 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3061
3062 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3063
3064 ret = 0;
3065 out:
3066 return ret;
3067
3068 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3069 out_ip6_prohibit_entry:
3070 kfree(net->ipv6.ip6_prohibit_entry);
3071 out_ip6_null_entry:
3072 kfree(net->ipv6.ip6_null_entry);
3073 #endif
3074 out_ip6_dst_entries:
3075 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3076 out_ip6_dst_ops:
3077 goto out;
3078 }
3079
3080 static void __net_exit ip6_route_net_exit(struct net *net)
3081 {
3082 kfree(net->ipv6.ip6_null_entry);
3083 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3084 kfree(net->ipv6.ip6_prohibit_entry);
3085 kfree(net->ipv6.ip6_blk_hole_entry);
3086 #endif
3087 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3088 }
3089
3090 static int __net_init ip6_route_net_init_late(struct net *net)
3091 {
3092 #ifdef CONFIG_PROC_FS
3093 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3094 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3095 #endif
3096 return 0;
3097 }
3098
3099 static void __net_exit ip6_route_net_exit_late(struct net *net)
3100 {
3101 #ifdef CONFIG_PROC_FS
3102 remove_proc_entry("ipv6_route", net->proc_net);
3103 remove_proc_entry("rt6_stats", net->proc_net);
3104 #endif
3105 }
3106
3107 static struct pernet_operations ip6_route_net_ops = {
3108 .init = ip6_route_net_init,
3109 .exit = ip6_route_net_exit,
3110 };
3111
3112 static int __net_init ipv6_inetpeer_init(struct net *net)
3113 {
3114 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3115
3116 if (!bp)
3117 return -ENOMEM;
3118 inet_peer_base_init(bp);
3119 net->ipv6.peers = bp;
3120 return 0;
3121 }
3122
3123 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3124 {
3125 struct inet_peer_base *bp = net->ipv6.peers;
3126
3127 net->ipv6.peers = NULL;
3128 inetpeer_invalidate_tree(bp);
3129 kfree(bp);
3130 }
3131
3132 static struct pernet_operations ipv6_inetpeer_ops = {
3133 .init = ipv6_inetpeer_init,
3134 .exit = ipv6_inetpeer_exit,
3135 };
3136
3137 static struct pernet_operations ip6_route_net_late_ops = {
3138 .init = ip6_route_net_init_late,
3139 .exit = ip6_route_net_exit_late,
3140 };
3141
3142 static struct notifier_block ip6_route_dev_notifier = {
3143 .notifier_call = ip6_route_dev_notify,
3144 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
3145 };
3146
3147 void __init ip6_route_init_special_entries(void)
3148 {
3149 /* Registration of the loopback device happens before this code runs,
3150 * so the loopback reference in rt6_info has not been taken; take it
3151 * manually for init_net */
3152 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3153 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3154 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3155 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3156 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3157 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3158 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3159 #endif
3160 }
3161
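/*
 * Module init: create the rt6_info slab cache, register the pernet
 * subsystems, initialise fib6/xfrm6/fib6-rules, hook up the rtnetlink
 * handlers and the netdevice notifier; errors unwind in reverse order.
 */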
3162 int __init ip6_route_init(void)
3163 {
3164 int ret;
3165
3166 ret = -ENOMEM;
3167 ip6_dst_ops_template.kmem_cachep =
3168 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3169 SLAB_HWCACHE_ALIGN, NULL);
3170 if (!ip6_dst_ops_template.kmem_cachep)
3171 goto out;
3172
3173 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3174 if (ret)
3175 goto out_kmem_cache;
3176
3177 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3178 if (ret)
3179 goto out_dst_entries;
3180
3181 ret = register_pernet_subsys(&ip6_route_net_ops);
3182 if (ret)
3183 goto out_register_inetpeer;
3184
3185 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3186
3187 ret = fib6_init();
3188 if (ret)
3189 goto out_register_subsys;
3190
3191 ret = xfrm6_init();
3192 if (ret)
3193 goto out_fib6_init;
3194
3195 ret = fib6_rules_init();
3196 if (ret)
3197 goto xfrm6_init;
3198
3199 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3200 if (ret)
3201 goto fib6_rules_init;
3202
3203 ret = -ENOBUFS;
3204 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3205 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3206 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3207 goto out_register_late_subsys;
3208
3209 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3210 if (ret)
3211 goto out_register_late_subsys;
3212
3213 out:
3214 return ret;
3215
3216 out_register_late_subsys:
3217 unregister_pernet_subsys(&ip6_route_net_late_ops);
3218 fib6_rules_init:
3219 fib6_rules_cleanup();
3220 xfrm6_init:
3221 xfrm6_fini();
3222 out_fib6_init:
3223 fib6_gc_cleanup();
3224 out_register_subsys:
3225 unregister_pernet_subsys(&ip6_route_net_ops);
3226 out_register_inetpeer:
3227 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3228 out_dst_entries:
3229 dst_entries_destroy(&ip6_dst_blackhole_ops);
3230 out_kmem_cache:
3231 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3232 goto out;
3233 }
3234
3235 void ip6_route_cleanup(void)
3236 {
3237 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3238 unregister_pernet_subsys(&ip6_route_net_late_ops);
3239 fib6_rules_cleanup();
3240 xfrm6_fini();
3241 fib6_gc_cleanup();
3242 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3243 unregister_pernet_subsys(&ip6_route_net_ops);
3244 dst_entries_destroy(&ip6_dst_blackhole_ops);
3245 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3246 }
3247