1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 #include <trace/events/fib6.h>
66
67 #include <asm/uaccess.h>
68
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72
/* Outcome of a next-hop (NUD) reachability check used during route
 * selection.  Negative values are failures of increasing severity:
 * FAIL_DO_RR merely asks for round-robin to a sibling router, while
 * FAIL_HARD rules the route out entirely.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* do not use this route */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour in NUD_FAILED state */
	RT6_NUD_FAIL_DO_RR = -1,	/* unknown neighbour: try next router */
	RT6_NUD_SUCCEED = 1		/* neighbour (probably) reachable */
};
79
80 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
81 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
82 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
83 static unsigned int ip6_mtu(const struct dst_entry *dst);
84 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85 static void ip6_dst_destroy(struct dst_entry *);
86 static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
88 static int ip6_dst_gc(struct dst_ops *ops);
89
90 static int ip6_pkt_discard(struct sk_buff *skb);
91 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
92 static int ip6_pkt_prohibit(struct sk_buff *skb);
93 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
94 static void ip6_link_failure(struct sk_buff *skb);
95 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
99 static void rt6_dst_from_metrics_check(struct rt6_info *rt);
100 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
101
102 #ifdef CONFIG_IPV6_ROUTE_INFO
103 static struct rt6_info *rt6_add_route_info(struct net *net,
104 const struct in6_addr *prefix, int prefixlen,
105 const struct in6_addr *gwaddr,
106 struct net_device *dev,
107 unsigned int pref);
108 static struct rt6_info *rt6_get_route_info(struct net *net,
109 const struct in6_addr *prefix, int prefixlen,
110 const struct in6_addr *gwaddr,
111 struct net_device *dev);
112 #endif
113
/* Per-cpu list of uncached (DST_NOCACHE) rt6_info entries so that
 * rt6_uncached_list_flush_dev() can find and retarget them when a
 * network device goes away.
 */
struct uncached_list {
	spinlock_t lock;	/* protects 'head' */
	struct list_head head;
};
118
119 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
120
/* Link @rt onto the current cpu's uncached list.  The dst is marked
 * DST_NOCACHE and the chosen per-cpu list is remembered in the route, so
 * removal does not need to run on the same cpu.
 */
static void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->dst.flags |= DST_NOCACHE;
	rt->rt6i_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}
132
/* Unlink @rt from the uncached list it was added to, if any.  Safe to
 * call on routes that were never added (list head is initialized empty
 * by rt6_info_init()).
 */
static void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		spin_unlock_bh(&ul->lock);
	}
}
143
/* Device teardown: walk every cpu's uncached list and retarget entries
 * referencing @dev to the netns loopback device, so no dst keeps a stale
 * device or inet6_dev pointer.  No-op when the loopback device itself is
 * going away (whole-netns teardown handles that path).
 */
static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			/* swap the inet6_dev reference over to loopback */
			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			/* swap the device reference over to loopback */
			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}
175
rt6_pcpu_cow_metrics(struct rt6_info * rt)176 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
177 {
178 return dst_metrics_write_ptr(rt->dst.from);
179 }
180
ipv6_cow_metrics(struct dst_entry * dst,unsigned long old)181 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
182 {
183 struct rt6_info *rt = (struct rt6_info *)dst;
184
185 if (rt->rt6i_flags & RTF_PCPU)
186 return rt6_pcpu_cow_metrics(rt);
187 else if (rt->rt6i_flags & RTF_CACHE)
188 return NULL;
189 else
190 return dst_cow_metrics_generic(dst, old);
191 }
192
choose_neigh_daddr(struct rt6_info * rt,struct sk_buff * skb,const void * daddr)193 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
194 struct sk_buff *skb,
195 const void *daddr)
196 {
197 struct in6_addr *p = &rt->rt6i_gateway;
198
199 if (!ipv6_addr_any(p))
200 return (const void *) p;
201 else if (skb)
202 return &ipv6_hdr(skb)->daddr;
203 return daddr;
204 }
205
ip6_neigh_lookup(const struct dst_entry * dst,struct sk_buff * skb,const void * daddr)206 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
207 struct sk_buff *skb,
208 const void *daddr)
209 {
210 struct rt6_info *rt = (struct rt6_info *) dst;
211 struct neighbour *n;
212
213 daddr = choose_neigh_daddr(rt, skb, daddr);
214 n = __ipv6_neigh_lookup(dst->dev, daddr);
215 if (n)
216 return n;
217 return neigh_create(&nd_tbl, daddr, dst->dev);
218 }
219
/* dst_ops used for ordinary IPv6 routes; each netns gets its own copy
 * (net->ipv6.ip6_dst_ops) initialized from this template.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
237
ip6_blackhole_mtu(const struct dst_entry * dst)238 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
239 {
240 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
241
242 return mtu ? : dst->dev->mtu;
243 }
244
/* Blackhole dsts deliberately ignore PMTU updates. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
249
/* Blackhole dsts deliberately ignore redirects. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
254
/* dst_ops for blackhole routes (e.g. created by xfrm when a dst must be
 * neutralized): PMTU updates and redirects are no-ops.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_neigh_lookup,
};
266
/* Metrics template for the special route entries below; hop limit 0
 * presumably means "unset, fall back to the default" — the consumers are
 * outside this file.
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
270
/* Template for the per-netns "null" route: matched when nothing else is,
 * rejecting traffic with -ENETUNREACH.  Worst possible metric (~0) and a
 * permanent (never-dropped) reference.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
285
286 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
287
/* Template for the "prohibit" route (policy routing): rejects traffic
 * with -EACCES and sends administratively-prohibited errors via
 * ip6_pkt_prohibit*().
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
302
/* Template for the "blackhole" route (policy routing): silently drops
 * packets in both directions via dst_discard*(), error -EINVAL.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
317
318 #endif
319
/* Zero the rt6_info-specific tail of a freshly allocated dst (everything
 * past the embedded dst_entry) and set up its list heads.
 */
static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	/* clear all fields that follow the embedded dst_entry */
	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}
328
329 /* allocate dst with ip6_dst_ops */
/* allocate dst with ip6_dst_ops */
/* Low-level allocator: get a dst from the netns ip6_dst_ops and run the
 * rt6_info-specific initialization.  Returns NULL on allocation failure.
 */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					0, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt)
		rt6_info_init(rt);

	return rt;
}
342
/* Public allocator: like __ip6_dst_alloc() but also sets up the per-cpu
 * array used for RTF_PCPU clones (each slot starts NULL).  On percpu
 * allocation failure the half-built dst is destroyed and NULL returned.
 */
struct rt6_info *ip6_dst_alloc(struct net *net,
			       struct net_device *dev,
			       int flags)
{
	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);

	if (rt) {
		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
		if (rt->rt6i_pcpu) {
			int cpu;

			for_each_possible_cpu(cpu) {
				struct rt6_info **p;

				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
				/* no one shares rt */
				*p =  NULL;
			}
		} else {
			dst_destroy((struct dst_entry *)rt);
			return NULL;
		}
	}

	return rt;
}
EXPORT_SYMBOL(ip6_dst_alloc);
370
/* dst_ops->destroy: release everything a rt6_info holds — generic
 * metrics, the per-cpu clone array, its slot on the uncached list, the
 * inet6_dev reference, and the "from" dst it may have been cloned off.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	dst->from = NULL;
	dst_release(from);
}
390
/* dst_ops->ifdown: when @dev is going down, move this route's inet6_dev
 * reference to the netns loopback device so the dst does not pin the
 * dying device's inet6_dev.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev) {
		if (idev && idev->dev == dev) {
			struct inet6_dev *loopback_idev =
				in6_dev_get(loopback_dev);
			if (loopback_idev) {
				rt->rt6i_idev = loopback_idev;
				in6_dev_put(idev);
			}
		}
	}
}
410
__rt6_check_expired(const struct rt6_info * rt)411 static bool __rt6_check_expired(const struct rt6_info *rt)
412 {
413 if (rt->rt6i_flags & RTF_EXPIRES)
414 return time_after(jiffies, rt->dst.expires);
415 else
416 return false;
417 }
418
rt6_check_expired(const struct rt6_info * rt)419 static bool rt6_check_expired(const struct rt6_info *rt)
420 {
421 if (rt->rt6i_flags & RTF_EXPIRES) {
422 if (time_after(jiffies, rt->dst.expires))
423 return true;
424 } else if (rt->dst.from) {
425 return rt6_check_expired((struct rt6_info *) rt->dst.from);
426 }
427 return false;
428 }
429
430 /* Multipath route selection:
431 * Hash based function using packet header and flowlabel.
432 * Adapted from fib_info_hashfn()
433 */
rt6_info_hash_nhsfn(unsigned int candidate_count,const struct flowi6 * fl6)434 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
435 const struct flowi6 *fl6)
436 {
437 return get_hash_from_flowi6(fl6) % candidate_count;
438 }
439
/* ECMP: pick one of @match and its siblings by flow hash.  Index 0 keeps
 * @match itself (the sibling list does not contain the route the hash
 * range was computed over); a sibling that fails rt6_score_route() makes
 * us fall back to @match.
 */
static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
					     struct flowi6 *fl6, int oif,
					     int strict)
{
	struct rt6_info *sibling, *next_sibling;
	int route_choosen;

	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
	/* Don't change the route, if route_choosen == 0
	 * (siblings does not include ourself)
	 */
	if (route_choosen)
		list_for_each_entry_safe(sibling, next_sibling,
				&match->rt6i_siblings, rt6i_siblings) {
			route_choosen--;
			if (route_choosen == 0) {
				/* unusable sibling: keep the original match */
				if (rt6_score_route(sibling, oif, strict) < 0)
					break;
				match = sibling;
				break;
			}
		}
	return match;
}
464
465 /*
466 * Route lookup. Any table->tb6_lock is implied.
467 */
468
/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

/* From the routes sharing a fib6_node leaf, pick the one matching the
 * requested output device @oif or, when no oif is given, the source
 * address @saddr.  Loopback routes are remembered as a "local" fallback
 * unless strict interface matching (RT6_LOOKUP_F_IFACE) forbids it, in
 * which case the null entry is returned when nothing matches.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* no constraint at all: the first route wins */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
515
516 #ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred-work context for a single router reachability probe: target
 * router address plus the (held) device to send the NS on.
 */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};
522
/* Workqueue handler for rt6_probe(): send a Neighbor Solicitation to the
 * target router's solicited-node multicast address, then drop the device
 * reference taken by the scheduler and free the work item.
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
	dev_put(work->dev);
	kfree(work);
}
534
/* Kick off a rate-limited reachability probe of @rt's gateway: if the
 * neighbour entry is absent, or no longer NUD_VALID and untouched for at
 * least rtr_probe_interval, schedule a deferred NS via rt6_probe_deferred().
 * The work item is allocated before committing (__neigh_set_probe_once)
 * so a failed allocation leaves the neighbour state unchanged.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		/* re-check under the lock; another cpu may have updated it */
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);	/* released by rt6_probe_deferred() */
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
581 #else
/* Router Reachability Probing only exists with CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
585 #endif
586
587 /*
588 * Default Router Selection (RFC 2461 6.3.6)
589 */
rt6_check_dev(struct rt6_info * rt,int oif)590 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
591 {
592 struct net_device *dev = rt->dst.dev;
593 if (!oif || dev->ifindex == oif)
594 return 2;
595 if ((dev->flags & IFF_LOOPBACK) &&
596 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
597 return 1;
598 return 0;
599 }
600
/* Classify next-hop reachability for route selection.  Routes without a
 * gateway (or RTF_NONEXTHOP) trivially succeed.  With
 * CONFIG_IPV6_ROUTER_PREF a known neighbour succeeds unless NUD_FAILED
 * (then RT6_NUD_FAIL_PROBE), and an unknown one succeeds (it will get
 * probed); without it, only NUD_VALID succeeds and an unknown neighbour
 * requests round-robin (RT6_NUD_FAIL_DO_RR).
 */
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
631
/* Selection score for @rt: the device match contributes the low bits
 * (see rt6_check_dev()), the RA router preference the bits above them.
 * Returns a negative rt6_nud_state when the route is unusable under the
 * given @strict flags.
 */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}
650
/* Score @rt and fold it into the running best (@match / *@mpri).
 * Expired routes and (optionally) routes on link-down devices are
 * skipped; RT6_NUD_FAIL_HARD disqualifies, RT6_NUD_FAIL_DO_RR sets
 * *@do_rr so rt6_select() rotates its round-robin pointer.  A probe is
 * kicked off when reachability is required.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown &&
	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}
688
/* Scan @fn's routes of metric @metric, starting at the round-robin head
 * @rr_head and wrapping around via fn->leaf.  The first route with a
 * different metric ends each pass but is remembered in @cont: if no
 * route of the preferred metric matched, the remaining (worse-metric)
 * routes are scored as a last resort.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	/* first pass: from the rr head to the end of the metric group */
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	/* second pass: wrap around from the leaf up to the rr head */
	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	/* nothing in the preferred metric group: consider the rest */
	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}
725
/* Default router selection on a fib6_node: score the routes starting at
 * the node's round-robin pointer and, when find_rr_leaf() asked for
 * round-robin, advance rr_ptr to the next route of the same metric.
 * Falls back to the netns null entry when nothing matched.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
753
rt6_is_gw_or_nonexthop(const struct rt6_info * rt)754 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
755 {
756 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
757 }
758
759 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Handle a Route Information option received in a Router Advertisement.
 * After validating the option length against the advertised prefix
 * length and the router preference, this adds, refreshes or — on a zero
 * lifetime — deletes the corresponding RTF_ROUTEINFO route.  Returns 0
 * on success, -EINVAL on a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* a zero-length prefix denotes the default route */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	if (rt && !lifetime) {
		/* zero lifetime: withdraw the route */
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
833 #endif
834
/* Walk back up the fib6 tree from @fn until a node carrying routes
 * (RTN_RTINFO) is found, descending into a parent's source-address
 * subtree on the way when one exists.  Returns NULL once the tree root
 * is reached without finding one.
 */
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = fn->parent;
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}
851
/* Simple (non-cloning) table lookup backend: find the fib6 node for
 * daddr/saddr, match the output device and possibly an ECMP sibling,
 * backtracking toward less-specific prefixes while only the null entry
 * matches.  The returned dst's refcount/lastuse are bumped under
 * tb6_lock.
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		flags &= ~RT6_LOOKUP_F_IFACE;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);

	return rt;

}
882
/* Public entry point: resolve @fl6 through the policy routing rules
 * using the simple ip6_pol_route_lookup() backend.
 */
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				    int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
889
/* Convenience lookup by address pair: builds a flowi6 from @daddr/@saddr
 * and returns the matching route with a reference held, or NULL when the
 * lookup resolved to an error entry.  @strict requests exact interface
 * matching on @oif.
 */
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}
EXPORT_SYMBOL(rt6_lookup);
914
915 /* ip6_ins_rt is called with FREE table->tb6_lock.
916 It takes new route entry, the addition fails by any reason the
917 route is freed. In any case, if caller does not hold it, it may
918 be destroyed.
919 */
920
/* ip6_ins_rt is called with FREE table->tb6_lock.
   It takes new route entry, the addition fails by any reason the
   route is freed. In any case, if caller does not hold it, it may
   be destroyed.
 */

/* Insert @rt into its fib6 table under the table write lock. */
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc);
	write_unlock_bh(&table->tb6_lock);

	return err;
}
934
/* Insert @rt with default netlink info (netns taken from the route's
 * device) and no extra metrics.
 */
int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
	struct mx6_config mxc = { .mx = NULL, };

	return __ip6_ins_rt(rt, &info, &mxc);
}
942
/* Create an RTF_CACHE clone of @ort for destination @daddr (and @saddr
 * under CONFIG_IPV6_SUBTREES).  Clones are always taken from the
 * original fib entry, never from another clone; the result is a /128
 * host route with metric 0 that is not linked into the fib6 tree.
 * Returns NULL on allocation failure.
 */
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	/* never clone a clone: go back to the original route */
	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);

	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}
982
/* Allocate a per-cpu (RTF_PCPU) clone of @rt on the same device and with
 * the same dst flags.  Returns NULL on allocation failure.
 */
static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt;

	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
				  rt->dst.dev, rt->dst.flags);

	if (!pcpu_rt)
		return NULL;
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}
997
998 /* It should be called with read_lock_bh(&tb6_lock) acquired */
/* It should be called with read_lock_bh(&tb6_lock) acquired */
/* Fetch this cpu's cached clone of @rt, taking a reference and
 * refreshing its inherited metrics; NULL when no clone exists yet.
 */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt) {
		dst_hold(&pcpu_rt->dst);
		rt6_dst_from_metrics_check(pcpu_rt);
	}
	return pcpu_rt;
}
1012
/* Create and install this cpu's clone of @rt.  The cmpxchg handles the
 * race with another context installing a clone first (theirs wins);
 * a route already removed from the tree gets no clone at all, and the
 * null entry is returned when the clone cannot be allocated.  Always
 * returns a dst with a reference held.
 */
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct fib6_table *table = rt->rt6i_table;
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	read_lock_bh(&table->tb6_lock);
	if (rt->rt6i_pcpu) {
		p = this_cpu_ptr(rt->rt6i_pcpu);
		prev = cmpxchg(p, NULL, pcpu_rt);
		if (prev) {
			/* If someone did it before us, return prev instead */
			dst_destroy(&pcpu_rt->dst);
			pcpu_rt = prev;
		}
	} else {
		/* rt has been removed from the fib6 tree
		 * before we have a chance to acquire the read_lock.
		 * In this case, don't brother to create a pcpu rt
		 * since rt is going away anyway.  The next
		 * dst_check() will trigger a re-lookup.
		 */
		dst_destroy(&pcpu_rt->dst);
		pcpu_rt = rt;
	}
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	read_unlock_bh(&table->tb6_lock);
	return pcpu_rt;
}
1050
/* Core policy-routing lookup with caching.  Selects the best route —
 * with next-hop reachability checks when forwarding is disabled — and
 * backtracks to less-specific prefixes, finally retrying without the
 * reachability requirement as a last resort.  The result is returned
 * either directly (null entry or an RTF_CACHE entry), as a one-off
 * uncached clone (FLOWI_FLAG_KNOWN_NH without a gateway), or as a
 * per-cpu clone of the fib entry.  A reference is always held on the
 * returned dst.
 */
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
			       int oif, struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
	/* only hosts (non-forwarding) insist on reachable routers */
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);

		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);

		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
		return pcpu_rt;

	}
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
1150
ip6_pol_route_input(struct net * net,struct fib6_table * table,struct flowi6 * fl6,int flags)1151 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1152 struct flowi6 *fl6, int flags)
1153 {
1154 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1155 }
1156
ip6_route_input_lookup(struct net * net,struct net_device * dev,struct flowi6 * fl6,int flags)1157 struct dst_entry *ip6_route_input_lookup(struct net *net,
1158 struct net_device *dev,
1159 struct flowi6 *fl6, int flags)
1160 {
1161 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1162 flags |= RT6_LOOKUP_F_IFACE;
1163
1164 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1165 }
1166 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1167
ip6_route_input(struct sk_buff * skb)1168 void ip6_route_input(struct sk_buff *skb)
1169 {
1170 const struct ipv6hdr *iph = ipv6_hdr(skb);
1171 struct net *net = dev_net(skb->dev);
1172 int flags = RT6_LOOKUP_F_HAS_SADDR;
1173 struct ip_tunnel_info *tun_info;
1174 struct flowi6 fl6 = {
1175 .flowi6_iif = skb->dev->ifindex,
1176 .daddr = iph->daddr,
1177 .saddr = iph->saddr,
1178 .flowlabel = ip6_flowinfo(iph),
1179 .flowi6_mark = skb->mark,
1180 .flowi6_proto = iph->nexthdr,
1181 };
1182
1183 tun_info = skb_tunnel_info(skb);
1184 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1185 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1186 skb_dst_drop(skb);
1187 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1188 }
1189
ip6_pol_route_output(struct net * net,struct fib6_table * table,struct flowi6 * fl6,int flags)1190 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1191 struct flowi6 *fl6, int flags)
1192 {
1193 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1194 }
1195
ip6_route_output_flags(struct net * net,const struct sock * sk,struct flowi6 * fl6,int flags)1196 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1197 struct flowi6 *fl6, int flags)
1198 {
1199 bool any_src;
1200
1201 if (rt6_need_strict(&fl6->daddr)) {
1202 struct dst_entry *dst;
1203
1204 dst = l3mdev_link_scope_lookup(net, fl6);
1205 if (dst)
1206 return dst;
1207 }
1208
1209 fl6->flowi6_iif = LOOPBACK_IFINDEX;
1210
1211 any_src = ipv6_addr_any(&fl6->saddr);
1212 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1213 (fl6->flowi6_oif && any_src))
1214 flags |= RT6_LOOKUP_F_IFACE;
1215
1216 if (!any_src)
1217 flags |= RT6_LOOKUP_F_HAS_SADDR;
1218 else if (sk)
1219 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1220
1221 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1222 }
1223 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1224
/* Clone @dst_orig into a "blackhole" route that silently discards all
 * traffic (used by xfrm while a policy is being resolved).  Consumes
 * the caller's reference on @dst_orig.  Returns the new dst or
 * ERR_PTR(-ENOMEM).
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		rt6_info_init(rt);

		new = &rt->dst;
		new->__use = 1;
		/* Drop everything routed through this entry. */
		new->input = dst_discard;
		new->output = dst_discard_out;

		dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		/* The clone is not a per-cpu copy of the original. */
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		/* NOTE(review): dst_free() on the still-referenced entry
		 * hands it to the dst gc machinery rather than freeing it
		 * immediately — presumably intentional; confirm against
		 * this tree's dst_alloc()/dst_free() contract.
		 */
		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
1259
1260 /*
1261 * Destination cache support functions
1262 */
1263
rt6_dst_from_metrics_check(struct rt6_info * rt)1264 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1265 {
1266 if (rt->dst.from &&
1267 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1268 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1269 }
1270
/* Validate a cached route: it must still be linked into the fib tree,
 * the caller's cookie must match the tree's current serial, and the
 * route must not have expired.  Returns the dst or NULL if stale.
 */
static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
{
	u32 current_cookie = 0;

	if (rt6_get_cookie_safe(rt, &current_cookie) &&
	    current_cookie == cookie &&
	    !rt6_check_expired(rt))
		return &rt->dst;

	return NULL;
}
1283
/* Validate a route through its parent ("from") route: the clone itself
 * must not have expired and the parent must still pass rt6_check().
 */
static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
{
	if (__rt6_check_expired(rt))
		return NULL;
	if (rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK)
		return NULL;
	if (!rt6_check((struct rt6_info *)(rt->dst.from), cookie))
		return NULL;

	return &rt->dst;
}
1293
/* dst_ops->check callback: decide whether a cached dst is still valid.
 *
 * All IPv6 dsts are created with ->obsolete = DST_OBSOLETE_FORCE_CHK,
 * which forces every validation down into this function.
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	bool check_via_parent;

	rt6_dst_from_metrics_check(rt);

	/* Per-cpu copies and uncached clones track their parent route's
	 * validity; everything else is checked directly.
	 */
	check_via_parent = (rt->rt6i_flags & RTF_PCPU) ||
			   (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from);
	if (check_via_parent)
		return rt6_dst_from_check(rt, cookie);

	return rt6_check(rt, cookie);
}
1313
ip6_negative_advice(struct dst_entry * dst)1314 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1315 {
1316 struct rt6_info *rt = (struct rt6_info *) dst;
1317
1318 if (rt) {
1319 if (rt->rt6i_flags & RTF_CACHE) {
1320 if (rt6_check_expired(rt)) {
1321 ip6_del_rt(rt);
1322 dst = NULL;
1323 }
1324 } else {
1325 dst_release(dst);
1326 dst = NULL;
1327 }
1328 }
1329 return dst;
1330 }
1331
/* dst_ops->link_failure callback: the neighbour for this skb's route
 * became unusable.  Report unreachability to the sender and invalidate
 * the route: cache clones are deleted outright, default routes get
 * their fib node's serial poisoned so cached sockets revalidate.
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* ip6_del_rt() consumes a reference; take one. */
			dst_hold(&rt->dst);
			ip6_del_rt(rt);
		} else {
			struct fib6_node *fn;

			rcu_read_lock();
			fn = rcu_dereference(rt->rt6i_node);
			/* Poisoning fn_sernum fails every cookie derived
			 * from this node on the next rt6_check().
			 */
			if (fn && (rt->rt6i_flags & RTF_DEFAULT))
				fn->fn_sernum = -1;
			rcu_read_unlock();
		}
	}
}
1354
/* Record a learned path MTU on @rt and arm its expiry timer so the
 * override ages out after ip6_rt_mtu_expires.
 */
static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
{
	struct net *net = dev_net(rt->dst.dev);
	int timeout = net->ipv6.sysctl.ip6_rt_mtu_expires;

	rt->rt6i_flags |= RTF_MODIFIED;
	rt->rt6i_pmtu = mtu;
	rt6_update_expires(rt, timeout);
}
1363
rt6_cache_allowed_for_pmtu(const struct rt6_info * rt)1364 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1365 {
1366 return !(rt->rt6i_flags & RTF_CACHE) &&
1367 (rt->rt6i_flags & RTF_PCPU ||
1368 rcu_access_pointer(rt->rt6i_node));
1369 }
1370
/* Apply a learned path MTU to @dst.  Routes that are already private
 * to this path are updated in place; shared fib entries get a
 * RTF_CACHE clone carrying the new MTU instead.  The MTU is clamped
 * to at least IPV6_MIN_MTU and only ever reduced.
 */
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	dst_confirm(dst);
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (!rt6_cache_allowed_for_pmtu(rt6)) {
		rt6_do_update_pmtu(rt6, mtu);
	} else {
		const struct in6_addr *daddr, *saddr;
		struct rt6_info *nrt6;

		/* Key the clone by the packet's addresses when available,
		 * otherwise by the socket's flow.
		 */
		if (iph) {
			daddr = &iph->daddr;
			saddr = &iph->saddr;
		} else if (sk) {
			daddr = &sk->sk_v6_daddr;
			saddr = &inet6_sk(sk)->saddr;
		} else {
			return;
		}
		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
			ip6_ins_rt(nrt6);
		}
	}
}
1415
/* dst_ops->update_pmtu callback: forward to __ip6_rt_update_pmtu(),
 * passing the IPv6 header when an skb is available.
 */
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	const struct ipv6hdr *iph = NULL;

	if (skb)
		iph = ipv6_hdr(skb);
	__ip6_rt_update_pmtu(dst, sk, iph, mtu);
}
1421
/* Apply a PMTU update (e.g. from a Packet Too Big message) to the
 * route matching the embedded packet headers in @skb->data.
 * @mtu is in network byte order.
 */
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark, kuid_t uid)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_uid = uid,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1443
/* Socket-level PMTU update: apply the new MTU to the route, then, if
 * the socket's cached dst has become invalid, refresh it so the new
 * path MTU takes effect immediately.  @mtu is in network byte order.
 */
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	struct dst_entry *dst;

	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);

	/* Nothing more to do if there is no cached dst or it still
	 * validates with the socket's cookie.
	 */
	dst = __sk_dst_get(sk);
	if (!dst || !dst->obsolete ||
	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
		return;

	/* Re-route under the socket lock, but never while userspace
	 * owns the socket; v4-mapped sockets are handled elsewhere.
	 */
	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		ip6_datagram_dst_update(sk, false);
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1462
/* Handle redirects */

/* Extended flow key for redirect processing: carries the advertising
 * router's (gateway) address alongside the standard flowi6, so the
 * lookup callback can verify the redirect's origin.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1468
__ip6_route_redirect(struct net * net,struct fib6_table * table,struct flowi6 * fl6,int flags)1469 static struct rt6_info *__ip6_route_redirect(struct net *net,
1470 struct fib6_table *table,
1471 struct flowi6 *fl6,
1472 int flags)
1473 {
1474 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1475 struct rt6_info *rt;
1476 struct fib6_node *fn;
1477
1478 /* Get the "current" route for this destination and
1479 * check if the redirect has come from approriate router.
1480 *
1481 * RFC 4861 specifies that redirects should only be
1482 * accepted if they come from the nexthop to the target.
1483 * Due to the way the routes are chosen, this notion
1484 * is a bit fuzzy and one might need to check all possible
1485 * routes.
1486 */
1487
1488 read_lock_bh(&table->tb6_lock);
1489 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1490 restart:
1491 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1492 if (rt6_check_expired(rt))
1493 continue;
1494 if (rt->dst.error)
1495 break;
1496 if (!(rt->rt6i_flags & RTF_GATEWAY))
1497 continue;
1498 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1499 continue;
1500 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1501 continue;
1502 break;
1503 }
1504
1505 if (!rt)
1506 rt = net->ipv6.ip6_null_entry;
1507 else if (rt->dst.error) {
1508 rt = net->ipv6.ip6_null_entry;
1509 goto out;
1510 }
1511
1512 if (rt == net->ipv6.ip6_null_entry) {
1513 fn = fib6_backtrack(fn, &fl6->saddr);
1514 if (fn)
1515 goto restart;
1516 }
1517
1518 out:
1519 dst_hold(&rt->dst);
1520
1521 read_unlock_bh(&table->tb6_lock);
1522
1523 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1524 return rt;
1525 };
1526
ip6_route_redirect(struct net * net,const struct flowi6 * fl6,const struct in6_addr * gateway)1527 static struct dst_entry *ip6_route_redirect(struct net *net,
1528 const struct flowi6 *fl6,
1529 const struct in6_addr *gateway)
1530 {
1531 int flags = RT6_LOOKUP_F_HAS_SADDR;
1532 struct ip6rd_flowi rdfl;
1533
1534 rdfl.fl6 = *fl6;
1535 rdfl.gateway = *gateway;
1536
1537 return fib6_rule_lookup(net, &rdfl.fl6,
1538 flags, __ip6_route_redirect);
1539 }
1540
/* Apply a redirect for the packet whose headers sit at @skb->data:
 * look up the affected route and update it with the new next hop
 * taken from the redirect's source address.
 */
void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
		  kuid_t uid)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6 = {
		.flowi6_iif = LOOPBACK_IFINDEX,
		.flowi6_oif = oif,
		.flowi6_mark = mark,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_uid = uid,
	};

	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);
1562
/* Variant of ip6_redirect() for redirects whose ICMPv6 payload lacks
 * the offending packet's header: the flow is keyed by the redirect
 * message's destination field instead.
 */
void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
			    u32 mark)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
	struct dst_entry *dst;
	struct flowi6 fl6 = {
		.flowi6_iif = LOOPBACK_IFINDEX,
		.flowi6_oif = oif,
		.flowi6_mark = mark,
		.daddr = msg->dest,
		.saddr = iph->daddr,
		.flowi6_uid = sock_net_uid(net, NULL),
	};

	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
1583
ip6_sk_redirect(struct sk_buff * skb,struct sock * sk)1584 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1585 {
1586 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1587 sk->sk_uid);
1588 }
1589 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1590
ip6_default_advmss(const struct dst_entry * dst)1591 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1592 {
1593 struct net_device *dev = dst->dev;
1594 unsigned int mtu = dst_mtu(dst);
1595 struct net *net = dev_net(dev);
1596
1597 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1598
1599 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1600 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1601
1602 /*
1603 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1604 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1605 * IPV6_MAXPLEN is also valid and means: "any MSS,
1606 * rely only on pmtu discovery"
1607 */
1608 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1609 mtu = IPV6_MAXPLEN;
1610 return mtu;
1611 }
1612
ip6_mtu(const struct dst_entry * dst)1613 static unsigned int ip6_mtu(const struct dst_entry *dst)
1614 {
1615 const struct rt6_info *rt = (const struct rt6_info *)dst;
1616 unsigned int mtu = rt->rt6i_pmtu;
1617 struct inet6_dev *idev;
1618
1619 if (mtu)
1620 goto out;
1621
1622 mtu = dst_metric_raw(dst, RTAX_MTU);
1623 if (mtu)
1624 goto out;
1625
1626 mtu = IPV6_MIN_MTU;
1627
1628 rcu_read_lock();
1629 idev = __in6_dev_get(dst->dev);
1630 if (idev)
1631 mtu = idev->cnf.mtu6;
1632 rcu_read_unlock();
1633
1634 out:
1635 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1636
1637 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
1638 }
1639
/* Singly-linked list of standalone ICMPv6 dst entries awaiting
 * reclamation by icmp6_dst_gc(); the list head and every entry's
 * ->next link are protected by icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1642
/* Allocate a standalone dst for an outgoing ICMPv6 packet.
 *
 * The entry is not inserted into any fib table; it is chained onto
 * icmp6_dst_gc_list (under icmp6_dst_lock) and reclaimed later by
 * icmp6_dst_gc() once its refcount drops to zero.  Returns the
 * (possibly xfrm-wrapped) dst, or an ERR_PTR() on failure.
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	atomic_set(&rt->dst.__refcnt, 1);
	rt->rt6i_gateway = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	/* Chain onto the gc list so icmp6_dst_gc() can reclaim it. */
	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
1683
icmp6_dst_gc(void)1684 int icmp6_dst_gc(void)
1685 {
1686 struct dst_entry *dst, **pprev;
1687 int more = 0;
1688
1689 spin_lock_bh(&icmp6_dst_lock);
1690 pprev = &icmp6_dst_gc_list;
1691
1692 while ((dst = *pprev) != NULL) {
1693 if (!atomic_read(&dst->__refcnt)) {
1694 *pprev = dst->next;
1695 dst_free(dst);
1696 } else {
1697 pprev = &dst->next;
1698 ++more;
1699 }
1700 }
1701
1702 spin_unlock_bh(&icmp6_dst_lock);
1703
1704 return more;
1705 }
1706
icmp6_clean_all(int (* func)(struct rt6_info * rt,void * arg),void * arg)1707 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1708 void *arg)
1709 {
1710 struct dst_entry *dst, **pprev;
1711
1712 spin_lock_bh(&icmp6_dst_lock);
1713 pprev = &icmp6_dst_gc_list;
1714 while ((dst = *pprev) != NULL) {
1715 struct rt6_info *rt = (struct rt6_info *) dst;
1716 if (func(rt, arg)) {
1717 *pprev = dst->next;
1718 dst_free(dst);
1719 } else {
1720 pprev = &dst->next;
1721 }
1722 }
1723 spin_unlock_bh(&icmp6_dst_lock);
1724 }
1725
/* dst_ops->gc callback, invoked when the dst entry count crosses
 * gc_thresh.  Rate-limits full fib6 garbage collection and adapts the
 * gc aggressiveness (ip6_rt_gc_expire) to the observed pressure.
 * A non-zero return means the table is still over rt_max_size and
 * further dst allocations should fail.
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	/* Skip the expensive scan if gc ran recently and we are not
	 * over the hard limit.
	 */
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	/* Exponentially decay the gc aggressiveness. */
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
1750
ip6_convert_metrics(struct mx6_config * mxc,const struct fib6_config * cfg)1751 static int ip6_convert_metrics(struct mx6_config *mxc,
1752 const struct fib6_config *cfg)
1753 {
1754 bool ecn_ca = false;
1755 struct nlattr *nla;
1756 int remaining;
1757 u32 *mp;
1758
1759 if (!cfg->fc_mx)
1760 return 0;
1761
1762 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1763 if (unlikely(!mp))
1764 return -ENOMEM;
1765
1766 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1767 int type = nla_type(nla);
1768 u32 val;
1769
1770 if (!type)
1771 continue;
1772 if (unlikely(type > RTAX_MAX))
1773 goto err;
1774
1775 if (type == RTAX_CC_ALGO) {
1776 char tmp[TCP_CA_NAME_MAX];
1777
1778 nla_strlcpy(tmp, nla, sizeof(tmp));
1779 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1780 if (val == TCP_CA_UNSPEC)
1781 goto err;
1782 } else {
1783 val = nla_get_u32(nla);
1784 }
1785 if (type == RTAX_HOPLIMIT && val > 255)
1786 val = 255;
1787 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1788 goto err;
1789
1790 mp[type - 1] = val;
1791 __set_bit(type - 1, mxc->mx_valid);
1792 }
1793
1794 if (ecn_ca) {
1795 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1796 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1797 }
1798
1799 mxc->mx = mp;
1800 return 0;
1801 err:
1802 kfree(mp);
1803 return -EINVAL;
1804 }
1805
ip6_nh_lookup_table(struct net * net,struct fib6_config * cfg,const struct in6_addr * gw_addr)1806 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1807 struct fib6_config *cfg,
1808 const struct in6_addr *gw_addr)
1809 {
1810 struct flowi6 fl6 = {
1811 .flowi6_oif = cfg->fc_ifindex,
1812 .daddr = *gw_addr,
1813 .saddr = cfg->fc_prefsrc,
1814 };
1815 struct fib6_table *table;
1816 struct rt6_info *rt;
1817 int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
1818
1819 table = fib6_get_table(net, cfg->fc_table);
1820 if (!table)
1821 return NULL;
1822
1823 if (!ipv6_addr_any(&cfg->fc_prefsrc))
1824 flags |= RT6_LOOKUP_F_HAS_SADDR;
1825
1826 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1827
1828 /* if table lookup failed, fall back to full lookup */
1829 if (rt == net->ipv6.ip6_null_entry) {
1830 ip6_rt_put(rt);
1831 rt = NULL;
1832 }
1833
1834 return rt;
1835 }
1836
/* Build (but do not insert) a new rt6_info from the route configuration
 * in @cfg (netlink RTM_NEWROUTE or ioctl).
 *
 * On success the returned route holds references on its output device
 * and inet6_dev; the caller must insert it (__ip6_ins_rt()) or release
 * it with dst_free().  Returns ERR_PTR() on failure.
 */
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	/* RTF_PCPU is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_PCPU)
		goto out;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		goto out;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-prefix routes require subtree support. */
	if (cfg->fc_src_len)
		goto out;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination/flags. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		/* Attach lightweight-tunnel state and redirect the dst
		 * hooks through it where the encap requests it.
		 */
		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_out;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0))
			goto out;

		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt = NULL;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			if (cfg->fc_table) {
				grt = ip6_nh_lookup_table(net, cfg, gw_addr);

				if (grt) {
					if (grt->rt6i_flags & RTF_GATEWAY ||
					    (dev && dev != grt->dst.dev)) {
						ip6_rt_put(grt);
						grt = NULL;
					}
				}
			}

			if (!grt)
				grt = rt6_lookup(net, gw_addr, NULL,
						 cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* Adopt the device the nexthop resolves
				 * to, taking our own references.
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* The preferred source must be an address on @dev. */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return rt;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);

	return ERR_PTR(err);
}
2100
ip6_route_add(struct fib6_config * cfg)2101 int ip6_route_add(struct fib6_config *cfg)
2102 {
2103 struct mx6_config mxc = { .mx = NULL, };
2104 struct rt6_info *rt;
2105 int err;
2106
2107 rt = ip6_route_info_create(cfg);
2108 if (IS_ERR(rt)) {
2109 err = PTR_ERR(rt);
2110 rt = NULL;
2111 goto out;
2112 }
2113
2114 err = ip6_convert_metrics(&mxc, cfg);
2115 if (err)
2116 goto out;
2117
2118 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2119
2120 kfree(mxc.mx);
2121
2122 return err;
2123 out:
2124 if (rt)
2125 dst_free(&rt->dst);
2126
2127 return err;
2128 }
2129
__ip6_del_rt(struct rt6_info * rt,struct nl_info * info)2130 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2131 {
2132 int err;
2133 struct fib6_table *table;
2134 struct net *net = dev_net(rt->dst.dev);
2135
2136 if (rt == net->ipv6.ip6_null_entry ||
2137 rt->dst.flags & DST_NOCACHE) {
2138 err = -ENOENT;
2139 goto out;
2140 }
2141
2142 table = rt->rt6i_table;
2143 write_lock_bh(&table->tb6_lock);
2144 err = fib6_del(rt, info);
2145 write_unlock_bh(&table->tb6_lock);
2146
2147 out:
2148 ip6_rt_put(rt);
2149 return err;
2150 }
2151
ip6_del_rt(struct rt6_info * rt)2152 int ip6_del_rt(struct rt6_info *rt)
2153 {
2154 struct nl_info info = {
2155 .nl_net = dev_net(rt->dst.dev),
2156 };
2157 return __ip6_del_rt(rt, &info);
2158 }
2159
/* Delete the first route in @cfg's table matching the configured
 * destination/source prefix and the optional ifindex, gateway, metric
 * and protocol filters.  Cache (RTF_CACHE) clones are only considered
 * when RTF_CACHE is set in the request.
 * Returns 0 on success, -ESRCH when nothing matched.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
				continue;
			/* Take a reference before dropping the table
			 * lock; __ip6_del_rt() consumes it.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
2203
/* Handle an ICMPv6 Redirect for @dst received in @skb.
 *
 * Validates the redirect per RFC 4861 (multicast destination rejected,
 * target must be link-local unicast unless dest == target i.e. on-link,
 * redirects ignored when forwarding or accept_redirects is off), then
 * updates the neighbour cache and installs a cloned RTF_CACHE route via
 * the new gateway, replacing any previous cached route for the dest.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		/* dest == target: destination is directly reachable (on-link). */
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 * The IP source address of the Redirect MUST be the same as the current
	 * first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt->rt6i_flags & RTF_REJECT) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 * We have finally decided to accept it.
	 */

	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER)),
		     NDISC_REDIRECT, &ndopts);

	/* Clone the current route for the redirected destination. */
	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	if (rt->rt6i_flags & RTF_CACHE) {
		/* The superseded cached route is removed; ip6_del_rt()
		 * consumes the reference taken by dst_clone().
		 */
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
2319
2320 /*
2321 * Misc support functions
2322 */
2323
/* Link clone @rt to its origin @from: clear any expiry on the clone,
 * take a reference on @from's dst and share its metrics read-only.
 * @from must itself be an original route, never a clone (BUG otherwise).
 */
static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
	BUG_ON(from->dst.from);

	rt->rt6i_flags &= ~RTF_EXPIRES;
	dst_hold(&from->dst);
	rt->dst.from = &from->dst;
	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}
2333
/* Initialize @rt as a copy of @ort, duplicating routing state and
 * taking the needed references (idev, origin dst via rt6_set_from,
 * lwtunnel state). Note rt6_set_from() must run after the flags copy
 * since it clears RTF_EXPIRES on the clone.
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.input = ort->dst.input;
	rt->dst.output = ort->dst.output;
	rt->rt6i_dst = ort->rt6i_dst;
	rt->dst.error = ort->dst.error;
	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);
	rt->dst.lastuse = jiffies;
	rt->rt6i_gateway = ort->rt6i_gateway;
	rt->rt6i_flags = ort->rt6i_flags;
	rt6_set_from(rt, ort);
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
2355
2356 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Look up an RA route-information entry (RTF_ROUTEINFO|RTF_GATEWAY)
 * for @prefix/@prefixlen via @gwaddr on @dev in the device's RA table.
 * Returns the route with a dst reference held, or NULL if not found.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev)
{
	u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO);
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, tb_id);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != dev->ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		/* Found: pin it before dropping the table lock. */
		dst_hold(&rt->dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
2390
rt6_add_route_info(struct net * net,const struct in6_addr * prefix,int prefixlen,const struct in6_addr * gwaddr,struct net_device * dev,unsigned int pref)2391 static struct rt6_info *rt6_add_route_info(struct net *net,
2392 const struct in6_addr *prefix, int prefixlen,
2393 const struct in6_addr *gwaddr,
2394 struct net_device *dev,
2395 unsigned int pref)
2396 {
2397 struct fib6_config cfg = {
2398 .fc_metric = IP6_RT_PRIO_USER,
2399 .fc_ifindex = dev->ifindex,
2400 .fc_dst_len = prefixlen,
2401 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2402 RTF_UP | RTF_PREF(pref),
2403 .fc_nlinfo.portid = 0,
2404 .fc_nlinfo.nlh = NULL,
2405 .fc_nlinfo.nl_net = net,
2406 };
2407
2408 cfg.fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO),
2409 cfg.fc_dst = *prefix;
2410 cfg.fc_gateway = *gwaddr;
2411
2412 /* We should treat it as a default route if prefix length is 0. */
2413 if (!prefixlen)
2414 cfg.fc_flags |= RTF_DEFAULT;
2415
2416 ip6_route_add(&cfg);
2417
2418 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
2419 }
2420 #endif
2421
/* Find the RA-learned default router entry (RTF_ADDRCONF|RTF_DEFAULT)
 * with gateway @addr on @dev. Returns the route with a dst reference
 * held, or NULL if no such router is installed.
 */
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
	u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_MAIN);
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), tb_id);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (dev == rt->dst.dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
2444
/* Install an RA-learned default route via @gwaddr on @dev with router
 * preference @pref and mark the table as holding a default router.
 * Returns the inserted route (reference held) via rt6_get_dflt_router(),
 * or NULL on failure.
 */
struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_DFLT),
		.fc_metric = IP6_RT_PRIO_USER,
		.fc_ifindex = dev->ifindex,
		.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
			    RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	cfg.fc_gateway = *gwaddr;

	if (!ip6_route_add(&cfg)) {
		struct fib6_table *table;

		table = fib6_get_table(dev_net(dev), cfg.fc_table);
		if (table)
			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
	}

	return rt6_get_dflt_router(gwaddr, dev);
}
2472
rt6_addrconf_purge(struct rt6_info * rt,void * arg)2473 int rt6_addrconf_purge(struct rt6_info *rt, void *arg) {
2474 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2475 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2))
2476 return -1;
2477 return 0;
2478 }
2479
/* Purge all RA-learned routes in @net (see rt6_addrconf_purge). */
void rt6_purge_dflt_routers(struct net *net)
{
	fib6_clean_all(net, rt6_addrconf_purge, NULL);
}
2484
rtmsg_to_fib6_config(struct net * net,struct in6_rtmsg * rtmsg,struct fib6_config * cfg)2485 static void rtmsg_to_fib6_config(struct net *net,
2486 struct in6_rtmsg *rtmsg,
2487 struct fib6_config *cfg)
2488 {
2489 memset(cfg, 0, sizeof(*cfg));
2490
2491 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2492 : RT6_TABLE_MAIN;
2493 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2494 cfg->fc_metric = rtmsg->rtmsg_metric;
2495 cfg->fc_expires = rtmsg->rtmsg_info;
2496 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2497 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2498 cfg->fc_flags = rtmsg->rtmsg_flags;
2499
2500 cfg->fc_nlinfo.nl_net = net;
2501
2502 cfg->fc_dst = rtmsg->rtmsg_dst;
2503 cfg->fc_src = rtmsg->rtmsg_src;
2504 cfg->fc_gateway = rtmsg->rtmsg_gateway;
2505 }
2506
/* Legacy ioctl entry point for SIOCADDRT/SIOCDELRT.
 *
 * Requires CAP_NET_ADMIN in @net's user namespace. Copies the
 * in6_rtmsg from user space, converts it, and adds or deletes the
 * route under the RTNL lock. Returns 0 or a negative errno
 * (-EPERM, -EFAULT, -EINVAL for unknown commands).
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch (cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		/* copy_from_user() returns the number of bytes NOT copied. */
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}
2543
2544 /*
2545 * Drop the packet on the floor
2546 */
2547
/* Drop @skb, bump the appropriate SNMP counter, and send an ICMPv6
 * destination-unreachable with @code back to the sender.
 * @ipstats_mib_noroutes selects IN vs OUT no-route accounting;
 * unspecified destinations are counted as address errors instead.
 * Always returns 0 (packet consumed).
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
2570
/* dst input handler for blackhole routes: drop with "no route". */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2575
/* dst output handler for blackhole routes: drop with "no route". */
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
2581
/* dst input handler for prohibit routes: drop as administratively
 * prohibited.
 */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2586
/* dst output handler for prohibit routes: drop as administratively
 * prohibited.
 */
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2592
2593 /*
2594 * Allocate a dst for local (unicast / anycast) address.
2595 */
2596
/* Allocate a host route (/128) dst for a local unicast or anycast
 * address @addr on @idev. The route normally points at the loopback
 * device; if the interface is enslaved to an L3 master device and the
 * address is not link-local/multicast, the master is used instead.
 * Returns the route with refcount 1, or ERR_PTR(-ENOMEM).
 */
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	u32 tb_id;
	struct net *net = dev_net(idev->dev);
	struct net_device *dev = net->loopback_dev;
	struct rt6_info *rt;

	/* use L3 Master device as loopback for host routes if device
	 * is enslaved and address is not link local or multicast
	 */
	if (!rt6_need_strict(addr))
		dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;

	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
	if (!rt)
		return ERR_PTR(-ENOMEM);

	/* Reference for rt->rt6i_idev, released when the route is freed. */
	in6_dev_hold(idev);

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;

	rt->rt6i_gateway = *addr;
	rt->rt6i_dst.addr = *addr;
	rt->rt6i_dst.plen = 128;
	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
	rt->rt6i_table = fib6_get_table(net, tb_id);
	rt->dst.flags |= DST_NOCACHE;

	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}
2640
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to this device; NULL = any */
	struct net *net;	/* namespace being cleaned */
	struct in6_addr *addr;	/* preferred source address being removed */
};
2647
fib6_remove_prefsrc(struct rt6_info * rt,void * arg)2648 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2649 {
2650 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2651 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2652 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2653
2654 if (((void *)rt->dst.dev == dev || !dev) &&
2655 rt != net->ipv6.ip6_null_entry &&
2656 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2657 /* remove prefsrc entry */
2658 rt->rt6i_prefsrc.plen = 0;
2659 }
2660 return 0;
2661 }
2662
/* Strip the address of @ifp from the prefsrc of every route on its
 * device (called when the address is deleted).
 */
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
2673
2674 #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2675 #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2676
2677 /* Remove routers and update dst entries when gateway turn into host. */
fib6_clean_tohost(struct rt6_info * rt,void * arg)2678 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2679 {
2680 struct in6_addr *gateway = (struct in6_addr *)arg;
2681
2682 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2683 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2684 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2685 return -1;
2686 }
2687 return 0;
2688 }
2689
/* Remove all routes in @net that use @gateway as next hop (see
 * fib6_clean_tohost); called when the gateway becomes a plain host.
 */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
2694
/* Argument bundle for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL = all devices */
	struct net *net;	/* namespace being cleaned */
};
2699
fib6_ifdown(struct rt6_info * rt,void * arg)2700 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2701 {
2702 const struct arg_dev_net *adn = arg;
2703 const struct net_device *dev = adn->dev;
2704
2705 if ((rt->dst.dev == dev || !dev) &&
2706 rt != adn->net->ipv6.ip6_null_entry)
2707 return -1;
2708
2709 return 0;
2710 }
2711
/* Tear down all routing state referencing @dev in @net (or every
 * device when @dev is NULL): FIB routes, ICMP rate-limit dsts, and —
 * for a specific device — the uncached route list.
 */
void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, &adn);
	icmp6_clean_all(fib6_ifdown, &adn);
	if (dev)
		rt6_uncached_list_flush_dev(net, dev);
}
2724
/* Argument bundle for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* new device MTU */
};
2729
/* fib6_clean_all() callback: propagate a device MTU change to route
 * MTU metrics. Never deletes routes; always returns 0.
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be led to
	   PMTU discovery.
	 */
	if (rt->dst.dev == arg->dev &&
	    dst_metric_raw(&rt->dst, RTAX_MTU) &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* For RTF_CACHE with rt6i_pmtu == 0
			 * (i.e. a redirected route),
			 * the metrics of its rt->dst.from has already
			 * been updated.
			 */
			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
				rt->rt6i_pmtu = arg->mtu;
		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
			   (dst_mtu(&rt->dst) < arg->mtu &&
			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
	}
	return 0;
}
2778
/* Walk all routes in @dev's namespace and update their MTU metrics
 * after the device's MTU changed to @mtu.
 */
void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
{
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};

	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
}
2788
/* Netlink attribute validation policy for RTM_{NEW,DEL,GET}ROUTE. */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
	[RTA_EXPIRES]		= { .type = NLA_U32 },
	[RTA_UID]		= { .type = NLA_U32 },
	[RTA_TABLE]		= { .type = NLA_U32 },
};
2804
/* Parse an RTM_NEWROUTE/RTM_DELROUTE netlink message into @cfg.
 *
 * Validates attributes against rtm_ipv6_policy, maps the rtm header
 * fields and optional attributes into the fib6_config, and validates
 * any lwtunnel encapsulation. Returns 0 on success or a negative
 * errno (-EINVAL on malformed attributes).
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	unsigned int pref;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	/* Legacy table id from the header; RTA_TABLE below overrides it. */
	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	if (rtm->rtm_flags & RTM_F_CLONED)
		cfg->fc_flags |= RTF_CACHE;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		/* Only prefix-length bytes of the address are present. */
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);

		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
						     cfg->fc_mp_len);
		if (err < 0)
			goto errout;
	}

	if (tb[RTA_PREF]) {
		/* Clamp unknown router preferences to MEDIUM. */
		pref = nla_get_u8(tb[RTA_PREF]);
		if (pref != ICMPV6_ROUTER_PREF_LOW &&
		    pref != ICMPV6_ROUTER_PREF_HIGH)
			pref = ICMPV6_ROUTER_PREF_MEDIUM;
		cfg->fc_flags |= RTF_PREF(pref);
	}

	if (tb[RTA_ENCAP])
		cfg->fc_encap = tb[RTA_ENCAP];

	if (tb[RTA_ENCAP_TYPE]) {
		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);

		err = lwtunnel_valid_encap_type(cfg->fc_encap_type);
		if (err < 0)
			goto errout;
	}

	if (tb[RTA_EXPIRES]) {
		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);

		if (addrconf_finite_timeout(timeout)) {
			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
			cfg->fc_flags |= RTF_EXPIRES;
		}
	}

	err = 0;
errout:
	return err;
}
2926
/* One pending nexthop of a multipath route being assembled. */
struct rt6_nh {
	struct rt6_info *rt6_info;	/* route to insert; NULL once consumed */
	struct fib6_config r_cfg;	/* per-nexthop config (for rollback) */
	struct mx6_config mxc;		/* converted metrics */
	struct list_head next;		/* link in rt6_nh_list */
};
2933
/* Warn about every nexthop of a multipath replace that failed midway,
 * since the table may now hold a mix of old and new nexthops.
 */
static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
{
	struct rt6_nh *nh;

	list_for_each_entry(nh, rt6_nh_list, next) {
		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
			&nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
			nh->r_cfg.fc_ifindex);
	}
}
2944
ip6_route_info_append(struct list_head * rt6_nh_list,struct rt6_info * rt,struct fib6_config * r_cfg)2945 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2946 struct rt6_info *rt, struct fib6_config *r_cfg)
2947 {
2948 struct rt6_nh *nh;
2949 int err = -EEXIST;
2950
2951 list_for_each_entry(nh, rt6_nh_list, next) {
2952 /* check if rt6_info already exists */
2953 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
2954 return err;
2955 }
2956
2957 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2958 if (!nh)
2959 return -ENOMEM;
2960 nh->rt6_info = rt;
2961 err = ip6_convert_metrics(&nh->mxc, r_cfg);
2962 if (err) {
2963 kfree(nh);
2964 return err;
2965 }
2966 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2967 list_add_tail(&nh->next, rt6_nh_list);
2968
2969 return 0;
2970 }
2971
/* Add a multipath route: parse each RTA_MULTIPATH nexthop into its own
 * rt6_info, then insert them one by one. On a mid-insert failure the
 * already-added nexthops are deleted again (rollback); on a mid-replace
 * failure the table may be inconsistent and a warning is printed.
 * Returns 0 or the first negative errno encountered.
 */
static int ip6_route_multipath_add(struct fib6_config *cfg)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	struct rt6_info *rt;
	struct rt6_nh *err_nh;
	struct rt6_nh *nh, *nh_safe;
	int remaining;
	int attrlen;
	int err = 1;
	int nhn = 0;
	int replace = (cfg->fc_nlinfo.nlh &&
		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
	LIST_HEAD(rt6_nh_list);

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
	 * rt6_info structs per nexthop
	 */
	while (rtnh_ok(rtnh, remaining)) {
		/* Each nexthop starts from the shared config, overridden
		 * by its own ifindex/gateway/encap attributes.
		 */
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				r_cfg.fc_gateway = nla_get_in6_addr(nla);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				r_cfg.fc_encap_type = nla_get_u16(nla);
		}

		rt = ip6_route_info_create(&r_cfg);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			rt = NULL;
			goto cleanup;
		}

		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
		if (err) {
			dst_free(&rt->dst);
			goto cleanup;
		}

		rtnh = rtnh_next(rtnh, &remaining);
	}

	err_nh = NULL;
	list_for_each_entry(nh, &rt6_nh_list, next) {
		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
		/* nh->rt6_info is used or freed at this point, reset to NULL*/
		nh->rt6_info = NULL;
		if (err) {
			if (replace && nhn)
				ip6_print_replace_route_err(&rt6_nh_list);
			err_nh = nh;
			goto add_errout;
		}

		/* Because each route is added like a single route we remove
		 * these flags after the first nexthop: if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it; when replacing, old
		 * nexthops have been replaced by first new, the rest should
		 * be added to it.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		nhn++;
	}

	goto cleanup;

add_errout:
	/* Delete routes that were already added */
	list_for_each_entry(nh, &rt6_nh_list, next) {
		if (err_nh == nh)
			break;
		ip6_route_del(&nh->r_cfg);
	}

cleanup:
	/* Free any nexthops never handed to __ip6_ins_rt(), plus the
	 * list entries and converted metrics.
	 */
	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
		if (nh->rt6_info)
			dst_free(&nh->rt6_info->dst);
		kfree(nh->mxc.mx);
		list_del(&nh->next);
		kfree(nh);
	}

	return err;
}
3074
/* Delete a multipath route nexthop by nexthop. Each RTA_MULTIPATH
 * entry is turned into a per-nexthop config and deleted individually;
 * deletion continues past failures and the last error is returned
 * (0 if every nexthop was removed).
 */
static int ip6_route_multipath_del(struct fib6_config *cfg)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	int remaining;
	int attrlen;
	int err = 1, last_err = 0;

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
		}
		err = ip6_route_del(&r_cfg);
		if (err)
			last_err = err;

		rtnh = rtnh_next(rtnh, &remaining);
	}

	return last_err;
}
3111
inet6_rtm_delroute(struct sk_buff * skb,struct nlmsghdr * nlh)3112 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3113 {
3114 struct fib6_config cfg;
3115 int err;
3116
3117 err = rtm_to_fib6_config(skb, nlh, &cfg);
3118 if (err < 0)
3119 return err;
3120
3121 if (cfg.fc_mp)
3122 return ip6_route_multipath_del(&cfg);
3123 else
3124 return ip6_route_del(&cfg);
3125 }
3126
inet6_rtm_newroute(struct sk_buff * skb,struct nlmsghdr * nlh)3127 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3128 {
3129 struct fib6_config cfg;
3130 int err;
3131
3132 err = rtm_to_fib6_config(skb, nlh, &cfg);
3133 if (err < 0)
3134 return err;
3135
3136 if (cfg.fc_mp)
3137 return ip6_route_multipath_add(&cfg);
3138 else
3139 return ip6_route_add(&cfg);
3140 }
3141
/* Worst-case netlink message size for dumping @rt via rt6_fill_node();
 * used to size the skb for route notifications.
 */
static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
{
	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo))
	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
	       + nla_total_size(1) /* RTA_PREF */
	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
}
3159
/* Serialize @rt into an rtnetlink message on @skb.
 *
 * @dst/@src: concrete addresses for a route-get reply (forces /128
 *            lengths in the header); NULL when dumping the table.
 * @iif:      input interface for a route-get reply, 0 otherwise.
 * @prefix:   when non-zero, only emit RTF_PREFIX_RT routes (returns 1,
 *            "success but skipped", for anything else).
 * @nowait:   passed through to ip6mr_get_route() for multicast lookups.
 *
 * Returns 0 on success, 1 when skipped, -EMSGSIZE when @skb is full
 * (the partial message is cancelled).
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	u32 metrics[RTAX_MAX];
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	if (rt->rt6i_flags & RTF_REJECT) {
		/* Map the route's error code back to the reject subtype. */
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	if (!netif_carrier_ok(rt->dst.dev)) {
		rtm->rtm_flags |= RTNH_F_LINKDOWN;
		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
			rtm->rtm_flags |= RTNH_F_DEAD;
	}
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		if (nla_put_in6_addr(skb, RTA_DST, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait,
						  portid);

			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* A cached clone's PMTU overrides the inherited MTU metric. */
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt6i_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
		goto nla_put_failure;

	if (lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
3322
rt6_dump_route(struct rt6_info * rt,void * p_arg)3323 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3324 {
3325 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3326 int prefix;
3327
3328 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3329 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3330 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3331 } else
3332 prefix = 0;
3333
3334 return rt6_fill_node(arg->net,
3335 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3336 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3337 prefix, 0, NLM_F_MULTI);
3338 }
3339
/* RTM_GETROUTE handler: resolve a single route for the flow described by
 * the request attributes (RTA_SRC, RTA_DST, RTA_IIF, RTA_OIF, RTA_MARK,
 * RTA_UID) and unicast an RTM_NEWROUTE answer back to the requester.
 * Returns 0 on success or a negative errno.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	int err, iif = 0, oif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));
	rtm = nlmsg_data(nlh);
	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);

	/* Address attributes shorter than a full in6_addr are rejected. */
	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_MARK])
		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);

	/* Without an explicit RTA_UID, input-side lookups (iif set) use
	 * INVALID_UID while locally-originated ones use the caller's uid.
	 */
	if (tb[RTA_UID])
		fl6.flowi6_uid = make_kuid(current_user_ns(),
					   nla_get_u32(tb[RTA_UID]));
	else
		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();

	if (iif) {
		/* Simulate reception on @iif: input-path route lookup. */
		struct net_device *dev;
		int flags = 0;

		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
							       flags);
	} else {
		/* Output-path lookup for locally-originated traffic. */
		fl6.flowi6_oif = oif;

		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		/* We still hold the route reference here; drop it. */
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* Ownership of the route reference transfers to the skb; it is
	 * released when the skb is freed, so no explicit put below.
	 */
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
3438
/* Broadcast a route change (@event, e.g. RTM_NEWROUTE/RTM_DELROUTE) for
 * @rt to RTNLGRP_IPV6_ROUTE listeners.  On failure the error is recorded
 * on the multicast group so listeners can detect the lost event.
 */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
		     unsigned int nlm_flags)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;

	/* rt6_nlmsg_size() must cover every attribute rt6_fill_node() can
	 * emit; gfp_any() picks an allocation mode valid in this context.
	 */
	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
			    event, info->portid, seq, 0, 0, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
3469
ip6_route_dev_notify(struct notifier_block * this,unsigned long event,void * ptr)3470 static int ip6_route_dev_notify(struct notifier_block *this,
3471 unsigned long event, void *ptr)
3472 {
3473 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3474 struct net *net = dev_net(dev);
3475
3476 if (!(dev->flags & IFF_LOOPBACK))
3477 return NOTIFY_OK;
3478
3479 if (event == NETDEV_REGISTER) {
3480 net->ipv6.ip6_null_entry->dst.dev = dev;
3481 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3482 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3483 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3484 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3485 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3486 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3487 #endif
3488 } else if (event == NETDEV_UNREGISTER &&
3489 dev->reg_state != NETREG_UNREGISTERED) {
3490 /* NETDEV_UNREGISTER could be fired for multiple times by
3491 * netdev_wait_allrefs(). Make sure we only call this once.
3492 */
3493 in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
3494 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3495 in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
3496 in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev);
3497 #endif
3498 }
3499
3500 return NOTIFY_OK;
3501 }
3502
3503 /*
3504 * /proc
3505 */
3506
3507 #ifdef CONFIG_PROC_FS
3508
/* /proc/net/ipv6_route: seq_file view of the routing table
 * (ipv6_route_open is defined elsewhere in this file).
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
3516
rt6_stats_seq_show(struct seq_file * seq,void * v)3517 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3518 {
3519 struct net *net = (struct net *)seq->private;
3520 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3521 net->ipv6.rt6_stats->fib_nodes,
3522 net->ipv6.rt6_stats->fib_route_nodes,
3523 net->ipv6.rt6_stats->fib_rt_alloc,
3524 net->ipv6.rt6_stats->fib_rt_entries,
3525 net->ipv6.rt6_stats->fib_rt_cache,
3526 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3527 net->ipv6.rt6_stats->fib_discarded_routes);
3528
3529 return 0;
3530 }
3531
/* open() hook: bind rt6_stats_seq_show to the owning net namespace. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
3536
/* /proc/net/rt6_stats file operations. */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
3544 #endif /* CONFIG_PROC_FS */
3545
3546 #ifdef CONFIG_SYSCTL
3547
3548 static
ipv6_sysctl_rtcache_flush(struct ctl_table * ctl,int write,void __user * buffer,size_t * lenp,loff_t * ppos)3549 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3550 void __user *buffer, size_t *lenp, loff_t *ppos)
3551 {
3552 struct net *net;
3553 int delay;
3554 if (!write)
3555 return -EINVAL;
3556
3557 net = (struct net *)ctl->extra1;
3558 delay = net->ipv6.sysctl.flush_delay;
3559 proc_dointvec(ctl, write, buffer, lenp, ppos);
3560 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3561 return 0;
3562 }
3563
/* Template for the per-netns net.ipv6.route.* sysctl table.  Entry ORDER
 * matters: ipv6_route_sysctl_init() rebinds each .data pointer by index,
 * so keep the two in sync when adding or removing entries.
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		/* write-only action trigger; extra1 is set per-netns */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* same variable as gc_min_interval, in milliseconds */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
3637
/* Clone ipv6_route_table_template for one netns and rebind every entry's
 * .data pointer (by index — must match the template's entry order) to the
 * per-netns storage.  Returns the new table, or NULL on allocation failure.
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		/* extra1 carries the netns for ipv6_sysctl_rtcache_flush() */
		table[0].extra1 = net;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}

	return table;
}
3666 #endif
3667
/* Per-netns setup for the IPv6 routing engine: clone the dst_ops template,
 * allocate the special null/prohibit/blackhole route entries, and install
 * default GC tunables.  On failure the goto chain unwinds in reverse
 * allocation order.  Returns 0 or -ENOMEM.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	/* Each netns gets its own copy of the dst_ops so gc_thresh and
	 * entry accounting are per-namespace.
	 */
	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	/* Private copy of the template "null" route; dst.path points back
	 * at itself and dst.ops at this netns' dst_ops.
	 */
	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	/* With policy routing, prohibit and blackhole entries are needed
	 * as well; set up exactly like the null entry above.
	 */
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* Default sysctl tunables; adjustable via net.ipv6.route.*. */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
3739
/* Per-netns teardown: free the special route entries and the dst entry
 * accounting.  Counterpart of ip6_route_net_init().
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
3749
ip6_route_net_init_late(struct net * net)3750 static int __net_init ip6_route_net_init_late(struct net *net)
3751 {
3752 #ifdef CONFIG_PROC_FS
3753 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3754 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3755 #endif
3756 return 0;
3757 }
3758
/* Late per-netns teardown: remove the /proc entries created by
 * ip6_route_net_init_late().
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
3766
/* Core per-netns lifetime hooks for the IPv6 routing engine. */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3771
ipv6_inetpeer_init(struct net * net)3772 static int __net_init ipv6_inetpeer_init(struct net *net)
3773 {
3774 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3775
3776 if (!bp)
3777 return -ENOMEM;
3778 inet_peer_base_init(bp);
3779 net->ipv6.peers = bp;
3780 return 0;
3781 }
3782
/* Per-netns teardown of the IPv6 inet_peer base: detach it from the
 * namespace, invalidate the peer tree, then free the base.
 */
static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}
3791
/* Per-netns lifetime of the IPv6 inet_peer storage. */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init = ipv6_inetpeer_init,
	.exit = ipv6_inetpeer_exit,
};

/* Late per-netns hooks (proc entries); registered after the core ops. */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};

/* Netdevice notifier; priority is set 10 below addrconf's (see
 * ADDRCONF_NOTIFY_PRIORITY) to order it relative to addrconf's handler.
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};
3806
/* Boot-time binding of init_net's special routes to the loopback device,
 * mirroring what ip6_route_dev_notify() does for other namespaces.
 */
void __init ip6_route_init_special_entries(void)
{
	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#endif
}
3821
/* Boot-time init of the IPv6 routing subsystem: dst slab cache, blackhole
 * dst accounting, pernet subsystems, fib6 core, xfrm6, policy rules,
 * rtnetlink message handlers, the device notifier, and the per-cpu
 * uncached-route lists.  The error labels unwind registrations in the
 * reverse of the order performed here.
 */
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* Blackhole dsts share the same slab as regular rt6_info. */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	/* Hook the RTM_{NEW,DEL,GET}ROUTE netlink handlers. */
	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
3902
/* Module teardown: undo ip6_route_init() in reverse registration order. */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3915