1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 #include <trace/events/fib6.h>
66
67 #include <linux/uaccess.h>
68
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72
/* Outcome of a next-hop neighbour reachability check.  Negative values
 * are failures; callers (rt6_score_route()/find_match()) treat them
 * differently, so the distinct codes matter.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* route unusable, skip it entirely */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour failed; candidate for probing */
	RT6_NUD_FAIL_DO_RR = -1,	/* no neighbour entry; round-robin to next */
	RT6_NUD_SUCCEED = 1		/* neighbour (probably) reachable */
};
79
80 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
81 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
82 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
83 static unsigned int ip6_mtu(const struct dst_entry *dst);
84 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85 static void ip6_dst_destroy(struct dst_entry *);
86 static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
88 static int ip6_dst_gc(struct dst_ops *ops);
89
90 static int ip6_pkt_discard(struct sk_buff *skb);
91 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
92 static int ip6_pkt_prohibit(struct sk_buff *skb);
93 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
94 static void ip6_link_failure(struct sk_buff *skb);
95 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu,
97 bool confirm_neigh);
98 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
99 struct sk_buff *skb);
100 static void rt6_dst_from_metrics_check(struct rt6_info *rt);
101 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
102 static size_t rt6_nlmsg_size(struct rt6_info *rt);
103 static int rt6_fill_node(struct net *net,
104 struct sk_buff *skb, struct rt6_info *rt,
105 struct in6_addr *dst, struct in6_addr *src,
106 int iif, int type, u32 portid, u32 seq,
107 unsigned int flags);
108
109 #ifdef CONFIG_IPV6_ROUTE_INFO
110 static struct rt6_info *rt6_add_route_info(struct net *net,
111 const struct in6_addr *prefix, int prefixlen,
112 const struct in6_addr *gwaddr,
113 struct net_device *dev,
114 unsigned int pref);
115 static struct rt6_info *rt6_get_route_info(struct net *net,
116 const struct in6_addr *prefix, int prefixlen,
117 const struct in6_addr *gwaddr,
118 struct net_device *dev);
119 #endif
120
/* Per-cpu list of "uncached" routes, i.e. rt6_info not owned by the
 * fib6 tree.  rt6_uncached_list_flush_dev() walks these on device
 * teardown to re-point them at the loopback device.
 */
struct uncached_list {
	spinlock_t lock;	/* protects head and the entries' linkage */
	struct list_head head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
127
rt6_uncached_list_add(struct rt6_info * rt)128 static void rt6_uncached_list_add(struct rt6_info *rt)
129 {
130 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
131
132 rt->rt6i_uncached_list = ul;
133
134 spin_lock_bh(&ul->lock);
135 list_add_tail(&rt->rt6i_uncached, &ul->head);
136 spin_unlock_bh(&ul->lock);
137 }
138
rt6_uncached_list_del(struct rt6_info * rt)139 static void rt6_uncached_list_del(struct rt6_info *rt)
140 {
141 if (!list_empty(&rt->rt6i_uncached)) {
142 struct uncached_list *ul = rt->rt6i_uncached_list;
143
144 spin_lock_bh(&ul->lock);
145 list_del(&rt->rt6i_uncached);
146 spin_unlock_bh(&ul->lock);
147 }
148 }
149
/* Device teardown helper: re-parent every uncached route that still
 * references @dev onto the namespace's loopback device, moving the
 * inet6_dev and netdev references along.  Walks each CPU's list under
 * its lock.  Nothing to do when @dev is the loopback device itself.
 */
static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			/* swap the inet6_dev reference over to loopback */
			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			/* swap the net_device reference over to loopback */
			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}
181
/* Per-cpu route clones keep their metrics in the parent route they
 * were copied from (dst.from); write through to the parent.
 */
static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
	return dst_metrics_write_ptr(rt->dst.from);
}
186
ipv6_cow_metrics(struct dst_entry * dst,unsigned long old)187 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
188 {
189 struct rt6_info *rt = (struct rt6_info *)dst;
190
191 if (rt->rt6i_flags & RTF_PCPU)
192 return rt6_pcpu_cow_metrics(rt);
193 else if (rt->rt6i_flags & RTF_CACHE)
194 return NULL;
195 else
196 return dst_cow_metrics_generic(dst, old);
197 }
198
choose_neigh_daddr(struct rt6_info * rt,struct sk_buff * skb,const void * daddr)199 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
200 struct sk_buff *skb,
201 const void *daddr)
202 {
203 struct in6_addr *p = &rt->rt6i_gateway;
204
205 if (!ipv6_addr_any(p))
206 return (const void *) p;
207 else if (skb)
208 return &ipv6_hdr(skb)->daddr;
209 return daddr;
210 }
211
ip6_neigh_lookup(const struct dst_entry * dst,struct sk_buff * skb,const void * daddr)212 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
213 struct sk_buff *skb,
214 const void *daddr)
215 {
216 struct rt6_info *rt = (struct rt6_info *) dst;
217 struct neighbour *n;
218
219 daddr = choose_neigh_daddr(rt, skb, daddr);
220 n = __ipv6_neigh_lookup(dst->dev, daddr);
221 if (n)
222 return n;
223 return neigh_create(&nd_tbl, daddr, dst->dev);
224 }
225
ip6_confirm_neigh(const struct dst_entry * dst,const void * daddr)226 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
227 {
228 struct net_device *dev = dst->dev;
229 struct rt6_info *rt = (struct rt6_info *)dst;
230
231 daddr = choose_neigh_daddr(rt, NULL, daddr);
232 if (!daddr)
233 return;
234 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
235 return;
236 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
237 return;
238 __ipv6_confirm_neigh(dev, daddr);
239 }
240
/* dst_ops template for ordinary IPv6 routes; each netns gets its own
 * copy (net->ipv6.ip6_dst_ops) initialized from this.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			= AF_INET6,
	.gc			= ip6_dst_gc,
	.gc_thresh		= 1024,
	.check			= ip6_dst_check,
	.default_advmss		= ip6_default_advmss,
	.mtu			= ip6_mtu,
	.cow_metrics		= ipv6_cow_metrics,
	.destroy		= ip6_dst_destroy,
	.ifdown			= ip6_dst_ifdown,
	.negative_advice	= ip6_negative_advice,
	.link_failure		= ip6_link_failure,
	.update_pmtu		= ip6_rt_update_pmtu,
	.redirect		= rt6_do_redirect,
	.local_out		= __ip6_local_out,
	.neigh_lookup		= ip6_neigh_lookup,
	.confirm_neigh		= ip6_confirm_neigh,
};
259
ip6_blackhole_mtu(const struct dst_entry * dst)260 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
261 {
262 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
263
264 return mtu ? : dst->dev->mtu;
265 }
266
/* Blackhole dsts deliberately ignore PMTU updates. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu,
					 bool confirm_neigh)
{
}
272
/* Blackhole dsts deliberately ignore redirects. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
277
/* dst_ops used for blackhole dst entries: PMTU updates and redirects
 * are no-ops, everything else reuses the regular helpers.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			= AF_INET6,
	.destroy		= ip6_dst_destroy,
	.check			= ip6_dst_check,
	.mtu			= ip6_blackhole_mtu,
	.default_advmss		= ip6_default_advmss,
	.update_pmtu		= ip6_rt_blackhole_update_pmtu,
	.redirect		= ip6_rt_blackhole_redirect,
	.cow_metrics		= dst_cow_metrics_generic,
	.neigh_lookup		= ip6_neigh_lookup,
};
289
/* Metrics template for the special route entries below; only
 * RTAX_HOPLIMIT is explicitly initialized (to 0, i.e. unset).
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
293
/* Sentinel returned when no route matches: discards packets and
 * reports -ENETUNREACH.  Refcounts start at 1 so it is never freed.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32)0,	/* worst possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};
308
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Sentinel for "prohibit" policy routes: rejects with -EACCES. */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32)0,	/* worst possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* Sentinel for "blackhole" policy routes: silently discards packets
 * (dst_discard) and reports -EINVAL.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32)0,	/* worst possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
342
/* Reset the rt6_info-specific part of a freshly allocated entry:
 * zero everything that follows the embedded dst_entry (which
 * dst_alloc() already initialized) and set up the list heads.
 */
static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	/* dst + 1 points at the first byte after the dst member */
	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}
351
352 /* allocate dst with ip6_dst_ops */
__ip6_dst_alloc(struct net * net,struct net_device * dev,int flags)353 static struct rt6_info *__ip6_dst_alloc(struct net *net,
354 struct net_device *dev,
355 int flags)
356 {
357 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
358 1, DST_OBSOLETE_FORCE_CHK, flags);
359
360 if (rt)
361 rt6_info_init(rt);
362
363 return rt;
364 }
365
ip6_dst_alloc(struct net * net,struct net_device * dev,int flags)366 struct rt6_info *ip6_dst_alloc(struct net *net,
367 struct net_device *dev,
368 int flags)
369 {
370 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
371
372 if (rt) {
373 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
374 if (rt->rt6i_pcpu) {
375 int cpu;
376
377 for_each_possible_cpu(cpu) {
378 struct rt6_info **p;
379
380 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
381 /* no one shares rt */
382 *p = NULL;
383 }
384 } else {
385 dst_release_immediate(&rt->dst);
386 return NULL;
387 }
388 }
389
390 return rt;
391 }
392 EXPORT_SYMBOL(ip6_dst_alloc);
393
/* dst_ops->destroy: release everything a rt6_info holds — generic
 * metrics, the per-cpu clone array, its uncached-list linkage, the
 * inet6_dev reference, and finally the parent dst it was cloned from.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	/* drop the inet6_dev reference, if any */
	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	/* clear the back pointer before releasing the parent */
	dst->from = NULL;
	dst_release(from);
}
413
/* dst_ops->ifdown: @dev is going away.  Re-point rt6i_idev at the
 * loopback device of @dev's namespace, unless it already is loopback.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (idev && idev->dev != loopback_dev) {
		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);

		/* only swap if the loopback idev reference was obtained */
		if (loopback_idev) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}
430
__rt6_check_expired(const struct rt6_info * rt)431 static bool __rt6_check_expired(const struct rt6_info *rt)
432 {
433 if (rt->rt6i_flags & RTF_EXPIRES)
434 return time_after(jiffies, rt->dst.expires);
435 else
436 return false;
437 }
438
rt6_check_expired(const struct rt6_info * rt)439 static bool rt6_check_expired(const struct rt6_info *rt)
440 {
441 if (rt->rt6i_flags & RTF_EXPIRES) {
442 if (time_after(jiffies, rt->dst.expires))
443 return true;
444 } else if (rt->dst.from) {
445 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
446 rt6_check_expired((struct rt6_info *)rt->dst.from);
447 }
448 return false;
449 }
450
rt6_multipath_select(struct rt6_info * match,struct flowi6 * fl6,int oif,int strict)451 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
452 struct flowi6 *fl6, int oif,
453 int strict)
454 {
455 struct rt6_info *sibling, *next_sibling;
456 int route_choosen;
457
458 /* We might have already computed the hash for ICMPv6 errors. In such
459 * case it will always be non-zero. Otherwise now is the time to do it.
460 */
461 if (!fl6->mp_hash)
462 fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
463
464 route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
465 /* Don't change the route, if route_choosen == 0
466 * (siblings does not include ourself)
467 */
468 if (route_choosen)
469 list_for_each_entry_safe(sibling, next_sibling,
470 &match->rt6i_siblings, rt6i_siblings) {
471 route_choosen--;
472 if (route_choosen == 0) {
473 if (rt6_score_route(sibling, oif, strict) < 0)
474 break;
475 match = sibling;
476 break;
477 }
478 }
479 return match;
480 }
481
482 /*
483 * Route lookup. Any table->tb6_lock is implied.
484 */
485
/* Walk the leaf chain starting at @rt and pick the entry matching the
 * output interface @oif; with no @oif, pick the one whose device owns
 * @saddr.  Routes on loopback devices are remembered as a fallback.
 * With RT6_LOOKUP_F_IFACE set and no match at all, ip6_null_entry is
 * returned; otherwise @rt itself is the fallback.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						struct rt6_info *rt,
						const struct in6_addr *saddr,
						int oif,
						int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* nothing to constrain on: any route will do */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;	/* exact device match */
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					/* prefer a loopback whose idev
					 * does match oif, if we saw one
					 */
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;	/* device owns saddr */
		}
	}

	if (oif) {
		if (local)
			return local;

		/* strict interface match requested but not satisfied */
		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
532
533 #ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred neighbour-solicitation request, handed to the workqueue by
 * rt6_probe() and executed by rt6_probe_deferred().  Holds a reference
 * on @dev until the probe is sent.
 */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;		/* gateway address to solicit */
	struct net_device *dev;
};
539
rt6_probe_deferred(struct work_struct * w)540 static void rt6_probe_deferred(struct work_struct *w)
541 {
542 struct in6_addr mcaddr;
543 struct __rt6_probe_work *work =
544 container_of(w, struct __rt6_probe_work, work);
545
546 addrconf_addr_solict_mult(&work->target, &mcaddr);
547 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
548 dev_put(work->dev);
549 kfree(work);
550 }
551
/* Router Reachability Probing: if @rt's gateway neighbour is not in a
 * VALID state and has not been probed within rtr_probe_interval, queue
 * a deferred neighbour solicitation.  The rate limit is enforced by
 * __neigh_set_probe_once() under the neighbour lock; the actual send
 * happens from the workqueue (rt6_probe_deferred).
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;	/* reachable, nothing to probe */

		work = NULL;
		write_lock(&neigh->lock);
		/* recheck state under the lock and apply the rate limit */
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		/* no neighbour entry yet: always worth a probe */
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);	/* released in rt6_probe_deferred */
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
598 #else
/* Without CONFIG_IPV6_ROUTER_PREF, router probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
602 #endif
603
604 /*
605 * Default Router Selection (RFC 2461 6.3.6)
606 */
rt6_check_dev(struct rt6_info * rt,int oif)607 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
608 {
609 struct net_device *dev = rt->dst.dev;
610 if (!oif || dev->ifindex == oif)
611 return 2;
612 if ((dev->flags & IFF_LOOPBACK) &&
613 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
614 return 1;
615 return 0;
616 }
617
/* Classify the reachability of @rt's next hop (see rt6_nud_state).
 * Routes without a gateway (or with RTF_NONEXTHOP) always succeed.
 * With CONFIG_IPV6_ROUTER_PREF, a neighbour that has not hard-failed
 * still succeeds and a failed one becomes a probe candidate; a missing
 * neighbour entry succeeds there (it will be probed), but without
 * router-pref support it asks the caller to round-robin instead.
 */
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
648
/* Score @rt for router selection.  The device match contributes the
 * low bits, the decoded router preference (if configured) the next
 * bits.  Returns RT6_NUD_FAIL_HARD when a strict interface match is
 * required and missing; with RT6_LOOKUP_F_REACHABLE a negative
 * rt6_check_neigh() result is passed straight through.
 */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	/* fold the router preference above the device-match bits */
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);

		if (n < 0)
			return n;
	}
	return m;
}
667
/* Compare @rt against the current best @match and return the better of
 * the two, updating *mpri (best score so far) and *do_rr (whether the
 * caller should advance the round-robin pointer).  Expired routes, and
 * routes on carrier-less devices when the idev asks to ignore linkdown
 * routes, are skipped.  May kick off a neighbour probe when
 * RT6_LOOKUP_F_REACHABLE is requested.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown &&
	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		/* no neighbour entry: usable, but ask for round-robin */
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}
705
/* Find the best route with metric @metric among @fn's leaf entries,
 * starting the scan at the round-robin head @rr_head and wrapping
 * around to the front of the leaf list.  If no route with that metric
 * matched, continue from the first route with a different metric
 * (@cont) as a fallback.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	/* first pass: from the round-robin head to the end */
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	/* second pass: from the front of the leaf list up to rr_head */
	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	/* fallback: consider the routes with other metrics */
	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}
742
/* Default Router Selection for @fn's leaf list: pick the best route at
 * the round-robin pointer's metric, and advance fn->rr_ptr when
 * find_rr_leaf() asked for round-robin.  Falls back to ip6_null_entry
 * when nothing matched.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
770
rt6_is_gw_or_nonexthop(const struct rt6_info * rt)771 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
772 {
773 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
774 }
775
776 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Process a Route Information Option received in a Router
 * Advertisement (RFC 4191).  Validates the option's length/prefix-len
 * combination and the preference, then adds, refreshes or (on zero
 * lifetime) deletes the corresponding route.  Returns 0 on success,
 * -EINVAL on a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		/* prefixes longer than 64 bits need length >= 2 (16 bytes) */
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* prefix length 0 means a default route via gwaddr */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	/* zero lifetime: the route is being withdrawn */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev, pref);
	else if (rt)
		/* refresh the preference on the existing route */
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
850 #endif
851
/* Walk back up the fib trie from @fn until a node carrying route
 * information (RTN_RTINFO) is found, descending into source-address
 * subtrees on the way.  Returns NULL once the tree root is reached.
 */
static struct fib6_node *fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn;

	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = fn->parent;
		/* coming back up out of a subtree: redo the subtree lookup */
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}
868
/* Simple flow-based lookup in @table: match the device and, for ECMP
 * routes without an oif constraint, a multipath sibling; backtrack up
 * the trie on a null-entry result.  Takes tb6_lock for reading and
 * returns a held dst (via dst_use).  No cached clones are created.
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		flags &= ~RT6_LOOKUP_F_IFACE;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		/* nothing here; climb back up and try a less specific node */
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);

	return rt;

}
899
/* Public entry point for a simple (non-caching) route lookup: defer to
 * the policy-rule engine with ip6_pol_route_lookup as the backend.
 */
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				   int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
906
rt6_lookup(struct net * net,const struct in6_addr * daddr,const struct in6_addr * saddr,int oif,int strict)907 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
908 const struct in6_addr *saddr, int oif, int strict)
909 {
910 struct flowi6 fl6 = {
911 .flowi6_oif = oif,
912 .daddr = *daddr,
913 };
914 struct dst_entry *dst;
915 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
916
917 if (saddr) {
918 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
919 flags |= RT6_LOOKUP_F_HAS_SADDR;
920 }
921
922 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
923 if (dst->error == 0)
924 return (struct rt6_info *) dst;
925
926 dst_release(dst);
927
928 return NULL;
929 }
930 EXPORT_SYMBOL(rt6_lookup);
931
/* ip6_ins_rt is called with table->tb6_lock NOT held (it is taken
 * internally).  It takes ownership of the new route entry; if the
 * addition fails for any reason, the route is released.
 * The caller must hold a dst reference before calling.
 */
937
__ip6_ins_rt(struct rt6_info * rt,struct nl_info * info,struct mx6_config * mxc,struct netlink_ext_ack * extack)938 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
939 struct mx6_config *mxc,
940 struct netlink_ext_ack *extack)
941 {
942 int err;
943 struct fib6_table *table;
944
945 table = rt->rt6i_table;
946 write_lock_bh(&table->tb6_lock);
947 err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
948 write_unlock_bh(&table->tb6_lock);
949
950 return err;
951 }
952
ip6_ins_rt(struct rt6_info * rt)953 int ip6_ins_rt(struct rt6_info *rt)
954 {
955 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
956 struct mx6_config mxc = { .mx = NULL, };
957
958 /* Hold dst to account for the reference from the fib6 tree */
959 dst_hold(&rt->dst);
960 return __ip6_ins_rt(rt, &info, &mxc, NULL);
961 }
962
/* Pick the device a copy of a local/anycast route should use: the
 * l3mdev master when @rt's device is enslaved (and the destination
 * does not need strict routing), the loopback device otherwise, or the
 * device itself when it already is an l3 master.  Non-local routes
 * keep their device unchanged.  Called with rcu_read_lock held.
 */
static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
{
	struct net_device *dev = rt->dst.dev;

	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
		/* for copies of local routes, dst->dev needs to be the
		 * device if it is a master device, the master device if
		 * device is enslaved, and the loopback as the default
		 */
		if (netif_is_l3_slave(dev) &&
		    !rt6_need_strict(&rt->rt6i_dst.addr))
			dev = l3mdev_master_dev_rcu(dev);
		else if (!netif_is_l3_master(dev))
			dev = dev_net(dev)->loopback_dev;
		/* last case is netif_is_l3_master(dev) is true in which
		 * case we want dev returned to be dev
		 */
	}

	return dev;
}
985
/* Create an RTF_CACHE clone of @ort for destination @daddr (full /128
 * host entry).  If @ort itself is a cache or per-cpu clone, clone its
 * parent instead.  For non-gateway routes, mark the clone RTF_ANYCAST
 * when @daddr equals the (non-host) prefix address, and with subtrees
 * record @saddr as a /128 source.  Returns NULL on allocation failure.
 */
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct net_device *dev;
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	/* clones are always made from the original route */
	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rcu_read_lock();
	dev = ip6_rt_get_dev_rcu(ort);
	rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
	rcu_read_unlock();
	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}
1028
ip6_rt_pcpu_alloc(struct rt6_info * rt)1029 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1030 {
1031 struct net_device *dev;
1032 struct rt6_info *pcpu_rt;
1033
1034 rcu_read_lock();
1035 dev = ip6_rt_get_dev_rcu(rt);
1036 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1037 rcu_read_unlock();
1038 if (!pcpu_rt)
1039 return NULL;
1040 ip6_rt_copy_init(pcpu_rt, rt);
1041 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1042 pcpu_rt->rt6i_flags |= RTF_PCPU;
1043 return pcpu_rt;
1044 }
1045
/* Return this CPU's cached clone of @rt with a held reference, or NULL
 * when no clone has been created yet.
 * It should be called with read_lock_bh(&tb6_lock) acquired.
 */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt) {
		dst_hold(&pcpu_rt->dst);
		rt6_dst_from_metrics_check(pcpu_rt);
	}
	return pcpu_rt;
}
1060
/* Create and install this CPU's clone of @rt, racing via cmpxchg with
 * other contexts that may do the same.  On allocation failure returns
 * a held ip6_null_entry; if @rt was removed from the tree meanwhile,
 * returns @rt itself so the next dst_check() forces a re-lookup.  The
 * returned entry always carries a new reference.
 */
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct fib6_table *table = rt->rt6i_table;
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	read_lock_bh(&table->tb6_lock);
	if (rt->rt6i_pcpu) {
		p = this_cpu_ptr(rt->rt6i_pcpu);
		prev = cmpxchg(p, NULL, pcpu_rt);
		if (prev) {
			/* If someone did it before us, return prev instead */
			dst_release_immediate(&pcpu_rt->dst);
			pcpu_rt = prev;
		}
	} else {
		/* rt has been removed from the fib6 tree
		 * before we have a chance to acquire the read_lock.
		 * In this case, don't brother to create a pcpu rt
		 * since rt is going away anyway. The next
		 * dst_check() will trigger a re-lookup.
		 */
		dst_release_immediate(&pcpu_rt->dst);
		pcpu_rt = rt;
	}
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	read_unlock_bh(&table->tb6_lock);
	return pcpu_rt;
}
1098
/* Core policy-routing lookup shared by the input and output paths.
 *
 * Walks @table under tb6_lock, selecting the best route and
 * backtracking through the tree on failure; if a strict
 * "reachable routers only" pass finds nothing, the lookup is retried
 * from the saved node without RT6_LOOKUP_F_REACHABLE.  The result is
 * returned with a dst reference held for the caller and is one of:
 *  - the matched route itself (null entry or an RTF_CACHE clone),
 *  - a fresh uncached RTF_CACHE clone (FLOWI_FLAG_KNOWN_NH case), or
 *  - a per-CPU clone of the matched FIB route.
 */
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
			       int oif, struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
	/* When forwarding is globally off, first try to pick a
	 * (probably) reachable router.
	 */
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;	/* restart point for the relaxed retry below */

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		/* Null entry and cache clones can be handed back
		 * directly; just account the use and take a reference.
		 */
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);

		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt) {
			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
			 * No need for another dst_hold()
			 */
			rt6_uncached_list_add(uncached_rt);
		} else {
			uncached_rt = net->ipv6.ip6_null_entry;
			dst_hold(&uncached_rt->dst);
		}

		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
		return pcpu_rt;

	}
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
1201
/* Input-path lookup callback: route on the packet's incoming interface. */
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}
1207
ip6_route_input_lookup(struct net * net,struct net_device * dev,struct flowi6 * fl6,int flags)1208 struct dst_entry *ip6_route_input_lookup(struct net *net,
1209 struct net_device *dev,
1210 struct flowi6 *fl6, int flags)
1211 {
1212 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1213 flags |= RT6_LOOKUP_F_IFACE;
1214
1215 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1216 }
1217 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1218
/* Extract the L3 keys used for multipath flow hashing from @skb.
 *
 * For ICMPv6 error messages, the keys are taken from the embedded
 * (offending) inner IPv6 header when it can be read, so that errors
 * hash onto the same path as the flow that triggered them; otherwise
 * the outer header is used.  @keys is fully (re)initialized here.
 */
static void ip6_multipath_l3_keys(const struct sk_buff *skb,
				  struct flow_keys *keys)
{
	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
	const struct ipv6hdr *key_iph = outer_iph;
	const struct ipv6hdr *inner_iph;
	const struct icmp6hdr *icmph;
	struct ipv6hdr _inner_iph;
	struct icmp6hdr _icmph;

	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
		goto out;

	icmph = skb_header_pointer(skb, skb_transport_offset(skb),
				   sizeof(_icmph), &_icmph);
	if (!icmph)
		goto out;

	/* Only ICMPv6 error types carry the offending packet. */
	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
	    icmph->icmp6_type != ICMPV6_PARAMPROB)
		goto out;

	inner_iph = skb_header_pointer(skb,
				       skb_transport_offset(skb) + sizeof(*icmph),
				       sizeof(_inner_iph), &_inner_iph);
	if (!inner_iph)
		goto out;

	key_iph = inner_iph;
out:
	memset(keys, 0, sizeof(*keys));
	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	keys->addrs.v6addrs.src = key_iph->saddr;
	keys->addrs.v6addrs.dst = key_iph->daddr;
	keys->tags.flow_label = ip6_flowlabel(key_iph);
	keys->basic.ip_proto = key_iph->nexthdr;
}
1258
1259 /* if skb is set it will be used and fl6 can be NULL */
rt6_multipath_hash(const struct flowi6 * fl6,const struct sk_buff * skb)1260 u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1261 {
1262 struct flow_keys hash_keys;
1263
1264 if (skb) {
1265 ip6_multipath_l3_keys(skb, &hash_keys);
1266 return flow_hash_from_keys(&hash_keys);
1267 }
1268
1269 return get_hash_from_flowi6(fl6);
1270 }
1271
/* Resolve and attach a route (dst) for a received packet.
 *
 * Builds a flowi6 from the IPv6 header, carries over any RX tunnel
 * key, computes a multipath hash for ICMPv6 packets (so errors follow
 * the same path as the offending flow), and stores the lookup result
 * as skb's dst.
 */
void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip_tunnel_info *tun_info;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
		fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
	skb_dst_drop(skb);
	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
1295
/* Output-path lookup callback: route on the flow's outgoing interface. */
static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}
1301
/* Output-path route lookup entry point.
 *
 * Strict-scope destinations (per rt6_need_strict()) are first offered
 * to an L3 master (VRF) device.  Otherwise the flow is normalized —
 * loopback iif, strict-interface and source-address flags — and
 * dispatched through the policy-rule engine.
 */
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
					 struct flowi6 *fl6, int flags)
{
	bool any_src;

	if (rt6_need_strict(&fl6->daddr)) {
		struct dst_entry *dst;

		dst = l3mdev_link_scope_lookup(net, fl6);
		if (dst)
			return dst;
	}

	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	any_src = ipv6_addr_any(&fl6->saddr);
	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
	    (fl6->flowi6_oif && any_src))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!any_src)
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		/* No source given: honour the socket's srcprefs during
		 * source-address selection.
		 */
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1330
/* Turn @dst_orig into a "blackhole" dst: a copy of its addressing and
 * metrics attached to the loopback device, whose input/output handlers
 * silently discard packets (used e.g. while xfrm resolution is
 * pending).  Consumes the caller's reference on @dst_orig and returns
 * the new dst or ERR_PTR(-ENOMEM).
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct net_device *loopback_dev = net->loopback_dev;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
		       DST_OBSOLETE_DEAD, 0);
	if (rt) {
		rt6_info_init(rt);

		new = &rt->dst;
		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_out;

		dst_copy_metrics(new, &ort->dst);

		rt->rt6i_idev = in6_dev_get(loopback_dev);
		rt->rt6i_gateway = ort->rt6i_gateway;
		/* The copy is standalone; it must not claim to be a
		 * per-cpu clone of anything.
		 */
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
1363
1364 /*
1365 * Destination cache support functions
1366 */
1367
rt6_dst_from_metrics_check(struct rt6_info * rt)1368 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1369 {
1370 if (rt->dst.from &&
1371 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1372 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1373 }
1374
/* Validate a plain route against the caller's @cookie: it must still
 * be linked in the tree with a matching generation cookie and must not
 * have expired.  Returns the dst on success, NULL when stale.
 */
static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
{
	u32 rt_cookie = 0;

	if (rt6_get_cookie_safe(rt, &rt_cookie) && rt_cookie == cookie &&
	    !rt6_check_expired(rt))
		return &rt->dst;

	return NULL;
}
1387
/* Validate a clone that derives from a parent route (dst.from): the
 * clone itself must not have expired, must still demand checking, and
 * its parent must pass rt6_check().  Returns the dst or NULL.
 */
static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
{
	if (__rt6_check_expired(rt))
		return NULL;
	if (rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK)
		return NULL;
	if (!rt6_check((struct rt6_info *)(rt->dst.from), cookie))
		return NULL;

	return &rt->dst;
}
1397
/* dst_ops->check hook: (re)validate a cached dst against @cookie.
 *
 * All IPV6 dsts are created with ->obsolete set to the value
 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
 * into this function always.
 *
 * Per-cpu clones and uncached clones with a parent are validated
 * through that parent (dst.from); everything else directly.
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	rt6_dst_from_metrics_check(rt);

	if (rt->rt6i_flags & RTF_PCPU ||
	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
		return rt6_dst_from_check(rt, cookie);
	else
		return rt6_check(rt, cookie);
}
1417
/* dst_ops->negative_advice hook: a socket reports its cached route
 * looks bad.  An expired RTF_CACHE clone is deleted from the tree;
 * any other route is released so the socket re-looks-up.  Returns the
 * dst the socket should keep (NULL to force a fresh lookup).
 */
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				/* ip6_del_rt() consumes the reference
				 * the socket held on this dst.
				 */
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}
1435
/* dst_ops->link_failure hook: L2 resolution failed for skb's route.
 *
 * Sends ICMPV6_ADDR_UNREACH back to the origin, then invalidates the
 * route: an RTF_CACHE clone is deleted from the tree, while a default
 * route gets its fib6 node's sernum poisoned (-1) so cached dsts fail
 * their next ip6_dst_check() and are re-looked-up.
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* Only delete if we still own a reference;
			 * the dst may be dying concurrently.
			 */
			if (dst_hold_safe(&rt->dst))
				ip6_del_rt(rt);
		} else {
			struct fib6_node *fn;

			/* rt6i_node is RCU-protected against tree updates. */
			rcu_read_lock();
			fn = rcu_dereference(rt->rt6i_node);
			if (fn && (rt->rt6i_flags & RTF_DEFAULT))
				fn->fn_sernum = -1;
			rcu_read_unlock();
		}
	}
}
1458
/* Record a new path MTU on @rt and (re)arm its expiry using the
 * ip6_rt_mtu_expires sysctl, marking the route as modified.
 */
static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
{
	struct net *net = dev_net(rt->dst.dev);

	rt->rt6i_flags |= RTF_MODIFIED;
	rt->rt6i_pmtu = mtu;
	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
}
1467
rt6_cache_allowed_for_pmtu(const struct rt6_info * rt)1468 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1469 {
1470 return !(rt->rt6i_flags & RTF_CACHE) &&
1471 (rt->rt6i_flags & RTF_PCPU ||
1472 rcu_access_pointer(rt->rt6i_node));
1473 }
1474
/* Core PMTU update for the route behind @dst.
 *
 * The flow endpoints come from @iph (ICMPv6 Packet Too Big path) or
 * from @sk (socket hint); with neither, no clone can be created.
 * Routes with a locked MTU metric are left alone, and only reductions
 * below the current path MTU (floored at IPV6_MIN_MTU) are applied.
 * Shared routes (per rt6_cache_allowed_for_pmtu()) get a private
 * RTF_CACHE clone carrying the new MTU inserted into the tree;
 * private routes are updated in place.
 * @confirm_neigh: also confirm reachability of the nexthop neighbour.
 */
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu,
				 bool confirm_neigh)
{
	const struct in6_addr *daddr, *saddr;
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	if (iph) {
		daddr = &iph->daddr;
		saddr = &iph->saddr;
	} else if (sk) {
		daddr = &sk->sk_v6_daddr;
		saddr = &inet6_sk(sk)->saddr;
	} else {
		daddr = NULL;
		saddr = NULL;
	}

	if (confirm_neigh)
		dst_confirm_neigh(dst, daddr);

	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (!rt6_cache_allowed_for_pmtu(rt6)) {
		rt6_do_update_pmtu(rt6, mtu);
	} else if (daddr) {
		struct rt6_info *nrt6;

		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
			ip6_ins_rt(nrt6);
			/* Release the reference taken in
			 * ip6_rt_cache_alloc()
			 */
			dst_release(&nrt6->dst);
		}
	}
}
1525
/* dst_ops->update_pmtu hook: thin wrapper pulling the IPv6 header out
 * of @skb (which may be NULL) for __ip6_rt_update_pmtu().
 */
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu,
			       bool confirm_neigh)
{
	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu,
			     confirm_neigh);
}
1533
/* Apply a PMTU update for the flow described by the packet in
 * @skb->data (an IPv6 header).  @mtu is in network byte order.
 * A zero @mark falls back to the netns reply-mark policy for the skb.
 */
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark, kuid_t uid)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);
	fl6.flowi6_uid = uid;

	/* Look up the route the flow would use and update it. */
	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true);
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1555
/* Socket-level PMTU update: apply @mtu to the socket's flow (falling
 * back to the skb's L3 master device for oif), then refresh the
 * socket's cached dst if the update invalidated it.
 */
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	int oif = sk->sk_bound_dev_if;
	struct dst_entry *dst;

	if (!oif && skb->dev)
		oif = l3mdev_master_ifindex(skb->dev);

	ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);

	/* If the cached dst still validates, nothing more to do. */
	dst = __sk_dst_get(sk);
	if (!dst || !dst->obsolete ||
	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
		return;

	/* Re-resolve the dst unless the socket is busy or the peer is
	 * IPv4-mapped (handled by the IPv4 path).
	 */
	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		ip6_datagram_dst_update(sk, false);
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1577
/* Handle redirects */
struct ip6rd_flowi {
	struct flowi6 fl6;	/* must stay first: __ip6_route_redirect()
				 * casts the flowi6 back to ip6rd_flowi
				 */
	struct in6_addr gateway;	/* router that sent the redirect */
};
1583
/* Lookup callback for redirect processing: find the route the
 * redirect applies to, accepting only routes whose current nexthop
 * matches the router that sent the redirect (rdfl->gateway).
 * Returns a dst-held route; the null entry when nothing matches.
 */
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/* Get the "current" route for this destination and
	 * check if the redirect has come from appropriate router.
	 *
	 * RFC 4861 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt6_check_expired(rt))
			continue;
		if (rt->dst.error)
			break;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	else if (rt->dst.error) {
		/* Error routes (reject/blackhole) end the search. */
		rt = net->ipv6.ip6_null_entry;
		goto out;
	}

	/* No match at this node: climb the tree and retry. */
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
	return rt;
};
1641
ip6_route_redirect(struct net * net,const struct flowi6 * fl6,const struct in6_addr * gateway)1642 static struct dst_entry *ip6_route_redirect(struct net *net,
1643 const struct flowi6 *fl6,
1644 const struct in6_addr *gateway)
1645 {
1646 int flags = RT6_LOOKUP_F_HAS_SADDR;
1647 struct ip6rd_flowi rdfl;
1648
1649 rdfl.fl6 = *fl6;
1650 rdfl.gateway = *gateway;
1651
1652 return fib6_rule_lookup(net, &rdfl.fl6,
1653 flags, __ip6_route_redirect);
1654 }
1655
/* Process an ICMPv6 redirect for the flow described by the IPv6
 * header at @skb->data: look up the affected route (validating the
 * redirect came from its current nexthop) and hand the result to
 * rt6_do_redirect().
 */
void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
		  kuid_t uid)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);
	fl6.flowi6_uid = uid;

	/* The redirecting router is the source of the outer packet. */
	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);
1677
/* Variant of ip6_redirect() for redirects whose ICMPv6 payload lacks
 * the offending packet header: the target is taken from the rd_msg
 * destination and the flow source from the outer header's daddr.
 */
void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
			    u32 mark)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = msg->dest;
	fl6.saddr = iph->daddr;
	fl6.flowi6_uid = sock_net_uid(net, NULL);

	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
1698
/* Socket convenience wrapper: process a redirect using the socket's
 * bound device, mark and uid.
 */
void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
		     sk->sk_uid);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1705
ip6_default_advmss(const struct dst_entry * dst)1706 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1707 {
1708 struct net_device *dev = dst->dev;
1709 unsigned int mtu = dst_mtu(dst);
1710 struct net *net = dev_net(dev);
1711
1712 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1713
1714 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1715 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1716
1717 /*
1718 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1719 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1720 * IPV6_MAXPLEN is also valid and means: "any MSS,
1721 * rely only on pmtu discovery"
1722 */
1723 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1724 mtu = IPV6_MAXPLEN;
1725 return mtu;
1726 }
1727
ip6_mtu(const struct dst_entry * dst)1728 static unsigned int ip6_mtu(const struct dst_entry *dst)
1729 {
1730 const struct rt6_info *rt = (const struct rt6_info *)dst;
1731 unsigned int mtu = rt->rt6i_pmtu;
1732 struct inet6_dev *idev;
1733
1734 if (mtu)
1735 goto out;
1736
1737 mtu = dst_metric_raw(dst, RTAX_MTU);
1738 if (mtu)
1739 goto out;
1740
1741 mtu = IPV6_MIN_MTU;
1742
1743 rcu_read_lock();
1744 idev = __in6_dev_get(dst->dev);
1745 if (idev)
1746 mtu = idev->cnf.mtu6;
1747 rcu_read_unlock();
1748
1749 out:
1750 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1751
1752 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
1753 }
1754
/* Allocate a standalone host route for sending ICMPv6 toward
 * fl6->daddr via @dev.  The route is never inserted into the FIB;
 * it goes on the uncached list instead (see below).  The result is
 * passed through xfrm_lookup() and may therefore be an ERR_PTR.
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		/* Drop the idev reference taken above. */
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_gateway = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev = idev;	/* route now owns the idev reference */
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	/* Add this dst into uncached_list so that rt6_ifdown() can
	 * do proper release of the net_device
	 */
	rt6_uncached_list_add(rt);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
1792
/* dst_ops->gc hook: shrink the IPv6 dst cache.
 *
 * GC is skipped while the entry count is at most ip6_rt_max_size and
 * the minimum interval since the last run has not elapsed.  Each pass
 * bumps ip6_rt_gc_expire so successive runs get more aggressive; a
 * productive run (below gc_thresh) resets it, and the elasticity
 * decay at "out" relaxes it over time.  Returns nonzero while the
 * cache is still over ip6_rt_max_size.
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
1817
/* Parse the netlink metrics blob in @cfg (cfg->fc_mx) into a freshly
 * allocated RTAX_MAX-sized array for fib6 insertion.
 *
 * On success, mxc->mx points at the array (caller must kfree) and
 * mxc->mx_valid marks the supplied slots.  RTAX_CC_ALGO is translated
 * from an algorithm name to its key, RTAX_HOPLIMIT is clamped to 255,
 * and unknown/invalid attributes yield -EINVAL.  Returns 0, -ENOMEM,
 * or -EINVAL; nothing is allocated when cfg->fc_mx is absent.
 */
static int ip6_convert_metrics(struct mx6_config *mxc,
			       const struct fib6_config *cfg)
{
	bool ecn_ca = false;
	struct nlattr *nla;
	int remaining;
	u32 *mp;

	if (!cfg->fc_mx)
		return 0;

	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
	if (unlikely(!mp))
		return -ENOMEM;

	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
		int type = nla_type(nla);
		u32 val;

		if (!type)
			continue;
		if (unlikely(type > RTAX_MAX))
			goto err;

		if (type == RTAX_CC_ALGO) {
			char tmp[TCP_CA_NAME_MAX];

			/* Congestion control is given by name; map it
			 * to the kernel's algorithm key.
			 */
			nla_strlcpy(tmp, nla, sizeof(tmp));
			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
			if (val == TCP_CA_UNSPEC)
				goto err;
		} else {
			val = nla_get_u32(nla);
		}
		if (type == RTAX_HOPLIMIT && val > 255)
			val = 255;
		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
			goto err;

		/* Metric slots are 1-based in netlink, 0-based here. */
		mp[type - 1] = val;
		__set_bit(type - 1, mxc->mx_valid);
	}

	if (ecn_ca) {
		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
	}

	mxc->mx = mp;
	return 0;
err:
	kfree(mp);
	return -EINVAL;
}
1872
/* Resolve the nexthop @gw_addr within the single table named by
 * cfg->fc_table (no policy rules), ignoring link state so a nexthop
 * behind a temporarily-down link still resolves.  Returns NULL when
 * the table does not exist or the lookup yields only the null entry,
 * letting the caller fall back to a full rt6_lookup().
 */
static struct rt6_info *ip6_nh_lookup_table(struct net *net,
					    struct fib6_config *cfg,
					    const struct in6_addr *gw_addr)
{
	struct flowi6 fl6 = {
		.flowi6_oif = cfg->fc_ifindex,
		.daddr = *gw_addr,
		.saddr = cfg->fc_prefsrc,
	};
	struct fib6_table *table;
	struct rt6_info *rt;
	int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;

	table = fib6_get_table(net, cfg->fc_table);
	if (!table)
		return NULL;

	if (!ipv6_addr_any(&cfg->fc_prefsrc))
		flags |= RT6_LOOKUP_F_HAS_SADDR;

	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);

	/* if table lookup failed, fall back to full lookup */
	if (rt == net->ipv6.ip6_null_entry) {
		ip6_rt_put(rt);
		rt = NULL;
	}

	return rt;
}
1903
/* Build (but do not insert) a struct rt6_info from a route
 * configuration (netlink or ioctl).
 *
 * Validates prefix lengths and internal flags, resolves the egress
 * device/idev and routing table, classifies reject-type routes
 * (blackhole/prohibit/throw/unreachable), and verifies gateway
 * reachability for RTF_GATEWAY routes.  Returns the route — holding
 * references on dev and idev — or an ERR_PTR; every acquired
 * reference is dropped on the error path.
 */
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
					      struct netlink_ext_ack *extack)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	/* RTF_PCPU is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_PCPU) {
		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
		goto out;
	}

	if (cfg->fc_dst_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid prefix length");
		goto out;
	}
	if (cfg->fc_src_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid source address length");
		goto out;
	}
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len) {
		NL_SET_ERR_MSG(extack,
			       "Specifying source address requires IPV6_SUBTREES to be enabled");
		goto out;
	}
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	/* Resolve the table; without NLM_F_CREATE an existing table is
	 * preferred, but a new one is still created (with a warning)
	 * for compatibility.
	 */
	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination class. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		/* Lightweight tunnel encap wraps the normal handlers. */
		err = lwtunnel_build_state(cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate, extack);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* Each reject type gets a matching error code and
		 * discard handlers.
		 */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_out;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid gateway address");
			goto out;
		}
		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt = NULL;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			   We allow IPv4-mapped nexthops to support RFC4798-type
			   addressing
			 */
			if (!(gwa_type & (IPV6_ADDR_UNICAST |
					  IPV6_ADDR_MAPPED))) {
				NL_SET_ERR_MSG(extack,
					       "Invalid gateway address");
				goto out;
			}

			if (cfg->fc_table) {
				grt = ip6_nh_lookup_table(net, cfg, gw_addr);

				if (grt) {
					/* The table route must reach the
					 * gateway directly, on this device.
					 */
					if (grt->rt6i_flags & RTF_GATEWAY ||
					    (dev && dev != grt->dst.dev)) {
						ip6_rt_put(grt);
						grt = NULL;
					}
				}
			}

			if (!grt)
				grt = rt6_lookup(net, gw_addr, NULL,
						 cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* Inherit device and idev from the
				 * gateway's route.
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev) {
			NL_SET_ERR_MSG(extack, "Egress device not specified");
			goto out;
		} else if (dev->flags & IFF_LOOPBACK) {
			NL_SET_ERR_MSG(extack,
				       "Egress device can not be loopback device for this route");
			goto out;
		}
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* A preferred source must be an address on the device. */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid source address");
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return rt;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_release_immediate(&rt->dst);

	return ERR_PTR(err);
}
2193
ip6_route_add(struct fib6_config * cfg,struct netlink_ext_ack * extack)2194 int ip6_route_add(struct fib6_config *cfg,
2195 struct netlink_ext_ack *extack)
2196 {
2197 struct mx6_config mxc = { .mx = NULL, };
2198 struct rt6_info *rt;
2199 int err;
2200
2201 rt = ip6_route_info_create(cfg, extack);
2202 if (IS_ERR(rt)) {
2203 err = PTR_ERR(rt);
2204 rt = NULL;
2205 goto out;
2206 }
2207
2208 err = ip6_convert_metrics(&mxc, cfg);
2209 if (err)
2210 goto out;
2211
2212 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
2213
2214 kfree(mxc.mx);
2215
2216 return err;
2217 out:
2218 if (rt)
2219 dst_release_immediate(&rt->dst);
2220
2221 return err;
2222 }
2223
/* Remove @rt from its FIB table under the table write lock.
 *
 * Consumes the caller's reference on @rt (via ip6_rt_put()) on every
 * path, including the -ENOENT refusal to delete the null entry.
 * Returns 0 or a negative errno from fib6_del().
 */
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->dst.dev);

	/* The per-netns null entry must never be unlinked. */
	if (rt == net->ipv6.ip6_null_entry) {
		err = -ENOENT;
		goto out;
	}

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_del(rt, info);
	write_unlock_bh(&table->tb6_lock);

out:
	ip6_rt_put(rt);
	return err;
}
2244
/* Public single-route delete: wraps __ip6_del_rt() with a minimal
 * nl_info carrying only the route's netns.  Consumes the caller's
 * reference on @rt.
 */
int ip6_del_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->dst.dev),
	};
	return __ip6_del_rt(rt, &info);
}
2252
/* Delete a multipath route: @rt plus, when fc_delete_all_nh is set,
 * all of its sibling nexthops, under a single table write lock.
 *
 * A single RTM_DELROUTE notification covering every hop is built
 * up-front; if that succeeds, per-hop notifications from fib6_del()
 * are suppressed via info->skip_notify and the combined message is
 * sent after the lock is dropped.  Consumes the caller's reference
 * on @rt on every path.
 */
static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
{
	struct nl_info *info = &cfg->fc_nlinfo;
	struct net *net = info->nl_net;
	struct sk_buff *skb = NULL;
	struct fib6_table *table;
	int err = -ENOENT;

	if (rt == net->ipv6.ip6_null_entry)
		goto out_put;
	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);

	if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
		struct rt6_info *sibling, *next_sibling;

		/* prefer to send a single notification with all hops */
		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
		if (skb) {
			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;

			if (rt6_fill_node(net, skb, rt,
					  NULL, NULL, 0, RTM_DELROUTE,
					  info->portid, seq, 0) < 0) {
				kfree_skb(skb);
				skb = NULL;
			} else
				info->skip_notify = 1;
		}

		/* _safe variant: fib6_del() unlinks each sibling as we go */
		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->rt6i_siblings,
					 rt6i_siblings) {
			err = fib6_del(sibling, info);
			if (err)
				goto out_unlock;
		}
	}

	err = fib6_del(rt, info);
out_unlock:
	write_unlock_bh(&table->tb6_lock);
out_put:
	ip6_rt_put(rt);

	/* notify userspace only after releasing the table lock */
	if (skb) {
		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
			    info->nlh, gfp_any());
	}
	return err;
}
2304
/* Delete the route described by @cfg.
 *
 * Locates the matching fib6 node under the table read lock, then for
 * the first leaf matching the optional ifindex/gateway/metric/protocol
 * filters takes a reference, drops the lock, and deletes via
 * __ip6_del_rt() (single hop when a gateway was given) or
 * __ip6_del_rt_siblings() (whole multipath route).  Returns -ESRCH
 * when nothing matches.
 */
static int ip6_route_del(struct fib6_config *cfg,
			 struct netlink_ext_ack *extack)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table) {
		NL_SET_ERR_MSG(extack, "FIB table does not exist");
		return err;
	}

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			/* clone (RTF_CACHE) entries are only deleted when
			 * explicitly requested
			 */
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
				continue;
			/* hold the route across the lock drop; the delete
			 * helpers consume this reference
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			/* if gateway was specified only delete the one hop */
			if (cfg->fc_flags & RTF_GATEWAY)
				return __ip6_del_rt(rt, &cfg->fc_nlinfo);

			return __ip6_del_rt_siblings(rt, cfg);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
2355
/* Process a received ICMPv6 Redirect (RFC 4861 section 8) for @dst.
 *
 * Validates the message (length, non-multicast destination, link-local
 * unicast target unless on-link, ND options), updates the neighbour
 * cache for the new first hop, and installs a cloned RTF_CACHE route
 * towards msg->dest via the redirect target.  Silently drops invalid
 * or unwanted redirects (forwarding enabled / accept_redirects off).
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* target == dest means the destination itself is on-link */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* routers ignore redirects, and the admin may disable them */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt->rt6i_flags & RTF_REJECT) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER)),
		     NDISC_REDIRECT, &ndopts);

	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_protocol = RTPROT_REDIRECT;
	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out_release;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* replace an existing cached entry for this destination */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out_release:
	/* Release the reference taken in
	 * ip6_rt_cache_alloc()
	 */
	dst_release(&nrt->dst);

out:
	neigh_release(neigh);
}
2478
2479 /*
2480 * Misc support functions
2481 */
2482
/* Link clone @rt to its parent @from: take a reference on the parent's
 * dst, point rt->dst.from at it, and share the parent's metrics
 * (read-only).  Clears RTF_EXPIRES on @rt.  @from itself must not
 * already be a clone (BUG_ON).
 */
static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
	BUG_ON(from->dst.from);

	rt->rt6i_flags &= ~RTF_EXPIRES;
	dst_hold(&from->dst);
	rt->dst.from = &from->dst;
	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}
2492
/* Initialize the freshly allocated @rt as a copy of @ort, taking the
 * needed references (idev, parent dst via rt6_set_from(), lwtstate).
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.input = ort->dst.input;
	rt->dst.output = ort->dst.output;
	rt->rt6i_dst = ort->rt6i_dst;
	rt->dst.error = ort->dst.error;
	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);	/* copy holds its own idev ref */
	rt->dst.lastuse = jiffies;
	rt->rt6i_gateway = ort->rt6i_gateway;
	/* rt6_set_from() clears RTF_EXPIRES, so flags are assigned first */
	rt->rt6i_flags = ort->rt6i_flags;
	rt6_set_from(rt, ort);
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
2514
2515 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Look up an RA Route-Information route (RTF_ROUTEINFO|RTF_GATEWAY) for
 * @prefix/@prefixlen via @gwaddr on @dev in the device's info table.
 * Returns the route with a reference held, or NULL if not found.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev)
{
	u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO);
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, tb_id);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != dev->ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);	/* caller receives a referenced route */
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
2549
rt6_add_route_info(struct net * net,const struct in6_addr * prefix,int prefixlen,const struct in6_addr * gwaddr,struct net_device * dev,unsigned int pref)2550 static struct rt6_info *rt6_add_route_info(struct net *net,
2551 const struct in6_addr *prefix, int prefixlen,
2552 const struct in6_addr *gwaddr,
2553 struct net_device *dev,
2554 unsigned int pref)
2555 {
2556 struct fib6_config cfg = {
2557 .fc_metric = IP6_RT_PRIO_USER,
2558 .fc_ifindex = dev->ifindex,
2559 .fc_dst_len = prefixlen,
2560 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2561 RTF_UP | RTF_PREF(pref),
2562 .fc_protocol = RTPROT_RA,
2563 .fc_nlinfo.portid = 0,
2564 .fc_nlinfo.nlh = NULL,
2565 .fc_nlinfo.nl_net = net,
2566 };
2567
2568 cfg.fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO),
2569 cfg.fc_dst = *prefix;
2570 cfg.fc_gateway = *gwaddr;
2571
2572 /* We should treat it as a default route if prefix length is 0. */
2573 if (!prefixlen)
2574 cfg.fc_flags |= RTF_DEFAULT;
2575
2576 ip6_route_add(&cfg, NULL);
2577
2578 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
2579 }
2580 #endif
2581
/* Find the RA-learned default route via gateway @addr on @dev in the
 * device's default-router table.  Returns the route with a reference
 * held, or NULL.
 */
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
	u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_MAIN);
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), tb_id);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	/* default routes live at the root leaf (::/0) */
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (dev == rt->dst.dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
2604
/* Install an RA-learned default route via @gwaddr on @dev with router
 * preference @pref, mark the table as holding a default router, and
 * return the freshly looked-up route (referenced) or NULL.
 */
struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_DFLT),
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_protocol = RTPROT_RA,
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	cfg.fc_gateway = *gwaddr;

	if (!ip6_route_add(&cfg, NULL)) {
		struct fib6_table *table;

		table = fib6_get_table(dev_net(dev), cfg.fc_table);
		if (table)
			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
	}

	return rt6_get_dflt_router(gwaddr, dev);
}
2633
rt6_addrconf_purge(struct rt6_info * rt,void * arg)2634 int rt6_addrconf_purge(struct rt6_info *rt, void *arg) {
2635 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2636 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2))
2637 return -1;
2638 return 0;
2639 }
2640
/* Walk every FIB table in @net and delete RA-learned default routes
 * (see rt6_addrconf_purge() for the match criteria).
 */
void rt6_purge_dflt_routers(struct net *net)
{
	fib6_clean_all(net, rt6_addrconf_purge, NULL);
}
2645
rtmsg_to_fib6_config(struct net * net,struct in6_rtmsg * rtmsg,struct fib6_config * cfg)2646 static void rtmsg_to_fib6_config(struct net *net,
2647 struct in6_rtmsg *rtmsg,
2648 struct fib6_config *cfg)
2649 {
2650 memset(cfg, 0, sizeof(*cfg));
2651
2652 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2653 : RT6_TABLE_MAIN;
2654 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2655 cfg->fc_metric = rtmsg->rtmsg_metric;
2656 cfg->fc_expires = rtmsg->rtmsg_info;
2657 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2658 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2659 cfg->fc_flags = rtmsg->rtmsg_flags;
2660
2661 cfg->fc_nlinfo.nl_net = net;
2662
2663 cfg->fc_dst = rtmsg->rtmsg_dst;
2664 cfg->fc_src = rtmsg->rtmsg_src;
2665 cfg->fc_gateway = rtmsg->rtmsg_gateway;
2666 }
2667
/* Handle the legacy SIOCADDRT/SIOCDELRT route ioctls: check
 * CAP_NET_ADMIN, copy the in6_rtmsg from userspace, convert it to a
 * fib6_config, and add/delete the route under the RTNL lock.
 * Returns 0, a negative errno, or -EINVAL for unknown commands.
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch (cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)	/* copy_from_user() returns bytes NOT copied */
			return -EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg, NULL);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg, NULL);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}
2704
2705 /*
2706 * Drop the packet on the floor
2707 */
2708
/* Drop @skb, bump the appropriate SNMP no-route/addr-error counter,
 * and send an ICMPv6 Destination Unreachable with @code back to the
 * sender.  Always returns 0.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		/* unspecified destination counts as an address error,
		 * not a routing failure
		 */
		if (type == IPV6_ADDR_ANY) {
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
2731
/* dst input handler for blackhole routes: drop with "no route". */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2736
/* dst output handler for blackhole routes: drop with "no route". */
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
2742
/* dst input handler for prohibit routes: drop, administratively prohibited. */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2747
/* dst output handler for prohibit routes: drop, administratively prohibited. */
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2753
2754 /*
2755 * Allocate a dst for local (unicast / anycast) address.
2756 */
2757
/*
 *	Allocate a dst for local (unicast / anycast) address @addr on
 *	@idev's interface.  The route is RTF_UP|RTF_NONEXTHOP plus
 *	RTF_ANYCAST or RTF_LOCAL depending on @anycast, and is placed
 *	in the interface's local table (or its l3mdev table).
 *	Returns the route or ERR_PTR(-ENOMEM).  Takes a reference on
 *	@idev which the route then owns.
 */
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	u32 tb_id;
	struct net *net = dev_net(idev->dev);
	struct net_device *dev = idev->dev;
	struct rt6_info *rt;

	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
	if (!rt)
		return ERR_PTR(-ENOMEM);

	in6_dev_hold(idev);

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;

	rt->rt6i_protocol = RTPROT_KERNEL;
	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;

	rt->rt6i_gateway  = *addr;
	rt->rt6i_dst.addr = *addr;
	rt->rt6i_dst.plen = 128;	/* host route */
	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
	rt->rt6i_table = fib6_get_table(net, tb_id);

	return rt;
}
2793
/* Argument bundle for fib6_remove_prefsrc(): remove a deleted IP from
 * the prefsrc of matching routes.
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to this device; NULL = any */
	struct net *net;	/* netns being walked */
	struct in6_addr *addr;	/* the address being removed */
};
2800
fib6_remove_prefsrc(struct rt6_info * rt,void * arg)2801 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2802 {
2803 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2804 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2805 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2806
2807 if (((void *)rt->dst.dev == dev || !dev) &&
2808 rt != net->ipv6.ip6_null_entry &&
2809 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2810 /* remove prefsrc entry */
2811 rt->rt6i_prefsrc.plen = 0;
2812 }
2813 return 0;
2814 }
2815
/* Address @ifp is being removed: scrub it from the prefsrc of every
 * route on its interface (see fib6_remove_prefsrc()).
 */
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
2826
2827 #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2828 #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2829
2830 /* Remove routers and update dst entries when gateway turn into host. */
fib6_clean_tohost(struct rt6_info * rt,void * arg)2831 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2832 {
2833 struct in6_addr *gateway = (struct in6_addr *)arg;
2834
2835 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2836 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2837 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2838 return -1;
2839 }
2840 return 0;
2841 }
2842
/* Purge router/cached-gateway routes via @gateway across all tables;
 * called when that node stops advertising itself as a router.
 */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
2847
/* Argument bundle for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL = flush all */
	struct net *net;	/* netns being walked */
};
2852
2853 /* called with write lock held for table with rt */
/* called with write lock held for table with rt */
/* fib6_clean_all() callback for interface-down: delete (-1) routes on
 * @dev (or all, if dev is NULL) except the null entry.  Multipath
 * siblings are spared unless the device is unregistering or
 * ignore_routes_with_linkdown is off for the route's interface.
 */
static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
	const struct arg_dev_net *adn = arg;
	const struct net_device *dev = adn->dev;

	if ((rt->dst.dev == dev || !dev) &&
	    rt != adn->net->ipv6.ip6_null_entry &&
	    (rt->rt6i_nsiblings == 0 ||
	     (dev && netdev_unregistering(dev)) ||
	     !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
		return -1;

	return 0;
}
2868
/* Flush routes for @dev (or all devices when @dev is NULL) from every
 * FIB table, then drop any uncached dst entries referencing @dev.
 */
void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, &adn);
	if (dev)
		rt6_uncached_list_flush_dev(net, dev);
}
2880
/* Argument bundle for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2885
/* fib6_clean_all() callback: propagate a device MTU change to the
 * routes using that device.  Updates RTAX_MTU (or rt6i_pmtu for cached
 * clones) unless the metric is locked.  Always returns 0 (never
 * deletes routes).
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discovery.
	 */
	if (rt->dst.dev == arg->dev &&
	    dst_metric_raw(&rt->dst, RTAX_MTU) &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* For RTF_CACHE with rt6i_pmtu == 0
			 * (i.e. a redirected route),
			 * the metrics of its rt->dst.from has already
			 * been updated.
			 */
			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
				rt->rt6i_pmtu = arg->mtu;
		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
			   (dst_mtu(&rt->dst) < arg->mtu &&
			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
	}
	return 0;
}
2934
/* Device @dev changed its MTU to @mtu: walk all FIB tables and update
 * affected route metrics (see rt6_mtu_change_route()).
 */
void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
{
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};

	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
}
2944
/* Netlink attribute validation policy for RTM_{NEW,DEL,GET}ROUTE
 * messages, used by nlmsg_parse() in rtm_to_fib6_config().
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
	[RTA_EXPIRES]		= { .type = NLA_U32 },
	[RTA_UID]		= { .type = NLA_U32 },
	[RTA_MARK]		= { .type = NLA_U32 },
	[RTA_TABLE]		= { .type = NLA_U32 },
};
2961
/* Parse an RTM_NEWROUTE/RTM_DELROUTE netlink message into @cfg.
 *
 * Validates attributes against rtm_ipv6_policy, maps route types to
 * RTF_* flags, copies addresses/metrics/encap settings, and validates
 * lwtunnel encap types.  Returns 0 on success or a negative errno
 * (-EINVAL for malformed or unsupported attributes).
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg,
			      struct netlink_ext_ack *extack)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	unsigned int pref;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
			  NULL);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	/* reject-style route types all become RTF_REJECT; fc_type
	 * keeps the distinction for the error code used
	 */
	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	if (rtm->rtm_flags & RTM_F_CLONED)
		cfg->fc_flags |= RTF_CACHE;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
		cfg->fc_flags |= RTF_GATEWAY;
	}
	if (tb[RTA_VIA]) {
		NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
		goto errout;
	}

	if (tb[RTA_DST]) {
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);

		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
						     cfg->fc_mp_len, extack);
		if (err < 0)
			goto errout;
	}

	if (tb[RTA_PREF]) {
		pref = nla_get_u8(tb[RTA_PREF]);
		/* unknown preferences fall back to medium */
		if (pref != ICMPV6_ROUTER_PREF_LOW &&
		    pref != ICMPV6_ROUTER_PREF_HIGH)
			pref = ICMPV6_ROUTER_PREF_MEDIUM;
		cfg->fc_flags |= RTF_PREF(pref);
	}

	if (tb[RTA_ENCAP])
		cfg->fc_encap = tb[RTA_ENCAP];

	if (tb[RTA_ENCAP_TYPE]) {
		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);

		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
		if (err < 0)
			goto errout;
	}

	if (tb[RTA_EXPIRES]) {
		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);

		if (addrconf_finite_timeout(timeout)) {
			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
			cfg->fc_flags |= RTF_EXPIRES;
		}
	}

	err = 0;
errout:
	return err;
}
3089
/* One pending nexthop while building a multipath route. */
struct rt6_nh {
	struct rt6_info *rt6_info;	/* route built for this hop */
	struct fib6_config r_cfg;	/* per-hop config (for diagnostics) */
	struct mx6_config mxc;		/* converted metrics for this hop */
	struct list_head next;		/* link in rt6_nh_list */
};
3096
ip6_print_replace_route_err(struct list_head * rt6_nh_list)3097 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3098 {
3099 struct rt6_nh *nh;
3100
3101 list_for_each_entry(nh, rt6_nh_list, next) {
3102 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
3103 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3104 nh->r_cfg.fc_ifindex);
3105 }
3106 }
3107
/* Append nexthop route @rt to @rt6_nh_list for a later batch insert.
 *
 * Rejects duplicates with -EEXIST, allocates the list entry, and
 * converts the per-hop metrics.  On success the list entry owns the
 * converted metrics; @rt itself is still owned by the caller's batch
 * logic.  Returns 0, -EEXIST, or -ENOMEM/metric-conversion errno.
 */
static int ip6_route_info_append(struct list_head *rt6_nh_list,
				 struct rt6_info *rt, struct fib6_config *r_cfg)
{
	struct rt6_nh *nh;
	int err = -EEXIST;

	list_for_each_entry(nh, rt6_nh_list, next) {
		/* check if rt6_info already exists */
		if (rt6_duplicate_nexthop(nh->rt6_info, rt))
			return err;
	}

	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
	if (!nh)
		return -ENOMEM;
	nh->rt6_info = rt;
	err = ip6_convert_metrics(&nh->mxc, r_cfg);
	if (err) {
		kfree(nh);
		return err;
	}
	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
	list_add_tail(&nh->next, rt6_nh_list);

	return 0;
}
3134
/* Send the single RTM_NEWROUTE notification for a multipath add.
 *
 * For an APPEND, @rt is the first inserted hop and @rt_last the last;
 * userspace expects a dump starting at the first nexthop, and since
 * siblings are appended at the tail, that is the first sibling of
 * @rt_last.  Does nothing when @rt is NULL.
 */
static void ip6_route_mpath_notify(struct rt6_info *rt,
				   struct rt6_info *rt_last,
				   struct nl_info *info,
				   __u16 nlflags)
{
	bool appended = (nlflags & NLM_F_APPEND) && rt_last &&
			rt_last->rt6i_nsiblings;

	if (appended)
		rt = list_first_entry(&rt_last->rt6i_siblings,
				      struct rt6_info, rt6i_siblings);

	if (rt)
		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
}
3155
/* RTM_NEWROUTE handler for requests carrying RTA_MULTIPATH: create and
 * insert one rt6_info per nexthop.  Per-insert notifications are
 * suppressed and a single notification covering the whole multipath
 * route is sent at the end.  If any nexthop fails to insert, the ones
 * already added are deleted again so the operation is all-or-nothing
 * from userspace's point of view.
 *
 * Returns 0 on success or a negative errno.
 */
static int ip6_route_multipath_add(struct fib6_config *cfg,
				   struct netlink_ext_ack *extack)
{
	struct rt6_info *rt_notif = NULL, *rt_last = NULL;
	struct nl_info *info = &cfg->fc_nlinfo;
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	struct rt6_info *rt;
	struct rt6_nh *err_nh;
	struct rt6_nh *nh, *nh_safe;
	__u16 nlflags;
	int remaining;
	int attrlen;
	int err = 1;
	int nhn = 0;
	int replace = (cfg->fc_nlinfo.nlh &&
		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
	LIST_HEAD(rt6_nh_list);

	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
		nlflags |= NLM_F_APPEND;

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
	 * rt6_info structs per nexthop
	 */
	while (rtnh_ok(rtnh, remaining)) {
		/* Each nexthop inherits the shared config, overridden by
		 * its own ifindex/gateway/encap attributes.
		 */
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				r_cfg.fc_gateway = nla_get_in6_addr(nla);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				r_cfg.fc_encap_type = nla_get_u16(nla);
		}

		rt = ip6_route_info_create(&r_cfg, extack);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			rt = NULL;
			goto cleanup;
		}

		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
		if (err) {
			/* not appended to the list, so we still own the ref */
			dst_release_immediate(&rt->dst);
			goto cleanup;
		}

		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* for add and replace send one notification with all nexthops.
	 * Skip the notification in fib6_add_rt2node and send one with
	 * the full route when done
	 */
	info->skip_notify = 1;

	err_nh = NULL;
	list_for_each_entry(nh, &rt6_nh_list, next) {
		err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);

		if (!err) {
			/* save reference to last route successfully inserted */
			rt_last = nh->rt6_info;

			/* save reference to first route for notification */
			if (!rt_notif)
				rt_notif = nh->rt6_info;
		}

		/* nh->rt6_info is used or freed at this point, reset to NULL*/
		nh->rt6_info = NULL;
		if (err) {
			if (replace && nhn)
				ip6_print_replace_route_err(&rt6_nh_list);
			err_nh = nh;
			goto add_errout;
		}

		/* Because each route is added like a single route we remove
		 * these flags after the first nexthop: if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it; when replacing, old
		 * nexthops have been replaced by first new, the rest should
		 * be added to it.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
		nhn++;
	}

	/* success ... tell user about new route */
	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
	goto cleanup;

add_errout:
	/* send notification for routes that were added so that
	 * the delete notifications sent by ip6_route_del are
	 * coherent
	 */
	if (rt_notif)
		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);

	/* Delete routes that were already added */
	list_for_each_entry(nh, &rt6_nh_list, next) {
		if (err_nh == nh)
			break;
		ip6_route_del(&nh->r_cfg, extack);
	}

cleanup:
	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
		if (nh->rt6_info)
			dst_release_immediate(&nh->rt6_info->dst);
		kfree(nh->mxc.mx);
		list_del(&nh->next);
		kfree(nh);
	}

	return err;
}
3292
ip6_route_multipath_del(struct fib6_config * cfg,struct netlink_ext_ack * extack)3293 static int ip6_route_multipath_del(struct fib6_config *cfg,
3294 struct netlink_ext_ack *extack)
3295 {
3296 struct fib6_config r_cfg;
3297 struct rtnexthop *rtnh;
3298 int remaining;
3299 int attrlen;
3300 int err = 1, last_err = 0;
3301
3302 remaining = cfg->fc_mp_len;
3303 rtnh = (struct rtnexthop *)cfg->fc_mp;
3304
3305 /* Parse a Multipath Entry */
3306 while (rtnh_ok(rtnh, remaining)) {
3307 memcpy(&r_cfg, cfg, sizeof(*cfg));
3308 if (rtnh->rtnh_ifindex)
3309 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3310
3311 attrlen = rtnh_attrlen(rtnh);
3312 if (attrlen > 0) {
3313 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3314
3315 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3316 if (nla) {
3317 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3318 r_cfg.fc_flags |= RTF_GATEWAY;
3319 }
3320 }
3321 err = ip6_route_del(&r_cfg, extack);
3322 if (err)
3323 last_err = err;
3324
3325 rtnh = rtnh_next(rtnh, &remaining);
3326 }
3327
3328 return last_err;
3329 }
3330
inet6_rtm_delroute(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)3331 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3332 struct netlink_ext_ack *extack)
3333 {
3334 struct fib6_config cfg;
3335 int err;
3336
3337 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3338 if (err < 0)
3339 return err;
3340
3341 if (cfg.fc_mp)
3342 return ip6_route_multipath_del(&cfg, extack);
3343 else {
3344 cfg.fc_delete_all_nh = 1;
3345 return ip6_route_del(&cfg, extack);
3346 }
3347 }
3348
inet6_rtm_newroute(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)3349 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3350 struct netlink_ext_ack *extack)
3351 {
3352 struct fib6_config cfg;
3353 int err;
3354
3355 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3356 if (err < 0)
3357 return err;
3358
3359 if (cfg.fc_mp)
3360 return ip6_route_multipath_add(&cfg, extack);
3361 else
3362 return ip6_route_add(&cfg, extack);
3363 }
3364
rt6_nlmsg_size(struct rt6_info * rt)3365 static size_t rt6_nlmsg_size(struct rt6_info *rt)
3366 {
3367 int nexthop_len = 0;
3368
3369 if (rt->rt6i_nsiblings) {
3370 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
3371 + NLA_ALIGN(sizeof(struct rtnexthop))
3372 + nla_total_size(16) /* RTA_GATEWAY */
3373 + lwtunnel_get_encap_size(rt->dst.lwtstate);
3374
3375 nexthop_len *= rt->rt6i_nsiblings;
3376 }
3377
3378 return NLMSG_ALIGN(sizeof(struct rtmsg))
3379 + nla_total_size(16) /* RTA_SRC */
3380 + nla_total_size(16) /* RTA_DST */
3381 + nla_total_size(16) /* RTA_GATEWAY */
3382 + nla_total_size(16) /* RTA_PREFSRC */
3383 + nla_total_size(4) /* RTA_TABLE */
3384 + nla_total_size(4) /* RTA_IIF */
3385 + nla_total_size(4) /* RTA_OIF */
3386 + nla_total_size(4) /* RTA_PRIORITY */
3387 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3388 + nla_total_size(sizeof(struct rta_cacheinfo))
3389 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3390 + nla_total_size(1) /* RTA_PREF */
3391 + lwtunnel_get_encap_size(rt->dst.lwtstate)
3392 + nexthop_len;
3393 }
3394
/* Emit the per-nexthop attributes (RTA_GATEWAY, RTA_OIF, encap) for @rt
 * into @skb and accumulate RTNH_F_* state bits into *flags.  @skip_oif
 * is set by the multipath encoder, which carries the interface index in
 * its struct rtnexthop instead of an RTA_OIF attribute.
 *
 * Returns 0 on success or -EMSGSIZE if the skb ran out of tailroom.
 */
static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
			    unsigned int *flags, bool skip_oif)
{
	/* Report link state: a down/no-carrier device marks the nexthop
	 * LINKDOWN, and additionally DEAD if the interface is configured
	 * to ignore routes over down links.
	 */
	if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
		*flags |= RTNH_F_LINKDOWN;
		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
			*flags |= RTNH_F_DEAD;
	}

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
		*flags |= RTNH_F_OFFLOAD;

	/* not needed for multipath encoding b/c it has a rtnexthop struct */
	if (!skip_oif && rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;

	if (rt->dst.lwtstate &&
	    lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
3426
/* Add one multipath nexthop: reserve a struct rtnexthop in @skb, fill
 * it from @rt (via rt6_nexthop_info()), then back-patch rtnh_len once
 * the trailing attributes are known.
 *
 * Returns 0 on success or -EMSGSIZE if the skb ran out of tailroom.
 */
static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
{
	struct rtnexthop *rtnh;
	unsigned int flags = 0;

	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
	if (!rtnh)
		goto nla_put_failure;

	rtnh->rtnh_hops = 0;
	rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;

	/* skip_oif=true: the ifindex above replaces RTA_OIF */
	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
		goto nla_put_failure;

	rtnh->rtnh_flags = flags;

	/* length of rtnetlink header + attributes */
	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
3453
/* Fill a complete rtnetlink route message (@type, usually RTM_NEWROUTE
 * or RTM_DELROUTE) describing @rt into @skb.
 *
 * @dst/@src: when non-NULL (getroute replies), the exact addresses that
 *	were looked up; they override the route's own prefix/plen.
 * @iif:  input interface index for input-path lookups, 0 otherwise.
 *
 * Returns 0 on success or -EMSGSIZE, in which case the partially
 * written message is cancelled.
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags)
{
	u32 metrics[RTAX_MAX];
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	/* Table ids above 255 do not fit in the legacy u8 rtm_table
	 * field; the full id is always carried in RTA_TABLE.
	 */
	rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	if (rt->rt6i_flags & RTF_REJECT) {
		/* Map the stored dst error back to the route type the
		 * user configured (blackhole/prohibit/throw/unreachable).
		 */
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->rt6i_flags & RTF_ANYCAST)
		rtm->rtm_type = RTN_ANYCAST;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		/* Reply describes the exact looked-up destination. */
		if (nla_put_in6_addr(skb, RTA_DST, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		/* Multicast destinations are resolved by the multicast
		 * routing code, which fills the message itself.
		 */
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, portid);

			if (err == 0)
				return 0;
			if (err < 0)
				goto nla_put_failure;
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* A cached path MTU overrides the metric array's RTAX_MTU. */
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt6i_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* For multipath routes, walk the siblings list and add
	 * each as a nexthop within RTA_MULTIPATH.
	 */
	if (rt->rt6i_nsiblings) {
		struct rt6_info *sibling, *next_sibling;
		struct nlattr *mp;

		mp = nla_nest_start(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		if (rt6_add_nexthop(skb, rt) < 0)
			goto nla_put_failure;

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->rt6i_siblings, rt6i_siblings) {
			if (rt6_add_nexthop(skb, sibling) < 0)
				goto nla_put_failure;
		}

		nla_nest_end(skb, mp);
	} else {
		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
			goto nla_put_failure;
	}

	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
		goto nla_put_failure;


	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
3607
rt6_dump_route(struct rt6_info * rt,void * p_arg)3608 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3609 {
3610 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3611 struct net *net = arg->net;
3612
3613 if (rt == net->ipv6.ip6_null_entry)
3614 return 0;
3615
3616 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3617 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3618
3619 /* user wants prefix routes only */
3620 if (rtm->rtm_flags & RTM_F_PREFIX &&
3621 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
3622 /* success since this is not a prefix route */
3623 return 1;
3624 }
3625 }
3626
3627 return rt6_fill_node(net,
3628 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3629 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3630 NLM_F_MULTI);
3631 }
3632
/* Netlink doit handler for RTM_GETROUTE: perform a route lookup for the
 * requested flow and unicast the resulting route back to the caller.
 * With RTM_F_FIB_MATCH the matched FIB entry is reported instead of the
 * (possibly cloned) lookup result.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	int err, iif = 0, oif = 0;
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	bool fibmatch;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
			  extack);
	if (err < 0)
		goto errout;

	/* Default error for the attribute-validation failures below. */
	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));
	rtm = nlmsg_data(nlh);
	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_MARK])
		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);

	if (tb[RTA_UID])
		fl6.flowi6_uid = make_kuid(current_user_ns(),
					   nla_get_u32(tb[RTA_UID]));
	else
		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();

	if (iif) {
		/* Input-path lookup: resolve the device under RCU so it
		 * cannot disappear while we build the flow.
		 */
		struct net_device *dev;
		int flags = 0;

		rcu_read_lock();

		dev = dev_get_by_index_rcu(net, iif);
		if (!dev) {
			rcu_read_unlock();
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		dst = ip6_route_input_lookup(net, dev, &fl6, flags);

		rcu_read_unlock();
	} else {
		fl6.flowi6_oif = oif;

		dst = ip6_route_output(net, NULL, &fl6);
	}


	rt = container_of(dst, struct rt6_info, dst);
	if (rt->dst.error) {
		err = rt->dst.error;
		ip6_rt_put(rt);
		goto errout;
	}

	if (rt == net->ipv6.ip6_null_entry) {
		err = rt->dst.error;
		ip6_rt_put(rt);
		goto errout;
	}

	/* RTM_F_FIB_MATCH: swap the clone for the FIB entry it was
	 * derived from, transferring our reference.
	 */
	if (fibmatch && rt->dst.from) {
		struct rt6_info *ort = container_of(rt->dst.from,
						    struct rt6_info, dst);

		dst_hold(&ort->dst);
		ip6_rt_put(rt);
		rt = ort;
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	/* The skb owns the route reference from here on. */
	skb_dst_set(skb, &rt->dst);
	if (fibmatch)
		err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
				    nlh->nlmsg_seq, 0);
	else
		err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
				    nlh->nlmsg_seq, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
3761
/* Broadcast an @event (RTM_NEWROUTE/RTM_DELROUTE) notification for @rt
 * to RTNLGRP_IPV6_ROUTE listeners.  On allocation or fill failure the
 * error is recorded on the group so userspace can detect that a
 * notification was lost.
 */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
		     unsigned int nlm_flags)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;

	/* gfp_any(): may run in softirq context. */
	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
			    event, info->portid, seq, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
3792
/* Netdevice notifier: attach the per-netns special routes (null entry,
 * and with multiple tables also prohibit/blackhole) to the loopback
 * device when it registers, and drop their idev references when it
 * unregisters.  Only loopback events are of interest.
 */
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (!(dev->flags & IFF_LOOPBACK))
		return NOTIFY_OK;

	if (event == NETDEV_REGISTER) {
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	} else if (event == NETDEV_UNREGISTER &&
		   dev->reg_state != NETREG_UNREGISTERED) {
		/* NETDEV_UNREGISTER could be fired for multiple times by
		 * netdev_wait_allrefs(). Make sure we only call this once.
		 */
		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
#endif
	}

	return NOTIFY_OK;
}
3825
3826 /*
3827 * /proc
3828 */
3829
3830 #ifdef CONFIG_PROC_FS
3831
/* File operations for /proc/net/ipv6_route (seq_file based; the open
 * handler ipv6_route_open is defined elsewhere in this file).
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
3839
/* seq_file show handler for /proc/net/rt6_stats.
 * Field order is ABI: userspace parses this file positionally.
 */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
3854
/* open handler for /proc/net/rt6_stats: single-record, netns-aware. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
3859
/* File operations for /proc/net/rt6_stats. */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
3867 #endif /* CONFIG_PROC_FS */
3868
3869 #ifdef CONFIG_SYSCTL
3870
3871 static
ipv6_sysctl_rtcache_flush(struct ctl_table * ctl,int write,void __user * buffer,size_t * lenp,loff_t * ppos)3872 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3873 void __user *buffer, size_t *lenp, loff_t *ppos)
3874 {
3875 struct net *net;
3876 int delay;
3877 if (!write)
3878 return -EINVAL;
3879
3880 net = (struct net *)ctl->extra1;
3881 delay = net->ipv6.sysctl.flush_delay;
3882 proc_dointvec(ctl, write, buffer, lenp, ppos);
3883 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3884 return 0;
3885 }
3886
/* Template for the per-netns net.ipv6.route.* sysctl table.
 * ipv6_route_sysctl_init() clones this and repoints each .data field at
 * the netns-local variables BY INDEX — keep the entry order here and
 * the index assignments there in sync.
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
3960
/* Clone ipv6_route_table_template for @net and repoint each entry's
 * .data at the netns-local storage.  The table[N] indices must match
 * the template's entry order exactly.  Returns the new table or NULL
 * on allocation failure (caller treats NULL as ENOMEM).
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[0].extra1 = net;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;

		/* Don't export sysctls to unprivileged users: a NULL
		 * procname terminates the table, hiding every entry.
		 */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}

	return table;
}
3989 #endif
3990
/* Per-netns setup for IPv6 routing: dst ops and counters, the
 * always-present null entry (plus prohibit/blackhole entries when
 * multiple tables are configured), and the sysctl defaults.  Failures
 * unwind already-allocated state in reverse order via the goto labels.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	/* Each special route is a netns-private copy of its template,
	 * pointed back at itself and at this netns's dst ops.
	 */
	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.fib6_has_custom_rules = false;
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* sysctl defaults; overridable via net.ipv6.route.* */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
4063
/* Per-netns teardown: free the special route entries allocated in
 * ip6_route_net_init() and destroy the dst entry counters.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
4073
ip6_route_net_init_late(struct net * net)4074 static int __net_init ip6_route_net_init_late(struct net *net)
4075 {
4076 #ifdef CONFIG_PROC_FS
4077 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
4078 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
4079 #endif
4080 return 0;
4081 }
4082
/* Late per-netns teardown: remove the proc entries registered by
 * ip6_route_net_init_late().
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
4090
/* Main per-netns routing state (special entries, sysctls). */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
4095
ipv6_inetpeer_init(struct net * net)4096 static int __net_init ipv6_inetpeer_init(struct net *net)
4097 {
4098 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4099
4100 if (!bp)
4101 return -ENOMEM;
4102 inet_peer_base_init(bp);
4103 net->ipv6.peers = bp;
4104 return 0;
4105 }
4106
ipv6_inetpeer_exit(struct net * net)4107 static void __net_exit ipv6_inetpeer_exit(struct net *net)
4108 {
4109 struct inet_peer_base *bp = net->ipv6.peers;
4110
4111 net->ipv6.peers = NULL;
4112 inetpeer_invalidate_tree(bp);
4113 kfree(bp);
4114 }
4115
/* Per-netns inetpeer storage. */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};

/* Late-stage per-netns hooks (proc entries). */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};

/* Runs after addrconf's notifier (lower priority number) so idev state
 * exists when the special routes are wired to loopback.
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};
4130
/* Wire init_net's special routes to the loopback device by hand: the
 * loopback registered before the dev notifier below existed, so the
 * NETDEV_REGISTER path in ip6_route_dev_notify() never ran for it.
 */
void __init ip6_route_init_special_entries(void)
{
	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
}
4145
/* Boot-time initialization of the IPv6 routing subsystem: slab cache,
 * pernet subsystems, FIB, xfrm, policy rules, rtnetlink handlers and
 * the device notifier.  Failures unwind in reverse registration order
 * via the goto labels; ip6_route_cleanup() mirrors this sequence.
 */
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* Blackhole dsts share the same slab as regular routes. */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
			    RTNL_FLAG_DOIT_UNLOCKED))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	/* Per-cpu lists of uncached (DST_NOCACHE) routes. */
	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
4227
/* Tear down the routing subsystem in the exact reverse order of
 * ip6_route_init(); keep the two sequences in sync.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
4240