1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Based on net/ipv4/route.c
4 * Authors: Ross Biro
5 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
6 * Alan Cox, <gw4pts@gw4pts.ampr.org>
7 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
8 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * Fixes:
11 * Alan Cox : Verify area fixes.
12 * Alan Cox : cli() protects routing changes
13 * Rui Oliveira : ICMP routing table updates
14 * (rco@di.uminho.pt) Routing table insertion and update
15 * Linus Torvalds : Rewrote bits to be sensible
16 * Alan Cox : Added BSD route gw semantics
17 * Alan Cox : Super /proc >4K
18 * Alan Cox : MTU in route table
19 * Alan Cox : MSS actually. Also added the window
20 * clamper.
21 * Sam Lantinga : Fixed route matching in rt_del()
22 * Alan Cox : Routing cache support.
23 * Alan Cox : Removed compatibility cruft.
24 * Alan Cox : RTF_REJECT support.
25 * Alan Cox : TCP irtt support.
26 * Jonathan Naylor : Added Metric support.
27 * Miquel van Smoorenburg : BSD API fixes.
28 * Miquel van Smoorenburg : Metrics.
29 * Alan Cox : Use __u32 properly
30 * Alan Cox : Aligned routing errors more closely with BSD
31 * our system is still very different.
32 * Alan Cox : Faster /proc handling
33 * Alexey Kuznetsov : Massive rework to support tree based routing,
34 * routing caches and better behaviour.
35 *
36 * Olaf Erb : irtt wasn't being copied right.
37 * Bjorn Ekwall : Kerneld route support.
38 * Alan Cox : Multicast fixed (I hope)
39 * Pavel Krauz : Limited broadcast fixed
40 * Mike McLagan : Routing by source
41 * Alexey Kuznetsov : End of old history. Split to fib.c and
42 * route.c and rewritten from scratch.
43 * Andi Kleen : Load-limit warning messages.
44 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
45 * Vitaly E. Lavrov : Race condition in ip_route_input_slow.
46 * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow.
47 * Vladimir V. Ivanov : IP rule info (flowid) is really useful.
48 * Marc Boucher : routing by fwmark
49 * Robert Olsson : Added rt_cache statistics
50 * Arnaldo C. Melo : Convert proc stuff to seq_file
51 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
52 * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect
53 * Ilia Sotnikov : Removed TOS from hash calculations
54 *
55 * Based on net/ipv6/route.c
56 * Authors:
57 * Pedro Roque <roque@di.fc.ul.pt>
58 *
59 * Changes:
60 *
61 * YOSHIFUJI Hideaki @USAGI
62 * reworked default router selection.
63 * - respect outgoing interface
64 * - select from (probably) reachable routers (i.e.
65 * routers in REACHABLE, STALE, DELAY or PROBE states).
66 * - always select the same router if it is (probably)
67 * reachable. otherwise, round-robin the list.
68 * Ville Nuorvala
69 * Fixed routing subtrees.
70 *
71 * NewIP INET
72 * An implementation of the TCP/IP protocol suite for the LINUX
73 * operating system. NewIP INET is implemented using the BSD Socket
74 * interface as the means of communication with the user level.
75 *
76 * ROUTE - implementation of the NewIP router.
77 */
78 #define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__
79
80 #include <linux/module.h>
81 #include <linux/netdevice.h>
82 #include <linux/socket.h>
83 #include <linux/in.h>
84 #include <linux/kernel.h>
85 #include <linux/sockios.h>
86 #include <linux/string.h>
87 #include <linux/vmalloc.h>
88 #include <linux/capability.h>
89 #include <linux/proc_fs.h>
90
91 #include <net/sock.h>
92 #include <net/udp.h>
93 #include <net/inet_common.h>
94 #include <net/protocol.h>
95 #include <net/dst.h>
96 #include <net/lwtunnel.h>
97 #include <linux/uaccess.h> /* copy_from_user() */
98 #include <linux/rtnetlink.h> /* rtnl_lock() */
99 #include <linux/inetdevice.h>
100
101 #include <net/nip_route.h>
102 #include <net/nip_fib.h>
103 #include <net/nip_addrconf.h>
104 #include <net/nndisc.h>
105 #include <net/nip.h>
106
107 #include <linux/newip_route.h>
108 #include "nip_hdr.h"
109 #include "tcp_nip_parameter.h"
110
111 static int nip_pkt_discard(struct sk_buff *skb);
112 static int nip_pkt_discard_out(struct net *net, struct sock *sk,
113 struct sk_buff *skb);
114 static unsigned int nip_mtu(const struct dst_entry *dst);
115
/* Template for the per-netns "null" route.  Every lookup miss resolves
 * to a copy of this entry: it reports -ENETUNREACH and discards packets
 * on both the input and output paths.
 */
static const struct nip_rt_info nip_null_entry_template = {
        .dst = {
                .__refcnt = ATOMIC_INIT(1),
                .__use = 1,
                .obsolete = DST_OBSOLETE_FORCE_CHK,
                .error = -ENETUNREACH,
                .input = nip_pkt_discard,
                .output = nip_pkt_discard_out,
        },
        .rt_ref = ATOMIC_INIT(1),
};
127
/* Template for the per-netns broadcast route: delivers via the normal
 * nip_input()/nip_output() handlers instead of discarding.
 */
static const struct nip_rt_info nip_broadcast_entry_template = {
        .dst = {
                .__refcnt = ATOMIC_INIT(1),
                .__use = 1,
                .obsolete = DST_OBSOLETE_FORCE_CHK,
                .input = nip_input,
                .output = nip_output,
        },
        .rt_ref = ATOMIC_INIT(1),
};
138
nip_nexthop(struct nip_rt_info * rt,struct nip_addr * daddr)139 struct nip_addr *nip_nexthop(struct nip_rt_info *rt, struct nip_addr *daddr)
140 {
141 if (rt->rt_flags & RTF_GATEWAY)
142 return &rt->gateway;
143 else
144 return daddr;
145 }
146
rtmsg_to_fibni_config(struct net * net,struct nip_rtmsg * rtmsg,struct nip_fib_config * cfg)147 static void rtmsg_to_fibni_config(struct net *net, struct nip_rtmsg *rtmsg,
148 struct nip_fib_config *cfg)
149 {
150 memset(cfg, 0, sizeof(*cfg));
151
152 cfg->fc_table = NIP_RT_TABLE_MAIN;
153 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
154 cfg->fc_metric = rtmsg->rtmsg_metric;
155 cfg->fc_expires = rtmsg->rtmsg_info;
156
157 cfg->fc_flags = rtmsg->rtmsg_flags;
158
159 cfg->fc_nlinfo.nl_net = net;
160
161 cfg->fc_dst = rtmsg->rtmsg_dst;
162 cfg->fc_src = rtmsg->rtmsg_src;
163 cfg->fc_gateway = rtmsg->rtmsg_gateway;
164 }
165
/* Zero every nip_rt_info field that follows the embedded dst_entry.
 * "dst + 1" points just past rt->dst, so the memset clears only the
 * NewIP-specific tail while preserving the dst core that dst_alloc()
 * already initialized; the parent link is then cleared explicitly.
 */
static void nip_rt_info_init(struct nip_rt_info *rt)
{
        struct dst_entry *dst = &rt->dst;

        memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
        rt->from = NULL;
}
173
__nip_dst_alloc(struct net * net,struct net_device * dev,int flags)174 static struct nip_rt_info *__nip_dst_alloc(struct net *net,
175 struct net_device *dev, int flags)
176 {
177 struct nip_rt_info *rt =
178 dst_alloc(&net->newip.nip_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
179 flags);
180
181 if (rt)
182 nip_rt_info_init(rt);
183
184 return rt;
185 }
186
nip_dst_alloc(struct net * net,struct net_device * dev,int flags)187 struct nip_rt_info *nip_dst_alloc(struct net *net, struct net_device *dev,
188 int flags)
189 {
190 struct nip_rt_info *rt = __nip_dst_alloc(net, dev, flags);
191
192 if (rt) {
193 rt->rt_pcpu =
194 alloc_percpu_gfp(struct nip_rt_info *, GFP_ATOMIC);
195 if (rt->rt_pcpu) {
196 int cpu;
197
198 for_each_possible_cpu(cpu) {
199 struct nip_rt_info **p;
200
201 p = per_cpu_ptr(rt->rt_pcpu, cpu);
202 /* no one shares rt */
203 *p = NULL;
204 }
205 } else {
206 dst_destroy((struct dst_entry *)rt);
207 return NULL;
208 }
209 }
210
211 return rt;
212 }
213
nip_rt_dst_from_metrics_check(struct nip_rt_info * rt)214 static void nip_rt_dst_from_metrics_check(struct nip_rt_info *rt)
215 {
216 if (rt->from &&
217 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->from))
218 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->from), true);
219 }
220
/* Fetch this CPU's cached clone of @rt, if one was already installed.
 * On a hit the clone's refcount is raised for the caller and its
 * metrics are re-synced with the parent.  Returns NULL on a miss.
 * Runs under rcu_read_lock_bh (see nip_pol_route()).
 */
static struct nip_rt_info *nip_rt_get_pcpu_route(struct nip_rt_info *rt)
{
        struct nip_rt_info *pcpu_rt, **p;

        p = this_cpu_ptr(rt->rt_pcpu);
        pcpu_rt = *p;

        if (pcpu_rt) {
                dst_hold(&pcpu_rt->dst);
                nip_rt_dst_from_metrics_check(pcpu_rt);
        }
        return pcpu_rt;
}
234
/* Link clone @rt to its parent @from: hold a reference on the parent's
 * dst and share its metrics block.  @from must not itself be a clone
 * (WARN if it already has a parent).
 */
static void nip_rt_set_from(struct nip_rt_info *rt, struct nip_rt_info *from)
{
        WARN_ON(from->from);

        /* Clear the expiry flag; the clone's lifetime follows the parent. */
        rt->rt_flags &= ~RTF_EXPIRES;
        dst_hold(&from->dst);
        rt->from = &from->dst;
        dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}
244
/* Populate freshly allocated clone @rt from original route @ort:
 * handlers, destination, flags, table linkage and lwtunnel state.
 * Takes its own references on the inet device and lwtstate, and links
 * the clone back to its parent via nip_rt_set_from().
 */
static void nip_rt_copy_init(struct nip_rt_info *rt, struct nip_rt_info *ort)
{
        rt->dst.input = ort->dst.input;
        rt->dst.output = ort->dst.output;
        rt->rt_dst = ort->rt_dst;
        rt->dst.error = ort->dst.error;
        rt->rt_idev = ort->rt_idev;
        if (rt->rt_idev)
                nin_dev_hold(rt->rt_idev);

        rt->dst.lastuse = jiffies;
        rt->gateway = ort->gateway;
        rt->rt_flags = ort->rt_flags;
        nip_rt_set_from(rt, ort);
        rt->rt_metric = ort->rt_metric;
        rt->rt_table = ort->rt_table;
        rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
263
nip_rt_pcpu_alloc(struct nip_rt_info * rt)264 static struct nip_rt_info *nip_rt_pcpu_alloc(struct nip_rt_info *rt)
265 {
266 struct nip_rt_info *pcpu_rt;
267
268 pcpu_rt = __nip_dst_alloc(dev_net(rt->dst.dev),
269 rt->dst.dev, rt->dst.flags);
270 if (!pcpu_rt)
271 return NULL;
272 nip_rt_copy_init(pcpu_rt, rt);
273 pcpu_rt->rt_protocol = rt->rt_protocol;
274 pcpu_rt->rt_flags |= RTF_PCPU;
275 return pcpu_rt;
276 }
277
/* Create and install a per-cpu clone of @rt for the current CPU.
 * If clone allocation fails, the held netns null entry is returned
 * instead.  cmpxchg() resolves the race with a concurrent installer:
 * if another context won, our clone is destroyed and theirs is used.
 * If rt->rt_pcpu is gone — presumably the route is being torn down;
 * TODO confirm against nip_dst_destroy() — fall back to @rt itself.
 * The returned entry is always held for the caller.
 */
static struct nip_rt_info *nip_rt_make_pcpu_route(struct nip_rt_info *rt)
{
        struct nip_rt_info *pcpu_rt, *prev;

        pcpu_rt = nip_rt_pcpu_alloc(rt);
        if (!pcpu_rt) {
                struct net *net = dev_net(rt->dst.dev);

                dst_hold(&net->newip.nip_null_entry->dst);
                return net->newip.nip_null_entry;
        }

        rcu_read_lock_bh();
        if (rt->rt_pcpu) {
                struct nip_rt_info **p = this_cpu_ptr(rt->rt_pcpu);

                prev = cmpxchg(p, NULL, pcpu_rt);
                if (prev) {
                        /* If someone did it before us, return prev instead */
                        dst_destroy(&pcpu_rt->dst);
                        pcpu_rt = prev;
                }
        } else {
                dst_destroy(&pcpu_rt->dst);
                pcpu_rt = rt;
        }
        dst_hold(&pcpu_rt->dst);
        nip_rt_dst_from_metrics_check(pcpu_rt);
        rcu_read_unlock_bh();
        return pcpu_rt;
}
309
/* Input-path adapter for nip_fib_rule_lookup(): routes using the
 * flow's incoming interface index.
 */
static struct nip_rt_info *nip_pol_route_input(struct net *net,
                                               struct nip_fib_table *table,
                                               struct flow_nip *fln, int flags)
{
        return nip_pol_route(net, table, fln->FLOWIN_IIF, fln, flags);
}
316
/* Policy-rule route lookup for received packets.  @tbl_type reports
 * which table produced the result (set by nip_fib_rule_lookup()).
 */
struct dst_entry *nip_route_input_lookup(struct net *net,
                                         struct net_device *dev,
                                         struct flow_nip *fln, int flags, int *tbl_type)
{
        return nip_fib_rule_lookup(net, fln, flags, tbl_type, nip_pol_route_input);
}
323
/* Attach a route to an ingress skb.  The well-known ARP broadcast
 * destination short-circuits to the per-netns broadcast entry.
 * Returns 1 when the main-table result would send the packet back out
 * the device it arrived on while send_redirects is enabled on that
 * device; otherwise 0.
 */
int nip_route_input(struct sk_buff *skb)
{
        struct net *net = dev_net(skb->dev);
        int flags = 0;
        struct flow_nip fln = {
                .FLOWIN_IIF = skb->skb_iif,
                .daddr = nipcb(skb)->dstaddr,
                .saddr = nipcb(skb)->srcaddr,
        };
        struct dst_entry *out_dst;
        int tbl_type = 0;

        if (nip_addr_eq(&fln.daddr, &nip_broadcast_addr_arp)) {
                nip_dbg("recv broadcast packet");
                dst_hold(&net->newip.nip_broadcast_entry->dst);
                skb_dst_set(skb,
                            (struct dst_entry *)net->newip.nip_broadcast_entry);
                return 0;
        }

        out_dst = nip_route_input_lookup(net, skb->dev, &fln, flags, &tbl_type);
        skb_dst_set(skb, out_dst);

        if (tbl_type == RT_TABLE_MAIN) {
                struct ninet_dev *nin_dev = rcu_dereference(skb->dev->nip_ptr);
                struct ninet_dev *nout_dev = rcu_dereference(out_dst->dev->nip_ptr);

                /* When global variable ipv4 all/send_redirects or
                 * corresponding network/send_redirects is 1,
                 * IN_DEV_TX_REDIRECTS() conditions are valid.
                 * send_redirects default is 1.
                 */
                if (nin_dev == nout_dev &&
                    IN_DEV_TX_REDIRECTS(rcu_dereference(out_dst->dev->ip_ptr))) {
                        nip_dbg("The inlet and outlet are the same");
                        return 1;
                }
        }
        return 0;
}
364
/* Output-path adapter for nip_fib_rule_lookup(): routes using the
 * flow's outgoing interface index.
 */
static struct nip_rt_info *nip_pol_route_output(struct net *net,
                                                struct nip_fib_table *table,
                                                struct flow_nip *fln, int flags)
{
        return nip_pol_route(net, table, fln->FLOWIN_OIF, fln, flags);
}
371
/* Output-path route lookup.  Non-local results are returned as-is.
 * For an RTF_LOCAL result, the first address on the route's inet
 * device is copied into fln->saddr, the local route's reference is
 * dropped, and the held per-netns broadcast entry is returned instead.
 */
struct dst_entry *nip_route_output_flags(struct net *net, const struct sock *sk,
                                         struct flow_nip *fln, int flags)
{
        struct dst_entry *dst;
        struct nip_rt_info *rt;
        int tbl_type = 0;

        dst = nip_fib_rule_lookup(net, fln, flags, &tbl_type, nip_pol_route_output);
        rt = (struct nip_rt_info *)dst;

        if (!(rt->rt_flags & RTF_LOCAL))
                return dst;

        rcu_read_lock();
        if (rt->rt_idev) {
                read_lock_bh(&rt->rt_idev->lock);
                /* search saddr in idev->addr */
                if (!list_empty(&rt->rt_idev->addr_list)) {
                        struct ninet_ifaddr *ifp;

                        /* Take the first configured address only. */
                        list_for_each_entry(ifp, &rt->rt_idev->addr_list, if_list) {
                                fln->saddr = ifp->addr;
                                break;
                        }
                }
                read_unlock_bh(&rt->rt_idev->lock);
        }
        rcu_read_unlock();

        dst_release(dst);
        dst_hold(&net->newip.nip_broadcast_entry->dst);
        return &net->newip.nip_broadcast_entry->dst;
}
405
/* Core route lookup: locate the FIB node for fln->daddr in @table and
 * return a held per-cpu clone of its route, creating the clone on
 * first use.  A miss returns the held netns null entry.  @oif and
 * @flags are accepted for the lookup-callback signature but unused
 * in this body.
 */
struct nip_rt_info *nip_pol_route(struct net *net, struct nip_fib_table *table,
                                  int oif, struct flow_nip *fln, int flags)
{
        struct nip_fib_node *fn;
        struct nip_rt_info *rt, *pcpu_rt;

        rcu_read_lock_bh();
        fn = nip_fib_locate(table->nip_tb_head, &fln->daddr);
        if (!fn) {
                rcu_read_unlock_bh();
                nip_dbg("search fail");
                rt = net->newip.nip_null_entry;
                dst_hold(&rt->dst);
                return rt;
        }
        rt = fn->nip_route_info;

        /* Get a percpu copy */
        rt->dst.lastuse = jiffies;
        rt->dst.__use++;
        pcpu_rt = nip_rt_get_pcpu_route(rt);
        nip_dbg("cpu id=%d", smp_processor_id());
        if (pcpu_rt) {
                rcu_read_unlock_bh();
                nip_dbg("pcpu found");
        } else {
                /* Hold rt across the unlock so it cannot be freed while
                 * the clone is being built; drop the temporary ref after.
                 */
                dst_hold(&rt->dst);
                rcu_read_unlock_bh();
                pcpu_rt = nip_rt_make_pcpu_route(rt);
                dst_release(&rt->dst);
        }

        nip_dbg("rt dst.__refcnt=%d, pcpu dst.__refcnt=%d",
                atomic_read(&rt->dst.__refcnt),
                atomic_read(&pcpu_rt->dst.__refcnt));
        return pcpu_rt;
}
443
nip_bind_addr_check(struct net * net,struct nip_addr * addr)444 bool nip_bind_addr_check(struct net *net,
445 struct nip_addr *addr)
446 {
447 struct nip_fib_node *fn;
448 struct nip_fib_table *fib_tbl = net->newip.nip_fib_local_tbl;
449
450 if (nip_addr_invalid(addr)) {
451 nip_dbg("binding-addr invalid, bitlen=%u", addr->bitlen);
452 return false;
453 }
454
455 if (nip_addr_eq(addr, &nip_any_addr)) {
456 nip_dbg("binding-addr is any addr");
457 return true;
458 }
459
460 rcu_read_lock_bh();
461 fn = nip_fib_locate(fib_tbl->nip_tb_head, addr);
462 rcu_read_unlock_bh();
463 if (!fn) {
464 nip_dbg("binding-addr is not local addr");
465 return false;
466 }
467
468 nip_dbg("binding-addr is local addr");
469 return true;
470 }
471
/* Build a new nip_rt_info from FIB configuration @cfg.  On success the
 * returned route owns the references taken on the egress device and
 * its ninet_dev.  On failure those references are dropped and
 * ERR_PTR(err) is returned.
 */
static struct nip_rt_info *nip_route_info_create(struct nip_fib_config *cfg)
{
        struct net *net = cfg->fc_nlinfo.nl_net;
        struct nip_rt_info *rt = NULL;
        struct net_device *dev = NULL;
        struct ninet_dev *idev = NULL;
        struct nip_fib_table *table;
        int err = -ENODEV;

        /* find net_device */
        dev = dev_get_by_index(net, cfg->fc_ifindex);
        if (!dev) {
                nip_dbg("fail to get dev by ifindex(%u)", cfg->fc_ifindex);
                goto out;
        }

        /* find ninet_dev,which has the newip address list */
        idev = nin_dev_get(dev);
        if (!idev) {
                nip_dbg("fail to get ninet dev (ifindex=%u)", cfg->fc_ifindex);
                goto out;
        }
        /* Do not add a route when the network port is not running
         * to avoid incorrect route selection
         */
        if (!netif_running(idev->dev)) {
                nip_dbg("network interface is not running");
                goto out;
        }
        /* Userspace may leave the metric unset; fall back to the default. */
        if (cfg->fc_metric == 0)
                cfg->fc_metric = NIP_RT_PRIO_USER;

        err = -ENOBUFS;
        table = nip_fib_get_table(net, cfg->fc_table);
        if (!table) {
                nip_dbg("fail to get fib table (fc_table=%u)", cfg->fc_table);
                goto out;
        }

        /* Address-configuration routes are counted in dst accounting;
         * administratively added ones are not (DST_NOCOUNT).
         */
        rt = nip_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
        if (!rt) {
                nip_dbg("fail to alloc dst mem");
                err = -ENOMEM;
                goto out;
        }

        nip_rt_clean_expires(rt);

        if (cfg->fc_protocol == RTPROT_UNSPEC)
                cfg->fc_protocol = RTPROT_BOOT;
        rt->rt_protocol = cfg->fc_protocol;

        /* Local routes are delivered up the stack; others are forwarded. */
        if (cfg->fc_flags & RTF_LOCAL) {
                rt->dst.input = nip_input;
                nip_dbg("rt->dst.input=nip_input, ifindex=%u", cfg->fc_ifindex);
        } else {
                rt->dst.input = nip_forward;
                nip_dbg("rt->dst.input=nip_forward, ifindex=%u", cfg->fc_ifindex);
        }

        rt->dst.output = nip_output;
        rt->rt_dst = cfg->fc_dst;
        rt->rt_src = cfg->fc_src;
        rt->rt_metric = cfg->fc_metric;

        if (cfg->fc_flags & RTF_GATEWAY)
                rt->gateway = cfg->fc_gateway;
        else
                rt->gateway = nip_any_addr;

        rt->rt_flags = cfg->fc_flags;
        rt->dst.dev = dev;
        rt->rt_idev = idev;
        rt->rt_table = table;

        /* Success: dev and idev references are now owned by rt. */
        return rt;
out:
        if (dev)
                dev_put(dev);
        if (idev)
                nin_dev_put(idev);
        return ERR_PTR(err);
}
555
556 /* __nip_ins_rt is called with FREE table->nip_tb_lock.
557 * It takes new route entry, the addition fails by any reason the
558 * route is released.
559 */
__nip_ins_rt(struct nip_rt_info * rt)560 static int __nip_ins_rt(struct nip_rt_info *rt)
561 {
562 int err;
563 struct nip_fib_table *table;
564
565 table = rt->rt_table;
566
567 spin_lock_bh(&table->nip_tb_lock);
568 err = nip_fib_add(table, rt);
569 spin_unlock_bh(&table->nip_tb_lock);
570
571 return err;
572 }
573
/* Insert @rt into its table, first taking the reference that the
 * nip fib hash will own (released by __nip_ins_rt() on failure).
 */
int nip_ins_rt(struct nip_rt_info *rt)
{
        /* Hold dst to account for the reference from the nip fib hash */
        dst_hold(&rt->dst);
        return __nip_ins_rt(rt);
}
580
/* Create a route from @cfg and insert it into its FIB table.
 * Returns 0 on success or a negative errno.
 */
int nip_route_add(struct nip_fib_config *cfg)
{
        struct nip_rt_info *rt = nip_route_info_create(cfg);

        if (IS_ERR(rt)) {
                nip_dbg("fail to creat route info");
                return PTR_ERR(rt);
        }

        return __nip_ins_rt(rt);
}
598
/* Remove @rt from its FIB table under the table lock.  The netns null
 * entry can never be deleted (-ENOENT).  The caller's reference on
 * @rt is dropped on every path before returning.
 */
static int __nip_del_rt(struct nip_rt_info *rt, struct nl_info *info)
{
        int err;
        struct nip_fib_table *table;
        struct net *net = dev_net(rt->dst.dev);

        if (rt == net->newip.nip_null_entry) {
                err = -ENOENT;
                goto out;
        }

        table = rt->rt_table;
        spin_lock_bh(&table->nip_tb_lock);
        err = nip_fib_del(rt, info);
        spin_unlock_bh(&table->nip_tb_lock);

out:
        nip_rt_put(rt);
        return err;
}
619
nip_del_rt(struct nip_rt_info * rt)620 int nip_del_rt(struct nip_rt_info *rt)
621 {
622 struct nl_info info = {
623 .nl_net = dev_net(rt->dst.dev),
624 };
625 return __nip_del_rt(rt, &info);
626 }
627
nip_route_del(struct nip_fib_config * cfg)628 static int nip_route_del(struct nip_fib_config *cfg)
629 {
630 struct net *net = cfg->fc_nlinfo.nl_net;
631 struct nip_fib_table *table;
632 struct nip_fib_node *fn;
633 struct nip_rt_info *rt;
634 int err = -ESRCH;
635
636 table = nip_fib_get_table(net, cfg->fc_table);
637 if (!table)
638 return err;
639
640 rcu_read_lock_bh();
641 fn = nip_fib_locate(table->nip_tb_head, &cfg->fc_dst);
642 if (fn) {
643 rt = fn->nip_route_info;
644 dst_hold(&rt->dst);
645 rcu_read_unlock_bh();
646
647 return __nip_del_rt(rt, &cfg->fc_nlinfo);
648 }
649 rcu_read_unlock_bh();
650
651 return err;
652 }
653
nip_route_ioctl(struct net * net,unsigned int cmd,struct nip_rtmsg * rtmsg)654 int nip_route_ioctl(struct net *net, unsigned int cmd, struct nip_rtmsg *rtmsg)
655 {
656 struct nip_fib_config cfg;
657 int err;
658
659 rtmsg_to_fibni_config(net, rtmsg, &cfg);
660 if (nip_addr_invalid(&cfg.fc_dst)) {
661 nip_dbg("nip daddr invalid, bitlen=%u", cfg.fc_dst.bitlen);
662 return -EFAULT;
663 }
664
665 if (cfg.fc_flags & RTF_GATEWAY) {
666 if (nip_addr_invalid(&cfg.fc_gateway)) {
667 nip_dbg("nip gateway daddr invalid, bitlen=%u",
668 cfg.fc_gateway.bitlen);
669 return -EFAULT;
670 }
671 }
672
673 rtnl_lock();
674 switch (cmd) {
675 case SIOCADDRT: /* Add a route */
676 err = nip_route_add(&cfg);
677 break;
678 case SIOCDELRT: /* Delete a route */
679 err = nip_route_del(&cfg);
680 break;
681 default:
682 err = -EINVAL;
683 }
684 rtnl_unlock();
685
686 return err;
687 }
688
/* dst_ops.destroy: release everything a nip_rt_info owns — the generic
 * metrics, the per-cpu clone array, the inet-device reference and,
 * for clones, the reference on the parent route.
 */
static void nip_dst_destroy(struct dst_entry *dst)
{
        struct nip_rt_info *rt = (struct nip_rt_info *)dst;
        struct dst_entry *from = rt->from;
        struct ninet_dev *idev;

        dst_destroy_metrics_generic(dst);
        free_percpu(rt->rt_pcpu);

        idev = rt->rt_idev;
        if (idev) {
                rt->rt_idev = NULL;
                nip_dbg("idev->refcnt=%u", refcount_read(&idev->refcnt));
                nin_dev_put(idev);
        }

        if (from)
                nip_dbg("from->__refcnt=%d", atomic_read(&from->__refcnt));
        /* Clear the parent link before dropping its reference. */
        rt->from = NULL;
        dst_release(from);
}
710
nip_choose_neigh_daddr(struct nip_rt_info * rt,struct sk_buff * skb,const void * daddr)711 static inline const void *nip_choose_neigh_daddr(struct nip_rt_info *rt,
712 struct sk_buff *skb,
713 const void *daddr)
714 {
715 struct nip_addr *p = &rt->gateway;
716
717 if (rt->rt_flags & RTF_GATEWAY)
718 return (const void *)p;
719 else if (skb)
720 return &nipcb(skb)->dstaddr;
721 return daddr;
722 }
723
nip_neigh_lookup(const struct dst_entry * dst,struct sk_buff * skb,const void * daddr)724 static struct neighbour *nip_neigh_lookup(const struct dst_entry *dst,
725 struct sk_buff *skb,
726 const void *daddr)
727 {
728 struct nip_rt_info *rt = (struct nip_rt_info *)dst;
729 struct neighbour *n;
730
731 daddr = nip_choose_neigh_daddr(rt, skb, daddr);
732 n = __nip_neigh_lookup(dst->dev, daddr);
733 if (n)
734 return n;
735 return neigh_create(&nnd_tbl, daddr, dst->dev);
736 }
737
/* dst_ops.check: a cached dst stays valid only while it is still
 * marked DST_OBSOLETE_FORCE_CHK; otherwise force a relookup.
 */
static struct dst_entry *nip_dst_check(struct dst_entry *dst, u32 cookie)
{
        return dst->obsolete == DST_OBSOLETE_FORCE_CHK ? dst : NULL;
}
744
/* Used to calculate the MSS value required by TCP
 * Because there is no MSS in the TCP of NewIP,
 * the value is calculated based on the MTU of the network port
 *
 * NOTE(review): assumes dst_mtu() exceeds NIP_HDR_MAX +
 * sizeof(struct tcphdr); a smaller MTU would wrap this unsigned
 * subtraction — confirm against the configured MTU limits.
 */
static unsigned int nip_default_advmss(const struct dst_entry *dst)
{
        unsigned int mtu = dst_mtu(dst);

        mtu -= NIP_HDR_MAX + sizeof(struct tcphdr);

        return mtu;
}
757
nip_mtu(const struct dst_entry * dst)758 static unsigned int nip_mtu(const struct dst_entry *dst)
759 {
760 unsigned int mtu;
761 struct ninet_dev *idev;
762
763 mtu = NIP_MIN_MTU;
764
765 rcu_read_lock();
766 idev = __nin_dev_get(dst->dev);
767 if (idev)
768 mtu = idev->cnf.mtu;
769 rcu_read_unlock();
770
771 return mtu;
772 }
773
/* dst_ops.ifdown: when @dev goes away, re-home the route's inet-device
 * reference onto the netns loopback device so the dst stays usable.
 */
static void nip_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
                           int how)
{
        struct nip_rt_info *rt = (struct nip_rt_info *)dst;
        struct ninet_dev *idev = rt->rt_idev;
        struct net_device *loopback_dev =
                dev_net(dev)->loopback_dev;

        if (idev && idev->dev != loopback_dev) {
                struct ninet_dev *loopback_idev = nin_dev_get(loopback_dev);

                /* Swap the reference only if loopback has a ninet_dev. */
                if (loopback_idev) {
                        rt->rt_idev = loopback_idev;
                        nin_dev_put(idev);
                }
        }
}
791
/* dst_ops template copied into each netns by nip_route_net_init();
 * the shared kmem cache is attached in nip_route_init().
 */
static struct dst_ops nip_dst_ops_template = {
        .family = AF_NINET,
        .destroy = nip_dst_destroy,
        .ifdown = nip_dst_ifdown,
        .neigh_lookup = nip_neigh_lookup,
        .check = nip_dst_check,
        .default_advmss = nip_default_advmss,
        .mtu = nip_mtu,
};
801
/* Input handler for the null route: silently drop the packet. */
static int nip_pkt_discard(struct sk_buff *skb)
{
        kfree_skb(skb);
        return 0;
}
807
/* Output handler for the null route: silently drop the packet. */
static int nip_pkt_discard_out(struct net *net, struct sock *sk,
                               struct sk_buff *skb)
{
        kfree_skb(skb);
        return 0;
}
814
nip_addrconf_dst_alloc(struct ninet_dev * idev,const struct nip_addr * addr)815 struct nip_rt_info *nip_addrconf_dst_alloc(struct ninet_dev *idev,
816 const struct nip_addr *addr)
817 {
818 u32 tb_id;
819 struct net *net = dev_net(idev->dev);
820 struct net_device *dev = idev->dev;
821 struct nip_rt_info *rt;
822
823 rt = nip_dst_alloc(net, dev, DST_NOCOUNT);
824 if (!rt)
825 return ERR_PTR(-ENOMEM);
826
827 nin_dev_hold(idev);
828
829 rt->dst.flags |= DST_HOST;
830 rt->dst.input = nip_input;
831 rt->dst.output = nip_output;
832 rt->rt_idev = idev;
833
834 rt->rt_protocol = RTPROT_KERNEL;
835 rt->rt_flags = RTF_UP | RTF_NONEXTHOP;
836 rt->rt_flags |= RTF_LOCAL;
837
838 rt->gateway = *addr;
839 rt->rt_dst = *addr;
840 tb_id = NIP_RT_TABLE_LOCAL;
841 rt->rt_table = nip_fib_get_table(net, tb_id);
842
843 return rt;
844 }
845
/* Callback argument for nip_fib_clean_all(): the device going down
 * (NULL means "flush all") and its netns.
 */
struct arg_dev_net {
        struct net_device *dev;
        struct net *net;
};
850
851 /* Determine whether an RT should be deleted along with ifDown
852 * called with nip_tb_lock held for table with rt
853 */
nip_fib_ifdown(struct nip_rt_info * rt,void * arg)854 static int nip_fib_ifdown(struct nip_rt_info *rt, void *arg)
855 {
856 const struct arg_dev_net *adn = arg;
857 const struct net_device *dev = adn->dev;
858 bool not_same_dev = (rt->dst.dev == dev || !dev);
859 bool not_null_entry = (rt != adn->net->newip.nip_null_entry);
860 bool not_broadcast_entry = (rt != adn->net->newip.nip_broadcast_entry);
861 bool dev_unregister = (dev && netdev_unregistering(dev));
862 bool ignore_route_ifdown = (!rt->rt_idev->cnf.ignore_routes_with_linkdown);
863
864 if (not_same_dev && not_null_entry && not_broadcast_entry &&
865 (dev_unregister || ignore_route_ifdown))
866 return -1;
867
868 nip_dbg("don`t del route with %s down, ifindex=%u, not_same_dev=%u, not_null_entry=%u",
869 dev->name, dev->ifindex, not_same_dev, not_null_entry);
870 nip_dbg("not_broadcast_entry=%u, dev_unregister=%u, ignore_route_ifdown=%u",
871 not_broadcast_entry, dev_unregister, ignore_route_ifdown);
872 return 0;
873 }
874
nip_rt_ifdown(struct net * net,struct net_device * dev)875 void nip_rt_ifdown(struct net *net, struct net_device *dev)
876 {
877 struct arg_dev_net adn = {
878 .dev = dev,
879 .net = net,
880 };
881
882 nip_fib_clean_all(net, nip_fib_ifdown, &adn);
883 }
884
/* Per-netns setup: clone the dst_ops template, initialize dst entry
 * accounting, and instantiate the null and broadcast route entries
 * from their templates.  Uses a goto ladder to unwind on failure.
 */
static int __net_init nip_route_net_init(struct net *net)
{
        int ret = -ENOMEM;

        memcpy(&net->newip.nip_dst_ops, &nip_dst_ops_template,
               sizeof(net->newip.nip_dst_ops));

        if (dst_entries_init(&net->newip.nip_dst_ops) < 0)
                goto out;

        net->newip.nip_null_entry = kmemdup(&nip_null_entry_template,
                                            sizeof(*net->newip.nip_null_entry),
                                            GFP_KERNEL);
        if (!net->newip.nip_null_entry)
                goto out_nip_dst_entries;
        /* Point the copy at this netns' ops and default metrics. */
        net->newip.nip_null_entry->dst.ops = &net->newip.nip_dst_ops;
        dst_init_metrics(&net->newip.nip_null_entry->dst, dst_default_metrics.metrics, true);

        net->newip.nip_broadcast_entry =
                kmemdup(&nip_broadcast_entry_template,
                        sizeof(*net->newip.nip_broadcast_entry),
                        GFP_KERNEL);
        if (!net->newip.nip_broadcast_entry)
                goto out_nip_null_entry;
        net->newip.nip_broadcast_entry->dst.ops = &net->newip.nip_dst_ops;
        dst_init_metrics(&net->newip.nip_broadcast_entry->dst, dst_default_metrics.metrics, true);
        ret = 0;
out:
        return ret;

out_nip_null_entry:
        kfree(net->newip.nip_null_entry);
out_nip_dst_entries:
        dst_entries_destroy(&net->newip.nip_dst_ops);
        goto out;
}
921
/* Per-netns teardown: free the template-derived entries before tearing
 * down the dst entry accounting (reverse of nip_route_net_init()).
 */
static void __net_exit nip_route_net_exit(struct net *net)
{
        kfree(net->newip.nip_broadcast_entry);
        kfree(net->newip.nip_null_entry);
        dst_entries_destroy(&net->newip.nip_dst_ops);
}
928
/* Per-netns init/exit hooks, registered in nip_route_init(). */
static struct pernet_operations nip_route_net_ops = {
        .init = nip_route_net_init,
        .exit = nip_route_net_exit,
};
933
/* Netdevice notifier: on loopback NETDEV_REGISTER, bind the netns
 * null/broadcast entries to the loopback device and take ninet_dev
 * references; on NETDEV_UNREGISTER (while reg_state has not yet
 * reached NETREG_UNREGISTERED) drop those references again.  Events
 * for non-loopback devices are ignored.
 */
static int nip_route_dev_notify(struct notifier_block *this,
                                unsigned long event, void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct net *net = dev_net(dev);

        if (!(dev->flags & IFF_LOOPBACK))
                return NOTIFY_OK;

        if (event == NETDEV_REGISTER) {
                net->newip.nip_null_entry->dst.dev = dev;
                net->newip.nip_null_entry->rt_idev = nin_dev_get(dev);

                net->newip.nip_broadcast_entry->dst.dev = dev;
                net->newip.nip_broadcast_entry->rt_idev = nin_dev_get(dev);
        } else if (event == NETDEV_UNREGISTER &&
                   dev->reg_state != NETREG_UNREGISTERED) {
                nin_dev_put_clear(&net->newip.nip_null_entry->rt_idev);
                nin_dev_put_clear(&net->newip.nip_broadcast_entry->rt_idev);
        }

        return NOTIFY_OK;
}
957
seq_printf_nipaddr_to_proc(struct seq_file * seq,struct nip_addr * addr)958 static void seq_printf_nipaddr_to_proc(struct seq_file *seq,
959 struct nip_addr *addr)
960 {
961 int i = 0;
962
963 for (i = 0; i < addr->bitlen / NIP_ADDR_BIT_LEN_8; i++)
964 seq_printf(seq, "%02x", addr->NIP_ADDR_FIELD8[i]);
965
966 seq_puts(seq, "\t");
967 }
968
/* Dump one FIB table to /proc: destination, gateway, flags and device
 * name per route.  Walks the hash buckets under rcu_read_lock_bh.
 */
static void nip_route_show_table(struct seq_file *seq,
                                 struct nip_fib_table *table)
{
        struct nip_fib_node *fn;
        int i;

        rcu_read_lock_bh();
        for (i = 0; i < NIN_ROUTE_HSIZE; i++) {
                hlist_for_each_entry_rcu(fn, &table->nip_tb_head[i],
                                         fib_hlist) {
                        struct nip_rt_info *rt = fn->nip_route_info;

                        seq_printf_nipaddr_to_proc(seq, &rt->rt_dst);
                        seq_printf_nipaddr_to_proc(seq, &rt->gateway);
                        seq_printf(seq, "%4u %4s\n", rt->rt_flags,
                                   rt->dst.dev ? rt->dst.dev->name : "");
                }
        }
        rcu_read_unlock_bh();
}
989
/* /proc/net/nip_route show handler: dump the main table, then the
 * local table, for this seq_file's netns.
 */
static int nip_route_proc_show(struct seq_file *seq, void *v)
{
        struct net *net = seq->private;

        nip_route_show_table(seq, net->newip.nip_fib_main_tbl);
        nip_route_show_table(seq, net->newip.nip_fib_local_tbl);

        return 0;
}
999
nip_route_net_init_late(struct net * net)1000 static int __net_init nip_route_net_init_late(struct net *net)
1001 {
1002 proc_create_net_single("nip_route", 0444, net->proc_net,
1003 nip_route_proc_show, NULL);
1004 return 0;
1005 }
1006
/* Late per-netns teardown: remove /proc/net/nip_route. */
static void __net_exit nip_route_net_exit_late(struct net *net)
{
        remove_proc_entry("nip_route", net->proc_net);
}
1011
/* Late per-netns hooks (proc interface), registered after the FIB. */
static struct pernet_operations nip_route_net_late_ops = {
        .init = nip_route_net_init_late,
        .exit = nip_route_net_exit_late,
};
1016
/* Runs after addrconf's own notifier (lower priority value). */
static struct notifier_block nip_route_dev_notifier = {
        .notifier_call = nip_route_dev_notify,
        .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};
1021
/* Module init for NewIP routing: create the dst kmem cache, register
 * the per-netns subsystems, initialize the FIB, and hook the device
 * notifier.  Unwinds in reverse order via the goto ladder on failure.
 */
int __init nip_route_init(void)
{
        int ret;

        ret = -ENOMEM;

        nip_dst_ops_template.kmem_cachep =
                kmem_cache_create("nip_dst_cache", sizeof(struct nip_rt_info), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (!nip_dst_ops_template.kmem_cachep)
                goto out;

        ret = register_pernet_subsys(&nip_route_net_ops);
        if (ret)
                goto out_kmem_cache;

        ret = nip_fib_init();
        if (ret)
                goto out_register_subsys;

        ret = register_pernet_subsys(&nip_route_net_late_ops);
        if (ret)
                goto out_nip_fib_init;

        ret = register_netdevice_notifier(&nip_route_dev_notifier);
        if (ret)
                goto out_register_late_subsys;

out:
        return ret;

out_register_late_subsys:
        unregister_pernet_subsys(&nip_route_net_late_ops);
out_nip_fib_init:
        nip_fib_gc_cleanup();
out_register_subsys:
        unregister_pernet_subsys(&nip_route_net_ops);
out_kmem_cache:
        kmem_cache_destroy(nip_dst_ops_template.kmem_cachep);
        goto out;
}
1063
nip_route_cleanup(void)1064 void nip_route_cleanup(void)
1065 {
1066 unregister_pernet_subsys(&nip_route_net_late_ops);
1067 nip_fib_gc_cleanup();
1068 unregister_pernet_subsys(&nip_route_net_ops);
1069 kmem_cache_destroy(nip_dst_ops_template.kmem_cachep);
1070 }
1071
1072