• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Based on net/ipv4/route.c
4  * Authors:	Ross Biro
5  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
6  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
7  *		Linus Torvalds, <Linus.Torvalds@helsinki.fi>
8  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  *
10  * Fixes:
11  *		Alan Cox	:	Verify area fixes.
12  *		Alan Cox	:	cli() protects routing changes
13  *		Rui Oliveira	:	ICMP routing table updates
14  *		(rco@di.uminho.pt)	Routing table insertion and update
15  *		Linus Torvalds	:	Rewrote bits to be sensible
16  *		Alan Cox	:	Added BSD route gw semantics
17  *		Alan Cox	:	Super /proc >4K
18  *		Alan Cox	:	MTU in route table
19  *		Alan Cox	:	MSS actually. Also added the window
20  *					clamper.
21  *		Sam Lantinga	:	Fixed route matching in rt_del()
22  *		Alan Cox	:	Routing cache support.
23  *		Alan Cox	:	Removed compatibility cruft.
24  *		Alan Cox	:	RTF_REJECT support.
25  *		Alan Cox	:	TCP irtt support.
26  *		Jonathan Naylor	:	Added Metric support.
27  *	Miquel van Smoorenburg	:	BSD API fixes.
28  *	Miquel van Smoorenburg	:	Metrics.
29  *		Alan Cox	:	Use __u32 properly
30  *		Alan Cox	:	Aligned routing errors more closely with BSD
31  *					our system is still very different.
32  *		Alan Cox	:	Faster /proc handling
33  *	Alexey Kuznetsov	:	Massive rework to support tree based routing,
34  *					routing caches and better behaviour.
35  *
36  *		Olaf Erb	:	irtt wasn't being copied right.
37  *		Bjorn Ekwall	:	Kerneld route support.
38  *		Alan Cox	:	Multicast fixed (I hope)
39  *		Pavel Krauz	:	Limited broadcast fixed
40  *		Mike McLagan	:	Routing by source
41  *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
42  *					route.c and rewritten from scratch.
43  *		Andi Kleen	:	Load-limit warning messages.
44  *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
45  *	Vitaly E. Lavrov	:	Race condition in ip_route_input_slow.
46  *	Tobias Ringstrom	:	Uninitialized res.type in ip_route_output_slow.
47  *	Vladimir V. Ivanov	:	IP rule info (flowid) is really useful.
48  *		Marc Boucher	:	routing by fwmark
49  *	Robert Olsson		:	Added rt_cache statistics
50  *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
51  *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
52  *	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
53  *	Ilia Sotnikov		:	Removed TOS from hash calculations
54  *
55  * Based on net/ipv6/route.c
56  *	Authors:
57  *	Pedro Roque		<roque@di.fc.ul.pt>
58  *
59  *	Changes:
60  *
61  *	YOSHIFUJI Hideaki @USAGI
62  *		reworked default router selection.
63  *		- respect outgoing interface
64  *		- select from (probably) reachable routers (i.e.
65  *		routers in REACHABLE, STALE, DELAY or PROBE states).
66  *		- always select the same router if it is (probably)
67  *		reachable.  otherwise, round-robin the list.
68  *	Ville Nuorvala
69  *		Fixed routing subtrees.
70  *
71  * NewIP INET
72  * An implementation of the TCP/IP protocol suite for the LINUX
73  * operating system. NewIP INET is implemented using the  BSD Socket
74  * interface as the means of communication with the user level.
75  *
76  * ROUTE - implementation of the NewIP router.
77  */
78 #define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__
79 
80 #include <linux/module.h>
81 #include <linux/netdevice.h>
82 #include <linux/socket.h>
83 #include <linux/in.h>
84 #include <linux/kernel.h>
85 #include <linux/sockios.h>
86 #include <linux/string.h>
87 #include <linux/vmalloc.h>
88 #include <linux/capability.h>
89 #include <linux/proc_fs.h>
90 
91 #include <net/sock.h>
92 #include <net/udp.h>
93 #include <net/inet_common.h>
94 #include <net/protocol.h>
95 #include <net/dst.h>
96 #include <net/lwtunnel.h>
97 #include <linux/uaccess.h>   /* copy_from_user() */
98 #include <linux/rtnetlink.h> /* rtnl_lock() */
99 #include <linux/inetdevice.h>
100 
101 #include <net/nip_route.h>
102 #include <net/nip_fib.h>
103 #include <net/nip_addrconf.h>
104 #include <net/nndisc.h>
105 #include <net/nip.h>
106 
107 #include <linux/newip_route.h>
108 #include "nip_hdr.h"
109 #include "tcp_nip_parameter.h"
110 
/* Forward declarations of static functions defined later in this file. */
static unsigned int nip_mtu(const struct dst_entry *dst);
static int nip_pkt_discard(struct sk_buff *skb);
static int nip_pkt_discard_out(struct net *net, struct sock *sk,
			       struct sk_buff *skb);
115 
116 static const struct nip_rt_info nip_null_entry_template = {
117 	.dst = {
118 		.__refcnt = ATOMIC_INIT(1),
119 		.__use = 1,
120 		.obsolete = DST_OBSOLETE_FORCE_CHK,
121 		.error = -ENETUNREACH,
122 		.input = nip_pkt_discard,
123 		.output = nip_pkt_discard_out,
124 		 },
125 	.rt_ref = ATOMIC_INIT(1),
126 };
127 
128 static const struct nip_rt_info nip_broadcast_entry_template = {
129 	.dst = {
130 		.__refcnt = ATOMIC_INIT(1),
131 		.__use = 1,
132 		.obsolete = DST_OBSOLETE_FORCE_CHK,
133 		.input = nip_input,
134 		.output = nip_output,
135 		 },
136 	.rt_ref = ATOMIC_INIT(1),
137 };
138 
nip_nexthop(struct nip_rt_info * rt,struct nip_addr * daddr)139 struct nip_addr *nip_nexthop(struct nip_rt_info *rt, struct nip_addr *daddr)
140 {
141 	if (rt->rt_flags & RTF_GATEWAY)
142 		return &rt->gateway;
143 	else
144 		return daddr;
145 }
146 
rtmsg_to_fibni_config(struct net * net,struct nip_rtmsg * rtmsg,struct nip_fib_config * cfg)147 static void rtmsg_to_fibni_config(struct net *net, struct nip_rtmsg *rtmsg,
148 				  struct nip_fib_config *cfg)
149 {
150 	memset(cfg, 0, sizeof(*cfg));
151 
152 	cfg->fc_table = NIP_RT_TABLE_MAIN;
153 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
154 	cfg->fc_metric = rtmsg->rtmsg_metric;
155 	cfg->fc_expires = rtmsg->rtmsg_info;
156 
157 	cfg->fc_flags = rtmsg->rtmsg_flags;
158 
159 	cfg->fc_nlinfo.nl_net = net;
160 
161 	cfg->fc_dst = rtmsg->rtmsg_dst;
162 	cfg->fc_src = rtmsg->rtmsg_src;
163 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
164 }
165 
nip_rt_info_init(struct nip_rt_info * rt)166 static void nip_rt_info_init(struct nip_rt_info *rt)
167 {
168 	struct dst_entry *dst = &rt->dst;
169 
170 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
171 	rt->from = NULL;
172 }
173 
__nip_dst_alloc(struct net * net,struct net_device * dev,int flags)174 static struct nip_rt_info *__nip_dst_alloc(struct net *net,
175 					   struct net_device *dev, int flags)
176 {
177 	struct nip_rt_info *rt =
178 	    dst_alloc(&net->newip.nip_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
179 		      flags);
180 
181 	if (rt)
182 		nip_rt_info_init(rt);
183 
184 	return rt;
185 }
186 
nip_dst_alloc(struct net * net,struct net_device * dev,int flags)187 struct nip_rt_info *nip_dst_alloc(struct net *net, struct net_device *dev,
188 				  int flags)
189 {
190 	struct nip_rt_info *rt = __nip_dst_alloc(net, dev, flags);
191 
192 	if (rt) {
193 		rt->rt_pcpu =
194 		    alloc_percpu_gfp(struct nip_rt_info *, GFP_ATOMIC);
195 		if (rt->rt_pcpu) {
196 			int cpu;
197 
198 			for_each_possible_cpu(cpu) {
199 				struct nip_rt_info **p;
200 
201 				p = per_cpu_ptr(rt->rt_pcpu, cpu);
202 				/* no one shares rt */
203 				*p = NULL;
204 			}
205 		} else {
206 			dst_destroy((struct dst_entry *)rt);
207 			return NULL;
208 		}
209 	}
210 
211 	return rt;
212 }
213 
nip_rt_dst_from_metrics_check(struct nip_rt_info * rt)214 static void nip_rt_dst_from_metrics_check(struct nip_rt_info *rt)
215 {
216 	if (rt->from &&
217 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->from))
218 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->from), true);
219 }
220 
nip_rt_get_pcpu_route(struct nip_rt_info * rt)221 static struct nip_rt_info *nip_rt_get_pcpu_route(struct nip_rt_info *rt)
222 {
223 	struct nip_rt_info *pcpu_rt, **p;
224 
225 	p = this_cpu_ptr(rt->rt_pcpu);
226 	pcpu_rt = *p;
227 
228 	if (pcpu_rt) {
229 		dst_hold(&pcpu_rt->dst);
230 		nip_rt_dst_from_metrics_check(pcpu_rt);
231 	}
232 	return pcpu_rt;
233 }
234 
nip_rt_set_from(struct nip_rt_info * rt,struct nip_rt_info * from)235 static void nip_rt_set_from(struct nip_rt_info *rt, struct nip_rt_info *from)
236 {
237 	WARN_ON(from->from);
238 
239 	rt->rt_flags &= ~RTF_EXPIRES;
240 	dst_hold(&from->dst);
241 	rt->from = &from->dst;
242 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
243 }
244 
nip_rt_copy_init(struct nip_rt_info * rt,struct nip_rt_info * ort)245 static void nip_rt_copy_init(struct nip_rt_info *rt, struct nip_rt_info *ort)
246 {
247 	rt->dst.input = ort->dst.input;
248 	rt->dst.output = ort->dst.output;
249 	rt->rt_dst = ort->rt_dst;
250 	rt->dst.error = ort->dst.error;
251 	rt->rt_idev = ort->rt_idev;
252 	if (rt->rt_idev)
253 		nin_dev_hold(rt->rt_idev);
254 
255 	rt->dst.lastuse = jiffies;
256 	rt->gateway = ort->gateway;
257 	rt->rt_flags = ort->rt_flags;
258 	nip_rt_set_from(rt, ort);
259 	rt->rt_metric = ort->rt_metric;
260 	rt->rt_table = ort->rt_table;
261 	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
262 }
263 
nip_rt_pcpu_alloc(struct nip_rt_info * rt)264 static struct nip_rt_info *nip_rt_pcpu_alloc(struct nip_rt_info *rt)
265 {
266 	struct nip_rt_info *pcpu_rt;
267 
268 	pcpu_rt = __nip_dst_alloc(dev_net(rt->dst.dev),
269 				  rt->dst.dev, rt->dst.flags);
270 	if (!pcpu_rt)
271 		return NULL;
272 	nip_rt_copy_init(pcpu_rt, rt);
273 	pcpu_rt->rt_protocol = rt->rt_protocol;
274 	pcpu_rt->rt_flags |= RTF_PCPU;
275 	return pcpu_rt;
276 }
277 
nip_rt_make_pcpu_route(struct nip_rt_info * rt)278 static struct nip_rt_info *nip_rt_make_pcpu_route(struct nip_rt_info *rt)
279 {
280 	struct nip_rt_info *pcpu_rt, *prev;
281 
282 	pcpu_rt = nip_rt_pcpu_alloc(rt);
283 	if (!pcpu_rt) {
284 		struct net *net = dev_net(rt->dst.dev);
285 
286 		dst_hold(&net->newip.nip_null_entry->dst);
287 		return net->newip.nip_null_entry;
288 	}
289 
290 	rcu_read_lock_bh();
291 	if (rt->rt_pcpu) {
292 		struct nip_rt_info **p = this_cpu_ptr(rt->rt_pcpu);
293 
294 		prev = cmpxchg(p, NULL, pcpu_rt);
295 		if (prev) {
296 			/* If someone did it before us, return prev instead */
297 			dst_destroy(&pcpu_rt->dst);
298 			pcpu_rt = prev;
299 		}
300 	} else {
301 		dst_destroy(&pcpu_rt->dst);
302 		pcpu_rt = rt;
303 	}
304 	dst_hold(&pcpu_rt->dst);
305 	nip_rt_dst_from_metrics_check(pcpu_rt);
306 	rcu_read_unlock_bh();
307 	return pcpu_rt;
308 }
309 
nip_pol_route_input(struct net * net,struct nip_fib_table * table,struct flow_nip * fln,int flags)310 static struct nip_rt_info *nip_pol_route_input(struct net *net,
311 					       struct nip_fib_table *table,
312 					       struct flow_nip *fln, int flags)
313 {
314 	return nip_pol_route(net, table, fln->FLOWIN_IIF, fln, flags);
315 }
316 
nip_route_input_lookup(struct net * net,struct net_device * dev,struct flow_nip * fln,int flags,int * tbl_type)317 struct dst_entry *nip_route_input_lookup(struct net *net,
318 					 struct net_device *dev,
319 					 struct flow_nip *fln, int flags, int *tbl_type)
320 {
321 	return nip_fib_rule_lookup(net, fln, flags, tbl_type, nip_pol_route_input);
322 }
323 
nip_route_input(struct sk_buff * skb)324 int nip_route_input(struct sk_buff *skb)
325 {
326 	struct net *net = dev_net(skb->dev);
327 	int flags = 0;
328 	struct flow_nip fln = {
329 		.FLOWIN_IIF = skb->skb_iif,
330 		.daddr = nipcb(skb)->dstaddr,
331 		.saddr = nipcb(skb)->srcaddr,
332 	};
333 	struct dst_entry *out_dst;
334 	int tbl_type = 0;
335 
336 	if (nip_addr_eq(&fln.daddr, &nip_broadcast_addr_arp)) {
337 		nip_dbg("recv broadcast packet");
338 		dst_hold(&net->newip.nip_broadcast_entry->dst);
339 		skb_dst_set(skb,
340 			    (struct dst_entry *)net->newip.nip_broadcast_entry);
341 		return 0;
342 	}
343 
344 	out_dst = nip_route_input_lookup(net, skb->dev, &fln, flags, &tbl_type);
345 	skb_dst_set(skb, out_dst);
346 
347 	if (tbl_type == RT_TABLE_MAIN) {
348 		struct ninet_dev *nin_dev = rcu_dereference(skb->dev->nip_ptr);
349 		struct ninet_dev *nout_dev = rcu_dereference(out_dst->dev->nip_ptr);
350 
351 		/* When global variable ipv4 all/send_redirects or
352 		 * corresponding network/send_redirects is 1,
353 		 * IN_DEV_TX_REDIRECTS() conditions are valid.
354 		 * send_redirects default is 1.
355 		 */
356 		if (nin_dev == nout_dev &&
357 		    IN_DEV_TX_REDIRECTS(rcu_dereference(out_dst->dev->ip_ptr))) {
358 			nip_dbg("The inlet and outlet are the same");
359 			return 1;
360 		}
361 	}
362 	return 0;
363 }
364 
nip_pol_route_output(struct net * net,struct nip_fib_table * table,struct flow_nip * fln,int flags)365 static struct nip_rt_info *nip_pol_route_output(struct net *net,
366 						struct nip_fib_table *table,
367 						struct flow_nip *fln, int flags)
368 {
369 	return nip_pol_route(net, table, fln->FLOWIN_OIF, fln, flags);
370 }
371 
nip_route_output_flags(struct net * net,const struct sock * sk,struct flow_nip * fln,int flags)372 struct dst_entry *nip_route_output_flags(struct net *net, const struct sock *sk,
373 					 struct flow_nip *fln, int flags)
374 {
375 	struct dst_entry *dst;
376 	struct nip_rt_info *rt;
377 	int tbl_type = 0;
378 
379 	dst = nip_fib_rule_lookup(net, fln, flags, &tbl_type, nip_pol_route_output);
380 	rt = (struct nip_rt_info *)dst;
381 
382 	if (!(rt->rt_flags & RTF_LOCAL))
383 		return dst;
384 
385 	rcu_read_lock();
386 	if (rt->rt_idev) {
387 		read_lock_bh(&rt->rt_idev->lock);
388 		/* search saddr in idev->addr */
389 		if (!list_empty(&rt->rt_idev->addr_list)) {
390 			struct ninet_ifaddr *ifp;
391 
392 			list_for_each_entry(ifp, &rt->rt_idev->addr_list, if_list) {
393 				fln->saddr = ifp->addr;
394 				break;
395 			}
396 		}
397 		read_unlock_bh(&rt->rt_idev->lock);
398 	}
399 	rcu_read_unlock();
400 
401 	dst_release(dst);
402 	dst_hold(&net->newip.nip_broadcast_entry->dst);
403 	return &net->newip.nip_broadcast_entry->dst;
404 }
405 
nip_pol_route(struct net * net,struct nip_fib_table * table,int oif,struct flow_nip * fln,int flags)406 struct nip_rt_info *nip_pol_route(struct net *net, struct nip_fib_table *table,
407 				  int oif, struct flow_nip *fln, int flags)
408 {
409 	struct nip_fib_node *fn;
410 	struct nip_rt_info *rt, *pcpu_rt;
411 
412 	rcu_read_lock_bh();
413 	fn = nip_fib_locate(table->nip_tb_head, &fln->daddr);
414 	if (!fn) {
415 		rcu_read_unlock_bh();
416 		nip_dbg("search fail");
417 		rt = net->newip.nip_null_entry;
418 		dst_hold(&rt->dst);
419 		return rt;
420 	}
421 	rt = fn->nip_route_info;
422 
423 	/* Get a percpu copy */
424 	rt->dst.lastuse = jiffies;
425 	rt->dst.__use++;
426 	pcpu_rt = nip_rt_get_pcpu_route(rt);
427 	nip_dbg("cpu id=%d", smp_processor_id());
428 	if (pcpu_rt) {
429 		rcu_read_unlock_bh();
430 		nip_dbg("pcpu found");
431 	} else {
432 		dst_hold(&rt->dst);
433 		rcu_read_unlock_bh();
434 		pcpu_rt = nip_rt_make_pcpu_route(rt);
435 		dst_release(&rt->dst);
436 	}
437 
438 	nip_dbg("rt dst.__refcnt=%d, pcpu dst.__refcnt=%d",
439 		atomic_read(&rt->dst.__refcnt),
440 		atomic_read(&pcpu_rt->dst.__refcnt));
441 	return pcpu_rt;
442 }
443 
nip_bind_addr_check(struct net * net,struct nip_addr * addr)444 bool nip_bind_addr_check(struct net *net,
445 			 struct nip_addr *addr)
446 {
447 	struct nip_fib_node *fn;
448 	struct nip_fib_table *fib_tbl = net->newip.nip_fib_local_tbl;
449 
450 	if (nip_addr_invalid(addr)) {
451 		nip_dbg("binding-addr invalid, bitlen=%u", addr->bitlen);
452 		return false;
453 	}
454 
455 	if (nip_addr_eq(addr, &nip_any_addr)) {
456 		nip_dbg("binding-addr is any addr");
457 		return true;
458 	}
459 
460 	rcu_read_lock_bh();
461 	fn = nip_fib_locate(fib_tbl->nip_tb_head, addr);
462 	rcu_read_unlock_bh();
463 	if (!fn) {
464 		nip_dbg("binding-addr is not local addr");
465 		return false;
466 	}
467 
468 	nip_dbg("binding-addr is local addr");
469 	return true;
470 }
471 
nip_route_info_create(struct nip_fib_config * cfg)472 static struct nip_rt_info *nip_route_info_create(struct nip_fib_config *cfg)
473 {
474 	struct net *net = cfg->fc_nlinfo.nl_net;
475 	struct nip_rt_info *rt = NULL;
476 	struct net_device *dev = NULL;
477 	struct ninet_dev *idev = NULL;
478 	struct nip_fib_table *table;
479 	int err = -ENODEV;
480 
481 	/* find net_device */
482 	dev = dev_get_by_index(net, cfg->fc_ifindex);
483 	if (!dev) {
484 		nip_dbg("fail to get dev by ifindex(%u)", cfg->fc_ifindex);
485 		goto out;
486 	}
487 
488 	/* find ninet_dev,which has the newip address list */
489 	idev = nin_dev_get(dev);
490 	if (!idev) {
491 		nip_dbg("fail to get ninet dev (ifindex=%u)", cfg->fc_ifindex);
492 		goto out;
493 	}
494 	/* Do not add a route when the network port is not running
495 	 * to avoid incorrect route selection
496 	 */
497 	if (!netif_running(idev->dev)) {
498 		nip_dbg("network interface is not running");
499 		goto out;
500 	}
501 	if (cfg->fc_metric == 0)
502 		cfg->fc_metric = NIP_RT_PRIO_USER;
503 
504 	err = -ENOBUFS;
505 	table = nip_fib_get_table(net, cfg->fc_table);
506 	if (!table) {
507 		nip_dbg("fail to get fib table (fc_table=%u)", cfg->fc_table);
508 		goto out;
509 	}
510 
511 	rt = nip_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
512 	if (!rt) {
513 		nip_dbg("fail to alloc dst mem");
514 		err = -ENOMEM;
515 		goto out;
516 	}
517 
518 	nip_rt_clean_expires(rt);
519 
520 	if (cfg->fc_protocol == RTPROT_UNSPEC)
521 		cfg->fc_protocol = RTPROT_BOOT;
522 	rt->rt_protocol = cfg->fc_protocol;
523 
524 	if (cfg->fc_flags & RTF_LOCAL) {
525 		rt->dst.input = nip_input;
526 		nip_dbg("rt->dst.input=nip_input, ifindex=%u", cfg->fc_ifindex);
527 	} else {
528 		rt->dst.input = nip_forward;
529 		nip_dbg("rt->dst.input=nip_forward, ifindex=%u", cfg->fc_ifindex);
530 	}
531 
532 	rt->dst.output = nip_output;
533 	rt->rt_dst = cfg->fc_dst;
534 	rt->rt_src = cfg->fc_src;
535 	rt->rt_metric = cfg->fc_metric;
536 
537 	if (cfg->fc_flags & RTF_GATEWAY)
538 		rt->gateway = cfg->fc_gateway;
539 	else
540 		rt->gateway = nip_any_addr;
541 
542 	rt->rt_flags = cfg->fc_flags;
543 	rt->dst.dev = dev;
544 	rt->rt_idev = idev;
545 	rt->rt_table = table;
546 
547 	return rt;
548 out:
549 	if (dev)
550 		dev_put(dev);
551 	if (idev)
552 		nin_dev_put(idev);
553 	return ERR_PTR(err);
554 }
555 
556 /* __nip_ins_rt is called with FREE table->nip_tb_lock.
557  * It takes new route entry, the addition fails by any reason the
558  * route is released.
559  */
__nip_ins_rt(struct nip_rt_info * rt)560 static int __nip_ins_rt(struct nip_rt_info *rt)
561 {
562 	int err;
563 	struct nip_fib_table *table;
564 
565 	table = rt->rt_table;
566 
567 	spin_lock_bh(&table->nip_tb_lock);
568 	err = nip_fib_add(table, rt);
569 	spin_unlock_bh(&table->nip_tb_lock);
570 
571 	return err;
572 }
573 
nip_ins_rt(struct nip_rt_info * rt)574 int nip_ins_rt(struct nip_rt_info *rt)
575 {
576 	/* Hold dst to account for the reference from the nip fib hash */
577 	dst_hold(&rt->dst);
578 	return __nip_ins_rt(rt);
579 }
580 
/* Create a route from @cfg and insert it into its FIB table.
 * Returns 0 on success or a negative errno from creation or insertion.
 */
int nip_route_add(struct nip_fib_config *cfg)
{
	struct nip_rt_info *rt = nip_route_info_create(cfg);

	if (IS_ERR(rt)) {
		nip_dbg("fail to creat route info");
		return PTR_ERR(rt);
	}

	return __nip_ins_rt(rt);
}
598 
__nip_del_rt(struct nip_rt_info * rt,struct nl_info * info)599 static int __nip_del_rt(struct nip_rt_info *rt, struct nl_info *info)
600 {
601 	int err;
602 	struct nip_fib_table *table;
603 	struct net *net = dev_net(rt->dst.dev);
604 
605 	if (rt == net->newip.nip_null_entry) {
606 		err = -ENOENT;
607 		goto out;
608 	}
609 
610 	table = rt->rt_table;
611 	spin_lock_bh(&table->nip_tb_lock);
612 	err = nip_fib_del(rt, info);
613 	spin_unlock_bh(&table->nip_tb_lock);
614 
615 out:
616 	nip_rt_put(rt);
617 	return err;
618 }
619 
nip_del_rt(struct nip_rt_info * rt)620 int nip_del_rt(struct nip_rt_info *rt)
621 {
622 	struct nl_info info = {
623 		.nl_net = dev_net(rt->dst.dev),
624 	};
625 	return __nip_del_rt(rt, &info);
626 }
627 
nip_route_del(struct nip_fib_config * cfg)628 static int nip_route_del(struct nip_fib_config *cfg)
629 {
630 	struct net *net = cfg->fc_nlinfo.nl_net;
631 	struct nip_fib_table *table;
632 	struct nip_fib_node *fn;
633 	struct nip_rt_info *rt;
634 	int err = -ESRCH;
635 
636 	table = nip_fib_get_table(net, cfg->fc_table);
637 	if (!table)
638 		return err;
639 
640 	rcu_read_lock_bh();
641 	fn = nip_fib_locate(table->nip_tb_head, &cfg->fc_dst);
642 	if (fn) {
643 		rt = fn->nip_route_info;
644 		dst_hold(&rt->dst);
645 		rcu_read_unlock_bh();
646 
647 		return __nip_del_rt(rt, &cfg->fc_nlinfo);
648 	}
649 	rcu_read_unlock_bh();
650 
651 	return err;
652 }
653 
nip_route_ioctl(struct net * net,unsigned int cmd,struct nip_rtmsg * rtmsg)654 int nip_route_ioctl(struct net *net, unsigned int cmd, struct nip_rtmsg *rtmsg)
655 {
656 	struct nip_fib_config cfg;
657 	int err;
658 
659 	rtmsg_to_fibni_config(net, rtmsg, &cfg);
660 	if (nip_addr_invalid(&cfg.fc_dst)) {
661 		nip_dbg("nip daddr invalid, bitlen=%u", cfg.fc_dst.bitlen);
662 		return -EFAULT;
663 	}
664 
665 	if (cfg.fc_flags & RTF_GATEWAY) {
666 		if (nip_addr_invalid(&cfg.fc_gateway)) {
667 			nip_dbg("nip gateway daddr invalid, bitlen=%u",
668 				cfg.fc_gateway.bitlen);
669 			return -EFAULT;
670 		}
671 	}
672 
673 	rtnl_lock();
674 	switch (cmd) {
675 	case SIOCADDRT: /* Add a route */
676 		err = nip_route_add(&cfg);
677 		break;
678 	case SIOCDELRT: /* Delete a route */
679 		err = nip_route_del(&cfg);
680 		break;
681 	default:
682 		err = -EINVAL;
683 	}
684 	rtnl_unlock();
685 
686 	return err;
687 }
688 
nip_dst_destroy(struct dst_entry * dst)689 static void nip_dst_destroy(struct dst_entry *dst)
690 {
691 	struct nip_rt_info *rt = (struct nip_rt_info *)dst;
692 	struct dst_entry *from = rt->from;
693 	struct ninet_dev *idev;
694 
695 	dst_destroy_metrics_generic(dst);
696 	free_percpu(rt->rt_pcpu);
697 
698 	idev = rt->rt_idev;
699 	if (idev) {
700 		rt->rt_idev = NULL;
701 		nip_dbg("idev->refcnt=%u", refcount_read(&idev->refcnt));
702 		nin_dev_put(idev);
703 	}
704 
705 	if (from)
706 		nip_dbg("from->__refcnt=%d", atomic_read(&from->__refcnt));
707 	rt->from = NULL;
708 	dst_release(from);
709 }
710 
nip_choose_neigh_daddr(struct nip_rt_info * rt,struct sk_buff * skb,const void * daddr)711 static inline const void *nip_choose_neigh_daddr(struct nip_rt_info *rt,
712 						 struct sk_buff *skb,
713 						 const void *daddr)
714 {
715 	struct nip_addr *p = &rt->gateway;
716 
717 	if (rt->rt_flags & RTF_GATEWAY)
718 		return (const void *)p;
719 	else if (skb)
720 		return &nipcb(skb)->dstaddr;
721 	return daddr;
722 }
723 
nip_neigh_lookup(const struct dst_entry * dst,struct sk_buff * skb,const void * daddr)724 static struct neighbour *nip_neigh_lookup(const struct dst_entry *dst,
725 					  struct sk_buff *skb,
726 					  const void *daddr)
727 {
728 	struct nip_rt_info *rt = (struct nip_rt_info *)dst;
729 	struct neighbour *n;
730 
731 	daddr = nip_choose_neigh_daddr(rt, skb, daddr);
732 	n = __nip_neigh_lookup(dst->dev, daddr);
733 	if (n)
734 		return n;
735 	return neigh_create(&nnd_tbl, daddr, dst->dev);
736 }
737 
/* dst_ops->check: return @dst while it is still marked
 * DST_OBSOLETE_FORCE_CHK, NULL otherwise. @cookie is not used.
 */
static struct dst_entry *nip_dst_check(struct dst_entry *dst, u32 cookie)
{
	return dst->obsolete == DST_OBSOLETE_FORCE_CHK ? dst : NULL;
}
744 
745 /* Used to calculate the MSS value required by TCP
746  * Because there is no MSS in the TCP of NewIP,
747  * the value is calculated based on the MTU of the network port
748  */
nip_default_advmss(const struct dst_entry * dst)749 static unsigned int nip_default_advmss(const struct dst_entry *dst)
750 {
751 	unsigned int mtu = dst_mtu(dst);
752 
753 	mtu -= NIP_HDR_MAX + sizeof(struct tcphdr);
754 
755 	return mtu;
756 }
757 
nip_mtu(const struct dst_entry * dst)758 static unsigned int nip_mtu(const struct dst_entry *dst)
759 {
760 	unsigned int mtu;
761 	struct ninet_dev *idev;
762 
763 	mtu = NIP_MIN_MTU;
764 
765 	rcu_read_lock();
766 	idev = __nin_dev_get(dst->dev);
767 	if (idev)
768 		mtu = idev->cnf.mtu;
769 	rcu_read_unlock();
770 
771 	return mtu;
772 }
773 
nip_dst_ifdown(struct dst_entry * dst,struct net_device * dev,int how)774 static void nip_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
775 			   int how)
776 {
777 	struct nip_rt_info *rt = (struct nip_rt_info *)dst;
778 	struct ninet_dev *idev = rt->rt_idev;
779 	struct net_device *loopback_dev =
780 		dev_net(dev)->loopback_dev;
781 
782 	if (idev && idev->dev != loopback_dev) {
783 		struct ninet_dev *loopback_idev = nin_dev_get(loopback_dev);
784 
785 		if (loopback_idev) {
786 			rt->rt_idev = loopback_idev;
787 			nin_dev_put(idev);
788 		}
789 	}
790 }
791 
792 static struct dst_ops nip_dst_ops_template = {
793 	.family			= AF_NINET,
794 	.destroy		= nip_dst_destroy,
795 	.ifdown			= nip_dst_ifdown,
796 	.neigh_lookup		= nip_neigh_lookup,
797 	.check			= nip_dst_check,
798 	.default_advmss		= nip_default_advmss,
799 	.mtu			= nip_mtu,
800 };
801 
/* dst input handler that drops the packet: frees @skb, returns 0. */
static int nip_pkt_discard(struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
807 
/* dst output handler that drops the packet: frees @skb, returns 0.
 * @net and @sk are not used.
 */
static int nip_pkt_discard_out(struct net *net, struct sock *sk,
			       struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
814 
nip_addrconf_dst_alloc(struct ninet_dev * idev,const struct nip_addr * addr)815 struct nip_rt_info *nip_addrconf_dst_alloc(struct ninet_dev *idev,
816 					   const struct nip_addr *addr)
817 {
818 	u32 tb_id;
819 	struct net *net = dev_net(idev->dev);
820 	struct net_device *dev = idev->dev;
821 	struct nip_rt_info *rt;
822 
823 	rt = nip_dst_alloc(net, dev, DST_NOCOUNT);
824 	if (!rt)
825 		return ERR_PTR(-ENOMEM);
826 
827 	nin_dev_hold(idev);
828 
829 	rt->dst.flags |= DST_HOST;
830 	rt->dst.input = nip_input;
831 	rt->dst.output = nip_output;
832 	rt->rt_idev = idev;
833 
834 	rt->rt_protocol = RTPROT_KERNEL;
835 	rt->rt_flags = RTF_UP | RTF_NONEXTHOP;
836 	rt->rt_flags |= RTF_LOCAL;
837 
838 	rt->gateway = *addr;
839 	rt->rt_dst = *addr;
840 	tb_id = NIP_RT_TABLE_LOCAL;
841 	rt->rt_table = nip_fib_get_table(net, tb_id);
842 
843 	return rt;
844 }
845 
/* Argument bundle handed to nip_fib_ifdown() via nip_fib_clean_all(). */
struct arg_dev_net {
	struct net *net;	/* namespace being cleaned */
	struct net_device *dev;	/* device going down; may be NULL */
};
850 
851 /* Determine whether an RT should be deleted along with ifDown
852  * called with nip_tb_lock held for table with rt
853  */
nip_fib_ifdown(struct nip_rt_info * rt,void * arg)854 static int nip_fib_ifdown(struct nip_rt_info *rt, void *arg)
855 {
856 	const struct arg_dev_net *adn = arg;
857 	const struct net_device *dev = adn->dev;
858 	bool not_same_dev = (rt->dst.dev == dev || !dev);
859 	bool not_null_entry = (rt != adn->net->newip.nip_null_entry);
860 	bool not_broadcast_entry = (rt != adn->net->newip.nip_broadcast_entry);
861 	bool dev_unregister = (dev && netdev_unregistering(dev));
862 	bool ignore_route_ifdown = (!rt->rt_idev->cnf.ignore_routes_with_linkdown);
863 
864 	if (not_same_dev && not_null_entry && not_broadcast_entry &&
865 	    (dev_unregister || ignore_route_ifdown))
866 		return -1;
867 
868 	nip_dbg("don`t del route with %s down, ifindex=%u, not_same_dev=%u, not_null_entry=%u",
869 		dev->name, dev->ifindex, not_same_dev, not_null_entry);
870 	nip_dbg("not_broadcast_entry=%u, dev_unregister=%u, ignore_route_ifdown=%u",
871 		not_broadcast_entry, dev_unregister, ignore_route_ifdown);
872 	return 0;
873 }
874 
nip_rt_ifdown(struct net * net,struct net_device * dev)875 void nip_rt_ifdown(struct net *net, struct net_device *dev)
876 {
877 	struct arg_dev_net adn = {
878 		.dev = dev,
879 		.net = net,
880 	};
881 
882 	nip_fib_clean_all(net, nip_fib_ifdown, &adn);
883 }
884 
nip_route_net_init(struct net * net)885 static int __net_init nip_route_net_init(struct net *net)
886 {
887 	int ret = -ENOMEM;
888 
889 	memcpy(&net->newip.nip_dst_ops, &nip_dst_ops_template,
890 	       sizeof(net->newip.nip_dst_ops));
891 
892 	if (dst_entries_init(&net->newip.nip_dst_ops) < 0)
893 		goto out;
894 
895 	net->newip.nip_null_entry = kmemdup(&nip_null_entry_template,
896 					    sizeof(*net->newip.nip_null_entry),
897 					    GFP_KERNEL);
898 	if (!net->newip.nip_null_entry)
899 		goto out_nip_dst_entries;
900 	net->newip.nip_null_entry->dst.ops = &net->newip.nip_dst_ops;
901 	dst_init_metrics(&net->newip.nip_null_entry->dst, dst_default_metrics.metrics, true);
902 
903 	net->newip.nip_broadcast_entry =
904 		kmemdup(&nip_broadcast_entry_template,
905 			sizeof(*net->newip.nip_broadcast_entry),
906 						 GFP_KERNEL);
907 	if (!net->newip.nip_broadcast_entry)
908 		goto out_nip_null_entry;
909 	net->newip.nip_broadcast_entry->dst.ops = &net->newip.nip_dst_ops;
910 	dst_init_metrics(&net->newip.nip_broadcast_entry->dst, dst_default_metrics.metrics, true);
911 	ret = 0;
912 out:
913 	return ret;
914 
915 out_nip_null_entry:
916 	kfree(net->newip.nip_null_entry);
917 out_nip_dst_entries:
918 	dst_entries_destroy(&net->newip.nip_dst_ops);
919 	goto out;
920 }
921 
nip_route_net_exit(struct net * net)922 static void __net_exit nip_route_net_exit(struct net *net)
923 {
924 	kfree(net->newip.nip_broadcast_entry);
925 	kfree(net->newip.nip_null_entry);
926 	dst_entries_destroy(&net->newip.nip_dst_ops);
927 }
928 
929 static struct pernet_operations nip_route_net_ops = {
930 	.init = nip_route_net_init,
931 	.exit = nip_route_net_exit,
932 };
933 
nip_route_dev_notify(struct notifier_block * this,unsigned long event,void * ptr)934 static int nip_route_dev_notify(struct notifier_block *this,
935 				unsigned long event, void *ptr)
936 {
937 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
938 	struct net *net = dev_net(dev);
939 
940 	if (!(dev->flags & IFF_LOOPBACK))
941 		return NOTIFY_OK;
942 
943 	if (event == NETDEV_REGISTER) {
944 		net->newip.nip_null_entry->dst.dev = dev;
945 		net->newip.nip_null_entry->rt_idev = nin_dev_get(dev);
946 
947 		net->newip.nip_broadcast_entry->dst.dev = dev;
948 		net->newip.nip_broadcast_entry->rt_idev = nin_dev_get(dev);
949 	} else if (event == NETDEV_UNREGISTER &&
950 		   dev->reg_state != NETREG_UNREGISTERED) {
951 		nin_dev_put_clear(&net->newip.nip_null_entry->rt_idev);
952 		nin_dev_put_clear(&net->newip.nip_broadcast_entry->rt_idev);
953 	}
954 
955 	return NOTIFY_OK;
956 }
957 
seq_printf_nipaddr_to_proc(struct seq_file * seq,struct nip_addr * addr)958 static void seq_printf_nipaddr_to_proc(struct seq_file *seq,
959 				       struct nip_addr *addr)
960 {
961 	int i = 0;
962 
963 	for (i = 0; i < addr->bitlen / NIP_ADDR_BIT_LEN_8; i++)
964 		seq_printf(seq, "%02x", addr->NIP_ADDR_FIELD8[i]);
965 
966 	seq_puts(seq, "\t");
967 }
968 
nip_route_show_table(struct seq_file * seq,struct nip_fib_table * table)969 static void nip_route_show_table(struct seq_file *seq,
970 				 struct nip_fib_table *table)
971 {
972 	struct nip_fib_node *fn;
973 	int i;
974 
975 	rcu_read_lock_bh();
976 	for (i = 0; i < NIN_ROUTE_HSIZE; i++) {
977 		hlist_for_each_entry_rcu(fn, &table->nip_tb_head[i],
978 					 fib_hlist) {
979 			struct nip_rt_info *rt = fn->nip_route_info;
980 
981 			seq_printf_nipaddr_to_proc(seq, &rt->rt_dst);
982 			seq_printf_nipaddr_to_proc(seq, &rt->gateway);
983 			seq_printf(seq, "%4u %4s\n", rt->rt_flags,
984 				   rt->dst.dev ? rt->dst.dev->name : "");
985 		}
986 	}
987 	rcu_read_unlock_bh();
988 }
989 
nip_route_proc_show(struct seq_file * seq,void * v)990 static int nip_route_proc_show(struct seq_file *seq, void *v)
991 {
992 	struct net *net = seq->private;
993 
994 	nip_route_show_table(seq, net->newip.nip_fib_main_tbl);
995 	nip_route_show_table(seq, net->newip.nip_fib_local_tbl);
996 
997 	return 0;
998 }
999 
nip_route_net_init_late(struct net * net)1000 static int __net_init nip_route_net_init_late(struct net *net)
1001 {
1002 	proc_create_net_single("nip_route", 0444, net->proc_net,
1003 			       nip_route_proc_show, NULL);
1004 	return 0;
1005 }
1006 
nip_route_net_exit_late(struct net * net)1007 static void __net_exit nip_route_net_exit_late(struct net *net)
1008 {
1009 	remove_proc_entry("nip_route", net->proc_net);
1010 }
1011 
1012 static struct pernet_operations nip_route_net_late_ops = {
1013 	.init = nip_route_net_init_late,
1014 	.exit = nip_route_net_exit_late,
1015 };
1016 
1017 static struct notifier_block nip_route_dev_notifier = {
1018 	.notifier_call = nip_route_dev_notify,
1019 	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
1020 };
1021 
nip_route_init(void)1022 int __init nip_route_init(void)
1023 {
1024 	int ret;
1025 
1026 	ret = -ENOMEM;
1027 
1028 	nip_dst_ops_template.kmem_cachep =
1029 	    kmem_cache_create("nip_dst_cache", sizeof(struct nip_rt_info), 0,
1030 			      SLAB_HWCACHE_ALIGN, NULL);
1031 	if (!nip_dst_ops_template.kmem_cachep)
1032 		goto out;
1033 
1034 	ret = register_pernet_subsys(&nip_route_net_ops);
1035 	if (ret)
1036 		goto out_kmem_cache;
1037 
1038 	ret = nip_fib_init();
1039 	if (ret)
1040 		goto out_register_subsys;
1041 
1042 	ret = register_pernet_subsys(&nip_route_net_late_ops);
1043 	if (ret)
1044 		goto out_nip_fib_init;
1045 
1046 	ret = register_netdevice_notifier(&nip_route_dev_notifier);
1047 	if (ret)
1048 		goto out_register_late_subsys;
1049 
1050 out:
1051 	return ret;
1052 
1053 out_register_late_subsys:
1054 	unregister_pernet_subsys(&nip_route_net_late_ops);
1055 out_nip_fib_init:
1056 	nip_fib_gc_cleanup();
1057 out_register_subsys:
1058 	unregister_pernet_subsys(&nip_route_net_ops);
1059 out_kmem_cache:
1060 	kmem_cache_destroy(nip_dst_ops_template.kmem_cachep);
1061 	goto out;
1062 }
1063 
nip_route_cleanup(void)1064 void nip_route_cleanup(void)
1065 {
1066 	unregister_pernet_subsys(&nip_route_net_late_ops);
1067 	nip_fib_gc_cleanup();
1068 	unregister_pernet_subsys(&nip_route_net_ops);
1069 	kmem_cache_destroy(nip_dst_ops_template.kmem_cachep);
1070 }
1071 
1072