/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: FIB frontend.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
15  
16  #include <linux/module.h>
17  #include <asm/uaccess.h>
18  #include <linux/bitops.h>
19  #include <linux/capability.h>
20  #include <linux/types.h>
21  #include <linux/kernel.h>
22  #include <linux/mm.h>
23  #include <linux/string.h>
24  #include <linux/socket.h>
25  #include <linux/sockios.h>
26  #include <linux/errno.h>
27  #include <linux/in.h>
28  #include <linux/inet.h>
29  #include <linux/inetdevice.h>
30  #include <linux/netdevice.h>
31  #include <linux/if_addr.h>
32  #include <linux/if_arp.h>
33  #include <linux/skbuff.h>
34  #include <linux/cache.h>
35  #include <linux/init.h>
36  #include <linux/list.h>
37  #include <linux/slab.h>
38  
39  #include <net/ip.h>
40  #include <net/protocol.h>
41  #include <net/route.h>
42  #include <net/tcp.h>
43  #include <net/sock.h>
44  #include <net/arp.h>
45  #include <net/ip_fib.h>
46  #include <net/rtnetlink.h>
47  #include <net/xfrm.h>
48  #include <net/l3mdev.h>
49  #include <net/lwtunnel.h>
50  #include <trace/events/fib.h>
51  
52  #ifndef CONFIG_IP_MULTIPLE_TABLES
53  
fib4_rules_init(struct net * net)54  static int __net_init fib4_rules_init(struct net *net)
55  {
56  	struct fib_table *local_table, *main_table;
57  
58  	main_table  = fib_trie_table(RT_TABLE_MAIN, NULL);
59  	if (!main_table)
60  		return -ENOMEM;
61  
62  	local_table = fib_trie_table(RT_TABLE_LOCAL, main_table);
63  	if (!local_table)
64  		goto fail;
65  
66  	hlist_add_head_rcu(&local_table->tb_hlist,
67  				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
68  	hlist_add_head_rcu(&main_table->tb_hlist,
69  				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
70  	return 0;
71  
72  fail:
73  	fib_free_table(main_table);
74  	return -ENOMEM;
75  }
76  #else
77  
/*
 * Return the table with the given id, creating it and linking it into
 * the per-namespace hash when it does not exist yet.  An id of 0 is
 * treated as RT_TABLE_MAIN.
 *
 * Returns NULL when fib_trie_table() cannot allocate the table.
 */
struct fib_table *fib_new_table(struct net *net, u32 id)
{
	struct fib_table *alias = NULL;
	struct fib_table *tb;

	if (!id)
		id = RT_TABLE_MAIN;

	tb = fib_get_table(net, id);
	if (tb)
		return tb;

	/* without custom rules a new local table gets the main table as alias */
	if (id == RT_TABLE_LOCAL && !net->ipv4.fib_has_custom_rules)
		alias = fib_new_table(net, RT_TABLE_MAIN);

	tb = fib_trie_table(id, alias);
	if (!tb)
		return NULL;

	/* main and default tables are additionally cached in the namespace */
	if (id == RT_TABLE_MAIN)
		rcu_assign_pointer(net->ipv4.fib_main, tb);
	else if (id == RT_TABLE_DEFAULT)
		rcu_assign_pointer(net->ipv4.fib_default, tb);

	hlist_add_head_rcu(&tb->tb_hlist,
			   &net->ipv4.fib_table_hash[id & (FIB_TABLE_HASHSZ - 1)]);
	return tb;
}
EXPORT_SYMBOL_GPL(fib_new_table);
112  
113  /* caller must hold either rtnl or rcu read lock */
fib_get_table(struct net * net,u32 id)114  struct fib_table *fib_get_table(struct net *net, u32 id)
115  {
116  	struct fib_table *tb;
117  	struct hlist_head *head;
118  	unsigned int h;
119  
120  	if (id == 0)
121  		id = RT_TABLE_MAIN;
122  	h = id & (FIB_TABLE_HASHSZ - 1);
123  
124  	head = &net->ipv4.fib_table_hash[h];
125  	hlist_for_each_entry_rcu(tb, head, tb_hlist) {
126  		if (tb->tb_id == id)
127  			return tb;
128  	}
129  	return NULL;
130  }
131  #endif /* CONFIG_IP_MULTIPLE_TABLES */
132  
fib_replace_table(struct net * net,struct fib_table * old,struct fib_table * new)133  static void fib_replace_table(struct net *net, struct fib_table *old,
134  			      struct fib_table *new)
135  {
136  #ifdef CONFIG_IP_MULTIPLE_TABLES
137  	switch (new->tb_id) {
138  	case RT_TABLE_MAIN:
139  		rcu_assign_pointer(net->ipv4.fib_main, new);
140  		break;
141  	case RT_TABLE_DEFAULT:
142  		rcu_assign_pointer(net->ipv4.fib_default, new);
143  		break;
144  	default:
145  		break;
146  	}
147  
148  #endif
149  	/* replace the old table in the hlist */
150  	hlist_replace_rcu(&old->tb_hlist, &new->tb_hlist);
151  }
152  
fib_unmerge(struct net * net)153  int fib_unmerge(struct net *net)
154  {
155  	struct fib_table *old, *new, *main_table;
156  
157  	/* attempt to fetch local table if it has been allocated */
158  	old = fib_get_table(net, RT_TABLE_LOCAL);
159  	if (!old)
160  		return 0;
161  
162  	new = fib_trie_unmerge(old);
163  	if (!new)
164  		return -ENOMEM;
165  
166  	/* table is already unmerged */
167  	if (new == old)
168  		return 0;
169  
170  	/* replace merged table with clean table */
171  	fib_replace_table(net, old, new);
172  	fib_free_table(old);
173  
174  	/* attempt to fetch main table if it has been allocated */
175  	main_table = fib_get_table(net, RT_TABLE_MAIN);
176  	if (!main_table)
177  		return 0;
178  
179  	/* flush local entries from main table */
180  	fib_table_flush_external(main_table);
181  
182  	return 0;
183  }
184  
fib_flush(struct net * net)185  static void fib_flush(struct net *net)
186  {
187  	int flushed = 0;
188  	unsigned int h;
189  
190  	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
191  		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
192  		struct hlist_node *tmp;
193  		struct fib_table *tb;
194  
195  		hlist_for_each_entry_safe(tb, tmp, head, tb_hlist)
196  			flushed += fib_table_flush(net, tb);
197  	}
198  
199  	if (flushed)
200  		rt_cache_flush(net);
201  }
202  
203  /*
204   * Find address type as if only "dev" was present in the system. If
205   * on_dev is NULL then all interfaces are taken into consideration.
206   */
__inet_dev_addr_type(struct net * net,const struct net_device * dev,__be32 addr,u32 tb_id)207  static inline unsigned int __inet_dev_addr_type(struct net *net,
208  						const struct net_device *dev,
209  						__be32 addr, u32 tb_id)
210  {
211  	struct flowi4		fl4 = { .daddr = addr };
212  	struct fib_result	res;
213  	unsigned int ret = RTN_BROADCAST;
214  	struct fib_table *table;
215  
216  	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
217  		return RTN_BROADCAST;
218  	if (ipv4_is_multicast(addr))
219  		return RTN_MULTICAST;
220  
221  	rcu_read_lock();
222  
223  	table = fib_get_table(net, tb_id);
224  	if (table) {
225  		ret = RTN_UNICAST;
226  		if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) {
227  			if (!dev || dev == res.fi->fib_dev)
228  				ret = res.type;
229  		}
230  	}
231  
232  	rcu_read_unlock();
233  	return ret;
234  }
235  
/* Address type of "addr" according to table tb_id, ignoring the device. */
unsigned int inet_addr_type_table(struct net *net, __be32 addr, u32 tb_id)
{
	const struct net_device *any_dev = NULL;

	return __inet_dev_addr_type(net, any_dev, addr, tb_id);
}
EXPORT_SYMBOL(inet_addr_type_table);
241  
/* Address type of "addr" according to RT_TABLE_LOCAL, ignoring the device. */
unsigned int inet_addr_type(struct net *net, __be32 addr)
{
	const struct net_device *any_dev = NULL;

	return __inet_dev_addr_type(net, any_dev, addr, RT_TABLE_LOCAL);
}
EXPORT_SYMBOL(inet_addr_type);
247  
/*
 * Address type of "addr" as seen through "dev" only.  The table searched
 * is the one l3mdev_fib_table() reports for the device, or
 * RT_TABLE_LOCAL when that is 0.
 */
unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
				__be32 addr)
{
	u32 tb_id = l3mdev_fib_table(dev);

	if (!tb_id)
		tb_id = RT_TABLE_LOCAL;

	return __inet_dev_addr_type(net, dev, addr, tb_id);
}
EXPORT_SYMBOL(inet_dev_addr_type);
256  
257  /* inet_addr_type with dev == NULL but using the table from a dev
258   * if one is associated
259   */
inet_addr_type_dev_table(struct net * net,const struct net_device * dev,__be32 addr)260  unsigned int inet_addr_type_dev_table(struct net *net,
261  				      const struct net_device *dev,
262  				      __be32 addr)
263  {
264  	u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
265  
266  	return __inet_dev_addr_type(net, NULL, addr, rt_table);
267  }
268  EXPORT_SYMBOL(inet_addr_type_dev_table);
269  
fib_compute_spec_dst(struct sk_buff * skb)270  __be32 fib_compute_spec_dst(struct sk_buff *skb)
271  {
272  	struct net_device *dev = skb->dev;
273  	struct in_device *in_dev;
274  	struct fib_result res;
275  	struct rtable *rt;
276  	struct net *net;
277  	int scope;
278  
279  	rt = skb_rtable(skb);
280  	if ((rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL)) ==
281  	    RTCF_LOCAL)
282  		return ip_hdr(skb)->daddr;
283  
284  	in_dev = __in_dev_get_rcu(dev);
285  	BUG_ON(!in_dev);
286  
287  	net = dev_net(dev);
288  
289  	scope = RT_SCOPE_UNIVERSE;
290  	if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
291  		struct flowi4 fl4 = {
292  			.flowi4_iif = LOOPBACK_IFINDEX,
293  			.daddr = ip_hdr(skb)->saddr,
294  			.flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
295  			.flowi4_scope = scope,
296  			.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0,
297  		};
298  		if (!fib_lookup(net, &fl4, &res, 0))
299  			return FIB_RES_PREFSRC(net, res);
300  	} else {
301  		scope = RT_SCOPE_LINK;
302  	}
303  
304  	return inet_select_addr(dev, ip_hdr(skb)->saddr, scope);
305  }
306  
/* Given (packet source, input interface) and optional (dst, oif, tos):
 * - (main) check, that source is valid i.e. not broadcast or our local
 *   address.
 * - figure out what "logical" interface this packet arrived
 *   and calculate "specific destination" address.
 * - check, that packet arrived from expected physical interface.
 * called with rcu_read_lock()
 *
 * @rpf:  reverse-path filter setting supplied by the caller; any non-zero
 *        value turns the "last resort" outcome into -EXDEV, and the value
 *        1 additionally rejects a route whose device does not match @dev.
 * @itag: updated through fib_combine_itag() once a route is accepted,
 *        or reset to 0 on the "last resort" path.
 *
 * Returns -EINVAL when the route towards @src is neither unicast nor an
 * accepted local route, -EXDEV on a reverse-path failure, otherwise 0 or
 * 1, where 1 means the next hop scope is >= RT_SCOPE_HOST.
 */
static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
				 u8 tos, int oif, struct net_device *dev,
				 int rpf, struct in_device *idev, u32 *itag)
{
	int ret, no_addr;
	struct fib_result res;
	struct flowi4 fl4;
	struct net *net = dev_net(dev);
	bool dev_match;

	/* Build the reverse flow: look up a route back to the sender, so
	 * the packet's source becomes daddr and its destination saddr.
	 */
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev);
	if (!fl4.flowi4_iif)
		fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
	fl4.daddr = src;
	fl4.saddr = dst;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.flowi4_tun_key.tun_id = 0;
	fl4.flowi4_flags = 0;
	fl4.flowi4_uid = sock_net_uid(net, NULL);

	/* remember whether the input device has no addresses at all */
	no_addr = idev->ifa_list == NULL;

	fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0;

	trace_fib_validate_source(dev, &fl4);

	if (fib_lookup(net, &fl4, &res, 0))
		goto last_resort;
	/* only unicast routes, or local ones when accept_local is set, pass */
	if (res.type != RTN_UNICAST &&
	    (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
		goto e_inval;
	if (!rpf && !fib_num_tclassid_users(net) &&
	    (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev)))
		goto last_resort;
	fib_combine_itag(itag, &res);
	dev_match = false;

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	/* "ret" doubles as the next hop index here; any next hop whose
	 * device is @dev, or whose l3mdev master is @dev, counts as a match
	 */
	for (ret = 0; ret < res.fi->fib_nhs; ret++) {
		struct fib_nh *nh = &res.fi->fib_nh[ret];

		if (nh->nh_dev == dev) {
			dev_match = true;
			break;
		} else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
			dev_match = true;
			break;
		}
	}
#else
	if (FIB_RES_DEV(res) == dev)
		dev_match = true;
#endif
	if (dev_match) {
		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		return ret;
	}
	if (no_addr)
		goto last_resort;
	if (rpf == 1)
		goto e_rpf;
	/* retry the lookup, this time constrained to leave through @dev */
	fl4.flowi4_oif = dev->ifindex;

	ret = 0;
	if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) {
		if (res.type == RTN_UNICAST)
			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
	}
	return ret;

last_resort:
	/* no usable route information: fail if filtering, else accept */
	if (rpf)
		goto e_rpf;
	*itag = 0;
	return 0;

e_inval:
	return -EINVAL;
e_rpf:
	return -EXDEV;
}
398  
399  /* Ignore rp_filter for packets protected by IPsec. */
fib_validate_source(struct sk_buff * skb,__be32 src,__be32 dst,u8 tos,int oif,struct net_device * dev,struct in_device * idev,u32 * itag)400  int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
401  			u8 tos, int oif, struct net_device *dev,
402  			struct in_device *idev, u32 *itag)
403  {
404  	int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
405  
406  	if (!r && !fib_num_tclassid_users(dev_net(dev)) &&
407  	    IN_DEV_ACCEPT_LOCAL(idev) &&
408  	    (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) {
409  		*itag = 0;
410  		return 0;
411  	}
412  	return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag);
413  }
414  
sk_extract_addr(struct sockaddr * addr)415  static inline __be32 sk_extract_addr(struct sockaddr *addr)
416  {
417  	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
418  }
419  
/*
 * Write one 4-byte netlink attribute of the given type at byte offset
 * "len" inside the buffer starting at "mx".  Returns the offset just
 * past the attribute, i.e. where the next one would go.
 */
static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
{
	struct nlattr *nla = (struct nlattr *)((char *)mx + len);
	u32 *payload;

	nla->nla_type = type;
	nla->nla_len = nla_attr_size(4);

	payload = nla_data(nla);
	*payload = value;

	return len + nla_total_size(4);
}
431  
rtentry_to_fib_config(struct net * net,int cmd,struct rtentry * rt,struct fib_config * cfg)432  static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
433  				 struct fib_config *cfg)
434  {
435  	__be32 addr;
436  	int plen;
437  
438  	memset(cfg, 0, sizeof(*cfg));
439  	cfg->fc_nlinfo.nl_net = net;
440  
441  	if (rt->rt_dst.sa_family != AF_INET)
442  		return -EAFNOSUPPORT;
443  
444  	/*
445  	 * Check mask for validity:
446  	 * a) it must be contiguous.
447  	 * b) destination must have all host bits clear.
448  	 * c) if application forgot to set correct family (AF_INET),
449  	 *    reject request unless it is absolutely clear i.e.
450  	 *    both family and mask are zero.
451  	 */
452  	plen = 32;
453  	addr = sk_extract_addr(&rt->rt_dst);
454  	if (!(rt->rt_flags & RTF_HOST)) {
455  		__be32 mask = sk_extract_addr(&rt->rt_genmask);
456  
457  		if (rt->rt_genmask.sa_family != AF_INET) {
458  			if (mask || rt->rt_genmask.sa_family)
459  				return -EAFNOSUPPORT;
460  		}
461  
462  		if (bad_mask(mask, addr))
463  			return -EINVAL;
464  
465  		plen = inet_mask_len(mask);
466  	}
467  
468  	cfg->fc_dst_len = plen;
469  	cfg->fc_dst = addr;
470  
471  	if (cmd != SIOCDELRT) {
472  		cfg->fc_nlflags = NLM_F_CREATE;
473  		cfg->fc_protocol = RTPROT_BOOT;
474  	}
475  
476  	if (rt->rt_metric)
477  		cfg->fc_priority = rt->rt_metric - 1;
478  
479  	if (rt->rt_flags & RTF_REJECT) {
480  		cfg->fc_scope = RT_SCOPE_HOST;
481  		cfg->fc_type = RTN_UNREACHABLE;
482  		return 0;
483  	}
484  
485  	cfg->fc_scope = RT_SCOPE_NOWHERE;
486  	cfg->fc_type = RTN_UNICAST;
487  
488  	if (rt->rt_dev) {
489  		char *colon;
490  		struct net_device *dev;
491  		char devname[IFNAMSIZ];
492  
493  		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
494  			return -EFAULT;
495  
496  		devname[IFNAMSIZ-1] = 0;
497  		colon = strchr(devname, ':');
498  		if (colon)
499  			*colon = 0;
500  		dev = __dev_get_by_name(net, devname);
501  		if (!dev)
502  			return -ENODEV;
503  		cfg->fc_oif = dev->ifindex;
504  		cfg->fc_table = l3mdev_fib_table(dev);
505  		if (colon) {
506  			struct in_ifaddr *ifa;
507  			struct in_device *in_dev = __in_dev_get_rtnl(dev);
508  			if (!in_dev)
509  				return -ENODEV;
510  			*colon = ':';
511  			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
512  				if (strcmp(ifa->ifa_label, devname) == 0)
513  					break;
514  			if (!ifa)
515  				return -ENODEV;
516  			cfg->fc_prefsrc = ifa->ifa_local;
517  		}
518  	}
519  
520  	addr = sk_extract_addr(&rt->rt_gateway);
521  	if (rt->rt_gateway.sa_family == AF_INET && addr) {
522  		unsigned int addr_type;
523  
524  		cfg->fc_gw = addr;
525  		addr_type = inet_addr_type_table(net, addr, cfg->fc_table);
526  		if (rt->rt_flags & RTF_GATEWAY &&
527  		    addr_type == RTN_UNICAST)
528  			cfg->fc_scope = RT_SCOPE_UNIVERSE;
529  	}
530  
531  	if (cmd == SIOCDELRT)
532  		return 0;
533  
534  	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
535  		return -EINVAL;
536  
537  	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
538  		cfg->fc_scope = RT_SCOPE_LINK;
539  
540  	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
541  		struct nlattr *mx;
542  		int len = 0;
543  
544  		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
545  		if (!mx)
546  			return -ENOMEM;
547  
548  		if (rt->rt_flags & RTF_MTU)
549  			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
550  
551  		if (rt->rt_flags & RTF_WINDOW)
552  			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
553  
554  		if (rt->rt_flags & RTF_IRTT)
555  			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
556  
557  		cfg->fc_mx = mx;
558  		cfg->fc_mx_len = len;
559  	}
560  
561  	return 0;
562  }
563  
564  /*
565   * Handle IP routing ioctl calls.
566   * These are used to manipulate the routing tables
567   */
ip_rt_ioctl(struct net * net,unsigned int cmd,void __user * arg)568  int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
569  {
570  	struct fib_config cfg;
571  	struct rtentry rt;
572  	int err;
573  
574  	switch (cmd) {
575  	case SIOCADDRT:		/* Add a route */
576  	case SIOCDELRT:		/* Delete a route */
577  		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
578  			return -EPERM;
579  
580  		if (copy_from_user(&rt, arg, sizeof(rt)))
581  			return -EFAULT;
582  
583  		rtnl_lock();
584  		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
585  		if (err == 0) {
586  			struct fib_table *tb;
587  
588  			if (cmd == SIOCDELRT) {
589  				tb = fib_get_table(net, cfg.fc_table);
590  				if (tb)
591  					err = fib_table_delete(net, tb, &cfg);
592  				else
593  					err = -ESRCH;
594  			} else {
595  				tb = fib_new_table(net, cfg.fc_table);
596  				if (tb)
597  					err = fib_table_insert(net, tb, &cfg);
598  				else
599  					err = -ENOBUFS;
600  			}
601  
602  			/* allocated by rtentry_to_fib_config() */
603  			kfree(cfg.fc_mx);
604  		}
605  		rtnl_unlock();
606  		return err;
607  	}
608  	return -EINVAL;
609  }
610  
611  const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
612  	[RTA_DST]		= { .type = NLA_U32 },
613  	[RTA_SRC]		= { .type = NLA_U32 },
614  	[RTA_IIF]		= { .type = NLA_U32 },
615  	[RTA_OIF]		= { .type = NLA_U32 },
616  	[RTA_GATEWAY]		= { .type = NLA_U32 },
617  	[RTA_PRIORITY]		= { .type = NLA_U32 },
618  	[RTA_PREFSRC]		= { .type = NLA_U32 },
619  	[RTA_METRICS]		= { .type = NLA_NESTED },
620  	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
621  	[RTA_FLOW]		= { .type = NLA_U32 },
622  	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
623  	[RTA_ENCAP]		= { .type = NLA_NESTED },
624  	[RTA_UID]		= { .type = NLA_U32 },
625  };
626  
rtm_to_fib_config(struct net * net,struct sk_buff * skb,struct nlmsghdr * nlh,struct fib_config * cfg)627  static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
628  			     struct nlmsghdr *nlh, struct fib_config *cfg)
629  {
630  	struct nlattr *attr;
631  	int err, remaining;
632  	struct rtmsg *rtm;
633  
634  	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
635  	if (err < 0)
636  		goto errout;
637  
638  	memset(cfg, 0, sizeof(*cfg));
639  
640  	rtm = nlmsg_data(nlh);
641  	cfg->fc_dst_len = rtm->rtm_dst_len;
642  	cfg->fc_tos = rtm->rtm_tos;
643  	cfg->fc_table = rtm->rtm_table;
644  	cfg->fc_protocol = rtm->rtm_protocol;
645  	cfg->fc_scope = rtm->rtm_scope;
646  	cfg->fc_type = rtm->rtm_type;
647  	cfg->fc_flags = rtm->rtm_flags;
648  	cfg->fc_nlflags = nlh->nlmsg_flags;
649  
650  	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
651  	cfg->fc_nlinfo.nlh = nlh;
652  	cfg->fc_nlinfo.nl_net = net;
653  
654  	if (cfg->fc_type > RTN_MAX) {
655  		err = -EINVAL;
656  		goto errout;
657  	}
658  
659  	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
660  		switch (nla_type(attr)) {
661  		case RTA_DST:
662  			cfg->fc_dst = nla_get_be32(attr);
663  			break;
664  		case RTA_OIF:
665  			cfg->fc_oif = nla_get_u32(attr);
666  			break;
667  		case RTA_GATEWAY:
668  			cfg->fc_gw = nla_get_be32(attr);
669  			break;
670  		case RTA_PRIORITY:
671  			cfg->fc_priority = nla_get_u32(attr);
672  			break;
673  		case RTA_PREFSRC:
674  			cfg->fc_prefsrc = nla_get_be32(attr);
675  			break;
676  		case RTA_METRICS:
677  			cfg->fc_mx = nla_data(attr);
678  			cfg->fc_mx_len = nla_len(attr);
679  			break;
680  		case RTA_MULTIPATH:
681  			err = lwtunnel_valid_encap_type_attr(nla_data(attr),
682  							     nla_len(attr));
683  			if (err < 0)
684  				goto errout;
685  			cfg->fc_mp = nla_data(attr);
686  			cfg->fc_mp_len = nla_len(attr);
687  			break;
688  		case RTA_FLOW:
689  			cfg->fc_flow = nla_get_u32(attr);
690  			break;
691  		case RTA_TABLE:
692  			cfg->fc_table = nla_get_u32(attr);
693  			break;
694  		case RTA_ENCAP:
695  			cfg->fc_encap = attr;
696  			break;
697  		case RTA_ENCAP_TYPE:
698  			cfg->fc_encap_type = nla_get_u16(attr);
699  			err = lwtunnel_valid_encap_type(cfg->fc_encap_type);
700  			if (err < 0)
701  				goto errout;
702  			break;
703  		}
704  	}
705  
706  	return 0;
707  errout:
708  	return err;
709  }
710  
inet_rtm_delroute(struct sk_buff * skb,struct nlmsghdr * nlh)711  static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
712  {
713  	struct net *net = sock_net(skb->sk);
714  	struct fib_config cfg;
715  	struct fib_table *tb;
716  	int err;
717  
718  	err = rtm_to_fib_config(net, skb, nlh, &cfg);
719  	if (err < 0)
720  		goto errout;
721  
722  	tb = fib_get_table(net, cfg.fc_table);
723  	if (!tb) {
724  		err = -ESRCH;
725  		goto errout;
726  	}
727  
728  	err = fib_table_delete(net, tb, &cfg);
729  errout:
730  	return err;
731  }
732  
inet_rtm_newroute(struct sk_buff * skb,struct nlmsghdr * nlh)733  static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
734  {
735  	struct net *net = sock_net(skb->sk);
736  	struct fib_config cfg;
737  	struct fib_table *tb;
738  	int err;
739  
740  	err = rtm_to_fib_config(net, skb, nlh, &cfg);
741  	if (err < 0)
742  		goto errout;
743  
744  	tb = fib_new_table(net, cfg.fc_table);
745  	if (!tb) {
746  		err = -ENOBUFS;
747  		goto errout;
748  	}
749  
750  	err = fib_table_insert(net, tb, &cfg);
751  errout:
752  	return err;
753  }
754  
inet_dump_fib(struct sk_buff * skb,struct netlink_callback * cb)755  static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
756  {
757  	struct net *net = sock_net(skb->sk);
758  	unsigned int h, s_h;
759  	unsigned int e = 0, s_e;
760  	struct fib_table *tb;
761  	struct hlist_head *head;
762  	int dumped = 0, err;
763  
764  	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
765  	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
766  		return skb->len;
767  
768  	s_h = cb->args[0];
769  	s_e = cb->args[1];
770  
771  	rcu_read_lock();
772  
773  	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
774  		e = 0;
775  		head = &net->ipv4.fib_table_hash[h];
776  		hlist_for_each_entry_rcu(tb, head, tb_hlist) {
777  			if (e < s_e)
778  				goto next;
779  			if (dumped)
780  				memset(&cb->args[2], 0, sizeof(cb->args) -
781  						 2 * sizeof(cb->args[0]));
782  			err = fib_table_dump(tb, skb, cb);
783  			if (err < 0) {
784  				if (likely(skb->len))
785  					goto out;
786  
787  				goto out_err;
788  			}
789  			dumped = 1;
790  next:
791  			e++;
792  		}
793  	}
794  out:
795  	err = skb->len;
796  out_err:
797  	rcu_read_unlock();
798  
799  	cb->args[1] = e;
800  	cb->args[0] = h;
801  
802  	return err;
803  }
804  
805  /* Prepare and feed intra-kernel routing request.
806   * Really, it should be netlink message, but :-( netlink
807   * can be not configured, so that we feed it directly
808   * to fib engine. It is legal, because all events occur
809   * only when netlink is already locked.
810   */
fib_magic(int cmd,int type,__be32 dst,int dst_len,struct in_ifaddr * ifa)811  static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
812  {
813  	struct net *net = dev_net(ifa->ifa_dev->dev);
814  	u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev);
815  	struct fib_table *tb;
816  	struct fib_config cfg = {
817  		.fc_protocol = RTPROT_KERNEL,
818  		.fc_type = type,
819  		.fc_dst = dst,
820  		.fc_dst_len = dst_len,
821  		.fc_prefsrc = ifa->ifa_local,
822  		.fc_oif = ifa->ifa_dev->dev->ifindex,
823  		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
824  		.fc_nlinfo = {
825  			.nl_net = net,
826  		},
827  	};
828  
829  	if (!tb_id)
830  		tb_id = (type == RTN_UNICAST) ? RT_TABLE_MAIN : RT_TABLE_LOCAL;
831  
832  	tb = fib_new_table(net, tb_id);
833  	if (!tb)
834  		return;
835  
836  	cfg.fc_table = tb->tb_id;
837  
838  	if (type != RTN_LOCAL)
839  		cfg.fc_scope = RT_SCOPE_LINK;
840  	else
841  		cfg.fc_scope = RT_SCOPE_HOST;
842  
843  	if (cmd == RTM_NEWROUTE)
844  		fib_table_insert(net, tb, &cfg);
845  	else
846  		fib_table_delete(net, tb, &cfg);
847  }
848  
fib_add_ifaddr(struct in_ifaddr * ifa)849  void fib_add_ifaddr(struct in_ifaddr *ifa)
850  {
851  	struct in_device *in_dev = ifa->ifa_dev;
852  	struct net_device *dev = in_dev->dev;
853  	struct in_ifaddr *prim = ifa;
854  	__be32 mask = ifa->ifa_mask;
855  	__be32 addr = ifa->ifa_local;
856  	__be32 prefix = ifa->ifa_address & mask;
857  
858  	if (ifa->ifa_flags & IFA_F_SECONDARY) {
859  		prim = inet_ifa_byprefix(in_dev, prefix, mask);
860  		if (!prim) {
861  			pr_warn("%s: bug: prim == NULL\n", __func__);
862  			return;
863  		}
864  	}
865  
866  	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
867  
868  	if (!(dev->flags & IFF_UP))
869  		return;
870  
871  	/* Add broadcast address, if it is explicitly assigned. */
872  	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
873  		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
874  
875  	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
876  	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
877  		if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
878  			fib_magic(RTM_NEWROUTE,
879  				  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
880  				  prefix, ifa->ifa_prefixlen, prim);
881  
882  		/* Add network specific broadcasts, when it takes a sense */
883  		if (ifa->ifa_prefixlen < 31) {
884  			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
885  			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
886  				  32, prim);
887  		}
888  	}
889  }
890  
/* Delete primary or secondary address.
 * Optionally, on secondary address promotion consider the addresses
 * from subnet iprim as deleted, even if they are in device list.
 * In this case the secondary ifa can be in device list.
 *
 * The routes that fib_add_ifaddr() installs for an address are removed
 * here, except those that the remaining addresses on the device still
 * account for.  The "ok" bit mask records which of them must be kept.
 */
void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;
	struct in_ifaddr *ifa1;
	struct in_ifaddr *prim = ifa, *prim1 = NULL;
	__be32 brd = ifa->ifa_address | ~ifa->ifa_mask;
	__be32 any = ifa->ifa_address & ifa->ifa_mask;
/* Bits in "ok": the corresponding route is still needed and is kept.
 * LOCAL_OK - local route for ifa->ifa_local
 * BRD_OK   - broadcast route for ifa->ifa_broadcast
 * BRD0_OK  - broadcast route for the network address ("any")
 * BRD1_OK  - broadcast route for the all-ones host address ("brd")
 */
#define LOCAL_OK	1
#define BRD_OK		2
#define BRD0_OK		4
#define BRD1_OK		8
	unsigned int ok = 0;
	int subnet = 0;		/* Primary network */
	int gone = 1;		/* Address is missing */
	int same_prefsrc = 0;	/* Another primary with same IP */

	if (ifa->ifa_flags & IFA_F_SECONDARY) {
		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
		if (!prim) {
			/* if the device has been deleted, we don't perform
			 * address promotion
			 */
			if (!in_dev->dead)
				pr_warn("%s: bug: prim == NULL\n", __func__);
			return;
		}
		if (iprim && iprim != prim) {
			pr_warn("%s: bug: iprim != prim\n", __func__);
			return;
		}
	} else if (!ipv4_is_zeronet(any) &&
		   (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) {
		/* primary address: drop its prefix route right away */
		if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
			fib_magic(RTM_DELROUTE,
				  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
				  any, ifa->ifa_prefixlen, prim);
		subnet = 1;
	}

	/* device is going away: skip the scan, so "ok" stays 0 */
	if (in_dev->dead)
		goto no_promotions;

	/* Deletion is more complicated than add.
	 * We should take care of not to delete too much :-)
	 *
	 * Scan address list to be sure that addresses are really gone.
	 */

	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
		if (ifa1 == ifa) {
			/* promotion, keep the IP */
			gone = 0;
			continue;
		}
		/* Ignore IFAs from our subnet */
		if (iprim && ifa1->ifa_mask == iprim->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, iprim))
			continue;

		/* Ignore ifa1 if it uses different primary IP (prefsrc) */
		if (ifa1->ifa_flags & IFA_F_SECONDARY) {
			/* Another address from our subnet? */
			if (ifa1->ifa_mask == prim->ifa_mask &&
			    inet_ifa_match(ifa1->ifa_address, prim))
				prim1 = prim;
			else {
				/* We reached the secondaries, so
				 * same_prefsrc should be determined.
				 */
				if (!same_prefsrc)
					continue;
				/* Search new prim1 if ifa1 is not
				 * using the current prim1
				 */
				if (!prim1 ||
				    ifa1->ifa_mask != prim1->ifa_mask ||
				    !inet_ifa_match(ifa1->ifa_address, prim1))
					prim1 = inet_ifa_byprefix(in_dev,
							ifa1->ifa_address,
							ifa1->ifa_mask);
				if (!prim1)
					continue;
				if (prim1->ifa_local != prim->ifa_local)
					continue;
			}
		} else {
			if (prim->ifa_local != ifa1->ifa_local)
				continue;
			prim1 = ifa1;
			if (prim != prim1)
				same_prefsrc = 1;
		}
		/* ifa1 shares our prefsrc: note which routes it still needs */
		if (ifa->ifa_local == ifa1->ifa_local)
			ok |= LOCAL_OK;
		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
			ok |= BRD_OK;
		if (brd == ifa1->ifa_broadcast)
			ok |= BRD1_OK;
		if (any == ifa1->ifa_broadcast)
			ok |= BRD0_OK;
		/* primary has network specific broadcasts */
		if (prim1 == ifa1 && ifa1->ifa_prefixlen < 31) {
			__be32 brd1 = ifa1->ifa_address | ~ifa1->ifa_mask;
			__be32 any1 = ifa1->ifa_address & ifa1->ifa_mask;

			if (!ipv4_is_zeronet(any1)) {
				if (ifa->ifa_broadcast == brd1 ||
				    ifa->ifa_broadcast == any1)
					ok |= BRD_OK;
				if (brd == brd1 || brd == any1)
					ok |= BRD1_OK;
				if (any == brd1 || any == any1)
					ok |= BRD0_OK;
			}
		}
	}

no_promotions:
	/* delete every route that no remaining address claimed above */
	if (!(ok & BRD_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
	if (subnet && ifa->ifa_prefixlen < 31) {
		if (!(ok & BRD1_OK))
			fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
		if (!(ok & BRD0_OK))
			fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
	}
	if (!(ok & LOCAL_OK)) {
		unsigned int addr_type;

		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);

		/* Check, that this local address finally disappeared. */
		addr_type = inet_addr_type_dev_table(dev_net(dev), dev,
						     ifa->ifa_local);
		if (gone && addr_type != RTN_LOCAL) {
			/* And the last, but not the least thing.
			 * We must flush stray FIB entries.
			 *
			 * First of all, we scan fib_info list searching
			 * for stray nexthop entries, then ignite fib_flush.
			 */
			if (fib_sync_down_addr(dev, ifa->ifa_local))
				fib_flush(dev_net(dev));
		}
	}
#undef LOCAL_OK
#undef BRD_OK
#undef BRD0_OK
#undef BRD1_OK
}
1047  
nl_fib_lookup(struct net * net,struct fib_result_nl * frn)1048  static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn)
1049  {
1050  
1051  	struct fib_result       res;
1052  	struct flowi4           fl4 = {
1053  		.flowi4_mark = frn->fl_mark,
1054  		.daddr = frn->fl_addr,
1055  		.flowi4_tos = frn->fl_tos,
1056  		.flowi4_scope = frn->fl_scope,
1057  	};
1058  	struct fib_table *tb;
1059  
1060  	rcu_read_lock();
1061  
1062  	tb = fib_get_table(net, frn->tb_id_in);
1063  
1064  	frn->err = -ENOENT;
1065  	if (tb) {
1066  		local_bh_disable();
1067  
1068  		frn->tb_id = tb->tb_id;
1069  		frn->err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
1070  
1071  		if (!frn->err) {
1072  			frn->prefixlen = res.prefixlen;
1073  			frn->nh_sel = res.nh_sel;
1074  			frn->type = res.type;
1075  			frn->scope = res.scope;
1076  		}
1077  		local_bh_enable();
1078  	}
1079  
1080  	rcu_read_unlock();
1081  }
1082  
nl_fib_input(struct sk_buff * skb)1083  static void nl_fib_input(struct sk_buff *skb)
1084  {
1085  	struct net *net;
1086  	struct fib_result_nl *frn;
1087  	struct nlmsghdr *nlh;
1088  	u32 portid;
1089  
1090  	net = sock_net(skb->sk);
1091  	nlh = nlmsg_hdr(skb);
1092  	if (skb->len < nlmsg_total_size(sizeof(*frn)) ||
1093  	    skb->len < nlh->nlmsg_len ||
1094  	    nlmsg_len(nlh) < sizeof(*frn))
1095  		return;
1096  
1097  	skb = netlink_skb_clone(skb, GFP_KERNEL);
1098  	if (!skb)
1099  		return;
1100  	nlh = nlmsg_hdr(skb);
1101  
1102  	frn = (struct fib_result_nl *) nlmsg_data(nlh);
1103  	nl_fib_lookup(net, frn);
1104  
1105  	portid = NETLINK_CB(skb).portid;      /* netlink portid */
1106  	NETLINK_CB(skb).portid = 0;        /* from kernel */
1107  	NETLINK_CB(skb).dst_group = 0;  /* unicast */
1108  	netlink_unicast(net->ipv4.fibnl, skb, portid, MSG_DONTWAIT);
1109  }
1110  
nl_fib_lookup_init(struct net * net)1111  static int __net_init nl_fib_lookup_init(struct net *net)
1112  {
1113  	struct sock *sk;
1114  	struct netlink_kernel_cfg cfg = {
1115  		.input	= nl_fib_input,
1116  	};
1117  
1118  	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, &cfg);
1119  	if (!sk)
1120  		return -EAFNOSUPPORT;
1121  	net->ipv4.fibnl = sk;
1122  	return 0;
1123  }
1124  
nl_fib_lookup_exit(struct net * net)1125  static void nl_fib_lookup_exit(struct net *net)
1126  {
1127  	netlink_kernel_release(net->ipv4.fibnl);
1128  	net->ipv4.fibnl = NULL;
1129  }
1130  
/* Take IPv4 routing state down for @dev.
 *
 * fib_sync_down_dev() is called with @event and @force; when it
 * returns non-zero the namespace's FIB is flushed.  The route cache is
 * then flushed and arp_ifdown() is called for @dev unconditionally.
 */
static void fib_disable_ip(struct net_device *dev, unsigned long event,
			   bool force)
{
	struct net *net = dev_net(dev);

	if (fib_sync_down_dev(dev, event, force))
		fib_flush(net);
	rt_cache_flush(net);
	arp_ifdown(dev);
}
1139  
fib_inetaddr_event(struct notifier_block * this,unsigned long event,void * ptr)1140  static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
1141  {
1142  	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
1143  	struct net_device *dev = ifa->ifa_dev->dev;
1144  	struct net *net = dev_net(dev);
1145  
1146  	switch (event) {
1147  	case NETDEV_UP:
1148  		fib_add_ifaddr(ifa);
1149  #ifdef CONFIG_IP_ROUTE_MULTIPATH
1150  		fib_sync_up(dev, RTNH_F_DEAD);
1151  #endif
1152  		atomic_inc(&net->ipv4.dev_addr_genid);
1153  		rt_cache_flush(dev_net(dev));
1154  		break;
1155  	case NETDEV_DOWN:
1156  		fib_del_ifaddr(ifa, NULL);
1157  		atomic_inc(&net->ipv4.dev_addr_genid);
1158  		if (!ifa->ifa_dev->ifa_list) {
1159  			/* Last address was deleted from this interface.
1160  			 * Disable IP.
1161  			 */
1162  			fib_disable_ip(dev, event, true);
1163  		} else {
1164  			rt_cache_flush(dev_net(dev));
1165  		}
1166  		break;
1167  	}
1168  	return NOTIFY_DONE;
1169  }
1170  
/* Notifier callback for network device events.
 *
 * @ptr is the notifier info for the event; the device is extracted
 * with netdev_notifier_info_to_dev().  NETDEV_UNREGISTER is handled
 * first and does not require the device to have IPv4 state; every
 * other event is ignored when the device has no in_device.
 *
 * Always returns NOTIFY_DONE.
 */
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct netdev_notifier_changeupper_info *info;
	struct in_device *in_dev;
	struct net *net = dev_net(dev);
	unsigned int flags;

	/* Device is going away: force IP down and flush routes that
	 * reference it, regardless of whether an in_device exists.
	 */
	if (event == NETDEV_UNREGISTER) {
		fib_disable_ip(dev, event, true);
		rt_flush_dev(dev);
		return NOTIFY_DONE;
	}

	in_dev = __in_dev_get_rtnl(dev);
	if (!in_dev)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_UP:
		/* for_ifa()/endfor_ifa() supply the 'ifa' iterator used
		 * here; it is not declared in this function.
		 */
		for_ifa(in_dev) {
			fib_add_ifaddr(ifa);
		} endfor_ifa(in_dev);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		fib_sync_up(dev, RTNH_F_DEAD);
#endif
		atomic_inc(&net->ipv4.dev_addr_genid);
		rt_cache_flush(net);
		break;
	case NETDEV_DOWN:
		/* Non-forced, unlike the unregister and last-address paths. */
		fib_disable_ip(dev, event, false);
		break;
	case NETDEV_CHANGE:
		/* Sync link-down state of nexthops with the device flags,
		 * then share the cache flush with NETDEV_CHANGEMTU below.
		 */
		flags = dev_get_flags(dev);
		if (flags & (IFF_RUNNING | IFF_LOWER_UP))
			fib_sync_up(dev, RTNH_F_LINKDOWN);
		else
			fib_sync_down_dev(dev, event, false);
		/* fall through */
	case NETDEV_CHANGEMTU:
		rt_cache_flush(net);
		break;
	case NETDEV_CHANGEUPPER:
		info = ptr;
		/* flush all routes if dev is linked to or unlinked from
		 * an L3 master device (e.g., VRF)
		 */
		if (info->upper_dev && netif_is_l3_master(info->upper_dev))
			fib_disable_ip(dev, NETDEV_DOWN, true);
		break;
	}
	return NOTIFY_DONE;
}
1224  
/* Notifier block dispatching to fib_inetaddr_event(); registered with
 * register_inetaddr_notifier() in ip_fib_init().
 */
static struct notifier_block fib_inetaddr_notifier = {
	.notifier_call = fib_inetaddr_event,
};
1228  
/* Notifier block dispatching to fib_netdev_event(); registered with
 * register_netdevice_notifier() in ip_fib_init().
 */
static struct notifier_block fib_netdev_notifier = {
	.notifier_call = fib_netdev_event,
};
1232  
ip_fib_net_init(struct net * net)1233  static int __net_init ip_fib_net_init(struct net *net)
1234  {
1235  	int err;
1236  	size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;
1237  
1238  	/* Avoid false sharing : Use at least a full cache line */
1239  	size = max_t(size_t, size, L1_CACHE_BYTES);
1240  
1241  	net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL);
1242  	if (!net->ipv4.fib_table_hash)
1243  		return -ENOMEM;
1244  
1245  	err = fib4_rules_init(net);
1246  	if (err < 0)
1247  		goto fail;
1248  	return 0;
1249  
1250  fail:
1251  	kfree(net->ipv4.fib_table_hash);
1252  	return err;
1253  }
1254  
/* Destroy every IPv4 FIB table belonging to @net.
 *
 * With the RTNL lock held: the fib_main/fib_default pointers are
 * cleared (multiple-tables builds only), every hash bucket is walked
 * from the last to the first with each table unlinked, flushed and
 * freed, and fib4_rules_exit() is called (multiple-tables builds
 * only).  The hash array itself is freed after the lock is dropped.
 */
static void ip_fib_net_exit(struct net *net)
{
	int i;

	rtnl_lock();
#ifdef CONFIG_IP_MULTIPLE_TABLES
	RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
	RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
#endif
	/* Destroy the tables in reverse order to guarantee that the
	 * local table, ID 255, is destroyed before the main table, ID
	 * 254. This is necessary as the local table may contain
	 * references to data contained in the main table.
	 */
	for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
		struct hlist_head *head = &net->ipv4.fib_table_hash[i];
		struct hlist_node *tmp;
		struct fib_table *tb;

		/* _safe variant: entries are removed while iterating. */
		hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) {
			hlist_del(&tb->tb_hlist);
			fib_table_flush(net, tb);
			fib_free_table(tb);
		}
	}

#ifdef CONFIG_IP_MULTIPLE_TABLES
	fib4_rules_exit(net);
#endif
	rtnl_unlock();
	kfree(net->ipv4.fib_table_hash);
}
1287  
fib_net_init(struct net * net)1288  static int __net_init fib_net_init(struct net *net)
1289  {
1290  	int error;
1291  
1292  #ifdef CONFIG_IP_ROUTE_CLASSID
1293  	net->ipv4.fib_num_tclassid_users = 0;
1294  #endif
1295  	error = ip_fib_net_init(net);
1296  	if (error < 0)
1297  		goto out;
1298  	error = nl_fib_lookup_init(net);
1299  	if (error < 0)
1300  		goto out_nlfl;
1301  	error = fib_proc_init(net);
1302  	if (error < 0)
1303  		goto out_proc;
1304  out:
1305  	return error;
1306  
1307  out_proc:
1308  	nl_fib_lookup_exit(net);
1309  out_nlfl:
1310  	ip_fib_net_exit(net);
1311  	goto out;
1312  }
1313  
/* Per-namespace teardown for the IPv4 FIB frontend: removes the proc
 * entries, the FIB lookup netlink socket and the FIB tables, i.e. the
 * reverse of the order in which fib_net_init() sets them up.
 */
static void __net_exit fib_net_exit(struct net *net)
{
	fib_proc_exit(net);
	nl_fib_lookup_exit(net);
	ip_fib_net_exit(net);
}
1320  
/* Per-network-namespace init/exit hooks for the FIB frontend;
 * registered with register_pernet_subsys() in ip_fib_init().
 */
static struct pernet_operations fib_net_ops = {
	.init = fib_net_init,
	.exit = fib_net_exit,
};
1325  
/* Boot-time initialisation of the IPv4 FIB frontend.
 *
 * Initialises the FIB trie, registers the per-namespace operations,
 * hooks the netdevice and inetaddr notifier chains, and registers the
 * PF_INET rtnetlink handlers for RTM_NEWROUTE, RTM_DELROUTE and
 * RTM_GETROUTE (dump only).
 *
 * NOTE(review): none of the registration return values are checked
 * here.
 */
void __init ip_fib_init(void)
{
	/* NOTE(review): presumably must run before the registrations
	 * below, since their callbacks create and populate FIB tables -
	 * confirm before reordering.
	 */
	fib_trie_init();

	register_pernet_subsys(&fib_net_ops);

	register_netdevice_notifier(&fib_netdev_notifier);
	register_inetaddr_notifier(&fib_inetaddr_notifier);

	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
}
1339