/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/kmemleak.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#define DEBUG
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif

/*
   Neighbour hash table buckets are protected with the rwlock tbl->lock.

   - All scans of and updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     into protocol backends, no attempts to send anything to the network.
     Doing so will result in deadlocks if the backend/driver wants to use
     the neighbour cache.
   - If an entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with a reference count.
   - with the rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes the ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever should be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   dev->hard_header is assumed to be simple and not to make
   callbacks into the neighbour tables.
 */
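
/* A minimal illustration of the rule above (editorial sketch, not a
 * kernel API): pin the entry with its reference count, drop the table
 * lock, and only then do the heavy work.
 *
 *	write_lock_bh(&tbl->lock);
 *	n = ...locate entry in its hash bucket...;
 *	neigh_hold(n);			// refcount keeps it alive
 *	write_unlock_bh(&tbl->lock);	// release before any callback
 *	...non-trivial work, e.g. talking to a driver...
 *	neigh_release(n);
 */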

static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}

/*
 * The result is a random value in the interval (1/2)*base...(3/2)*base.
 * It corresponds to the default IPv6 settings and is not overridable,
 * because it is a genuinely reasonable choice.
 */
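
/* Worked example (editorial note): with base = 30 * HZ the result is
 * base/2 + (prandom_u32() % base), i.e. uniformly distributed over
 * [15 * HZ, 45 * HZ).
 */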

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (prandom_u32() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);


static bool neigh_del(struct neighbour *n, __u8 state,
		      struct neighbour __rcu **np, struct neigh_table *tbl)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state)) {
		struct neighbour *neigh;

		neigh = rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock));
		rcu_assign_pointer(*np, neigh);
		n->dead = 1;
		retval = true;
	}
	write_unlock(&n->lock);
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}

bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	void *pkey = ndel->primary_key;
	u32 hash_val;
	struct neighbour *n;
	struct neighbour __rcu **np;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
	hash_val = hash_val >> (32 - nht->hash_shift);

	np = &nht->hash_buckets[hash_val];
	while ((n = rcu_dereference_protected(*np,
					      lockdep_is_held(&tbl->lock)))) {
		if (n == ndel)
			return neigh_del(n, 0, np, tbl);
		np = &n->next;
	}
	return false;
}

static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			if (neigh_del(n, NUD_PERMANENT, np, tbl)) {
				shrunk = 1;
				continue;
			}
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}

static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (refcount_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown_and_unlock(tbl, dev);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);

static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

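	/* Editorial note: forced GC runs when the table is at or above
	 * gc_thresh3, or at or above gc_thresh2 with no flush within the
	 * last 5 seconds; the allocation fails only if GC frees nothing
	 * and we are still at gc_thresh3.
	 */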
	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}

static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE) {
		buckets = kzalloc(size, GFP_ATOMIC);
	} else {
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
	}
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE) {
		kfree(buckets);
	} else {
		kmemleak_free(buckets);
		free_pages((unsigned long)buckets, get_order(size));
	}
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);

struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!refcount_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);

static u32 pneigh_hash(const void *pkey, int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}
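
/* Illustrative note: for an IPv4 key the last four bytes are the
 * address itself; XOR-folding them down as above selects one of the
 * PNEIGH_HASHMASK + 1 (i.e. 16) proxy hash buckets.
 */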

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);

struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);


int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		if (n->dev)
			dev_put(n->dev);
		kfree(n);
	}
	return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

/*
 *	The neighbour must already be out of the table.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
   disable the fast path.

   Called with the neigh write-locked.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable the fast path.

   Called with the neigh write-locked.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}
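
/* Editorial note: the "fast path" toggled above is the choice of
 * neigh->output: connected_output (see neigh_connected_output() below)
 * trusts the cached link-layer address, while ops->output typically
 * points at the slow, resolving path (see neigh_resolve_output()).
 */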

static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	Periodically recompute ReachableTime from the random function.
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release the lock here, even if the hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			      NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
	        NEIGH_VAR(p, MCAST_PROBES));
}

static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a delicate spot: the error_report routine is
	   complicated and can, in particular, hit this same neighbour
	   entry!

	   So we try to be careful and avoid an endless loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}

/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	neigh_release(neigh);
}

int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh_del_timer(neigh);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh_del_timer(neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);

static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (READ_ONCE(hh->hh_len)) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}



/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates whether the neighbour is known as
				a router.

   The caller MUST hold a reference count on the entry.
 */
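
/* Example (editorial note): the netlink handlers below use these flags;
 * an administrative add or delete passes
 * NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN (see neigh_add() and
 * neigh_delete() later in this file).
 */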

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead)
		goto out;

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare the new lladdr with the cached one */
	if (!dev->addr_len) {
		/* First case: the device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check the override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	/* Update the confirmed timestamp for the neighbour entry after we
	 * receive an ARP packet, even if it doesn't change the IP-to-MAC
	 * binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If the entry was valid and the address has not changed,
	   do not change the entry state if the new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update the timestamp only once we know we will make a change to
	 * the neighbour entry. Otherwise we risk moving the locktime window
	 * with noop updates and ignoring relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid a dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	return err;
}
EXPORT_SYMBOL(neigh_update);

/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
EXPORT_SYMBOL(__neigh_set_probe_once);

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache	*hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}

/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
			neigh_hh_init(neigh);

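		/* Copy the link-layer header under ha_lock's read seqlock;
		 * retry if neigh->ha changed while we were copying.
		 */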
		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without the hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);

static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;

	unsigned long sched_next = now + (prandom_u32() %
					  NEIGH_VAR(p, PROXY_DELAY));
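	/* Editorial note: the random component spreads proxy replies over
	 * [0, PROXY_DELAY) jiffies so answers are neither immediate nor
	 * synchronized across hosts.
	 */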
1454 
1455 	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1456 		kfree_skb(skb);
1457 		return;
1458 	}
1459 
1460 	NEIGH_CB(skb)->sched_next = sched_next;
1461 	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1462 
1463 	spin_lock(&tbl->proxy_queue.lock);
1464 	if (del_timer(&tbl->proxy_timer)) {
1465 		if (time_before(tbl->proxy_timer.expires, sched_next))
1466 			sched_next = tbl->proxy_timer.expires;
1467 	}
1468 	skb_dst_drop(skb);
1469 	dev_hold(skb->dev);
1470 	__skb_queue_tail(&tbl->proxy_queue, skb);
1471 	mod_timer(&tbl->proxy_timer, sched_next);
1472 	spin_unlock(&tbl->proxy_queue.lock);
1473 }
1474 EXPORT_SYMBOL(pneigh_enqueue);
1475 
lookup_neigh_parms(struct neigh_table * tbl,struct net * net,int ifindex)1476 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1477 						      struct net *net, int ifindex)
1478 {
1479 	struct neigh_parms *p;
1480 
1481 	list_for_each_entry(p, &tbl->parms_list, list) {
1482 		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1483 		    (!p->dev && !ifindex && net_eq(net, &init_net)))
1484 			return p;
1485 	}
1486 
1487 	return NULL;
1488 }
1489 
neigh_parms_alloc(struct net_device * dev,struct neigh_table * tbl)1490 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1491 				      struct neigh_table *tbl)
1492 {
1493 	struct neigh_parms *p;
1494 	struct net *net = dev_net(dev);
1495 	const struct net_device_ops *ops = dev->netdev_ops;
1496 
1497 	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1498 	if (p) {
1499 		p->tbl		  = tbl;
1500 		refcount_set(&p->refcnt, 1);
1501 		p->reachable_time =
1502 				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1503 		dev_hold(dev);
1504 		p->dev = dev;
1505 		write_pnet(&p->net, net);
1506 		p->sysctl_table = NULL;
1507 
1508 		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1509 			dev_put(dev);
1510 			kfree(p);
1511 			return NULL;
1512 		}
1513 
1514 		write_lock_bh(&tbl->lock);
1515 		list_add(&p->list, &tbl->parms.list);
1516 		write_unlock_bh(&tbl->lock);
1517 
1518 		neigh_parms_data_state_cleanall(p);
1519 	}
1520 	return p;
1521 }
1522 EXPORT_SYMBOL(neigh_parms_alloc);
1523 
neigh_rcu_free_parms(struct rcu_head * head)1524 static void neigh_rcu_free_parms(struct rcu_head *head)
1525 {
1526 	struct neigh_parms *parms =
1527 		container_of(head, struct neigh_parms, rcu_head);
1528 
1529 	neigh_parms_put(parms);
1530 }
1531 
neigh_parms_release(struct neigh_table * tbl,struct neigh_parms * parms)1532 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1533 {
1534 	if (!parms || parms == &tbl->parms)
1535 		return;
1536 	write_lock_bh(&tbl->lock);
1537 	list_del(&parms->list);
1538 	parms->dead = 1;
1539 	write_unlock_bh(&tbl->lock);
1540 	if (parms->dev)
1541 		dev_put(parms->dev);
1542 	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1543 }
1544 EXPORT_SYMBOL(neigh_parms_release);
1545 
neigh_parms_destroy(struct neigh_parms * parms)1546 static void neigh_parms_destroy(struct neigh_parms *parms)
1547 {
1548 	kfree(parms);
1549 }
1550 
1551 static struct lock_class_key neigh_table_proxy_queue_class;
1552 
1553 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1554 
neigh_table_init(int index,struct neigh_table * tbl)1555 void neigh_table_init(int index, struct neigh_table *tbl)
1556 {
1557 	unsigned long now = jiffies;
1558 	unsigned long phsize;
1559 
1560 	INIT_LIST_HEAD(&tbl->parms_list);
1561 	list_add(&tbl->parms.list, &tbl->parms_list);
1562 	write_pnet(&tbl->parms.net, &init_net);
1563 	refcount_set(&tbl->parms.refcnt, 1);
1564 	tbl->parms.reachable_time =
1565 			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1566 
1567 	tbl->stats = alloc_percpu(struct neigh_statistics);
1568 	if (!tbl->stats)
1569 		panic("cannot create neighbour cache statistics");
1570 
1571 #ifdef CONFIG_PROC_FS
1572 	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1573 			      &neigh_stat_seq_fops, tbl))
1574 		panic("cannot create neighbour proc dir entry");
1575 #endif
1576 
1577 	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1578 
1579 	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1580 	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1581 
1582 	if (!tbl->nht || !tbl->phash_buckets)
1583 		panic("cannot allocate neighbour cache hashes");
1584 
1585 	if (!tbl->entry_size)
1586 		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1587 					tbl->key_len, NEIGH_PRIV_ALIGN);
1588 	else
1589 		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1590 
1591 	rwlock_init(&tbl->lock);
1592 	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1593 	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1594 			tbl->parms.reachable_time);
1595 	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1596 	skb_queue_head_init_class(&tbl->proxy_queue,
1597 			&neigh_table_proxy_queue_class);
1598 
1599 	tbl->last_flush = now;
1600 	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
1601 
1602 	neigh_tables[index] = tbl;
1603 }
1604 EXPORT_SYMBOL(neigh_table_init);
1605 
neigh_table_clear(int index,struct neigh_table * tbl)1606 int neigh_table_clear(int index, struct neigh_table *tbl)
1607 {
1608 	neigh_tables[index] = NULL;
1609 	/* It is not clean... Fix it to unload IPv6 module safely */
1610 	cancel_delayed_work_sync(&tbl->gc_work);
1611 	del_timer_sync(&tbl->proxy_timer);
1612 	pneigh_queue_purge(&tbl->proxy_queue);
1613 	neigh_ifdown(tbl, NULL);
1614 	if (atomic_read(&tbl->entries))
1615 		pr_crit("neighbour leakage\n");
1616 
1617 	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1618 		 neigh_hash_free_rcu);
1619 	tbl->nht = NULL;
1620 
1621 	kfree(tbl->phash_buckets);
1622 	tbl->phash_buckets = NULL;
1623 
1624 	remove_proc_entry(tbl->id, init_net.proc_net_stat);
1625 
1626 	free_percpu(tbl->stats);
1627 	tbl->stats = NULL;
1628 
1629 	return 0;
1630 }
1631 EXPORT_SYMBOL(neigh_table_clear);
1632 
neigh_find_table(int family)1633 static struct neigh_table *neigh_find_table(int family)
1634 {
1635 	struct neigh_table *tbl = NULL;
1636 
1637 	switch (family) {
1638 	case AF_INET:
1639 		tbl = neigh_tables[NEIGH_ARP_TABLE];
1640 		break;
1641 	case AF_INET6:
1642 		tbl = neigh_tables[NEIGH_ND_TABLE];
1643 		break;
1644 	case AF_DECnet:
1645 		tbl = neigh_tables[NEIGH_DN_TABLE];
1646 		break;
1647 	}
1648 
1649 	return tbl;
1650 }
1651 
neigh_delete(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)1652 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1653 			struct netlink_ext_ack *extack)
1654 {
1655 	struct net *net = sock_net(skb->sk);
1656 	struct ndmsg *ndm;
1657 	struct nlattr *dst_attr;
1658 	struct neigh_table *tbl;
1659 	struct neighbour *neigh;
1660 	struct net_device *dev = NULL;
1661 	int err = -EINVAL;
1662 
1663 	ASSERT_RTNL();
1664 	if (nlmsg_len(nlh) < sizeof(*ndm))
1665 		goto out;
1666 
1667 	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1668 	if (dst_attr == NULL)
1669 		goto out;
1670 
1671 	ndm = nlmsg_data(nlh);
1672 	if (ndm->ndm_ifindex) {
1673 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1674 		if (dev == NULL) {
1675 			err = -ENODEV;
1676 			goto out;
1677 		}
1678 	}
1679 
1680 	tbl = neigh_find_table(ndm->ndm_family);
1681 	if (tbl == NULL)
1682 		return -EAFNOSUPPORT;
1683 
1684 	if (nla_len(dst_attr) < tbl->key_len)
1685 		goto out;
1686 
1687 	if (ndm->ndm_flags & NTF_PROXY) {
1688 		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1689 		goto out;
1690 	}
1691 
1692 	if (dev == NULL)
1693 		goto out;
1694 
1695 	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1696 	if (neigh == NULL) {
1697 		err = -ENOENT;
1698 		goto out;
1699 	}
1700 
1701 	err = neigh_update(neigh, NULL, NUD_FAILED,
1702 			   NEIGH_UPDATE_F_OVERRIDE |
1703 			   NEIGH_UPDATE_F_ADMIN,
1704 			   NETLINK_CB(skb).portid);
1705 	write_lock_bh(&tbl->lock);
1706 	neigh_release(neigh);
1707 	neigh_remove_one(neigh, tbl);
1708 	write_unlock_bh(&tbl->lock);
1709 
1710 out:
1711 	return err;
1712 }
1713 
neigh_add(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)1714 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1715 		     struct netlink_ext_ack *extack)
1716 {
1717 	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1718 	struct net *net = sock_net(skb->sk);
1719 	struct ndmsg *ndm;
1720 	struct nlattr *tb[NDA_MAX+1];
1721 	struct neigh_table *tbl;
1722 	struct net_device *dev = NULL;
1723 	struct neighbour *neigh;
1724 	void *dst, *lladdr;
1725 	int err;
1726 
1727 	ASSERT_RTNL();
1728 	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
1729 	if (err < 0)
1730 		goto out;
1731 
1732 	err = -EINVAL;
1733 	if (tb[NDA_DST] == NULL)
1734 		goto out;
1735 
1736 	ndm = nlmsg_data(nlh);
1737 	if (ndm->ndm_ifindex) {
1738 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1739 		if (dev == NULL) {
1740 			err = -ENODEV;
1741 			goto out;
1742 		}
1743 
1744 		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1745 			goto out;
1746 	}
1747 
1748 	tbl = neigh_find_table(ndm->ndm_family);
1749 	if (tbl == NULL)
1750 		return -EAFNOSUPPORT;
1751 
1752 	if (nla_len(tb[NDA_DST]) < tbl->key_len)
1753 		goto out;
1754 	dst = nla_data(tb[NDA_DST]);
1755 	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1756 
1757 	if (ndm->ndm_flags & NTF_PROXY) {
1758 		struct pneigh_entry *pn;
1759 
1760 		err = -ENOBUFS;
1761 		pn = pneigh_lookup(tbl, net, dst, dev, 1);
1762 		if (pn) {
1763 			pn->flags = ndm->ndm_flags;
1764 			err = 0;
1765 		}
1766 		goto out;
1767 	}
1768 
1769 	if (dev == NULL)
1770 		goto out;
1771 
1772 	neigh = neigh_lookup(tbl, dst, dev);
1773 	if (neigh == NULL) {
1774 		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1775 			err = -ENOENT;
1776 			goto out;
1777 		}
1778 
1779 		neigh = __neigh_lookup_errno(tbl, dst, dev);
1780 		if (IS_ERR(neigh)) {
1781 			err = PTR_ERR(neigh);
1782 			goto out;
1783 		}
1784 	} else {
1785 		if (nlh->nlmsg_flags & NLM_F_EXCL) {
1786 			err = -EEXIST;
1787 			neigh_release(neigh);
1788 			goto out;
1789 		}
1790 
1791 		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1792 			flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1793 	}
1794 
1795 	if (ndm->ndm_flags & NTF_USE) {
1796 		neigh_event_send(neigh, NULL);
1797 		err = 0;
1798 	} else
1799 		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
1800 				   NETLINK_CB(skb).portid);
1801 	neigh_release(neigh);
1802 
1803 out:
1804 	return err;
1805 }
1806 
neightbl_fill_parms(struct sk_buff * skb,struct neigh_parms * parms)1807 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1808 {
1809 	struct nlattr *nest;
1810 
1811 	nest = nla_nest_start(skb, NDTA_PARMS);
1812 	if (nest == NULL)
1813 		return -ENOBUFS;
1814 
1815 	if ((parms->dev &&
1816 	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1817 	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
1818 	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
1819 			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1820 	    /* approximative value for deprecated QUEUE_LEN (in packets) */
1821 	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
1822 			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1823 	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
1824 	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
1825 	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
1826 			NEIGH_VAR(parms, UCAST_PROBES)) ||
1827 	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
1828 			NEIGH_VAR(parms, MCAST_PROBES)) ||
1829 	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
1830 			NEIGH_VAR(parms, MCAST_REPROBES)) ||
1831 	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
1832 			  NDTPA_PAD) ||
1833 	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1834 			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
1835 	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
1836 			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
1837 	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1838 			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
1839 	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
1840 			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
1841 	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
1842 			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
1843 	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
1844 			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
1845 	    nla_put_msecs(skb, NDTPA_LOCKTIME,
1846 			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
1847 		goto nla_put_failure;
1848 	return nla_nest_end(skb, nest);
1849 
1850 nla_put_failure:
1851 	nla_nest_cancel(skb, nest);
1852 	return -EMSGSIZE;
1853 }
1854 
1855 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1856 			      u32 pid, u32 seq, int type, int flags)
1857 {
1858 	struct nlmsghdr *nlh;
1859 	struct ndtmsg *ndtmsg;
1860 
1861 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1862 	if (nlh == NULL)
1863 		return -EMSGSIZE;
1864 
1865 	ndtmsg = nlmsg_data(nlh);
1866 
1867 	read_lock_bh(&tbl->lock);
1868 	ndtmsg->ndtm_family = tbl->family;
1869 	ndtmsg->ndtm_pad1   = 0;
1870 	ndtmsg->ndtm_pad2   = 0;
1871 
1872 	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1873 	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
1874 	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1875 	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1876 	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1877 		goto nla_put_failure;
1878 	{
1879 		unsigned long now = jiffies;
1880 		long flush_delta = now - tbl->last_flush;
1881 		long rand_delta = now - tbl->last_rand;
1882 		struct neigh_hash_table *nht;
1883 		struct ndt_config ndc = {
1884 			.ndtc_key_len		= tbl->key_len,
1885 			.ndtc_entry_size	= tbl->entry_size,
1886 			.ndtc_entries		= atomic_read(&tbl->entries),
1887 			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
1888 			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
1889 			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
1890 		};
1891 
1892 		rcu_read_lock_bh();
1893 		nht = rcu_dereference_bh(tbl->nht);
1894 		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1895 		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1896 		rcu_read_unlock_bh();
1897 
1898 		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1899 			goto nla_put_failure;
1900 	}
1901 
1902 	{
1903 		int cpu;
1904 		struct ndt_stats ndst;
1905 
1906 		memset(&ndst, 0, sizeof(ndst));
1907 
1908 		for_each_possible_cpu(cpu) {
1909 			struct neigh_statistics	*st;
1910 
1911 			st = per_cpu_ptr(tbl->stats, cpu);
1912 			ndst.ndts_allocs		+= st->allocs;
1913 			ndst.ndts_destroys		+= st->destroys;
1914 			ndst.ndts_hash_grows		+= st->hash_grows;
1915 			ndst.ndts_res_failed		+= st->res_failed;
1916 			ndst.ndts_lookups		+= st->lookups;
1917 			ndst.ndts_hits			+= st->hits;
1918 			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
1919 			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
1920 			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
1921 			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
1922 			ndst.ndts_table_fulls		+= st->table_fulls;
1923 		}
1924 
1925 		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
1926 				  NDTA_PAD))
1927 			goto nla_put_failure;
1928 	}
1929 
1930 	BUG_ON(tbl->parms.dev);
1931 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1932 		goto nla_put_failure;
1933 
1934 	read_unlock_bh(&tbl->lock);
1935 	nlmsg_end(skb, nlh);
1936 	return 0;
1937 
1938 nla_put_failure:
1939 	read_unlock_bh(&tbl->lock);
1940 	nlmsg_cancel(skb, nlh);
1941 	return -EMSGSIZE;
1942 }
1943 
1944 static int neightbl_fill_param_info(struct sk_buff *skb,
1945 				    struct neigh_table *tbl,
1946 				    struct neigh_parms *parms,
1947 				    u32 pid, u32 seq, int type,
1948 				    unsigned int flags)
1949 {
1950 	struct ndtmsg *ndtmsg;
1951 	struct nlmsghdr *nlh;
1952 
1953 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1954 	if (nlh == NULL)
1955 		return -EMSGSIZE;
1956 
1957 	ndtmsg = nlmsg_data(nlh);
1958 
1959 	read_lock_bh(&tbl->lock);
1960 	ndtmsg->ndtm_family = tbl->family;
1961 	ndtmsg->ndtm_pad1   = 0;
1962 	ndtmsg->ndtm_pad2   = 0;
1963 
1964 	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1965 	    neightbl_fill_parms(skb, parms) < 0)
1966 		goto errout;
1967 
1968 	read_unlock_bh(&tbl->lock);
1969 	nlmsg_end(skb, nlh);
1970 	return 0;
1971 errout:
1972 	read_unlock_bh(&tbl->lock);
1973 	nlmsg_cancel(skb, nlh);
1974 	return -EMSGSIZE;
1975 }
1976 
1977 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1978 	[NDTA_NAME]		= { .type = NLA_STRING },
1979 	[NDTA_THRESH1]		= { .type = NLA_U32 },
1980 	[NDTA_THRESH2]		= { .type = NLA_U32 },
1981 	[NDTA_THRESH3]		= { .type = NLA_U32 },
1982 	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
1983 	[NDTA_PARMS]		= { .type = NLA_NESTED },
1984 };
1985 
1986 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1987 	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
1988 	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
1989 	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
1990 	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
1991 	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
1992 	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
1993 	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
1994 	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
1995 	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
1996 	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
1997 	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
1998 	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
1999 	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
2000 	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
2001 };
2002 
2003 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2004 			struct netlink_ext_ack *extack)
2005 {
2006 	struct net *net = sock_net(skb->sk);
2007 	struct neigh_table *tbl;
2008 	struct ndtmsg *ndtmsg;
2009 	struct nlattr *tb[NDTA_MAX+1];
2010 	bool found = false;
2011 	int err, tidx;
2012 
2013 	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2014 			  nl_neightbl_policy, extack);
2015 	if (err < 0)
2016 		goto errout;
2017 
2018 	if (tb[NDTA_NAME] == NULL) {
2019 		err = -EINVAL;
2020 		goto errout;
2021 	}
2022 
2023 	ndtmsg = nlmsg_data(nlh);
2024 
2025 	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2026 		tbl = neigh_tables[tidx];
2027 		if (!tbl)
2028 			continue;
2029 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2030 			continue;
2031 		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2032 			found = true;
2033 			break;
2034 		}
2035 	}
2036 
2037 	if (!found)
2038 		return -ENOENT;
2039 
2040 	/*
2041 	 * We acquire tbl->lock to be nice to the periodic timers and
2042 	 * make sure they always see a consistent set of values.
2043 	 */
2044 	write_lock_bh(&tbl->lock);
2045 
2046 	if (tb[NDTA_PARMS]) {
2047 		struct nlattr *tbp[NDTPA_MAX+1];
2048 		struct neigh_parms *p;
2049 		int i, ifindex = 0;
2050 
2051 		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2052 				       nl_ntbl_parm_policy, extack);
2053 		if (err < 0)
2054 			goto errout_tbl_lock;
2055 
2056 		if (tbp[NDTPA_IFINDEX])
2057 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2058 
2059 		p = lookup_neigh_parms(tbl, net, ifindex);
2060 		if (p == NULL) {
2061 			err = -ENOENT;
2062 			goto errout_tbl_lock;
2063 		}
2064 
2065 		for (i = 1; i <= NDTPA_MAX; i++) {
2066 			if (tbp[i] == NULL)
2067 				continue;
2068 
2069 			switch (i) {
2070 			case NDTPA_QUEUE_LEN:
2071 				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2072 					      nla_get_u32(tbp[i]) *
2073 					      SKB_TRUESIZE(ETH_FRAME_LEN));
2074 				break;
2075 			case NDTPA_QUEUE_LENBYTES:
2076 				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2077 					      nla_get_u32(tbp[i]));
2078 				break;
2079 			case NDTPA_PROXY_QLEN:
2080 				NEIGH_VAR_SET(p, PROXY_QLEN,
2081 					      nla_get_u32(tbp[i]));
2082 				break;
2083 			case NDTPA_APP_PROBES:
2084 				NEIGH_VAR_SET(p, APP_PROBES,
2085 					      nla_get_u32(tbp[i]));
2086 				break;
2087 			case NDTPA_UCAST_PROBES:
2088 				NEIGH_VAR_SET(p, UCAST_PROBES,
2089 					      nla_get_u32(tbp[i]));
2090 				break;
2091 			case NDTPA_MCAST_PROBES:
2092 				NEIGH_VAR_SET(p, MCAST_PROBES,
2093 					      nla_get_u32(tbp[i]));
2094 				break;
2095 			case NDTPA_MCAST_REPROBES:
2096 				NEIGH_VAR_SET(p, MCAST_REPROBES,
2097 					      nla_get_u32(tbp[i]));
2098 				break;
2099 			case NDTPA_BASE_REACHABLE_TIME:
2100 				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2101 					      nla_get_msecs(tbp[i]));
2102 				/* update reachable_time as well; otherwise the change
2103 				 * only takes effect the next time neigh_periodic_work
2104 				 * decides to recompute it (can be multiple minutes)
2105 				 */
2106 				p->reachable_time =
2107 					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2108 				break;
2109 			case NDTPA_GC_STALETIME:
2110 				NEIGH_VAR_SET(p, GC_STALETIME,
2111 					      nla_get_msecs(tbp[i]));
2112 				break;
2113 			case NDTPA_DELAY_PROBE_TIME:
2114 				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2115 					      nla_get_msecs(tbp[i]));
2116 				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2117 				break;
2118 			case NDTPA_RETRANS_TIME:
2119 				NEIGH_VAR_SET(p, RETRANS_TIME,
2120 					      nla_get_msecs(tbp[i]));
2121 				break;
2122 			case NDTPA_ANYCAST_DELAY:
2123 				NEIGH_VAR_SET(p, ANYCAST_DELAY,
2124 					      nla_get_msecs(tbp[i]));
2125 				break;
2126 			case NDTPA_PROXY_DELAY:
2127 				NEIGH_VAR_SET(p, PROXY_DELAY,
2128 					      nla_get_msecs(tbp[i]));
2129 				break;
2130 			case NDTPA_LOCKTIME:
2131 				NEIGH_VAR_SET(p, LOCKTIME,
2132 					      nla_get_msecs(tbp[i]));
2133 				break;
2134 			}
2135 		}
2136 	}
2137 
2138 	err = -ENOENT;
2139 	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2140 	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2141 	    !net_eq(net, &init_net))
2142 		goto errout_tbl_lock;
2143 
2144 	if (tb[NDTA_THRESH1])
2145 		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2146 
2147 	if (tb[NDTA_THRESH2])
2148 		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2149 
2150 	if (tb[NDTA_THRESH3])
2151 		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2152 
2153 	if (tb[NDTA_GC_INTERVAL])
2154 		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2155 
2156 	err = 0;
2157 
2158 errout_tbl_lock:
2159 	write_unlock_bh(&tbl->lock);
2160 errout:
2161 	return err;
2162 }
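
/*
 * Illustrative usage (assuming iproute2's "ip ntable" front end):
 * neightbl_set() above services requests such as
 *
 *	ip ntable change name arp_cache dev eth0 queue 8
 *
 * which arrives as RTM_SETNEIGHTBL with NDTA_NAME = "arp_cache" and a
 * nested NDTA_PARMS carrying NDTPA_IFINDEX plus NDTPA_QUEUE_LEN; the
 * switch above then converts the packet count into QUEUE_LEN_BYTES.
 */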
2163 
2164 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2165 {
2166 	struct net *net = sock_net(skb->sk);
2167 	int family, tidx, nidx = 0;
2168 	int tbl_skip = cb->args[0];
2169 	int neigh_skip = cb->args[1];
2170 	struct neigh_table *tbl;
2171 
2172 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2173 
2174 	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2175 		struct neigh_parms *p;
2176 
2177 		tbl = neigh_tables[tidx];
2178 		if (!tbl)
2179 			continue;
2180 
2181 		if (tidx < tbl_skip || (family && tbl->family != family))
2182 			continue;
2183 
2184 		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2185 				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2186 				       NLM_F_MULTI) < 0)
2187 			break;
2188 
2189 		nidx = 0;
2190 		p = list_next_entry(&tbl->parms, list);
2191 		list_for_each_entry_from(p, &tbl->parms_list, list) {
2192 			if (!net_eq(neigh_parms_net(p), net))
2193 				continue;
2194 
2195 			if (nidx < neigh_skip)
2196 				goto next;
2197 
2198 			if (neightbl_fill_param_info(skb, tbl, p,
2199 						     NETLINK_CB(cb->skb).portid,
2200 						     cb->nlh->nlmsg_seq,
2201 						     RTM_NEWNEIGHTBL,
2202 						     NLM_F_MULTI) < 0)
2203 				goto out;
2204 		next:
2205 			nidx++;
2206 		}
2207 
2208 		neigh_skip = 0;
2209 	}
2210 out:
2211 	cb->args[0] = tidx;
2212 	cb->args[1] = nidx;
2213 
2214 	return skb->len;
2215 }
2216 
2217 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2218 			   u32 pid, u32 seq, int type, unsigned int flags)
2219 {
2220 	unsigned long now = jiffies;
2221 	struct nda_cacheinfo ci;
2222 	struct nlmsghdr *nlh;
2223 	struct ndmsg *ndm;
2224 
2225 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2226 	if (nlh == NULL)
2227 		return -EMSGSIZE;
2228 
2229 	ndm = nlmsg_data(nlh);
2230 	ndm->ndm_family	 = neigh->ops->family;
2231 	ndm->ndm_pad1    = 0;
2232 	ndm->ndm_pad2    = 0;
2233 	ndm->ndm_flags	 = neigh->flags;
2234 	ndm->ndm_type	 = neigh->type;
2235 	ndm->ndm_ifindex = neigh->dev->ifindex;
2236 
2237 	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2238 		goto nla_put_failure;
2239 
2240 	read_lock_bh(&neigh->lock);
2241 	ndm->ndm_state	 = neigh->nud_state;
2242 	if (neigh->nud_state & NUD_VALID) {
2243 		char haddr[MAX_ADDR_LEN];
2244 
2245 		neigh_ha_snapshot(haddr, neigh, neigh->dev);
2246 		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2247 			read_unlock_bh(&neigh->lock);
2248 			goto nla_put_failure;
2249 		}
2250 	}
2251 
2252 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
2253 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2254 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
2255 	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
2256 	read_unlock_bh(&neigh->lock);
2257 
2258 	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2259 	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2260 		goto nla_put_failure;
2261 
2262 	nlmsg_end(skb, nlh);
2263 	return 0;
2264 
2265 nla_put_failure:
2266 	nlmsg_cancel(skb, nlh);
2267 	return -EMSGSIZE;
2268 }
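
/*
 * Illustrative result (values made up): one message built by
 * neigh_fill_info() renders in iproute2 roughly as
 *
 *	192.0.2.1 dev eth0 lladdr 00:11:22:33:44:55 ref 1 used 35/32/30 probes 1 REACHABLE
 *
 * where the used/confirmed/updated triple and the refcount come from
 * NDA_CACHEINFO and are only printed by "ip -s neigh".
 */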
2269 
2270 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2271 			    u32 pid, u32 seq, int type, unsigned int flags,
2272 			    struct neigh_table *tbl)
2273 {
2274 	struct nlmsghdr *nlh;
2275 	struct ndmsg *ndm;
2276 
2277 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2278 	if (nlh == NULL)
2279 		return -EMSGSIZE;
2280 
2281 	ndm = nlmsg_data(nlh);
2282 	ndm->ndm_family	 = tbl->family;
2283 	ndm->ndm_pad1    = 0;
2284 	ndm->ndm_pad2    = 0;
2285 	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
2286 	ndm->ndm_type	 = RTN_UNICAST;
2287 	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2288 	ndm->ndm_state	 = NUD_NONE;
2289 
2290 	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2291 		goto nla_put_failure;
2292 
2293 	nlmsg_end(skb, nlh);
2294 	return 0;
2295 
2296 nla_put_failure:
2297 	nlmsg_cancel(skb, nlh);
2298 	return -EMSGSIZE;
2299 }
2300 
2301 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2302 {
2303 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2304 	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
2305 }
2306 
2307 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2308 {
2309 	struct net_device *master;
2310 
2311 	if (!master_idx)
2312 		return false;
2313 
2314 	master = netdev_master_upper_dev_get(dev);
2315 	if (!master || master->ifindex != master_idx)
2316 		return true;
2317 
2318 	return false;
2319 }
2320 
2321 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2322 {
2323 	if (filter_idx && dev->ifindex != filter_idx)
2324 		return true;
2325 
2326 	return false;
2327 }
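
/*
 * Illustrative usage: these two predicates are the kernel side of
 * filtered dumps such as
 *
 *	ip neigh show dev eth0		(NDA_IFINDEX)
 *	ip neigh show master br0	(NDA_MASTER)
 *
 * When either attribute is present, neigh_dump_table() below also sets
 * NLM_F_DUMP_FILTERED so userspace can tell that the kernel did the
 * filtering itself.
 */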
2328 
2329 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2330 			    struct netlink_callback *cb)
2331 {
2332 	struct net *net = sock_net(skb->sk);
2333 	const struct nlmsghdr *nlh = cb->nlh;
2334 	struct nlattr *tb[NDA_MAX + 1];
2335 	struct neighbour *n;
2336 	int rc, h, s_h = cb->args[1];
2337 	int idx, s_idx = idx = cb->args[2];
2338 	struct neigh_hash_table *nht;
2339 	int filter_master_idx = 0, filter_idx = 0;
2340 	unsigned int flags = NLM_F_MULTI;
2341 	int err;
2342 
2343 	err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL);
2344 	if (!err) {
2345 		if (tb[NDA_IFINDEX]) {
2346 			if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
2347 				return -EINVAL;
2348 			filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
2349 		}
2350 		if (tb[NDA_MASTER]) {
2351 			if (nla_len(tb[NDA_MASTER]) != sizeof(u32))
2352 				return -EINVAL;
2353 			filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
2354 		}
2355 		if (filter_idx || filter_master_idx)
2356 			flags |= NLM_F_DUMP_FILTERED;
2357 	}
2358 
2359 	rcu_read_lock_bh();
2360 	nht = rcu_dereference_bh(tbl->nht);
2361 
2362 	for (h = s_h; h < (1 << nht->hash_shift); h++) {
2363 		if (h > s_h)
2364 			s_idx = 0;
2365 		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2366 		     n != NULL;
2367 		     n = rcu_dereference_bh(n->next)) {
2368 			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2369 				goto next;
2370 			if (neigh_ifindex_filtered(n->dev, filter_idx) ||
2371 			    neigh_master_filtered(n->dev, filter_master_idx))
2372 				goto next;
2373 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2374 					    cb->nlh->nlmsg_seq,
2375 					    RTM_NEWNEIGH,
2376 					    flags) < 0) {
2377 				rc = -1;
2378 				goto out;
2379 			}
2380 next:
2381 			idx++;
2382 		}
2383 	}
2384 	rc = skb->len;
2385 out:
2386 	rcu_read_unlock_bh();
2387 	cb->args[1] = h;
2388 	cb->args[2] = idx;
2389 	return rc;
2390 }
2391 
2392 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2393 			     struct netlink_callback *cb)
2394 {
2395 	struct pneigh_entry *n;
2396 	struct net *net = sock_net(skb->sk);
2397 	int rc, h, s_h = cb->args[3];
2398 	int idx, s_idx = idx = cb->args[4];
2399 
2400 	read_lock_bh(&tbl->lock);
2401 
2402 	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2403 		if (h > s_h)
2404 			s_idx = 0;
2405 		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2406 			if (idx < s_idx || pneigh_net(n) != net)
2407 				goto next;
2408 			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2409 					    cb->nlh->nlmsg_seq,
2410 					    RTM_NEWNEIGH,
2411 					    NLM_F_MULTI, tbl) < 0) {
2412 				read_unlock_bh(&tbl->lock);
2413 				rc = -1;
2414 				goto out;
2415 			}
2416 		next:
2417 			idx++;
2418 		}
2419 	}
2420 
2421 	read_unlock_bh(&tbl->lock);
2422 	rc = skb->len;
2423 out:
2424 	cb->args[3] = h;
2425 	cb->args[4] = idx;
2426 	return rc;
2427 
2428 }
2429 
2430 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2431 {
2432 	struct neigh_table *tbl;
2433 	int t, family, s_t;
2434 	int proxy = 0;
2435 	int err;
2436 
2437 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2438 
2439 	/* check whether a full ndmsg structure is present; the family
2440 	 * member is at the same offset in both structures
2441 	 */
2442 	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2443 	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2444 		proxy = 1;
2445 
2446 	s_t = cb->args[0];
2447 
2448 	for (t = 0; t < NEIGH_NR_TABLES; t++) {
2449 		tbl = neigh_tables[t];
2450 
2451 		if (!tbl)
2452 			continue;
2453 		if (t < s_t || (family && tbl->family != family))
2454 			continue;
2455 		if (t > s_t)
2456 			memset(&cb->args[1], 0, sizeof(cb->args) -
2457 						sizeof(cb->args[0]));
2458 		if (proxy)
2459 			err = pneigh_dump_table(tbl, skb, cb);
2460 		else
2461 			err = neigh_dump_table(tbl, skb, cb);
2462 		if (err < 0)
2463 			break;
2464 	}
2465 
2466 	cb->args[0] = t;
2467 	return skb->len;
2468 }
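
/*
 * Illustrative usage: "ip neigh show proxy" sends the dump request with
 * a full ndmsg whose ndm_flags is NTF_PROXY, which is what switches
 * neigh_dump_info() above from the neighbour cache to the proxy
 * (pneigh) entries.
 */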
2469 
2470 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2471 {
2472 	int chain;
2473 	struct neigh_hash_table *nht;
2474 
2475 	rcu_read_lock_bh();
2476 	nht = rcu_dereference_bh(tbl->nht);
2477 
2478 	read_lock(&tbl->lock); /* avoid resizes */
2479 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2480 		struct neighbour *n;
2481 
2482 		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2483 		     n != NULL;
2484 		     n = rcu_dereference_bh(n->next))
2485 			cb(n, cookie);
2486 	}
2487 	read_unlock(&tbl->lock);
2488 	rcu_read_unlock_bh();
2489 }
2490 EXPORT_SYMBOL(neigh_for_each);
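
/*
 * Illustrative caller sketch (hypothetical helper names): counting the
 * entries that sit on one device.  The callback runs with tbl->lock
 * held as a reader and BHs disabled, so it must not sleep or re-enter
 * the neighbour code.
 *
 *	struct dev_count { struct net_device *dev; int cnt; };
 *
 *	static void count_on_dev(struct neighbour *n, void *cookie)
 *	{
 *		struct dev_count *c = cookie;
 *
 *		if (n->dev == c->dev)
 *			c->cnt++;
 *	}
 *
 *	struct dev_count c = { .dev = dev };
 *	neigh_for_each(&arp_tbl, count_on_dev, &c);
 */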
2491 
2492 /* The tbl->lock must be held as a writer and BH disabled. */
2493 void __neigh_for_each_release(struct neigh_table *tbl,
2494 			      int (*cb)(struct neighbour *))
2495 {
2496 	int chain;
2497 	struct neigh_hash_table *nht;
2498 
2499 	nht = rcu_dereference_protected(tbl->nht,
2500 					lockdep_is_held(&tbl->lock));
2501 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2502 		struct neighbour *n;
2503 		struct neighbour __rcu **np;
2504 
2505 		np = &nht->hash_buckets[chain];
2506 		while ((n = rcu_dereference_protected(*np,
2507 					lockdep_is_held(&tbl->lock))) != NULL) {
2508 			int release;
2509 
2510 			write_lock(&n->lock);
2511 			release = cb(n);
2512 			if (release) {
2513 				rcu_assign_pointer(*np,
2514 					rcu_dereference_protected(n->next,
2515 						lockdep_is_held(&tbl->lock)));
2516 				n->dead = 1;
2517 			} else
2518 				np = &n->next;
2519 			write_unlock(&n->lock);
2520 			if (release)
2521 				neigh_cleanup_and_release(n);
2522 		}
2523 	}
2524 }
2525 EXPORT_SYMBOL(__neigh_for_each_release);
2526 
2527 int neigh_xmit(int index, struct net_device *dev,
2528 	       const void *addr, struct sk_buff *skb)
2529 {
2530 	int err = -EAFNOSUPPORT;
2531 	if (likely(index < NEIGH_NR_TABLES)) {
2532 		struct neigh_table *tbl;
2533 		struct neighbour *neigh;
2534 
2535 		tbl = neigh_tables[index];
2536 		if (!tbl)
2537 			goto out;
2538 		rcu_read_lock_bh();
2539 		if (index == NEIGH_ARP_TABLE) {
2540 			u32 key = *((u32 *)addr);
2541 
2542 			neigh = __ipv4_neigh_lookup_noref(dev, key);
2543 		} else {
2544 			neigh = __neigh_lookup_noref(tbl, addr, dev);
2545 		}
2546 		if (!neigh)
2547 			neigh = __neigh_create(tbl, addr, dev, false);
2548 		err = PTR_ERR(neigh);
2549 		if (IS_ERR(neigh)) {
2550 			rcu_read_unlock_bh();
2551 			goto out_kfree_skb;
2552 		}
2553 		err = neigh->output(neigh, skb);
2554 		rcu_read_unlock_bh();
2555 	} else if (index == NEIGH_LINK_TABLE) {
2557 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
2558 				      addr, NULL, skb->len);
2559 		if (err < 0)
2560 			goto out_kfree_skb;
2561 		err = dev_queue_xmit(skb);
2562 	}
2563 out:
2564 	return err;
2565 out_kfree_skb:
2566 	kfree_skb(skb);
2567 	goto out;
2568 }
2569 EXPORT_SYMBOL(neigh_xmit);
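
/*
 * Illustrative caller sketch: MPLS transmits through this entry point
 * without holding its own neighbour reference, roughly
 *
 *	err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &ipv4_gateway, skb);
 *
 * where ipv4_gateway is the u32 ARP key for the nexthop.  Passing
 * NEIGH_LINK_TABLE instead treats "addr" as a ready link-layer address
 * and stamps it directly via dev_hard_header().
 */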
2570 
2571 #ifdef CONFIG_PROC_FS
2572 
2573 static struct neighbour *neigh_get_first(struct seq_file *seq)
2574 {
2575 	struct neigh_seq_state *state = seq->private;
2576 	struct net *net = seq_file_net(seq);
2577 	struct neigh_hash_table *nht = state->nht;
2578 	struct neighbour *n = NULL;
2579 	int bucket = state->bucket;
2580 
2581 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2582 	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2583 		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2584 
2585 		while (n) {
2586 			if (!net_eq(dev_net(n->dev), net))
2587 				goto next;
2588 			if (state->neigh_sub_iter) {
2589 				loff_t fakep = 0;
2590 				void *v;
2591 
2592 				v = state->neigh_sub_iter(state, n, &fakep);
2593 				if (!v)
2594 					goto next;
2595 			}
2596 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2597 				break;
2598 			if (n->nud_state & ~NUD_NOARP)
2599 				break;
2600 next:
2601 			n = rcu_dereference_bh(n->next);
2602 		}
2603 
2604 		if (n)
2605 			break;
2606 	}
2607 	state->bucket = bucket;
2608 
2609 	return n;
2610 }
2611 
2612 static struct neighbour *neigh_get_next(struct seq_file *seq,
2613 					struct neighbour *n,
2614 					loff_t *pos)
2615 {
2616 	struct neigh_seq_state *state = seq->private;
2617 	struct net *net = seq_file_net(seq);
2618 	struct neigh_hash_table *nht = state->nht;
2619 
2620 	if (state->neigh_sub_iter) {
2621 		void *v = state->neigh_sub_iter(state, n, pos);
2622 		if (v)
2623 			return n;
2624 	}
2625 	n = rcu_dereference_bh(n->next);
2626 
2627 	while (1) {
2628 		while (n) {
2629 			if (!net_eq(dev_net(n->dev), net))
2630 				goto next;
2631 			if (state->neigh_sub_iter) {
2632 				void *v = state->neigh_sub_iter(state, n, pos);
2633 				if (v)
2634 					return n;
2635 				goto next;
2636 			}
2637 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2638 				break;
2639 
2640 			if (n->nud_state & ~NUD_NOARP)
2641 				break;
2642 next:
2643 			n = rcu_dereference_bh(n->next);
2644 		}
2645 
2646 		if (n)
2647 			break;
2648 
2649 		if (++state->bucket >= (1 << nht->hash_shift))
2650 			break;
2651 
2652 		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2653 	}
2654 
2655 	if (n && pos)
2656 		--(*pos);
2657 	return n;
2658 }
2659 
2660 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2661 {
2662 	struct neighbour *n = neigh_get_first(seq);
2663 
2664 	if (n) {
2665 		--(*pos);
2666 		while (*pos) {
2667 			n = neigh_get_next(seq, n, pos);
2668 			if (!n)
2669 				break;
2670 		}
2671 	}
2672 	return *pos ? NULL : n;
2673 }
2674 
2675 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2676 {
2677 	struct neigh_seq_state *state = seq->private;
2678 	struct net *net = seq_file_net(seq);
2679 	struct neigh_table *tbl = state->tbl;
2680 	struct pneigh_entry *pn = NULL;
2681 	int bucket = state->bucket;
2682 
2683 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
2684 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2685 		pn = tbl->phash_buckets[bucket];
2686 		while (pn && !net_eq(pneigh_net(pn), net))
2687 			pn = pn->next;
2688 		if (pn)
2689 			break;
2690 	}
2691 	state->bucket = bucket;
2692 
2693 	return pn;
2694 }
2695 
2696 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2697 					    struct pneigh_entry *pn,
2698 					    loff_t *pos)
2699 {
2700 	struct neigh_seq_state *state = seq->private;
2701 	struct net *net = seq_file_net(seq);
2702 	struct neigh_table *tbl = state->tbl;
2703 
2704 	do {
2705 		pn = pn->next;
2706 	} while (pn && !net_eq(pneigh_net(pn), net));
2707 
2708 	while (!pn) {
2709 		if (++state->bucket > PNEIGH_HASHMASK)
2710 			break;
2711 		pn = tbl->phash_buckets[state->bucket];
2712 		while (pn && !net_eq(pneigh_net(pn), net))
2713 			pn = pn->next;
2714 		if (pn)
2715 			break;
2716 	}
2717 
2718 	if (pn && pos)
2719 		--(*pos);
2720 
2721 	return pn;
2722 }
2723 
2724 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2725 {
2726 	struct pneigh_entry *pn = pneigh_get_first(seq);
2727 
2728 	if (pn) {
2729 		--(*pos);
2730 		while (*pos) {
2731 			pn = pneigh_get_next(seq, pn, pos);
2732 			if (!pn)
2733 				break;
2734 		}
2735 	}
2736 	return *pos ? NULL : pn;
2737 }
2738 
2739 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2740 {
2741 	struct neigh_seq_state *state = seq->private;
2742 	void *rc;
2743 	loff_t idxpos = *pos;
2744 
2745 	rc = neigh_get_idx(seq, &idxpos);
2746 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2747 		rc = pneigh_get_idx(seq, &idxpos);
2748 
2749 	return rc;
2750 }
2751 
2752 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2753 	__acquires(tbl->lock)
2754 	__acquires(rcu_bh)
2755 {
2756 	struct neigh_seq_state *state = seq->private;
2757 
2758 	state->tbl = tbl;
2759 	state->bucket = 0;
2760 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2761 
2762 	rcu_read_lock_bh();
2763 	state->nht = rcu_dereference_bh(tbl->nht);
2764 	read_lock(&tbl->lock);
2765 
2766 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2767 }
2768 EXPORT_SYMBOL(neigh_seq_start);
2769 
2770 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2771 {
2772 	struct neigh_seq_state *state;
2773 	void *rc;
2774 
2775 	if (v == SEQ_START_TOKEN) {
2776 		rc = neigh_get_first(seq);
2777 		goto out;
2778 	}
2779 
2780 	state = seq->private;
2781 	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2782 		rc = neigh_get_next(seq, v, NULL);
2783 		if (rc)
2784 			goto out;
2785 		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2786 			rc = pneigh_get_first(seq);
2787 	} else {
2788 		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2789 		rc = pneigh_get_next(seq, v, NULL);
2790 	}
2791 out:
2792 	++(*pos);
2793 	return rc;
2794 }
2795 EXPORT_SYMBOL(neigh_seq_next);
2796 
2797 void neigh_seq_stop(struct seq_file *seq, void *v)
2798 	__releases(tbl->lock)
2799 	__releases(rcu_bh)
2800 {
2801 	struct neigh_seq_state *state = seq->private;
2802 	struct neigh_table *tbl = state->tbl;
2803 
2804 	read_unlock(&tbl->lock);
2805 	rcu_read_unlock_bh();
2806 }
2807 EXPORT_SYMBOL(neigh_seq_stop);
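
/*
 * Illustrative consumer (how /proc/net/arp plugs in, see
 * net/ipv4/arp.c):
 *
 *	static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
 *	{
 *		return neigh_seq_start(seq, pos, &arp_tbl,
 *				       NEIGH_SEQ_SKIP_NOARP);
 *	}
 *
 * with neigh_seq_next()/neigh_seq_stop() wired straight into its
 * seq_operations; NEIGH_SEQ_SKIP_NOARP keeps NUD_NOARP entries out of
 * the listing.
 */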
2808 
2809 /* statistics via seq_file */
2810 
2811 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2812 {
2813 	struct neigh_table *tbl = seq->private;
2814 	int cpu;
2815 
2816 	if (*pos == 0)
2817 		return SEQ_START_TOKEN;
2818 
2819 	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2820 		if (!cpu_possible(cpu))
2821 			continue;
2822 		*pos = cpu+1;
2823 		return per_cpu_ptr(tbl->stats, cpu);
2824 	}
2825 	return NULL;
2826 }
2827 
2828 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2829 {
2830 	struct neigh_table *tbl = seq->private;
2831 	int cpu;
2832 
2833 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2834 		if (!cpu_possible(cpu))
2835 			continue;
2836 		*pos = cpu+1;
2837 		return per_cpu_ptr(tbl->stats, cpu);
2838 	}
2839 	return NULL;
2840 }
2841 
2842 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
2843 {
2844 
2845 }
2846 
2847 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2848 {
2849 	struct neigh_table *tbl = seq->private;
2850 	struct neigh_statistics *st = v;
2851 
2852 	if (v == SEQ_START_TOKEN) {
2853 		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
2854 		return 0;
2855 	}
2856 
2857 	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2858 			"%08lx %08lx  %08lx %08lx %08lx %08lx\n",
2859 		   atomic_read(&tbl->entries),
2860 
2861 		   st->allocs,
2862 		   st->destroys,
2863 		   st->hash_grows,
2864 
2865 		   st->lookups,
2866 		   st->hits,
2867 
2868 		   st->res_failed,
2869 
2870 		   st->rcv_probes_mcast,
2871 		   st->rcv_probes_ucast,
2872 
2873 		   st->periodic_gc_runs,
2874 		   st->forced_gc_runs,
2875 		   st->unres_discards,
2876 		   st->table_fulls
2877 		   );
2878 
2879 	return 0;
2880 }
2881 
2882 static const struct seq_operations neigh_stat_seq_ops = {
2883 	.start	= neigh_stat_seq_start,
2884 	.next	= neigh_stat_seq_next,
2885 	.stop	= neigh_stat_seq_stop,
2886 	.show	= neigh_stat_seq_show,
2887 };
2888 
2889 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2890 {
2891 	int ret = seq_open(file, &neigh_stat_seq_ops);
2892 
2893 	if (!ret) {
2894 		struct seq_file *sf = file->private_data;
2895 		sf->private = PDE_DATA(inode);
2896 	}
2897 	return ret;
2898 }
2899 
2900 static const struct file_operations neigh_stat_seq_fops = {
2901 	.owner	 = THIS_MODULE,
2902 	.open 	 = neigh_stat_seq_open,
2903 	.read	 = seq_read,
2904 	.llseek	 = seq_lseek,
2905 	.release = seq_release,
2906 };
2907 
2908 #endif /* CONFIG_PROC_FS */
2909 
2910 static inline size_t neigh_nlmsg_size(void)
2911 {
2912 	return NLMSG_ALIGN(sizeof(struct ndmsg))
2913 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2914 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2915 	       + nla_total_size(sizeof(struct nda_cacheinfo))
2916 	       + nla_total_size(4); /* NDA_PROBES */
2917 }
2918 
2919 static void __neigh_notify(struct neighbour *n, int type, int flags,
2920 			   u32 pid)
2921 {
2922 	struct net *net = dev_net(n->dev);
2923 	struct sk_buff *skb;
2924 	int err = -ENOBUFS;
2925 
2926 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2927 	if (skb == NULL)
2928 		goto errout;
2929 
2930 	err = neigh_fill_info(skb, n, pid, 0, type, flags);
2931 	if (err < 0) {
2932 		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2933 		WARN_ON(err == -EMSGSIZE);
2934 		kfree_skb(skb);
2935 		goto errout;
2936 	}
2937 	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2938 	return;
2939 errout:
2940 	if (err < 0)
2941 		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2942 }
2943 
2944 void neigh_app_ns(struct neighbour *n)
2945 {
2946 	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
2947 }
2948 EXPORT_SYMBOL(neigh_app_ns);
2949 
2950 #ifdef CONFIG_SYSCTL
2951 static int zero;
2952 static int int_max = INT_MAX;
2953 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2954 
2955 static int proc_unres_qlen(struct ctl_table *ctl, int write,
2956 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2957 {
2958 	int size, ret;
2959 	struct ctl_table tmp = *ctl;
2960 
2961 	tmp.extra1 = &zero;
2962 	tmp.extra2 = &unres_qlen_max;
2963 	tmp.data = &size;
2964 
2965 	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2966 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2967 
2968 	if (write && !ret)
2969 		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2970 	return ret;
2971 }
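
/*
 * Worked example (sizes are illustrative; SKB_TRUESIZE() depends on the
 * struct layout of the running kernel): with ETH_FRAME_LEN = 1514 the
 * truesize of one frame is roughly 2KB, so writing unres_qlen = 50
 * stores about 100KB in QUEUE_LEN_BYTES, and a subsequent read divides
 * back to ~50.  The round trip can lose a little to integer division,
 * which is why unres_qlen is only approximate and unres_qlen_bytes is
 * the authoritative limit.
 */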
2972 
2973 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2974 						   int family)
2975 {
2976 	switch (family) {
2977 	case AF_INET:
2978 		return __in_dev_arp_parms_get_rcu(dev);
2979 	case AF_INET6:
2980 		return __in6_dev_nd_parms_get_rcu(dev);
2981 	}
2982 	return NULL;
2983 }
2984 
2985 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
2986 				  int index)
2987 {
2988 	struct net_device *dev;
2989 	int family = neigh_parms_family(p);
2990 
2991 	rcu_read_lock();
2992 	for_each_netdev_rcu(net, dev) {
2993 		struct neigh_parms *dst_p =
2994 				neigh_get_dev_parms_rcu(dev, family);
2995 
2996 		if (dst_p && !test_bit(index, dst_p->data_state))
2997 			dst_p->data[index] = p->data[index];
2998 	}
2999 	rcu_read_unlock();
3000 }
3001 
3002 static void neigh_proc_update(struct ctl_table *ctl, int write)
3003 {
3004 	struct net_device *dev = ctl->extra1;
3005 	struct neigh_parms *p = ctl->extra2;
3006 	struct net *net = neigh_parms_net(p);
3007 	int index = (int *) ctl->data - p->data;
3008 
3009 	if (!write)
3010 		return;
3011 
3012 	set_bit(index, p->data_state);
3013 	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3014 		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3015 	if (!dev) /* NULL dev means this is default value */
3016 		neigh_copy_dflt_parms(net, p, index);
3017 }
3018 
3019 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3020 					   void __user *buffer,
3021 					   size_t *lenp, loff_t *ppos)
3022 {
3023 	struct ctl_table tmp = *ctl;
3024 	int ret;
3025 
3026 	tmp.extra1 = &zero;
3027 	tmp.extra2 = &int_max;
3028 
3029 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3030 	neigh_proc_update(ctl, write);
3031 	return ret;
3032 }
3033 
3034 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
3035 			void __user *buffer, size_t *lenp, loff_t *ppos)
3036 {
3037 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3038 
3039 	neigh_proc_update(ctl, write);
3040 	return ret;
3041 }
3042 EXPORT_SYMBOL(neigh_proc_dointvec);
3043 
3044 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3045 				void __user *buffer,
3046 				size_t *lenp, loff_t *ppos)
3047 {
3048 	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3049 
3050 	neigh_proc_update(ctl, write);
3051 	return ret;
3052 }
3053 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3054 
3055 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3056 					      void __user *buffer,
3057 					      size_t *lenp, loff_t *ppos)
3058 {
3059 	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3060 
3061 	neigh_proc_update(ctl, write);
3062 	return ret;
3063 }
3064 
3065 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3066 				   void __user *buffer,
3067 				   size_t *lenp, loff_t *ppos)
3068 {
3069 	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3070 
3071 	neigh_proc_update(ctl, write);
3072 	return ret;
3073 }
3074 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3075 
3076 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3077 					  void __user *buffer,
3078 					  size_t *lenp, loff_t *ppos)
3079 {
3080 	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3081 
3082 	neigh_proc_update(ctl, write);
3083 	return ret;
3084 }
3085 
3086 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3087 					  void __user *buffer,
3088 					  size_t *lenp, loff_t *ppos)
3089 {
3090 	struct neigh_parms *p = ctl->extra2;
3091 	int ret;
3092 
3093 	if (strcmp(ctl->procname, "base_reachable_time") == 0)
3094 		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3095 	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3096 		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3097 	else
3098 		ret = -1;
3099 
3100 	if (write && ret == 0) {
3101 		/* update reachable_time as well; otherwise the change
3102 		 * only takes effect the next time neigh_periodic_work
3103 		 * decides to recompute it
3104 		 */
3105 		p->reachable_time =
3106 			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3107 	}
3108 	return ret;
3109 }
3110 
3111 #define NEIGH_PARMS_DATA_OFFSET(index)	\
3112 	(&((struct neigh_parms *) 0)->data[index])
3113 
3114 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3115 	[NEIGH_VAR_ ## attr] = { \
3116 		.procname	= name, \
3117 		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3118 		.maxlen		= sizeof(int), \
3119 		.mode		= mval, \
3120 		.proc_handler	= proc, \
3121 	}
3122 
3123 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3124 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3125 
3126 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3127 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3128 
3129 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3130 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
3131 
3132 #define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
3133 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3134 
3135 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3136 	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3137 
3138 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3139 	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3140 
3141 static struct neigh_sysctl_table {
3142 	struct ctl_table_header *sysctl_header;
3143 	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3144 } neigh_sysctl_template __read_mostly = {
3145 	.neigh_vars = {
3146 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3147 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3148 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3149 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3150 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3151 		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3152 		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3153 		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3154 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3155 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3156 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3157 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3158 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3159 		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3160 		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3161 		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3162 		[NEIGH_VAR_GC_INTERVAL] = {
3163 			.procname	= "gc_interval",
3164 			.maxlen		= sizeof(int),
3165 			.mode		= 0644,
3166 			.proc_handler	= proc_dointvec_jiffies,
3167 		},
3168 		[NEIGH_VAR_GC_THRESH1] = {
3169 			.procname	= "gc_thresh1",
3170 			.maxlen		= sizeof(int),
3171 			.mode		= 0644,
3172 			.extra1 	= &zero,
3173 			.extra2		= &int_max,
3174 			.proc_handler	= proc_dointvec_minmax,
3175 		},
3176 		[NEIGH_VAR_GC_THRESH2] = {
3177 			.procname	= "gc_thresh2",
3178 			.maxlen		= sizeof(int),
3179 			.mode		= 0644,
3180 			.extra1 	= &zero,
3181 			.extra2		= &int_max,
3182 			.proc_handler	= proc_dointvec_minmax,
3183 		},
3184 		[NEIGH_VAR_GC_THRESH3] = {
3185 			.procname	= "gc_thresh3",
3186 			.maxlen		= sizeof(int),
3187 			.mode		= 0644,
3188 			.extra1 	= &zero,
3189 			.extra2		= &int_max,
3190 			.proc_handler	= proc_dointvec_minmax,
3191 		},
3192 		{},
3193 	},
3194 };
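
/*
 * Illustrative mapping: via neigh_sysctl_register() below, the template
 * above surfaces as per-device and default knobs such as
 *
 *	/proc/sys/net/ipv4/neigh/eth0/mcast_solicit
 *	/proc/sys/net/ipv4/neigh/default/gc_thresh3
 *
 * The gc_* entries exist only under "default"; for per-device
 * registrations the table is deliberately cut off at
 * NEIGH_VAR_GC_INTERVAL.
 */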
3195 
3196 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3197 			  proc_handler *handler)
3198 {
3199 	int i;
3200 	struct neigh_sysctl_table *t;
3201 	const char *dev_name_source;
3202 	char neigh_path[sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ];
3203 	char *p_name;
3204 
3205 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
3206 	if (!t)
3207 		goto err;
3208 
3209 	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3210 		t->neigh_vars[i].data += (long) p;
3211 		t->neigh_vars[i].extra1 = dev;
3212 		t->neigh_vars[i].extra2 = p;
3213 	}
3214 
3215 	if (dev) {
3216 		dev_name_source = dev->name;
3217 		/* Terminate the table early */
3218 		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3219 		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3220 	} else {
3221 		struct neigh_table *tbl = p->tbl;
3222 		dev_name_source = "default";
3223 		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3224 		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3225 		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3226 		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3227 	}
3228 
3229 	if (handler) {
3230 		/* RetransTime */
3231 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3232 		/* ReachableTime */
3233 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3234 		/* RetransTime (in milliseconds) */
3235 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3236 		/* ReachableTime (in milliseconds) */
3237 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3238 	} else {
3239 		/* These handlers update p->reachable_time after
3240 		 * base_reachable_time(_ms) is set, so the new interval takes
3241 		 * effect on the next neighbour update instead of waiting for
3242 		 * neigh_periodic_work to recompute it (which can take multiple
3243 		 * minutes). Any handler that replaces them should do the same.
3244 		 */
3245 		/* ReachableTime */
3246 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3247 			neigh_proc_base_reachable_time;
3248 		/* ReachableTime (in milliseconds) */
3249 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3250 			neigh_proc_base_reachable_time;
3251 	}
3252 
3253 	/* Don't export sysctls to unprivileged users */
3254 	if (neigh_parms_net(p)->user_ns != &init_user_ns)
3255 		t->neigh_vars[0].procname = NULL;
3256 
3257 	switch (neigh_parms_family(p)) {
3258 	case AF_INET:
3259 	      p_name = "ipv4";
3260 	      break;
3261 	case AF_INET6:
3262 	      p_name = "ipv6";
3263 	      break;
3264 	default:
3265 	      BUG();
3266 	}
3267 
3268 	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3269 		p_name, dev_name_source);
3270 	t->sysctl_header =
3271 		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3272 	if (!t->sysctl_header)
3273 		goto free;
3274 
3275 	p->sysctl_table = t;
3276 	return 0;
3277 
3278 free:
3279 	kfree(t);
3280 err:
3281 	return -ENOBUFS;
3282 }
3283 EXPORT_SYMBOL(neigh_sysctl_register);
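
/*
 * Illustrative caller (see net/ipv4/devinet.c): IPv4 registers its
 * per-device ARP knobs with
 *
 *	neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
 *
 * passing a NULL handler so that the neigh_proc_base_reachable_time()
 * defaults installed above remain in effect.
 */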
3284 
3285 void neigh_sysctl_unregister(struct neigh_parms *p)
3286 {
3287 	if (p->sysctl_table) {
3288 		struct neigh_sysctl_table *t = p->sysctl_table;
3289 		p->sysctl_table = NULL;
3290 		unregister_net_sysctl_table(t->sysctl_header);
3291 		kfree(t);
3292 	}
3293 }
3294 EXPORT_SYMBOL(neigh_sysctl_unregister);
3295 
3296 #endif	/* CONFIG_SYSCTL */
3297 
3298 static int __init neigh_init(void)
3299 {
3300 	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3301 	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3302 	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);
3303 
3304 	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3305 		      0);
3306 	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3307 
3308 	return 0;
3309 }
3310 
3311 subsys_initcall(neigh_init);
3312 
3313