// SPDX-License-Identifier: GPL-2.0-or-later
/* AFS server record management
 *
 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include "afs_fs.h"
#include "internal.h"
#include "protocol_yfs.h"

static unsigned afs_server_gc_delay = 10;	/* Server record timeout in seconds */
static unsigned afs_server_update_delay = 30;	/* Time till VLDB recheck in secs */
static atomic_t afs_server_debug_id;

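/*
 * Bump the count of server records and server-related work outstanding in a
 * network namespace.
 */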
static void afs_inc_servers_outstanding(struct afs_net *net)
{
	atomic_inc(&net->servers_outstanding);
}

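/*
 * Drop the count of outstanding server records; the final decrement wakes
 * anyone waiting for the count to hit zero in afs_purge_servers().
 */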
static void afs_dec_servers_outstanding(struct afs_net *net)
{
	if (atomic_dec_and_test(&net->servers_outstanding))
		wake_up_var(&net->servers_outstanding);
}

/*
 * Find a server by one of its addresses.
 */
struct afs_server *afs_find_server(struct afs_net *net,
				   const struct sockaddr_rxrpc *srx)
{
	const struct afs_addr_list *alist;
	struct afs_server *server = NULL;
	unsigned int i;
	int seq = 1, diff;

	rcu_read_lock();

	do {
		if (server)
			afs_put_server(net, server, afs_server_trace_put_find_rsq);
		server = NULL;
		seq++; /* 2 on the 1st/lockless path, otherwise odd */
		read_seqbegin_or_lock(&net->fs_addr_lock, &seq);

		if (srx->transport.family == AF_INET6) {
			const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
			hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
				alist = rcu_dereference(server->addresses);
				for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
					b = &alist->addrs[i].transport.sin6;
					diff = ((u16 __force)a->sin6_port -
						(u16 __force)b->sin6_port);
					if (diff == 0)
						diff = memcmp(&a->sin6_addr,
							      &b->sin6_addr,
							      sizeof(struct in6_addr));
					if (diff == 0)
						goto found;
				}
			}
		} else {
			const struct sockaddr_in *a = &srx->transport.sin, *b;
			hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
				alist = rcu_dereference(server->addresses);
				for (i = 0; i < alist->nr_ipv4; i++) {
					b = &alist->addrs[i].transport.sin;
					diff = ((u16 __force)a->sin_port -
						(u16 __force)b->sin_port);
					if (diff == 0)
						diff = ((u32 __force)a->sin_addr.s_addr -
							(u32 __force)b->sin_addr.s_addr);
					if (diff == 0)
						goto found;
				}
			}
		}

		server = NULL;
	found:
		if (server && !atomic_inc_not_zero(&server->usage))
			server = NULL;

	} while (need_seqretry(&net->fs_addr_lock, seq));

	done_seqretry(&net->fs_addr_lock, seq);

	rcu_read_unlock();
	return server;
}

/*
 * Look up a server by its UUID
 */
struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
{
	struct afs_server *server = NULL;
	struct rb_node *p;
	int diff, seq = 1;

	_enter("%pU", uuid);

	do {
		/* Unfortunately, rbtree walking doesn't give reliable results
		 * under just the RCU read lock, so we have to check for
		 * changes.
		 */
		if (server)
			afs_put_server(net, server, afs_server_trace_put_uuid_rsq);
		server = NULL;
		seq++; /* 2 on the 1st/lockless path, otherwise odd */
		read_seqbegin_or_lock(&net->fs_lock, &seq);

		p = net->fs_servers.rb_node;
		while (p) {
			server = rb_entry(p, struct afs_server, uuid_rb);

			diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
			if (diff < 0) {
				p = p->rb_left;
			} else if (diff > 0) {
				p = p->rb_right;
			} else {
				afs_get_server(server, afs_server_trace_get_by_uuid);
				break;
			}

			server = NULL;
		}
	} while (need_seqretry(&net->fs_lock, seq));

	done_seqretry(&net->fs_lock, seq);

	_leave(" = %p", server);
	return server;
}

/*
 * Install a server record in the namespace tree
 */
static struct afs_server *afs_install_server(struct afs_net *net,
					     struct afs_server *candidate)
{
	const struct afs_addr_list *alist;
	struct afs_server *server;
	struct rb_node **pp, *p;
	int ret = -EEXIST, diff;

	_enter("%p", candidate);

	write_seqlock(&net->fs_lock);

	/* Firstly install the server in the UUID lookup tree */
	pp = &net->fs_servers.rb_node;
	p = NULL;
	while (*pp) {
		p = *pp;
		_debug("- consider %p", p);
		server = rb_entry(p, struct afs_server, uuid_rb);
		diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
		if (diff < 0)
			pp = &(*pp)->rb_left;
		else if (diff > 0)
			pp = &(*pp)->rb_right;
		else
			goto exists;
	}

	server = candidate;
	rb_link_node(&server->uuid_rb, p, pp);
	rb_insert_color(&server->uuid_rb, &net->fs_servers);
	hlist_add_head_rcu(&server->proc_link, &net->fs_proc);

	write_seqlock(&net->fs_addr_lock);
	alist = rcu_dereference_protected(server->addresses,
					  lockdep_is_held(&net->fs_addr_lock.lock));

	/* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
	 * it in the IPv4 and/or IPv6 reverse-map lists.
	 *
	 * TODO: For speed we want to use something other than a flat list
	 * here; even sorting the list in terms of lowest address would help a
	 * bit, but anything we might want to do gets messy and memory
	 * intensive.
	 */
	if (alist->nr_ipv4 > 0)
		hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
	if (alist->nr_addrs > alist->nr_ipv4)
		hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);

	write_sequnlock(&net->fs_addr_lock);
	ret = 0;

exists:
	afs_get_server(server, afs_server_trace_get_install);
	write_sequnlock(&net->fs_lock);
	return server;
}

/*
 * allocate a new server record
 */
static struct afs_server *afs_alloc_server(struct afs_net *net,
					   const uuid_t *uuid,
					   struct afs_addr_list *alist)
{
	struct afs_server *server;

	_enter("");

	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
	if (!server)
		goto enomem;

	atomic_set(&server->usage, 1);
	server->debug_id = atomic_inc_return(&afs_server_debug_id);
	RCU_INIT_POINTER(server->addresses, alist);
	server->addr_version = alist->version;
	server->uuid = *uuid;
	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
	rwlock_init(&server->fs_lock);
	INIT_HLIST_HEAD(&server->cb_volumes);
	rwlock_init(&server->cb_break_lock);
	init_waitqueue_head(&server->probe_wq);
	spin_lock_init(&server->probe_lock);

	afs_inc_servers_outstanding(net);
	trace_afs_server(server, 1, afs_server_trace_alloc);
	_leave(" = %p", server);
	return server;

enomem:
	_leave(" = NULL [nomem]");
	return NULL;
}

/*
 * Look up an address record for a server
 */
static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
						 struct key *key, const uuid_t *uuid)
{
	struct afs_vl_cursor vc;
	struct afs_addr_list *alist = NULL;
	int ret;

	ret = -ERESTARTSYS;
	if (afs_begin_vlserver_operation(&vc, cell, key)) {
		while (afs_select_vlserver(&vc)) {
			if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
				alist = afs_yfsvl_get_endpoints(&vc, uuid);
			else
				alist = afs_vl_get_addrs_u(&vc, uuid);
		}

		ret = afs_end_vlserver_operation(&vc);
	}

	return ret < 0 ? ERR_PTR(ret) : alist;
}

/*
 * Get or create a fileserver record.
 */
struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
				     const uuid_t *uuid)
{
	struct afs_addr_list *alist;
	struct afs_server *server, *candidate;

	_enter("%p,%pU", cell->net, uuid);

	server = afs_find_server_by_uuid(cell->net, uuid);
	if (server)
		return server;

	alist = afs_vl_lookup_addrs(cell, key, uuid);
	if (IS_ERR(alist))
		return ERR_CAST(alist);

	candidate = afs_alloc_server(cell->net, uuid, alist);
	if (!candidate) {
		afs_put_addrlist(alist);
		return ERR_PTR(-ENOMEM);
	}

	server = afs_install_server(cell->net, candidate);
	if (server != candidate) {
		afs_put_addrlist(alist);
		kfree(candidate);
	}

	_leave(" = %p{%d}", server, atomic_read(&server->usage));
	return server;
}

/*
 * Set the server timer to fire after a given delay, assuming it's not already
 * set for an earlier time.
 */
static void afs_set_server_timer(struct afs_net *net, time64_t delay)
{
	if (net->live) {
		afs_inc_servers_outstanding(net);
		if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
			afs_dec_servers_outstanding(net);
	}
}

/*
 * Server management timer.  We have an increment on net->servers_outstanding
 * that we need to pass along to the work item.
 */
void afs_servers_timer(struct timer_list *timer)
{
	struct afs_net *net = container_of(timer, struct afs_net, fs_timer);

	_enter("");
	if (!queue_work(afs_wq, &net->fs_manager))
		afs_dec_servers_outstanding(net);
}

/*
 * Get a reference on a server object.
 */
struct afs_server *afs_get_server(struct afs_server *server,
				  enum afs_server_trace reason)
{
	unsigned int u = atomic_inc_return(&server->usage);

	trace_afs_server(server, u, reason);
	return server;
}

/*
 * Release a reference on a server record.
 */
void afs_put_server(struct afs_net *net, struct afs_server *server,
		    enum afs_server_trace reason)
{
	unsigned int usage;

	if (!server)
		return;

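	/* Note the time of the last put so that the manager can work out how
	 * long the record has gone unused before expiring it.
	 */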
	server->put_time = ktime_get_real_seconds();

	usage = atomic_dec_return(&server->usage);

	trace_afs_server(server, usage, reason);

	if (likely(usage > 0))
		return;

	afs_set_server_timer(net, afs_server_gc_delay);
}

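/*
 * RCU callback that frees a dead server record once any remaining RCU readers
 * have finished with it.
 */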
static void afs_server_rcu(struct rcu_head *rcu)
{
	struct afs_server *server = container_of(rcu, struct afs_server, rcu);

	trace_afs_server(server, atomic_read(&server->usage),
			 afs_server_trace_free);
	afs_put_addrlist(rcu_access_pointer(server->addresses));
	kfree(server);
}

/*
 * destroy a dead server
 */
static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
{
	struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
	struct afs_addr_cursor ac = {
		.alist	= alist,
		.index	= alist->preferred,
		.error	= 0,
	};

	trace_afs_server(server, atomic_read(&server->usage),
			 afs_server_trace_give_up_cb);

	if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
		afs_fs_give_up_all_callbacks(net, server, &ac, NULL);

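	/* Wait for any outstanding probe RPCs to this server to complete
	 * before the record is freed.
	 */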
	wait_var_event(&server->probe_outstanding,
		       atomic_read(&server->probe_outstanding) == 0);

	trace_afs_server(server, atomic_read(&server->usage),
			 afs_server_trace_destroy);
	call_rcu(&server->rcu, afs_server_rcu);
	afs_dec_servers_outstanding(net);
}

/*
 * Garbage collect any expired servers.
 */
static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
{
	struct afs_server *server;
	bool deleted;
	int usage;

	while ((server = gc_list)) {
		gc_list = server->gc_next;

		write_seqlock(&net->fs_lock);
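		/* If the usage count is still 1, nothing took a new reference
		 * since the record was queued for GC, so knock it down to 0
		 * and dismantle the record; otherwise leave it alone.
		 */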
		usage = 1;
		deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
		trace_afs_server(server, usage, afs_server_trace_gc);
		if (deleted) {
			rb_erase(&server->uuid_rb, &net->fs_servers);
			hlist_del_rcu(&server->proc_link);
		}
		write_sequnlock(&net->fs_lock);

		if (deleted) {
			write_seqlock(&net->fs_addr_lock);
			if (!hlist_unhashed(&server->addr4_link))
				hlist_del_rcu(&server->addr4_link);
			if (!hlist_unhashed(&server->addr6_link))
				hlist_del_rcu(&server->addr6_link);
			write_sequnlock(&net->fs_addr_lock);
			afs_destroy_server(net, server);
		}
	}
}

/*
 * Manage the records of servers known to be within a network namespace.  This
 * includes garbage collecting unused servers.
 *
 * Note also that whoever queued us gave us an increment on
 * net->servers_outstanding that we need to deal with before returning.
 */
void afs_manage_servers(struct work_struct *work)
{
	struct afs_net *net = container_of(work, struct afs_net, fs_manager);
	struct afs_server *gc_list = NULL;
	struct rb_node *cursor;
	time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
	bool purging = !net->live;

	_enter("");

	/* Trawl the server list looking for servers that have expired from
	 * lack of use.
	 */
	read_seqlock_excl(&net->fs_lock);

	for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
		struct afs_server *server =
			rb_entry(cursor, struct afs_server, uuid_rb);
		int usage = atomic_read(&server->usage);

		_debug("manage %pU %u", &server->uuid, usage);

		ASSERTCMP(usage, >=, 1);
		ASSERTIFCMP(purging, usage, ==, 1);

		if (usage == 1) {
			time64_t expire_at = server->put_time;

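			/* A record for a server that couldn't be looked up in
			 * the VLDB is expired immediately; otherwise it gets a
			 * grace period of afs_server_gc_delay seconds after
			 * the last put.
			 */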
			if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
			    !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
				expire_at += afs_server_gc_delay;
			if (purging || expire_at <= now) {
				server->gc_next = gc_list;
				gc_list = server;
			} else if (expire_at < next_manage) {
				next_manage = expire_at;
			}
		}
	}

	read_sequnlock_excl(&net->fs_lock);

	/* Update the timer on the way out.  We have to pass an increment on
	 * servers_outstanding in this namespace to the timer or the work
	 * scheduler.
	 */
	if (!purging && next_manage < TIME64_MAX) {
		now = ktime_get_real_seconds();

		if (next_manage - now <= 0) {
			if (queue_work(afs_wq, &net->fs_manager))
				afs_inc_servers_outstanding(net);
		} else {
			afs_set_server_timer(net, next_manage - now);
		}
	}

	afs_gc_servers(net, gc_list);

	afs_dec_servers_outstanding(net);
	_leave(" [%d]", atomic_read(&net->servers_outstanding));
}

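/*
 * Queue the server manager work item, handing it a count on
 * servers_outstanding; drop the count again if the work item was already
 * queued.
 */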
static void afs_queue_server_manager(struct afs_net *net)
{
	afs_inc_servers_outstanding(net);
	if (!queue_work(afs_wq, &net->fs_manager))
		afs_dec_servers_outstanding(net);
}

/*
 * Purge the list of servers.
 */
void afs_purge_servers(struct afs_net *net)
{
	_enter("");

	if (del_timer_sync(&net->fs_timer))
		atomic_dec(&net->servers_outstanding);

	afs_queue_server_manager(net);

	_debug("wait");
	wait_var_event(&net->servers_outstanding,
		       !atomic_read(&net->servers_outstanding));
	_leave("");
}

/*
 * Get an update for a server's address list.
 */
static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
{
	struct afs_addr_list *alist, *discard;

	_enter("");

	trace_afs_server(server, atomic_read(&server->usage), afs_server_trace_update);

	alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
				    &server->uuid);
	if (IS_ERR(alist)) {
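		/* If the VL lookup got interrupted but this fileserver
		 * operation isn't interruptible, just keep using the address
		 * list we already have.
		 */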
		if ((PTR_ERR(alist) == -ERESTARTSYS ||
		     PTR_ERR(alist) == -EINTR) &&
		    !(fc->flags & AFS_FS_CURSOR_INTR) &&
		    server->addresses) {
			_leave(" = t [intr]");
			return true;
		}
		fc->error = PTR_ERR(alist);
		_leave(" = f [%d]", fc->error);
		return false;
	}

	discard = alist;
	if (server->addr_version != alist->version) {
		write_lock(&server->fs_lock);
		discard = rcu_dereference_protected(server->addresses,
						    lockdep_is_held(&server->fs_lock));
		rcu_assign_pointer(server->addresses, alist);
		server->addr_version = alist->version;
		write_unlock(&server->fs_lock);
	}

	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
	afs_put_addrlist(discard);
	_leave(" = t");
	return true;
}

/*
 * See if a server's address list needs updating.
 */
bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
{
	time64_t now = ktime_get_real_seconds();
	long diff;
	bool success;
	int ret, retries = 0;

	_enter("");

	ASSERT(server);

retry:
	diff = READ_ONCE(server->update_at) - now;
	if (diff > 0) {
		_leave(" = t [not now %ld]", diff);
		return true;
	}

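	/* Only one thread may update the record at a time.  Anyone who loses
	 * the race waits for the updater to finish and then rechecks whether
	 * an update is still needed.
	 */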
	if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
		success = afs_update_server_record(fc, server);
		clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
		wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
		_leave(" = %d", success);
		return success;
	}

	ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
			  (fc->flags & AFS_FS_CURSOR_INTR) ?
			  TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
	if (ret == -ERESTARTSYS) {
		fc->error = ret;
		_leave(" = f [intr]");
		return false;
	}

	retries++;
	if (retries == 4) {
		_leave(" = f [stale]");
		ret = -ESTALE;
		return false;
	}
	goto retry;
}
615