// SPDX-License-Identifier: GPL-2.0-or-later
/* AFS server record management
 *
 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include "afs_fs.h"
#include "internal.h"
#include "protocol_yfs.h"

static unsigned afs_server_gc_delay = 10;	/* Server record timeout in seconds */
static unsigned afs_server_update_delay = 30;	/* Time till VLDB recheck in secs */
static atomic_t afs_server_debug_id;

static void afs_inc_servers_outstanding(struct afs_net *net)
{
	atomic_inc(&net->servers_outstanding);
}

static void afs_dec_servers_outstanding(struct afs_net *net)
{
	if (atomic_dec_and_test(&net->servers_outstanding))
		wake_up_var(&net->servers_outstanding);
}
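
/* net->servers_outstanding counts pending server work (the GC timer, queued
 * manager work and allocated server records).  afs_purge_servers() waits for
 * it to reach zero; the wake_up_var() above provides the wakeup for that
 * wait.
 */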

/*
 * Find a server by one of its addresses.
 */
struct afs_server *afs_find_server(struct afs_net *net,
				   const struct sockaddr_rxrpc *srx)
{
	const struct afs_addr_list *alist;
	struct afs_server *server = NULL;
	unsigned int i;
	int seq = 0, diff;

	rcu_read_lock();

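	/* Search under the RCU read lock, using fs_addr_lock as a seqlock: the
	 * first pass is lockless and the search is retried with the lock held
	 * if the lists changed under us.  Any ref taken on a previous pass is
	 * dropped before retrying.
	 */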
	do {
		if (server)
			afs_put_server(net, server, afs_server_trace_put_find_rsq);
		server = NULL;
		read_seqbegin_or_lock(&net->fs_addr_lock, &seq);

		if (srx->transport.family == AF_INET6) {
			const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
			hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
				alist = rcu_dereference(server->addresses);
				for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
					b = &alist->addrs[i].transport.sin6;
					diff = ((u16 __force)a->sin6_port -
						(u16 __force)b->sin6_port);
					if (diff == 0)
						diff = memcmp(&a->sin6_addr,
							      &b->sin6_addr,
							      sizeof(struct in6_addr));
					if (diff == 0)
						goto found;
				}
			}
		} else {
			const struct sockaddr_in *a = &srx->transport.sin, *b;
			hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
				alist = rcu_dereference(server->addresses);
				for (i = 0; i < alist->nr_ipv4; i++) {
					b = &alist->addrs[i].transport.sin;
					diff = ((u16 __force)a->sin_port -
						(u16 __force)b->sin_port);
					if (diff == 0)
						diff = ((u32 __force)a->sin_addr.s_addr -
							(u32 __force)b->sin_addr.s_addr);
					if (diff == 0)
						goto found;
				}
			}
		}

		server = NULL;
	found:
		if (server && !atomic_inc_not_zero(&server->usage))
			server = NULL;

	} while (need_seqretry(&net->fs_addr_lock, seq));

	done_seqretry(&net->fs_addr_lock, seq);

	rcu_read_unlock();
	return server;
}

/*
 * Look up a server by its UUID
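 *
 * The returned server, if any, has had a reference taken on it which the
 * caller must drop with afs_put_server().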
 */
struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
{
	struct afs_server *server = NULL;
	struct rb_node *p;
	int diff, seq = 0;

	_enter("%pU", uuid);

	do {
		/* Unfortunately, rbtree walking doesn't give reliable results
		 * under just the RCU read lock, so we have to check for
		 * changes.
		 */
		if (server)
			afs_put_server(net, server, afs_server_trace_put_uuid_rsq);
		server = NULL;

		read_seqbegin_or_lock(&net->fs_lock, &seq);

		p = net->fs_servers.rb_node;
		while (p) {
			server = rb_entry(p, struct afs_server, uuid_rb);

			diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
			if (diff < 0) {
				p = p->rb_left;
			} else if (diff > 0) {
				p = p->rb_right;
			} else {
				afs_get_server(server, afs_server_trace_get_by_uuid);
				break;
			}

			server = NULL;
		}
	} while (need_seqretry(&net->fs_lock, seq));

	done_seqretry(&net->fs_lock, seq);

	_leave(" = %p", server);
	return server;
}

/*
 * Install a server record in the namespace tree
 */
static struct afs_server *afs_install_server(struct afs_net *net,
					     struct afs_server *candidate)
{
	const struct afs_addr_list *alist;
	struct afs_server *server;
	struct rb_node **pp, *p;
	int ret = -EEXIST, diff;

	_enter("%p", candidate);

	write_seqlock(&net->fs_lock);

	/* Firstly install the server in the UUID lookup tree */
	pp = &net->fs_servers.rb_node;
	p = NULL;
	while (*pp) {
		p = *pp;
		_debug("- consider %p", p);
		server = rb_entry(p, struct afs_server, uuid_rb);
		diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
		if (diff < 0)
			pp = &(*pp)->rb_left;
		else if (diff > 0)
			pp = &(*pp)->rb_right;
		else
			goto exists;
	}

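	/* No record exists for this UUID, so link the candidate into the
	 * lookup tree and the proc list.
	 */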
	server = candidate;
	rb_link_node(&server->uuid_rb, p, pp);
	rb_insert_color(&server->uuid_rb, &net->fs_servers);
	hlist_add_head_rcu(&server->proc_link, &net->fs_proc);

	write_seqlock(&net->fs_addr_lock);
	alist = rcu_dereference_protected(server->addresses,
					  lockdep_is_held(&net->fs_addr_lock.lock));

	/* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
	 * it in the IPv4 and/or IPv6 reverse-map lists.
	 *
	 * TODO: For speed we want to use something other than a flat list
	 * here; even sorting the list in terms of lowest address would help a
	 * bit, but anything we might want to do gets messy and memory
	 * intensive.
	 */
	if (alist->nr_ipv4 > 0)
		hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
	if (alist->nr_addrs > alist->nr_ipv4)
		hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);

	write_sequnlock(&net->fs_addr_lock);
	ret = 0;

exists:
	afs_get_server(server, afs_server_trace_get_install);
	write_sequnlock(&net->fs_lock);
	return server;
}

/*
 * allocate a new server record
 */
static struct afs_server *afs_alloc_server(struct afs_net *net,
					   const uuid_t *uuid,
					   struct afs_addr_list *alist)
{
	struct afs_server *server;

	_enter("");

	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
	if (!server)
		goto enomem;

	atomic_set(&server->usage, 1);
	server->debug_id = atomic_inc_return(&afs_server_debug_id);
	RCU_INIT_POINTER(server->addresses, alist);
	server->addr_version = alist->version;
	server->uuid = *uuid;
	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
	rwlock_init(&server->fs_lock);
	INIT_HLIST_HEAD(&server->cb_volumes);
	rwlock_init(&server->cb_break_lock);
	init_waitqueue_head(&server->probe_wq);
	spin_lock_init(&server->probe_lock);

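	/* The record counts towards servers_outstanding until
	 * afs_destroy_server() releases it.
	 */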
	afs_inc_servers_outstanding(net);
	trace_afs_server(server, 1, afs_server_trace_alloc);
	_leave(" = %p", server);
	return server;

enomem:
	_leave(" = NULL [nomem]");
	return NULL;
}

/*
 * Look up an address record for a server
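 *
 * On success the caller is left holding a ref on the returned address list,
 * which must be dropped with afs_put_addrlist(); an ERR_PTR is returned on
 * failure.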
 */
static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
						 struct key *key, const uuid_t *uuid)
{
	struct afs_vl_cursor vc;
	struct afs_addr_list *alist = NULL;
	int ret;

	ret = -ERESTARTSYS;
	if (afs_begin_vlserver_operation(&vc, cell, key)) {
		while (afs_select_vlserver(&vc)) {
			if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
				alist = afs_yfsvl_get_endpoints(&vc, uuid);
			else
				alist = afs_vl_get_addrs_u(&vc, uuid);
		}

		ret = afs_end_vlserver_operation(&vc);
	}

	return ret < 0 ? ERR_PTR(ret) : alist;
}

/*
 * Get or create a fileserver record.
 */
struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
				     const uuid_t *uuid)
{
	struct afs_addr_list *alist;
	struct afs_server *server, *candidate;

	_enter("%p,%pU", cell->net, uuid);

	server = afs_find_server_by_uuid(cell->net, uuid);
	if (server)
		return server;

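	/* Not yet known: ask the VL service for the fileserver's addresses and
	 * try to install a new record.
	 */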
	alist = afs_vl_lookup_addrs(cell, key, uuid);
	if (IS_ERR(alist))
		return ERR_CAST(alist);

	candidate = afs_alloc_server(cell->net, uuid, alist);
	if (!candidate) {
		afs_put_addrlist(alist);
		return ERR_PTR(-ENOMEM);
	}

	server = afs_install_server(cell->net, candidate);
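	/* If another thread installed a record for this UUID first,
	 * afs_install_server() returns that record instead and the candidate
	 * is discarded below.
	 */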
	if (server != candidate) {
		afs_put_addrlist(alist);
		kfree(candidate);
	}

	_leave(" = %p{%d}", server, atomic_read(&server->usage));
	return server;
}

/*
 * Set the server timer to fire after a given delay, assuming it's not already
 * set for an earlier time.
 */
static void afs_set_server_timer(struct afs_net *net, time64_t delay)
{
	if (net->live) {
		afs_inc_servers_outstanding(net);
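		/* timer_reduce() returns true if the timer was already
		 * pending, in which case the pending timer already owns a
		 * count on servers_outstanding and the one just taken is
		 * dropped again.
		 */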
		if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
			afs_dec_servers_outstanding(net);
	}
}

/*
 * Server management timer.  We have an increment on fs_outstanding that we
 * need to pass along to the work item.
 */
void afs_servers_timer(struct timer_list *timer)
{
	struct afs_net *net = container_of(timer, struct afs_net, fs_timer);

	_enter("");
	if (!queue_work(afs_wq, &net->fs_manager))
		afs_dec_servers_outstanding(net);
}

/*
 * Get a reference on a server object.
 */
struct afs_server *afs_get_server(struct afs_server *server,
				  enum afs_server_trace reason)
{
	unsigned int u = atomic_inc_return(&server->usage);

	trace_afs_server(server, u, reason);
	return server;
}

/*
 * Release a reference on a server record.
 */
void afs_put_server(struct afs_net *net, struct afs_server *server,
		    enum afs_server_trace reason)
{
	unsigned int usage;

	if (!server)
		return;

	server->put_time = ktime_get_real_seconds();

	usage = atomic_dec_return(&server->usage);

	trace_afs_server(server, usage, reason);

	if (likely(usage > 0))
		return;

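	/* The last ref is gone.  The record is not freed here; instead the GC
	 * timer is armed and the server manager disposes of expired records.
	 */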
	afs_set_server_timer(net, afs_server_gc_delay);
}

static void afs_server_rcu(struct rcu_head *rcu)
{
	struct afs_server *server = container_of(rcu, struct afs_server, rcu);

	trace_afs_server(server, atomic_read(&server->usage),
			 afs_server_trace_free);
	afs_put_addrlist(rcu_access_pointer(server->addresses));
	kfree(server);
}

/*
 * destroy a dead server
 */
static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
{
	struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
	struct afs_addr_cursor ac = {
		.alist	= alist,
		.index	= alist->preferred,
		.error	= 0,
	};

	trace_afs_server(server, atomic_read(&server->usage),
			 afs_server_trace_give_up_cb);

	if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
		afs_fs_give_up_all_callbacks(net, server, &ac, NULL);

	wait_var_event(&server->probe_outstanding,
		       atomic_read(&server->probe_outstanding) == 0);

	trace_afs_server(server, atomic_read(&server->usage),
			 afs_server_trace_destroy);
	call_rcu(&server->rcu, afs_server_rcu);
	afs_dec_servers_outstanding(net);
}

/*
 * Garbage collect any expired servers.
 */
static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
{
	struct afs_server *server;
	bool deleted;
	int usage;

	while ((server = gc_list)) {
		gc_list = server->gc_next;

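		/* Only reap the server if its usage count is still exactly 1
		 * (ie. no one regained a reference since it was queued); the
		 * cmpxchg drops it to 0 and the record is unhooked from the
		 * trees.
		 */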
		write_seqlock(&net->fs_lock);
		usage = 1;
		deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
		trace_afs_server(server, usage, afs_server_trace_gc);
		if (deleted) {
			rb_erase(&server->uuid_rb, &net->fs_servers);
			hlist_del_rcu(&server->proc_link);
		}
		write_sequnlock(&net->fs_lock);

		if (deleted) {
			write_seqlock(&net->fs_addr_lock);
			if (!hlist_unhashed(&server->addr4_link))
				hlist_del_rcu(&server->addr4_link);
			if (!hlist_unhashed(&server->addr6_link))
				hlist_del_rcu(&server->addr6_link);
			write_sequnlock(&net->fs_addr_lock);
			afs_destroy_server(net, server);
		}
	}
}

/*
 * Manage the records of servers known to be within a network namespace.  This
 * includes garbage collecting unused servers.
 *
 * Note also that we were given an increment on net->servers_outstanding by
 * whoever queued us that we need to deal with before returning.
 */
void afs_manage_servers(struct work_struct *work)
{
	struct afs_net *net = container_of(work, struct afs_net, fs_manager);
	struct afs_server *gc_list = NULL;
	struct rb_node *cursor;
	time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
	bool purging = !net->live;

	_enter("");

	/* Trawl the server list looking for servers that have expired from
	 * lack of use.
	 */
	read_seqlock_excl(&net->fs_lock);

	for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
		struct afs_server *server =
			rb_entry(cursor, struct afs_server, uuid_rb);
		int usage = atomic_read(&server->usage);

		_debug("manage %pU %u", &server->uuid, usage);

		ASSERTCMP(usage, >=, 1);
		ASSERTIFCMP(purging, usage, ==, 1);

		if (usage == 1) {
			time64_t expire_at = server->put_time;

			if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
			    !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
				expire_at += afs_server_gc_delay;
			if (purging || expire_at <= now) {
				server->gc_next = gc_list;
				gc_list = server;
			} else if (expire_at < next_manage) {
				next_manage = expire_at;
			}
		}
	}

	read_sequnlock_excl(&net->fs_lock);

	/* Update the timer on the way out.  We have to pass an increment on
	 * servers_outstanding in the namespace that we are in to the timer or
	 * the work scheduler.
	 */
	if (!purging && next_manage < TIME64_MAX) {
		now = ktime_get_real_seconds();

		if (next_manage - now <= 0) {
			if (queue_work(afs_wq, &net->fs_manager))
				afs_inc_servers_outstanding(net);
		} else {
			afs_set_server_timer(net, next_manage - now);
		}
	}

	afs_gc_servers(net, gc_list);

	afs_dec_servers_outstanding(net);
	_leave(" [%d]", atomic_read(&net->servers_outstanding));
}

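/*
 * Queue the server manager work item, passing it a count on
 * servers_outstanding; the count is dropped again if the item was already
 * queued.
 */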
static void afs_queue_server_manager(struct afs_net *net)
{
	afs_inc_servers_outstanding(net);
	if (!queue_work(afs_wq, &net->fs_manager))
		afs_dec_servers_outstanding(net);
}

/*
 * Purge list of servers.
 */
void afs_purge_servers(struct afs_net *net)
{
	_enter("");

	if (del_timer_sync(&net->fs_timer))
		atomic_dec(&net->servers_outstanding);

	afs_queue_server_manager(net);

	_debug("wait");
	wait_var_event(&net->servers_outstanding,
		       !atomic_read(&net->servers_outstanding));
	_leave("");
}

/*
 * Get an update for a server's address list.
 */
static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
{
	struct afs_addr_list *alist, *discard;

	_enter("");

	trace_afs_server(server, atomic_read(&server->usage), afs_server_trace_update);

	alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
				    &server->uuid);
	if (IS_ERR(alist)) {
		if ((PTR_ERR(alist) == -ERESTARTSYS ||
		     PTR_ERR(alist) == -EINTR) &&
		    !(fc->flags & AFS_FS_CURSOR_INTR) &&
		    server->addresses) {
			_leave(" = t [intr]");
			return true;
		}
		fc->error = PTR_ERR(alist);
		_leave(" = f [%d]", fc->error);
		return false;
	}

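	/* Only swap in the new list if the version has changed; otherwise the
	 * freshly fetched list is discarded.
	 */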
	discard = alist;
	if (server->addr_version != alist->version) {
		write_lock(&server->fs_lock);
		discard = rcu_dereference_protected(server->addresses,
						    lockdep_is_held(&server->fs_lock));
		rcu_assign_pointer(server->addresses, alist);
		server->addr_version = alist->version;
		write_unlock(&server->fs_lock);
	}

	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
	afs_put_addrlist(discard);
	_leave(" = t");
	return true;
}

/*
 * See if a server's address list needs updating.
 */
bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
{
	time64_t now = ktime_get_real_seconds();
	long diff;
	bool success;
	int ret, retries = 0;

	_enter("");

	ASSERT(server);

retry:
	diff = READ_ONCE(server->update_at) - now;
	if (diff > 0) {
		_leave(" = t [not now %ld]", diff);
		return true;
	}

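	/* Only one updater runs at a time: whoever wins the test-and-set does
	 * the VL lookup; everyone else waits (interruptibly) for the UPDATING
	 * bit to clear and then rechecks.
	 */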
	if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
		success = afs_update_server_record(fc, server);
		clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
		wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
		_leave(" = %d", success);
		return success;
	}

	ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
			  TASK_INTERRUPTIBLE);
	if (ret == -ERESTARTSYS) {
		if (!(fc->flags & AFS_FS_CURSOR_INTR) && server->addresses) {
			_leave(" = t [intr]");
			return true;
		}
		fc->error = ret;
		_leave(" = f [intr]");
		return false;
	}

	retries++;
	if (retries == 4) {
		_leave(" = f [stale]");
		ret = -ESTALE;
		return false;
	}
	goto retry;
}