• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * 2002-10-15  Posix Clocks & timers
4  *                           by George Anzinger george@mvista.com
5  *			     Copyright (C) 2002 2003 by MontaVista Software.
6  *
7  * 2004-06-01  Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
8  *			     Copyright (C) 2004 Boris Hu
9  *
10  * These are all the functions necessary to implement POSIX clocks & timers
11  */
12 #include <linux/mm.h>
13 #include <linux/interrupt.h>
14 #include <linux/slab.h>
15 #include <linux/time.h>
16 #include <linux/mutex.h>
17 #include <linux/sched/task.h>
18 
19 #include <linux/uaccess.h>
20 #include <linux/list.h>
21 #include <linux/init.h>
22 #include <linux/compiler.h>
23 #include <linux/hash.h>
24 #include <linux/posix-clock.h>
25 #include <linux/posix-timers.h>
26 #include <linux/syscalls.h>
27 #include <linux/wait.h>
28 #include <linux/workqueue.h>
29 #include <linux/export.h>
30 #include <linux/hashtable.h>
31 #include <linux/compat.h>
32 #include <linux/nospec.h>
33 #include <linux/time_namespace.h>
34 
35 #include "timekeeping.h"
36 #include "posix-timers.h"
37 
38 static struct kmem_cache *posix_timers_cache;
39 
40 /*
41  * Timers are managed in a hash table for lockless lookup. The hash key is
42  * constructed from current::signal and the timer ID and the timer is
43  * matched against current::signal and the timer ID when walking the hash
44  * bucket list.
45  *
46  * This allows checkpoint/restore to reconstruct the exact timer IDs for
47  * a process.
48  */
49 static DEFINE_HASHTABLE(posix_timers_hashtable, 9);
50 static DEFINE_SPINLOCK(hash_lock);
51 
52 static const struct k_clock * const posix_clocks[];
53 static const struct k_clock *clockid_to_kclock(const clockid_t id);
54 static const struct k_clock clock_realtime, clock_monotonic;
55 
/*
 * Build-time check: SIGEV_THREAD_ID is tested as a flag bit, so it must not
 * overlap any bit used by the other SIGEV notification values.
 */
#if SIGEV_THREAD_ID & (SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD)
#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
#endif
61 
62 static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags);
63 
64 #define lock_timer(tid, flags)						   \
65 ({	struct k_itimer *__timr;					   \
66 	__cond_lock(&__timr->it_lock, __timr = __lock_timer(tid, flags));  \
67 	__timr;								   \
68 })
69 
hash(struct signal_struct * sig,unsigned int nr)70 static int hash(struct signal_struct *sig, unsigned int nr)
71 {
72 	return hash_32(hash32_ptr(sig) ^ nr, HASH_BITS(posix_timers_hashtable));
73 }
74 
/*
 * Walk hash bucket @head and return the timer which belongs to @sig and
 * carries the ID @id, or NULL if the bucket holds no such timer.
 *
 * The walk is an RCU list traversal which is also legal with hash_lock
 * held.
 */
static struct k_itimer *__posix_timers_find(struct hlist_head *head,
					    struct signal_struct *sig,
					    timer_t id)
{
	struct k_itimer *pos;

	hlist_for_each_entry_rcu(pos, head, t_hash, lockdep_is_held(&hash_lock)) {
		/* it_signal may be written concurrently, hence READ_ONCE() */
		if (READ_ONCE(pos->it_signal) != sig)
			continue;
		if (pos->it_id == id)
			return pos;
	}
	return NULL;
}
88 
/*
 * Find the timer with ID @id which belongs to the signal struct of the
 * current task. Returns NULL when there is none.
 */
static struct k_itimer *posix_timer_by_id(timer_t id)
{
	struct signal_struct *sig = current->signal;

	return __posix_timers_find(&posix_timers_hashtable[hash(sig, id)], sig, id);
}
96 
posix_timer_add(struct k_itimer * timer)97 static int posix_timer_add(struct k_itimer *timer)
98 {
99 	struct signal_struct *sig = current->signal;
100 	struct hlist_head *head;
101 	unsigned int cnt, id;
102 
103 	/*
104 	 * FIXME: Replace this by a per signal struct xarray once there is
105 	 * a plan to handle the resulting CRIU regression gracefully.
106 	 */
107 	for (cnt = 0; cnt <= INT_MAX; cnt++) {
108 		spin_lock(&hash_lock);
109 		id = sig->next_posix_timer_id;
110 
111 		/* Write the next ID back. Clamp it to the positive space */
112 		sig->next_posix_timer_id = (id + 1) & INT_MAX;
113 
114 		head = &posix_timers_hashtable[hash(sig, id)];
115 		if (!__posix_timers_find(head, sig, id)) {
116 			hlist_add_head_rcu(&timer->t_hash, head);
117 			spin_unlock(&hash_lock);
118 			return id;
119 		}
120 		spin_unlock(&hash_lock);
121 		cond_resched();
122 	}
123 	/* POSIX return code when no timer ID could be allocated */
124 	return -EAGAIN;
125 }
126 
unlock_timer(struct k_itimer * timr,unsigned long flags)127 static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
128 {
129 	spin_unlock_irqrestore(&timr->it_lock, flags);
130 }
131 
/*
 * Store the current wall clock time in @tp. @which_clock is not used.
 * Always returns 0.
 */
static int posix_get_realtime_timespec(clockid_t which_clock, struct timespec64 *tp)
{
	ktime_get_real_ts64(tp);

	return 0;
}
137 
posix_get_realtime_ktime(clockid_t which_clock)138 static ktime_t posix_get_realtime_ktime(clockid_t which_clock)
139 {
140 	return ktime_get_real();
141 }
142 
posix_clock_realtime_set(const clockid_t which_clock,const struct timespec64 * tp)143 static int posix_clock_realtime_set(const clockid_t which_clock,
144 				    const struct timespec64 *tp)
145 {
146 	return do_sys_settimeofday64(tp, NULL);
147 }
148 
/*
 * Forward the adjustment request @t to do_adjtimex() and return its
 * result. @which_clock is not used.
 */
static int posix_clock_realtime_adj(const clockid_t which_clock,
				    struct __kernel_timex *t)
{
	int ret = do_adjtimex(t);

	return ret;
}
154 
/*
 * Store the current monotonic time in @tp and then apply
 * timens_add_monotonic() to it. @which_clock is not used.
 * Always returns 0.
 */
static int posix_get_monotonic_timespec(clockid_t which_clock, struct timespec64 *tp)
{
	ktime_get_ts64(tp);
	timens_add_monotonic(tp);

	return 0;
}
161 
posix_get_monotonic_ktime(clockid_t which_clock)162 static ktime_t posix_get_monotonic_ktime(clockid_t which_clock)
163 {
164 	return ktime_get();
165 }
166 
/*
 * Store the current raw monotonic time in @tp and then apply
 * timens_add_monotonic() to it. @which_clock is not used.
 * Always returns 0.
 */
static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp)
{
	ktime_get_raw_ts64(tp);
	timens_add_monotonic(tp);

	return 0;
}
173 
/*
 * Store the coarse grained wall clock time in @tp. @which_clock is not
 * used. Always returns 0.
 */
static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec64 *tp)
{
	ktime_get_coarse_real_ts64(tp);

	return 0;
}
179 
/*
 * Store the coarse grained monotonic time in @tp and then apply
 * timens_add_monotonic() to it. @which_clock is not used.
 * Always returns 0.
 */
static int posix_get_monotonic_coarse(clockid_t which_clock,
				      struct timespec64 *tp)
{
	ktime_get_coarse_ts64(tp);
	timens_add_monotonic(tp);

	return 0;
}
187 
posix_get_coarse_res(const clockid_t which_clock,struct timespec64 * tp)188 static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 *tp)
189 {
190 	*tp = ktime_to_timespec64(KTIME_LOW_RES);
191 	return 0;
192 }
193 
/*
 * Store the current boot time clock value in @tp and then apply
 * timens_add_boottime() to it. @which_clock is not used.
 * Always returns 0.
 */
static int posix_get_boottime_timespec(const clockid_t which_clock, struct timespec64 *tp)
{
	ktime_get_boottime_ts64(tp);
	timens_add_boottime(tp);

	return 0;
}
200 
posix_get_boottime_ktime(const clockid_t which_clock)201 static ktime_t posix_get_boottime_ktime(const clockid_t which_clock)
202 {
203 	return ktime_get_boottime();
204 }
205 
/*
 * Store the current TAI clock value in @tp. @which_clock is not used.
 * Always returns 0.
 */
static int posix_get_tai_timespec(clockid_t which_clock, struct timespec64 *tp)
{
	ktime_get_clocktai_ts64(tp);

	return 0;
}
211 
posix_get_tai_ktime(clockid_t which_clock)212 static ktime_t posix_get_tai_ktime(clockid_t which_clock)
213 {
214 	return ktime_get_clocktai();
215 }
216 
/*
 * Report the hrtimer based clock resolution in @tp: zero seconds and
 * hrtimer_resolution nanoseconds. @which_clock is not used.
 * Always returns 0.
 */
static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp)
{
	tp->tv_nsec = hrtimer_resolution;
	tp->tv_sec = 0;

	return 0;
}
223 
init_posix_timers(void)224 static __init int init_posix_timers(void)
225 {
226 	posix_timers_cache = kmem_cache_create("posix_timers_cache",
227 					sizeof(struct k_itimer), 0,
228 					SLAB_PANIC | SLAB_ACCOUNT, NULL);
229 	return 0;
230 }
231 __initcall(init_posix_timers);
232 
233 /*
234  * The siginfo si_overrun field and the return value of timer_getoverrun(2)
235  * are of type int. Clamp the overrun value to INT_MAX
236  */
timer_overrun_to_int(struct k_itimer * timr,int baseval)237 static inline int timer_overrun_to_int(struct k_itimer *timr, int baseval)
238 {
239 	s64 sum = timr->it_overrun_last + (s64)baseval;
240 
241 	return sum > (s64)INT_MAX ? INT_MAX : (int)sum;
242 }
243 
common_hrtimer_rearm(struct k_itimer * timr)244 static void common_hrtimer_rearm(struct k_itimer *timr)
245 {
246 	struct hrtimer *timer = &timr->it.real.timer;
247 
248 	timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(),
249 					    timr->it_interval);
250 	hrtimer_restart(timer);
251 }
252 
253 /*
254  * This function is called from the signal delivery code if
255  * info->si_sys_private is not zero, which indicates that the timer has to
256  * be rearmed. Restart the timer and update info::si_overrun.
257  */
posixtimer_rearm(struct kernel_siginfo * info)258 void posixtimer_rearm(struct kernel_siginfo *info)
259 {
260 	struct k_itimer *timr;
261 	unsigned long flags;
262 
263 	timr = lock_timer(info->si_tid, &flags);
264 	if (!timr)
265 		return;
266 
267 	if (timr->it_interval && timr->it_requeue_pending == info->si_sys_private) {
268 		timr->kclock->timer_rearm(timr);
269 
270 		timr->it_active = 1;
271 		timr->it_overrun_last = timr->it_overrun;
272 		timr->it_overrun = -1LL;
273 		++timr->it_requeue_pending;
274 
275 		info->si_overrun = timer_overrun_to_int(timr, info->si_overrun);
276 	}
277 
278 	unlock_timer(timr, flags);
279 }
280 
posix_timer_queue_signal(struct k_itimer * timr)281 int posix_timer_queue_signal(struct k_itimer *timr)
282 {
283 	int ret, si_private = 0;
284 	enum pid_type type;
285 
286 	lockdep_assert_held(&timr->it_lock);
287 
288 	timr->it_active = 0;
289 	if (timr->it_interval)
290 		si_private = ++timr->it_requeue_pending;
291 
292 	/*
293 	 * FIXME: if ->sigq is queued we can race with
294 	 * dequeue_signal()->posixtimer_rearm().
295 	 *
296 	 * If dequeue_signal() sees the "right" value of
297 	 * si_sys_private it calls posixtimer_rearm().
298 	 * We re-queue ->sigq and drop ->it_lock().
299 	 * posixtimer_rearm() locks the timer
300 	 * and re-schedules it while ->sigq is pending.
301 	 * Not really bad, but not that we want.
302 	 */
303 	timr->sigq->info.si_sys_private = si_private;
304 
305 	type = !(timr->it_sigev_notify & SIGEV_THREAD_ID) ? PIDTYPE_TGID : PIDTYPE_PID;
306 	ret = send_sigqueue(timr->sigq, timr->it_pid, type);
307 	/* If we failed to send the signal the timer stops. */
308 	return ret > 0;
309 }
310 
311 /*
312  * This function gets called when a POSIX.1b interval timer expires from
313  * the HRTIMER interrupt (soft interrupt on RT kernels).
314  *
315  * Handles CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME and CLOCK_TAI
316  * based timers.
317  */
posix_timer_fn(struct hrtimer * timer)318 static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
319 {
320 	struct k_itimer *timr = container_of(timer, struct k_itimer, it.real.timer);
321 	enum hrtimer_restart ret = HRTIMER_NORESTART;
322 	unsigned long flags;
323 
324 	spin_lock_irqsave(&timr->it_lock, flags);
325 
326 	if (posix_timer_queue_signal(timr)) {
327 		/*
328 		 * The signal was not queued due to SIG_IGN. As a
329 		 * consequence the timer is not going to be rearmed from
330 		 * the signal delivery path. But as a real signal handler
331 		 * can be installed later the timer must be rearmed here.
332 		 */
333 		if (timr->it_interval != 0) {
334 			ktime_t now = hrtimer_cb_get_time(timer);
335 
336 			/*
337 			 * FIXME: What we really want, is to stop this
338 			 * timer completely and restart it in case the
339 			 * SIG_IGN is removed. This is a non trivial
340 			 * change to the signal handling code.
341 			 *
342 			 * For now let timers with an interval less than a
343 			 * jiffy expire every jiffy and recheck for a
344 			 * valid signal handler.
345 			 *
346 			 * This avoids interrupt starvation in case of a
347 			 * very small interval, which would expire the
348 			 * timer immediately again.
349 			 *
350 			 * Moving now ahead of time by one jiffy tricks
351 			 * hrtimer_forward() to expire the timer later,
352 			 * while it still maintains the overrun accuracy
353 			 * for the price of a slight inconsistency in the
354 			 * timer_gettime() case. This is at least better
355 			 * than a timer storm.
356 			 *
357 			 * Only required when high resolution timers are
358 			 * enabled as the periodic tick based timers are
359 			 * automatically aligned to the next tick.
360 			 */
361 			if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS)) {
362 				ktime_t kj = TICK_NSEC;
363 
364 				if (timr->it_interval < kj)
365 					now = ktime_add(now, kj);
366 			}
367 
368 			timr->it_overrun += hrtimer_forward(timer, now, timr->it_interval);
369 			ret = HRTIMER_RESTART;
370 			++timr->it_requeue_pending;
371 			timr->it_active = 1;
372 		}
373 	}
374 
375 	unlock_timer(timr, flags);
376 	return ret;
377 }
378 
/*
 * Validate @event and return the pid which is the target of the timer
 * signal, or NULL when @event is not acceptable.
 *
 *  - SIGEV_NONE:   thread group pid of current, no further checks
 *  - SIGEV_SIGNAL, SIGEV_THREAD: thread group pid of current, the signal
 *    number must be within 1..SIGRTMAX
 *  - SIGEV_SIGNAL | SIGEV_THREAD_ID: pid of the requested thread, which
 *    must exist and be in the thread group of current; the signal number
 *    must be within 1..SIGRTMAX
 *  - anything else is rejected
 *
 * No reference is taken on the returned pid.
 */
static struct pid *good_sigevent(sigevent_t *event)
{
	struct pid *pid = task_tgid(current);
	struct task_struct *rtn;

	switch (event->sigev_notify) {
	case SIGEV_NONE:
		return pid;
	case SIGEV_SIGNAL:
	case SIGEV_THREAD:
		break;
	case SIGEV_SIGNAL | SIGEV_THREAD_ID:
		pid = find_vpid(event->sigev_notify_thread_id);
		rtn = pid_task(pid, PIDTYPE_PID);
		if (!rtn || !same_thread_group(rtn, current))
			return NULL;
		break;
	default:
		return NULL;
	}

	/* All signal delivering modes require a valid signal number */
	if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX)
		return NULL;

	return pid;
}
402 
alloc_posix_timer(void)403 static struct k_itimer * alloc_posix_timer(void)
404 {
405 	struct k_itimer *tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL);
406 
407 	if (!tmr)
408 		return tmr;
409 	if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
410 		kmem_cache_free(posix_timers_cache, tmr);
411 		return NULL;
412 	}
413 	clear_siginfo(&tmr->sigq->info);
414 	return tmr;
415 }
416 
k_itimer_rcu_free(struct rcu_head * head)417 static void k_itimer_rcu_free(struct rcu_head *head)
418 {
419 	struct k_itimer *tmr = container_of(head, struct k_itimer, rcu);
420 
421 	kmem_cache_free(posix_timers_cache, tmr);
422 }
423 
posix_timer_free(struct k_itimer * tmr)424 static void posix_timer_free(struct k_itimer *tmr)
425 {
426 	put_pid(tmr->it_pid);
427 	sigqueue_free(tmr->sigq);
428 	call_rcu(&tmr->rcu, k_itimer_rcu_free);
429 }
430 
posix_timer_unhash_and_free(struct k_itimer * tmr)431 static void posix_timer_unhash_and_free(struct k_itimer *tmr)
432 {
433 	spin_lock(&hash_lock);
434 	hlist_del_rcu(&tmr->t_hash);
435 	spin_unlock(&hash_lock);
436 	posix_timer_free(tmr);
437 }
438 
common_timer_create(struct k_itimer * new_timer)439 static int common_timer_create(struct k_itimer *new_timer)
440 {
441 	hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
442 	return 0;
443 }
444 
445 /* Create a POSIX.1b interval timer. */
do_timer_create(clockid_t which_clock,struct sigevent * event,timer_t __user * created_timer_id)446 static int do_timer_create(clockid_t which_clock, struct sigevent *event,
447 			   timer_t __user *created_timer_id)
448 {
449 	const struct k_clock *kc = clockid_to_kclock(which_clock);
450 	struct k_itimer *new_timer;
451 	int error, new_timer_id;
452 
453 	if (!kc)
454 		return -EINVAL;
455 	if (!kc->timer_create)
456 		return -EOPNOTSUPP;
457 
458 	new_timer = alloc_posix_timer();
459 	if (unlikely(!new_timer))
460 		return -EAGAIN;
461 
462 	spin_lock_init(&new_timer->it_lock);
463 
464 	/*
465 	 * Add the timer to the hash table. The timer is not yet valid
466 	 * because new_timer::it_signal is still NULL. The timer id is also
467 	 * not yet visible to user space.
468 	 */
469 	new_timer_id = posix_timer_add(new_timer);
470 	if (new_timer_id < 0) {
471 		posix_timer_free(new_timer);
472 		return new_timer_id;
473 	}
474 
475 	new_timer->it_id = (timer_t) new_timer_id;
476 	new_timer->it_clock = which_clock;
477 	new_timer->kclock = kc;
478 	new_timer->it_overrun = -1LL;
479 
480 	if (event) {
481 		rcu_read_lock();
482 		new_timer->it_pid = get_pid(good_sigevent(event));
483 		rcu_read_unlock();
484 		if (!new_timer->it_pid) {
485 			error = -EINVAL;
486 			goto out;
487 		}
488 		new_timer->it_sigev_notify     = event->sigev_notify;
489 		new_timer->sigq->info.si_signo = event->sigev_signo;
490 		new_timer->sigq->info.si_value = event->sigev_value;
491 	} else {
492 		new_timer->it_sigev_notify     = SIGEV_SIGNAL;
493 		new_timer->sigq->info.si_signo = SIGALRM;
494 		memset(&new_timer->sigq->info.si_value, 0, sizeof(sigval_t));
495 		new_timer->sigq->info.si_value.sival_int = new_timer->it_id;
496 		new_timer->it_pid = get_pid(task_tgid(current));
497 	}
498 
499 	new_timer->sigq->info.si_tid   = new_timer->it_id;
500 	new_timer->sigq->info.si_code  = SI_TIMER;
501 
502 	if (copy_to_user(created_timer_id, &new_timer_id, sizeof (new_timer_id))) {
503 		error = -EFAULT;
504 		goto out;
505 	}
506 	/*
507 	 * After succesful copy out, the timer ID is visible to user space
508 	 * now but not yet valid because new_timer::signal is still NULL.
509 	 *
510 	 * Complete the initialization with the clock specific create
511 	 * callback.
512 	 */
513 	error = kc->timer_create(new_timer);
514 	if (error)
515 		goto out;
516 
517 	/*
518 	 * timer::it_lock ensures that __lock_timer() observes a fully
519 	 * initialized timer when it observes a valid timer::it_signal.
520 	 *
521 	 * sighand::siglock is required to protect signal::posix_timers.
522 	 */
523 	scoped_guard (spinlock_irq, &new_timer->it_lock) {
524 		guard(spinlock)(&current->sighand->siglock);
525 		/* This makes the timer valid in the hash table */
526 		WRITE_ONCE(new_timer->it_signal, current->signal);
527 		hlist_add_head(&new_timer->list, &current->signal->posix_timers);
528 	}
529 	/*
530 	 * After unlocking @new_timer is subject to concurrent removal and
531 	 * cannot be touched anymore
532 	 */
533 	return 0;
534 out:
535 	posix_timer_unhash_and_free(new_timer);
536 	return error;
537 }
538 
/*
 * timer_create(2): copy the optional sigevent from user space and create
 * the timer. Returns -EFAULT when the sigevent cannot be read, otherwise
 * the result of do_timer_create().
 */
SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
		struct sigevent __user *, timer_event_spec,
		timer_t __user *, created_timer_id)
{
	sigevent_t event;

	/* No sigevent supplied: let do_timer_create() apply the default */
	if (!timer_event_spec)
		return do_timer_create(which_clock, NULL, created_timer_id);

	if (copy_from_user(&event, timer_event_spec, sizeof (event)))
		return -EFAULT;

	return do_timer_create(which_clock, &event, created_timer_id);
}
552 
#ifdef CONFIG_COMPAT
/*
 * Compat variant of timer_create(2): the sigevent is converted with
 * get_compat_sigevent(). Returns -EFAULT when that fails, otherwise the
 * result of do_timer_create().
 */
COMPAT_SYSCALL_DEFINE3(timer_create, clockid_t, which_clock,
		       struct compat_sigevent __user *, timer_event_spec,
		       timer_t __user *, created_timer_id)
{
	sigevent_t event;

	/* No sigevent supplied: let do_timer_create() apply the default */
	if (!timer_event_spec)
		return do_timer_create(which_clock, NULL, created_timer_id);

	if (get_compat_sigevent(&event, timer_event_spec))
		return -EFAULT;

	return do_timer_create(which_clock, &event, created_timer_id);
}
#endif
568 
/*
 * Look up the timer @timer_id of the current process and lock it.
 *
 * On success the timer is returned with timr::it_lock held and the
 * previous interrupt state stored in @flags. NULL is returned when the ID
 * is out of range, when no such timer is hashed, or when the timer is not
 * (or no longer) valid for the current process.
 */
static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
{
	struct k_itimer *timr;

	/*
	 * timer_t could be any type >= int. Make sure that any @timer_id
	 * outside the positive int range fails the lookup.
	 */
	if ((unsigned long long)timer_id > INT_MAX)
		return NULL;

	/*
	 * The hash lookup and the timers are RCU protected.
	 *
	 * Timers are added to the hash in invalid state where
	 * timr::it_signal == NULL. timr::it_signal is only set after the
	 * rest of the initialization succeeded.
	 *
	 * Timer destruction happens in steps:
	 *  1) Set timr::it_signal to NULL with timr::it_lock held
	 *  2) Release timr::it_lock
	 *  3) Remove from the hash under hash_lock
	 *  4) Call RCU for removal after the grace period
	 *
	 * Holding rcu_read_lock() across the lookup ensures that the timer
	 * cannot be freed.
	 *
	 * The lookup validates locklessly that timr::it_signal ==
	 * current::signal and timr::it_id == @timer_id. timr::it_id can't
	 * change, but timr::it_signal becomes NULL during destruction.
	 */
	rcu_read_lock();
	timr = posix_timer_by_id(timer_id);
	if (timr) {
		spin_lock_irqsave(&timr->it_lock, *flags);
		/*
		 * Revalidate timr::it_signal under timr::it_lock. Pairs
		 * with #1 above. A timer which lost its signal struct is
		 * unlocked again and treated as not found.
		 */
		if (timr->it_signal != current->signal) {
			spin_unlock_irqrestore(&timr->it_lock, *flags);
			timr = NULL;
		}
	}
	rcu_read_unlock();

	return timr;
}
619 
/*
 * Return the time left until the hrtimer of @timr expires, relative to
 * @now, as computed by __hrtimer_expires_remaining_adjusted().
 */
static ktime_t common_hrtimer_remaining(struct k_itimer *timr, ktime_t now)
{
	return __hrtimer_expires_remaining_adjusted(&timr->it.real.timer, now);
}
626 
common_hrtimer_forward(struct k_itimer * timr,ktime_t now)627 static s64 common_hrtimer_forward(struct k_itimer *timr, ktime_t now)
628 {
629 	struct hrtimer *timer = &timr->it.real.timer;
630 
631 	return hrtimer_forward(timer, now, timr->it_interval);
632 }
633 
634 /*
635  * Get the time remaining on a POSIX.1b interval timer.
636  *
637  * Two issues to handle here:
638  *
639  *  1) The timer has a requeue pending. The return value must appear as
640  *     if the timer has been requeued right now.
641  *
642  *  2) The timer is a SIGEV_NONE timer. These timers are never enqueued
643  *     into the hrtimer queue and therefore never expired. Emulate expiry
644  *     here taking #1 into account.
645  */
common_timer_get(struct k_itimer * timr,struct itimerspec64 * cur_setting)646 void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
647 {
648 	const struct k_clock *kc = timr->kclock;
649 	ktime_t now, remaining, iv;
650 	bool sig_none;
651 
652 	sig_none = timr->it_sigev_notify == SIGEV_NONE;
653 	iv = timr->it_interval;
654 
655 	/* interval timer ? */
656 	if (iv) {
657 		cur_setting->it_interval = ktime_to_timespec64(iv);
658 	} else if (!timr->it_active) {
659 		/*
660 		 * SIGEV_NONE oneshot timers are never queued and therefore
661 		 * timr->it_active is always false. The check below
662 		 * vs. remaining time will handle this case.
663 		 *
664 		 * For all other timers there is nothing to update here, so
665 		 * return.
666 		 */
667 		if (!sig_none)
668 			return;
669 	}
670 
671 	now = kc->clock_get_ktime(timr->it_clock);
672 
673 	/*
674 	 * If this is an interval timer and either has requeue pending or
675 	 * is a SIGEV_NONE timer move the expiry time forward by intervals,
676 	 * so expiry is > now.
677 	 */
678 	if (iv && (timr->it_requeue_pending & REQUEUE_PENDING || sig_none))
679 		timr->it_overrun += kc->timer_forward(timr, now);
680 
681 	remaining = kc->timer_remaining(timr, now);
682 	/*
683 	 * As @now is retrieved before a possible timer_forward() and
684 	 * cannot be reevaluated by the compiler @remaining is based on the
685 	 * same @now value. Therefore @remaining is consistent vs. @now.
686 	 *
687 	 * Consequently all interval timers, i.e. @iv > 0, cannot have a
688 	 * remaining time <= 0 because timer_forward() guarantees to move
689 	 * them forward so that the next timer expiry is > @now.
690 	 */
691 	if (remaining <= 0) {
692 		/*
693 		 * A single shot SIGEV_NONE timer must return 0, when it is
694 		 * expired! Timers which have a real signal delivery mode
695 		 * must return a remaining time greater than 0 because the
696 		 * signal has not yet been delivered.
697 		 */
698 		if (!sig_none)
699 			cur_setting->it_value.tv_nsec = 1;
700 	} else {
701 		cur_setting->it_value = ktime_to_timespec64(remaining);
702 	}
703 }
704 
/*
 * Common part of the timer_gettime() syscalls: lock the timer, zero
 * @setting and let the clock specific timer_get() callback fill it in.
 *
 * Returns 0 on success, -EINVAL when the timer does not exist or the
 * clock lacks a timer_get() callback (the latter also triggers a one
 * time warning).
 */
static int do_timer_gettime(timer_t timer_id,  struct itimerspec64 *setting)
{
	const struct k_clock *kc;
	struct k_itimer *timr;
	unsigned long flags;
	int ret;

	timr = lock_timer(timer_id, &flags);
	if (!timr)
		return -EINVAL;

	memset(setting, 0, sizeof(*setting));
	kc = timr->kclock;
	if (WARN_ON_ONCE(!kc || !kc->timer_get)) {
		ret = -EINVAL;
	} else {
		kc->timer_get(timr, setting);
		ret = 0;
	}

	unlock_timer(timr, flags);
	return ret;
}
726 
727 /* Get the time remaining on a POSIX.1b interval timer. */
SYSCALL_DEFINE2(timer_gettime,timer_t,timer_id,struct __kernel_itimerspec __user *,setting)728 SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
729 		struct __kernel_itimerspec __user *, setting)
730 {
731 	struct itimerspec64 cur_setting;
732 
733 	int ret = do_timer_gettime(timer_id, &cur_setting);
734 	if (!ret) {
735 		if (put_itimerspec64(&cur_setting, setting))
736 			ret = -EFAULT;
737 	}
738 	return ret;
739 }
740 
#ifdef CONFIG_COMPAT_32BIT_TIME

/*
 * Variant of timer_gettime(2) which reports the result as struct
 * old_itimerspec32. Returns the error of do_timer_gettime() or -EFAULT
 * when the copy out fails.
 */
SYSCALL_DEFINE2(timer_gettime32, timer_t, timer_id,
		struct old_itimerspec32 __user *, setting)
{
	struct itimerspec64 cur_setting;
	int ret;

	ret = do_timer_gettime(timer_id, &cur_setting);
	if (ret)
		return ret;

	return put_old_itimerspec32(&cur_setting, setting) ? -EFAULT : 0;
}

#endif
757 
758 /**
759  * sys_timer_getoverrun - Get the number of overruns of a POSIX.1b interval timer
760  * @timer_id:	The timer ID which identifies the timer
761  *
762  * The "overrun count" of a timer is one plus the number of expiration
763  * intervals which have elapsed between the first expiry, which queues the
764  * signal and the actual signal delivery. On signal delivery the "overrun
765  * count" is calculated and cached, so it can be returned directly here.
766  *
767  * As this is relative to the last queued signal the returned overrun count
768  * is meaningless outside of the signal delivery path and even there it
769  * does not accurately reflect the current state when user space evaluates
770  * it.
771  *
772  * Returns:
773  *	-EINVAL		@timer_id is invalid
774  *	1..INT_MAX	The number of overruns related to the last delivered signal
775  */
SYSCALL_DEFINE1(timer_getoverrun,timer_t,timer_id)776 SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
777 {
778 	struct k_itimer *timr;
779 	unsigned long flags;
780 	int overrun;
781 
782 	timr = lock_timer(timer_id, &flags);
783 	if (!timr)
784 		return -EINVAL;
785 
786 	overrun = timer_overrun_to_int(timr, 0);
787 	unlock_timer(timr, flags);
788 
789 	return overrun;
790 }
791 
/*
 * Arm the hrtimer of @timr.
 *
 * @expires:	expiry time, absolute or relative depending on @absolute
 * @absolute:	true when @expires is an absolute time
 * @sigev_none:	true for SIGEV_NONE timers; their expiry time is set up
 *		but the hrtimer is not started
 */
static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires,
			       bool absolute, bool sigev_none)
{
	struct hrtimer *hrt = &timr->it.real.timer;
	enum hrtimer_mode mode = HRTIMER_MODE_REL;

	if (absolute)
		mode = HRTIMER_MODE_ABS;

	/*
	 * Posix magic: Relative CLOCK_REALTIME timers are not affected by
	 * clock modifications, so they become CLOCK_MONOTONIC based under
	 * the hood. See hrtimer_init(). Update timr->kclock, so the generic
	 * functions which use timr->kclock->clock_get_*() work.
	 *
	 * Note: it_clock stays unmodified, because the next timer_set()
	 * might use ABSTIME, so it needs to switch back.
	 */
	if (timr->it_clock == CLOCK_REALTIME) {
		if (absolute)
			timr->kclock = &clock_realtime;
		else
			timr->kclock = &clock_monotonic;
	}

	hrtimer_init(hrt, timr->it_clock, mode);
	hrt->function = posix_timer_fn;

	/* Convert a relative expiry into an absolute one on the timer's base */
	if (!absolute)
		expires = ktime_add_safe(expires, hrt->base->get_time());
	hrtimer_set_expires(hrt, expires);

	if (!sigev_none)
		hrtimer_start_expires(hrt, HRTIMER_MODE_ABS);
}
821 
common_hrtimer_try_to_cancel(struct k_itimer * timr)822 static int common_hrtimer_try_to_cancel(struct k_itimer *timr)
823 {
824 	return hrtimer_try_to_cancel(&timr->it.real.timer);
825 }
826 
common_timer_wait_running(struct k_itimer * timer)827 static void common_timer_wait_running(struct k_itimer *timer)
828 {
829 	hrtimer_cancel_wait_running(&timer->it.real.timer);
830 }
831 
832 /*
833  * On PREEMPT_RT this prevents priority inversion and a potential livelock
834  * against the ksoftirqd thread in case that ksoftirqd gets preempted while
835  * executing a hrtimer callback.
836  *
837  * See the comments in hrtimer_cancel_wait_running(). For PREEMPT_RT=n this
838  * just results in a cpu_relax().
839  *
840  * For POSIX CPU timers with CONFIG_POSIX_CPU_TIMERS_TASK_WORK=n this is
841  * just a cpu_relax(). With CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y this
842  * prevents spinning on an eventually scheduled out task and a livelock
843  * when the task which tries to delete or disarm the timer has preempted
844  * the task which runs the expiry in task work context.
845  */
timer_wait_running(struct k_itimer * timer,unsigned long * flags)846 static struct k_itimer *timer_wait_running(struct k_itimer *timer,
847 					   unsigned long *flags)
848 {
849 	const struct k_clock *kc = READ_ONCE(timer->kclock);
850 	timer_t timer_id = READ_ONCE(timer->it_id);
851 
852 	/* Prevent kfree(timer) after dropping the lock */
853 	rcu_read_lock();
854 	unlock_timer(timer, *flags);
855 
856 	/*
857 	 * kc->timer_wait_running() might drop RCU lock. So @timer
858 	 * cannot be touched anymore after the function returns!
859 	 */
860 	if (!WARN_ON_ONCE(!kc->timer_wait_running))
861 		kc->timer_wait_running(timer);
862 
863 	rcu_read_unlock();
864 	/* Relock the timer. It might be not longer hashed. */
865 	return lock_timer(timer_id, flags);
866 }
867 
868 /*
869  * Set up the new interval and reset the signal delivery data
870  */
posix_timer_set_common(struct k_itimer * timer,struct itimerspec64 * new_setting)871 void posix_timer_set_common(struct k_itimer *timer, struct itimerspec64 *new_setting)
872 {
873 	if (new_setting->it_value.tv_sec || new_setting->it_value.tv_nsec)
874 		timer->it_interval = timespec64_to_ktime(new_setting->it_interval);
875 	else
876 		timer->it_interval = 0;
877 
878 	/* Prevent reloading in case there is a signal pending */
879 	timer->it_requeue_pending = (timer->it_requeue_pending + 2) & ~REQUEUE_PENDING;
880 	/* Reset overrun accounting */
881 	timer->it_overrun_last = 0;
882 	timer->it_overrun = -1LL;
883 }
884 
885 /* Set a POSIX.1b interval timer. */
common_timer_set(struct k_itimer * timr,int flags,struct itimerspec64 * new_setting,struct itimerspec64 * old_setting)886 int common_timer_set(struct k_itimer *timr, int flags,
887 		     struct itimerspec64 *new_setting,
888 		     struct itimerspec64 *old_setting)
889 {
890 	const struct k_clock *kc = timr->kclock;
891 	bool sigev_none;
892 	ktime_t expires;
893 
894 	if (old_setting)
895 		common_timer_get(timr, old_setting);
896 
897 	/* Prevent rearming by clearing the interval */
898 	timr->it_interval = 0;
899 	/*
900 	 * Careful here. On SMP systems the timer expiry function could be
901 	 * active and spinning on timr->it_lock.
902 	 */
903 	if (kc->timer_try_to_cancel(timr) < 0)
904 		return TIMER_RETRY;
905 
906 	timr->it_active = 0;
907 	posix_timer_set_common(timr, new_setting);
908 
909 	/* Keep timer disarmed when it_value is zero */
910 	if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec)
911 		return 0;
912 
913 	expires = timespec64_to_ktime(new_setting->it_value);
914 	if (flags & TIMER_ABSTIME)
915 		expires = timens_ktime_to_host(timr->it_clock, expires);
916 	sigev_none = timr->it_sigev_notify == SIGEV_NONE;
917 
918 	kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none);
919 	timr->it_active = !sigev_none;
920 	return 0;
921 }
922 
/*
 * Common backend of the timer_settime() syscall variants.
 *
 * Validates @new_spec64, locks the timer identified by @timer_id and hands
 * the request to the timer_set() callback of the timer's clock. As long as
 * the callback returns TIMER_RETRY the operation is repeated after waiting
 * for the timer via timer_wait_running().
 *
 * Returns -EINVAL when a timespec is invalid, when the timer cannot be
 * locked or when the clock provides no timer_set() callback. Otherwise the
 * result of the callback is returned.
 */
static int do_timer_settime(timer_t timer_id, int tmr_flags,
			    struct itimerspec64 *new_spec64,
			    struct itimerspec64 *old_spec64)
{
	const struct k_clock *kc;
	struct k_itimer *timr;
	unsigned long flags;
	int error;

	if (!(timespec64_valid(&new_spec64->it_interval) &&
	      timespec64_valid(&new_spec64->it_value)))
		return -EINVAL;

	if (old_spec64)
		memset(old_spec64, 0, sizeof(*old_spec64));

	/* timer_wait_running() unlocks and relocks the timer if it still exists */
	for (timr = lock_timer(timer_id, &flags); timr;
	     timr = timer_wait_running(timr, &flags)) {
		if (old_spec64)
			old_spec64->it_interval = ktime_to_timespec64(timr->it_interval);

		kc = timr->kclock;
		if (WARN_ON_ONCE(!kc || !kc->timer_set))
			error = -EINVAL;
		else
			error = kc->timer_set(timr, tmr_flags, new_spec64, old_spec64);

		if (error != TIMER_RETRY) {
			unlock_timer(timr, flags);
			return error;
		}

		/* The old time was already retrieved, do not fetch it again */
		old_spec64 = NULL;
	}

	/* The timer could not be locked, either initially or after waiting */
	return -EINVAL;
}
964 
965 /* Set a POSIX.1b interval timer */
SYSCALL_DEFINE4(timer_settime,timer_t,timer_id,int,flags,const struct __kernel_itimerspec __user *,new_setting,struct __kernel_itimerspec __user *,old_setting)966 SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
967 		const struct __kernel_itimerspec __user *, new_setting,
968 		struct __kernel_itimerspec __user *, old_setting)
969 {
970 	struct itimerspec64 new_spec, old_spec, *rtn;
971 	int error = 0;
972 
973 	if (!new_setting)
974 		return -EINVAL;
975 
976 	if (get_itimerspec64(&new_spec, new_setting))
977 		return -EFAULT;
978 
979 	rtn = old_setting ? &old_spec : NULL;
980 	error = do_timer_settime(timer_id, flags, &new_spec, rtn);
981 	if (!error && old_setting) {
982 		if (put_itimerspec64(&old_spec, old_setting))
983 			error = -EFAULT;
984 	}
985 	return error;
986 }
987 
988 #ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * timer_settime() variant which exchanges struct old_itimerspec32 with
 * user space. Error handling is the same as in the native syscall.
 */
SYSCALL_DEFINE4(timer_settime32, timer_t, timer_id, int, flags,
		struct old_itimerspec32 __user *, new,
		struct old_itimerspec32 __user *, old)
{
	struct itimerspec64 new_spec, old_spec;
	int error;

	if (!new)
		return -EINVAL;
	if (get_old_itimerspec32(&new_spec, new))
		return -EFAULT;

	error = do_timer_settime(timer_id, flags, &new_spec,
				 old ? &old_spec : NULL);

	/* The old setting is only copied out on success and on request */
	if (error || !old)
		return error;

	return put_old_itimerspec32(&old_spec, old) ? -EFAULT : 0;
}
1009 #endif
1010 
common_timer_del(struct k_itimer * timer)1011 int common_timer_del(struct k_itimer *timer)
1012 {
1013 	const struct k_clock *kc = timer->kclock;
1014 
1015 	timer->it_interval = 0;
1016 	if (kc->timer_try_to_cancel(timer) < 0)
1017 		return TIMER_RETRY;
1018 	timer->it_active = 0;
1019 	return 0;
1020 }
1021 
timer_delete_hook(struct k_itimer * timer)1022 static inline int timer_delete_hook(struct k_itimer *timer)
1023 {
1024 	const struct k_clock *kc = timer->kclock;
1025 
1026 	if (WARN_ON_ONCE(!kc || !kc->timer_del))
1027 		return -EINVAL;
1028 	return kc->timer_del(timer);
1029 }
1030 
1031 /* Delete a POSIX.1b interval timer. */
SYSCALL_DEFINE1(timer_delete,timer_t,timer_id)1032 SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
1033 {
1034 	struct k_itimer *timer;
1035 	unsigned long flags;
1036 
1037 	timer = lock_timer(timer_id, &flags);
1038 
1039 retry_delete:
1040 	if (!timer)
1041 		return -EINVAL;
1042 
1043 	if (unlikely(timer_delete_hook(timer) == TIMER_RETRY)) {
1044 		/* Unlocks and relocks the timer if it still exists */
1045 		timer = timer_wait_running(timer, &flags);
1046 		goto retry_delete;
1047 	}
1048 
1049 	spin_lock(&current->sighand->siglock);
1050 	hlist_del(&timer->list);
1051 	spin_unlock(&current->sighand->siglock);
1052 	/*
1053 	 * A concurrent lookup could check timer::it_signal lockless. It
1054 	 * will reevaluate with timer::it_lock held and observe the NULL.
1055 	 */
1056 	WRITE_ONCE(timer->it_signal, NULL);
1057 
1058 	unlock_timer(timer, flags);
1059 	posix_timer_unhash_and_free(timer);
1060 	return 0;
1061 }
1062 
1063 /*
1064  * Delete a timer if it is armed, remove it from the hash and schedule it
1065  * for RCU freeing.
1066  */
itimer_delete(struct k_itimer * timer)1067 static void itimer_delete(struct k_itimer *timer)
1068 {
1069 	unsigned long flags;
1070 
1071 	/*
1072 	 * irqsave is required to make timer_wait_running() work.
1073 	 */
1074 	spin_lock_irqsave(&timer->it_lock, flags);
1075 
1076 retry_delete:
1077 	/*
1078 	 * Even if the timer is not longer accessible from other tasks
1079 	 * it still might be armed and queued in the underlying timer
1080 	 * mechanism. Worse, that timer mechanism might run the expiry
1081 	 * function concurrently.
1082 	 */
1083 	if (timer_delete_hook(timer) == TIMER_RETRY) {
1084 		/*
1085 		 * Timer is expired concurrently, prevent livelocks
1086 		 * and pointless spinning on RT.
1087 		 *
1088 		 * timer_wait_running() drops timer::it_lock, which opens
1089 		 * the possibility for another task to delete the timer.
1090 		 *
1091 		 * That's not possible here because this is invoked from
1092 		 * do_exit() only for the last thread of the thread group.
1093 		 * So no other task can access and delete that timer.
1094 		 */
1095 		if (WARN_ON_ONCE(timer_wait_running(timer, &flags) != timer))
1096 			return;
1097 
1098 		goto retry_delete;
1099 	}
1100 	hlist_del(&timer->list);
1101 
1102 	/*
1103 	 * Setting timer::it_signal to NULL is technically not required
1104 	 * here as nothing can access the timer anymore legitimately via
1105 	 * the hash table. Set it to NULL nevertheless so that all deletion
1106 	 * paths are consistent.
1107 	 */
1108 	WRITE_ONCE(timer->it_signal, NULL);
1109 
1110 	spin_unlock_irqrestore(&timer->it_lock, flags);
1111 	posix_timer_unhash_and_free(timer);
1112 }
1113 
1114 /*
1115  * Invoked from do_exit() when the last thread of a thread group exits.
1116  * At that point no other task can access the timers of the dying
1117  * task anymore.
1118  */
exit_itimers(struct task_struct * tsk)1119 void exit_itimers(struct task_struct *tsk)
1120 {
1121 	struct hlist_head timers;
1122 
1123 	if (hlist_empty(&tsk->signal->posix_timers))
1124 		return;
1125 
1126 	/* Protect against concurrent read via /proc/$PID/timers */
1127 	spin_lock_irq(&tsk->sighand->siglock);
1128 	hlist_move_list(&tsk->signal->posix_timers, &timers);
1129 	spin_unlock_irq(&tsk->sighand->siglock);
1130 
1131 	/* The timers are not longer accessible via tsk::signal */
1132 	while (!hlist_empty(&timers))
1133 		itimer_delete(hlist_entry(timers.first, struct k_itimer, list));
1134 }
1135 
/*
 * Set the time of the clock identified by @which_clock.
 *
 * Returns -EINVAL when the clock ID is not valid or the clock cannot be
 * set, -EFAULT when @tp cannot be read, otherwise the result of the
 * clock's clock_set() callback.
 */
SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
		const struct __kernel_timespec __user *, tp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 new_tp;

	if (!(kc && kc->clock_set))
		return -EINVAL;

	if (get_timespec64(&new_tp, tp))
		return -EFAULT;

	/*
	 * No permission check here. That has to be done inside the clock
	 * specific setter callback.
	 */
	return kc->clock_set(which_clock, &new_tp);
}
1154 
/*
 * Read the clock identified by @which_clock and store the result in @tp.
 *
 * Returns -EINVAL for an invalid clock ID, the error of the clock's
 * clock_get_timespec() callback, or -EFAULT when the copy to user space
 * fails.
 */
SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
		struct __kernel_timespec __user *, tp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 kernel_tp;
	int error;

	if (!kc)
		return -EINVAL;

	error = kc->clock_get_timespec(which_clock, &kernel_tp);
	if (error)
		return error;

	return put_timespec64(&kernel_tp, tp) ? -EFAULT : 0;
}
1172 
do_clock_adjtime(const clockid_t which_clock,struct __kernel_timex * ktx)1173 int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * ktx)
1174 {
1175 	const struct k_clock *kc = clockid_to_kclock(which_clock);
1176 
1177 	if (!kc)
1178 		return -EINVAL;
1179 	if (!kc->clock_adj)
1180 		return -EOPNOTSUPP;
1181 
1182 	return kc->clock_adj(which_clock, ktx);
1183 }
1184 
/*
 * Adjust the clock identified by @which_clock according to @utx.
 *
 * The timex data is copied in, processed by do_clock_adjtime() and copied
 * back to user space unless do_clock_adjtime() failed with a negative
 * value.
 */
SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
		struct __kernel_timex __user *, utx)
{
	struct __kernel_timex ktx;
	int err;

	if (copy_from_user(&ktx, utx, sizeof(ktx)))
		return -EFAULT;

	err = do_clock_adjtime(which_clock, &ktx);
	if (err < 0)
		return err;

	/* Non-negative results are returned along with the updated data */
	if (copy_to_user(utx, &ktx, sizeof(ktx)))
		return -EFAULT;

	return err;
}
1201 
1202 /**
1203  * sys_clock_getres - Get the resolution of a clock
1204  * @which_clock:	The clock to get the resolution for
1205  * @tp:			Pointer to a user space timespec64 for storage
1206  *
1207  * POSIX defines:
1208  *
1209  * "The clock_getres() function shall return the resolution of any
1210  * clock. Clock resolutions are implementation-defined and cannot be set by
1211  * a process. If the argument res is not NULL, the resolution of the
1212  * specified clock shall be stored in the location pointed to by res. If
1213  * res is NULL, the clock resolution is not returned. If the time argument
1214  * of clock_settime() is not a multiple of res, then the value is truncated
1215  * to a multiple of res."
1216  *
1217  * Due to the various hardware constraints the real resolution can vary
1218  * wildly and even change during runtime when the underlying devices are
1219  * replaced. The kernel also can use hardware devices with different
1220  * resolutions for reading the time and for arming timers.
1221  *
1222  * The kernel therefore deviates from the POSIX spec in various aspects:
1223  *
1224  * 1) The resolution returned to user space
1225  *
1226  *    For CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME, CLOCK_TAI,
1227  *    CLOCK_REALTIME_ALARM, CLOCK_BOOTTIME_ALARM and CLOCK_MONOTONIC_RAW
1228  *    the kernel differentiates only two cases:
1229  *
1230  *    I)  Low resolution mode:
1231  *
1232  *	  When high resolution timers are disabled at compile or runtime
1233  *	  the resolution returned is nanoseconds per tick, which represents
1234  *	  the precision at which timers expire.
1235  *
1236  *    II) High resolution mode:
1237  *
1238  *	  When high resolution timers are enabled the resolution returned
1239  *	  is always one nanosecond independent of the actual resolution of
1240  *	  the underlying hardware devices.
1241  *
1242  *	  For CLOCK_*_ALARM the actual resolution depends on system
1243  *	  state. When system is running the resolution is the same as the
1244  *	  resolution of the other clocks. During suspend the actual
1245  *	  resolution is the resolution of the underlying RTC device which
1246  *	  might be way less precise than the clockevent device used during
1247  *	  running state.
1248  *
1249  *   For CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE the resolution
1250  *   returned is always nanoseconds per tick.
1251  *
1252  *   For CLOCK_PROCESS_CPUTIME and CLOCK_THREAD_CPUTIME the resolution
1253  *   returned is always one nanosecond under the assumption that the
1254  *   underlying scheduler clock has a better resolution than nanoseconds
1255  *   per tick.
1256  *
1257  *   For dynamic POSIX clocks (PTP devices) the resolution returned is
1258  *   always one nanosecond.
1259  *
1260  * 2) Effect on sys_clock_settime()
1261  *
1262  *    The kernel does not truncate the time which is handed in to
1263  *    sys_clock_settime(). The kernel internal timekeeping is always using
1264  *    nanoseconds precision independent of the clocksource device which is
1265  *    used to read the time from. The resolution of that device only
1266  *    affects the precision of the time returned by sys_clock_gettime().
1267  *
1268  * Returns:
1269  *	0		Success. @tp contains the resolution
1270  *	-EINVAL		@which_clock is not a valid clock ID
1271  *	-EFAULT		Copying the resolution to @tp faulted
1272  *	-ENODEV		Dynamic POSIX clock is not backed by a device
1273  *	-EOPNOTSUPP	Dynamic POSIX clock does not support getres()
1274  */
SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
		struct __kernel_timespec __user *, tp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 rtn_tp;
	int error;

	if (!kc)
		return -EINVAL;

	error = kc->clock_getres(which_clock, &rtn_tp);

	/* A NULL @tp means the caller does not want the resolution returned */
	if (error || !tp)
		return error;

	return put_timespec64(&rtn_tp, tp) ? -EFAULT : 0;
}
1292 
1293 #ifdef CONFIG_COMPAT_32BIT_TIME
1294 
/*
 * clock_settime() variant which reads a struct old_timespec32 from user
 * space. Permission checks are left to the clock_set() callback.
 */
SYSCALL_DEFINE2(clock_settime32, clockid_t, which_clock,
		struct old_timespec32 __user *, tp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 ts;

	/* Invalid clock ID or a clock which cannot be set */
	if (!(kc && kc->clock_set))
		return -EINVAL;

	if (get_old_timespec32(&ts, tp))
		return -EFAULT;

	return kc->clock_set(which_clock, &ts);
}
1309 
/*
 * clock_gettime() variant which stores the result as struct
 * old_timespec32 in user space.
 */
SYSCALL_DEFINE2(clock_gettime32, clockid_t, which_clock,
		struct old_timespec32 __user *, tp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 ts;
	int err;

	if (!kc)
		return -EINVAL;

	err = kc->clock_get_timespec(which_clock, &ts);
	if (err)
		return err;

	return put_old_timespec32(&ts, tp) ? -EFAULT : 0;
}
1327 
/*
 * clock_adjtime() variant which exchanges struct old_timex32 with user
 * space. The data is copied back unless do_clock_adjtime() failed with a
 * negative value.
 */
SYSCALL_DEFINE2(clock_adjtime32, clockid_t, which_clock,
		struct old_timex32 __user *, utp)
{
	struct __kernel_timex ktx;
	int err = get_old_timex32(&ktx, utp);

	if (err)
		return err;

	err = do_clock_adjtime(which_clock, &ktx);
	if (err < 0)
		return err;

	if (put_old_timex32(utp, &ktx))
		return -EFAULT;

	return err;
}
1345 
/*
 * clock_getres() variant which stores the resolution as struct
 * old_timespec32 in user space. A NULL @tp skips the copy.
 */
SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock,
		struct old_timespec32 __user *, tp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 ts;
	int err;

	if (!kc)
		return -EINVAL;

	err = kc->clock_getres(which_clock, &ts);
	if (err || !tp)
		return err;

	return put_old_timespec32(&ts, tp) ? -EFAULT : 0;
}
1362 
1363 #endif
1364 
1365 /*
1366  * sys_clock_nanosleep() for CLOCK_REALTIME and CLOCK_TAI
1367  */
common_nsleep(const clockid_t which_clock,int flags,const struct timespec64 * rqtp)1368 static int common_nsleep(const clockid_t which_clock, int flags,
1369 			 const struct timespec64 *rqtp)
1370 {
1371 	ktime_t texp = timespec64_to_ktime(*rqtp);
1372 
1373 	return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ?
1374 				 HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
1375 				 which_clock);
1376 }
1377 
1378 /*
1379  * sys_clock_nanosleep() for CLOCK_MONOTONIC and CLOCK_BOOTTIME
1380  *
1381  * Absolute nanosleeps for these clocks are time-namespace adjusted.
1382  */
common_nsleep_timens(const clockid_t which_clock,int flags,const struct timespec64 * rqtp)1383 static int common_nsleep_timens(const clockid_t which_clock, int flags,
1384 				const struct timespec64 *rqtp)
1385 {
1386 	ktime_t texp = timespec64_to_ktime(*rqtp);
1387 
1388 	if (flags & TIMER_ABSTIME)
1389 		texp = timens_ktime_to_host(which_clock, texp);
1390 
1391 	return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ?
1392 				 HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
1393 				 which_clock);
1394 }
1395 
/*
 * Sleep on the clock identified by @which_clock.
 *
 * Returns -EINVAL for an invalid clock ID or an invalid @rqtp value,
 * -EOPNOTSUPP when the clock has no nsleep() callback and -EFAULT when
 * @rqtp cannot be read. Otherwise the result of the nsleep() callback is
 * returned.
 */
SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
		const struct __kernel_timespec __user *, rqtp,
		struct __kernel_timespec __user *, rmtp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 t;

	if (!kc)
		return -EINVAL;
	if (!kc->nsleep)
		return -EOPNOTSUPP;

	if (get_timespec64(&t, rqtp))
		return -EFAULT;
	if (!timespec64_valid(&t))
		return -EINVAL;

	/* @rmtp is ignored for absolute sleeps */
	if (flags & TIMER_ABSTIME)
		rmtp = NULL;

	/* Set up the restart block before handing over to the clock */
	current->restart_block.fn = do_no_restart_syscall;
	current->restart_block.nanosleep.rmtp = rmtp;
	if (rmtp)
		current->restart_block.nanosleep.type = TT_NATIVE;
	else
		current->restart_block.nanosleep.type = TT_NONE;

	return kc->nsleep(which_clock, flags, &t);
}
1421 
1422 #ifdef CONFIG_COMPAT_32BIT_TIME
1423 
/*
 * clock_nanosleep() variant which uses struct old_timespec32 for the user
 * space arguments. The remaining time pointer is stored in the compat
 * member of the restart block and tagged with TT_COMPAT.
 */
SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
		struct old_timespec32 __user *, rqtp,
		struct old_timespec32 __user *, rmtp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 t;

	if (!kc)
		return -EINVAL;
	if (!kc->nsleep)
		return -EOPNOTSUPP;

	if (get_old_timespec32(&t, rqtp))
		return -EFAULT;
	if (!timespec64_valid(&t))
		return -EINVAL;

	/* @rmtp is ignored for absolute sleeps */
	if (flags & TIMER_ABSTIME)
		rmtp = NULL;

	/* Set up the restart block before handing over to the clock */
	current->restart_block.fn = do_no_restart_syscall;
	current->restart_block.nanosleep.compat_rmtp = rmtp;
	if (rmtp)
		current->restart_block.nanosleep.type = TT_COMPAT;
	else
		current->restart_block.nanosleep.type = TT_NONE;

	return kc->nsleep(which_clock, flags, &t);
}
1449 
1450 #endif
1451 
1452 static const struct k_clock clock_realtime = {
1453 	.clock_getres		= posix_get_hrtimer_res,
1454 	.clock_get_timespec	= posix_get_realtime_timespec,
1455 	.clock_get_ktime	= posix_get_realtime_ktime,
1456 	.clock_set		= posix_clock_realtime_set,
1457 	.clock_adj		= posix_clock_realtime_adj,
1458 	.nsleep			= common_nsleep,
1459 	.timer_create		= common_timer_create,
1460 	.timer_set		= common_timer_set,
1461 	.timer_get		= common_timer_get,
1462 	.timer_del		= common_timer_del,
1463 	.timer_rearm		= common_hrtimer_rearm,
1464 	.timer_forward		= common_hrtimer_forward,
1465 	.timer_remaining	= common_hrtimer_remaining,
1466 	.timer_try_to_cancel	= common_hrtimer_try_to_cancel,
1467 	.timer_wait_running	= common_timer_wait_running,
1468 	.timer_arm		= common_hrtimer_arm,
1469 };
1470 
1471 static const struct k_clock clock_monotonic = {
1472 	.clock_getres		= posix_get_hrtimer_res,
1473 	.clock_get_timespec	= posix_get_monotonic_timespec,
1474 	.clock_get_ktime	= posix_get_monotonic_ktime,
1475 	.nsleep			= common_nsleep_timens,
1476 	.timer_create		= common_timer_create,
1477 	.timer_set		= common_timer_set,
1478 	.timer_get		= common_timer_get,
1479 	.timer_del		= common_timer_del,
1480 	.timer_rearm		= common_hrtimer_rearm,
1481 	.timer_forward		= common_hrtimer_forward,
1482 	.timer_remaining	= common_hrtimer_remaining,
1483 	.timer_try_to_cancel	= common_hrtimer_try_to_cancel,
1484 	.timer_wait_running	= common_timer_wait_running,
1485 	.timer_arm		= common_hrtimer_arm,
1486 };
1487 
1488 static const struct k_clock clock_monotonic_raw = {
1489 	.clock_getres		= posix_get_hrtimer_res,
1490 	.clock_get_timespec	= posix_get_monotonic_raw,
1491 };
1492 
1493 static const struct k_clock clock_realtime_coarse = {
1494 	.clock_getres		= posix_get_coarse_res,
1495 	.clock_get_timespec	= posix_get_realtime_coarse,
1496 };
1497 
1498 static const struct k_clock clock_monotonic_coarse = {
1499 	.clock_getres		= posix_get_coarse_res,
1500 	.clock_get_timespec	= posix_get_monotonic_coarse,
1501 };
1502 
1503 static const struct k_clock clock_tai = {
1504 	.clock_getres		= posix_get_hrtimer_res,
1505 	.clock_get_ktime	= posix_get_tai_ktime,
1506 	.clock_get_timespec	= posix_get_tai_timespec,
1507 	.nsleep			= common_nsleep,
1508 	.timer_create		= common_timer_create,
1509 	.timer_set		= common_timer_set,
1510 	.timer_get		= common_timer_get,
1511 	.timer_del		= common_timer_del,
1512 	.timer_rearm		= common_hrtimer_rearm,
1513 	.timer_forward		= common_hrtimer_forward,
1514 	.timer_remaining	= common_hrtimer_remaining,
1515 	.timer_try_to_cancel	= common_hrtimer_try_to_cancel,
1516 	.timer_wait_running	= common_timer_wait_running,
1517 	.timer_arm		= common_hrtimer_arm,
1518 };
1519 
1520 static const struct k_clock clock_boottime = {
1521 	.clock_getres		= posix_get_hrtimer_res,
1522 	.clock_get_ktime	= posix_get_boottime_ktime,
1523 	.clock_get_timespec	= posix_get_boottime_timespec,
1524 	.nsleep			= common_nsleep_timens,
1525 	.timer_create		= common_timer_create,
1526 	.timer_set		= common_timer_set,
1527 	.timer_get		= common_timer_get,
1528 	.timer_del		= common_timer_del,
1529 	.timer_rearm		= common_hrtimer_rearm,
1530 	.timer_forward		= common_hrtimer_forward,
1531 	.timer_remaining	= common_hrtimer_remaining,
1532 	.timer_try_to_cancel	= common_hrtimer_try_to_cancel,
1533 	.timer_wait_running	= common_timer_wait_running,
1534 	.timer_arm		= common_hrtimer_arm,
1535 };
1536 
1537 static const struct k_clock * const posix_clocks[] = {
1538 	[CLOCK_REALTIME]		= &clock_realtime,
1539 	[CLOCK_MONOTONIC]		= &clock_monotonic,
1540 	[CLOCK_PROCESS_CPUTIME_ID]	= &clock_process,
1541 	[CLOCK_THREAD_CPUTIME_ID]	= &clock_thread,
1542 	[CLOCK_MONOTONIC_RAW]		= &clock_monotonic_raw,
1543 	[CLOCK_REALTIME_COARSE]		= &clock_realtime_coarse,
1544 	[CLOCK_MONOTONIC_COARSE]	= &clock_monotonic_coarse,
1545 	[CLOCK_BOOTTIME]		= &clock_boottime,
1546 	[CLOCK_REALTIME_ALARM]		= &alarm_clock,
1547 	[CLOCK_BOOTTIME_ALARM]		= &alarm_clock,
1548 	[CLOCK_TAI]			= &clock_tai,
1549 };
1550 
clockid_to_kclock(const clockid_t id)1551 static const struct k_clock *clockid_to_kclock(const clockid_t id)
1552 {
1553 	clockid_t idx = id;
1554 
1555 	if (id < 0) {
1556 		return (id & CLOCKFD_MASK) == CLOCKFD ?
1557 			&clock_posix_dynamic : &clock_posix_cpu;
1558 	}
1559 
1560 	if (id >= ARRAY_SIZE(posix_clocks))
1561 		return NULL;
1562 
1563 	return posix_clocks[array_index_nospec(idx, ARRAY_SIZE(posix_clocks))];
1564 }
1565