1 /*
2  * Implement CPU time clocks for the POSIX clock interface.
3  */
4 
5 #include <linux/sched.h>
6 #include <linux/posix-timers.h>
7 #include <linux/errno.h>
8 #include <linux/math64.h>
9 #include <asm/uaccess.h>
10 #include <linux/kernel_stat.h>
11 #include <trace/events/timer.h>
12 #include <linux/random.h>
13 #include <linux/tick.h>
14 #include <linux/workqueue.h>
15 
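/*
 * For reference (a paraphrase of the CPUCLOCK_* helpers in
 * include/linux/posix-timers.h, not a verbatim copy): a CPU clockid
 * packs the target PID and the clock type into one negative clockid_t,
 * roughly ((~pid) << 3) | type, so that
 *
 *	CPUCLOCK_PID(clock)       -> the encoded pid (0 means "self")
 *	CPUCLOCK_PERTHREAD(clock) -> bit 2: thread clock vs. process clock
 *	CPUCLOCK_WHICH(clock)     -> low bits: CPUCLOCK_PROF, CPUCLOCK_VIRT
 *	                             or CPUCLOCK_SCHED (< CPUCLOCK_MAX)
 *
 * The validation in check_clock() below is easier to follow with this
 * layout in mind.
 */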
16 /*
17  * Called after updating RLIMIT_CPU to run cpu timer and update
18  * tsk->signal->cputime_expires expiration cache if necessary. Needs
19  * siglock protection since other code may update expiration cache as
20  * well.
21  */
22 void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
23 {
24 	cputime_t cputime = secs_to_cputime(rlim_new);
25 
26 	spin_lock_irq(&task->sighand->siglock);
27 	set_process_cpu_timer(task, CPUCLOCK_PROF, &cputime, NULL);
28 	spin_unlock_irq(&task->sighand->siglock);
29 }
30 
31 static int check_clock(const clockid_t which_clock)
32 {
33 	int error = 0;
34 	struct task_struct *p;
35 	const pid_t pid = CPUCLOCK_PID(which_clock);
36 
37 	if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX)
38 		return -EINVAL;
39 
40 	if (pid == 0)
41 		return 0;
42 
43 	rcu_read_lock();
44 	p = find_task_by_vpid(pid);
45 	if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
46 		   same_thread_group(p, current) : has_group_leader_pid(p))) {
47 		error = -EINVAL;
48 	}
49 	rcu_read_unlock();
50 
51 	return error;
52 }
53 
54 static inline union cpu_time_count
55 timespec_to_sample(const clockid_t which_clock, const struct timespec *tp)
56 {
57 	union cpu_time_count ret;
58 	ret.sched = 0;		/* high half always zero when .cpu used */
59 	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
60 		ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
61 	} else {
62 		ret.cpu = timespec_to_cputime(tp);
63 	}
64 	return ret;
65 }
66 
67 static void sample_to_timespec(const clockid_t which_clock,
68 			       union cpu_time_count cpu,
69 			       struct timespec *tp)
70 {
71 	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED)
72 		*tp = ns_to_timespec(cpu.sched);
73 	else
74 		cputime_to_timespec(cpu.cpu, tp);
75 }
76 
77 static inline int cpu_time_before(const clockid_t which_clock,
78 				  union cpu_time_count now,
79 				  union cpu_time_count then)
80 {
81 	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
82 		return now.sched < then.sched;
83 	} else {
84 		return now.cpu < then.cpu;
85 	}
86 }
87 static inline void cpu_time_add(const clockid_t which_clock,
88 				union cpu_time_count *acc,
89 			        union cpu_time_count val)
90 {
91 	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
92 		acc->sched += val.sched;
93 	} else {
94 		acc->cpu += val.cpu;
95 	}
96 }
97 static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
98 						union cpu_time_count a,
99 						union cpu_time_count b)
100 {
101 	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
102 		a.sched -= b.sched;
103 	} else {
104 		a.cpu -= b.cpu;
105 	}
106 	return a;
107 }
108 
109 /*
110  * Update expiry time from increment, and increase overrun count,
111  * given the current clock sample.
112  */
113 static void bump_cpu_timer(struct k_itimer *timer,
114 				  union cpu_time_count now)
115 {
116 	int i;
117 
118 	if (timer->it.cpu.incr.sched == 0)
119 		return;
120 
121 	if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
122 		unsigned long long delta, incr;
123 
124 		if (now.sched < timer->it.cpu.expires.sched)
125 			return;
126 		incr = timer->it.cpu.incr.sched;
127 		delta = now.sched + incr - timer->it.cpu.expires.sched;
128 		/* Don't use (incr*2 < delta), incr*2 might overflow. */
129 		for (i = 0; incr < delta - incr; i++)
130 			incr = incr << 1;
131 		for (; i >= 0; incr >>= 1, i--) {
132 			if (delta < incr)
133 				continue;
134 			timer->it.cpu.expires.sched += incr;
135 			timer->it_overrun += 1 << i;
136 			delta -= incr;
137 		}
138 	} else {
139 		cputime_t delta, incr;
140 
141 		if (now.cpu < timer->it.cpu.expires.cpu)
142 			return;
143 		incr = timer->it.cpu.incr.cpu;
144 		delta = now.cpu + incr - timer->it.cpu.expires.cpu;
145 		/* Don't use (incr*2 < delta), incr*2 might overflow. */
146 		for (i = 0; incr < delta - incr; i++)
147 			     incr += incr;
148 		for (; i >= 0; incr = incr >> 1, i--) {
149 			if (delta < incr)
150 				continue;
151 			timer->it.cpu.expires.cpu += incr;
152 			timer->it_overrun += 1 << i;
153 			delta -= incr;
154 		}
155 	}
156 }
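/*
 * Illustrative worked example for the overrun loops in bump_cpu_timer()
 * above (not part of the original source): with expires = 100, incr = 10
 * and a sample now = 137, delta becomes 137 + 10 - 100 = 47.  The first
 * loop doubles incr to 40 (i = 2); the second loop then adds the largest
 * power-of-two multiples of incr that still fit: 40 is added
 * (it_overrun += 4, delta = 7), while 20 and 10 are skipped.  The timer
 * ends up with expires = 140, the first reload strictly after the
 * sample, and it_overrun credited with the four missed periods.
 */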
157 
158 /**
159  * task_cputime_zero - Check a task_cputime struct for all zero fields.
160  *
161  * @cputime:	The struct to compare.
162  *
163  * Checks @cputime to see if all fields are zero.  Returns true if all fields
164  * are zero, false if any field is nonzero.
165  */
166 static inline int task_cputime_zero(const struct task_cputime *cputime)
167 {
168 	if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)
169 		return 1;
170 	return 0;
171 }
172 
173 static inline cputime_t prof_ticks(struct task_struct *p)
174 {
175 	cputime_t utime, stime;
176 
177 	task_cputime(p, &utime, &stime);
178 
179 	return utime + stime;
180 }
181 static inline cputime_t virt_ticks(struct task_struct *p)
182 {
183 	cputime_t utime;
184 
185 	task_cputime(p, &utime, NULL);
186 
187 	return utime;
188 }
189 
190 static int
191 posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
192 {
193 	int error = check_clock(which_clock);
194 	if (!error) {
195 		tp->tv_sec = 0;
196 		tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
197 		if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
198 			/*
199 			 * If sched_clock is using a cycle counter, we
200 			 * don't have its true resolution exported, but
201 			 * it is much finer than 1s/HZ.
202 			 */
203 			tp->tv_nsec = 1;
204 		}
205 	}
206 	return error;
207 }
208 
209 static int
210 posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
211 {
212 	/*
213 	 * You can never reset a CPU clock, but we check for other errors
214 	 * in the call before failing with EPERM.
215 	 */
216 	int error = check_clock(which_clock);
217 	if (error == 0) {
218 		error = -EPERM;
219 	}
220 	return error;
221 }
222 
223 
224 /*
225  * Sample a per-thread clock for the given task.
226  */
227 static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
228 			    union cpu_time_count *cpu)
229 {
230 	switch (CPUCLOCK_WHICH(which_clock)) {
231 	default:
232 		return -EINVAL;
233 	case CPUCLOCK_PROF:
234 		cpu->cpu = prof_ticks(p);
235 		break;
236 	case CPUCLOCK_VIRT:
237 		cpu->cpu = virt_ticks(p);
238 		break;
239 	case CPUCLOCK_SCHED:
240 		cpu->sched = task_sched_runtime(p);
241 		break;
242 	}
243 	return 0;
244 }
245 
246 static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
247 {
248 	if (b->utime > a->utime)
249 		a->utime = b->utime;
250 
251 	if (b->stime > a->stime)
252 		a->stime = b->stime;
253 
254 	if (b->sum_exec_runtime > a->sum_exec_runtime)
255 		a->sum_exec_runtime = b->sum_exec_runtime;
256 }
257 
258 void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
259 {
260 	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
261 	struct task_cputime sum;
262 	unsigned long flags;
263 
264 	if (!cputimer->running) {
265 		/*
266 		 * The POSIX timer interface allows for absolute time expiry
267 		 * values through the TIMER_ABSTIME flag, therefore we have
268 		 * to synchronize the timer to the clock every time we start
269 		 * it.
270 		 */
271 		thread_group_cputime(tsk, &sum);
272 		raw_spin_lock_irqsave(&cputimer->lock, flags);
273 		cputimer->running = 1;
274 		update_gt_cputime(&cputimer->cputime, &sum);
275 	} else
276 		raw_spin_lock_irqsave(&cputimer->lock, flags);
277 	*times = cputimer->cputime;
278 	raw_spin_unlock_irqrestore(&cputimer->lock, flags);
279 }
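/*
 * Background note (illustrative, based on how cputimer->running is used
 * elsewhere in the scheduler accounting code): while cputimer->running
 * is set, the per-task accounting paths keep adding utime/stime/runtime
 * deltas into cputimer->cputime under cputimer->lock, so the function
 * above can return a group-wide total without walking every thread on
 * each tick.  The full thread_group_cputime() sum is only needed the
 * first time the cached timer is started (or restarted).
 */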
280 
281 /*
282  * Sample a process (thread group) clock for the given group_leader task.
283  * Must be called with tasklist_lock held for reading.
284  */
285 static int cpu_clock_sample_group(const clockid_t which_clock,
286 				  struct task_struct *p,
287 				  union cpu_time_count *cpu)
288 {
289 	struct task_cputime cputime;
290 
291 	switch (CPUCLOCK_WHICH(which_clock)) {
292 	default:
293 		return -EINVAL;
294 	case CPUCLOCK_PROF:
295 		thread_group_cputime(p, &cputime);
296 		cpu->cpu = cputime.utime + cputime.stime;
297 		break;
298 	case CPUCLOCK_VIRT:
299 		thread_group_cputime(p, &cputime);
300 		cpu->cpu = cputime.utime;
301 		break;
302 	case CPUCLOCK_SCHED:
303 		thread_group_cputime(p, &cputime);
304 		cpu->sched = cputime.sum_exec_runtime;
305 		break;
306 	}
307 	return 0;
308 }
309 
310 
311 static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
312 {
313 	const pid_t pid = CPUCLOCK_PID(which_clock);
314 	int error = -EINVAL;
315 	union cpu_time_count rtn;
316 
317 	if (pid == 0) {
318 		/*
319 		 * Special case constant value for our own clocks.
320 		 * We don't have to do any lookup to find ourselves.
321 		 */
322 		if (CPUCLOCK_PERTHREAD(which_clock)) {
323 			/*
324 			 * Sampling just ourselves we can do with no locking.
325 			 */
326 			error = cpu_clock_sample(which_clock,
327 						 current, &rtn);
328 		} else {
329 			read_lock(&tasklist_lock);
330 			error = cpu_clock_sample_group(which_clock,
331 						       current, &rtn);
332 			read_unlock(&tasklist_lock);
333 		}
334 	} else {
335 		/*
336 		 * Find the given PID, and validate that the caller
337 		 * should be able to see it.
338 		 */
339 		struct task_struct *p;
340 		rcu_read_lock();
341 		p = find_task_by_vpid(pid);
342 		if (p) {
343 			if (CPUCLOCK_PERTHREAD(which_clock)) {
344 				if (same_thread_group(p, current)) {
345 					error = cpu_clock_sample(which_clock,
346 								 p, &rtn);
347 				}
348 			} else {
349 				read_lock(&tasklist_lock);
350 				if (thread_group_leader(p) && p->sighand) {
351 					error =
352 					    cpu_clock_sample_group(which_clock,
353 							           p, &rtn);
354 				}
355 				read_unlock(&tasklist_lock);
356 			}
357 		}
358 		rcu_read_unlock();
359 	}
360 
361 	if (error)
362 		return error;
363 	sample_to_timespec(which_clock, rtn, tp);
364 	return 0;
365 }
366 
367 
368 /*
369  * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
370  * This is called from sys_timer_create() and do_cpu_nanosleep() with the
371  * new timer already all-zeros initialized.
372  */
373 static int posix_cpu_timer_create(struct k_itimer *new_timer)
374 {
375 	int ret = 0;
376 	const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);
377 	struct task_struct *p;
378 
379 	if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX)
380 		return -EINVAL;
381 
382 	INIT_LIST_HEAD(&new_timer->it.cpu.entry);
383 
384 	rcu_read_lock();
385 	if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
386 		if (pid == 0) {
387 			p = current;
388 		} else {
389 			p = find_task_by_vpid(pid);
390 			if (p && !same_thread_group(p, current))
391 				p = NULL;
392 		}
393 	} else {
394 		if (pid == 0) {
395 			p = current->group_leader;
396 		} else {
397 			p = find_task_by_vpid(pid);
398 			if (p && !has_group_leader_pid(p))
399 				p = NULL;
400 		}
401 	}
402 	new_timer->it.cpu.task = p;
403 	if (p) {
404 		get_task_struct(p);
405 	} else {
406 		ret = -EINVAL;
407 	}
408 	rcu_read_unlock();
409 
410 	return ret;
411 }
412 
413 /*
414  * Clean up a CPU-clock timer that is about to be destroyed.
415  * This is called from timer deletion with the timer already locked.
416  * If we return TIMER_RETRY, it's necessary to release the timer's lock
417  * and try again.  (This happens when the timer is in the middle of firing.)
418  */
419 static int posix_cpu_timer_del(struct k_itimer *timer)
420 {
421 	struct task_struct *p = timer->it.cpu.task;
422 	int ret = 0;
423 
424 	if (likely(p != NULL)) {
425 		read_lock(&tasklist_lock);
426 		if (unlikely(p->sighand == NULL)) {
427 			/*
428 			 * We raced with the reaping of the task.
429 			 * The deletion should have cleared us off the list.
430 			 */
431 			BUG_ON(!list_empty(&timer->it.cpu.entry));
432 		} else {
433 			spin_lock(&p->sighand->siglock);
434 			if (timer->it.cpu.firing)
435 				ret = TIMER_RETRY;
436 			else
437 				list_del(&timer->it.cpu.entry);
438 			spin_unlock(&p->sighand->siglock);
439 		}
440 		read_unlock(&tasklist_lock);
441 
442 		if (!ret)
443 			put_task_struct(p);
444 	}
445 
446 	return ret;
447 }
448 
449 /*
450  * Clean out CPU timers still ticking when a thread exited.  The task
451  * pointer is cleared, and the expiry time is replaced with the residual
452  * time for later timer_gettime calls to return.
453  * This must be called with the siglock held.
454  */
455 static void cleanup_timers(struct list_head *head,
456 			   cputime_t utime, cputime_t stime,
457 			   unsigned long long sum_exec_runtime)
458 {
459 	struct cpu_timer_list *timer, *next;
460 	cputime_t ptime = utime + stime;
461 
462 	list_for_each_entry_safe(timer, next, head, entry) {
463 		list_del_init(&timer->entry);
464 		if (timer->expires.cpu < ptime) {
465 			timer->expires.cpu = 0;
466 		} else {
467 			timer->expires.cpu -= ptime;
468 		}
469 	}
470 
471 	++head;
472 	list_for_each_entry_safe(timer, next, head, entry) {
473 		list_del_init(&timer->entry);
474 		if (timer->expires.cpu < utime) {
475 			timer->expires.cpu = 0;
476 		} else {
477 			timer->expires.cpu -= utime;
478 		}
479 	}
480 
481 	++head;
482 	list_for_each_entry_safe(timer, next, head, entry) {
483 		list_del_init(&timer->entry);
484 		if (timer->expires.sched < sum_exec_runtime) {
485 			timer->expires.sched = 0;
486 		} else {
487 			timer->expires.sched -= sum_exec_runtime;
488 		}
489 	}
490 }
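/*
 * Note on the list layout assumed above (and in arm_timer() and the
 * check_*_timers() helpers below): tsk->cpu_timers[] and
 * signal->cpu_timers[] are arrays of three list heads indexed by clock
 * type, CPUCLOCK_PROF (0), CPUCLOCK_VIRT (1) and CPUCLOCK_SCHED (2),
 * which is why cleanup_timers() simply advances the head pointer twice
 * to move from the profiling list to the virtual and scheduler lists.
 */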
491 
492 /*
493  * These are both called with the siglock held, when the current thread
494  * is being reaped.  When the final (leader) thread in the group is reaped,
495  * posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit.
496  */
497 void posix_cpu_timers_exit(struct task_struct *tsk)
498 {
499 	cputime_t utime, stime;
500 
501 	add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
502 						sizeof(unsigned long long));
503 	task_cputime(tsk, &utime, &stime);
504 	cleanup_timers(tsk->cpu_timers,
505 		       utime, stime, tsk->se.sum_exec_runtime);
506 
507 }
508 void posix_cpu_timers_exit_group(struct task_struct *tsk)
509 {
510 	struct signal_struct *const sig = tsk->signal;
511 	cputime_t utime, stime;
512 
513 	task_cputime(tsk, &utime, &stime);
514 	cleanup_timers(tsk->signal->cpu_timers,
515 		       utime + sig->utime, stime + sig->stime,
516 		       tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
517 }
518 
519 static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
520 {
521 	/*
522 	 * That's all for this thread or process.
523 	 * We leave our residual in expires to be reported.
524 	 */
525 	put_task_struct(timer->it.cpu.task);
526 	timer->it.cpu.task = NULL;
527 	timer->it.cpu.expires = cpu_time_sub(timer->it_clock,
528 					     timer->it.cpu.expires,
529 					     now);
530 }
531 
532 static inline int expires_gt(cputime_t expires, cputime_t new_exp)
533 {
534 	return expires == 0 || expires > new_exp;
535 }
536 
537 /*
538  * Insert the timer on the appropriate list before any timers that
539  * expire later.  This must be called with the tasklist_lock held
540  * for reading, interrupts disabled and p->sighand->siglock taken.
541  */
542 static void arm_timer(struct k_itimer *timer)
543 {
544 	struct task_struct *p = timer->it.cpu.task;
545 	struct list_head *head, *listpos;
546 	struct task_cputime *cputime_expires;
547 	struct cpu_timer_list *const nt = &timer->it.cpu;
548 	struct cpu_timer_list *next;
549 
550 	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
551 		head = p->cpu_timers;
552 		cputime_expires = &p->cputime_expires;
553 	} else {
554 		head = p->signal->cpu_timers;
555 		cputime_expires = &p->signal->cputime_expires;
556 	}
557 	head += CPUCLOCK_WHICH(timer->it_clock);
558 
559 	listpos = head;
560 	list_for_each_entry(next, head, entry) {
561 		if (cpu_time_before(timer->it_clock, nt->expires, next->expires))
562 			break;
563 		listpos = &next->entry;
564 	}
565 	list_add(&nt->entry, listpos);
566 
567 	if (listpos == head) {
568 		union cpu_time_count *exp = &nt->expires;
569 
570 		/*
571 		 * We are the new earliest-expiring POSIX 1.b timer, hence
572 		 * need to update expiration cache. Take into account that
573 		 * for process timers we share expiration cache with itimers
574 		 * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
575 		 */
576 
577 		switch (CPUCLOCK_WHICH(timer->it_clock)) {
578 		case CPUCLOCK_PROF:
579 			if (expires_gt(cputime_expires->prof_exp, exp->cpu))
580 				cputime_expires->prof_exp = exp->cpu;
581 			break;
582 		case CPUCLOCK_VIRT:
583 			if (expires_gt(cputime_expires->virt_exp, exp->cpu))
584 				cputime_expires->virt_exp = exp->cpu;
585 			break;
586 		case CPUCLOCK_SCHED:
587 			if (cputime_expires->sched_exp == 0 ||
588 			    cputime_expires->sched_exp > exp->sched)
589 				cputime_expires->sched_exp = exp->sched;
590 			break;
591 		}
592 	}
593 }
594 
595 /*
596  * The timer is locked, fire it and arrange for its reload.
597  */
598 static void cpu_timer_fire(struct k_itimer *timer)
599 {
600 	if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
601 		/*
602 		 * User doesn't want any signal.
603 		 */
604 		timer->it.cpu.expires.sched = 0;
605 	} else if (unlikely(timer->sigq == NULL)) {
606 		/*
607 		 * This is a special case for clock_nanosleep,
608 		 * not a normal timer from sys_timer_create.
609 		 */
610 		wake_up_process(timer->it_process);
611 		timer->it.cpu.expires.sched = 0;
612 	} else if (timer->it.cpu.incr.sched == 0) {
613 		/*
614 		 * One-shot timer.  Clear it as soon as it's fired.
615 		 */
616 		posix_timer_event(timer, 0);
617 		timer->it.cpu.expires.sched = 0;
618 	} else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
619 		/*
620 		 * The signal did not get queued because the signal
621 		 * was ignored, so we won't get any callback to
622 		 * reload the timer.  But we need to keep it
623 		 * ticking in case the signal is deliverable next time.
624 		 */
625 		posix_cpu_timer_schedule(timer);
626 	}
627 }
628 
629 /*
630  * Sample a process (thread group) timer for the given group_leader task.
631  * Must be called with tasklist_lock held for reading.
632  */
633 static int cpu_timer_sample_group(const clockid_t which_clock,
634 				  struct task_struct *p,
635 				  union cpu_time_count *cpu)
636 {
637 	struct task_cputime cputime;
638 
639 	thread_group_cputimer(p, &cputime);
640 	switch (CPUCLOCK_WHICH(which_clock)) {
641 	default:
642 		return -EINVAL;
643 	case CPUCLOCK_PROF:
644 		cpu->cpu = cputime.utime + cputime.stime;
645 		break;
646 	case CPUCLOCK_VIRT:
647 		cpu->cpu = cputime.utime;
648 		break;
649 	case CPUCLOCK_SCHED:
650 		cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
651 		break;
652 	}
653 	return 0;
654 }
655 
656 #ifdef CONFIG_NO_HZ_FULL
657 static void nohz_kick_work_fn(struct work_struct *work)
658 {
659 	tick_nohz_full_kick_all();
660 }
661 
662 static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
663 
664 /*
665  * We need the IPIs to be sent from sane process context.
666  * The posix cpu timers are always set with irqs disabled.
667  */
668 static void posix_cpu_timer_kick_nohz(void)
669 {
670 	schedule_work(&nohz_kick_work);
671 }
672 
673 bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
674 {
675 	if (!task_cputime_zero(&tsk->cputime_expires))
676 		return false;
677 
678 	if (tsk->signal->cputimer.running)
679 		return false;
680 
681 	return true;
682 }
683 #else
684 static inline void posix_cpu_timer_kick_nohz(void) { }
685 #endif
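/*
 * Context for the CONFIG_NO_HZ_FULL block above (descriptive note, not
 * from the original file): a full dynticks CPU may have stopped its
 * periodic tick, and the expiry checks in this file are driven from that
 * tick.  So whenever a CPU timer is armed or re-armed we kick the
 * nohz_full CPUs via the workqueue above, forcing them to restart the
 * tick until posix_cpu_timers_can_stop_tick() reports no pending
 * expirations.
 */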
686 
687 /*
688  * Guts of sys_timer_settime for CPU timers.
689  * This is called with the timer locked and interrupts disabled.
690  * If we return TIMER_RETRY, it's necessary to release the timer's lock
691  * and try again.  (This happens when the timer is in the middle of firing.)
692  */
693 static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
694 			       struct itimerspec *new, struct itimerspec *old)
695 {
696 	struct task_struct *p = timer->it.cpu.task;
697 	union cpu_time_count old_expires, new_expires, old_incr, val;
698 	int ret;
699 
700 	if (unlikely(p == NULL)) {
701 		/*
702 		 * Timer refers to a dead task's clock.
703 		 */
704 		return -ESRCH;
705 	}
706 
707 	new_expires = timespec_to_sample(timer->it_clock, &new->it_value);
708 
709 	read_lock(&tasklist_lock);
710 	/*
711 	 * We need the tasklist_lock to protect against reaping that
712 	 * clears p->sighand.  If p has just been reaped, we can no
713 	 * longer get any information about it at all.
714 	 */
715 	if (unlikely(p->sighand == NULL)) {
716 		read_unlock(&tasklist_lock);
717 		put_task_struct(p);
718 		timer->it.cpu.task = NULL;
719 		return -ESRCH;
720 	}
721 
722 	/*
723 	 * Disarm any old timer after extracting its expiry time.
724 	 */
725 	BUG_ON(!irqs_disabled());
726 
727 	ret = 0;
728 	old_incr = timer->it.cpu.incr;
729 	spin_lock(&p->sighand->siglock);
730 	old_expires = timer->it.cpu.expires;
731 	if (unlikely(timer->it.cpu.firing)) {
732 		timer->it.cpu.firing = -1;
733 		ret = TIMER_RETRY;
734 	} else
735 		list_del_init(&timer->it.cpu.entry);
736 
737 	/*
738 	 * We need to sample the current value to convert the new
739 	 * value from relative to absolute, and to convert the
740 	 * old value from absolute to relative.  To set a process
741 	 * timer, we need a sample to balance the thread expiry
742 	 * times (in arm_timer).  With an absolute time, we must
743 	 * check if it's already passed.  In short, we need a sample.
744 	 */
745 	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
746 		cpu_clock_sample(timer->it_clock, p, &val);
747 	} else {
748 		cpu_timer_sample_group(timer->it_clock, p, &val);
749 	}
750 
751 	if (old) {
752 		if (old_expires.sched == 0) {
753 			old->it_value.tv_sec = 0;
754 			old->it_value.tv_nsec = 0;
755 		} else {
756 			/*
757 			 * Update the timer in case it has
758 			 * overrun already.  If it has,
759 			 * we'll report it as having overrun
760 			 * and with the next reloaded timer
761 			 * already ticking, though we are
762 			 * swallowing that pending
763 			 * notification here to install the
764 			 * new setting.
765 			 */
766 			bump_cpu_timer(timer, val);
767 			if (cpu_time_before(timer->it_clock, val,
768 					    timer->it.cpu.expires)) {
769 				old_expires = cpu_time_sub(
770 					timer->it_clock,
771 					timer->it.cpu.expires, val);
772 				sample_to_timespec(timer->it_clock,
773 						   old_expires,
774 						   &old->it_value);
775 			} else {
776 				old->it_value.tv_nsec = 1;
777 				old->it_value.tv_sec = 0;
778 			}
779 		}
780 	}
781 
782 	if (unlikely(ret)) {
783 		/*
784 		 * We are colliding with the timer actually firing.
785 		 * Punt after filling in the timer's old value, and
786 		 * disable this firing since we are already reporting
787 		 * it as an overrun (thanks to bump_cpu_timer above).
788 		 */
789 		spin_unlock(&p->sighand->siglock);
790 		read_unlock(&tasklist_lock);
791 		goto out;
792 	}
793 
794 	if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) {
795 		cpu_time_add(timer->it_clock, &new_expires, val);
796 	}
797 
798 	/*
799 	 * Install the new expiry time (or zero).
800 	 * For a timer with no notification action, we don't actually
801 	 * arm the timer (we'll just fake it for timer_gettime).
802 	 */
803 	timer->it.cpu.expires = new_expires;
804 	if (new_expires.sched != 0 &&
805 	    cpu_time_before(timer->it_clock, val, new_expires)) {
806 		arm_timer(timer);
807 	}
808 
809 	spin_unlock(&p->sighand->siglock);
810 	read_unlock(&tasklist_lock);
811 
812 	/*
813 	 * Install the new reload setting, and
814 	 * set up the signal and overrun bookkeeping.
815 	 */
816 	timer->it.cpu.incr = timespec_to_sample(timer->it_clock,
817 						&new->it_interval);
818 
819 	/*
820 	 * This acts as a modification timestamp for the timer,
821 	 * so any automatic reload attempt will punt on seeing
822 	 * that we have reset the timer manually.
823 	 */
824 	timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
825 		~REQUEUE_PENDING;
826 	timer->it_overrun_last = 0;
827 	timer->it_overrun = -1;
828 
829 	if (new_expires.sched != 0 &&
830 	    !cpu_time_before(timer->it_clock, val, new_expires)) {
831 		/*
832 		 * The designated time already passed, so we notify
833 		 * immediately, even if the thread never runs to
834 		 * accumulate more time on this clock.
835 		 */
836 		cpu_timer_fire(timer);
837 	}
838 
839 	ret = 0;
840  out:
841 	if (old) {
842 		sample_to_timespec(timer->it_clock,
843 				   old_incr, &old->it_interval);
844 	}
845 	if (!ret)
846 		posix_cpu_timer_kick_nohz();
847 	return ret;
848 }
849 
850 static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
851 {
852 	union cpu_time_count now;
853 	struct task_struct *p = timer->it.cpu.task;
854 	int clear_dead;
855 
856 	/*
857 	 * Easy part: convert the reload time.
858 	 */
859 	sample_to_timespec(timer->it_clock,
860 			   timer->it.cpu.incr, &itp->it_interval);
861 
862 	if (timer->it.cpu.expires.sched == 0) {	/* Timer not armed at all.  */
863 		itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
864 		return;
865 	}
866 
867 	if (unlikely(p == NULL)) {
868 		/*
869 		 * This task already died and the timer will never fire.
870 		 * In this case, expires is actually the dead value.
871 		 */
872 	dead:
873 		sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
874 				   &itp->it_value);
875 		return;
876 	}
877 
878 	/*
879 	 * Sample the clock to take the difference with the expiry time.
880 	 */
881 	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
882 		cpu_clock_sample(timer->it_clock, p, &now);
883 		clear_dead = p->exit_state;
884 	} else {
885 		read_lock(&tasklist_lock);
886 		if (unlikely(p->sighand == NULL)) {
887 			/*
888 			 * The process has been reaped.
889 			 * We can't even collect a sample any more.
890 			 * Call the timer disarmed, nothing else to do.
891 			 */
892 			put_task_struct(p);
893 			timer->it.cpu.task = NULL;
894 			timer->it.cpu.expires.sched = 0;
895 			read_unlock(&tasklist_lock);
896 			goto dead;
897 		} else {
898 			cpu_timer_sample_group(timer->it_clock, p, &now);
899 			clear_dead = (unlikely(p->exit_state) &&
900 				      thread_group_empty(p));
901 		}
902 		read_unlock(&tasklist_lock);
903 	}
904 
905 	if (unlikely(clear_dead)) {
906 		/*
907 		 * We've noticed that the thread is dead, but
908 		 * not yet reaped.  Take this opportunity to
909 		 * drop our task ref.
910 		 */
911 		clear_dead_task(timer, now);
912 		goto dead;
913 	}
914 
915 	if (cpu_time_before(timer->it_clock, now, timer->it.cpu.expires)) {
916 		sample_to_timespec(timer->it_clock,
917 				   cpu_time_sub(timer->it_clock,
918 						timer->it.cpu.expires, now),
919 				   &itp->it_value);
920 	} else {
921 		/*
922 		 * The timer should have expired already, but the firing
923 		 * hasn't taken place yet.  Say it's just about to expire.
924 		 */
925 		itp->it_value.tv_nsec = 1;
926 		itp->it_value.tv_sec = 0;
927 	}
928 }
929 
930 /*
931  * Check for any per-thread CPU timers that have fired and move them off
932  * the tsk->cpu_timers[N] list onto the firing list.  Here we update the
933  * tsk->it_*_expires values to reflect the remaining thread CPU timers.
934  */
935 static void check_thread_timers(struct task_struct *tsk,
936 				struct list_head *firing)
937 {
938 	int maxfire;
939 	struct list_head *timers = tsk->cpu_timers;
940 	struct signal_struct *const sig = tsk->signal;
941 	unsigned long soft;
942 
943 	maxfire = 20;
944 	tsk->cputime_expires.prof_exp = 0;
945 	while (!list_empty(timers)) {
946 		struct cpu_timer_list *t = list_first_entry(timers,
947 						      struct cpu_timer_list,
948 						      entry);
949 		if (!--maxfire || prof_ticks(tsk) < t->expires.cpu) {
950 			tsk->cputime_expires.prof_exp = t->expires.cpu;
951 			break;
952 		}
953 		t->firing = 1;
954 		list_move_tail(&t->entry, firing);
955 	}
956 
957 	++timers;
958 	maxfire = 20;
959 	tsk->cputime_expires.virt_exp = 0;
960 	while (!list_empty(timers)) {
961 		struct cpu_timer_list *t = list_first_entry(timers,
962 						      struct cpu_timer_list,
963 						      entry);
964 		if (!--maxfire || virt_ticks(tsk) < t->expires.cpu) {
965 			tsk->cputime_expires.virt_exp = t->expires.cpu;
966 			break;
967 		}
968 		t->firing = 1;
969 		list_move_tail(&t->entry, firing);
970 	}
971 
972 	++timers;
973 	maxfire = 20;
974 	tsk->cputime_expires.sched_exp = 0;
975 	while (!list_empty(timers)) {
976 		struct cpu_timer_list *t = list_first_entry(timers,
977 						      struct cpu_timer_list,
978 						      entry);
979 		if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
980 			tsk->cputime_expires.sched_exp = t->expires.sched;
981 			break;
982 		}
983 		t->firing = 1;
984 		list_move_tail(&t->entry, firing);
985 	}
986 
987 	/*
988 	 * Check for the special case thread timers.
989 	 */
990 	soft = ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur);
991 	if (soft != RLIM_INFINITY) {
992 		unsigned long hard =
993 			ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max);
994 
995 		if (hard != RLIM_INFINITY &&
996 		    tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
997 			/*
998 			 * At the hard limit, we just die.
999 			 * No need to calculate anything else now.
1000 			 */
1001 			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
1002 			return;
1003 		}
1004 		if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
1005 			/*
1006 			 * At the soft limit, send a SIGXCPU every second.
1007 			 */
1008 			if (soft < hard) {
1009 				soft += USEC_PER_SEC;
1010 				sig->rlim[RLIMIT_RTTIME].rlim_cur = soft;
1011 			}
1012 			printk(KERN_INFO
1013 				"RT Watchdog Timeout: %s[%d]\n",
1014 				tsk->comm, task_pid_nr(tsk));
1015 			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
1016 		}
1017 	}
1018 }
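/*
 * Units note for the RLIMIT_RTTIME check above (descriptive addition):
 * the soft and hard limits are expressed in microseconds, while
 * tsk->rt.timeout counts scheduler ticks, hence the conversion with
 * DIV_ROUND_UP(limit, USEC_PER_SEC/HZ).  Bumping the soft limit by
 * USEC_PER_SEC each time throttles the SIGXCPU warning to roughly one
 * per second of accumulated RT runtime.
 */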
1019 
1020 static void stop_process_timers(struct signal_struct *sig)
1021 {
1022 	struct thread_group_cputimer *cputimer = &sig->cputimer;
1023 	unsigned long flags;
1024 
1025 	raw_spin_lock_irqsave(&cputimer->lock, flags);
1026 	cputimer->running = 0;
1027 	raw_spin_unlock_irqrestore(&cputimer->lock, flags);
1028 }
1029 
1030 static u32 onecputick;
1031 
1032 static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
1033 			     cputime_t *expires, cputime_t cur_time, int signo)
1034 {
1035 	if (!it->expires)
1036 		return;
1037 
1038 	if (cur_time >= it->expires) {
1039 		if (it->incr) {
1040 			it->expires += it->incr;
1041 			it->error += it->incr_error;
1042 			if (it->error >= onecputick) {
1043 				it->expires -= cputime_one_jiffy;
1044 				it->error -= onecputick;
1045 			}
1046 		} else {
1047 			it->expires = 0;
1048 		}
1049 
1050 		trace_itimer_expire(signo == SIGPROF ?
1051 				    ITIMER_PROF : ITIMER_VIRTUAL,
1052 				    tsk->signal->leader_pid, cur_time);
1053 		__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
1054 	}
1055 
1056 	if (it->expires && (!*expires || it->expires < *expires)) {
1057 		*expires = it->expires;
1058 	}
1059 }
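/*
 * Rounding note (an interpretation, not original commentary): when an
 * ITIMER_PROF/ITIMER_VIRTUAL interval is converted to cputime_t it is
 * rounded up to whole ticks, and the per-period overshoot in nanoseconds
 * is remembered as it->incr_error.  Each reload above accumulates that
 * overshoot in it->error, and once it reaches onecputick (one tick worth
 * of nanoseconds, computed in init_posix_cpu_timers() below) a whole
 * jiffy is taken back off the expiry so the itimer does not drift over
 * the long run.
 */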
1060 
1061 /*
1062  * Check for any per-process CPU timers that have fired and move them
1063  * off the tsk->signal->cpu_timers[N] lists onto the firing list.
1064  * Per-thread timers have already been taken off.
1065  */
1066 static void check_process_timers(struct task_struct *tsk,
1067 				 struct list_head *firing)
1068 {
1069 	int maxfire;
1070 	struct signal_struct *const sig = tsk->signal;
1071 	cputime_t utime, ptime, virt_expires, prof_expires;
1072 	unsigned long long sum_sched_runtime, sched_expires;
1073 	struct list_head *timers = sig->cpu_timers;
1074 	struct task_cputime cputime;
1075 	unsigned long soft;
1076 
1077 	/*
1078 	 * Collect the current process totals.
1079 	 */
1080 	thread_group_cputimer(tsk, &cputime);
1081 	utime = cputime.utime;
1082 	ptime = utime + cputime.stime;
1083 	sum_sched_runtime = cputime.sum_exec_runtime;
1084 	maxfire = 20;
1085 	prof_expires = 0;
1086 	while (!list_empty(timers)) {
1087 		struct cpu_timer_list *tl = list_first_entry(timers,
1088 						      struct cpu_timer_list,
1089 						      entry);
1090 		if (!--maxfire || ptime < tl->expires.cpu) {
1091 			prof_expires = tl->expires.cpu;
1092 			break;
1093 		}
1094 		tl->firing = 1;
1095 		list_move_tail(&tl->entry, firing);
1096 	}
1097 
1098 	++timers;
1099 	maxfire = 20;
1100 	virt_expires = 0;
1101 	while (!list_empty(timers)) {
1102 		struct cpu_timer_list *tl = list_first_entry(timers,
1103 						      struct cpu_timer_list,
1104 						      entry);
1105 		if (!--maxfire || utime < tl->expires.cpu) {
1106 			virt_expires = tl->expires.cpu;
1107 			break;
1108 		}
1109 		tl->firing = 1;
1110 		list_move_tail(&tl->entry, firing);
1111 	}
1112 
1113 	++timers;
1114 	maxfire = 20;
1115 	sched_expires = 0;
1116 	while (!list_empty(timers)) {
1117 		struct cpu_timer_list *tl = list_first_entry(timers,
1118 						      struct cpu_timer_list,
1119 						      entry);
1120 		if (!--maxfire || sum_sched_runtime < tl->expires.sched) {
1121 			sched_expires = tl->expires.sched;
1122 			break;
1123 		}
1124 		tl->firing = 1;
1125 		list_move_tail(&tl->entry, firing);
1126 	}
1127 
1128 	/*
1129 	 * Check for the special case process timers.
1130 	 */
1131 	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime,
1132 			 SIGPROF);
1133 	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime,
1134 			 SIGVTALRM);
1135 	soft = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
1136 	if (soft != RLIM_INFINITY) {
1137 		unsigned long psecs = cputime_to_secs(ptime);
1138 		unsigned long hard =
1139 			ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_max);
1140 		cputime_t x;
1141 		if (psecs >= hard) {
1142 			/*
1143 			 * At the hard limit, we just die.
1144 			 * No need to calculate anything else now.
1145 			 */
1146 			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
1147 			return;
1148 		}
1149 		if (psecs >= soft) {
1150 			/*
1151 			 * At the soft limit, send a SIGXCPU every second.
1152 			 */
1153 			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
1154 			if (soft < hard) {
1155 				soft++;
1156 				sig->rlim[RLIMIT_CPU].rlim_cur = soft;
1157 			}
1158 		}
1159 		x = secs_to_cputime(soft);
1160 		if (!prof_expires || x < prof_expires) {
1161 			prof_expires = x;
1162 		}
1163 	}
1164 
1165 	sig->cputime_expires.prof_exp = prof_expires;
1166 	sig->cputime_expires.virt_exp = virt_expires;
1167 	sig->cputime_expires.sched_exp = sched_expires;
1168 	if (task_cputime_zero(&sig->cputime_expires))
1169 		stop_process_timers(sig);
1170 }
1171 
1172 /*
1173  * This is called from the signal code (via do_schedule_next_timer)
1174  * when the last timer signal was delivered and we have to reload the timer.
1175  */
1176 void posix_cpu_timer_schedule(struct k_itimer *timer)
1177 {
1178 	struct task_struct *p = timer->it.cpu.task;
1179 	union cpu_time_count now;
1180 
1181 	if (unlikely(p == NULL))
1182 		/*
1183 		 * The task was cleaned up already, no future firings.
1184 		 */
1185 		goto out;
1186 
1187 	/*
1188 	 * Fetch the current sample and update the timer's expiry time.
1189 	 */
1190 	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
1191 		cpu_clock_sample(timer->it_clock, p, &now);
1192 		bump_cpu_timer(timer, now);
1193 		if (unlikely(p->exit_state)) {
1194 			clear_dead_task(timer, now);
1195 			goto out;
1196 		}
1197 		read_lock(&tasklist_lock); /* arm_timer needs it.  */
1198 		spin_lock(&p->sighand->siglock);
1199 	} else {
1200 		read_lock(&tasklist_lock);
1201 		if (unlikely(p->sighand == NULL)) {
1202 			/*
1203 			 * The process has been reaped.
1204 			 * We can't even collect a sample any more.
1205 			 */
1206 			put_task_struct(p);
1207 			timer->it.cpu.task = p = NULL;
1208 			timer->it.cpu.expires.sched = 0;
1209 			goto out_unlock;
1210 		} else if (unlikely(p->exit_state) && thread_group_empty(p)) {
1211 			/*
1212 			 * We've noticed that the thread is dead, but
1213 			 * not yet reaped.  Take this opportunity to
1214 			 * drop our task ref.
1215 			 */
1216 			clear_dead_task(timer, now);
1217 			goto out_unlock;
1218 		}
1219 		spin_lock(&p->sighand->siglock);
1220 		cpu_timer_sample_group(timer->it_clock, p, &now);
1221 		bump_cpu_timer(timer, now);
1222 		/* Leave the tasklist_lock locked for the call below.  */
1223 	}
1224 
1225 	/*
1226 	 * Now re-arm for the new expiry time.
1227 	 */
1228 	BUG_ON(!irqs_disabled());
1229 	arm_timer(timer);
1230 	spin_unlock(&p->sighand->siglock);
1231 
1232 out_unlock:
1233 	read_unlock(&tasklist_lock);
1234 
1235 out:
1236 	timer->it_overrun_last = timer->it_overrun;
1237 	timer->it_overrun = -1;
1238 	++timer->it_requeue_pending;
1239 }
1240 
1241 /**
1242  * task_cputime_expired - Compare two task_cputime entities.
1243  *
1244  * @sample:	The task_cputime structure to be checked for expiration.
1245  * @expires:	Expiration times, against which @sample will be checked.
1246  *
1247  * Checks @sample against @expires to see if any field of @sample has expired.
1248  * Returns true if any field of the former is greater than the corresponding
1249  * field of the latter if the latter field is set.  Otherwise returns false.
1250  */
1251 static inline int task_cputime_expired(const struct task_cputime *sample,
1252 					const struct task_cputime *expires)
1253 {
1254 	if (expires->utime && sample->utime >= expires->utime)
1255 		return 1;
1256 	if (expires->stime && sample->utime + sample->stime >= expires->stime)
1257 		return 1;
1258 	if (expires->sum_exec_runtime != 0 &&
1259 	    sample->sum_exec_runtime >= expires->sum_exec_runtime)
1260 		return 1;
1261 	return 0;
1262 }
1263 
1264 /**
1265  * fastpath_timer_check - POSIX CPU timers fast path.
1266  *
1267  * @tsk:	The task (thread) being checked.
1268  *
1269  * Check the task and thread group timers.  If both are zero (there are no
1270  * timers set) return false.  Otherwise snapshot the task and thread group
1271  * timers and compare them with the corresponding expiration times.  Return
1272  * true if a timer has expired, else return false.
1273  */
1274 static inline int fastpath_timer_check(struct task_struct *tsk)
1275 {
1276 	struct signal_struct *sig;
1277 	cputime_t utime, stime;
1278 
1279 	task_cputime(tsk, &utime, &stime);
1280 
1281 	if (!task_cputime_zero(&tsk->cputime_expires)) {
1282 		struct task_cputime task_sample = {
1283 			.utime = utime,
1284 			.stime = stime,
1285 			.sum_exec_runtime = tsk->se.sum_exec_runtime
1286 		};
1287 
1288 		if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
1289 			return 1;
1290 	}
1291 
1292 	sig = tsk->signal;
1293 	if (sig->cputimer.running) {
1294 		struct task_cputime group_sample;
1295 
1296 		raw_spin_lock(&sig->cputimer.lock);
1297 		group_sample = sig->cputimer.cputime;
1298 		raw_spin_unlock(&sig->cputimer.lock);
1299 
1300 		if (task_cputime_expired(&group_sample, &sig->cputime_expires))
1301 			return 1;
1302 	}
1303 
1304 	return 0;
1305 }
1306 
1307 /*
1308  * This is called from the timer interrupt handler.  The irq handler has
1309  * already updated our counts.  We need to check if any timers fire now.
1310  * Interrupts are disabled.
1311  */
1312 void run_posix_cpu_timers(struct task_struct *tsk)
1313 {
1314 	LIST_HEAD(firing);
1315 	struct k_itimer *timer, *next;
1316 	unsigned long flags;
1317 
1318 	BUG_ON(!irqs_disabled());
1319 
1320 	/*
1321 	 * The fast path checks that there are no expired thread or thread
1322 	 * group timers.  If that's so, just return.
1323 	 */
1324 	if (!fastpath_timer_check(tsk))
1325 		return;
1326 
1327 	if (!lock_task_sighand(tsk, &flags))
1328 		return;
1329 	/*
1330 	 * Here we take off tsk->signal->cpu_timers[N] and
1331 	 * tsk->cpu_timers[N] all the timers that are firing, and
1332 	 * put them on the firing list.
1333 	 */
1334 	check_thread_timers(tsk, &firing);
1335 	/*
1336 	 * If there are any active process-wide timers (POSIX 1.b, itimers,
1337 	 * RLIMIT_CPU), the cputimer must be running.
1338 	 */
1339 	if (tsk->signal->cputimer.running)
1340 		check_process_timers(tsk, &firing);
1341 
1342 	/*
1343 	 * We must release these locks before taking any timer's lock.
1344 	 * There is a potential race with timer deletion here, as the
1345 	 * siglock now protects our private firing list.  We have set
1346 	 * the firing flag in each timer, so that a deletion attempt
1347 	 * that gets the timer lock before we do will give it up and
1348 	 * spin until we've taken care of that timer below.
1349 	 */
1350 	unlock_task_sighand(tsk, &flags);
1351 
1352 	/*
1353 	 * Now that all the timers on our list have the firing flag,
1354 	 * no one will touch their list entries but us.  We'll take
1355 	 * each timer's lock before clearing its firing flag, so no
1356 	 * timer call will interfere.
1357 	 */
1358 	list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
1359 		int cpu_firing;
1360 
1361 		spin_lock(&timer->it_lock);
1362 		list_del_init(&timer->it.cpu.entry);
1363 		cpu_firing = timer->it.cpu.firing;
1364 		timer->it.cpu.firing = 0;
1365 		/*
1366 		 * The firing flag is -1 if we collided with a reset
1367 		 * of the timer, which already reported this
1368 		 * almost-firing as an overrun.  So don't generate an event.
1369 		 */
1370 		if (likely(cpu_firing >= 0))
1371 			cpu_timer_fire(timer);
1372 		spin_unlock(&timer->it_lock);
1373 	}
1374 
1375 	/*
1376 	 * In case some timers were rescheduled after the queue got emptied,
1377 	 * wake up full dynticks CPUs.
1378 	 */
1379 	if (tsk->signal->cputimer.running)
1380 		posix_cpu_timer_kick_nohz();
1381 }
1382 
1383 /*
1384  * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
1385  * The tsk->sighand->siglock must be held by the caller.
1386  */
1387 void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1388 			   cputime_t *newval, cputime_t *oldval)
1389 {
1390 	union cpu_time_count now;
1391 
1392 	BUG_ON(clock_idx == CPUCLOCK_SCHED);
1393 	cpu_timer_sample_group(clock_idx, tsk, &now);
1394 
1395 	if (oldval) {
1396 		/*
1397 		 * We are setting an itimer.  *oldval is absolute and we update
1398 		 * it to be relative; the *newval argument is relative and we
1399 		 * update it to be absolute.
1400 		 */
1401 		if (*oldval) {
1402 			if (*oldval <= now.cpu) {
1403 				/* Just about to fire. */
1404 				*oldval = cputime_one_jiffy;
1405 			} else {
1406 				*oldval -= now.cpu;
1407 			}
1408 		}
1409 
1410 		if (!*newval)
1411 			goto out;
1412 		*newval += now.cpu;
1413 	}
1414 
1415 	/*
1416 	 * Update the expiration cache if we are the earliest timer, or if the
1417 	 * RLIMIT_CPU limit expires earlier than the prof_exp cpu timer.
1418 	 */
1419 	switch (clock_idx) {
1420 	case CPUCLOCK_PROF:
1421 		if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
1422 			tsk->signal->cputime_expires.prof_exp = *newval;
1423 		break;
1424 	case CPUCLOCK_VIRT:
1425 		if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
1426 			tsk->signal->cputime_expires.virt_exp = *newval;
1427 		break;
1428 	}
1429 out:
1430 	posix_cpu_timer_kick_nohz();
1431 }
1432 
1433 static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
1434 			    struct timespec *rqtp, struct itimerspec *it)
1435 {
1436 	struct k_itimer timer;
1437 	int error;
1438 
1439 	/*
1440 	 * Set up a temporary timer and then wait for it to go off.
1441 	 */
1442 	memset(&timer, 0, sizeof timer);
1443 	spin_lock_init(&timer.it_lock);
1444 	timer.it_clock = which_clock;
1445 	timer.it_overrun = -1;
1446 	error = posix_cpu_timer_create(&timer);
1447 	timer.it_process = current;
1448 	if (!error) {
1449 		static struct itimerspec zero_it;
1450 
1451 		memset(it, 0, sizeof *it);
1452 		it->it_value = *rqtp;
1453 
1454 		spin_lock_irq(&timer.it_lock);
1455 		error = posix_cpu_timer_set(&timer, flags, it, NULL);
1456 		if (error) {
1457 			spin_unlock_irq(&timer.it_lock);
1458 			return error;
1459 		}
1460 
1461 		while (!signal_pending(current)) {
1462 			if (timer.it.cpu.expires.sched == 0) {
1463 				/*
1464 				 * Our timer fired and was reset; the
1465 				 * deletion below cannot fail.
1466 				 */
1467 				posix_cpu_timer_del(&timer);
1468 				spin_unlock_irq(&timer.it_lock);
1469 				return 0;
1470 			}
1471 
1472 			/*
1473 			 * Block until cpu_timer_fire (or a signal) wakes us.
1474 			 */
1475 			__set_current_state(TASK_INTERRUPTIBLE);
1476 			spin_unlock_irq(&timer.it_lock);
1477 			schedule();
1478 			spin_lock_irq(&timer.it_lock);
1479 		}
1480 
1481 		/*
1482 		 * We were interrupted by a signal.
1483 		 */
1484 		sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp);
1485 		error = posix_cpu_timer_set(&timer, 0, &zero_it, it);
1486 		if (!error) {
1487 			/*
1488 			 * Timer is now unarmed, deletion can not fail.
1489 			 */
1490 			posix_cpu_timer_del(&timer);
1491 		}
1492 		spin_unlock_irq(&timer.it_lock);
1493 
1494 		while (error == TIMER_RETRY) {
1495 			/*
1496 			 * We need to handle the case when the timer was or is
1497 			 * in the middle of firing. In other cases we have
1498 			 * already freed resources.
1499 			 */
1500 			spin_lock_irq(&timer.it_lock);
1501 			error = posix_cpu_timer_del(&timer);
1502 			spin_unlock_irq(&timer.it_lock);
1503 		}
1504 
1505 		if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) {
1506 			/*
1507 			 * It actually did fire already.
1508 			 */
1509 			return 0;
1510 		}
1511 
1512 		error = -ERESTART_RESTARTBLOCK;
1513 	}
1514 
1515 	return error;
1516 }
1517 
1518 static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
1519 
1520 static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
1521 			    struct timespec *rqtp, struct timespec __user *rmtp)
1522 {
1523 	struct restart_block *restart_block =
1524 		&current_thread_info()->restart_block;
1525 	struct itimerspec it;
1526 	int error;
1527 
1528 	/*
1529 	 * Diagnose required errors first.
1530 	 */
1531 	if (CPUCLOCK_PERTHREAD(which_clock) &&
1532 	    (CPUCLOCK_PID(which_clock) == 0 ||
1533 	     CPUCLOCK_PID(which_clock) == current->pid))
1534 		return -EINVAL;
1535 
1536 	error = do_cpu_nanosleep(which_clock, flags, rqtp, &it);
1537 
1538 	if (error == -ERESTART_RESTARTBLOCK) {
1539 
1540 		if (flags & TIMER_ABSTIME)
1541 			return -ERESTARTNOHAND;
1542 		/*
1543 		 * Report back to the user the time still remaining.
1544 		 */
1545 		if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
1546 			return -EFAULT;
1547 
1548 		restart_block->fn = posix_cpu_nsleep_restart;
1549 		restart_block->nanosleep.clockid = which_clock;
1550 		restart_block->nanosleep.rmtp = rmtp;
1551 		restart_block->nanosleep.expires = timespec_to_ns(rqtp);
1552 	}
1553 	return error;
1554 }
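/*
 * Illustrative user-space view of this path (example only, assuming the
 * standard glibc wrapper): a relative sleep on the process CPU clock,
 *
 *	struct timespec req = { .tv_sec = 1, .tv_nsec = 0 }, rem;
 *	clock_nanosleep(CLOCK_PROCESS_CPUTIME_ID, 0, &req, &rem);
 *
 * blocks until the whole process has consumed one more second of CPU
 * time; if a signal interrupts it first, the unconsumed CPU time is
 * reported back in rem via the -ERESTART_RESTARTBLOCK handling above.
 */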
1555 
1556 static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
1557 {
1558 	clockid_t which_clock = restart_block->nanosleep.clockid;
1559 	struct timespec t;
1560 	struct itimerspec it;
1561 	int error;
1562 
1563 	t = ns_to_timespec(restart_block->nanosleep.expires);
1564 
1565 	error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it);
1566 
1567 	if (error == -ERESTART_RESTARTBLOCK) {
1568 		struct timespec __user *rmtp = restart_block->nanosleep.rmtp;
1569 		/*
1570 		 * Report back to the user the time still remaining.
1571 		 */
1572 		if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
1573 			return -EFAULT;
1574 
1575 		restart_block->nanosleep.expires = timespec_to_ns(&t);
1576 	}
1577 	return error;
1578 
1579 }
1580 
1581 #define PROCESS_CLOCK	MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
1582 #define THREAD_CLOCK	MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)
1583 
1584 static int process_cpu_clock_getres(const clockid_t which_clock,
1585 				    struct timespec *tp)
1586 {
1587 	return posix_cpu_clock_getres(PROCESS_CLOCK, tp);
1588 }
1589 static int process_cpu_clock_get(const clockid_t which_clock,
1590 				 struct timespec *tp)
1591 {
1592 	return posix_cpu_clock_get(PROCESS_CLOCK, tp);
1593 }
1594 static int process_cpu_timer_create(struct k_itimer *timer)
1595 {
1596 	timer->it_clock = PROCESS_CLOCK;
1597 	return posix_cpu_timer_create(timer);
1598 }
1599 static int process_cpu_nsleep(const clockid_t which_clock, int flags,
1600 			      struct timespec *rqtp,
1601 			      struct timespec __user *rmtp)
1602 {
1603 	return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp);
1604 }
1605 static long process_cpu_nsleep_restart(struct restart_block *restart_block)
1606 {
1607 	return -EINVAL;
1608 }
1609 static int thread_cpu_clock_getres(const clockid_t which_clock,
1610 				   struct timespec *tp)
1611 {
1612 	return posix_cpu_clock_getres(THREAD_CLOCK, tp);
1613 }
1614 static int thread_cpu_clock_get(const clockid_t which_clock,
1615 				struct timespec *tp)
1616 {
1617 	return posix_cpu_clock_get(THREAD_CLOCK, tp);
1618 }
1619 static int thread_cpu_timer_create(struct k_itimer *timer)
1620 {
1621 	timer->it_clock = THREAD_CLOCK;
1622 	return posix_cpu_timer_create(timer);
1623 }
1624 
1625 struct k_clock clock_posix_cpu = {
1626 	.clock_getres	= posix_cpu_clock_getres,
1627 	.clock_set	= posix_cpu_clock_set,
1628 	.clock_get	= posix_cpu_clock_get,
1629 	.timer_create	= posix_cpu_timer_create,
1630 	.nsleep		= posix_cpu_nsleep,
1631 	.nsleep_restart	= posix_cpu_nsleep_restart,
1632 	.timer_set	= posix_cpu_timer_set,
1633 	.timer_del	= posix_cpu_timer_del,
1634 	.timer_get	= posix_cpu_timer_get,
1635 };
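/*
 * How this k_clock is reached (descriptive note, based on
 * kernel/posix-timers.c of the same era): clockid_to_kclock() dispatches
 * every negative clockid that is not a dynamic (clock-fd) clock to
 * clock_posix_cpu, so the per-process and per-thread CPU clockids built
 * by the CPUCLOCK_* macros all land on the callbacks above.
 */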
1636 
1637 static __init int init_posix_cpu_timers(void)
1638 {
1639 	struct k_clock process = {
1640 		.clock_getres	= process_cpu_clock_getres,
1641 		.clock_get	= process_cpu_clock_get,
1642 		.timer_create	= process_cpu_timer_create,
1643 		.nsleep		= process_cpu_nsleep,
1644 		.nsleep_restart	= process_cpu_nsleep_restart,
1645 	};
1646 	struct k_clock thread = {
1647 		.clock_getres	= thread_cpu_clock_getres,
1648 		.clock_get	= thread_cpu_clock_get,
1649 		.timer_create	= thread_cpu_timer_create,
1650 	};
1651 	struct timespec ts;
1652 
1653 	posix_timers_register_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
1654 	posix_timers_register_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
1655 
1656 	cputime_to_timespec(cputime_one_jiffy, &ts);
1657 	onecputick = ts.tv_nsec;
1658 	WARN_ON(ts.tv_sec != 0);
1659 
1660 	return 0;
1661 }
1662 __initcall(init_posix_cpu_timers);
1663