#include <linux/cpufreq.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/tsacct_kern.h>
#include <linux/kernel_stat.h>
#include <linux/static_key.h>
#include <linux/context_tracking.h>
#include "sched.h"


#ifdef CONFIG_IRQ_TIME_ACCOUNTING

/*
 * There are no locks covering percpu hardirq/softirq time.
 * They are only modified in vtime_account, on the corresponding CPU
 * with interrupts disabled. So, writes are safe.
 * They are read and saved off onto struct rq in update_rq_clock().
 * This may result in another CPU reading this CPU's irq time and racing
 * with irq/vtime_account on this CPU. We would either get the old or the
 * new value, with the side effect of accounting a slice of irq time to the
 * wrong task when an irq is in progress while we read rq->clock. That is a
 * worthy compromise in place of having locks on each irq in
 * account_system_time.
 */
DEFINE_PER_CPU(u64, cpu_hardirq_time);
DEFINE_PER_CPU(u64, cpu_softirq_time);

static DEFINE_PER_CPU(u64, irq_start_time);
static int sched_clock_irqtime;

void enable_sched_clock_irqtime(void)
{
	sched_clock_irqtime = 1;
}

void disable_sched_clock_irqtime(void)
{
	sched_clock_irqtime = 0;
}

#ifndef CONFIG_64BIT
DEFINE_PER_CPU(seqcount_t, irq_time_seq);
#endif /* CONFIG_64BIT */
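
/*
 * Illustrative sketch (not part of the original file): on 32-bit, readers
 * of the two per-cpu counters above are expected to pair with irq_time_seq
 * in a retry loop, so a torn 64-bit read is never observed. The helper name
 * below is hypothetical; the matching write side is the
 * irq_time_write_begin()/irq_time_write_end() pair used further down.
 */
#ifndef CONFIG_64BIT
static inline u64 irq_time_read_sketch(int cpu)
{
	u64 irq_time;
	unsigned seq;

	do {
		/* Snapshot the sequence count, then read both counters. */
		seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
		irq_time = per_cpu(cpu_softirq_time, cpu) +
			   per_cpu(cpu_hardirq_time, cpu);
		/* Retry if a writer on that CPU raced with us. */
	} while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));

	return irq_time;
}
#endif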

/*
 * Called before incrementing preempt_count on {soft,}irq_enter
 * and before decrementing preempt_count on {soft,}irq_exit.
 */
void irqtime_account_irq(struct task_struct *curr)
{
	unsigned long flags;
	s64 delta;
	int cpu;

	if (!sched_clock_irqtime)
		return;

	local_irq_save(flags);

	cpu = smp_processor_id();
	delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
	__this_cpu_add(irq_start_time, delta);

	irq_time_write_begin();
	/*
	 * We do not account for softirq time from ksoftirqd here.
	 * We want to continue accounting softirq time to the ksoftirqd thread
	 * in that case, so as not to confuse the scheduler with a special task
	 * that does not consume any time, but still wants to run.
	 */
	if (hardirq_count())
		__this_cpu_add(cpu_hardirq_time, delta);
	else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
		__this_cpu_add(cpu_softirq_time, delta);

	irq_time_write_end();
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);

static int irqtime_account_hi_update(void)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;
	unsigned long flags;
	u64 latest_ns;
	int ret = 0;

	local_irq_save(flags);
	latest_ns = this_cpu_read(cpu_hardirq_time);
	if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
		ret = 1;
	local_irq_restore(flags);
	return ret;
}

static int irqtime_account_si_update(void)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;
	unsigned long flags;
	u64 latest_ns;
	int ret = 0;

	local_irq_save(flags);
	latest_ns = this_cpu_read(cpu_softirq_time);
	if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
		ret = 1;
	local_irq_restore(flags);
	return ret;
}

#else /* CONFIG_IRQ_TIME_ACCOUNTING */

#define sched_clock_irqtime	(0)

#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */

static inline void task_group_account_field(struct task_struct *p, int index,
					    u64 tmp)
{
	/*
	 * Since all updates are sure to touch the root cgroup, we
	 * get ourselves ahead and touch it first. If the root cgroup
	 * is the only cgroup, then nothing else should be necessary.
	 */
	__get_cpu_var(kernel_cpustat).cpustat[index] += tmp;

	cpuacct_account_field(p, index, tmp);
}

/*
 * Account user cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in user space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
void account_user_time(struct task_struct *p, cputime_t cputime,
		       cputime_t cputime_scaled)
{
	int index;

	/* Add user time to process. */
	p->utime += cputime;
	p->utimescaled += cputime_scaled;
	account_group_user_time(p, cputime);

	index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;

	/* Add user time to cpustat. */
	task_group_account_field(p, index, (__force u64) cputime);

	/* Account for user time used */
	acct_account_cputime(p);

#ifdef CONFIG_CPU_FREQ_STAT
	/* Account power usage for user time */
	acct_update_power(p, cputime);
#endif
}

/*
 * Account guest cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in virtual machine since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
static void account_guest_time(struct task_struct *p, cputime_t cputime,
			       cputime_t cputime_scaled)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	/* Add guest time to process. */
	p->utime += cputime;
	p->utimescaled += cputime_scaled;
	account_group_user_time(p, cputime);
	p->gtime += cputime;

	/* Add guest time to cpustat. */
	if (TASK_NICE(p) > 0) {
		cpustat[CPUTIME_NICE] += (__force u64) cputime;
		cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
	} else {
		cpustat[CPUTIME_USER] += (__force u64) cputime;
		cpustat[CPUTIME_GUEST] += (__force u64) cputime;
	}
}

/*
 * Account system cpu time to a process and the desired cpustat field
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in kernel space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 * @index: index of the cpustat field that has to be updated
 */
static inline
void __account_system_time(struct task_struct *p, cputime_t cputime,
			cputime_t cputime_scaled, int index)
{
	/* Add system time to process. */
	p->stime += cputime;
	p->stimescaled += cputime_scaled;
	account_group_system_time(p, cputime);

	/* Add system time to cpustat. */
	task_group_account_field(p, index, (__force u64) cputime);

	/* Account for system time used */
	acct_account_cputime(p);

#ifdef CONFIG_CPU_FREQ_STAT
	/* Account power usage for system time */
	acct_update_power(p, cputime);
#endif
}

/*
 * Account system cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @hardirq_offset: the offset to subtract from hardirq_count()
 * @cputime: the cpu time spent in kernel space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
void account_system_time(struct task_struct *p, int hardirq_offset,
			 cputime_t cputime, cputime_t cputime_scaled)
{
	int index;

	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
		account_guest_time(p, cputime, cputime_scaled);
		return;
	}

	if (hardirq_count() - hardirq_offset)
		index = CPUTIME_IRQ;
	else if (in_serving_softirq())
		index = CPUTIME_SOFTIRQ;
	else
		index = CPUTIME_SYSTEM;

	__account_system_time(p, cputime, cputime_scaled, index);
}
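
/*
 * Note: the periodic tick path (account_process_tick() below) passes
 * HARDIRQ_OFFSET as @hardirq_offset here, so the hardirq context of the
 * tick itself is subtracted out and only time that interrupted another
 * hardirq is classified as CPUTIME_IRQ.
 */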

/*
 * Account for involuntary wait time.
 * @cputime: the cpu time spent in involuntary wait
 */
void account_steal_time(cputime_t cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	cpustat[CPUTIME_STEAL] += (__force u64) cputime;
}

/*
 * Account for idle time.
 * @cputime: the cpu time spent in idle wait
 */
void account_idle_time(cputime_t cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;
	struct rq *rq = this_rq();

	if (atomic_read(&rq->nr_iowait) > 0)
		cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
	else
		cpustat[CPUTIME_IDLE] += (__force u64) cputime;
}

static __always_inline bool steal_account_process_tick(void)
{
#ifdef CONFIG_PARAVIRT
	if (static_key_false(&paravirt_steal_enabled)) {
		u64 steal, st = 0;

		steal = paravirt_steal_clock(smp_processor_id());
		steal -= this_rq()->prev_steal_time;

		st = steal_ticks(steal);
		this_rq()->prev_steal_time += st * TICK_NSEC;

		account_steal_time(st);
		return st;
	}
#endif
	return false;
}

/*
 * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
 * tasks (sum on group iteration) belonging to @tsk's group.
 */
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
	struct signal_struct *sig = tsk->signal;
	cputime_t utime, stime;
	struct task_struct *t;

	times->utime = sig->utime;
	times->stime = sig->stime;
	times->sum_exec_runtime = sig->sum_sched_runtime;

	rcu_read_lock();
	/* make sure we can trust tsk->thread_group list */
	if (!likely(pid_alive(tsk)))
		goto out;

	t = tsk;
	do {
		task_cputime(t, &utime, &stime);
		times->utime += utime;
		times->stime += stime;
		times->sum_exec_runtime += task_sched_runtime(t);
	} while_each_thread(tsk, t);
out:
	rcu_read_unlock();
}

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * Account a tick to a process and cpustat
 * @p: the process that the cpu time gets accounted to
 * @user_tick: is the tick from userspace
 * @rq: the pointer to rq
 *
 * Tick demultiplexing follows the order
 * - pending hardirq update
 * - pending softirq update
 * - user_time
 * - idle_time
 * - system time
 *   - check for guest_time
 *   - else account as system_time
 *
 * The check for hardirq is done for both system and user time, as there is
 * no timer going off while we are in hardirq and hence we may never get an
 * opportunity to update it solely in system time.
 * p->stime and friends are only updated on system time and not on irq or
 * softirq, as those no longer count in task exec_runtime.
 */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
						struct rq *rq)
{
	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	if (steal_account_process_tick())
		return;

	if (irqtime_account_hi_update()) {
		cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy;
	} else if (irqtime_account_si_update()) {
		cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy;
	} else if (this_cpu_ksoftirqd() == p) {
		/*
		 * ksoftirqd time does not get accounted in cpu_softirq_time.
		 * So, we have to handle it separately here.
		 * Also, p->stime needs to be updated for ksoftirqd.
		 */
		__account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
					CPUTIME_SOFTIRQ);
	} else if (user_tick) {
		account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
	} else if (p == rq->idle) {
		account_idle_time(cputime_one_jiffy);
	} else if (p->flags & PF_VCPU) { /* System time or guest time */
		account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
	} else {
		__account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
					CPUTIME_SYSTEM);
	}
}

static void irqtime_account_idle_ticks(int ticks)
{
	int i;
	struct rq *rq = this_rq();

	for (i = 0; i < ticks; i++)
		irqtime_account_process_tick(current, 0, rq);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
static inline void irqtime_account_idle_ticks(int ticks) {}
static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
						struct rq *rq) {}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */

/*
 * Use precise platform statistics if available:
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING

#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
void vtime_task_switch(struct task_struct *prev)
{
	if (!vtime_accounting_enabled())
		return;

	if (is_idle_task(prev))
		vtime_account_idle(prev);
	else
		vtime_account_system(prev);

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	vtime_account_user(prev);
#endif
	arch_vtime_task_switch(prev);
}
#endif

/*
 * Archs that account the whole time spent in the idle task
 * (outside irq) as idle time can rely on this and just implement
 * vtime_account_system() and vtime_account_idle(). Archs that
 * have another meaning of idle time (s390 only includes the
 * time spent by the CPU when it's in low power mode) must override
 * vtime_account_irq_enter().
 */
#ifndef __ARCH_HAS_VTIME_ACCOUNT
void vtime_account_irq_enter(struct task_struct *tsk)
{
	if (!vtime_accounting_enabled())
		return;

	if (!in_interrupt()) {
		/*
		 * If we interrupted user mode, context_tracking_in_user()
		 * is 1 because context tracking doesn't hook on irq
		 * entry/exit. This way we know whether we need to flush
		 * user time on kernel entry.
		 */
		if (context_tracking_in_user()) {
			vtime_account_user(tsk);
			return;
		}

		if (is_idle_task(tsk)) {
			vtime_account_idle(tsk);
			return;
		}
	}
	vtime_account_system(tsk);
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */


#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	*ut = p->utime;
	*st = p->stime;
}

void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);

	*ut = cputime.utime;
	*st = cputime.stime;
}
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
/*
 * Account a single tick of cpu time.
 * @p: the process that the cpu time gets accounted to
 * @user_tick: indicates if the tick is a user or a system tick
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
	struct rq *rq = this_rq();

	if (vtime_accounting_enabled())
		return;

	if (sched_clock_irqtime) {
		irqtime_account_process_tick(p, user_tick, rq);
		return;
	}

	if (steal_account_process_tick())
		return;

	if (user_tick)
		account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
		account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
				    one_jiffy_scaled);
	else
		account_idle_time(cputime_one_jiffy);
}

/*
 * Account multiple ticks of steal time.
 * @ticks: number of stolen ticks
 */
void account_steal_ticks(unsigned long ticks)
{
	account_steal_time(jiffies_to_cputime(ticks));
}

/*
 * Account multiple ticks of idle time.
 * @ticks: number of idle ticks
 */
void account_idle_ticks(unsigned long ticks)
{
	if (sched_clock_irqtime) {
		irqtime_account_idle_ticks(ticks);
		return;
	}

	account_idle_time(jiffies_to_cputime(ticks));
}

/*
 * Perform (stime * rtime) / total, but avoid multiplication overflow by
 * losing precision when the numbers are big.
 */
static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
{
	u64 scaled;

	for (;;) {
		/* Make sure "rtime" is the bigger of stime/rtime */
		if (stime > rtime) {
			u64 tmp = rtime; rtime = stime; stime = tmp;
		}

		/* Make sure 'total' fits in 32 bits */
		if (total >> 32)
			goto drop_precision;

		/* Does rtime (and thus stime) fit in 32 bits? */
		if (!(rtime >> 32))
			break;

		/* Can we just balance rtime/stime rather than dropping bits? */
		if (stime >> 31)
			goto drop_precision;

		/* We can grow stime and shrink rtime and try to make them both fit */
		stime <<= 1;
		rtime >>= 1;
		continue;

drop_precision:
		/* We drop from rtime, it has more bits than stime */
		rtime >>= 1;
		total >>= 1;
	}

	/*
	 * Make sure gcc understands that this is a 32x32->64 multiply,
	 * followed by a 64/32->64 divide.
	 */
	scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
	return (__force cputime_t) scaled;
}
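
/*
 * Worked example (illustrative, not from the original source): with
 * stime = 1s and utime = 3s of tick-based samples (total = 4s) but
 * rtime = 8s of scheduler-accounted runtime, scale_stime() returns
 * roughly 8 * 1/4 = 2s, and cputime_adjust() below hands the remaining
 * 6s to utime, preserving the 1:3 stime/utime ratio.
 */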

/*
 * Adjust tick-based cputime random precision against scheduler
 * runtime accounting.
 */
static void cputime_adjust(struct task_cputime *curr,
			   struct cputime *prev,
			   cputime_t *ut, cputime_t *st)
{
	cputime_t rtime, stime, utime, total;

	if (vtime_accounting_enabled()) {
		*ut = curr->utime;
		*st = curr->stime;
		return;
	}

	stime = curr->stime;
	total = stime + curr->utime;

	/*
	 * Tick-based cputime accounting depends on whether the random
	 * scheduling timeslices of a task happen to be interrupted by the
	 * timer or not. Depending on these circumstances, the number of
	 * such interrupts may over- or under-estimate the real user and
	 * system cputime, matching it only with variable precision.
	 *
	 * Fix this by scaling these tick-based values against the total
	 * runtime accounted by the CFS scheduler.
	 */
	rtime = nsecs_to_cputime(curr->sum_exec_runtime);

	/*
	 * Update userspace-visible utime/stime values only if the actual
	 * execution time is bigger than what was already exported. Note that
	 * it can happen that we provided bigger values earlier due to scaling
	 * inaccuracy on big numbers.
	 */
	if (prev->stime + prev->utime >= rtime)
		goto out;

	if (total) {
		stime = scale_stime((__force u64)stime,
				    (__force u64)rtime, (__force u64)total);
		utime = rtime - stime;
	} else {
		stime = rtime;
		utime = 0;
	}

	/*
	 * If the tick based count grows faster than the scheduler one,
	 * the result of the scaling may go backward.
	 * Let's enforce monotonicity.
	 */
	prev->stime = max(prev->stime, stime);
	prev->utime = max(prev->utime, utime);

out:
	*ut = prev->utime;
	*st = prev->stime;
}

void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	struct task_cputime cputime = {
		.sum_exec_runtime = p->se.sum_exec_runtime,
	};

	task_cputime(p, &cputime.utime, &cputime.stime);
	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}

/*
 * Must be called with siglock held.
 */
void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);
	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
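
/*
 * Illustrative usage (hypothetical caller, not from this file), honoring
 * the siglock requirement stated above:
 *
 *	cputime_t ut, st;
 *
 *	spin_lock_irq(&p->sighand->siglock);
 *	thread_group_cputime_adjusted(p, &ut, &st);
 *	spin_unlock_irq(&p->sighand->siglock);
 */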
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static unsigned long long vtime_delta(struct task_struct *tsk)
{
	unsigned long long clock;

	clock = local_clock();
	if (clock < tsk->vtime_snap)
		return 0;

	return clock - tsk->vtime_snap;
}

static cputime_t get_vtime_delta(struct task_struct *tsk)
{
	unsigned long long delta = vtime_delta(tsk);

	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING);
	tsk->vtime_snap += delta;

	/* CHECKME: always safe to convert nsecs to cputime? */
	return nsecs_to_cputime(delta);
}

static void __vtime_account_system(struct task_struct *tsk)
{
	cputime_t delta_cpu = get_vtime_delta(tsk);

	account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
}

void vtime_account_system(struct task_struct *tsk)
{
	if (!vtime_accounting_enabled())
		return;

	write_seqlock(&tsk->vtime_seqlock);
	__vtime_account_system(tsk);
	write_sequnlock(&tsk->vtime_seqlock);
}

void vtime_account_irq_exit(struct task_struct *tsk)
{
	if (!vtime_accounting_enabled())
		return;

	write_seqlock(&tsk->vtime_seqlock);
	if (context_tracking_in_user())
		tsk->vtime_snap_whence = VTIME_USER;
	__vtime_account_system(tsk);
	write_sequnlock(&tsk->vtime_seqlock);
}

void vtime_account_user(struct task_struct *tsk)
{
	cputime_t delta_cpu;

	if (!vtime_accounting_enabled())
		return;

	delta_cpu = get_vtime_delta(tsk);

	write_seqlock(&tsk->vtime_seqlock);
	tsk->vtime_snap_whence = VTIME_SYS;
	account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
	write_sequnlock(&tsk->vtime_seqlock);
}

void vtime_user_enter(struct task_struct *tsk)
{
	if (!vtime_accounting_enabled())
		return;

	write_seqlock(&tsk->vtime_seqlock);
	tsk->vtime_snap_whence = VTIME_USER;
	__vtime_account_system(tsk);
	write_sequnlock(&tsk->vtime_seqlock);
}

void vtime_guest_enter(struct task_struct *tsk)
{
	write_seqlock(&tsk->vtime_seqlock);
	__vtime_account_system(tsk);
	current->flags |= PF_VCPU;
	write_sequnlock(&tsk->vtime_seqlock);
}

void vtime_guest_exit(struct task_struct *tsk)
{
	write_seqlock(&tsk->vtime_seqlock);
	__vtime_account_system(tsk);
	current->flags &= ~PF_VCPU;
	write_sequnlock(&tsk->vtime_seqlock);
}

void vtime_account_idle(struct task_struct *tsk)
{
	cputime_t delta_cpu = get_vtime_delta(tsk);

	account_idle_time(delta_cpu);
}

bool vtime_accounting_enabled(void)
{
	return context_tracking_active();
}

void arch_vtime_task_switch(struct task_struct *prev)
{
	write_seqlock(&prev->vtime_seqlock);
	prev->vtime_snap_whence = VTIME_SLEEPING;
	write_sequnlock(&prev->vtime_seqlock);

	write_seqlock(&current->vtime_seqlock);
	current->vtime_snap_whence = VTIME_SYS;
	current->vtime_snap = sched_clock_cpu(smp_processor_id());
	write_sequnlock(&current->vtime_seqlock);
}

void vtime_init_idle(struct task_struct *t, int cpu)
{
	unsigned long flags;

	write_seqlock_irqsave(&t->vtime_seqlock, flags);
	t->vtime_snap_whence = VTIME_SYS;
	t->vtime_snap = sched_clock_cpu(cpu);
	write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
}

cputime_t task_gtime(struct task_struct *t)
{
	unsigned int seq;
	cputime_t gtime;

	do {
		seq = read_seqbegin(&t->vtime_seqlock);

		gtime = t->gtime;
		if (t->flags & PF_VCPU)
			gtime += vtime_delta(t);

	} while (read_seqretry(&t->vtime_seqlock, seq));

	return gtime;
}

/*
 * Fetch cputime raw values from fields of task_struct and
 * add up the pending nohz execution time since the last
 * cputime snapshot.
 */
static void
fetch_task_cputime(struct task_struct *t,
		   cputime_t *u_dst, cputime_t *s_dst,
		   cputime_t *u_src, cputime_t *s_src,
		   cputime_t *udelta, cputime_t *sdelta)
{
	unsigned int seq;
	unsigned long long delta;

	do {
		*udelta = 0;
		*sdelta = 0;

		seq = read_seqbegin(&t->vtime_seqlock);

		if (u_dst)
			*u_dst = *u_src;
		if (s_dst)
			*s_dst = *s_src;

		/* Task is sleeping, nothing to add */
		if (t->vtime_snap_whence == VTIME_SLEEPING ||
		    is_idle_task(t))
			continue;

		delta = vtime_delta(t);

		/*
		 * The task runs either in user or kernel space; add the
		 * pending nohz time to the right place.
		 */
		if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
			*udelta = delta;
		} else {
			if (t->vtime_snap_whence == VTIME_SYS)
				*sdelta = delta;
		}
	} while (read_seqretry(&t->vtime_seqlock, seq));
}


void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
{
	cputime_t udelta, sdelta;

	fetch_task_cputime(t, utime, stime, &t->utime,
			   &t->stime, &udelta, &sdelta);
	if (utime)
		*utime += udelta;
	if (stime)
		*stime += sdelta;
}
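
/*
 * Illustrative usage (hypothetical caller, not from this file): both
 * out-pointers are optional, since fetch_task_cputime() skips any that
 * are NULL, so a caller interested only in system time can do:
 *
 *	cputime_t stime;
 *
 *	task_cputime(current, NULL, &stime);
 */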

void task_cputime_scaled(struct task_struct *t,
			 cputime_t *utimescaled, cputime_t *stimescaled)
{
	cputime_t udelta, sdelta;

	fetch_task_cputime(t, utimescaled, stimescaled,
			   &t->utimescaled, &t->stimescaled, &udelta, &sdelta);
	if (utimescaled)
		*utimescaled += cputime_to_scaled(udelta);
	if (stimescaled)
		*stimescaled += cputime_to_scaled(sdelta);
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */