1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _LINUX_SCHED_H
3 #define _LINUX_SCHED_H
4
5 /*
6 * Define 'struct task_struct' and provide the main scheduler
7 * APIs (schedule(), wakeup variants, etc.)
8 */
9
10 #include <uapi/linux/sched.h>
11
12 #include <asm/current.h>
13 #include <asm/processor.h>
14 #include <linux/thread_info.h>
15 #include <linux/preempt.h>
16 #include <linux/cpumask_types.h>
17
18 #include <linux/cache.h>
19 #include <linux/irqflags_types.h>
20 #include <linux/smp_types.h>
21 #include <linux/pid_types.h>
22 #include <linux/sem_types.h>
23 #include <linux/shm.h>
24 #include <linux/kmsan_types.h>
25 #include <linux/mutex_types.h>
26 #include <linux/plist_types.h>
27 #include <linux/hrtimer_types.h>
28 #include <linux/timer_types.h>
29 #include <linux/seccomp_types.h>
30 #include <linux/nodemask_types.h>
31 #include <linux/refcount_types.h>
32 #include <linux/resource.h>
33 #include <linux/latencytop.h>
34 #include <linux/sched/prio.h>
35 #include <linux/sched/types.h>
36 #include <linux/signal_types.h>
37 #include <linux/spinlock.h>
38 #include <linux/syscall_user_dispatch_types.h>
39 #include <linux/mm_types_task.h>
40 #include <linux/netdevice_xmit.h>
41 #include <linux/task_io_accounting.h>
42 #include <linux/posix-timers_types.h>
43 #include <linux/restart_block.h>
44 #include <uapi/linux/rseq.h>
45 #include <linux/seqlock_types.h>
46 #include <linux/kcsan.h>
47 #include <linux/rv.h>
48 #include <linux/livepatch_sched.h>
49 #include <linux/uidgid_types.h>
50 #include <linux/android_vendor.h>
51 #include <linux/android_kabi.h>
52 #include <asm/kmap_size.h>
53
54 /* task_struct member predeclarations (sorted alphabetically): */
55 struct audit_context;
56 struct bio_list;
57 struct blk_plug;
58 struct bpf_local_storage;
59 struct bpf_run_ctx;
60 struct bpf_net_context;
61 struct capture_control;
62 struct cfs_rq;
63 struct fs_struct;
64 struct futex_pi_state;
65 struct io_context;
66 struct io_uring_task;
67 struct mempolicy;
68 struct nameidata;
69 struct nsproxy;
70 struct perf_event_context;
71 struct pid_namespace;
72 struct pipe_inode_info;
73 struct rcu_node;
74 struct reclaim_state;
75 struct robust_list_head;
76 struct root_domain;
77 struct rq;
78 struct sched_attr;
79 struct sched_dl_entity;
80 struct seq_file;
81 struct sighand_struct;
82 struct signal_struct;
83 struct task_delay_info;
84 struct task_group;
85 struct task_struct;
86 struct user_event_mm;
87
88 #include <linux/sched/ext.h>
89
90 /*
91 * Task state bitmask. NOTE! These bits are also
92 * encoded in fs/proc/array.c: get_task_state().
93 *
94 * We have two separate sets of flags: task->__state
95 * is about runnability, while task->exit_state are
96 * about the task exiting. Confusing, but this way
97 * modifying one set can't modify the other one by
98 * mistake.
99 */
100
101 /* Used in tsk->__state: */
102 #define TASK_RUNNING 0x00000000
103 #define TASK_INTERRUPTIBLE 0x00000001
104 #define TASK_UNINTERRUPTIBLE 0x00000002
105 #define __TASK_STOPPED 0x00000004
106 #define __TASK_TRACED 0x00000008
107 /* Used in tsk->exit_state: */
108 #define EXIT_DEAD 0x00000010
109 #define EXIT_ZOMBIE 0x00000020
110 #define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD)
111 /* Used in tsk->__state again: */
112 #define TASK_PARKED 0x00000040
113 #define TASK_DEAD 0x00000080
114 #define TASK_WAKEKILL 0x00000100
115 #define TASK_WAKING 0x00000200
116 #define TASK_NOLOAD 0x00000400
117 #define TASK_NEW 0x00000800
118 #define TASK_RTLOCK_WAIT 0x00001000
119 #define TASK_FREEZABLE 0x00002000
120 #define __TASK_FREEZABLE_UNSAFE (0x00004000 * IS_ENABLED(CONFIG_LOCKDEP))
121 #define TASK_FROZEN 0x00008000
122 #define TASK_STATE_MAX 0x00010000
123
124 #define TASK_ANY (TASK_STATE_MAX-1)
125
126 /*
127 * DO NOT ADD ANY NEW USERS !
128 */
129 #define TASK_FREEZABLE_UNSAFE (TASK_FREEZABLE | __TASK_FREEZABLE_UNSAFE)
130
131 /* Convenience macros for the sake of set_current_state: */
132 #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
133 #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED)
134 #define TASK_TRACED __TASK_TRACED
135
136 #define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD)
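
/*
 * For illustration: a killable sleep is typically entered with
 *
 *	set_current_state(TASK_KILLABLE);
 *
 * TASK_KILLABLE is TASK_WAKEKILL | TASK_UNINTERRUPTIBLE, so the sleep is
 * uninterruptible except that a fatal signal can still wake the task via
 * the TASK_WAKEKILL bit.
 */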
137
138 /* Convenience macros for the sake of wake_up(): */
139 #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
140
141 /* get_task_state(): */
142 #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \
143 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
144 __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \
145 TASK_PARKED)
146
147 #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING)
148
149 #define task_is_traced(task) ((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0)
150 #define task_is_stopped(task) ((READ_ONCE(task->jobctl) & JOBCTL_STOPPED) != 0)
151 #define task_is_stopped_or_traced(task) ((READ_ONCE(task->jobctl) & (JOBCTL_STOPPED | JOBCTL_TRACED)) != 0)
152
153 /*
154 * Special states are those that do not use the normal wait-loop pattern. See
155 * the comment with set_special_state().
156 */
157 #define is_special_task_state(state) \
158 ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | \
159 TASK_DEAD | TASK_FROZEN))
160
161 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
162 # define debug_normal_state_change(state_value) \
163 do { \
164 WARN_ON_ONCE(is_special_task_state(state_value)); \
165 current->task_state_change = _THIS_IP_; \
166 } while (0)
167
168 # define debug_special_state_change(state_value) \
169 do { \
170 WARN_ON_ONCE(!is_special_task_state(state_value)); \
171 current->task_state_change = _THIS_IP_; \
172 } while (0)
173
174 # define debug_rtlock_wait_set_state() \
175 do { \
176 current->saved_state_change = current->task_state_change;\
177 current->task_state_change = _THIS_IP_; \
178 } while (0)
179
180 # define debug_rtlock_wait_restore_state() \
181 do { \
182 current->task_state_change = current->saved_state_change;\
183 } while (0)
184
185 #else
186 # define debug_normal_state_change(cond) do { } while (0)
187 # define debug_special_state_change(cond) do { } while (0)
188 # define debug_rtlock_wait_set_state() do { } while (0)
189 # define debug_rtlock_wait_restore_state() do { } while (0)
190 #endif
191
192 /*
193 * set_current_state() includes a barrier so that the write of current->__state
194 * is correctly serialised wrt the caller's subsequent test of whether to
195 * actually sleep:
196 *
197 * for (;;) {
198 * set_current_state(TASK_UNINTERRUPTIBLE);
199 * if (CONDITION)
200 * break;
201 *
202 * schedule();
203 * }
204 * __set_current_state(TASK_RUNNING);
205 *
206 * If the caller does not need such serialisation (because, for instance, the
207 * CONDITION test and condition change and wakeup are under the same lock) then
208 * use __set_current_state().
209 *
210 * The above is typically ordered against the wakeup, which does:
211 *
212 * CONDITION = 1;
213 * wake_up_state(p, TASK_UNINTERRUPTIBLE);
214 *
215 * where wake_up_state()/try_to_wake_up() executes a full memory barrier before
216 * accessing p->__state.
217 *
218 * Wakeup will do: if (@state & p->__state) p->__state = TASK_RUNNING, that is,
219 * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
220 * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
221 *
222 * However, with slightly different timing the wakeup TASK_RUNNING store can
223 * also collide with the TASK_UNINTERRUPTIBLE store. Losing that store is not
224 * a problem either because that will result in one extra go around the loop
225 * and our @cond test will save the day.
226 *
227 * Also see the comments of try_to_wake_up().
228 */
229 #define __set_current_state(state_value) \
230 do { \
231 debug_normal_state_change((state_value)); \
232 WRITE_ONCE(current->__state, (state_value)); \
233 } while (0)
234
235 #define set_current_state(state_value) \
236 do { \
237 debug_normal_state_change((state_value)); \
238 smp_store_mb(current->__state, (state_value)); \
239 } while (0)
240
241 /*
242 * set_special_state() should be used for those states when the blocking task
243 * can not use the regular condition based wait-loop. In that case we must
244 * serialize against wakeups such that any possible in-flight TASK_RUNNING
245 * stores will not collide with our state change.
246 */
247 #define set_special_state(state_value) \
248 do { \
249 unsigned long flags; /* may shadow */ \
250 \
251 raw_spin_lock_irqsave(&current->pi_lock, flags); \
252 debug_special_state_change((state_value)); \
253 WRITE_ONCE(current->__state, (state_value)); \
254 raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
255 } while (0)
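
/*
 * For illustration: kthread parking, for instance, enters its special state
 * roughly as follows, with kthread_unpark() providing the wakeup:
 *
 *	set_special_state(TASK_PARKED);
 *	...
 *	schedule();
 */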
256
257 /*
258 * PREEMPT_RT specific variants for "sleeping" spin/rwlocks
259 *
260 * RT's spin/rwlock substitutions are state preserving. The state of the
261 * task when blocking on the lock is saved in task_struct::saved_state and
262 * restored after the lock has been acquired. These operations are
263 * serialized by task_struct::pi_lock against try_to_wake_up(). Any non RT
264 * lock related wakeups while the task is blocked on the lock are
265 * redirected to operate on task_struct::saved_state to ensure that these
266 * are not dropped. On restore task_struct::saved_state is set to
267 * TASK_RUNNING so any wakeup attempt redirected to saved_state will fail.
268 *
269 * The lock operation looks like this:
270 *
271 * current_save_and_set_rtlock_wait_state();
272 * for (;;) {
273 * if (try_lock())
274 * break;
275 * raw_spin_unlock_irq(&lock->wait_lock);
276 * schedule_rtlock();
277 * raw_spin_lock_irq(&lock->wait_lock);
278 * set_current_state(TASK_RTLOCK_WAIT);
279 * }
280 * current_restore_rtlock_saved_state();
281 */
282 #define current_save_and_set_rtlock_wait_state() \
283 do { \
284 lockdep_assert_irqs_disabled(); \
285 raw_spin_lock(&current->pi_lock); \
286 current->saved_state = current->__state; \
287 debug_rtlock_wait_set_state(); \
288 WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT); \
289 raw_spin_unlock(&current->pi_lock); \
290 } while (0);
291
292 #define current_restore_rtlock_saved_state() \
293 do { \
294 lockdep_assert_irqs_disabled(); \
295 raw_spin_lock(&current->pi_lock); \
296 debug_rtlock_wait_restore_state(); \
297 WRITE_ONCE(current->__state, current->saved_state); \
298 current->saved_state = TASK_RUNNING; \
299 raw_spin_unlock(&current->pi_lock); \
300 } while (0);
301
302 #define get_current_state() READ_ONCE(current->__state)
303
304 /*
305 * Define the task command name length as enum, then it can be visible to
306 * BPF programs.
307 */
308 enum {
309 TASK_COMM_LEN = 16,
310 };
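
/*
 * For illustration: TASK_COMM_LEN is the required size for buffers passed
 * to get_task_comm() (see further down in this header), e.g.:
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	get_task_comm(comm, current);
 */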
311
312 extern void sched_tick(void);
313
314 #define MAX_SCHEDULE_TIMEOUT LONG_MAX
315
316 extern long schedule_timeout(long timeout);
317 extern long schedule_timeout_interruptible(long timeout);
318 extern long schedule_timeout_killable(long timeout);
319 extern long schedule_timeout_uninterruptible(long timeout);
320 extern long schedule_timeout_idle(long timeout);
321 asmlinkage void schedule(void);
322 extern void schedule_preempt_disabled(void);
323 asmlinkage void preempt_schedule_irq(void);
324 #ifdef CONFIG_PREEMPT_RT
325 extern void schedule_rtlock(void);
326 #endif
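
/*
 * For illustration: schedule_timeout() expects the caller to have set a
 * non-running state first and returns the number of jiffies remaining if
 * the task was woken early:
 *
 *	set_current_state(TASK_INTERRUPTIBLE);
 *	remaining = schedule_timeout(msecs_to_jiffies(100));
 *
 * The schedule_timeout_*() variants above set the corresponding task state
 * themselves before sleeping.
 */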
327
328 extern int __must_check io_schedule_prepare(void);
329 extern void io_schedule_finish(int token);
330 extern long io_schedule_timeout(long timeout);
331 extern void io_schedule(void);
332 extern int select_fallback_rq(int cpu, struct task_struct *p);
333 extern struct task_struct *pick_task(struct rq *rq);
334
335 /**
336 * struct prev_cputime - snapshot of system and user cputime
337 * @utime: time spent in user mode
338 * @stime: time spent in system mode
339 * @lock: protects the above two fields
340 *
341 * Stores previous user/system time values such that we can guarantee
342 * monotonicity.
343 */
344 struct prev_cputime {
345 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
346 u64 utime;
347 u64 stime;
348 raw_spinlock_t lock;
349 #endif
350 };
351
352 enum vtime_state {
353 /* Task is sleeping or running in a CPU with VTIME inactive: */
354 VTIME_INACTIVE = 0,
355 /* Task is idle */
356 VTIME_IDLE,
357 /* Task runs in kernelspace in a CPU with VTIME active: */
358 VTIME_SYS,
359 /* Task runs in userspace in a CPU with VTIME active: */
360 VTIME_USER,
361 /* Task runs as guests in a CPU with VTIME active: */
362 VTIME_GUEST,
363 };
364
365 struct vtime {
366 seqcount_t seqcount;
367 unsigned long long starttime;
368 enum vtime_state state;
369 unsigned int cpu;
370 u64 utime;
371 u64 stime;
372 u64 gtime;
373 };
374
375 /*
376 * Utilization clamp constraints.
377 * @UCLAMP_MIN: Minimum utilization
378 * @UCLAMP_MAX: Maximum utilization
379 * @UCLAMP_CNT: Utilization clamp constraints count
380 */
381 enum uclamp_id {
382 UCLAMP_MIN = 0,
383 UCLAMP_MAX,
384 UCLAMP_CNT
385 };
386
387 #ifdef CONFIG_SMP
388 extern struct root_domain def_root_domain;
389 extern struct mutex sched_domains_mutex;
390 #endif
391
392 struct sched_param {
393 int sched_priority;
394 };
395
396 struct sched_info {
397 #ifdef CONFIG_SCHED_INFO
398 /* Cumulative counters: */
399
400 /* # of times we have run on this CPU: */
401 unsigned long pcount;
402
403 /* Time spent waiting on a runqueue: */
404 unsigned long long run_delay;
405
406 /* Timestamps: */
407
408 /* When did we last run on a CPU? */
409 unsigned long long last_arrival;
410
411 /* When were we last queued to run? */
412 unsigned long long last_queued;
413
414 #endif /* CONFIG_SCHED_INFO */
415 };
416
417 /*
418 * Integer metrics need fixed point arithmetic, e.g., sched/fair
419 * has a few: load, load_avg, util_avg, freq, and capacity.
420 *
421 * We define a basic fixed point arithmetic range, and then formalize
422 * all these metrics based on that basic range.
423 */
424 # define SCHED_FIXEDPOINT_SHIFT 10
425 # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT)
426
427 /* Increase resolution of cpu_capacity calculations */
428 # define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT
429 # define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT)
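
/*
 * Worked example: with SCHED_FIXEDPOINT_SHIFT == 10 these values are in
 * 1/1024 units, so full CPU capacity is SCHED_CAPACITY_SCALE == 1024, half
 * capacity is 512, and a ratio of 1.5 is stored as 1536.
 */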
430
431 struct load_weight {
432 unsigned long weight;
433 u32 inv_weight;
434 };
435
436 /*
437 * The load/runnable/util_avg accumulates an infinite geometric series
438 * (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c).
439 *
440 * [load_avg definition]
441 *
442 * load_avg = runnable% * scale_load_down(load)
443 *
444 * [runnable_avg definition]
445 *
446 * runnable_avg = runnable% * SCHED_CAPACITY_SCALE
447 *
448 * [util_avg definition]
449 *
450 * util_avg = running% * SCHED_CAPACITY_SCALE
451 *
452 * where runnable% is the time ratio that a sched_entity is runnable and
453 * running% the time ratio that a sched_entity is running.
454 *
455 * For cfs_rq, they are the aggregated values of all runnable and blocked
456 * sched_entities.
457 *
458 * The load/runnable/util_avg doesn't directly factor frequency scaling and CPU
459 * capacity scaling. The scaling is done through the rq_clock_pelt that is used
460 * for computing those signals (see update_rq_clock_pelt())
461 *
462 * N.B., the above ratios (runnable% and running%) themselves are in the
463 * range of [0, 1]. To do fixed point arithmetics, we therefore scale them
464 * to as large a range as necessary. This is for example reflected by
465 * util_avg's SCHED_CAPACITY_SCALE.
466 *
467 * [Overflow issue]
468 *
469 * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities
470 * with the highest load (=88761), always runnable on a single cfs_rq,
471 * and should not overflow as the number already hits PID_MAX_LIMIT.
472 *
473 * For all other cases (including 32-bit kernels), struct load_weight's
474 * weight will overflow first before we do, because:
475 *
476 * Max(load_avg) <= Max(load.weight)
477 *
478 * Then it is the load_weight's responsibility to consider overflow
479 * issues.
480 */
481 struct sched_avg {
482 u64 last_update_time;
483 u64 load_sum;
484 u64 runnable_sum;
485 u32 util_sum;
486 u32 period_contrib;
487 unsigned long load_avg;
488 unsigned long runnable_avg;
489 unsigned long util_avg;
490 unsigned int util_est;
491 } ____cacheline_aligned;
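
/*
 * Worked example: per the definitions above, a task that has recently been
 * running for about 25% of the time converges towards
 *
 *	util_avg ~= 0.25 * SCHED_CAPACITY_SCALE = 256
 *
 * (modulo the frequency/capacity scaling done through rq_clock_pelt).
 */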
492
493 /*
494 * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg
495 * updates. When a task is dequeued, its util_est should not be updated if its
496 * util_avg has not been updated in the meantime.
497 * This information is mapped into the MSB bit of util_est at dequeue time.
498 * Since max value of util_est for a task is 1024 (PELT util_avg for a task)
499 * it is safe to use MSB.
500 */
501 #define UTIL_EST_WEIGHT_SHIFT 2
502 #define UTIL_AVG_UNCHANGED 0x80000000
503
504 struct sched_statistics {
505 #ifdef CONFIG_SCHEDSTATS
506 u64 wait_start;
507 u64 wait_max;
508 u64 wait_count;
509 u64 wait_sum;
510 u64 iowait_count;
511 u64 iowait_sum;
512
513 u64 sleep_start;
514 u64 sleep_max;
515 s64 sum_sleep_runtime;
516
517 u64 block_start;
518 u64 block_max;
519 s64 sum_block_runtime;
520
521 s64 exec_max;
522 u64 slice_max;
523
524 u64 nr_migrations_cold;
525 u64 nr_failed_migrations_affine;
526 u64 nr_failed_migrations_running;
527 u64 nr_failed_migrations_hot;
528 u64 nr_forced_migrations;
529
530 u64 nr_wakeups;
531 u64 nr_wakeups_sync;
532 u64 nr_wakeups_migrate;
533 u64 nr_wakeups_local;
534 u64 nr_wakeups_remote;
535 u64 nr_wakeups_affine;
536 u64 nr_wakeups_affine_attempts;
537 u64 nr_wakeups_passive;
538 u64 nr_wakeups_idle;
539
540 #ifdef CONFIG_SCHED_CORE
541 u64 core_forceidle_sum;
542 #endif
543 #endif /* CONFIG_SCHEDSTATS */
544 } ____cacheline_aligned;
545
546 struct sched_entity {
547 /* For load-balancing: */
548 struct load_weight load;
549 struct rb_node run_node;
550 u64 deadline;
551 u64 min_vruntime;
552 u64 min_slice;
553
554 struct list_head group_node;
555 unsigned char on_rq;
556 unsigned char sched_delayed;
557 unsigned char rel_deadline;
558 unsigned char custom_slice;
559 /* hole */
560
561 u64 exec_start;
562 u64 sum_exec_runtime;
563 u64 prev_sum_exec_runtime;
564 u64 vruntime;
565 s64 vlag;
566 u64 slice;
567
568 u64 nr_migrations;
569
570 #ifdef CONFIG_FAIR_GROUP_SCHED
571 int depth;
572 struct sched_entity *parent;
573 /* rq on which this entity is (to be) queued: */
574 struct cfs_rq *cfs_rq;
575 /* rq "owned" by this entity/group: */
576 struct cfs_rq *my_q;
577 /* cached value of my_q->h_nr_running */
578 unsigned long runnable_weight;
579 #endif
580
581 #ifdef CONFIG_SMP
582 /*
583 * Per entity load average tracking.
584 *
585 * Put into separate cache line so it does not
586 * collide with read-mostly values above.
587 */
588 struct sched_avg avg;
589 #endif
590
591 ANDROID_KABI_RESERVE(1);
592 ANDROID_KABI_RESERVE(2);
593 ANDROID_KABI_RESERVE(3);
594 ANDROID_KABI_RESERVE(4);
595 };
596
597 struct sched_rt_entity {
598 struct list_head run_list;
599 unsigned long timeout;
600 unsigned long watchdog_stamp;
601 unsigned int time_slice;
602 unsigned short on_rq;
603 unsigned short on_list;
604
605 struct sched_rt_entity *back;
606 #ifdef CONFIG_RT_GROUP_SCHED
607 struct sched_rt_entity *parent;
608 /* rq on which this entity is (to be) queued: */
609 struct rt_rq *rt_rq;
610 /* rq "owned" by this entity/group: */
611 struct rt_rq *my_q;
612 #endif
613
614 ANDROID_KABI_RESERVE(1);
615 ANDROID_KABI_RESERVE(2);
616 ANDROID_KABI_RESERVE(3);
617 ANDROID_KABI_RESERVE(4);
618 } __randomize_layout;
619
620 typedef bool (*dl_server_has_tasks_f)(struct sched_dl_entity *);
621 typedef struct task_struct *(*dl_server_pick_f)(struct sched_dl_entity *);
622
623 struct sched_dl_entity {
624 struct rb_node rb_node;
625
626 /*
627 * Original scheduling parameters. Copied here from sched_attr
628 * during sched_setattr(), they will remain the same until
629 * the next sched_setattr().
630 */
631 u64 dl_runtime; /* Maximum runtime for each instance */
632 u64 dl_deadline; /* Relative deadline of each instance */
633 u64 dl_period; /* Separation of two instances (period) */
634 u64 dl_bw; /* dl_runtime / dl_period */
635 u64 dl_density; /* dl_runtime / dl_deadline */
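
/*
 * Worked example: a reservation of dl_runtime = 10ms out of every
 * dl_period = 100ms corresponds to a bandwidth of dl_runtime / dl_period,
 * i.e. 10% of a CPU; dl_bw and dl_density store such ratios in fixed point.
 */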
636
637 /*
638 * Actual scheduling parameters. Initialized with the values above,
639 * they are continuously updated during task execution. Note that
640 * the remaining runtime could be < 0 in case we are in overrun.
641 */
642 s64 runtime; /* Remaining runtime for this instance */
643 u64 deadline; /* Absolute deadline for this instance */
644 unsigned int flags; /* Specifying the scheduler behaviour */
645
646 /*
647 * Some bool flags:
648 *
649 * @dl_throttled tells if we exhausted the runtime. If so, the
650 * task has to wait for a replenishment to be performed at the
651 * next firing of dl_timer.
652 *
653 * @dl_yielded tells if task gave up the CPU before consuming
654 * all its available runtime during the last job.
655 *
656 * @dl_non_contending tells if the task is inactive while still
657 * contributing to the active utilization. In other words, it
658 * indicates if the inactive timer has been armed and its handler
659 * has not been executed yet. This flag is useful to avoid race
660 * conditions between the inactive timer handler and the wakeup
661 * code.
662 *
663 * @dl_overrun tells if the task asked to be informed about runtime
664 * overruns.
665 *
666 * @dl_server tells if this is a server entity.
667 *
668 * @dl_defer tells if this is a deferred or regular server. For
669 * now only defer server exists.
670 *
671 * @dl_defer_armed tells if the deferrable server is waiting
672 * for the replenishment timer to activate it.
673 *
674 * @dl_server_active tells if the dlserver is active (started).
675 * dlserver is started on first cfs enqueue on an idle runqueue
676 * and is stopped when a dequeue results in 0 cfs tasks on the
677 * runqueue. In other words, dlserver is active only when the CPU's
678 * runqueue has at least one cfs task.
679 *
680 * @dl_defer_running tells if the deferrable server is actually
681 * running, skipping the defer phase.
682 */
683 unsigned int dl_throttled : 1;
684 unsigned int dl_yielded : 1;
685 unsigned int dl_non_contending : 1;
686 unsigned int dl_overrun : 1;
687 unsigned int dl_server : 1;
688 unsigned int dl_server_active : 1;
689 unsigned int dl_defer : 1;
690 unsigned int dl_defer_armed : 1;
691 unsigned int dl_defer_running : 1;
692 unsigned int dl_server_idle : 1; //UNUSED but kept for KMI
693
694 /*
695 * Bandwidth enforcement timer. Each -deadline task has its
696 * own bandwidth to be enforced, thus we need one timer per task.
697 */
698 struct hrtimer dl_timer;
699
700 /*
701 * Inactive timer, responsible for decreasing the active utilization
702 * at the "0-lag time". When a -deadline task blocks, it contributes
703 * to GRUB's active utilization until the "0-lag time", hence a
704 * timer is needed to decrease the active utilization at the correct
705 * time.
706 */
707 struct hrtimer inactive_timer;
708
709 /*
710 * Bits for DL-server functionality. Also see the comment near
711 * dl_server_update().
712 *
713 * @rq the runqueue this server is for
714 *
715 * @server_has_tasks() returns true if @server_pick_task() returns a
716 * runnable task.
717 */
718 struct rq *rq;
719 dl_server_has_tasks_f server_has_tasks; //UNUSED, but preserved for KMI
720 dl_server_pick_f server_pick_task;
721
722 #ifdef CONFIG_RT_MUTEXES
723 /*
724 * Priority Inheritance. When a DEADLINE scheduling entity is boosted
725 * pi_se points to the donor, otherwise points to the dl_se it belongs
726 * to (the original one/itself).
727 */
728 struct sched_dl_entity *pi_se;
729 #endif
730 };
731
732 ANDROID_KABI_TYPE_STRING("s#sched_dl_entity", "structure_type sched_dl_entity "
733 "{ member s#rb_node rb_node data_member_location(0) , member t#u64 "
734 "dl_runtime data_member_location(24) , member t#u64 dl_deadline "
735 "data_member_location(32) , member t#u64 dl_period data_member_location(40) "
736 ", member t#u64 dl_bw data_member_location(48) , member t#u64 dl_density "
737 "data_member_location(56) , member t#s64 runtime data_member_location(64) , "
738 "member t#u64 deadline data_member_location(72) , member base_type unsigned "
739 "int byte_size(4) encoding(7) flags data_member_location(80) , member base_type "
740 "unsigned int byte_size(4) encoding(7) dl_throttled bit_size(1) "
741 "data_bit_offset(672) , member base_type unsigned int byte_size(4) encoding(7) "
742 "dl_yielded bit_size(1) data_bit_offset(673) , member base_type unsigned int "
743 "byte_size(4) encoding(7) dl_non_contending bit_size(1) data_bit_offset(674) , "
744 "member base_type unsigned int byte_size(4) encoding(7) dl_overrun bit_size(1) "
745 "data_bit_offset(675) , member base_type unsigned int byte_size(4) encoding(7) "
746 "dl_server bit_size(1) data_bit_offset(676) , member base_type unsigned int "
747 "byte_size(4) encoding(7) dl_server_active bit_size(1) data_bit_offset(677) , "
748 "member base_type unsigned int byte_size(4) encoding(7) dl_defer bit_size(1) "
749 "data_bit_offset(678) , member base_type unsigned int byte_size(4) encoding(7) "
750 "dl_defer_armed bit_size(1) data_bit_offset(679) , member base_type unsigned "
751 "int byte_size(4) encoding(7) dl_defer_running bit_size(1) "
752 "data_bit_offset(680) , member s#hrtimer dl_timer data_member_location(88) , "
753 "member s#hrtimer inactive_timer data_member_location(152) , member "
754 "pointer_type { s#rq } rq data_member_location(216) , member "
755 "t#dl_server_has_tasks_f server_has_tasks data_member_location(224) , member "
756 "t#dl_server_pick_f server_pick_task data_member_location(232) , member "
757 "pointer_type { s#sched_dl_entity } pi_se data_member_location(240) } "
758 "byte_size(248)");
759
760 #ifdef CONFIG_UCLAMP_TASK
761 /* Number of utilization clamp buckets (shorter alias) */
762 #define UCLAMP_BUCKETS CONFIG_UCLAMP_BUCKETS_COUNT
763
764 /*
765 * Utilization clamp for a scheduling entity
766 * @value: clamp value "assigned" to a se
767 * @bucket_id: bucket index corresponding to the "assigned" value
768 * @active: the se is currently refcounted in a rq's bucket
769 * @user_defined: the requested clamp value comes from user-space
770 *
771 * The bucket_id is the index of the clamp bucket matching the clamp value
772 * which is pre-computed and stored to avoid expensive integer divisions from
773 * the fast path.
774 *
775 * The active bit is set whenever a task has got an "effective" value assigned,
776 * which can be different from the clamp value "requested" from user-space.
777 * This makes it possible to know that a task is refcounted in the rq's
778 * bucket corresponding to the "effective" bucket_id.
779 *
780 * The user_defined bit is set whenever a task has got a task-specific clamp
781 * value requested from userspace, i.e. the system defaults apply to this task
782 * just as a restriction. This allows default clamps to be relaxed when a
783 * less restrictive task-specific value has been requested, thus making it
784 * possible to implement a "nice" semantic. For example, a task running
785 * with a 20% default boost can still drop its own boosting to 0%.
786 */
787 struct uclamp_se {
788 unsigned int value : bits_per(SCHED_CAPACITY_SCALE);
789 unsigned int bucket_id : bits_per(UCLAMP_BUCKETS);
790 unsigned int active : 1;
791 unsigned int user_defined : 1;
792 };
793 #endif /* CONFIG_UCLAMP_TASK */
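
/*
 * Worked example: clamp values are expressed on the SCHED_CAPACITY_SCALE
 * range, so a uclamp_se with value == 256 and user_defined == 1 describes
 * a user-requested clamp of 25% of CPU capacity.
 */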
794
795 union rcu_special {
796 struct {
797 u8 blocked;
798 u8 need_qs;
799 u8 exp_hint; /* Hint for performance. */
800 u8 need_mb; /* Readers need smp_mb(). */
801 } b; /* Bits. */
802 u32 s; /* Set of bits. */
803 };
804
805 enum perf_event_task_context {
806 perf_invalid_context = -1,
807 perf_hw_context = 0,
808 perf_sw_context,
809 perf_nr_task_contexts,
810 };
811
812 /*
813 * Number of contexts where an event can trigger:
814 * task, softirq, hardirq, nmi.
815 */
816 #define PERF_NR_CONTEXTS 4
817
818 struct wake_q_node {
819 struct wake_q_node *next;
820 };
821
822 struct kmap_ctrl {
823 #ifdef CONFIG_KMAP_LOCAL
824 int idx;
825 pte_t pteval[KM_MAX_IDX];
826 #endif
827 };
828
829 enum blocked_on_state {
830 BO_RUNNABLE,
831 BO_BLOCKED,
832 BO_WAKING,
833 };
834
835 struct task_struct {
836 #ifdef CONFIG_THREAD_INFO_IN_TASK
837 /*
838 * For reasons of header soup (see current_thread_info()), this
839 * must be the first element of task_struct.
840 */
841 struct thread_info thread_info;
842 #endif
843 unsigned int __state;
844
845 /* saved state for "spinlock sleepers" */
846 unsigned int saved_state;
847
848 /*
849 * This begins the randomizable portion of task_struct. Only
850 * scheduling-critical items should be added above here.
851 */
852 randomized_struct_fields_start
853
854 void *stack;
855 refcount_t usage;
856 /* Per task flags (PF_*), defined further below: */
857 unsigned int flags;
858 unsigned int ptrace;
859
860 #ifdef CONFIG_MEM_ALLOC_PROFILING
861 struct alloc_tag *alloc_tag;
862 #endif
863
864 #ifdef CONFIG_SMP
865 int on_cpu;
866 struct __call_single_node wake_entry;
867 unsigned int wakee_flips;
868 unsigned long wakee_flip_decay_ts;
869 struct task_struct *last_wakee;
870
871 /*
872 * recent_used_cpu is initially set as the last CPU used by a task
873 * that wakes affine another task. Waker/wakee relationships can
874 * push tasks around a CPU where each wakeup moves to the next one.
875 * Tracking a recently used CPU allows a quick search for a recently
876 * used CPU that may be idle.
877 */
878 int recent_used_cpu;
879 int wake_cpu;
880 #endif
881 int on_rq;
882
883 int prio;
884 int static_prio;
885 int normal_prio;
886 unsigned int rt_priority;
887
888 struct sched_entity se;
889 struct sched_rt_entity rt;
890 struct sched_dl_entity dl;
891 struct sched_dl_entity *dl_server;
892 #ifdef CONFIG_SCHED_CLASS_EXT
893 struct sched_ext_entity scx;
894 #endif
895 const struct sched_class *sched_class;
896
897 #ifdef CONFIG_SCHED_CORE
898 struct rb_node core_node;
899 unsigned long core_cookie;
900 unsigned int core_occupation;
901 #endif
902
903 #ifdef CONFIG_CGROUP_SCHED
904 struct task_group *sched_task_group;
905 #endif
906
907
908 #ifdef CONFIG_UCLAMP_TASK
909 /*
910 * Clamp values requested for a scheduling entity.
911 * Must be updated with task_rq_lock() held.
912 */
913 struct uclamp_se uclamp_req[UCLAMP_CNT];
914 /*
915 * Effective clamp values used for a scheduling entity.
916 * Must be updated with task_rq_lock() held.
917 */
918 struct uclamp_se uclamp[UCLAMP_CNT];
919 #endif
920
921 struct sched_statistics stats;
922
923 #ifdef CONFIG_PREEMPT_NOTIFIERS
924 /* List of struct preempt_notifier: */
925 struct hlist_head preempt_notifiers;
926 #endif
927
928 #ifdef CONFIG_BLK_DEV_IO_TRACE
929 unsigned int btrace_seq;
930 #endif
931
932 unsigned int policy;
933 unsigned long max_allowed_capacity;
934 int nr_cpus_allowed;
935 const cpumask_t *cpus_ptr;
936 cpumask_t *user_cpus_ptr;
937 cpumask_t cpus_mask;
938 void *migration_pending;
939 #ifdef CONFIG_SMP
940 unsigned short migration_disabled;
941 #endif
942 unsigned short migration_flags;
943
944 #ifdef CONFIG_PREEMPT_RCU
945 int rcu_read_lock_nesting;
946 union rcu_special rcu_read_unlock_special;
947 struct list_head rcu_node_entry;
948 struct rcu_node *rcu_blocked_node;
949 #endif /* #ifdef CONFIG_PREEMPT_RCU */
950
951 #ifdef CONFIG_TASKS_RCU
952 unsigned long rcu_tasks_nvcsw;
953 u8 rcu_tasks_holdout;
954 u8 rcu_tasks_idx;
955 int rcu_tasks_idle_cpu;
956 struct list_head rcu_tasks_holdout_list;
957 int rcu_tasks_exit_cpu;
958 struct list_head rcu_tasks_exit_list;
959 #endif /* #ifdef CONFIG_TASKS_RCU */
960
961 #ifdef CONFIG_TASKS_TRACE_RCU
962 int trc_reader_nesting;
963 int trc_ipi_to_cpu;
964 union rcu_special trc_reader_special;
965 struct list_head trc_holdout_list;
966 struct list_head trc_blkd_node;
967 int trc_blkd_cpu;
968 #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
969
970 struct sched_info sched_info;
971
972 struct list_head tasks;
973 #ifdef CONFIG_SMP
974 struct plist_node pushable_tasks;
975 struct rb_node pushable_dl_tasks;
976 #endif
977
978 struct mm_struct *mm;
979 struct mm_struct *active_mm;
980 struct address_space *faults_disabled_mapping;
981
982 int exit_state;
983 int exit_code;
984 int exit_signal;
985 /* The signal sent when the parent dies: */
986 int pdeath_signal;
987 /* JOBCTL_*, siglock protected: */
988 unsigned long jobctl;
989
990 /* Used for emulating ABI behavior of previous Linux versions: */
991 unsigned int personality;
992
993 /* Scheduler bits, serialized by scheduler locks: */
994 unsigned sched_reset_on_fork:1;
995 unsigned sched_contributes_to_load:1;
996 unsigned sched_migrated:1;
997 unsigned sched_task_hot:1;
998
999 /* Force alignment to the next boundary: */
1000 unsigned :0;
1001
1002 /* Unserialized, strictly 'current' */
1003
1004 /*
1005 * This field must not be in the scheduler word above due to wakelist
1006 * queueing no longer being serialized by p->on_cpu. However:
1007 *
1008 * p->XXX = X; ttwu()
1009 * schedule() if (p->on_rq && ..) // false
1010 * smp_mb__after_spinlock(); if (smp_load_acquire(&p->on_cpu) && //true
1011 * deactivate_task() ttwu_queue_wakelist())
1012 * p->on_rq = 0; p->sched_remote_wakeup = Y;
1013 *
1014 * guarantees all stores of 'current' are visible before
1015 * ->sched_remote_wakeup gets used, so it can be in this word.
1016 */
1017 unsigned sched_remote_wakeup:1;
1018 #ifdef CONFIG_RT_MUTEXES
1019 unsigned sched_rt_mutex:1;
1020 #endif
1021
1022 /* Bit to tell TOMOYO we're in execve(): */
1023 unsigned in_execve:1;
1024 unsigned in_iowait:1;
1025 #ifndef TIF_RESTORE_SIGMASK
1026 unsigned restore_sigmask:1;
1027 #endif
1028 #ifdef CONFIG_MEMCG_V1
1029 unsigned in_user_fault:1;
1030 #endif
1031 #ifdef CONFIG_LRU_GEN
1032 /* whether the LRU algorithm may apply to this access */
1033 unsigned in_lru_fault:1;
1034 #endif
1035 #ifdef CONFIG_COMPAT_BRK
1036 unsigned brk_randomized:1;
1037 #endif
1038 #ifdef CONFIG_CGROUPS
1039 /* disallow userland-initiated cgroup migration */
1040 unsigned no_cgroup_migration:1;
1041 /* task is frozen/stopped (used by the cgroup freezer) */
1042 unsigned frozen:1;
1043 #endif
1044 #ifdef CONFIG_BLK_CGROUP
1045 unsigned use_memdelay:1;
1046 #endif
1047 #ifdef CONFIG_PSI
1048 /* Stalled due to lack of memory */
1049 unsigned in_memstall:1;
1050 #endif
1051 #ifdef CONFIG_PAGE_OWNER
1052 /* Used by page_owner=on to detect recursion in page tracking. */
1053 unsigned in_page_owner:1;
1054 #endif
1055 #ifdef CONFIG_EVENTFD
1056 /* Recursion prevention for eventfd_signal() */
1057 unsigned in_eventfd:1;
1058 #endif
1059 #ifdef CONFIG_ARCH_HAS_CPU_PASID
1060 unsigned pasid_activated:1;
1061 #endif
1062 #ifdef CONFIG_CPU_SUP_INTEL
1063 unsigned reported_split_lock:1;
1064 #endif
1065 #ifdef CONFIG_TASK_DELAY_ACCT
1066 /* delay due to memory thrashing */
1067 unsigned in_thrashing:1;
1068 #endif
1069 #ifdef CONFIG_PREEMPT_RT
1070 struct netdev_xmit net_xmit;
1071 #endif
1072 unsigned long atomic_flags; /* Flags requiring atomic access. */
1073
1074 struct restart_block restart_block;
1075
1076 pid_t pid;
1077 pid_t tgid;
1078
1079 #ifdef CONFIG_STACKPROTECTOR
1080 /* Canary value for the -fstack-protector GCC feature: */
1081 unsigned long stack_canary;
1082 #endif
1083 /*
1084 * Pointers to the (original) parent process, youngest child, younger sibling,
1085 * older sibling, respectively. (p->father can be replaced with
1086 * p->real_parent->pid)
1087 */
1088
1089 /* Real parent process: */
1090 struct task_struct __rcu *real_parent;
1091
1092 /* Recipient of SIGCHLD, wait4() reports: */
1093 struct task_struct __rcu *parent;
1094
1095 /*
1096 * Children/sibling form the list of natural children:
1097 */
1098 struct list_head children;
1099 struct list_head sibling;
1100 struct task_struct *group_leader;
1101
1102 /*
1103 * 'ptraced' is the list of tasks this task is using ptrace() on.
1104 *
1105 * This includes both natural children and PTRACE_ATTACH targets.
1106 * 'ptrace_entry' is this task's link on the p->parent->ptraced list.
1107 */
1108 struct list_head ptraced;
1109 struct list_head ptrace_entry;
1110
1111 /* PID/PID hash table linkage. */
1112 struct pid *thread_pid;
1113 struct hlist_node pid_links[PIDTYPE_MAX];
1114 struct list_head thread_node;
1115
1116 struct completion *vfork_done;
1117
1118 /* CLONE_CHILD_SETTID: */
1119 int __user *set_child_tid;
1120
1121 /* CLONE_CHILD_CLEARTID: */
1122 int __user *clear_child_tid;
1123
1124 /* PF_KTHREAD | PF_IO_WORKER */
1125 void *worker_private;
1126
1127 u64 utime;
1128 u64 stime;
1129 #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
1130 u64 utimescaled;
1131 u64 stimescaled;
1132 #endif
1133 u64 gtime;
1134 #ifdef CONFIG_CPU_FREQ_TIMES
1135 u64 *time_in_state;
1136 unsigned int max_state;
1137 #endif
1138 struct prev_cputime prev_cputime;
1139 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
1140 struct vtime vtime;
1141 #endif
1142
1143 #ifdef CONFIG_NO_HZ_FULL
1144 atomic_t tick_dep_mask;
1145 #endif
1146 /* Context switch counts: */
1147 unsigned long nvcsw;
1148 unsigned long nivcsw;
1149
1150 /* Monotonic time in nsecs: */
1151 u64 start_time;
1152
1153 /* Boot based time in nsecs: */
1154 u64 start_boottime;
1155
1156 /* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */
1157 unsigned long min_flt;
1158 unsigned long maj_flt;
1159
1160 /* Empty if CONFIG_POSIX_CPUTIMERS=n */
1161 struct posix_cputimers posix_cputimers;
1162
1163 #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
1164 struct posix_cputimers_work posix_cputimers_work;
1165 #endif
1166
1167 /* Process credentials: */
1168
1169 /* Tracer's credentials at attach: */
1170 const struct cred __rcu *ptracer_cred;
1171
1172 /* Objective and real subjective task credentials (COW): */
1173 const struct cred __rcu *real_cred;
1174
1175 /* Effective (overridable) subjective task credentials (COW): */
1176 const struct cred __rcu *cred;
1177
1178 #ifdef CONFIG_KEYS
1179 /* Cached requested key. */
1180 struct key *cached_requested_key;
1181 #endif
1182
1183 /*
1184 * executable name, excluding path.
1185 *
1186 * - normally initialized by setup_new_exec()
1187 * - access it with [gs]et_task_comm()
1188 * - lock it with task_lock()
1189 */
1190 char comm[TASK_COMM_LEN];
1191
1192 struct nameidata *nameidata;
1193
1194 #ifdef CONFIG_SYSVIPC
1195 struct sysv_sem sysvsem;
1196 struct sysv_shm sysvshm;
1197 #endif
1198 #ifdef CONFIG_DETECT_HUNG_TASK
1199 unsigned long last_switch_count;
1200 unsigned long last_switch_time;
1201 #endif
1202 /* Filesystem information: */
1203 struct fs_struct *fs;
1204
1205 /* Open file information: */
1206 struct files_struct *files;
1207
1208 #ifdef CONFIG_IO_URING
1209 struct io_uring_task *io_uring;
1210 #endif
1211
1212 /* Namespaces: */
1213 struct nsproxy *nsproxy;
1214
1215 /* Signal handlers: */
1216 struct signal_struct *signal;
1217 struct sighand_struct __rcu *sighand;
1218 sigset_t blocked;
1219 sigset_t real_blocked;
1220 /* Restored if set_restore_sigmask() was used: */
1221 sigset_t saved_sigmask;
1222 struct sigpending pending;
1223 unsigned long sas_ss_sp;
1224 size_t sas_ss_size;
1225 unsigned int sas_ss_flags;
1226
1227 struct callback_head *task_works;
1228
1229 #ifdef CONFIG_AUDIT
1230 #ifdef CONFIG_AUDITSYSCALL
1231 struct audit_context *audit_context;
1232 #endif
1233 kuid_t loginuid;
1234 unsigned int sessionid;
1235 #endif
1236 struct seccomp seccomp;
1237 struct syscall_user_dispatch syscall_dispatch;
1238
1239 /* Thread group tracking: */
1240 u64 parent_exec_id;
1241 u64 self_exec_id;
1242
1243 /* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */
1244 spinlock_t alloc_lock;
1245
1246 /* Protection of the PI data structures: */
1247 raw_spinlock_t pi_lock;
1248
1249 struct wake_q_node wake_q;
1250 int wake_q_count;
1251
1252 #ifdef CONFIG_RT_MUTEXES
1253 /* PI waiters blocked on a rt_mutex held by this task: */
1254 struct rb_root_cached pi_waiters;
1255 /* Updated under owner's pi_lock and rq lock */
1256 struct task_struct *pi_top_task;
1257 /* Deadlock detection and priority inheritance handling: */
1258 struct rt_mutex_waiter *pi_blocked_on;
1259 #endif
1260
1261 enum blocked_on_state blocked_on_state;
1262 struct mutex *blocked_on; /* lock we're blocked on */
1263 struct task_struct *blocked_donor; /* task that is boosting this task */
1264 struct list_head migration_node;
1265 #ifdef CONFIG_SCHED_PROXY_EXEC
1266 struct list_head blocked_head; /* tasks blocked on this task */
1267 struct list_head blocked_node; /* our entry on someone else's blocked_head */
1268 /* Node for the list of tasks to process blocked_head list for blocked entity activations */
1269 struct list_head blocked_activation_node;
1270 struct task_struct *sleeping_owner; /* task our blocked_node is enqueued on */
1271 #endif
1272 raw_spinlock_t blocked_lock;
1273
1274 #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
1275 struct mutex *blocker_mutex;
1276 #endif
1277
1278 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
1279 int non_block_count;
1280 #endif
1281
1282 #ifdef CONFIG_TRACE_IRQFLAGS
1283 struct irqtrace_events irqtrace;
1284 unsigned int hardirq_threaded;
1285 u64 hardirq_chain_key;
1286 int softirqs_enabled;
1287 int softirq_context;
1288 int irq_config;
1289 #endif
1290 #ifdef CONFIG_PREEMPT_RT
1291 int softirq_disable_cnt;
1292 #endif
1293
1294 #ifdef CONFIG_LOCKDEP
1295 # define MAX_LOCK_DEPTH 48UL
1296 u64 curr_chain_key;
1297 int lockdep_depth;
1298 unsigned int lockdep_recursion;
1299 struct held_lock held_locks[MAX_LOCK_DEPTH];
1300 #endif
1301
1302 #if defined(CONFIG_UBSAN) && !defined(CONFIG_UBSAN_TRAP)
1303 unsigned int in_ubsan;
1304 #endif
1305
1306 /* Journalling filesystem info: */
1307 void *journal_info;
1308
1309 /* Stacked block device info: */
1310 struct bio_list *bio_list;
1311
1312 /* Stack plugging: */
1313 struct blk_plug *plug;
1314
1315 /* VM state: */
1316 struct reclaim_state *reclaim_state;
1317
1318 struct io_context *io_context;
1319
1320 #ifdef CONFIG_COMPACTION
1321 struct capture_control *capture_control;
1322 #endif
1323 /* Ptrace state: */
1324 unsigned long ptrace_message;
1325 kernel_siginfo_t *last_siginfo;
1326
1327 struct task_io_accounting ioac;
1328 #ifdef CONFIG_PSI
1329 /* Pressure stall state */
1330 unsigned int psi_flags;
1331 #endif
1332 #ifdef CONFIG_TASK_XACCT
1333 /* Accumulated RSS usage: */
1334 u64 acct_rss_mem1;
1335 /* Accumulated virtual memory usage: */
1336 u64 acct_vm_mem1;
1337 /* stime + utime since last update: */
1338 u64 acct_timexpd;
1339 #endif
1340 #ifdef CONFIG_CPUSETS
1341 /* Protected by ->alloc_lock: */
1342 nodemask_t mems_allowed;
1343 /* Sequence number to catch updates: */
1344 seqcount_spinlock_t mems_allowed_seq;
1345 int cpuset_mem_spread_rotor;
1346 #endif
1347 #ifdef CONFIG_CGROUPS
1348 /* Control Group info protected by css_set_lock: */
1349 struct css_set __rcu *cgroups;
1350 /* cg_list protected by css_set_lock and tsk->alloc_lock: */
1351 struct list_head cg_list;
1352 #endif
1353 #ifdef CONFIG_X86_CPU_RESCTRL
1354 u32 closid;
1355 u32 rmid;
1356 #endif
1357 #ifdef CONFIG_FUTEX
1358 struct robust_list_head __user *robust_list;
1359 #ifdef CONFIG_COMPAT
1360 struct compat_robust_list_head __user *compat_robust_list;
1361 #endif
1362 struct list_head pi_state_list;
1363 struct futex_pi_state *pi_state_cache;
1364 struct mutex futex_exit_mutex;
1365 unsigned int futex_state;
1366 #endif
1367 #ifdef CONFIG_PERF_EVENTS
1368 u8 perf_recursion[PERF_NR_CONTEXTS];
1369 struct perf_event_context *perf_event_ctxp;
1370 struct mutex perf_event_mutex;
1371 struct list_head perf_event_list;
1372 #endif
1373 #ifdef CONFIG_DEBUG_PREEMPT
1374 unsigned long preempt_disable_ip;
1375 #endif
1376 #ifdef CONFIG_NUMA
1377 /* Protected by alloc_lock: */
1378 struct mempolicy *mempolicy;
1379 short il_prev;
1380 u8 il_weight;
1381 short pref_node_fork;
1382 #endif
1383 #ifdef CONFIG_NUMA_BALANCING
1384 int numa_scan_seq;
1385 unsigned int numa_scan_period;
1386 unsigned int numa_scan_period_max;
1387 int numa_preferred_nid;
1388 unsigned long numa_migrate_retry;
1389 /* Migration stamp: */
1390 u64 node_stamp;
1391 u64 last_task_numa_placement;
1392 u64 last_sum_exec_runtime;
1393 struct callback_head numa_work;
1394
1395 /*
1396 * This pointer is only modified for current in syscall and
1397 * pagefault context (and for tasks being destroyed), so it can be read
1398 * from any of the following contexts:
1399 * - RCU read-side critical section
1400 * - current->numa_group from everywhere
1401 * - task's runqueue locked, task not running
1402 */
1403 struct numa_group __rcu *numa_group;
1404
1405 /*
1406 * numa_faults is an array split into four regions:
1407 * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
1408 * in this precise order.
1409 *
1410 * faults_memory: Exponential decaying average of faults on a per-node
1411 * basis. Scheduling placement decisions are made based on these
1412 * counts. The values remain static for the duration of a PTE scan.
1413 * faults_cpu: Track the nodes the process was running on when a NUMA
1414 * hinting fault was incurred.
1415 * faults_memory_buffer and faults_cpu_buffer: Record faults per node
1416 * during the current scan window. When the scan completes, the counts
1417 * in faults_memory and faults_cpu decay and these values are copied.
1418 */
1419 unsigned long *numa_faults;
1420 unsigned long total_numa_faults;
1421
1422 /*
1423 * numa_faults_locality tracks if faults recorded during the last
1424 * scan window were remote/local or failed to migrate. The task scan
1425 * period is adapted based on the locality of the faults with different
1426 * weights depending on whether they were shared or private faults
1427 */
1428 unsigned long numa_faults_locality[3];
1429
1430 unsigned long numa_pages_migrated;
1431 #endif /* CONFIG_NUMA_BALANCING */
1432
1433 #ifdef CONFIG_RSEQ
1434 struct rseq __user *rseq;
1435 u32 rseq_len;
1436 u32 rseq_sig;
1437 /*
1438 * RmW on rseq_event_mask must be performed atomically
1439 * with respect to preemption.
1440 */
1441 unsigned long rseq_event_mask;
1442 #endif
1443
1444 #ifdef CONFIG_SCHED_MM_CID
1445 int mm_cid; /* Current cid in mm */
1446 int last_mm_cid; /* Most recent cid in mm */
1447 int migrate_from_cpu;
1448 int mm_cid_active; /* Whether cid bitmap is active */
1449 struct callback_head cid_work;
1450 #endif
1451
1452 struct tlbflush_unmap_batch tlb_ubc;
1453
1454 /* Cache last used pipe for splice(): */
1455 struct pipe_inode_info *splice_pipe;
1456
1457 struct page_frag task_frag;
1458
1459 #ifdef CONFIG_TASK_DELAY_ACCT
1460 struct task_delay_info *delays;
1461 #endif
1462
1463 #ifdef CONFIG_FAULT_INJECTION
1464 int make_it_fail;
1465 unsigned int fail_nth;
1466 #endif
1467 /*
1468 * When (nr_dirtied >= nr_dirtied_pause), it's time to call
1469 * balance_dirty_pages() for a dirty throttling pause:
1470 */
1471 int nr_dirtied;
1472 int nr_dirtied_pause;
1473 /* Start of a write-and-pause period: */
1474 unsigned long dirty_paused_when;
1475
1476 #ifdef CONFIG_LATENCYTOP
1477 int latency_record_count;
1478 struct latency_record latency_record[LT_SAVECOUNT];
1479 #endif
1480 /*
1481 * Time slack values; these are used to round up poll() and
1482 * select() etc timeout values. These are in nanoseconds.
1483 */
1484 u64 timer_slack_ns;
1485 u64 default_timer_slack_ns;
1486
1487 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
1488 unsigned int kasan_depth;
1489 #endif
1490
1491 #ifdef CONFIG_KCSAN
1492 struct kcsan_ctx kcsan_ctx;
1493 #ifdef CONFIG_TRACE_IRQFLAGS
1494 struct irqtrace_events kcsan_save_irqtrace;
1495 #endif
1496 #ifdef CONFIG_KCSAN_WEAK_MEMORY
1497 int kcsan_stack_depth;
1498 #endif
1499 #endif
1500
1501 #ifdef CONFIG_KMSAN
1502 struct kmsan_ctx kmsan_ctx;
1503 #endif
1504
1505 #if IS_ENABLED(CONFIG_KUNIT)
1506 struct kunit *kunit_test;
1507 #endif
1508
1509 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
1510 /* Index of current stored address in ret_stack: */
1511 int curr_ret_stack;
1512 int curr_ret_depth;
1513
1514 /* Stack of return addresses for return function tracing: */
1515 unsigned long *ret_stack;
1516
1517 /* Timestamp for last schedule: */
1518 unsigned long long ftrace_timestamp;
1519
1520 /*
1521 * Number of functions that haven't been traced
1522 * because of depth overrun:
1523 */
1524 atomic_t trace_overrun;
1525
1526 /* Pause tracing: */
1527 atomic_t tracing_graph_pause;
1528 #endif
1529
1530 #ifdef CONFIG_TRACING
1531 /* Bitmask and counter of trace recursion: */
1532 unsigned long trace_recursion;
1533 #endif /* CONFIG_TRACING */
1534
1535 #ifdef CONFIG_KCOV
1536 /* See kernel/kcov.c for more details. */
1537
1538 /* Coverage collection mode enabled for this task (0 if disabled): */
1539 unsigned int kcov_mode;
1540
1541 /* Size of the kcov_area: */
1542 unsigned int kcov_size;
1543
1544 /* Buffer for coverage collection: */
1545 void *kcov_area;
1546
1547 /* KCOV descriptor wired with this task or NULL: */
1548 struct kcov *kcov;
1549
1550 /* KCOV common handle for remote coverage collection: */
1551 u64 kcov_handle;
1552
1553 /* KCOV sequence number: */
1554 int kcov_sequence;
1555
1556 /* Collect coverage from softirq context: */
1557 unsigned int kcov_softirq;
1558 #endif
1559
1560 #ifdef CONFIG_MEMCG_V1
1561 struct mem_cgroup *memcg_in_oom;
1562 #endif
1563
1564 #ifdef CONFIG_MEMCG
1565 /* Number of pages to reclaim on returning to userland: */
1566 unsigned int memcg_nr_pages_over_high;
1567
1568 /* Used by memcontrol for targeted memcg charge: */
1569 struct mem_cgroup *active_memcg;
1570
1571 /* Cache for current->cgroups->memcg->objcg lookups: */
1572 struct obj_cgroup *objcg;
1573 #endif
1574
1575 #ifdef CONFIG_BLK_CGROUP
1576 struct gendisk *throttle_disk;
1577 #endif
1578
1579 #ifdef CONFIG_UPROBES
1580 struct uprobe_task *utask;
1581 #endif
1582 #if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
1583 unsigned int sequential_io;
1584 unsigned int sequential_io_avg;
1585 #endif
1586 struct kmap_ctrl kmap_ctrl;
1587 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
1588 unsigned long task_state_change;
1589 # ifdef CONFIG_PREEMPT_RT
1590 unsigned long saved_state_change;
1591 # endif
1592 #endif
1593 struct rcu_head rcu;
1594 refcount_t rcu_users;
1595 int pagefault_disabled;
1596 #ifdef CONFIG_MMU
1597 struct task_struct *oom_reaper_list;
1598 struct timer_list oom_reaper_timer;
1599 #endif
1600 #ifdef CONFIG_VMAP_STACK
1601 struct vm_struct *stack_vm_area;
1602 #endif
1603 #ifdef CONFIG_THREAD_INFO_IN_TASK
1604 /* A live task holds one reference: */
1605 refcount_t stack_refcount;
1606 #endif
1607 #ifdef CONFIG_LIVEPATCH
1608 int patch_state;
1609 #endif
1610 #ifdef CONFIG_SECURITY
1611 /* Used by LSM modules for access restriction: */
1612 void *security;
1613 #endif
1614 #ifdef CONFIG_BPF_SYSCALL
1615 /* Used by BPF task local storage */
1616 struct bpf_local_storage __rcu *bpf_storage;
1617 /* Used for BPF run context */
1618 struct bpf_run_ctx *bpf_ctx;
1619 #endif
1620 /* Used by BPF for per-TASK xdp storage */
1621 struct bpf_net_context *bpf_net_context;
1622
1623 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
1624 unsigned long lowest_stack;
1625 unsigned long prev_lowest_stack;
1626 #endif
1627
1628 #ifdef CONFIG_X86_MCE
1629 void __user *mce_vaddr;
1630 __u64 mce_kflags;
1631 u64 mce_addr;
1632 __u64 mce_ripv : 1,
1633 mce_whole_page : 1,
1634 __mce_reserved : 62;
1635 struct callback_head mce_kill_me;
1636 int mce_count;
1637 #endif
1638 ANDROID_VENDOR_DATA_ARRAY(1, 6);
1639 ANDROID_OEM_DATA_ARRAY(1, 6);
1640
1641 #ifdef CONFIG_KRETPROBES
1642 struct llist_head kretprobe_instances;
1643 #endif
1644 #ifdef CONFIG_RETHOOK
1645 struct llist_head rethooks;
1646 #endif
1647
1648 #ifdef CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH
1649 /*
1650 * If L1D flush is supported on mm context switch
1651 * then we use this callback head to queue kill work
1652 * to kill tasks that are not running on SMT disabled
1653 * cores
1654 */
1655 struct callback_head l1d_flush_kill;
1656 #endif
1657 ANDROID_KABI_RESERVE(1);
1658 ANDROID_KABI_RESERVE(2);
1659 ANDROID_KABI_RESERVE(3);
1660 ANDROID_KABI_RESERVE(4);
1661 ANDROID_KABI_RESERVE(5);
1662 ANDROID_KABI_RESERVE(6);
1663 ANDROID_KABI_RESERVE(7);
1664 ANDROID_KABI_RESERVE(8);
1665
1666 #ifdef CONFIG_RV
1667 /*
1668 * Per-task RV monitor. Nowadays fixed in RV_PER_TASK_MONITORS.
1669 * If we find justification for more monitors, we can think
1670 * about adding more or developing a dynamic method. So far,
1671 * none of these are justified.
1672 */
1673 union rv_task_monitor rv[RV_PER_TASK_MONITORS];
1674 #endif
1675
1676 #ifdef CONFIG_USER_EVENTS
1677 struct user_event_mm *user_event_mm;
1678 #endif
1679
1680 /*
1681 * New fields for task_struct should be added above here, so that
1682 * they are included in the randomized portion of task_struct.
1683 */
1684 randomized_struct_fields_end
1685
1686 /* CPU-specific state of this task: */
1687 struct thread_struct thread;
1688
1689 /*
1690 * WARNING: on x86, 'thread_struct' contains a variable-sized
1691 * structure. It *MUST* be at the end of 'task_struct'.
1692 *
1693 * Do not put anything below here!
1694 */
1695 };
1696
1697 #ifdef CONFIG_SCHED_PROXY_EXEC
1698 DECLARE_STATIC_KEY_FALSE(__sched_proxy_exec);
1699 static inline bool sched_proxy_exec(void)
1700 {
1701 return static_branch_likely(&__sched_proxy_exec);
1702 }
1703 #else
1704 static inline bool sched_proxy_exec(void)
1705 {
1706 return false;
1707 }
1708 #endif
1709
1710 #define TASK_REPORT_IDLE (TASK_REPORT + 1)
1711 #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)
1712
1713 static inline unsigned int __task_state_index(unsigned int tsk_state,
1714 unsigned int tsk_exit_state)
1715 {
1716 unsigned int state = (tsk_state | tsk_exit_state) & TASK_REPORT;
1717
1718 BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
1719
1720 if ((tsk_state & TASK_IDLE) == TASK_IDLE)
1721 state = TASK_REPORT_IDLE;
1722
1723 /*
1724 * We're lying here, but rather than expose a completely new task state
1725 * to userspace, we can make this appear as if the task has gone through
1726 * a regular rt_mutex_lock() call.
1727 * Report frozen tasks as uninterruptible.
1728 */
1729 if ((tsk_state & TASK_RTLOCK_WAIT) || (tsk_state & TASK_FROZEN))
1730 state = TASK_UNINTERRUPTIBLE;
1731
1732 return fls(state);
1733 }
1734
1735 static inline unsigned int task_state_index(struct task_struct *tsk)
1736 {
1737 return __task_state_index(READ_ONCE(tsk->__state), tsk->exit_state);
1738 }
1739
1740 static inline char task_index_to_char(unsigned int state)
1741 {
1742 static const char state_char[] = "RSDTtXZPI";
1743
1744 BUILD_BUG_ON(TASK_REPORT_MAX * 2 != 1 << (sizeof(state_char) - 1));
1745
1746 return state_char[state];
1747 }
1748
1749 static inline char task_state_to_char(struct task_struct *tsk)
1750 {
1751 return task_index_to_char(task_state_index(tsk));
1752 }
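
/*
 * For illustration: these helpers back the single-letter state shown by
 * ps(1) and /proc/<pid>/stat; per the "RSDTtXZPI" table above,
 * task_state_to_char() yields 'R' for TASK_RUNNING, 'S' for
 * TASK_INTERRUPTIBLE and 'D' for TASK_UNINTERRUPTIBLE.
 */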
1753
1754 extern struct pid *cad_pid;
1755
1756 /*
1757 * Per process flags
1758 */
1759 #define PF_VCPU 0x00000001 /* I'm a virtual CPU */
1760 #define PF_IDLE 0x00000002 /* I am an IDLE thread */
1761 #define PF_EXITING 0x00000004 /* Getting shut down */
1762 #define PF_POSTCOREDUMP 0x00000008 /* Coredumps should ignore this task */
1763 #define PF_IO_WORKER 0x00000010 /* Task is an IO worker */
1764 #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
1765 #define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */
1766 #define PF_MCE_PROCESS 0x00000080 /* Process policy on mce errors */
1767 #define PF_SUPERPRIV 0x00000100 /* Used super-user privileges */
1768 #define PF_DUMPCORE 0x00000200 /* Dumped core */
1769 #define PF_SIGNALED 0x00000400 /* Killed by a signal */
1770 #define PF_MEMALLOC 0x00000800 /* Allocating memory to free memory. See memalloc_noreclaim_save() */
1771 #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */
1772 #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */
1773 #define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */
1774 #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */
1775 #define PF_KCOMPACTD 0x00010000 /* I am kcompactd */
1776 #define PF_KSWAPD 0x00020000 /* I am kswapd */
1777 #define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */
1778 #define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. See memalloc_noio_save() */
1779 #define PF_LOCAL_THROTTLE 0x00100000 /* Throttle writes only against the bdi I write to,
1780 * I am cleaning dirty pages from some other bdi. */
1781 #define PF_KTHREAD 0x00200000 /* I am a kernel thread */
1782 #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
1783 #define PF__HOLE__00800000 0x00800000
1784 #define PF__HOLE__01000000 0x01000000
1785 #define PF__HOLE__02000000 0x02000000
1786 #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
1787 #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
1788 #define PF_MEMALLOC_PIN 0x10000000 /* Allocations constrained to zones which allow long term pinning.
1789 * See memalloc_pin_save() */
1790 #define PF_BLOCK_TS 0x20000000 /* plug has ts that needs updating */
1791 #define PF__HOLE__40000000 0x40000000
1792 #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */
1793
1794 /*
1795 * Only the _current_ task can read/write to tsk->flags, but other
1796 * tasks can access tsk->flags in read-only mode, for example
1797 * with tsk_used_math() (as during threaded core dumping).
1798 * There is, however, an exception to this rule during ptrace
1799 * or during fork: the ptracer task is allowed to write to the
1800 * child->flags of its traced child (same goes for fork, the parent
1801 * can write to the child->flags), because we're guaranteed the
1802 * child is not running and in turn not changing child->flags
1803 * at the same time the parent does it.
1804 */
1805 #define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0)
1806 #define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0)
1807 #define clear_used_math() clear_stopped_child_used_math(current)
1808 #define set_used_math() set_stopped_child_used_math(current)
1809
1810 #define conditional_stopped_child_used_math(condition, child) \
1811 do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0)
1812
1813 #define conditional_used_math(condition) conditional_stopped_child_used_math(condition, current)
1814
1815 #define copy_to_stopped_child_used_math(child) \
1816 do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0)
1817
1818 /* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */
1819 #define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
1820 #define used_math() tsk_used_math(current)
1821
1822 static __always_inline bool is_percpu_thread(void)
1823 {
1824 #ifdef CONFIG_SMP
1825 return (current->flags & PF_NO_SETAFFINITY) &&
1826 (current->nr_cpus_allowed == 1);
1827 #else
1828 return true;
1829 #endif
1830 }
1831
1832 /* Per-process atomic flags. */
1833 #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */
1834 #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */
1835 #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */
1836 #define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */
1837 #define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/
1838 #define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */
1839 #define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */
1840 #define PFA_SPEC_SSB_NOEXEC 7 /* Speculative Store Bypass clear on execve() */
1841
1842 #define TASK_PFA_TEST(name, func) \
1843 static inline bool task_##func(struct task_struct *p) \
1844 { return test_bit(PFA_##name, &p->atomic_flags); }
1845
1846 #define TASK_PFA_SET(name, func) \
1847 static inline void task_set_##func(struct task_struct *p) \
1848 { set_bit(PFA_##name, &p->atomic_flags); }
1849
1850 #define TASK_PFA_CLEAR(name, func) \
1851 static inline void task_clear_##func(struct task_struct *p) \
1852 { clear_bit(PFA_##name, &p->atomic_flags); }
1853
1854 TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
1855 TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)
1856
1857 TASK_PFA_TEST(SPREAD_PAGE, spread_page)
1858 TASK_PFA_SET(SPREAD_PAGE, spread_page)
1859 TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)
1860
1861 TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
1862 TASK_PFA_SET(SPREAD_SLAB, spread_slab)
1863 TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
1864
1865 TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
1866 TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
1867 TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
1868
1869 TASK_PFA_TEST(SPEC_SSB_NOEXEC, spec_ssb_noexec)
1870 TASK_PFA_SET(SPEC_SSB_NOEXEC, spec_ssb_noexec)
1871 TASK_PFA_CLEAR(SPEC_SSB_NOEXEC, spec_ssb_noexec)
1872
1873 TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
1874 TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
1875
1876 TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable)
1877 TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable)
1878 TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable)
1879
1880 TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
1881 TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
1882
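/*
 * The TASK_PFA_*() macros above expand into tiny accessors; for instance
 * TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs) generates:
 *
 *	static inline bool task_no_new_privs(struct task_struct *p)
 *	{ return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags); }
 *
 * so callers simply write "if (task_no_new_privs(current))". Note that
 * NO_NEW_PRIVS and the *_FORCE_DISABLE flags deliberately get no
 * TASK_PFA_CLEAR() instantiation: once set, they are meant to stay set.
 */
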
1883 static inline void
1884 current_restore_flags(unsigned long orig_flags, unsigned long flags)
1885 {
1886 current->flags &= ~flags;
1887 current->flags |= orig_flags & flags;
1888 }
1889
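/*
 * Typical use of current_restore_flags() (sketch): snapshot the flags you
 * are about to modify, set them for a region, then restore just those bits
 * without clobbering anything changed in the meantime:
 *
 *	unsigned int pflags = current->flags;
 *
 *	current->flags |= PF_MEMALLOC_NOFS;
 *	... allocations here implicitly behave as GFP_NOFS ...
 *	current_restore_flags(pflags, PF_MEMALLOC_NOFS);
 *
 * For the memalloc flags specifically, memalloc_nofs_save() and
 * memalloc_nofs_restore() wrap exactly this pattern and are the preferred
 * interface.
 */
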
1890 extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
1891 extern int task_can_attach(struct task_struct *p);
1892 extern int dl_bw_alloc(int cpu, u64 dl_bw);
1893 extern void dl_bw_free(int cpu, u64 dl_bw);
1894 #ifdef CONFIG_SMP
1895
1896 /* do_set_cpus_allowed() - consider using set_cpus_allowed_ptr() instead */
1897 extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
1898
1899 /**
1900 * set_cpus_allowed_ptr - set CPU affinity mask of a task
1901 * @p: the task
1902 * @new_mask: CPU affinity mask
1903 *
1904 * Return: zero if successful, or a negative error code
1905 */
1906 extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
1907 extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node);
1908 extern void release_user_cpus_ptr(struct task_struct *p);
1909 extern int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask);
1910 extern void force_compatible_cpus_allowed_ptr(struct task_struct *p);
1911 extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p);
1912 #else
1913 static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
1914 {
1915 }
1916 static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
1917 {
1918 /* Opencoded cpumask_test_cpu(0, new_mask) to avoid dependency on cpumask.h */
1919 if ((*cpumask_bits(new_mask) & 1) == 0)
1920 return -EINVAL;
1921 return 0;
1922 }
1923 static inline int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node)
1924 {
1925 if (src->user_cpus_ptr)
1926 return -EINVAL;
1927 return 0;
1928 }
1929 static inline void release_user_cpus_ptr(struct task_struct *p)
1930 {
1931 WARN_ON(p->user_cpus_ptr);
1932 }
1933
1934 static inline int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask)
1935 {
1936 return 0;
1937 }
1938 #endif
1939
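/*
 * Example (minimal sketch; @worker is a task the caller manages and @cpu
 * is assumed to be a valid, online CPU): pin a task to one CPU and check
 * the result, since the request can legitimately fail:
 *
 *	int ret = set_cpus_allowed_ptr(worker, cpumask_of(cpu));
 *
 *	if (ret)
 *		pr_warn("could not pin worker to CPU%d: %d\n", cpu, ret);
 */
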
1940 extern int yield_to(struct task_struct *p, bool preempt);
1941 extern void set_user_nice(struct task_struct *p, long nice);
1942 extern int task_prio(const struct task_struct *p);
1943
1944 /**
1945 * task_nice - return the nice value of a given task.
1946 * @p: the task in question.
1947 *
1948 * Return: The nice value [ -20 ... 0 ... 19 ].
1949 */
1950 static inline int task_nice(const struct task_struct *p)
1951 {
1952 return PRIO_TO_NICE((p)->static_prio);
1953 }
1954
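/*
 * Sketch of the nice interfaces together: after
 *
 *	if (can_nice(p, 5))
 *		set_user_nice(p, 5);
 *
 * task_nice(p) returns 5, since set_user_nice() updates the static
 * priority that task_nice() converts back to a nice value.
 */
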
1955 extern int can_nice(const struct task_struct *p, const int nice);
1956 extern int task_curr(const struct task_struct *p);
1957 extern int idle_cpu(int cpu);
1958 extern int available_idle_cpu(int cpu);
1959 extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
1960 extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
1961 extern void sched_set_fifo(struct task_struct *p);
1962 extern void sched_set_fifo_low(struct task_struct *p);
1963 extern void sched_set_normal(struct task_struct *p, int nice);
1964 extern int sched_setattr(struct task_struct *, const struct sched_attr *);
1965 extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *);
1966 extern struct task_struct *idle_task(int cpu);
1967
1968 /**
1969 * is_idle_task - is the specified task an idle task?
1970 * @p: the task in question.
1971 *
1972 * Return: 1 if @p is an idle task. 0 otherwise.
1973 */
1974 static __always_inline bool is_idle_task(const struct task_struct *p)
1975 {
1976 return !!(p->flags & PF_IDLE);
1977 }
1978
1979 extern struct task_struct *curr_task(int cpu);
1980 extern void ia64_set_curr_task(int cpu, struct task_struct *p);
1981
1982 void yield(void);
1983
1984 union thread_union {
1985 struct task_struct task;
1986 #ifndef CONFIG_THREAD_INFO_IN_TASK
1987 struct thread_info thread_info;
1988 #endif
1989 unsigned long stack[THREAD_SIZE/sizeof(long)];
1990 };
1991
1992 #ifndef CONFIG_THREAD_INFO_IN_TASK
1993 extern struct thread_info init_thread_info;
1994 #endif
1995
1996 extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)];
1997
1998 #ifdef CONFIG_THREAD_INFO_IN_TASK
1999 # define task_thread_info(task) (&(task)->thread_info)
2000 #elif !defined(__HAVE_THREAD_FUNCTIONS)
2001 # define task_thread_info(task) ((struct thread_info *)(task)->stack)
2002 #endif
2003
2004 /*
2005 * find a task by one of its numerical ids
2006 *
2007 * find_task_by_pid_ns():
2008 * finds a task by its pid in the specified namespace
2009 * find_task_by_vpid():
2010 * finds a task by its virtual pid
2011 *
2012 * see also find_vpid() etc in include/linux/pid.h
2013 */
2014
2015 extern struct task_struct *find_task_by_vpid(pid_t nr);
2016 extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns);
2017
2018 /*
2019 * find a task by its virtual pid and get the task struct
2020 */
2021 extern struct task_struct *find_get_task_by_vpid(pid_t nr);
2022
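/*
 * The lookups above require rcu_read_lock() and return the task without
 * taking a reference, so the usual pattern is (sketch):
 *
 *	rcu_read_lock();
 *	p = find_task_by_vpid(nr);
 *	if (p)
 *		get_task_struct(p);
 *	rcu_read_unlock();
 *	...
 *	if (p)
 *		put_task_struct(p);
 *
 * find_get_task_by_vpid() bundles the lookup and the reference grab.
 */
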
2023 extern int wake_up_state(struct task_struct *tsk, unsigned int state);
2024 extern int wake_up_process(struct task_struct *tsk);
2025 extern void wake_up_new_task(struct task_struct *tsk);
2026
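/*
 * These wakeup primitives pair with the classic prepare-to-sleep sequence
 * (sketch; @wake_cond and @sleeper are illustrative):
 *
 *	The sleeping side:
 *		set_current_state(TASK_INTERRUPTIBLE);
 *		if (!wake_cond)
 *			schedule();
 *		__set_current_state(TASK_RUNNING);
 *
 *	The waking side:
 *		wake_cond = true;
 *		wake_up_process(sleeper);
 *
 * wake_up_state() only wakes a task currently in one of the given sleep
 * states, and wake_up_new_task() is reserved for freshly forked tasks.
 */
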
2027 #ifdef CONFIG_SMP
2028 extern void kick_process(struct task_struct *tsk);
2029 #else
2030 static inline void kick_process(struct task_struct *tsk) { }
2031 #endif
2032
2033 extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);
2034
2035 static inline void set_task_comm(struct task_struct *tsk, const char *from)
2036 {
2037 __set_task_comm(tsk, from, false);
2038 }
2039
2040 extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk);
2041 #define get_task_comm(buf, tsk) ({ \
2042 BUILD_BUG_ON(sizeof(buf) != TASK_COMM_LEN); \
2043 __get_task_comm(buf, sizeof(buf), tsk); \
2044 })
2045
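/*
 * The BUILD_BUG_ON() above forces the destination to be a real array of
 * TASK_COMM_LEN bytes; passing a plain pointer fails to build. Sketch:
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	get_task_comm(comm, tsk);
 *	pr_debug("acting on behalf of %s\n", comm);
 */
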
2046 #ifdef CONFIG_SMP
2047 static __always_inline void scheduler_ipi(void)
2048 {
2049 /*
2050 * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
2051 * TIF_NEED_RESCHED remotely (for the first time) will also send
2052 * this IPI.
2053 */
2054 preempt_fold_need_resched();
2055 }
2056 #else
2057 static inline void scheduler_ipi(void) { }
2058 #endif
2059
2060 extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state);
2061
2062 /*
2063 * Set thread flags in other task's structures.
2064 * See asm/thread_info.h for TIF_xxxx flags available:
2065 */
2066 static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag)
2067 {
2068 set_ti_thread_flag(task_thread_info(tsk), flag);
2069 }
2070
2071 static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag)
2072 {
2073 clear_ti_thread_flag(task_thread_info(tsk), flag);
2074 }
2075
2076 static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag,
2077 bool value)
2078 {
2079 update_ti_thread_flag(task_thread_info(tsk), flag, value);
2080 }
2081
2082 static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
2083 {
2084 return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
2085 }
2086
2087 static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
2088 {
2089 return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag);
2090 }
2091
2092 static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
2093 {
2094 return test_ti_thread_flag(task_thread_info(tsk), flag);
2095 }
2096
2097 static inline void set_tsk_need_resched(struct task_struct *tsk)
2098 {
2099 set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
2100 }
2101
2102 static inline void clear_tsk_need_resched(struct task_struct *tsk)
2103 {
2104 clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
2105 }
2106
2107 static inline int test_tsk_need_resched(struct task_struct *tsk)
2108 {
2109 return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
2110 }
2111
2112 /*
2113 * cond_resched() and cond_resched_lock(): latency reduction via
2114 * explicit rescheduling in places that are safe. The return
2115 * value indicates whether a reschedule was in fact done.
2116 * cond_resched_lock() will drop the spinlock before scheduling and re-acquire it afterwards.
2117 */
2118 #if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC)
2119 extern int __cond_resched(void);
2120
2121 #if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
2122
2123 void sched_dynamic_klp_enable(void);
2124 void sched_dynamic_klp_disable(void);
2125
2126 DECLARE_STATIC_CALL(cond_resched, __cond_resched);
2127
2128 static __always_inline int _cond_resched(void)
2129 {
2130 return static_call_mod(cond_resched)();
2131 }
2132
2133 #elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
2134
2135 extern int dynamic_cond_resched(void);
2136
2137 static __always_inline int _cond_resched(void)
2138 {
2139 return dynamic_cond_resched();
2140 }
2141
2142 #else /* !CONFIG_PREEMPTION */
2143
2144 static inline int _cond_resched(void)
2145 {
2146 klp_sched_try_switch();
2147 return __cond_resched();
2148 }
2149
2150 #endif /* PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
2151
2152 #else /* CONFIG_PREEMPTION && !CONFIG_PREEMPT_DYNAMIC */
2153
2154 static inline int _cond_resched(void)
2155 {
2156 klp_sched_try_switch();
2157 return 0;
2158 }
2159
2160 #endif /* !CONFIG_PREEMPTION || CONFIG_PREEMPT_DYNAMIC */
2161
2162 #define cond_resched() ({ \
2163 __might_resched(__FILE__, __LINE__, 0); \
2164 _cond_resched(); \
2165 })
2166
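/*
 * Typical cond_resched() use (sketch; @pos, @head and expensive_work() are
 * illustrative): a long loop in process context offers to reschedule on
 * each iteration so it does not hog the CPU on non-preemptible kernels:
 *
 *	list_for_each_entry(pos, head, list) {
 *		expensive_work(pos);
 *		cond_resched();
 *	}
 *
 * The return value is non-zero if a reschedule actually took place.
 */
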
2167 extern int __cond_resched_lock(spinlock_t *lock);
2168 extern int __cond_resched_rwlock_read(rwlock_t *lock);
2169 extern int __cond_resched_rwlock_write(rwlock_t *lock);
2170
2171 #define MIGHT_RESCHED_RCU_SHIFT 8
2172 #define MIGHT_RESCHED_PREEMPT_MASK ((1U << MIGHT_RESCHED_RCU_SHIFT) - 1)
2173
2174 #ifndef CONFIG_PREEMPT_RT
2175 /*
2176 * Non-RT kernels have an elevated preempt count due to the held lock,
2177 * but are not allowed to be inside an RCU read-side critical section.
2178 */
2179 # define PREEMPT_LOCK_RESCHED_OFFSETS PREEMPT_LOCK_OFFSET
2180 #else
2181 /*
2182 * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in
2183 * cond_resched*lock() has to take that into account because it checks for
2184 * preempt_count() and rcu_preempt_depth().
2185 */
2186 # define PREEMPT_LOCK_RESCHED_OFFSETS \
2187 (PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT))
2188 #endif
2189
2190 #define cond_resched_lock(lock) ({ \
2191 __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \
2192 __cond_resched_lock(lock); \
2193 })
2194
2195 #define cond_resched_rwlock_read(lock) ({ \
2196 __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \
2197 __cond_resched_rwlock_read(lock); \
2198 })
2199
2200 #define cond_resched_rwlock_write(lock) ({ \
2201 __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \
2202 __cond_resched_rwlock_write(lock); \
2203 })
2204
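/*
 * The lock-holding variants are for long loops that run under a spinlock
 * or rwlock: when a reschedule is due they drop the lock, schedule and
 * take the lock again, so anything the lock protects may have changed by
 * the time they return. Sketch (@foo_lock, @foo_list and the helpers are
 * illustrative):
 *
 *	spin_lock(&foo_lock);
 *	while ((entry = first_entry_or_null_locked(&foo_list)) != NULL) {
 *		cheap_work_locked(entry);
 *		cond_resched_lock(&foo_lock);
 *	}
 *	spin_unlock(&foo_lock);
 */
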
2205 static inline void __force_blocked_on_runnable(struct task_struct *p)
2206 {
2207 lockdep_assert_held(&p->blocked_lock);
2208 p->blocked_on_state = BO_RUNNABLE;
2209 }
2210
2211 static inline void force_blocked_on_runnable(struct task_struct *p)
2212 {
2213 guard(raw_spinlock_irqsave)(&p->blocked_lock);
2214 __force_blocked_on_runnable(p);
2215 }
2216
2217 static inline void __set_blocked_on_runnable(struct task_struct *p)
2218 {
2219 lockdep_assert_held(&p->blocked_lock);
2220
2221 if (p->blocked_on_state == BO_WAKING)
2222 p->blocked_on_state = BO_RUNNABLE;
2223 }
2224
2225 static inline void set_blocked_on_runnable(struct task_struct *p)
2226 {
2227 if (!sched_proxy_exec())
2228 return;
2229
2230 guard(raw_spinlock_irqsave)(&p->blocked_lock);
2231 __set_blocked_on_runnable(p);
2232 }
2233
2234 static inline void __set_blocked_on_waking(struct task_struct *p)
2235 {
2236 lockdep_assert_held(&p->blocked_lock);
2237
2238 if (p->blocked_on_state == BO_BLOCKED)
2239 p->blocked_on_state = BO_WAKING;
2240 }
2241
2242 static inline struct mutex *__get_task_blocked_on(struct task_struct *p)
2243 {
2244 lockdep_assert_held_once(&p->blocked_lock);
2245 return p->blocked_on;
2246 }
2247
2248 #ifndef CONFIG_PREEMPT_RT
2249 static inline void set_blocked_on_waking_nested(struct task_struct *p, struct mutex *m)
2250 {
2251 raw_spin_lock_nested(&p->blocked_lock, SINGLE_DEPTH_NESTING);
2252 __set_blocked_on_waking(p);
2253 raw_spin_unlock(&p->blocked_lock);
2254 }
2255 #else
2256 static inline void set_blocked_on_waking_nested(struct task_struct *p, struct rt_mutex *m)
2257 {
2258 raw_spin_lock_nested(&p->blocked_lock, SINGLE_DEPTH_NESTING);
2259 __set_blocked_on_waking(p);
2260 raw_spin_unlock(&p->blocked_lock);
2261 }
2262 #endif
2263
2264 static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m)
2265 {
2266 WARN_ON_ONCE(!m);
2267 /* The task should only be setting itself as blocked */
2268 WARN_ON_ONCE(p != current);
2269 /* Currently we serialize blocked_on under the task::blocked_lock */
2270 lockdep_assert_held_once(&p->blocked_lock);
2271 /*
2272 * Check to ensure that we don't overwrite an existing mutex value
2273 * with a different mutex.
2274 */
2275 WARN_ON_ONCE(p->blocked_on);
2276 p->blocked_on = m;
2277 p->blocked_on_state = BO_BLOCKED;
2278 }
2279
2280 static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m)
2281 {
2282 /* The task should only be clearing itself */
2283 WARN_ON_ONCE(p != current);
2284 /* Currently we serialize blocked_on under the task::blocked_lock */
2285 lockdep_assert_held_once(&p->blocked_lock);
2286 /* Make sure we are clearing the relationship with the right lock */
2287 WARN_ON_ONCE(p->blocked_on != m);
2288 p->blocked_on = NULL;
2289 p->blocked_on_state = BO_RUNNABLE;
2290 }
2291
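/*
 * Sketch of how the blocked_on helpers are intended to pair up in a lock
 * slow path (illustrative only; @m is the mutex being blocked on): the
 * task marks itself blocked before going to sleep and clears the marking
 * once it finally owns the lock:
 *
 *	scoped_guard(raw_spinlock_irqsave, &current->blocked_lock)
 *		__set_task_blocked_on(current, m);
 *
 *	... sleep waiting for @m ...
 *
 *	scoped_guard(raw_spinlock_irqsave, &current->blocked_lock)
 *		__clear_task_blocked_on(current, m);
 *
 * Waking paths move BO_BLOCKED to BO_WAKING via __set_blocked_on_waking(),
 * and the woken task flips itself to BO_RUNNABLE with
 * set_blocked_on_runnable() once it can run again.
 */
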
2292 static __always_inline bool need_resched(void)
2293 {
2294 return unlikely(tif_need_resched());
2295 }
2296
2297 /*
2298 * Wrappers for p->thread_info->cpu access. No-op on UP.
2299 */
2300 #ifdef CONFIG_SMP
2301
2302 static inline unsigned int task_cpu(const struct task_struct *p)
2303 {
2304 return READ_ONCE(task_thread_info(p)->cpu);
2305 }
2306
2307 extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
2308
2309 #else
2310
2311 static inline unsigned int task_cpu(const struct task_struct *p)
2312 {
2313 return 0;
2314 }
2315
2316 static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
2317 {
2318 }
2319
2320 #endif /* CONFIG_SMP */
2321
2322 static inline bool task_is_runnable(struct task_struct *p)
2323 {
2324 return p->on_rq && !p->se.sched_delayed;
2325 }
2326
2327 extern bool sched_task_on_rq(struct task_struct *p);
2328 extern unsigned long get_wchan(struct task_struct *p);
2329 extern struct task_struct *cpu_curr_snapshot(int cpu);
2330
2331 /*
2332 * In order to reduce various lock holder preemption latencies, provide an
2333 * interface to see if a vCPU is currently running or not.
2334 *
2335 * This allows us to terminate optimistic spin loops and block, analogous to
2336 * the native optimistic spin heuristic of testing if the lock owner task is
2337 * running or not.
2338 */
2339 #ifndef vcpu_is_preempted
2340 static inline bool vcpu_is_preempted(int cpu)
2341 {
2342 return false;
2343 }
2344 #endif
2345
2346 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
2347 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
2348
2349 #ifndef TASK_SIZE_OF
2350 #define TASK_SIZE_OF(tsk) TASK_SIZE
2351 #endif
2352
2353 #ifdef CONFIG_SMP
2354 static inline bool owner_on_cpu(struct task_struct *owner)
2355 {
2356 /*
2357 * Due to the lock holder preemption issue, we skip spinning if the
2358 * task is not on a CPU or its CPU is preempted.
2359 */
2360 return READ_ONCE(owner->on_cpu) && !vcpu_is_preempted(task_cpu(owner));
2361 }
2362
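/*
 * owner_on_cpu() is the building block for optimistic spinning: keep
 * spinning only while the lock owner is actually running. Sketch (@lock
 * and its ->owner field are illustrative; real users do this under
 * rcu_read_lock() so @owner cannot be freed underneath them):
 *
 *	while (READ_ONCE(lock->owner) == owner) {
 *		if (!owner_on_cpu(owner) || need_resched())
 *			break;
 *		cpu_relax();
 *	}
 *
 * Leaving the loop means either the owner changed (try to take the lock)
 * or spinning is no longer worthwhile and the caller should sleep.
 */
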
2363 /* Returns effective CPU energy utilization, as seen by the scheduler */
2364 unsigned long sched_cpu_util(int cpu);
2365 #endif /* CONFIG_SMP */
2366
2367 #ifdef CONFIG_SCHED_CORE
2368 extern void sched_core_free(struct task_struct *tsk);
2369 extern void sched_core_fork(struct task_struct *p);
2370 extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
2371 unsigned long uaddr);
2372 extern int sched_core_idle_cpu(int cpu);
2373 #else
2374 static inline void sched_core_free(struct task_struct *tsk) { }
2375 static inline void sched_core_fork(struct task_struct *p) { }
2376 static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); }
2377 #endif
2378
2379 extern void sched_set_stop_task(int cpu, struct task_struct *stop);
2380
2381 #ifdef CONFIG_MEM_ALLOC_PROFILING
2382 static __always_inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag)
2383 {
2384 swap(current->alloc_tag, tag);
2385 return tag;
2386 }
2387
2388 static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old)
2389 {
2390 #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
2391 WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n");
2392 #endif
2393 current->alloc_tag = old;
2394 }
2395 #else
2396 #define alloc_tag_save(_tag) NULL
2397 #define alloc_tag_restore(_tag, _old) do {} while (0)
2398 #endif
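
/*
 * With CONFIG_MEM_ALLOC_PROFILING the pair above temporarily installs a
 * different tag in current->alloc_tag and later puts the old one back;
 * without it, both calls compile away. Sketch (assuming @site_tag is a
 * tag the caller owns):
 *
 *	struct alloc_tag *old = alloc_tag_save(site_tag);
 *
 *	... allocations accounted via current->alloc_tag now hit site_tag ...
 *	alloc_tag_restore(site_tag, old);
 */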
2399
2400 #endif
2401