• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #ifndef _LINUX_PSI_TYPES_H
2 #define _LINUX_PSI_TYPES_H
3 
4 #include <linux/kthread.h>
5 #include <linux/seqlock.h>
6 #include <linux/types.h>
7 #include <linux/kref.h>
8 #include <linux/wait.h>
9 
10 #ifdef CONFIG_PSI
11 
/*
 * Tracked task states.
 *
 * Each enumerator is used both as a slot in psi_group_cpu.tasks[]
 * and as the bit position of the corresponding TSK_* mask.
 */
enum psi_task_count {
	NR_IOWAIT,
	NR_MEMSTALL,
	NR_RUNNING,
	/*
	 * This can't have values other than 0 or 1 and could be
	 * implemented as a bit flag. But for now we still have room
	 * in the first cacheline of psi_group_cpu, and this way we
	 * don't have to special case any state tracking for it.
	 */
	NR_ONCPU,
	/*
	 * For IO and CPU stalls the presence of running/oncpu tasks
	 * in the domain means a partial rather than a full stall.
	 * For memory it's not so simple because of page reclaimers:
	 * they are running/oncpu while representing a stall. To tell
	 * whether a domain has productivity left or not, we need to
	 * distinguish between regular running (i.e. productive)
	 * threads and memstall ones.
	 */
	NR_MEMSTALL_RUNNING,
	/* Number of states listed above; update when adding a state */
	NR_PSI_TASK_COUNTS = 5,
};
36 
/*
 * Task state bitmasks: bit N corresponds to entry N of
 * enum psi_task_count.
 */
#define TSK_IOWAIT	(1 << NR_IOWAIT)
#define TSK_MEMSTALL	(1 << NR_MEMSTALL)
#define TSK_RUNNING	(1 << NR_RUNNING)
#define TSK_ONCPU	(1 << NR_ONCPU)
#define TSK_MEMSTALL_RUNNING	(1 << NR_MEMSTALL_RUNNING)
43 
/* Resources that workloads could be stalled on */
enum psi_res {
	PSI_IO,
	PSI_MEM,
	PSI_CPU,
	/* Number of resources listed above; update when adding one */
	NR_PSI_RESOURCES = 3,
};
51 
/*
 * Pressure states for each resource:
 *
 * SOME: Stalled tasks & working tasks
 * FULL: Stalled tasks & no working tasks
 *
 * PSI_NONIDLE is last, so arrays sized NR_PSI_STATES - 1 (see
 * struct psi_group) have a slot for every state except it.
 */
enum psi_states {
	PSI_IO_SOME,
	PSI_IO_FULL,
	PSI_MEM_SOME,
	PSI_MEM_FULL,
	PSI_CPU_SOME,
	PSI_CPU_FULL,
	/* Only per-CPU, to weigh the CPU in the global average: */
	PSI_NONIDLE,
	/* Number of states listed above, PSI_NONIDLE included */
	NR_PSI_STATES = 7,
};
69 
/*
 * Consumers of the per-CPU time buckets. The value selects the first
 * dimension of psi_group_cpu.times_prev[][] and psi_group.total[][].
 *
 * NOTE(review): PSI_AVGS presumably is the periodic averaging work and
 * PSI_POLL the trigger polling monitor - confirm against the users.
 */
enum psi_aggregators {
	PSI_AVGS = 0,
	PSI_POLL,
	NR_PSI_AGGREGATORS,
};
75 
76 struct psi_group_cpu {
77 	/* 1st cacheline updated by the scheduler */
78 
79 	/* Aggregator needs to know of concurrent changes */
80 	seqcount_t seq ____cacheline_aligned_in_smp;
81 
82 	/* States of the tasks belonging to this group */
83 	unsigned int tasks[NR_PSI_TASK_COUNTS];
84 
85 	/* Aggregate pressure state derived from the tasks */
86 	u32 state_mask;
87 
88 	/* Period time sampling buckets for each state of interest (ns) */
89 	u32 times[NR_PSI_STATES];
90 
91 	/* Time of last task change in this group (rq_clock) */
92 	u64 state_start;
93 
94 	/* 2nd cacheline updated by the aggregator */
95 
96 	/* Delta detection against the sampling buckets */
97 	u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES]
98 			____cacheline_aligned_in_smp;
99 };
100 
101 /* PSI growth tracking window */
102 struct psi_window {
103 	/* Window size in ns */
104 	u64 size;
105 
106 	/* Start time of the current window in ns */
107 	u64 start_time;
108 
109 	/* Value at the start of the window */
110 	u64 start_value;
111 
112 	/* Value growth in the previous window */
113 	u64 prev_growth;
114 };
115 
116 struct psi_trigger {
117 	/* PSI state being monitored by the trigger */
118 	enum psi_states state;
119 
120 	/* User-spacified threshold in ns */
121 	u64 threshold;
122 
123 	/* List node inside triggers list */
124 	struct list_head node;
125 
126 	/* Backpointer needed during trigger destruction */
127 	struct psi_group *group;
128 
129 	/* Wait queue for polling */
130 	wait_queue_head_t event_wait;
131 
132 	/* Pending event flag */
133 	int event;
134 
135 	/* Tracking window */
136 	struct psi_window win;
137 
138 	/*
139 	 * Time last event was generated. Used for rate-limiting
140 	 * events to one per window
141 	 */
142 	u64 last_event_time;
143 };
144 
145 struct psi_group {
146 	/* Protects data used by the aggregator */
147 	struct mutex avgs_lock;
148 
149 	/* Per-cpu task state & time tracking */
150 	struct psi_group_cpu __percpu *pcpu;
151 
152 	/* Running pressure averages */
153 	u64 avg_total[NR_PSI_STATES - 1];
154 	u64 avg_last_update;
155 	u64 avg_next_update;
156 
157 	/* Aggregator work control */
158 	struct delayed_work avgs_work;
159 
160 	/* Total stall times and sampled pressure averages */
161 	u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
162 	unsigned long avg[NR_PSI_STATES - 1][3];
163 
164 	/* Monitor work control */
165 	struct task_struct __rcu *poll_task;
166 	struct timer_list poll_timer;
167 	wait_queue_head_t poll_wait;
168 	atomic_t poll_wakeup;
169 	atomic_t poll_scheduled;
170 
171 	/* Protects data used by the monitor */
172 	struct mutex trigger_lock;
173 
174 	/* Configured polling triggers */
175 	struct list_head triggers;
176 	u32 nr_triggers[NR_PSI_STATES - 1];
177 	u32 poll_states;
178 	u64 poll_min_period;
179 
180 	/* Total stall times at the start of monitor activation */
181 	u64 polling_total[NR_PSI_STATES - 1];
182 	u64 polling_next_update;
183 	u64 polling_until;
184 };
185 
186 #else /* CONFIG_PSI */
187 
/*
 * Empty stand-in for struct psi_group when CONFIG_PSI is not set.
 * An empty struct is a GNU C extension.
 */
struct psi_group { };
189 
190 #endif /* CONFIG_PSI */
191 
192 #endif /* _LINUX_PSI_TYPES_H */
193