• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  #ifndef _LINUX_PSI_TYPES_H
2  #define _LINUX_PSI_TYPES_H
3  
4  #include <linux/kthread.h>
5  #include <linux/seqlock.h>
6  #include <linux/types.h>
7  #include <linux/kref.h>
8  #include <linux/wait.h>
9  
10  #ifdef CONFIG_PSI
11  
/*
 * Task states that PSI keeps a per-group, per-CPU count of.
 *
 * Each value doubles as the bit position of the matching TSK_* mask
 * and as a slot in an array sized by NR_PSI_TASK_COUNTS.
 */
enum psi_task_count {
	NR_IOWAIT		= 0,
	NR_MEMSTALL		= 1,
	NR_RUNNING		= 2,
	/*
	 * Only ever 0 or 1, so a single bit flag would do. There is
	 * still room in the first cacheline of psi_group_cpu though,
	 * and keeping it a regular counter avoids special-casing it
	 * in the state tracking.
	 */
	NR_ONCPU		= 3,
	/*
	 * With IO and CPU stalls, running/oncpu tasks in the domain
	 * turn a full stall into a partial one. Memory is different
	 * because page reclaimers are running/oncpu while they
	 * themselves represent a stall. Counting memstall tasks that
	 * are running separately from regular (i.e. productive)
	 * running threads lets us tell whether the domain has any
	 * productivity left.
	 */
	NR_MEMSTALL_RUNNING	= 4,
	/* Number of counters above; keep in sync when adding one */
	NR_PSI_TASK_COUNTS	= 5,
};
36  
/*
 * Task state bitmasks.
 *
 * One bit per enum psi_task_count counter: the bit position of each
 * TSK_* mask is the value of the corresponding NR_* enumerator, so a
 * set of task states can be carried around as a single integer.
 */
#define TSK_IOWAIT	(1 << NR_IOWAIT)
#define TSK_MEMSTALL	(1 << NR_MEMSTALL)
#define TSK_RUNNING	(1 << NR_RUNNING)
#define TSK_ONCPU	(1 << NR_ONCPU)
#define TSK_MEMSTALL_RUNNING	(1 << NR_MEMSTALL_RUNNING)
43  
/*
 * The resources a workload can be stalled on. NR_PSI_RESOURCES is the
 * number of resources listed here.
 */
enum psi_res {
	PSI_IO			= 0,
	PSI_MEM			= 1,
	PSI_CPU			= 2,
	NR_PSI_RESOURCES	= 3,
};
51  
/*
 * Pressure states, two per resource:
 *
 * SOME: there are stalled tasks, but also tasks still doing work
 * FULL: there are stalled tasks and no task is doing work
 *
 * PSI_NONIDLE is not a pressure state of a resource; it is the last
 * entry so that arrays which skip it can be sized NR_PSI_STATES - 1.
 */
enum psi_states {
	PSI_IO_SOME	= 0,
	PSI_IO_FULL	= 1,
	PSI_MEM_SOME	= 2,
	PSI_MEM_FULL	= 3,
	PSI_CPU_SOME	= 4,
	PSI_CPU_FULL	= 5,
	/* Only per-CPU, to weigh the CPU in the global average: */
	PSI_NONIDLE	= 6,
	NR_PSI_STATES	= 7,
};
69  
/*
 * Consumers of the per-CPU time buckets. Each one gets its own row of
 * previous samples in psi_group_cpu::times_prev[] and its own row of
 * totals in psi_group::total[].
 */
enum psi_aggregators {
	PSI_AVGS		= 0,
	PSI_POLL		= 1,
	NR_PSI_AGGREGATORS	= 2,
};
75  
/*
 * Per-CPU task state and stall time tracking for one psi_group
 * (reached through psi_group::pcpu).
 *
 * The layout is deliberate: the fields written by the scheduler and
 * the fields written by the aggregator each start on their own
 * cacheline (____cacheline_aligned_in_smp). Do not reorder members
 * without checking the cacheline split.
 */
struct psi_group_cpu {
	/* 1st cacheline updated by the scheduler */

	/* Aggregator needs to know of concurrent changes */
	seqcount_t seq ____cacheline_aligned_in_smp;

	/*
	 * States of the tasks belonging to this group; one counter per
	 * enum psi_task_count entry
	 */
	unsigned int tasks[NR_PSI_TASK_COUNTS];

	/* Aggregate pressure state derived from the tasks */
	u32 state_mask;

	/*
	 * Period time sampling buckets for each state of interest (ns);
	 * one bucket per enum psi_states entry, PSI_NONIDLE included
	 */
	u32 times[NR_PSI_STATES];

	/* Time of last task change in this group (rq_clock) */
	u64 state_start;

	/* 2nd cacheline updated by the aggregator */

	/*
	 * Delta detection against the sampling buckets; one row of
	 * previous samples per enum psi_aggregators entry
	 */
	u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES]
			____cacheline_aligned_in_smp;
};
100  
/*
 * PSI growth tracking window.
 *
 * Records where the current window started (time and value) plus how
 * much the tracked value grew during the previous window. Embedded in
 * struct psi_trigger as its tracking window.
 */
struct psi_window {
	/* Window size in ns */
	u64 size;

	/* Start time of the current window in ns */
	u64 start_time;

	/* Value at the start of the window */
	u64 start_value;

	/* Value growth in the previous window */
	u64 prev_growth;
};
115  
/*
 * One pressure trigger: watches a single PSI state of a psi_group
 * against a threshold within a tracking window. Triggers are linked
 * into their group's trigger list through @node.
 */
struct psi_trigger {
	/* PSI state being monitored by the trigger */
	enum psi_states state;

	/* User-specified threshold in ns */
	u64 threshold;

	/* List node inside triggers list */
	struct list_head node;

	/* Backpointer needed during trigger destruction */
	struct psi_group *group;

	/* Wait queue for polling */
	wait_queue_head_t event_wait;

	/* Pending event flag */
	int event;

	/* Tracking window */
	struct psi_window win;

	/*
	 * Time last event was generated. Used for rate-limiting
	 * events to one per window
	 */
	u64 last_event_time;
};
144  
/*
 * Pressure tracking state for one group of tasks.
 *
 * Holds the per-CPU task/time tracking, the data of the averages
 * aggregator (guarded by @avgs_lock) and the data of the trigger
 * monitor (guarded by @trigger_lock).
 *
 * Arrays sized NR_PSI_STATES - 1 leave out the last state,
 * PSI_NONIDLE, which is tracked per-CPU only.
 */
struct psi_group {
	/* Protects data used by the aggregator */
	struct mutex avgs_lock;

	/* Per-cpu task state & time tracking */
	struct psi_group_cpu __percpu *pcpu;

	/* Running pressure averages */
	u64 avg_total[NR_PSI_STATES - 1];
	u64 avg_last_update;
	u64 avg_next_update;

	/* Aggregator work control */
	struct delayed_work avgs_work;

	/*
	 * Total stall times and sampled pressure averages.
	 * @total has one row per enum psi_aggregators entry.
	 * NOTE(review): the 3 in @avg is presumably the number of
	 * averaging windows - confirm against the aggregator code.
	 */
	u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
	unsigned long avg[NR_PSI_STATES - 1][3];

	/* Monitor work control; @poll_task is an RCU-annotated pointer */
	struct task_struct __rcu *poll_task;
	struct timer_list poll_timer;
	wait_queue_head_t poll_wait;
	atomic_t poll_wakeup;
	atomic_t poll_scheduled;

	/* Protects data used by the monitor */
	struct mutex trigger_lock;

	/* Configured polling triggers; @nr_triggers counts them per state */
	struct list_head triggers;
	u32 nr_triggers[NR_PSI_STATES - 1];
	u32 poll_states;
	u64 poll_min_period;

	/* Total stall times at the start of monitor activation */
	u64 polling_total[NR_PSI_STATES - 1];
	u64 polling_next_update;
	u64 polling_until;
};
185  
186  #else /* CONFIG_PSI */
187  
/*
 * CONFIG_PSI is off: keep the type defined, but empty, so code that
 * names struct psi_group still compiles.
 */
struct psi_group { };
189  
190  #endif /* CONFIG_PSI */
191  
192  #endif /* _LINUX_PSI_TYPES_H */
193