1 #ifndef _LINUX_PSI_TYPES_H 2 #define _LINUX_PSI_TYPES_H 3 4 #include <linux/kthread.h> 5 #include <linux/seqlock.h> 6 #include <linux/types.h> 7 #include <linux/kref.h> 8 #include <linux/wait.h> 9 10 #ifdef CONFIG_PSI 11 12 /* Tracked task states */ 13 enum psi_task_count { 14 NR_IOWAIT, 15 NR_MEMSTALL, 16 NR_RUNNING, 17 /* 18 * This can't have values other than 0 or 1 and could be 19 * implemented as a bit flag. But for now we still have room 20 * in the first cacheline of psi_group_cpu, and this way we 21 * don't have to special case any state tracking for it. 22 */ 23 NR_ONCPU, 24 /* 25 * For IO and CPU stalls the presence of running/oncpu tasks 26 * in the domain means a partial rather than a full stall. 27 * For memory it's not so simple because of page reclaimers: 28 * they are running/oncpu while representing a stall. To tell 29 * whether a domain has productivity left or not, we need to 30 * distinguish between regular running (i.e. productive) 31 * threads and memstall ones. 32 */ 33 NR_MEMSTALL_RUNNING, 34 NR_PSI_TASK_COUNTS = 5, 35 }; 36 37 /* Task state bitmasks */ 38 #define TSK_IOWAIT (1 << NR_IOWAIT) 39 #define TSK_MEMSTALL (1 << NR_MEMSTALL) 40 #define TSK_RUNNING (1 << NR_RUNNING) 41 #define TSK_ONCPU (1 << NR_ONCPU) 42 #define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING) 43 44 /* Resources that workloads could be stalled on */ 45 enum psi_res { 46 PSI_IO, 47 PSI_MEM, 48 PSI_CPU, 49 NR_PSI_RESOURCES = 3, 50 }; 51 52 /* 53 * Pressure states for each resource: 54 * 55 * SOME: Stalled tasks & working tasks 56 * FULL: Stalled tasks & no working tasks 57 */ 58 enum psi_states { 59 PSI_IO_SOME, 60 PSI_IO_FULL, 61 PSI_MEM_SOME, 62 PSI_MEM_FULL, 63 PSI_CPU_SOME, 64 PSI_CPU_FULL, 65 /* Only per-CPU, to weigh the CPU in the global average: */ 66 PSI_NONIDLE, 67 NR_PSI_STATES = 7, 68 }; 69 70 enum psi_aggregators { 71 PSI_AVGS = 0, 72 PSI_POLL, 73 NR_PSI_AGGREGATORS, 74 }; 75 76 struct psi_group_cpu { 77 /* 1st cacheline updated by the scheduler */ 78 79 /* Aggregator needs to know of concurrent changes */ 80 seqcount_t seq ____cacheline_aligned_in_smp; 81 82 /* States of the tasks belonging to this group */ 83 unsigned int tasks[NR_PSI_TASK_COUNTS]; 84 85 /* Aggregate pressure state derived from the tasks */ 86 u32 state_mask; 87 88 /* Period time sampling buckets for each state of interest (ns) */ 89 u32 times[NR_PSI_STATES]; 90 91 /* Time of last task change in this group (rq_clock) */ 92 u64 state_start; 93 94 /* 2nd cacheline updated by the aggregator */ 95 96 /* Delta detection against the sampling buckets */ 97 u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES] 98 ____cacheline_aligned_in_smp; 99 }; 100 101 /* PSI growth tracking window */ 102 struct psi_window { 103 /* Window size in ns */ 104 u64 size; 105 106 /* Start time of the current window in ns */ 107 u64 start_time; 108 109 /* Value at the start of the window */ 110 u64 start_value; 111 112 /* Value growth in the previous window */ 113 u64 prev_growth; 114 }; 115 116 struct psi_trigger { 117 /* PSI state being monitored by the trigger */ 118 enum psi_states state; 119 120 /* User-spacified threshold in ns */ 121 u64 threshold; 122 123 /* List node inside triggers list */ 124 struct list_head node; 125 126 /* Backpointer needed during trigger destruction */ 127 struct psi_group *group; 128 129 /* Wait queue for polling */ 130 wait_queue_head_t event_wait; 131 132 /* Pending event flag */ 133 int event; 134 135 /* Tracking window */ 136 struct psi_window win; 137 138 /* 139 * Time last event was generated. Used for rate-limiting 140 * events to one per window 141 */ 142 u64 last_event_time; 143 }; 144 145 struct psi_group { 146 /* Protects data used by the aggregator */ 147 struct mutex avgs_lock; 148 149 /* Per-cpu task state & time tracking */ 150 struct psi_group_cpu __percpu *pcpu; 151 152 /* Running pressure averages */ 153 u64 avg_total[NR_PSI_STATES - 1]; 154 u64 avg_last_update; 155 u64 avg_next_update; 156 157 /* Aggregator work control */ 158 struct delayed_work avgs_work; 159 160 /* Total stall times and sampled pressure averages */ 161 u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1]; 162 unsigned long avg[NR_PSI_STATES - 1][3]; 163 164 /* Monitor work control */ 165 struct task_struct __rcu *poll_task; 166 struct timer_list poll_timer; 167 wait_queue_head_t poll_wait; 168 atomic_t poll_wakeup; 169 atomic_t poll_scheduled; 170 171 /* Protects data used by the monitor */ 172 struct mutex trigger_lock; 173 174 /* Configured polling triggers */ 175 struct list_head triggers; 176 u32 nr_triggers[NR_PSI_STATES - 1]; 177 u32 poll_states; 178 u64 poll_min_period; 179 180 /* Total stall times at the start of monitor activation */ 181 u64 polling_total[NR_PSI_STATES - 1]; 182 u64 polling_next_update; 183 u64 polling_until; 184 }; 185 186 #else /* CONFIG_PSI */ 187 188 struct psi_group { }; 189 190 #endif /* CONFIG_PSI */ 191 192 #endif /* _LINUX_PSI_TYPES_H */ 193