1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Energy Model of devices
4 *
5 * Copyright (c) 2018-2020, Arm ltd.
6 * Written by: Quentin Perret, Arm ltd.
7 * Improvements provided by: Lukasz Luba, Arm ltd.
8 */
9
10 #define pr_fmt(fmt) "energy_model: " fmt
11
12 #include <linux/cpu.h>
13 #include <linux/cpumask.h>
14 #include <linux/debugfs.h>
15 #include <linux/energy_model.h>
16 #include <linux/sched/topology.h>
17 #include <linux/slab.h>
18 #include <trace/hooks/sched.h>
19
/*
 * em_pd_mutex serializes the registration and unregistration of performance
 * domains. A mutex is used so that the callbacks supplied by drivers are
 * allowed to sleep while it is held.
 */
static DEFINE_MUTEX(em_pd_mutex);
25
_is_cpu_device(struct device * dev)26 static bool _is_cpu_device(struct device *dev)
27 {
28 return (dev->bus == &cpu_subsys);
29 }
30
31 #ifdef CONFIG_DEBUG_FS
/* Top-level "energy_model" debugfs directory; parent of every domain dir. */
static struct dentry *rootdir;
33
em_debug_create_ps(struct em_perf_state * ps,struct dentry * pd)34 static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd)
35 {
36 struct dentry *d;
37 char name[24];
38
39 snprintf(name, sizeof(name), "ps:%lu", ps->frequency);
40
41 /* Create per-ps directory */
42 d = debugfs_create_dir(name, pd);
43 debugfs_create_ulong("frequency", 0444, d, &ps->frequency);
44 debugfs_create_ulong("power", 0444, d, &ps->power);
45 debugfs_create_ulong("cost", 0444, d, &ps->cost);
46 }
47
em_debug_cpus_show(struct seq_file * s,void * unused)48 static int em_debug_cpus_show(struct seq_file *s, void *unused)
49 {
50 seq_printf(s, "%*pbl\n", cpumask_pr_args(to_cpumask(s->private)));
51
52 return 0;
53 }
54 DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
55
em_debug_units_show(struct seq_file * s,void * unused)56 static int em_debug_units_show(struct seq_file *s, void *unused)
57 {
58 struct em_perf_domain *pd = s->private;
59 char *units = pd->milliwatts ? "milliWatts" : "bogoWatts";
60
61 seq_printf(s, "%s\n", units);
62
63 return 0;
64 }
65 DEFINE_SHOW_ATTRIBUTE(em_debug_units);
66
em_debug_create_pd(struct device * dev)67 static void em_debug_create_pd(struct device *dev)
68 {
69 struct dentry *d;
70 int i;
71
72 /* Create the directory of the performance domain */
73 d = debugfs_create_dir(dev_name(dev), rootdir);
74
75 if (_is_cpu_device(dev))
76 debugfs_create_file("cpus", 0444, d, dev->em_pd->cpus,
77 &em_debug_cpus_fops);
78
79 debugfs_create_file("units", 0444, d, dev->em_pd, &em_debug_units_fops);
80
81 /* Create a sub-directory for each performance state */
82 for (i = 0; i < dev->em_pd->nr_perf_states; i++)
83 em_debug_create_ps(&dev->em_pd->table[i], d);
84
85 }
86
em_debug_remove_pd(struct device * dev)87 static void em_debug_remove_pd(struct device *dev)
88 {
89 debugfs_lookup_and_remove(dev_name(dev), rootdir);
90 }
91
em_debug_init(void)92 static int __init em_debug_init(void)
93 {
94 /* Create /sys/kernel/debug/energy_model directory */
95 rootdir = debugfs_create_dir("energy_model", NULL);
96
97 return 0;
98 }
99 fs_initcall(em_debug_init);
100 #else /* CONFIG_DEBUG_FS */
/* Empty stub: nothing to create when CONFIG_DEBUG_FS is not set. */
static void em_debug_create_pd(struct device *dev)
{
}
/* Empty stub: nothing to remove when CONFIG_DEBUG_FS is not set. */
static void em_debug_remove_pd(struct device *dev)
{
}
103 #endif
104
em_create_perf_table(struct device * dev,struct em_perf_domain * pd,int nr_states,struct em_data_callback * cb)105 static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
106 int nr_states, struct em_data_callback *cb)
107 {
108 unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX;
109 struct em_perf_state *table;
110 int i, ret;
111 u64 fmax;
112
113 table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL);
114 if (!table)
115 return -ENOMEM;
116
117 /* Build the list of performance states for this performance domain */
118 for (i = 0, freq = 0; i < nr_states; i++, freq++) {
119 /*
120 * active_power() is a driver callback which ceils 'freq' to
121 * lowest performance state of 'dev' above 'freq' and updates
122 * 'power' and 'freq' accordingly.
123 */
124 ret = cb->active_power(&power, &freq, dev);
125 if (ret) {
126 dev_err(dev, "EM: invalid perf. state: %d\n",
127 ret);
128 goto free_ps_table;
129 }
130
131 /*
132 * We expect the driver callback to increase the frequency for
133 * higher performance states.
134 */
135 if (freq <= prev_freq) {
136 dev_err(dev, "EM: non-increasing freq: %lu\n",
137 freq);
138 goto free_ps_table;
139 }
140
141 /*
142 * The power returned by active_state() is expected to be
143 * positive and to fit into 16 bits.
144 */
145 if (!power || power > EM_MAX_POWER) {
146 dev_err(dev, "EM: invalid power: %lu\n",
147 power);
148 goto free_ps_table;
149 }
150
151 table[i].power = power;
152 table[i].frequency = prev_freq = freq;
153 }
154
155 /* Compute the cost of each performance state. */
156 fmax = (u64) table[nr_states - 1].frequency;
157 for (i = nr_states - 1; i >= 0; i--) {
158 unsigned long power_res = em_scale_power(table[i].power);
159
160 table[i].cost = div64_u64(fmax * power_res,
161 table[i].frequency);
162 if (table[i].cost >= prev_cost) {
163 dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
164 table[i].frequency);
165 } else {
166 prev_cost = table[i].cost;
167 }
168 }
169
170 pd->table = table;
171 pd->nr_perf_states = nr_states;
172
173 return 0;
174
175 free_ps_table:
176 kfree(table);
177 return -EINVAL;
178 }
179
/*
 * Allocate a performance domain for @dev, build its table of @nr_states
 * performance states through @cb and attach it to the device.
 *
 * For a CPU device the domain is allocated with room for a trailing cpumask,
 * which receives a copy of @cpus, and the domain is also attached to the
 * device of every CPU in @cpus.
 *
 * Returns 0 on success, -ENOMEM if the domain cannot be allocated, or the
 * error returned by em_create_perf_table(); nothing is attached on failure.
 */
static int em_create_pd(struct device *dev, int nr_states,
			struct em_data_callback *cb, cpumask_t *cpus)
{
	bool is_cpu = _is_cpu_device(dev);
	size_t pd_size = sizeof(struct em_perf_domain);
	struct em_perf_domain *pd;
	struct device *cpu_dev;
	int cpu, ret;

	/* CPU domains carry their span in extra space after the struct */
	if (is_cpu)
		pd_size += cpumask_size();

	pd = kzalloc(pd_size, GFP_KERNEL);
	if (!pd)
		return -ENOMEM;

	if (is_cpu)
		cpumask_copy(em_span_cpus(pd), cpus);

	ret = em_create_perf_table(dev, pd, nr_states, cb);
	if (ret) {
		kfree(pd);
		return ret;
	}

	if (is_cpu) {
		for_each_cpu(cpu, cpus) {
			cpu_dev = get_cpu_device(cpu);
			/*
			 * get_cpu_device() may return NULL (em_cpu_get()
			 * checks for it too); such a CPU has no device to
			 * attach the domain to, so skip it instead of
			 * dereferencing a NULL pointer.
			 */
			if (!cpu_dev) {
				dev_warn(dev, "EM: no device for CPU%d\n", cpu);
				continue;
			}
			cpu_dev->em_pd = pd;
		}
	}

	dev->em_pd = pd;

	return 0;
}
215
216 /**
217 * em_pd_get() - Return the performance domain for a device
218 * @dev : Device to find the performance domain for
219 *
220 * Returns the performance domain to which @dev belongs, or NULL if it doesn't
221 * exist.
222 */
em_pd_get(struct device * dev)223 struct em_perf_domain *em_pd_get(struct device *dev)
224 {
225 if (IS_ERR_OR_NULL(dev))
226 return NULL;
227
228 return dev->em_pd;
229 }
230 EXPORT_SYMBOL_GPL(em_pd_get);
231
232 /**
233 * em_cpu_get() - Return the performance domain for a CPU
234 * @cpu : CPU to find the performance domain for
235 *
236 * Returns the performance domain to which @cpu belongs, or NULL if it doesn't
237 * exist.
238 */
em_cpu_get(int cpu)239 struct em_perf_domain *em_cpu_get(int cpu)
240 {
241 struct device *cpu_dev;
242
243 cpu_dev = get_cpu_device(cpu);
244 if (!cpu_dev)
245 return NULL;
246
247 return em_pd_get(cpu_dev);
248 }
249 EXPORT_SYMBOL_GPL(em_cpu_get);
250
251 /**
252 * em_dev_register_perf_domain() - Register the Energy Model (EM) for a device
253 * @dev : Device for which the EM is to register
254 * @nr_states : Number of performance states to register
255 * @cb : Callback functions providing the data of the Energy Model
256 * @cpus : Pointer to cpumask_t, which in case of a CPU device is
257 * obligatory. It can be taken from i.e. 'policy->cpus'. For other
258 * type of devices this should be set to NULL.
259 * @milliwatts : Flag indicating that the power values are in milliWatts or
260 * in some other scale. It must be set properly.
261 *
262 * Create Energy Model tables for a performance domain using the callbacks
263 * defined in cb.
264 *
265 * The @milliwatts is important to set with correct value. Some kernel
266 * sub-systems might rely on this flag and check if all devices in the EM are
267 * using the same scale.
268 *
269 * If multiple clients register the same performance domain, all but the first
270 * registration will be ignored.
271 *
272 * Return 0 on success
273 */
em_dev_register_perf_domain(struct device * dev,unsigned int nr_states,struct em_data_callback * cb,cpumask_t * cpus,bool milliwatts)274 int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
275 struct em_data_callback *cb, cpumask_t *cpus,
276 bool milliwatts)
277 {
278 unsigned long cap, prev_cap = 0;
279 int cpu, ret;
280 bool cond = false;
281
282 if (!dev || !nr_states || !cb)
283 return -EINVAL;
284
285 /*
286 * Use a mutex to serialize the registration of performance domains and
287 * let the driver-defined callback functions sleep.
288 */
289 mutex_lock(&em_pd_mutex);
290
291 if (dev->em_pd) {
292 ret = -EEXIST;
293 goto unlock;
294 }
295
296 if (_is_cpu_device(dev)) {
297 if (!cpus) {
298 dev_err(dev, "EM: invalid CPU mask\n");
299 ret = -EINVAL;
300 goto unlock;
301 }
302
303 for_each_cpu(cpu, cpus) {
304 if (em_cpu_get(cpu)) {
305 dev_err(dev, "EM: exists for CPU%d\n", cpu);
306 ret = -EEXIST;
307 goto unlock;
308 }
309
310 trace_android_vh_em_dev_register_pd(&cond);
311 if (cond)
312 continue;
313 /*
314 * All CPUs of a domain must have the same
315 * micro-architecture since they all share the same
316 * table.
317 */
318 cap = arch_scale_cpu_capacity(cpu);
319 if (prev_cap && prev_cap != cap) {
320 dev_err(dev, "EM: CPUs of %*pbl must have the same capacity\n",
321 cpumask_pr_args(cpus));
322
323 ret = -EINVAL;
324 goto unlock;
325 }
326 prev_cap = cap;
327 }
328 }
329
330 ret = em_create_pd(dev, nr_states, cb, cpus);
331 if (ret)
332 goto unlock;
333
334 dev->em_pd->milliwatts = milliwatts;
335
336 em_debug_create_pd(dev);
337 dev_info(dev, "EM: created perf domain\n");
338
339 unlock:
340 mutex_unlock(&em_pd_mutex);
341 return ret;
342 }
343 EXPORT_SYMBOL_GPL(em_dev_register_perf_domain);
344
345 /**
346 * em_dev_unregister_perf_domain() - Unregister Energy Model (EM) for a device
347 * @dev : Device for which the EM is registered
348 *
349 * Unregister the EM for the specified @dev (but not a CPU device).
350 */
em_dev_unregister_perf_domain(struct device * dev)351 void em_dev_unregister_perf_domain(struct device *dev)
352 {
353 if (IS_ERR_OR_NULL(dev) || !dev->em_pd)
354 return;
355
356 if (_is_cpu_device(dev))
357 return;
358
359 /*
360 * The mutex separates all register/unregister requests and protects
361 * from potential clean-up/setup issues in the debugfs directories.
362 * The debugfs directory name is the same as device's name.
363 */
364 mutex_lock(&em_pd_mutex);
365 em_debug_remove_pd(dev);
366
367 kfree(dev->em_pd->table);
368 kfree(dev->em_pd);
369 dev->em_pd = NULL;
370 mutex_unlock(&em_pd_mutex);
371 }
372 EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain);
373