• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Energy Model of devices
4  *
5  * Copyright (c) 2018-2020, Arm ltd.
6  * Written by: Quentin Perret, Arm ltd.
7  * Improvements provided by: Lukasz Luba, Arm ltd.
8  */
9 
10 #define pr_fmt(fmt) "energy_model: " fmt
11 
12 #include <linux/cpu.h>
13 #include <linux/cpumask.h>
14 #include <linux/debugfs.h>
15 #include <linux/energy_model.h>
16 #include <linux/sched/topology.h>
17 #include <linux/slab.h>
18 
19 #define FILE_PROPERTY 0444
20 
21 /*
22  * Mutex serializing the registrations of performance domains and letting
23  * callbacks defined by drivers sleep.
24  */
25 static DEFINE_MUTEX(em_pd_mutex);
26 
_is_cpu_device(struct device * dev)27 static bool _is_cpu_device(struct device *dev)
28 {
29     return (dev->bus == &cpu_subsys);
30 }
31 
32 #ifdef CONFIG_DEBUG_FS
33 static struct dentry *rootdir;
34 
em_debug_create_ps(struct em_perf_state * ps,struct dentry * pd)35 static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd)
36 {
37     struct dentry *d;
38     char name[24];
39 
40     snprintf(name, sizeof(name), "ps:%lu", ps->frequency);
41 
42     /* Create per-ps directory */
43     d = debugfs_create_dir(name, pd);
44     debugfs_create_ulong("frequency", FILE_PROPERTY, d, &ps->frequency);
45     debugfs_create_ulong("power", FILE_PROPERTY, d, &ps->power);
46     debugfs_create_ulong("cost", FILE_PROPERTY, d, &ps->cost);
47 }
48 
em_debug_cpus_show(struct seq_file * s,void * unused)49 static int em_debug_cpus_show(struct seq_file *s, void *unused)
50 {
51     seq_printf(s, "%*pbl\n", cpumask_pr_args(to_cpumask(s->private)));
52 
53     return 0;
54 }
55 DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
56 
em_debug_units_show(struct seq_file * s,void * unused)57 static int em_debug_units_show(struct seq_file *s, void *unused)
58 {
59     struct em_perf_domain *pd = s->private;
60     char *units = pd->milliwatts ? "milliWatts" : "bogoWatts";
61 
62     seq_printf(s, "%s\n", units);
63 
64     return 0;
65 }
66 DEFINE_SHOW_ATTRIBUTE(em_debug_units);
67 
em_debug_create_pd(struct device * dev)68 static void em_debug_create_pd(struct device *dev)
69 {
70     struct dentry *d;
71     int i;
72 
73     /* Create the directory of the performance domain */
74     d = debugfs_create_dir(dev_name(dev), rootdir);
75 
76     if (_is_cpu_device(dev)) {
77         debugfs_create_file("cpus", FILE_PROPERTY, d, dev->em_pd->cpus, &em_debug_cpus_fops);
78     }
79 
80     debugfs_create_file("units", FILE_PROPERTY, d, dev->em_pd, &em_debug_units_fops);
81 
82     /* Create a sub-directory for each performance state */
83     for (i = 0; i < dev->em_pd->nr_perf_states; i++) {
84         em_debug_create_ps(&dev->em_pd->table[i], d);
85     }
86 }
87 
em_debug_remove_pd(struct device * dev)88 static void em_debug_remove_pd(struct device *dev)
89 {
90     struct dentry *debug_dir;
91 
92     debug_dir = debugfs_lookup(dev_name(dev), rootdir);
93     debugfs_remove_recursive(debug_dir);
94 }
95 
em_debug_init(void)96 static int __init em_debug_init(void)
97 {
98     /* Create /sys/kernel/debug/energy_model directory */
99     rootdir = debugfs_create_dir("energy_model", NULL);
100 
101     return 0;
102 }
103 fs_initcall(em_debug_init);
104 #else /* CONFIG_DEBUG_FS */
/* Without CONFIG_DEBUG_FS there is nothing to create: intentionally empty. */
static void em_debug_create_pd(struct device *dev) {}
/* Without CONFIG_DEBUG_FS there is nothing to remove: intentionally empty. */
static void em_debug_remove_pd(struct device *dev) {}
111 #endif
112 
em_create_perf_table(struct device * dev,struct em_perf_domain * pd,int nr_states,struct em_data_callback * cb)113 static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, int nr_states,
114                                 struct em_data_callback *cb)
115 {
116     unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX;
117     struct em_perf_state *table;
118     int i, ret;
119     u64 fmax;
120 
121     table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL);
122     if (!table) {
123         return -ENOMEM;
124     }
125 
126     /* Build the list of performance states for this performance domain */
127     for (i = 0, freq = 0; i < nr_states; i++, freq++) {
128         /*
129          * active_power() is a driver callback which ceils 'freq' to
130          * lowest performance state of 'dev' above 'freq' and updates
131          * 'power' and 'freq' accordingly.
132          */
133         ret = cb->active_power(&power, &freq, dev);
134         if (ret) {
135             dev_err(dev, "EM: invalid perf. state: %d\n", ret);
136             goto free_ps_table;
137         }
138 
139         /*
140          * We expect the driver callback to increase the frequency for
141          * higher performance states.
142          */
143         if (freq <= prev_freq) {
144             dev_err(dev, "EM: non-increasing freq: %lu\n", freq);
145             goto free_ps_table;
146         }
147 
148         /*
149          * The power returned by active_state() is expected to be
150          * positive, in milli-watts and to fit into 16 bits.
151          */
152         if (!power || power > EM_MAX_POWER) {
153             dev_err(dev, "EM: invalid power: %lu\n", power);
154             goto free_ps_table;
155         }
156 
157         table[i].power = power;
158         table[i].frequency = prev_freq = freq;
159         }
160 
161     /* Compute the cost of each performance state. */
162     fmax = (u64)table[nr_states - 1].frequency;
163     for (i = nr_states - 1; i >= 0; i--) {
164         unsigned long power_res = em_scale_power(table[i].power);
165 
166         table[i].cost = div64_u64(fmax * power_res, table[i].frequency);
167         if (table[i].cost >= prev_cost) {
168             dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
169                 table[i].frequency);
170         } else {
171             prev_cost = table[i].cost;
172         }
173     }
174 
175     pd->table = table;
176     pd->nr_perf_states = nr_states;
177 
178     return 0;
179 
180 free_ps_table:
181     kfree(table);
182     return -EINVAL;
183 }
184 
/*
 * Allocate a performance domain for @dev, fill its performance state table
 * and attach it to the device.
 *
 * For a CPU device the allocation is extended by cpumask_size() bytes to
 * hold a copy of @cpus, and every CPU device in @cpus is pointed at the new
 * domain as well.
 *
 * Returns 0 on success, -ENOMEM if the domain cannot be allocated, or the
 * error returned by em_create_perf_table().
 */
static int em_create_pd(struct device *dev, int nr_states, struct em_data_callback *cb, cpumask_t *cpus)
{
    const bool is_cpu = _is_cpu_device(dev);
    struct em_perf_domain *domain;
    size_t alloc_size = sizeof(*domain);
    int cpu_id, err;

    /* CPU domains carry their span right behind the structure. */
    if (is_cpu) {
        alloc_size += cpumask_size();
    }

    domain = kzalloc(alloc_size, GFP_KERNEL);
    if (!domain) {
        return -ENOMEM;
    }

    if (is_cpu) {
        cpumask_copy(em_span_cpus(domain), cpus);
    }

    err = em_create_perf_table(dev, domain, nr_states, cb);
    if (err) {
        kfree(domain);
        return err;
    }

    /* All CPUs of the span share the same domain. */
    if (is_cpu) {
        for_each_cpu(cpu_id, cpus) {
            get_cpu_device(cpu_id)->em_pd = domain;
        }
    }

    dev->em_pd = domain;

    return 0;
}
223 
224 /**
225  * em_pd_get() - Return the performance domain for a device
226  * @dev : Device to find the performance domain for
227  *
228  * Returns the performance domain to which @dev belongs, or NULL if it doesn't
229  * exist.
230  */
em_pd_get(struct device * dev)231 struct em_perf_domain *em_pd_get(struct device *dev)
232 {
233     if (IS_ERR_OR_NULL(dev)) {
234         return NULL;
235     }
236 
237     return dev->em_pd;
238 }
239 EXPORT_SYMBOL_GPL(em_pd_get);
240 
241 /**
242  * em_cpu_get() - Return the performance domain for a CPU
243  * @cpu : CPU to find the performance domain for
244  *
245  * Returns the performance domain to which @cpu belongs, or NULL if it doesn't
246  * exist.
247  */
em_cpu_get(int cpu)248 struct em_perf_domain *em_cpu_get(int cpu)
249 {
250     struct device *cpu_dev;
251 
252     cpu_dev = get_cpu_device(cpu);
253     if (!cpu_dev) {
254         return NULL;
255     }
256 
257     return em_pd_get(cpu_dev);
258 }
259 EXPORT_SYMBOL_GPL(em_cpu_get);
260 
261 /**
262  * em_dev_register_perf_domain() - Register the Energy Model (EM) for a device
263  * @dev        : Device for which the EM is to register
264  * @nr_states    : Number of performance states to register
265  * @cb        : Callback functions providing the data of the Energy Model
266  * @cpus    : Pointer to cpumask_t, which in case of a CPU device is
267  *        obligatory. It can be taken from i.e. 'policy->cpus'. For other
268  *        type of devices this should be set to NULL.
269  * @milliwatts    : Flag indicating that the power values are in milliWatts or
270  *        in some other scale. It must be set properly.
271  *
272  * Create Energy Model tables for a performance domain using the callbacks
273  * defined in cb.
274  *
275  * The @milliwatts is important to set with correct value. Some kernel
276  * sub-systems might rely on this flag and check if all devices in the EM are
277  * using the same scale.
278  *
279  * If multiple clients register the same performance domain, all but the first
280  * registration will be ignored.
281  *
282  * Return 0 on success
283  */
em_dev_register_perf_domain(struct device * dev,unsigned int nr_states,struct em_data_callback * cb,cpumask_t * cpus,bool milliwatts)284 int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, struct em_data_callback *cb,
285                                 cpumask_t *cpus, bool milliwatts)
286 {
287     unsigned long cap, prev_cap = 0;
288     int cpu, ret;
289 
290     if (!dev || !nr_states || !cb) {
291         return -EINVAL;
292     }
293 
294     /*
295      * Use a mutex to serialize the registration of performance domains and
296      * let the driver-defined callback functions sleep.
297      */
298     mutex_lock(&em_pd_mutex);
299 
300     if (dev->em_pd) {
301         ret = -EEXIST;
302         goto unlock;
303     }
304 
305     if (_is_cpu_device(dev)) {
306         if (!cpus) {
307             dev_err(dev, "EM: invalid CPU mask\n");
308             ret = -EINVAL;
309             goto unlock;
310         }
311 
312         for_each_cpu(cpu, cpus)
313         {
314             if (em_cpu_get(cpu)) {
315                 dev_err(dev, "EM: exists for CPU%d\n", cpu);
316                 ret = -EEXIST;
317                 goto unlock;
318             }
319             /*
320              * All CPUs of a domain must have the same
321              * micro-architecture since they all share the same
322              * table.
323              */
324             cap = arch_scale_cpu_capacity(cpu);
325             if (prev_cap && prev_cap != cap) {
326                 dev_err(dev, "EM: CPUs of %*pbl must have the same capacity\n", cpumask_pr_args(cpus));
327 
328                 ret = -EINVAL;
329                 goto unlock;
330             }
331             prev_cap = cap;
332         }
333     }
334 
335     ret = em_create_pd(dev, nr_states, cb, cpus);
336     if (ret) {
337         goto unlock;
338     }
339 
340     dev->em_pd->milliwatts = milliwatts;
341 
342     em_debug_create_pd(dev);
343     dev_info(dev, "EM: created perf domain\n");
344 
345 unlock:
346     mutex_unlock(&em_pd_mutex);
347     return ret;
348 }
349 EXPORT_SYMBOL_GPL(em_dev_register_perf_domain);
350 
351 /**
352  * em_dev_unregister_perf_domain() - Unregister Energy Model (EM) for a device
353  * @dev        : Device for which the EM is registered
354  *
355  * Unregister the EM for the specified @dev (but not a CPU device).
356  */
em_dev_unregister_perf_domain(struct device * dev)357 void em_dev_unregister_perf_domain(struct device *dev)
358 {
359     if (IS_ERR_OR_NULL(dev) || !dev->em_pd) {
360         return;
361     }
362 
363     if (_is_cpu_device(dev)) {
364         return;
365     }
366 
367     /*
368      * The mutex separates all register/unregister requests and protects
369      * from potential clean-up/setup issues in the debugfs directories.
370      * The debugfs directory name is the same as device's name.
371      */
372     mutex_lock(&em_pd_mutex);
373     em_debug_remove_pd(dev);
374 
375     kfree(dev->em_pd->table);
376     kfree(dev->em_pd);
377     dev->em_pd = NULL;
378     mutex_unlock(&em_pd_mutex);
379 }
380 EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain);
381