• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * itmt.c: Support Intel Turbo Boost Max Technology 3.0
4   *
5   * (C) Copyright 2016 Intel Corporation
6   * Author: Tim Chen <tim.c.chen@linux.intel.com>
7   *
8   * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
9   * the maximum turbo frequencies of some cores in a CPU package may be
10   * higher than for the other cores in the same package.  In that case,
11   * better performance can be achieved by making the scheduler prefer
12   * to run tasks on the CPUs with higher max turbo frequencies.
13   *
14   * This file provides functions and data structures for enabling the
15   * scheduler to favor scheduling on cores that can be boosted to a higher
16   * frequency under ITMT.
17   */
18  
19  #include <linux/sched.h>
20  #include <linux/cpumask.h>
21  #include <linux/cpuset.h>
22  #include <linux/mutex.h>
23  #include <linux/sysctl.h>
24  #include <linux/nodemask.h>
25  
26  static DEFINE_MUTEX(itmt_update_mutex);
27  DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
28  
29  /* Boolean to track if system has ITMT capabilities */
30  static bool __read_mostly sched_itmt_capable;
31  
32  /*
33   * Boolean to control whether we want to move processes to cpu capable
34   * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
35   * Technology 3.0.
36   *
37   * It can be set via /proc/sys/kernel/sched_itmt_enabled
38   */
39  unsigned int __read_mostly sysctl_sched_itmt_enabled;
40  
/*
 * sysctl handler for sched_itmt_enabled.
 *
 * Fails with -EINVAL while the platform is not marked ITMT capable.
 * Otherwise the request is passed to proc_dointvec_minmax(); when a
 * successful write actually changed the value, a topology update is
 * flagged and the sched domains are rebuilt.
 *
 * Returns -EINVAL or whatever proc_dointvec_minmax() returned.
 */
static int sched_itmt_update_handler(struct ctl_table *table, int write,
				     void __user *buffer, size_t *lenp,
				     loff_t *ppos)
{
	unsigned int prev_enabled;
	int err = -EINVAL;

	mutex_lock(&itmt_update_mutex);

	if (!sched_itmt_capable)
		goto out_unlock;

	prev_enabled = sysctl_sched_itmt_enabled;
	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	/* Nothing more to do for reads or failed writes. */
	if (err || !write)
		goto out_unlock;

	if (sysctl_sched_itmt_enabled != prev_enabled) {
		x86_topology_update = true;
		rebuild_sched_domains();
	}

out_unlock:
	mutex_unlock(&itmt_update_mutex);
	return err;
}
67  
68  static struct ctl_table itmt_kern_table[] = {
69  	{
70  		.procname	= "sched_itmt_enabled",
71  		.data		= &sysctl_sched_itmt_enabled,
72  		.maxlen		= sizeof(unsigned int),
73  		.mode		= 0644,
74  		.proc_handler	= sched_itmt_update_handler,
75  		.extra1		= SYSCTL_ZERO,
76  		.extra2		= SYSCTL_ONE,
77  	},
78  	{}
79  };
80  
81  static struct ctl_table itmt_root_table[] = {
82  	{
83  		.procname	= "kernel",
84  		.mode		= 0555,
85  		.child		= itmt_kern_table,
86  	},
87  	{}
88  };
89  
90  static struct ctl_table_header *itmt_sysctl_header;
91  
92  /**
93   * sched_set_itmt_support() - Indicate platform supports ITMT
94   *
95   * This function is used by the OS to indicate to scheduler that the platform
96   * is capable of supporting the ITMT feature.
97   *
98   * The current scheme has the pstate driver detects if the system
99   * is ITMT capable and call sched_set_itmt_support.
100   *
101   * This must be done only after sched_set_itmt_core_prio
102   * has been called to set the cpus' priorities.
103   * It must not be called with cpu hot plug lock
104   * held as we need to acquire the lock to rebuild sched domains
105   * later.
106   *
107   * Return: 0 on success
108   */
sched_set_itmt_support(void)109  int sched_set_itmt_support(void)
110  {
111  	mutex_lock(&itmt_update_mutex);
112  
113  	if (sched_itmt_capable) {
114  		mutex_unlock(&itmt_update_mutex);
115  		return 0;
116  	}
117  
118  	itmt_sysctl_header = register_sysctl_table(itmt_root_table);
119  	if (!itmt_sysctl_header) {
120  		mutex_unlock(&itmt_update_mutex);
121  		return -ENOMEM;
122  	}
123  
124  	sched_itmt_capable = true;
125  
126  	sysctl_sched_itmt_enabled = 1;
127  
128  	x86_topology_update = true;
129  	rebuild_sched_domains();
130  
131  	mutex_unlock(&itmt_update_mutex);
132  
133  	return 0;
134  }
135  
136  /**
137   * sched_clear_itmt_support() - Revoke platform's support of ITMT
138   *
139   * This function is used by the OS to indicate that it has
140   * revoked the platform's support of ITMT feature.
141   *
142   * It must not be called with cpu hot plug lock
143   * held as we need to acquire the lock to rebuild sched domains
144   * later.
145   */
sched_clear_itmt_support(void)146  void sched_clear_itmt_support(void)
147  {
148  	mutex_lock(&itmt_update_mutex);
149  
150  	if (!sched_itmt_capable) {
151  		mutex_unlock(&itmt_update_mutex);
152  		return;
153  	}
154  	sched_itmt_capable = false;
155  
156  	if (itmt_sysctl_header) {
157  		unregister_sysctl_table(itmt_sysctl_header);
158  		itmt_sysctl_header = NULL;
159  	}
160  
161  	if (sysctl_sched_itmt_enabled) {
162  		/* disable sched_itmt if we are no longer ITMT capable */
163  		sysctl_sched_itmt_enabled = 0;
164  		x86_topology_update = true;
165  		rebuild_sched_domains();
166  	}
167  
168  	mutex_unlock(&itmt_update_mutex);
169  }
170  
arch_asym_cpu_priority(int cpu)171  int arch_asym_cpu_priority(int cpu)
172  {
173  	return per_cpu(sched_core_priority, cpu);
174  }
175  
176  /**
177   * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
178   * @prio:	Priority of cpu core
179   * @core_cpu:	The cpu number associated with the core
180   *
181   * The pstate driver will find out the max boost frequency
182   * and call this function to set a priority proportional
183   * to the max boost frequency. CPU with higher boost
184   * frequency will receive higher priority.
185   *
186   * No need to rebuild sched domain after updating
187   * the CPU priorities. The sched domains have no
188   * dependency on CPU priorities.
189   */
sched_set_itmt_core_prio(int prio,int core_cpu)190  void sched_set_itmt_core_prio(int prio, int core_cpu)
191  {
192  	int cpu, i = 1;
193  
194  	for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
195  		int smt_prio;
196  
197  		/*
198  		 * Ensure that the siblings are moved to the end
199  		 * of the priority chain and only used when
200  		 * all other high priority cpus are out of capacity.
201  		 */
202  		smt_prio = prio * smp_num_siblings / i;
203  		per_cpu(sched_core_priority, cpu) = smt_prio;
204  		i++;
205  	}
206  }
207