• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *	Routines to identify caches on Intel CPU.
4  *
5  *	Changes:
6  *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
7  *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
8  *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
9  */
10 
11 #include <linux/slab.h>
12 #include <linux/cacheinfo.h>
13 #include <linux/cpu.h>
14 #include <linux/cpuhotplug.h>
15 #include <linux/sched.h>
16 #include <linux/capability.h>
17 #include <linux/sysfs.h>
18 #include <linux/pci.h>
19 #include <linux/stop_machine.h>
20 
21 #include <asm/cpufeature.h>
22 #include <asm/cacheinfo.h>
23 #include <asm/amd_nb.h>
24 #include <asm/smp.h>
25 #include <asm/mtrr.h>
26 #include <asm/tlbflush.h>
27 
28 #include "cpu.h"
29 
/* Cache classes assigned to the CPUID(2) descriptors in cache_table[]. */
#define LVL_1_INST	1	/* level 1 instruction cache */
#define LVL_1_DATA	2	/* level 1 data cache */
#define LVL_2		3	/* level 2 cache */
#define LVL_3		4	/* level 3 cache */
#define LVL_TRACE	5	/* trace cache */
35 
36 /* Shared last level cache maps */
37 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
38 
39 /* Shared L2 cache maps */
40 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
41 
42 static cpumask_var_t cpu_cacheinfo_mask;
43 
44 /* Kernel controls MTRR and/or PAT MSRs. */
45 unsigned int memory_caching_control __ro_after_init;
46 
/* One CPUID(2) cache descriptor and the cache it stands for. */
struct _cache_table {
	unsigned char	descriptor;	/* descriptor byte; 0 ends the table */
	char		cache_type;	/* one of the LVL_* classes */
	short		size;		/* cache size, in KB */
};
52 
53 #define MB(x)	((x) * 1024)
54 
/*
 * All the cache descriptor types we care about (no TLB entries; the
 * trace cache descriptors are listed as LVL_TRACE).
 */
57 
58 static const struct _cache_table cache_table[] =
59 {
60 	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
61 	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
62 	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
63 	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
64 	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
65 	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
66 	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
67 	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
68 	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
69 	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
70 	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
71 	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
72 	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
73 	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
74 	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
75 	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
76 	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
77 	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
78 	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
79 	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
80 	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
81 	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
82 	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
83 	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
84 	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
85 	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
86 	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
87 	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
88 	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
89 	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
90 	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
91 	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
92 	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
93 	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
94 	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
95 	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
96 	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
97 	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
98 	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
99 	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
100 	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
101 	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
102 	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
103 	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
104 	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
105 	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
106 	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
107 	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
108 	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
109 	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
110 	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
111 	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
112 	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
113 	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
114 	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
115 	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
116 	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
117 	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
118 	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
119 	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
120 	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
121 	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
122 	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
123 	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
124 	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
125 	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
126 	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
127 	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
128 	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
129 	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
130 	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
131 	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
132 	{ 0x00, 0, 0}
133 };
134 
135 
/* Cache type as encoded in the "type" field of union _cpuid4_leaf_eax. */
enum _cache_type {
	CTYPE_NULL	= 0,	/* no cache at this subleaf */
	CTYPE_DATA	= 1,
	CTYPE_INST	= 2,
	CTYPE_UNIFIED	= 3,
};
142 
143 union _cpuid4_leaf_eax {
144 	struct {
145 		enum _cache_type	type:5;
146 		unsigned int		level:3;
147 		unsigned int		is_self_initializing:1;
148 		unsigned int		is_fully_associative:1;
149 		unsigned int		reserved:4;
150 		unsigned int		num_threads_sharing:12;
151 		unsigned int		num_cores_on_die:6;
152 	} split;
153 	u32 full;
154 };
155 
156 union _cpuid4_leaf_ebx {
157 	struct {
158 		unsigned int		coherency_line_size:12;
159 		unsigned int		physical_line_partition:10;
160 		unsigned int		ways_of_associativity:10;
161 	} split;
162 	u32 full;
163 };
164 
165 union _cpuid4_leaf_ecx {
166 	struct {
167 		unsigned int		number_of_sets:32;
168 	} split;
169 	u32 full;
170 };
171 
172 struct _cpuid4_info_regs {
173 	union _cpuid4_leaf_eax eax;
174 	union _cpuid4_leaf_ebx ebx;
175 	union _cpuid4_leaf_ecx ecx;
176 	unsigned int id;
177 	unsigned long size;
178 	struct amd_northbridge *nb;
179 };
180 
181 static unsigned short num_cache_leaves;
182 
/*
 * AMD doesn't have CPUID4. Emulate it here to report the same
 * information to the user. This makes some assumptions about the machine
 * (L2 not shared, no SMT, etc.) that are currently true on AMD CPUs.
 *
 * In theory the TLBs could be reported as a fake type (they are in "dummy").
 * Maybe later.
 */
/* One L1 cache descriptor register as returned by CPUID(0x80000005). */
union l1_cache {
	struct {
		unsigned int line_size:8;
		unsigned int lines_per_tag:8;
		unsigned int assoc:8;
		unsigned int size_in_kb:8;
	};
	unsigned int val;	/* raw register value */
};
198 
/* The L2 cache descriptor register (ECX) of CPUID(0x80000006). */
union l2_cache {
	struct {
		unsigned int line_size:8;
		unsigned int lines_per_tag:4;
		unsigned int assoc:4;		/* encoded; decode via assocs[] */
		unsigned int size_in_kb:16;
	};
	unsigned int val;	/* raw register value */
};
208 
/* The L3 cache descriptor register (EDX) of CPUID(0x80000006). */
union l3_cache {
	struct {
		unsigned int line_size:8;
		unsigned int lines_per_tag:4;
		unsigned int assoc:4;		/* encoded; decode via assocs[] */
		unsigned int res:2;
		unsigned int size_encoded:14;	/* in units of 512 KB */
	};
	unsigned int val;	/* raw register value */
};
219 
/*
 * Number of ways for each 4-bit L2/L3 associativity encoding reported by
 * CPUID(0x80000006). Encodings without an entry (0, 7, 9) decode to 0.
 *
 * The encodings 0x3 (3-way) and 0x5 (6-way) are valid per the AMD manual
 * and must be listed, otherwise such caches are reported with 0 ways.
 */
static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[3] = 3,
	[4] = 4,
	[5] = 6,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};
233 
/*
 * Cache level and cache type reported for each emulated AMD CPUID(4)
 * subleaf; both tables are indexed by the subleaf number (0..3).
 */
static const unsigned char levels[] = {
	1,	/* subleaf 0 */
	1,	/* subleaf 1 */
	2,	/* subleaf 2 */
	3,	/* subleaf 3 */
};
static const unsigned char types[] = {
	1,	/* subleaf 0 */
	2,	/* subleaf 1 */
	3,	/* subleaf 2 */
	3,	/* subleaf 3 */
};
236 
237 static const enum cache_type cache_type_map[] = {
238 	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
239 	[CTYPE_DATA] = CACHE_TYPE_DATA,
240 	[CTYPE_INST] = CACHE_TYPE_INST,
241 	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
242 };
243 
244 static void
amd_cpuid4(int leaf,union _cpuid4_leaf_eax * eax,union _cpuid4_leaf_ebx * ebx,union _cpuid4_leaf_ecx * ecx)245 amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
246 		     union _cpuid4_leaf_ebx *ebx,
247 		     union _cpuid4_leaf_ecx *ecx)
248 {
249 	unsigned dummy;
250 	unsigned line_size, lines_per_tag, assoc, size_in_kb;
251 	union l1_cache l1i, l1d;
252 	union l2_cache l2;
253 	union l3_cache l3;
254 	union l1_cache *l1 = &l1d;
255 
256 	eax->full = 0;
257 	ebx->full = 0;
258 	ecx->full = 0;
259 
260 	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
261 	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
262 
263 	switch (leaf) {
264 	case 1:
265 		l1 = &l1i;
266 		fallthrough;
267 	case 0:
268 		if (!l1->val)
269 			return;
270 		assoc = assocs[l1->assoc];
271 		line_size = l1->line_size;
272 		lines_per_tag = l1->lines_per_tag;
273 		size_in_kb = l1->size_in_kb;
274 		break;
275 	case 2:
276 		if (!l2.val)
277 			return;
278 		assoc = assocs[l2.assoc];
279 		line_size = l2.line_size;
280 		lines_per_tag = l2.lines_per_tag;
281 		/* cpu_data has errata corrections for K7 applied */
282 		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
283 		break;
284 	case 3:
285 		if (!l3.val)
286 			return;
287 		assoc = assocs[l3.assoc];
288 		line_size = l3.line_size;
289 		lines_per_tag = l3.lines_per_tag;
290 		size_in_kb = l3.size_encoded * 512;
291 		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
292 			size_in_kb = size_in_kb >> 1;
293 			assoc = assoc >> 1;
294 		}
295 		break;
296 	default:
297 		return;
298 	}
299 
300 	eax->split.is_self_initializing = 1;
301 	eax->split.type = types[leaf];
302 	eax->split.level = levels[leaf];
303 	eax->split.num_threads_sharing = 0;
304 	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
305 
306 
307 	if (assoc == 0xffff)
308 		eax->split.is_fully_associative = 1;
309 	ebx->split.coherency_line_size = line_size - 1;
310 	ebx->split.ways_of_associativity = assoc - 1;
311 	ebx->split.physical_line_partition = lines_per_tag - 1;
312 	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
313 		(ebx->split.ways_of_associativity + 1) - 1;
314 }
315 
316 #if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
317 
318 /*
319  * L3 cache descriptors
320  */
amd_calc_l3_indices(struct amd_northbridge * nb)321 static void amd_calc_l3_indices(struct amd_northbridge *nb)
322 {
323 	struct amd_l3_cache *l3 = &nb->l3_cache;
324 	unsigned int sc0, sc1, sc2, sc3;
325 	u32 val = 0;
326 
327 	pci_read_config_dword(nb->misc, 0x1C4, &val);
328 
329 	/* calculate subcache sizes */
330 	l3->subcaches[0] = sc0 = !(val & BIT(0));
331 	l3->subcaches[1] = sc1 = !(val & BIT(4));
332 
333 	if (boot_cpu_data.x86 == 0x15) {
334 		l3->subcaches[0] = sc0 += !(val & BIT(1));
335 		l3->subcaches[1] = sc1 += !(val & BIT(5));
336 	}
337 
338 	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
339 	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
340 
341 	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
342 }
343 
344 /*
345  * check whether a slot used for disabling an L3 index is occupied.
346  * @l3: L3 cache descriptor
347  * @slot: slot number (0..1)
348  *
349  * @returns: the disabled index if used or negative value if slot free.
350  */
amd_get_l3_disable_slot(struct amd_northbridge * nb,unsigned slot)351 static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
352 {
353 	unsigned int reg = 0;
354 
355 	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
356 
357 	/* check whether this slot is activated already */
358 	if (reg & (3UL << 30))
359 		return reg & 0xfff;
360 
361 	return -1;
362 }
363 
show_cache_disable(struct cacheinfo * this_leaf,char * buf,unsigned int slot)364 static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
365 				  unsigned int slot)
366 {
367 	int index;
368 	struct amd_northbridge *nb = this_leaf->priv;
369 
370 	index = amd_get_l3_disable_slot(nb, slot);
371 	if (index >= 0)
372 		return sprintf(buf, "%d\n", index);
373 
374 	return sprintf(buf, "FREE\n");
375 }
376 
377 #define SHOW_CACHE_DISABLE(slot)					\
378 static ssize_t								\
379 cache_disable_##slot##_show(struct device *dev,				\
380 			    struct device_attribute *attr, char *buf)	\
381 {									\
382 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
383 	return show_cache_disable(this_leaf, buf, slot);		\
384 }
385 SHOW_CACHE_DISABLE(0)
386 SHOW_CACHE_DISABLE(1)
387 
amd_l3_disable_index(struct amd_northbridge * nb,int cpu,unsigned slot,unsigned long idx)388 static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
389 				 unsigned slot, unsigned long idx)
390 {
391 	int i;
392 
393 	idx |= BIT(30);
394 
395 	/*
396 	 *  disable index in all 4 subcaches
397 	 */
398 	for (i = 0; i < 4; i++) {
399 		u32 reg = idx | (i << 20);
400 
401 		if (!nb->l3_cache.subcaches[i])
402 			continue;
403 
404 		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
405 
406 		/*
407 		 * We need to WBINVD on a core on the node containing the L3
408 		 * cache which indices we disable therefore a simple wbinvd()
409 		 * is not sufficient.
410 		 */
411 		wbinvd_on_cpu(cpu);
412 
413 		reg |= BIT(31);
414 		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
415 	}
416 }
417 
418 /*
419  * disable a L3 cache index by using a disable-slot
420  *
421  * @l3:    L3 cache descriptor
422  * @cpu:   A CPU on the node containing the L3 cache
423  * @slot:  slot number (0..1)
424  * @index: index to disable
425  *
426  * @return: 0 on success, error status on failure
427  */
amd_set_l3_disable_slot(struct amd_northbridge * nb,int cpu,unsigned slot,unsigned long index)428 static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
429 			    unsigned slot, unsigned long index)
430 {
431 	int ret = 0;
432 
433 	/*  check if @slot is already used or the index is already disabled */
434 	ret = amd_get_l3_disable_slot(nb, slot);
435 	if (ret >= 0)
436 		return -EEXIST;
437 
438 	if (index > nb->l3_cache.indices)
439 		return -EINVAL;
440 
441 	/* check whether the other slot has disabled the same index already */
442 	if (index == amd_get_l3_disable_slot(nb, !slot))
443 		return -EEXIST;
444 
445 	amd_l3_disable_index(nb, cpu, slot, index);
446 
447 	return 0;
448 }
449 
store_cache_disable(struct cacheinfo * this_leaf,const char * buf,size_t count,unsigned int slot)450 static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
451 				   const char *buf, size_t count,
452 				   unsigned int slot)
453 {
454 	unsigned long val = 0;
455 	int cpu, err = 0;
456 	struct amd_northbridge *nb = this_leaf->priv;
457 
458 	if (!capable(CAP_SYS_ADMIN))
459 		return -EPERM;
460 
461 	cpu = cpumask_first(&this_leaf->shared_cpu_map);
462 
463 	if (kstrtoul(buf, 10, &val) < 0)
464 		return -EINVAL;
465 
466 	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
467 	if (err) {
468 		if (err == -EEXIST)
469 			pr_warn("L3 slot %d in use/index already disabled!\n",
470 				   slot);
471 		return err;
472 	}
473 	return count;
474 }
475 
476 #define STORE_CACHE_DISABLE(slot)					\
477 static ssize_t								\
478 cache_disable_##slot##_store(struct device *dev,			\
479 			     struct device_attribute *attr,		\
480 			     const char *buf, size_t count)		\
481 {									\
482 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
483 	return store_cache_disable(this_leaf, buf, count, slot);	\
484 }
485 STORE_CACHE_DISABLE(0)
486 STORE_CACHE_DISABLE(1)
487 
subcaches_show(struct device * dev,struct device_attribute * attr,char * buf)488 static ssize_t subcaches_show(struct device *dev,
489 			      struct device_attribute *attr, char *buf)
490 {
491 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
492 	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
493 
494 	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
495 }
496 
subcaches_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)497 static ssize_t subcaches_store(struct device *dev,
498 			       struct device_attribute *attr,
499 			       const char *buf, size_t count)
500 {
501 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
502 	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
503 	unsigned long val;
504 
505 	if (!capable(CAP_SYS_ADMIN))
506 		return -EPERM;
507 
508 	if (kstrtoul(buf, 16, &val) < 0)
509 		return -EINVAL;
510 
511 	if (amd_set_subcaches(cpu, val))
512 		return -EINVAL;
513 
514 	return count;
515 }
516 
/* Read-write device attributes backed by the *_show/*_store callbacks above. */
static DEVICE_ATTR_RW(cache_disable_0);
static DEVICE_ATTR_RW(cache_disable_1);
static DEVICE_ATTR_RW(subcaches);
520 
521 static umode_t
cache_private_attrs_is_visible(struct kobject * kobj,struct attribute * attr,int unused)522 cache_private_attrs_is_visible(struct kobject *kobj,
523 			       struct attribute *attr, int unused)
524 {
525 	struct device *dev = kobj_to_dev(kobj);
526 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
527 	umode_t mode = attr->mode;
528 
529 	if (!this_leaf->priv)
530 		return 0;
531 
532 	if ((attr == &dev_attr_subcaches.attr) &&
533 	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
534 		return mode;
535 
536 	if ((attr == &dev_attr_cache_disable_0.attr ||
537 	     attr == &dev_attr_cache_disable_1.attr) &&
538 	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
539 		return mode;
540 
541 	return 0;
542 }
543 
544 static struct attribute_group cache_private_group = {
545 	.is_visible = cache_private_attrs_is_visible,
546 };
547 
init_amd_l3_attrs(void)548 static void init_amd_l3_attrs(void)
549 {
550 	int n = 1;
551 	static struct attribute **amd_l3_attrs;
552 
553 	if (amd_l3_attrs) /* already initialized */
554 		return;
555 
556 	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
557 		n += 2;
558 	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
559 		n += 1;
560 
561 	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
562 	if (!amd_l3_attrs)
563 		return;
564 
565 	n = 0;
566 	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
567 		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
568 		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
569 	}
570 	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
571 		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
572 
573 	cache_private_group.attrs = amd_l3_attrs;
574 }
575 
576 const struct attribute_group *
cache_get_priv_group(struct cacheinfo * this_leaf)577 cache_get_priv_group(struct cacheinfo *this_leaf)
578 {
579 	struct amd_northbridge *nb = this_leaf->priv;
580 
581 	if (this_leaf->level < 3 || !nb)
582 		return NULL;
583 
584 	if (nb && nb->l3_cache.indices)
585 		init_amd_l3_attrs();
586 
587 	return &cache_private_group;
588 }
589 
amd_init_l3_cache(struct _cpuid4_info_regs * this_leaf,int index)590 static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
591 {
592 	int node;
593 
594 	/* only for L3, and not in virtualized environments */
595 	if (index < 3)
596 		return;
597 
598 	node = topology_die_id(smp_processor_id());
599 	this_leaf->nb = node_to_amd_nb(node);
600 	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
601 		amd_calc_l3_indices(this_leaf->nb);
602 }
603 #else
604 #define amd_init_l3_cache(x, y)
605 #endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
606 
607 static int
cpuid4_cache_lookup_regs(int index,struct _cpuid4_info_regs * this_leaf)608 cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
609 {
610 	union _cpuid4_leaf_eax	eax;
611 	union _cpuid4_leaf_ebx	ebx;
612 	union _cpuid4_leaf_ecx	ecx;
613 	unsigned		edx;
614 
615 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
616 		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
617 			cpuid_count(0x8000001d, index, &eax.full,
618 				    &ebx.full, &ecx.full, &edx);
619 		else
620 			amd_cpuid4(index, &eax, &ebx, &ecx);
621 		amd_init_l3_cache(this_leaf, index);
622 	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
623 		cpuid_count(0x8000001d, index, &eax.full,
624 			    &ebx.full, &ecx.full, &edx);
625 		amd_init_l3_cache(this_leaf, index);
626 	} else {
627 		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
628 	}
629 
630 	if (eax.split.type == CTYPE_NULL)
631 		return -EIO; /* better error ? */
632 
633 	this_leaf->eax = eax;
634 	this_leaf->ebx = ebx;
635 	this_leaf->ecx = ecx;
636 	this_leaf->size = (ecx.split.number_of_sets          + 1) *
637 			  (ebx.split.coherency_line_size     + 1) *
638 			  (ebx.split.physical_line_partition + 1) *
639 			  (ebx.split.ways_of_associativity   + 1);
640 	return 0;
641 }
642 
find_num_cache_leaves(struct cpuinfo_x86 * c)643 static int find_num_cache_leaves(struct cpuinfo_x86 *c)
644 {
645 	unsigned int		eax, ebx, ecx, edx, op;
646 	union _cpuid4_leaf_eax	cache_eax;
647 	int 			i = -1;
648 
649 	if (c->x86_vendor == X86_VENDOR_AMD ||
650 	    c->x86_vendor == X86_VENDOR_HYGON)
651 		op = 0x8000001d;
652 	else
653 		op = 4;
654 
655 	do {
656 		++i;
657 		/* Do cpuid(op) loop to find out num_cache_leaves */
658 		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
659 		cache_eax.full = eax;
660 	} while (cache_eax.split.type != CTYPE_NULL);
661 	return i;
662 }
663 
cacheinfo_amd_init_llc_id(struct cpuinfo_x86 * c,int cpu)664 void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu)
665 {
666 	/*
667 	 * We may have multiple LLCs if L3 caches exist, so check if we
668 	 * have an L3 cache by looking at the L3 cache CPUID leaf.
669 	 */
670 	if (!cpuid_edx(0x80000006))
671 		return;
672 
673 	if (c->x86 < 0x17) {
674 		/* LLC is at the node level. */
675 		per_cpu(cpu_llc_id, cpu) = c->cpu_die_id;
676 	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
677 		/*
678 		 * LLC is at the core complex level.
679 		 * Core complex ID is ApicId[3] for these processors.
680 		 */
681 		per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
682 	} else {
683 		/*
684 		 * LLC ID is calculated from the number of threads sharing the
685 		 * cache.
686 		 * */
687 		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
688 		u32 llc_index = find_num_cache_leaves(c) - 1;
689 
690 		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
691 		if (eax)
692 			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
693 
694 		if (num_sharing_cache) {
695 			int bits = get_count_order(num_sharing_cache);
696 
697 			per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
698 		}
699 	}
700 }
701 
cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 * c,int cpu)702 void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu)
703 {
704 	/*
705 	 * We may have multiple LLCs if L3 caches exist, so check if we
706 	 * have an L3 cache by looking at the L3 cache CPUID leaf.
707 	 */
708 	if (!cpuid_edx(0x80000006))
709 		return;
710 
711 	/*
712 	 * LLC is at the core complex level.
713 	 * Core complex ID is ApicId[3] for these processors.
714 	 */
715 	per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
716 }
717 
init_amd_cacheinfo(struct cpuinfo_x86 * c)718 void init_amd_cacheinfo(struct cpuinfo_x86 *c)
719 {
720 
721 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
722 		num_cache_leaves = find_num_cache_leaves(c);
723 	} else if (c->extended_cpuid_level >= 0x80000006) {
724 		if (cpuid_edx(0x80000006) & 0xf000)
725 			num_cache_leaves = 4;
726 		else
727 			num_cache_leaves = 3;
728 	}
729 }
730 
init_hygon_cacheinfo(struct cpuinfo_x86 * c)731 void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
732 {
733 	num_cache_leaves = find_num_cache_leaves(c);
734 }
735 
init_intel_cacheinfo(struct cpuinfo_x86 * c)736 void init_intel_cacheinfo(struct cpuinfo_x86 *c)
737 {
738 	/* Cache sizes */
739 	unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0;
740 	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
741 	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
742 	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
743 #ifdef CONFIG_SMP
744 	unsigned int cpu = c->cpu_index;
745 #endif
746 
747 	if (c->cpuid_level > 3) {
748 		static int is_initialized;
749 
750 		if (is_initialized == 0) {
751 			/* Init num_cache_leaves from boot CPU */
752 			num_cache_leaves = find_num_cache_leaves(c);
753 			is_initialized++;
754 		}
755 
756 		/*
757 		 * Whenever possible use cpuid(4), deterministic cache
758 		 * parameters cpuid leaf to find the cache details
759 		 */
760 		for (i = 0; i < num_cache_leaves; i++) {
761 			struct _cpuid4_info_regs this_leaf = {};
762 			int retval;
763 
764 			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
765 			if (retval < 0)
766 				continue;
767 
768 			switch (this_leaf.eax.split.level) {
769 			case 1:
770 				if (this_leaf.eax.split.type == CTYPE_DATA)
771 					new_l1d = this_leaf.size/1024;
772 				else if (this_leaf.eax.split.type == CTYPE_INST)
773 					new_l1i = this_leaf.size/1024;
774 				break;
775 			case 2:
776 				new_l2 = this_leaf.size/1024;
777 				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
778 				index_msb = get_count_order(num_threads_sharing);
779 				l2_id = c->apicid & ~((1 << index_msb) - 1);
780 				break;
781 			case 3:
782 				new_l3 = this_leaf.size/1024;
783 				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
784 				index_msb = get_count_order(num_threads_sharing);
785 				l3_id = c->apicid & ~((1 << index_msb) - 1);
786 				break;
787 			default:
788 				break;
789 			}
790 		}
791 	}
792 	/*
793 	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
794 	 * trace cache
795 	 */
796 	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
797 		/* supports eax=2  call */
798 		int j, n;
799 		unsigned int regs[4];
800 		unsigned char *dp = (unsigned char *)regs;
801 		int only_trace = 0;
802 
803 		if (num_cache_leaves != 0 && c->x86 == 15)
804 			only_trace = 1;
805 
806 		/* Number of times to iterate */
807 		n = cpuid_eax(2) & 0xFF;
808 
809 		for (i = 0 ; i < n ; i++) {
810 			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
811 
812 			/* If bit 31 is set, this is an unknown format */
813 			for (j = 0 ; j < 3 ; j++)
814 				if (regs[j] & (1 << 31))
815 					regs[j] = 0;
816 
817 			/* Byte 0 is level count, not a descriptor */
818 			for (j = 1 ; j < 16 ; j++) {
819 				unsigned char des = dp[j];
820 				unsigned char k = 0;
821 
822 				/* look up this descriptor in the table */
823 				while (cache_table[k].descriptor != 0) {
824 					if (cache_table[k].descriptor == des) {
825 						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
826 							break;
827 						switch (cache_table[k].cache_type) {
828 						case LVL_1_INST:
829 							l1i += cache_table[k].size;
830 							break;
831 						case LVL_1_DATA:
832 							l1d += cache_table[k].size;
833 							break;
834 						case LVL_2:
835 							l2 += cache_table[k].size;
836 							break;
837 						case LVL_3:
838 							l3 += cache_table[k].size;
839 							break;
840 						}
841 
842 						break;
843 					}
844 
845 					k++;
846 				}
847 			}
848 		}
849 	}
850 
851 	if (new_l1d)
852 		l1d = new_l1d;
853 
854 	if (new_l1i)
855 		l1i = new_l1i;
856 
857 	if (new_l2) {
858 		l2 = new_l2;
859 #ifdef CONFIG_SMP
860 		per_cpu(cpu_llc_id, cpu) = l2_id;
861 		per_cpu(cpu_l2c_id, cpu) = l2_id;
862 #endif
863 	}
864 
865 	if (new_l3) {
866 		l3 = new_l3;
867 #ifdef CONFIG_SMP
868 		per_cpu(cpu_llc_id, cpu) = l3_id;
869 #endif
870 	}
871 
872 #ifdef CONFIG_SMP
873 	/*
874 	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
875 	 * turns means that the only possibility is SMT (as indicated in
876 	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
877 	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
878 	 * c->phys_proc_id.
879 	 */
880 	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
881 		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
882 #endif
883 
884 	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
885 
886 	if (!l2)
887 		cpu_detect_cache_sizes(c);
888 }
889 
__cache_amd_cpumap_setup(unsigned int cpu,int index,struct _cpuid4_info_regs * base)890 static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
891 				    struct _cpuid4_info_regs *base)
892 {
893 	struct cpu_cacheinfo *this_cpu_ci;
894 	struct cacheinfo *this_leaf;
895 	int i, sibling;
896 
897 	/*
898 	 * For L3, always use the pre-calculated cpu_llc_shared_mask
899 	 * to derive shared_cpu_map.
900 	 */
901 	if (index == 3) {
902 		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
903 			this_cpu_ci = get_cpu_cacheinfo(i);
904 			if (!this_cpu_ci->info_list)
905 				continue;
906 			this_leaf = this_cpu_ci->info_list + index;
907 			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
908 				if (!cpu_online(sibling))
909 					continue;
910 				cpumask_set_cpu(sibling,
911 						&this_leaf->shared_cpu_map);
912 			}
913 		}
914 	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
915 		unsigned int apicid, nshared, first, last;
916 
917 		nshared = base->eax.split.num_threads_sharing + 1;
918 		apicid = cpu_data(cpu).apicid;
919 		first = apicid - (apicid % nshared);
920 		last = first + nshared - 1;
921 
922 		for_each_online_cpu(i) {
923 			this_cpu_ci = get_cpu_cacheinfo(i);
924 			if (!this_cpu_ci->info_list)
925 				continue;
926 
927 			apicid = cpu_data(i).apicid;
928 			if ((apicid < first) || (apicid > last))
929 				continue;
930 
931 			this_leaf = this_cpu_ci->info_list + index;
932 
933 			for_each_online_cpu(sibling) {
934 				apicid = cpu_data(sibling).apicid;
935 				if ((apicid < first) || (apicid > last))
936 					continue;
937 				cpumask_set_cpu(sibling,
938 						&this_leaf->shared_cpu_map);
939 			}
940 		}
941 	} else
942 		return 0;
943 
944 	return 1;
945 }
946 
__cache_cpumap_setup(unsigned int cpu,int index,struct _cpuid4_info_regs * base)947 static void __cache_cpumap_setup(unsigned int cpu, int index,
948 				 struct _cpuid4_info_regs *base)
949 {
950 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
951 	struct cacheinfo *this_leaf, *sibling_leaf;
952 	unsigned long num_threads_sharing;
953 	int index_msb, i;
954 	struct cpuinfo_x86 *c = &cpu_data(cpu);
955 
956 	if (c->x86_vendor == X86_VENDOR_AMD ||
957 	    c->x86_vendor == X86_VENDOR_HYGON) {
958 		if (__cache_amd_cpumap_setup(cpu, index, base))
959 			return;
960 	}
961 
962 	this_leaf = this_cpu_ci->info_list + index;
963 	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
964 
965 	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
966 	if (num_threads_sharing == 1)
967 		return;
968 
969 	index_msb = get_count_order(num_threads_sharing);
970 
971 	for_each_online_cpu(i)
972 		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
973 			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
974 
975 			if (i == cpu || !sib_cpu_ci->info_list)
976 				continue;/* skip if itself or no cacheinfo */
977 			sibling_leaf = sib_cpu_ci->info_list + index;
978 			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
979 			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
980 		}
981 }
982 
ci_leaf_init(struct cacheinfo * this_leaf,struct _cpuid4_info_regs * base)983 static void ci_leaf_init(struct cacheinfo *this_leaf,
984 			 struct _cpuid4_info_regs *base)
985 {
986 	this_leaf->id = base->id;
987 	this_leaf->attributes = CACHE_ID;
988 	this_leaf->level = base->eax.split.level;
989 	this_leaf->type = cache_type_map[base->eax.split.type];
990 	this_leaf->coherency_line_size =
991 				base->ebx.split.coherency_line_size + 1;
992 	this_leaf->ways_of_associativity =
993 				base->ebx.split.ways_of_associativity + 1;
994 	this_leaf->size = base->size;
995 	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
996 	this_leaf->physical_line_partition =
997 				base->ebx.split.physical_line_partition + 1;
998 	this_leaf->priv = base->nb;
999 }
1000 
init_cache_level(unsigned int cpu)1001 int init_cache_level(unsigned int cpu)
1002 {
1003 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
1004 
1005 	if (!num_cache_leaves)
1006 		return -ENOENT;
1007 	if (!this_cpu_ci)
1008 		return -EINVAL;
1009 	this_cpu_ci->num_levels = 3;
1010 	this_cpu_ci->num_leaves = num_cache_leaves;
1011 	return 0;
1012 }
1013 
1014 /*
1015  * The max shared threads number comes from CPUID.4:EAX[25-14] with input
1016  * ECX as cache index. Then right shift apicid by the number's order to get
1017  * cache id for this cache node.
1018  */
get_cache_id(int cpu,struct _cpuid4_info_regs * id4_regs)1019 static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
1020 {
1021 	struct cpuinfo_x86 *c = &cpu_data(cpu);
1022 	unsigned long num_threads_sharing;
1023 	int index_msb;
1024 
1025 	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
1026 	index_msb = get_count_order(num_threads_sharing);
1027 	id4_regs->id = c->apicid >> index_msb;
1028 }
1029 
populate_cache_leaves(unsigned int cpu)1030 int populate_cache_leaves(unsigned int cpu)
1031 {
1032 	unsigned int idx, ret;
1033 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
1034 	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
1035 	struct _cpuid4_info_regs id4_regs = {};
1036 
1037 	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
1038 		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
1039 		if (ret)
1040 			return ret;
1041 		get_cache_id(cpu, &id4_regs);
1042 		ci_leaf_init(this_leaf++, &id4_regs);
1043 		__cache_cpumap_setup(cpu, idx, &id4_regs);
1044 	}
1045 	this_cpu_ci->cpu_map_populated = true;
1046 
1047 	return 0;
1048 }
1049 
1050 /*
1051  * Disable and enable caches. Needed for changing MTRRs and the PAT MSR.
1052  *
1053  * Since we are disabling the cache don't allow any interrupts,
1054  * they would run extremely slow and would only increase the pain.
1055  *
1056  * The caller must ensure that local interrupts are disabled and
1057  * are reenabled after cache_enable() has been called.
1058  */
1059 static unsigned long saved_cr4;
1060 static DEFINE_RAW_SPINLOCK(cache_disable_lock);
1061 
cache_disable(void)1062 void cache_disable(void) __acquires(cache_disable_lock)
1063 {
1064 	unsigned long cr0;
1065 
1066 	/*
1067 	 * Note that this is not ideal
1068 	 * since the cache is only flushed/disabled for this CPU while the
1069 	 * MTRRs are changed, but changing this requires more invasive
1070 	 * changes to the way the kernel boots
1071 	 */
1072 
1073 	raw_spin_lock(&cache_disable_lock);
1074 
1075 	/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
1076 	cr0 = read_cr0() | X86_CR0_CD;
1077 	write_cr0(cr0);
1078 
1079 	/*
1080 	 * Cache flushing is the most time-consuming step when programming
1081 	 * the MTRRs. Fortunately, as per the Intel Software Development
1082 	 * Manual, we can skip it if the processor supports cache self-
1083 	 * snooping.
1084 	 */
1085 	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
1086 		wbinvd();
1087 
1088 	/* Save value of CR4 and clear Page Global Enable (bit 7) */
1089 	if (cpu_feature_enabled(X86_FEATURE_PGE)) {
1090 		saved_cr4 = __read_cr4();
1091 		__write_cr4(saved_cr4 & ~X86_CR4_PGE);
1092 	}
1093 
1094 	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
1095 	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
1096 	flush_tlb_local();
1097 
1098 	if (cpu_feature_enabled(X86_FEATURE_MTRR))
1099 		mtrr_disable();
1100 
1101 	/* Again, only flush caches if we have to. */
1102 	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
1103 		wbinvd();
1104 }
1105 
cache_enable(void)1106 void cache_enable(void) __releases(cache_disable_lock)
1107 {
1108 	/* Flush TLBs (no need to flush caches - they are disabled) */
1109 	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
1110 	flush_tlb_local();
1111 
1112 	if (cpu_feature_enabled(X86_FEATURE_MTRR))
1113 		mtrr_enable();
1114 
1115 	/* Enable caches */
1116 	write_cr0(read_cr0() & ~X86_CR0_CD);
1117 
1118 	/* Restore value of CR4 */
1119 	if (cpu_feature_enabled(X86_FEATURE_PGE))
1120 		__write_cr4(saved_cr4);
1121 
1122 	raw_spin_unlock(&cache_disable_lock);
1123 }
1124 
cache_cpu_init(void)1125 static void cache_cpu_init(void)
1126 {
1127 	unsigned long flags;
1128 
1129 	local_irq_save(flags);
1130 	cache_disable();
1131 
1132 	if (memory_caching_control & CACHE_MTRR)
1133 		mtrr_generic_set_state();
1134 
1135 	if (memory_caching_control & CACHE_PAT)
1136 		pat_cpu_init();
1137 
1138 	cache_enable();
1139 	local_irq_restore(flags);
1140 }
1141 
/*
 * Flag telling whether cache initialization of the APs is being delayed.
 * It starts out true and is only accessed through the two helpers below.
 */
static bool cache_aps_delayed_init = true;

/* Set the delayed-AP-init flag to @val. */
void set_cache_aps_delayed_init(bool val)
{
	cache_aps_delayed_init = val;
}

/* Return the current value of the delayed-AP-init flag. */
bool get_cache_aps_delayed_init(void)
{
	return cache_aps_delayed_init;
}
1153 
/*
 * stop_machine callback: run cache_cpu_init() on the current CPU when
 * AP initialization is still delayed or the current CPU is not marked
 * online. @unused is ignored. Always returns 0.
 */
static int cache_rendezvous_handler(void *unused)
{
	/* Already online and not in the delayed phase: nothing to do. */
	if (!get_cache_aps_delayed_init() && cpu_online(smp_processor_id()))
		return 0;

	cache_cpu_init();

	return 0;
}
1161 
cache_bp_init(void)1162 void __init cache_bp_init(void)
1163 {
1164 	mtrr_bp_init();
1165 	pat_bp_init();
1166 
1167 	if (memory_caching_control)
1168 		cache_cpu_init();
1169 }
1170 
cache_bp_restore(void)1171 void cache_bp_restore(void)
1172 {
1173 	if (memory_caching_control)
1174 		cache_cpu_init();
1175 }
1176 
cache_ap_online(unsigned int cpu)1177 static int cache_ap_online(unsigned int cpu)
1178 {
1179 	cpumask_set_cpu(cpu, cpu_cacheinfo_mask);
1180 
1181 	if (!memory_caching_control || get_cache_aps_delayed_init())
1182 		return 0;
1183 
1184 	/*
1185 	 * Ideally we should hold mtrr_mutex here to avoid MTRR entries
1186 	 * changed, but this routine will be called in CPU boot time,
1187 	 * holding the lock breaks it.
1188 	 *
1189 	 * This routine is called in two cases:
1190 	 *
1191 	 *   1. very early time of software resume, when there absolutely
1192 	 *      isn't MTRR entry changes;
1193 	 *
1194 	 *   2. CPU hotadd time. We let mtrr_add/del_page hold cpuhotplug
1195 	 *      lock to prevent MTRR entry changes
1196 	 */
1197 	stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL,
1198 				       cpu_cacheinfo_mask);
1199 
1200 	return 0;
1201 }
1202 
cache_ap_offline(unsigned int cpu)1203 static int cache_ap_offline(unsigned int cpu)
1204 {
1205 	cpumask_clear_cpu(cpu, cpu_cacheinfo_mask);
1206 	return 0;
1207 }
1208 
1209 /*
1210  * Delayed cache initialization for all AP's
1211  */
cache_aps_init(void)1212 void cache_aps_init(void)
1213 {
1214 	if (!memory_caching_control || !get_cache_aps_delayed_init())
1215 		return;
1216 
1217 	stop_machine(cache_rendezvous_handler, NULL, cpu_online_mask);
1218 	set_cache_aps_delayed_init(false);
1219 }
1220 
cache_ap_register(void)1221 static int __init cache_ap_register(void)
1222 {
1223 	zalloc_cpumask_var(&cpu_cacheinfo_mask, GFP_KERNEL);
1224 	cpumask_set_cpu(smp_processor_id(), cpu_cacheinfo_mask);
1225 
1226 	cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING,
1227 				  "x86/cachectrl:starting",
1228 				  cache_ap_online, cache_ap_offline);
1229 	return 0;
1230 }
1231 early_initcall(cache_ap_register);
1232