1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *	Routines to identify caches on Intel CPU.
4  *
5  *	Changes:
6  *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
7  *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
8  *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
9  */
10 
11 #include <linux/slab.h>
12 #include <linux/cacheinfo.h>
13 #include <linux/cpu.h>
14 #include <linux/sched.h>
15 #include <linux/capability.h>
16 #include <linux/sysfs.h>
17 #include <linux/pci.h>
18 
19 #include <asm/cpufeature.h>
20 #include <asm/amd_nb.h>
21 #include <asm/smp.h>
22 
23 #define LVL_1_INST	1
24 #define LVL_1_DATA	2
25 #define LVL_2		3
26 #define LVL_3		4
27 #define LVL_TRACE	5
28 
29 struct _cache_table {
30 	unsigned char descriptor;
31 	char cache_type;
32 	short size;
33 };
34 
35 #define MB(x)	((x) * 1024)
36 
37 /* All the cache descriptor types we care about
38    (no TLB entries) */
39 
40 static const struct _cache_table cache_table[] =
41 {
42 	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
43 	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
44 	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
45 	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
46 	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
47 	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
48 	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
49 	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
50 	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
51 	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
52 	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
53 	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
54 	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
55 	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
56 	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
57 	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
58 	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
59 	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
60 	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
61 	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
62 	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
63 	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
64 	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
65 	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
66 	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
67 	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
68 	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
69 	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
70 	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
71 	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
72 	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
73 	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
74 	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
75 	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
76 	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
77 	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
78 	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
79 	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
80 	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
81 	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
82 	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
83 	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
84 	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
85 	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
86 	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
87 	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
88 	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
89 	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
90 	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
91 	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
92 	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
93 	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
94 	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
95 	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
96 	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
97 	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
98 	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
99 	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
100 	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
101 	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
102 	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
103 	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
104 	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
105 	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
106 	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
107 	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
108 	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
109 	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
110 	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
111 	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
112 	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
113 	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
114 	{ 0x00, 0, 0}
115 };
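/*
 * For illustration (comment added for readability, not in the original
 * source): this table is consulted by init_intel_cacheinfo() below when it
 * walks the one-byte descriptors returned by cpuid(2).  For example, the
 * descriptor 0x49 maps to an L3 cache of MB(4) = 4096 KB, and the per-level
 * totals (l1i, l1d, l2, l3) are accumulated in kilobytes.
 */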
116 
117 
118 enum _cache_type {
119 	CTYPE_NULL = 0,
120 	CTYPE_DATA = 1,
121 	CTYPE_INST = 2,
122 	CTYPE_UNIFIED = 3
123 };
124 
125 union _cpuid4_leaf_eax {
126 	struct {
127 		enum _cache_type	type:5;
128 		unsigned int		level:3;
129 		unsigned int		is_self_initializing:1;
130 		unsigned int		is_fully_associative:1;
131 		unsigned int		reserved:4;
132 		unsigned int		num_threads_sharing:12;
133 		unsigned int		num_cores_on_die:6;
134 	} split;
135 	u32 full;
136 };
137 
138 union _cpuid4_leaf_ebx {
139 	struct {
140 		unsigned int		coherency_line_size:12;
141 		unsigned int		physical_line_partition:10;
142 		unsigned int		ways_of_associativity:10;
143 	} split;
144 	u32 full;
145 };
146 
147 union _cpuid4_leaf_ecx {
148 	struct {
149 		unsigned int		number_of_sets:32;
150 	} split;
151 	u32 full;
152 };
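/*
 * Worked example (hypothetical raw value, for illustration): on x86 the
 * first bitfield occupies the least significant bits, so a CPUID.4 EAX of
 * 0x1c004121 decodes through _cpuid4_leaf_eax.split as type = 1
 * (CTYPE_DATA), level = 1, is_self_initializing = 1, num_threads_sharing =
 * 1 (i.e. 2 threads share the cache, the field is stored as "value - 1")
 * and num_cores_on_die = 7 (8 cores).
 */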
153 
154 struct _cpuid4_info_regs {
155 	union _cpuid4_leaf_eax eax;
156 	union _cpuid4_leaf_ebx ebx;
157 	union _cpuid4_leaf_ecx ecx;
158 	unsigned int id;
159 	unsigned long size;
160 	struct amd_northbridge *nb;
161 };
162 
163 static unsigned short num_cache_leaves;
164 
165 /* AMD doesn't have CPUID4. Emulate it here to report the same
166    information to the user. This makes some assumptions about the machine:
167    L2 not shared, no SMT, etc., which are currently true on AMD CPUs.
168 
169    In theory the TLBs could be reported as a fake type (they are in "dummy").
170    Maybe later */
171 union l1_cache {
172 	struct {
173 		unsigned line_size:8;
174 		unsigned lines_per_tag:8;
175 		unsigned assoc:8;
176 		unsigned size_in_kb:8;
177 	};
178 	unsigned val;
179 };
180 
181 union l2_cache {
182 	struct {
183 		unsigned line_size:8;
184 		unsigned lines_per_tag:4;
185 		unsigned assoc:4;
186 		unsigned size_in_kb:16;
187 	};
188 	unsigned val;
189 };
190 
191 union l3_cache {
192 	struct {
193 		unsigned line_size:8;
194 		unsigned lines_per_tag:4;
195 		unsigned assoc:4;
196 		unsigned res:2;
197 		unsigned size_encoded:14;
198 	};
199 	unsigned val;
200 };
201 
202 static const unsigned short assocs[] = {
203 	[1] = 1,
204 	[2] = 2,
205 	[4] = 4,
206 	[6] = 8,
207 	[8] = 16,
208 	[0xa] = 32,
209 	[0xb] = 48,
210 	[0xc] = 64,
211 	[0xd] = 96,
212 	[0xe] = 128,
213 	[0xf] = 0xffff /* fully associative - no way to show this currently */
214 };
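/*
 * Worked example (hypothetical raw value, for illustration): the unions
 * above mirror the packed register layout of CPUID 0x80000005/0x80000006.
 * A 0x80000006 ECX value of 0x02008140 decodes through union l2_cache as
 * line_size = 0x40 (64 bytes), lines_per_tag = 1, assoc = 8 and size_in_kb
 * = 0x0200 (512 KB); the assoc code 8 is then translated by assocs[8] into
 * 16 ways.
 */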
215 
216 static const unsigned char levels[] = { 1, 1, 2, 3 };
217 static const unsigned char types[] = { 1, 2, 3, 3 };
218 
219 static const enum cache_type cache_type_map[] = {
220 	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
221 	[CTYPE_DATA] = CACHE_TYPE_DATA,
222 	[CTYPE_INST] = CACHE_TYPE_INST,
223 	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
224 };
225 
226 static void
227 amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
228 		     union _cpuid4_leaf_ebx *ebx,
229 		     union _cpuid4_leaf_ecx *ecx)
230 {
231 	unsigned dummy;
232 	unsigned line_size, lines_per_tag, assoc, size_in_kb;
233 	union l1_cache l1i, l1d;
234 	union l2_cache l2;
235 	union l3_cache l3;
236 	union l1_cache *l1 = &l1d;
237 
238 	eax->full = 0;
239 	ebx->full = 0;
240 	ecx->full = 0;
241 
242 	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
243 	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
244 
245 	switch (leaf) {
246 	case 1:
247 		l1 = &l1i;
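		/* fall through: L1I and L1D share the decode below */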
248 	case 0:
249 		if (!l1->val)
250 			return;
251 		assoc = assocs[l1->assoc];
252 		line_size = l1->line_size;
253 		lines_per_tag = l1->lines_per_tag;
254 		size_in_kb = l1->size_in_kb;
255 		break;
256 	case 2:
257 		if (!l2.val)
258 			return;
259 		assoc = assocs[l2.assoc];
260 		line_size = l2.line_size;
261 		lines_per_tag = l2.lines_per_tag;
262 		/* cpu_data has errata corrections for K7 applied */
263 		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
264 		break;
265 	case 3:
266 		if (!l3.val)
267 			return;
268 		assoc = assocs[l3.assoc];
269 		line_size = l3.line_size;
270 		lines_per_tag = l3.lines_per_tag;
271 		size_in_kb = l3.size_encoded * 512;
272 		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
273 			size_in_kb = size_in_kb >> 1;
274 			assoc = assoc >> 1;
275 		}
276 		break;
277 	default:
278 		return;
279 	}
280 
281 	eax->split.is_self_initializing = 1;
282 	eax->split.type = types[leaf];
283 	eax->split.level = levels[leaf];
284 	eax->split.num_threads_sharing = 0;
285 	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
286 
287 
288 	if (assoc == 0xffff)
289 		eax->split.is_fully_associative = 1;
290 	ebx->split.coherency_line_size = line_size - 1;
291 	ebx->split.ways_of_associativity = assoc - 1;
292 	ebx->split.physical_line_partition = lines_per_tag - 1;
293 	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
294 		(ebx->split.ways_of_associativity + 1) - 1;
295 }
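/*
 * Worked example: for the L2 case above, a 512 KB, 16-way cache with
 * 64-byte lines yields number_of_sets = 512 * 1024 / 64 / 16 - 1 = 511,
 * i.e. 512 sets, matching the CPUID.4 convention that the EBX/ECX fields
 * are stored as "value - 1".
 */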
296 
297 #if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
298 
299 /*
300  * L3 cache descriptors
301  */
302 static void amd_calc_l3_indices(struct amd_northbridge *nb)
303 {
304 	struct amd_l3_cache *l3 = &nb->l3_cache;
305 	unsigned int sc0, sc1, sc2, sc3;
306 	u32 val = 0;
307 
308 	pci_read_config_dword(nb->misc, 0x1C4, &val);
309 
310 	/* calculate subcache sizes */
311 	l3->subcaches[0] = sc0 = !(val & BIT(0));
312 	l3->subcaches[1] = sc1 = !(val & BIT(4));
313 
314 	if (boot_cpu_data.x86 == 0x15) {
315 		l3->subcaches[0] = sc0 += !(val & BIT(1));
316 		l3->subcaches[1] = sc1 += !(val & BIT(5));
317 	}
318 
319 	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
320 	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
321 
322 	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
323 }
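/*
 * Worked example: with val == 0 (no subcache disable bits set), sc2 and
 * sc3 each evaluate to 2 and sc0/sc1 to 1 (2 on family 0x15), so the
 * maximum is 2 and l3->indices = (2 << 10) - 1 = 2047.
 */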
324 
325 /*
326  * Check whether a slot used for disabling an L3 index is occupied.
327  * @nb:   northbridge descriptor containing the L3 cache
328  * @slot: slot number (0..1)
329  *
330  * @returns: the disabled index if the slot is in use, or a negative value if it is free.
331  */
332 static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
333 {
334 	unsigned int reg = 0;
335 
336 	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
337 
338 	/* check whether this slot is activated already */
339 	if (reg & (3UL << 30))
340 		return reg & 0xfff;
341 
342 	return -1;
343 }
344 
345 static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
346 				  unsigned int slot)
347 {
348 	int index;
349 	struct amd_northbridge *nb = this_leaf->priv;
350 
351 	index = amd_get_l3_disable_slot(nb, slot);
352 	if (index >= 0)
353 		return sprintf(buf, "%d\n", index);
354 
355 	return sprintf(buf, "FREE\n");
356 }
357 
358 #define SHOW_CACHE_DISABLE(slot)					\
359 static ssize_t								\
360 cache_disable_##slot##_show(struct device *dev,				\
361 			    struct device_attribute *attr, char *buf)	\
362 {									\
363 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
364 	return show_cache_disable(this_leaf, buf, slot);		\
365 }
366 SHOW_CACHE_DISABLE(0)
367 SHOW_CACHE_DISABLE(1)
368 
369 static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
370 				 unsigned slot, unsigned long idx)
371 {
372 	int i;
373 
374 	idx |= BIT(30);
375 
376 	/*
377 	 *  disable index in all 4 subcaches
378 	 */
379 	for (i = 0; i < 4; i++) {
380 		u32 reg = idx | (i << 20);
381 
382 		if (!nb->l3_cache.subcaches[i])
383 			continue;
384 
385 		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
386 
387 		/*
388 		 * We need to WBINVD on a core on the node containing the L3
389 		 * cache whose indices we disable; therefore a simple wbinvd()
390 		 * is not sufficient.
391 		 */
392 		wbinvd_on_cpu(cpu);
393 
394 		reg |= BIT(31);
395 		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
396 	}
397 }
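/*
 * In short: idx carries BIT(30), and (i << 20) selects one of the four
 * subcaches.  For each populated subcache the index is written once, a
 * WBINVD is run on a CPU of the owning node to flush lines still cached at
 * that index, and the write is repeated with BIT(31) set so the disable
 * takes effect.
 */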
398 
399 /*
400  * disable an L3 cache index by using a disable-slot
401  *
402  * @nb:    northbridge descriptor containing the L3 cache
403  * @cpu:   A CPU on the node containing the L3 cache
404  * @slot:  slot number (0..1)
405  * @index: index to disable
406  *
407  * @return: 0 on success, error status on failure
408  */
409 static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
410 			    unsigned slot, unsigned long index)
411 {
412 	int ret = 0;
413 
414 	/*  check if @slot is already used or the index is already disabled */
415 	ret = amd_get_l3_disable_slot(nb, slot);
416 	if (ret >= 0)
417 		return -EEXIST;
418 
419 	if (index > nb->l3_cache.indices)
420 		return -EINVAL;
421 
422 	/* check whether the other slot has disabled the same index already */
423 	if (index == amd_get_l3_disable_slot(nb, !slot))
424 		return -EEXIST;
425 
426 	amd_l3_disable_index(nb, cpu, slot, index);
427 
428 	return 0;
429 }
430 
431 static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
432 				   const char *buf, size_t count,
433 				   unsigned int slot)
434 {
435 	unsigned long val = 0;
436 	int cpu, err = 0;
437 	struct amd_northbridge *nb = this_leaf->priv;
438 
439 	if (!capable(CAP_SYS_ADMIN))
440 		return -EPERM;
441 
442 	cpu = cpumask_first(&this_leaf->shared_cpu_map);
443 
444 	if (kstrtoul(buf, 10, &val) < 0)
445 		return -EINVAL;
446 
447 	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
448 	if (err) {
449 		if (err == -EEXIST)
450 			pr_warn("L3 slot %d in use/index already disabled!\n",
451 				   slot);
452 		return err;
453 	}
454 	return count;
455 }
456 
457 #define STORE_CACHE_DISABLE(slot)					\
458 static ssize_t								\
459 cache_disable_##slot##_store(struct device *dev,			\
460 			     struct device_attribute *attr,		\
461 			     const char *buf, size_t count)		\
462 {									\
463 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
464 	return store_cache_disable(this_leaf, buf, count, slot);	\
465 }
466 STORE_CACHE_DISABLE(0)
467 STORE_CACHE_DISABLE(1)
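/*
 * Usage sketch (path assumed from the generic cacheinfo sysfs layout): the
 * attributes generated above typically appear as
 * /sys/devices/system/cpu/cpuN/cache/index3/cache_disable_{0,1}.  Reading
 * one returns either the disabled index or "FREE"; writing a decimal index
 * (as root) disables that L3 index via store_cache_disable() above.
 */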
468 
469 static ssize_t subcaches_show(struct device *dev,
470 			      struct device_attribute *attr, char *buf)
471 {
472 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
473 	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
474 
475 	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
476 }
477 
478 static ssize_t subcaches_store(struct device *dev,
479 			       struct device_attribute *attr,
480 			       const char *buf, size_t count)
481 {
482 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
483 	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
484 	unsigned long val;
485 
486 	if (!capable(CAP_SYS_ADMIN))
487 		return -EPERM;
488 
489 	if (kstrtoul(buf, 16, &val) < 0)
490 		return -EINVAL;
491 
492 	if (amd_set_subcaches(cpu, val))
493 		return -EINVAL;
494 
495 	return count;
496 }
497 
498 static DEVICE_ATTR_RW(cache_disable_0);
499 static DEVICE_ATTR_RW(cache_disable_1);
500 static DEVICE_ATTR_RW(subcaches);
501 
502 static umode_t
503 cache_private_attrs_is_visible(struct kobject *kobj,
504 			       struct attribute *attr, int unused)
505 {
506 	struct device *dev = kobj_to_dev(kobj);
507 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
508 	umode_t mode = attr->mode;
509 
510 	if (!this_leaf->priv)
511 		return 0;
512 
513 	if ((attr == &dev_attr_subcaches.attr) &&
514 	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
515 		return mode;
516 
517 	if ((attr == &dev_attr_cache_disable_0.attr ||
518 	     attr == &dev_attr_cache_disable_1.attr) &&
519 	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
520 		return mode;
521 
522 	return 0;
523 }
524 
525 static struct attribute_group cache_private_group = {
526 	.is_visible = cache_private_attrs_is_visible,
527 };
528 
529 static void init_amd_l3_attrs(void)
530 {
531 	int n = 1;
532 	static struct attribute **amd_l3_attrs;
533 
534 	if (amd_l3_attrs) /* already initialized */
535 		return;
536 
537 	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
538 		n += 2;
539 	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
540 		n += 1;
541 
542 	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
543 	if (!amd_l3_attrs)
544 		return;
545 
546 	n = 0;
547 	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
548 		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
549 		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
550 	}
551 	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
552 		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
553 
554 	cache_private_group.attrs = amd_l3_attrs;
555 }
556 
557 const struct attribute_group *
558 cache_get_priv_group(struct cacheinfo *this_leaf)
559 {
560 	struct amd_northbridge *nb = this_leaf->priv;
561 
562 	if (this_leaf->level < 3 || !nb)
563 		return NULL;
564 
565 	if (nb && nb->l3_cache.indices)
566 		init_amd_l3_attrs();
567 
568 	return &cache_private_group;
569 }
570 
571 static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
572 {
573 	int node;
574 
575 	/* only for L3, and not in virtualized environments */
576 	if (index < 3)
577 		return;
578 
579 	node = amd_get_nb_id(smp_processor_id());
580 	this_leaf->nb = node_to_amd_nb(node);
581 	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
582 		amd_calc_l3_indices(this_leaf->nb);
583 }
584 #else
585 #define amd_init_l3_cache(x, y)
586 #endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
587 
588 static int
589 cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
590 {
591 	union _cpuid4_leaf_eax	eax;
592 	union _cpuid4_leaf_ebx	ebx;
593 	union _cpuid4_leaf_ecx	ecx;
594 	unsigned		edx;
595 
596 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
597 		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
598 			cpuid_count(0x8000001d, index, &eax.full,
599 				    &ebx.full, &ecx.full, &edx);
600 		else
601 			amd_cpuid4(index, &eax, &ebx, &ecx);
602 		amd_init_l3_cache(this_leaf, index);
603 	} else {
604 		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
605 	}
606 
607 	if (eax.split.type == CTYPE_NULL)
608 		return -EIO; /* better error ? */
609 
610 	this_leaf->eax = eax;
611 	this_leaf->ebx = ebx;
612 	this_leaf->ecx = ecx;
613 	this_leaf->size = (ecx.split.number_of_sets          + 1) *
614 			  (ebx.split.coherency_line_size     + 1) *
615 			  (ebx.split.physical_line_partition + 1) *
616 			  (ebx.split.ways_of_associativity   + 1);
617 	return 0;
618 }
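/*
 * Worked example: the size computed above is the usual CPUID.4 product of
 * "plus one" fields.  A 32 KB, 8-way L1D with 64-byte lines reports
 * number_of_sets = 63, coherency_line_size = 63, physical_line_partition =
 * 0 and ways_of_associativity = 7, giving
 * (63+1) * (63+1) * (0+1) * (7+1) = 32768 bytes.
 */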
619 
620 static int find_num_cache_leaves(struct cpuinfo_x86 *c)
621 {
622 	unsigned int		eax, ebx, ecx, edx, op;
623 	union _cpuid4_leaf_eax	cache_eax;
624 	int 			i = -1;
625 
626 	if (c->x86_vendor == X86_VENDOR_AMD)
627 		op = 0x8000001d;
628 	else
629 		op = 4;
630 
631 	do {
632 		++i;
633 		/* Do cpuid(op) loop to find out num_cache_leaves */
634 		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
635 		cache_eax.full = eax;
636 	} while (cache_eax.split.type != CTYPE_NULL);
637 	return i;
638 }
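/*
 * For illustration: on a CPU exposing L1D, L1I, L2 and L3 through the
 * deterministic cache leaf, subleaves 0..3 report valid types and subleaf 4
 * reports CTYPE_NULL, so the loop above returns 4.
 */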
639 
640 void init_amd_cacheinfo(struct cpuinfo_x86 *c)
641 {
642 
643 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
644 		num_cache_leaves = find_num_cache_leaves(c);
645 	} else if (c->extended_cpuid_level >= 0x80000006) {
646 		if (cpuid_edx(0x80000006) & 0xf000)
647 			num_cache_leaves = 4;
648 		else
649 			num_cache_leaves = 3;
650 	}
651 }
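/*
 * Note (based on the AMD CPUID layout): bits 15:12 of CPUID 0x80000006 EDX
 * encode the L3 associativity, so the 0xf000 test above is effectively
 * "does an L3 exist"; with an L3 the emulated leaves are L1D, L1I, L2 and
 * L3 (4), otherwise only the first three.
 */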
652 
653 unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
654 {
655 	/* Cache sizes */
656 	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
657 	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
658 	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
659 	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
660 #ifdef CONFIG_SMP
661 	unsigned int cpu = c->cpu_index;
662 #endif
663 
664 	if (c->cpuid_level > 3) {
665 		static int is_initialized;
666 
667 		if (is_initialized == 0) {
668 			/* Init num_cache_leaves from boot CPU */
669 			num_cache_leaves = find_num_cache_leaves(c);
670 			is_initialized++;
671 		}
672 
673 		/*
674 		 * Whenever possible use cpuid(4), the deterministic cache
675 		 * parameters leaf, to find the cache details
676 		 */
677 		for (i = 0; i < num_cache_leaves; i++) {
678 			struct _cpuid4_info_regs this_leaf = {};
679 			int retval;
680 
681 			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
682 			if (retval < 0)
683 				continue;
684 
685 			switch (this_leaf.eax.split.level) {
686 			case 1:
687 				if (this_leaf.eax.split.type == CTYPE_DATA)
688 					new_l1d = this_leaf.size/1024;
689 				else if (this_leaf.eax.split.type == CTYPE_INST)
690 					new_l1i = this_leaf.size/1024;
691 				break;
692 			case 2:
693 				new_l2 = this_leaf.size/1024;
694 				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
695 				index_msb = get_count_order(num_threads_sharing);
696 				l2_id = c->apicid & ~((1 << index_msb) - 1);
697 				break;
698 			case 3:
699 				new_l3 = this_leaf.size/1024;
700 				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
701 				index_msb = get_count_order(num_threads_sharing);
702 				l3_id = c->apicid & ~((1 << index_msb) - 1);
703 				break;
704 			default:
705 				break;
706 			}
707 		}
708 	}
709 	/*
710 	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
711 	 * trace cache
712 	 */
713 	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
714 		/* supports eax=2  call */
715 		int j, n;
716 		unsigned int regs[4];
717 		unsigned char *dp = (unsigned char *)regs;
718 		int only_trace = 0;
719 
720 		if (num_cache_leaves != 0 && c->x86 == 15)
721 			only_trace = 1;
722 
723 		/* Number of times to iterate */
724 		n = cpuid_eax(2) & 0xFF;
725 
726 		for (i = 0 ; i < n ; i++) {
727 			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
728 
729 			/* If bit 31 is set, this is an unknown format */
730 			for (j = 0 ; j < 3 ; j++)
731 				if (regs[j] & (1 << 31))
732 					regs[j] = 0;
733 
734 			/* Byte 0 is level count, not a descriptor */
735 			for (j = 1 ; j < 16 ; j++) {
736 				unsigned char des = dp[j];
737 				unsigned char k = 0;
738 
739 				/* look up this descriptor in the table */
740 				while (cache_table[k].descriptor != 0) {
741 					if (cache_table[k].descriptor == des) {
742 						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
743 							break;
744 						switch (cache_table[k].cache_type) {
745 						case LVL_1_INST:
746 							l1i += cache_table[k].size;
747 							break;
748 						case LVL_1_DATA:
749 							l1d += cache_table[k].size;
750 							break;
751 						case LVL_2:
752 							l2 += cache_table[k].size;
753 							break;
754 						case LVL_3:
755 							l3 += cache_table[k].size;
756 							break;
757 						case LVL_TRACE:
758 							trace += cache_table[k].size;
759 							break;
760 						}
761 
762 						break;
763 					}
764 
765 					k++;
766 				}
767 			}
768 		}
769 	}
770 
771 	if (new_l1d)
772 		l1d = new_l1d;
773 
774 	if (new_l1i)
775 		l1i = new_l1i;
776 
777 	if (new_l2) {
778 		l2 = new_l2;
779 #ifdef CONFIG_SMP
780 		per_cpu(cpu_llc_id, cpu) = l2_id;
781 #endif
782 	}
783 
784 	if (new_l3) {
785 		l3 = new_l3;
786 #ifdef CONFIG_SMP
787 		per_cpu(cpu_llc_id, cpu) = l3_id;
788 #endif
789 	}
790 
791 #ifdef CONFIG_SMP
792 	/*
793 	 * If cpu_llc_id is not yet set, this means cpuid_level < 4, which in
794 	 * turn means that the only possibility is SMT (as indicated in
795 	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
796 	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
797 	 * c->phys_proc_id.
798 	 */
799 	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
800 		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
801 #endif
802 
803 	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
804 
805 	return l2;
806 }
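/*
 * Worked example for the l2_id/l3_id masking above: with apicid = 5, an L2
 * shared by 2 threads gives index_msb = get_count_order(2) = 1 and l2_id =
 * 5 & ~1 = 4, while an L3 shared by 16 threads gives index_msb = 4 and
 * l3_id = 5 & ~15 = 0.  cpu_llc_id is set from the L2 id and overridden by
 * the L3 id when an L3 is present.
 */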
807 
808 static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
809 				    struct _cpuid4_info_regs *base)
810 {
811 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
812 	struct cacheinfo *this_leaf;
813 	int i, sibling;
814 
815 	/*
816 	 * For L3, always use the pre-calculated cpu_llc_shared_mask
817 	 * to derive shared_cpu_map.
818 	 */
819 	if (index == 3) {
820 		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
821 			this_cpu_ci = get_cpu_cacheinfo(i);
822 			if (!this_cpu_ci->info_list)
823 				continue;
824 			this_leaf = this_cpu_ci->info_list + index;
825 			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
826 				if (!cpu_online(sibling))
827 					continue;
828 				cpumask_set_cpu(sibling,
829 						&this_leaf->shared_cpu_map);
830 			}
831 		}
832 	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
833 		unsigned int apicid, nshared, first, last;
834 
835 		nshared = base->eax.split.num_threads_sharing + 1;
836 		apicid = cpu_data(cpu).apicid;
837 		first = apicid - (apicid % nshared);
838 		last = first + nshared - 1;
839 
840 		for_each_online_cpu(i) {
841 			this_cpu_ci = get_cpu_cacheinfo(i);
842 			if (!this_cpu_ci->info_list)
843 				continue;
844 
845 			apicid = cpu_data(i).apicid;
846 			if ((apicid < first) || (apicid > last))
847 				continue;
848 
849 			this_leaf = this_cpu_ci->info_list + index;
850 
851 			for_each_online_cpu(sibling) {
852 				apicid = cpu_data(sibling).apicid;
853 				if ((apicid < first) || (apicid > last))
854 					continue;
855 				cpumask_set_cpu(sibling,
856 						&this_leaf->shared_cpu_map);
857 			}
858 		}
859 	} else
860 		return 0;
861 
862 	return 1;
863 }
864 
865 static void __cache_cpumap_setup(unsigned int cpu, int index,
866 				 struct _cpuid4_info_regs *base)
867 {
868 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
869 	struct cacheinfo *this_leaf, *sibling_leaf;
870 	unsigned long num_threads_sharing;
871 	int index_msb, i;
872 	struct cpuinfo_x86 *c = &cpu_data(cpu);
873 
874 	if (c->x86_vendor == X86_VENDOR_AMD) {
875 		if (__cache_amd_cpumap_setup(cpu, index, base))
876 			return;
877 	}
878 
879 	this_leaf = this_cpu_ci->info_list + index;
880 	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
881 
882 	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
883 	if (num_threads_sharing == 1)
884 		return;
885 
886 	index_msb = get_count_order(num_threads_sharing);
887 
888 	for_each_online_cpu(i)
889 		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
890 			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
891 
892 			if (i == cpu || !sib_cpu_ci->info_list)
893 				continue;/* skip if itself or no cacheinfo */
894 			sibling_leaf = sib_cpu_ci->info_list + index;
895 			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
896 			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
897 		}
898 }
899 
900 static void ci_leaf_init(struct cacheinfo *this_leaf,
901 			 struct _cpuid4_info_regs *base)
902 {
903 	this_leaf->id = base->id;
904 	this_leaf->attributes = CACHE_ID;
905 	this_leaf->level = base->eax.split.level;
906 	this_leaf->type = cache_type_map[base->eax.split.type];
907 	this_leaf->coherency_line_size =
908 				base->ebx.split.coherency_line_size + 1;
909 	this_leaf->ways_of_associativity =
910 				base->ebx.split.ways_of_associativity + 1;
911 	this_leaf->size = base->size;
912 	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
913 	this_leaf->physical_line_partition =
914 				base->ebx.split.physical_line_partition + 1;
915 	this_leaf->priv = base->nb;
916 }
917 
918 static int __init_cache_level(unsigned int cpu)
919 {
920 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
921 
922 	if (!num_cache_leaves)
923 		return -ENOENT;
924 	if (!this_cpu_ci)
925 		return -EINVAL;
926 	this_cpu_ci->num_levels = 3;
927 	this_cpu_ci->num_leaves = num_cache_leaves;
928 	return 0;
929 }
930 
931 /*
932  * The max shared threads number comes from CPUID.4:EAX[25-14] with input
933  * ECX as cache index. Then right shift apicid by the number's order to get
934  * cache id for this cache node.
935  */
936 static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
937 {
938 	struct cpuinfo_x86 *c = &cpu_data(cpu);
939 	unsigned long num_threads_sharing;
940 	int index_msb;
941 
942 	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
943 	index_msb = get_count_order(num_threads_sharing);
944 	id4_regs->id = c->apicid >> index_msb;
945 }
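/*
 * Worked example: if CPUID.4 reports num_threads_sharing = 15 for the L3
 * (16 threads, stored as "value - 1"), index_msb = get_count_order(16) = 4,
 * so a CPU with apicid 0x1b gets cache id 0x1b >> 4 = 1.
 */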
946 
947 static int __populate_cache_leaves(unsigned int cpu)
948 {
949 	unsigned int idx, ret;
950 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
951 	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
952 	struct _cpuid4_info_regs id4_regs = {};
953 
954 	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
955 		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
956 		if (ret)
957 			return ret;
958 		get_cache_id(cpu, &id4_regs);
959 		ci_leaf_init(this_leaf++, &id4_regs);
960 		__cache_cpumap_setup(cpu, idx, &id4_regs);
961 	}
962 	this_cpu_ci->cpu_map_populated = true;
963 
964 	return 0;
965 }
966 
967 DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
968 DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
969