/* SPDX-License-Identifier: GPL-2.0-only */

#include <cpu/x86/cr.h>
#include <cpu/amd/mtrr.h>
#include <cpu/x86/msr.h>
#include <arch/ram_segs.h>

#define __RAMSTAGE__
#include <cpu/x86/64bit/entry64.inc>

/* The SIPI vector is responsible for initializing the APs in the system. It
 * loads microcode, sets up MSRs, and enables caching before calling into
 * C code. */

.section ".module_parameters", "aw", @progbits
ap_start_params:
gdtaddr:
.word 0 /* limit */
.long 0 /* table */
.word 0 /* unused */
idt_ptr:
.long 0
per_cpu_segment_descriptors:
.long 0
per_cpu_segment_selector:
.long 0
stack_top:
.long 0
stack_size:
.long 0
microcode_lock:
.long 0
microcode_ptr:
.long 0
msr_table_ptr:
.long 0
msr_count:
.long 0
c_handler:
.long 0
cr3:
.long 0
ap_count:
.long 0

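/*
 * The BSP patches the block above before sending the SIPI. As an
 * illustrative sketch (field and struct names here are made up, not
 * necessarily the BSP-side definition), the layout corresponds to a
 * packed C struct like:
 *
 *	struct ap_start_params {
 *		uint16_t gdt_limit;
 *		uint32_t gdt_base;
 *		uint16_t unused;
 *		uint32_t idt_ptr;
 *		uint32_t per_cpu_segment_descriptors;
 *		uint32_t per_cpu_segment_selector;
 *		uint32_t stack_top;
 *		uint32_t stack_size;
 *		uint32_t microcode_lock;
 *		uint32_t microcode_ptr;
 *		uint32_t msr_table_ptr;
 *		uint32_t msr_count;
 *		uint32_t c_handler;
 *		uint32_t cr3;
 *		uint32_t ap_count;
 *	} __packed;
 */
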
#define CR0_CLEAR_FLAGS_CACHE_ENABLE (CR0_CD | CR0_NW)
#define CR0_SET_FLAGS (CR0_CLEAR_FLAGS_CACHE_ENABLE | CR0_PE)
#define CR0_CLEAR_FLAGS \
	(CR0_PG | CR0_AM | CR0_WP | CR0_NE | CR0_TS | CR0_EM | CR0_MP)

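/*
 * With the standard CR0 bit positions (PE=0, MP=1, EM=2, TS=3, NE=5,
 * WP=16, AM=18, NW=29, CD=30, PG=31) these masks evaluate to:
 *   CR0_CLEAR_FLAGS_CACHE_ENABLE = 0x60000000
 *   CR0_SET_FLAGS                = 0x60000001
 *   CR0_CLEAR_FLAGS              = 0x8005002e
 * i.e. the AP enters protected mode (PE) with the cache disabled
 * (CD/NW set) until caching is explicitly enabled further down.
 */
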
.text
.code16
.global _start
_start:
	cli
	xorl	%eax, %eax
	movl	%eax, %cr3    /* Invalidate TLB */

	/* On hyper-threaded CPUs, invalidating the cache here is
	 * very, very bad. Don't.
	 */

	/* Set up the data segment */
	movw	%cs, %ax
	movw	%ax, %ds

	/* The gdtaddr needs to be relative to the data segment in order
	 * to properly dereference it. The .text section comes first in an
	 * rmodule so _start can be used as a proxy for the load address. */
	movl	$(gdtaddr), %ebx
	sub	$(_start), %ebx
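	/* %ds was just loaded from %cs, so its base is the module's load
	 * address; subtracting _start turns the link-time address of
	 * gdtaddr into a %ds-relative offset that lgdtl can use. */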

	lgdtl	(%ebx)

	movl	%cr0, %eax
	andl	$~CR0_CLEAR_FLAGS, %eax
	orl	$CR0_SET_FLAGS, %eax
	movl	%eax, %cr0

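	/* The far jump both loads %cs from the new GDT and serializes the
	 * transition into protected mode. */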
	ljmpl	$RAM_CODE_SEG, $1f
1:
	.code32
	movw	$RAM_DATA_SEG, %ax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %ss
	xor	%ax, %ax /* zero out the gs and fs segment index */
	movw	%ax, %fs
	movw	%ax, %gs /* Will be used for cpu_info */

	/* Load the Interrupt descriptor table */
	mov	idt_ptr, %ebx
	lidt	(%ebx)

1:
	/* Obtain CPU number. */
	movl	ap_count, %ecx
	inc	%ecx
	lock cmpxchg %ecx, ap_count
	jnz	1b

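	/*
	 * Roughly equivalent C (illustrative sketch only):
	 *
	 *	do {
	 *		expected = ap_count;
	 *	} while (!compare_and_swap(&ap_count, expected, expected + 1));
	 *	cpu_number = expected + 1;
	 *
	 * A failed cmpxchg reloads %eax with the current ap_count, so each
	 * AP eventually claims a unique 1-based index in %ecx.
	 */
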
	/* Set up the stack for this CPU. */
	movl	stack_size, %eax
	mul	%ecx
	movl	stack_top, %edx
	subl	%eax, %edx
	mov	%edx, %esp

	/* Save CPU number for calling the AP entry */
	push	%ecx

	/*
	 * The following code only needs to run on Intel platforms, so the
	 * caller doesn't provide a microcode_ptr if not on Intel.
	 * On Intel platforms which update microcode using FIT, the version
	 * check below will also skip the microcode update.
	 */

	/* Determine if one should check microcode versions. */
	mov	microcode_ptr, %edi
	test	%edi, %edi
	jz	microcode_done /* Bypass if no microcode exists. */

	/* Get the microcode version. */
	xorl	%eax, %eax
	xorl	%edx, %edx
	movl	$IA32_BIOS_SIGN_ID, %ecx
	wrmsr
	mov	$1, %eax
	cpuid
	mov	$IA32_BIOS_SIGN_ID, %ecx
	rdmsr
	/* If something is already loaded, skip loading again. */
	test	%edx, %edx
	jnz	microcode_done
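
	/*
	 * This follows the Intel SDM's microcode signature protocol:
	 * write 0 to IA32_BIOS_SIGN_ID, execute CPUID(1), then RDMSR of
	 * the same MSR reports the loaded revision in %edx. A non-zero
	 * revision means microcode was already applied (e.g. via FIT),
	 * so the update is skipped.
	 */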

	/*
	 * The Intel SDM and various BWGs specify to use a semaphore so that
	 * only one thread per core updates microcode on Hyper-Threading
	 * enabled CPUs. Because of that, complex code would be necessary to
	 * determine the core ID and to initialize and pick the right
	 * semaphore out of CONFIG_MAX_CPUS / 2 semaphores.
	 * Instead of the recommended per-core approach, use one global
	 * spinlock. Assuming that only pre-FIT platforms with Hyper-Threading
	 * enabled and at most 8 threads will ever run into this condition,
	 * the boot delay is negligible.
	 */

	/* Determine if parallel microcode loading is allowed. */
	cmpl	$0xffffffff, microcode_lock
	je	load_microcode

	/* Protect microcode loading. */
lock_microcode:
	lock btsl $0, microcode_lock
	jc	lock_microcode
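
	/* The two instructions above form a test-and-set spinlock,
	 * roughly: while (test_and_set_bit(0, &microcode_lock)) ; */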

load_microcode:
	/* Load new microcode. */
	mov	$IA32_BIOS_UPDT_TRIG, %ecx
	xor	%edx, %edx
	mov	%edi, %eax
	/* The microcode pointer is passed in pointing to the header. Adjust
	 * the pointer to reflect the payload (header size is 48 bytes). */
	add	$48, %eax
	pusha
	wrmsr
	popa

	/* Unlock microcode loading (nothing to unlock when parallel
	 * loading is allowed). */
	cmpl	$0xffffffff, microcode_lock
	je	microcode_done

	xor	%eax, %eax
	mov	%eax, microcode_lock

microcode_done:
	/*
	 * Load MSRs. Each entry in the table consists of:
	 *  0: index
	 *  4: value[31:0]
	 *  8: value[63:32]
	 */
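	/*
	 * Illustrative view of one 12-byte entry as the BSP might build it
	 * (sketch only; names are not necessarily the BSP-side definition):
	 *
	 *	struct saved_msr {
	 *		uint32_t index;
	 *		uint32_t lo;
	 *		uint32_t hi;
	 *	} __packed;
	 */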
	mov	msr_table_ptr, %edi
	mov	msr_count, %ebx
	test	%ebx, %ebx
	jz	1f

#if CONFIG(X86_AMD_FIXED_MTRRS)
	/* Allow modification of RdDram and WrDram bits */
	mov	$SYSCFG_MSR, %ecx
	rdmsr
	or	$SYSCFG_MSR_MtrrFixDramModEn, %eax
	wrmsr
#endif

load_msr:
	mov	(%edi), %ecx
	mov	4(%edi), %eax
	mov	8(%edi), %edx
	wrmsr
	add	$12, %edi
	dec	%ebx
	jnz	load_msr

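	/* Hide the RdDram/WrDram bits again; AMD documentation recommends
	 * keeping MtrrFixDramModEn clear during normal operation. */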
#if CONFIG(X86_AMD_FIXED_MTRRS)
	mov	$SYSCFG_MSR, %ecx
	rdmsr
	and	$~SYSCFG_MSR_MtrrFixDramModEn, %eax
	wrmsr
#endif

1:
	/* Enable caching. */
	mov	%cr0, %eax
	and	$~(CR0_CLEAR_FLAGS_CACHE_ENABLE), %eax
	mov	%eax, %cr0

#if CONFIG(SSE)
	/* Enable SSE instructions. */
	mov	%cr4, %eax
	orl	$(CR4_OSFXSR | CR4_OSXMMEXCPT), %eax
	mov	%eax, %cr4
#endif

	pop	%edi /* Retrieve the CPU index */
	andl	$0xfffffff0, %esp /* ensure stack alignment */

#if ENV_X86_64
	/* entry64.inc preserves ebx, esi, edi, ebp */
	setup_longmode cr3

	movabs	c_handler, %eax
	call	*%rax
#else
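	/* Three dummy words keep the stack 16-byte aligned after the
	 * argument push; %edi (the CPU index) is the only argument. */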
	push	$0
	push	$0
	push	$0
	push	%edi
	mov	c_handler, %eax
	call	*%eax
#endif

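	/* The C handler is not expected to return; if it does, park the AP. */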
halt_jump:
	hlt
	jmp	halt_jump