/* SPDX-License-Identifier: GPL-2.0-only */

#include <cpu/x86/cr.h>
#include <cpu/amd/mtrr.h>
#include <cpu/x86/msr.h>
#include <arch/ram_segs.h>

#define __RAMSTAGE__
#include <cpu/x86/64bit/entry64.inc>

/* The SIPI vector is responsible for initializing the APs in the system. It
 * loads microcode, sets up MSRs, and enables caching before calling into
 * C code. */

.section ".module_parameters", "aw", @progbits
ap_start_params:
gdtaddr:
.word 0 /* limit */
.long 0 /* table */
.word 0 /* unused */
idt_ptr:
.long 0
per_cpu_segment_descriptors:
.long 0
per_cpu_segment_selector:
.long 0
stack_top:
.long 0
stack_size:
.long 0
microcode_lock:
.long 0
microcode_ptr:
.long 0
msr_table_ptr:
.long 0
msr_count:
.long 0
c_handler:
.long 0
cr3:
.long 0
ap_count:
.long 0

#define CR0_CLEAR_FLAGS_CACHE_ENABLE (CR0_CD | CR0_NW)
#define CR0_SET_FLAGS (CR0_CLEAR_FLAGS_CACHE_ENABLE | CR0_PE)
#define CR0_CLEAR_FLAGS \
	(CR0_PG | CR0_AM | CR0_WP | CR0_NE | CR0_TS | CR0_EM | CR0_MP)

.text
.code16
.global _start
_start:
	cli
	xorl	%eax, %eax
	movl	%eax, %cr3	/* Invalidate TLB */

	/* On hyper-threaded CPUs, invalidating the cache here is
	 * very, very bad. Don't.
	 */

	/* Set up the data segment. */
	movw	%cs, %ax
	movw	%ax, %ds

	/* The gdtaddr needs to be relative to the data segment in order
	 * to properly dereference it. The .text section comes first in an
	 * rmodule so _start can be used as a proxy for the load address. */
	movl	$(gdtaddr), %ebx
	sub	$(_start), %ebx

	lgdtl	(%ebx)

	movl	%cr0, %eax
	andl	$~CR0_CLEAR_FLAGS, %eax
	orl	$CR0_SET_FLAGS, %eax
	movl	%eax, %cr0

	ljmpl	$RAM_CODE_SEG, $1f
1:
	.code32
	movw	$RAM_DATA_SEG, %ax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %ss
	xor	%ax, %ax	/* Zero out the gs and fs segment index. */
	movw	%ax, %fs
	movw	%ax, %gs	/* Will be used for cpu_info. */

	/* Load the interrupt descriptor table. */
	mov	idt_ptr, %ebx
	lidt	(%ebx)

1:
	/* Obtain CPU number. */
	movl	ap_count, %ecx
	inc	%ecx
	lock cmpxchg %ecx, ap_count
	jnz	1b

	/* Set up the stack for each CPU. */
	movl	stack_size, %eax
	mul	%ecx
	movl	stack_top, %edx
	subl	%eax, %edx
	mov	%edx, %esp

	/* Save the CPU number for calling the AP entry. */
	push	%ecx

	/*
	 * The following code only needs to run on Intel platforms, so the
	 * caller does not provide a microcode_ptr on other platforms.
	 * On Intel platforms which update microcode using FIT, the version
	 * check below will also skip the microcode update.
	 */

	/* Determine if one should check microcode versions. */
	mov	microcode_ptr, %edi
	test	%edi, %edi
	jz	microcode_done	/* Bypass if no microcode exists. */

	/* Get the microcode version. */
	xorl	%eax, %eax
	xorl	%edx, %edx
	movl	$IA32_BIOS_SIGN_ID, %ecx
	wrmsr
	mov	$1, %eax
	cpuid
	mov	$IA32_BIOS_SIGN_ID, %ecx
	rdmsr
	/* If microcode has already been loaded, skip loading it again. */
	test	%edx, %edx
	jnz	microcode_done

	/*
	 * The Intel SDM and various BWGs specify using a semaphore to update
	 * microcode on one thread per core on Hyper-Threading enabled CPUs.
	 * That would require complex code to determine the core ID and to
	 * initialize and pick the right semaphore out of CONFIG_MAX_CPUS / 2.
	 * Instead of the recommended per-core approach, use one global
	 * spinlock. Assuming that only pre-FIT platforms with Hyper-Threading
	 * enabled and at most 8 threads will ever run into this condition,
	 * the boot delay is negligible.
	 */
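
	/*
	 * microcode_lock protocol: the caller sets it to 0xffffffff when APs
	 * may load microcode in parallel; any other value means bit 0 serves
	 * as a global spinlock, acquired with "lock btsl" below and released
	 * by writing 0 once the update has been triggered.
	 */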

	/* Determine if parallel microcode loading is allowed. */
	cmpl	$0xffffffff, microcode_lock
	je	load_microcode

	/* Protect microcode loading. */
lock_microcode:
	lock btsl $0, microcode_lock
	jc	lock_microcode

load_microcode:
	/* Load new microcode. */
	mov	$IA32_BIOS_UPDT_TRIG, %ecx
	xor	%edx, %edx
	mov	%edi, %eax
	/* The microcode pointer passed in points to the update header. Adjust
	 * the pointer so it points to the payload (the header is 48 bytes). */
	add	$48, %eax
	pusha
	wrmsr
	popa

	/* Unconditionally unlock microcode loading. */
	cmpl	$0xffffffff, microcode_lock
	je	microcode_done

	xor	%eax, %eax
	mov	%eax, microcode_lock

microcode_done:
	/*
	 * Load MSRs. Each entry in the table consists of:
	 * 0: index
	 * 4: value[31:0]
	 * 8: value[63:32]
	 */
	mov	msr_table_ptr, %edi
	mov	msr_count, %ebx
	test	%ebx, %ebx
	jz	1f

#if CONFIG(X86_AMD_FIXED_MTRRS)
	/* Allow modification of the RdDram and WrDram bits. */
	mov	$SYSCFG_MSR, %ecx
	rdmsr
	or	$SYSCFG_MSR_MtrrFixDramModEn, %eax
	wrmsr
#endif

load_msr:
	mov	(%edi), %ecx
	mov	4(%edi), %eax
	mov	8(%edi), %edx
	wrmsr
	add	$12, %edi
	dec	%ebx
	jnz	load_msr

#if CONFIG(X86_AMD_FIXED_MTRRS)
	mov	$SYSCFG_MSR, %ecx
	rdmsr
	and	$~SYSCFG_MSR_MtrrFixDramModEn, %eax
	wrmsr
#endif

1:
	/* Enable caching. */
	mov	%cr0, %eax
	and	$~(CR0_CLEAR_FLAGS_CACHE_ENABLE), %eax
	mov	%eax, %cr0

#if CONFIG(SSE)
	/* Enable SSE instructions. */
	mov	%cr4, %eax
	orl	$(CR4_OSFXSR | CR4_OSXMMEXCPT), %eax
	mov	%eax, %cr4
#endif

	pop	%edi	/* Retrieve the CPU index. */
	andl	$0xfffffff0, %esp	/* Ensure 16-byte stack alignment. */

#if ENV_X86_64
	/* entry64.inc preserves ebx, esi, edi, ebp */
	setup_longmode cr3

	movabs	c_handler, %eax
	call	*%rax
#else
	push	$0
	push	$0
	push	$0
	push	%edi
	mov	c_handler, %eax
	call	*%eax
#endif

halt_jump:
	hlt
	jmp	halt_jump
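
/*
 * For reference: a sketch of how the BSP-side loader is expected to view the
 * .module_parameters block above and the MSR table it points to. The field
 * names below are illustrative and simply mirror the labels in this file;
 * the authoritative definitions live with the C code that fills in these
 * parameters (in coreboot, struct sipi_params in src/cpu/x86/mp_init.c).
 *
 *	struct sipi_params {
 *		uint16_t gdt_limit;	gdtaddr: GDT pseudo-descriptor
 *		uint32_t gdt_base;
 *		uint16_t unused;
 *		uint32_t idt_ptr;
 *		uint32_t per_cpu_segment_descriptors;
 *		uint32_t per_cpu_segment_selector;
 *		uint32_t stack_top;
 *		uint32_t stack_size;
 *		uint32_t microcode_lock;	0xffffffff = parallel loading
 *		uint32_t microcode_ptr;
 *		uint32_t msr_table_ptr;
 *		uint32_t msr_count;
 *		uint32_t c_handler;
 *		uint32_t cr3;
 *		uint32_t ap_count;		updated atomically by each AP
 *	} __packed;
 *
 *	msr_table_ptr points to msr_count packed 12-byte entries:
 *
 *	struct {
 *		uint32_t index;		MSR number, loaded into %ecx
 *		uint32_t lo;		value[31:0], loaded into %eax
 *		uint32_t hi;		value[63:32], loaded into %edx
 *	};
 *
 * c_handler is entered with the zero-based CPU index as its first argument
 * in both the 32-bit and the 64-bit paths.
 */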