/*
 * Copyright (c) 2009 Corey Tabaka
 * Copyright (c) 2015-2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files
 * (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge,
 * publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <debug.h>
#include <arch.h>
#include <arch/ops.h>
#include <arch/x86.h>
#include <arch/x86/mmu.h>
#include <arch/x86/mp.h>
#include <arch/x86/descriptor.h>
#include <arch/fpu.h>
#include <arch/mmu.h>
#include <assert.h>
#include <platform.h>
#include <sys/types.h>
#include <string.h>

/* early stack */
uint8_t _kstack[PAGE_SIZE] __ALIGNED(8);
uint8_t _tss_start[SMP_MAX_CPUS][PAGE_SIZE] __ALIGNED(8);
uint8_t _double_fault_stack[SMP_MAX_CPUS][PAGE_SIZE] __ALIGNED(8);

/* save a pointer to the multiboot information coming in from whoever called us */
/* make sure it lives in .data to avoid it being wiped out by bss clearing */
__SECTION(".data") void *_multiboot_info;

/* main tss */
tss_t system_tss[SMP_MAX_CPUS];
x86_per_cpu_states_t per_cpu_states[SMP_MAX_CPUS];

volatile int cpu_woken_up = 0;

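/*
 * Point X86_MSR_GS_BASE at this CPU's x86_per_cpu_states_t so that per-CPU
 * fields can later be reached with GS-relative accesses, e.g. the
 * x86_write_gs_with_offset(SYSCALL_STACK_OFF, ...) call in
 * set_tss_segment_percpu(). The BSP's GS base is assumed to have been set
 * up earlier during boot.
 */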
static void init_per_cpu_state(uint cpu)
{
    x86_per_cpu_states_t *states;

    /*
     * At this point, the BSP has already set up the current thread in its
     * global state, so only initialize the global state of the AP(s).
     */
    if (0 != cpu) {
        states = &per_cpu_states[cpu];

        states->cur_thread = NULL;
        states->syscall_stack = 0;

        write_msr(X86_MSR_GS_BASE, (uint64_t)states);
    }
}

static void set_tss_segment_percpu(void)
{
    uint64_t addr;

    tss_t *tss_base = get_tss_base();
    uint cpu_id = arch_curr_cpu_num();
    ASSERT(tss_base);

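    /*
     * &_tss_start[cpu_id + 1] is the address one past the end of this CPU's
     * PAGE_SIZE stack region, i.e. its initial (descending) stack top. The
     * same pattern is used for the double fault stack below.
     */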
    addr = (uint64_t)&_tss_start[cpu_id + 1];

    /*
     * Only privilege level 0 matters here, since privilege levels 1 and 2
     * are unused. This stack is used on an inter-privilege transition from
     * level 3 to level 0, for instance when an interrupt is raised while
     * running at level 3.
     */
    tss_base->rsp0 = addr;

    /* Syscalls use the same stack as RSP0 in the TSS */
    x86_write_gs_with_offset(SYSCALL_STACK_OFF, addr);

    /*
     * Exception and interrupt handlers share the same stack as the kernel
     * context. If the kernel stack is corrupted or misused, an exception
     * handler will keep using that corrupted stack, which makes the error
     * hard to trace, especially in the Page Fault handler.
     *
     * To ensure the Page Fault handler cannot end up in an infinite loop,
     * Interrupt Stack Table entry 1 (IST1) is dedicated to the Double Fault
     * handler. With this dedicated double fault stack, a Page Fault taken
     * while the stack pointer is invalid triggers a double fault, which can
     * then exit cleanly.
     */
    addr = (uint64_t)&_double_fault_stack[cpu_id + 1];
    tss_base->ist1 = addr;
}

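/* Weak default syscall entry point; a real handler is expected to override it. */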
__WEAK void x86_syscall(void)
{
    panic("unhandled syscall\n");
}

static void setup_syscall_percpu(void)
{
    /*
     * The SYSENTER instruction executes a fast syscall from level 3 into a
     * level 0 system procedure or routine. According to the SYSENTER
     * instruction description in the Intel SDM Vol. 2, if all condition
     * checks pass, then:
     *     RSP         <- SYSENTER_ESP_MSR
     *     RIP         <- SYSENTER_EIP_MSR
     *     CS.Selector <- SYSENTER_CS_MSR[15:0] & 0xFFFCH
     *     SS.Selector <- CS.Selector + 8
     */
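    /*
     * Note that SS.Selector is derived as CS.Selector + 8, so this relies on
     * the kernel data/stack segment descriptor sitting immediately after
     * CODE_64_SELECTOR in the GDT.
     */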
    write_msr(SYSENTER_CS_MSR, CODE_64_SELECTOR);
    write_msr(SYSENTER_ESP_MSR, x86_read_gs_with_offset(SYSCALL_STACK_OFF));
    write_msr(SYSENTER_EIP_MSR, (uint64_t)(x86_syscall));
}

void arch_early_init(void)
{
    seg_sel_t sel = 0;
    uint cpu_id = 1;

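    /*
     * atomic_add() returns the previous counter value, so each CPU entering
     * arch_early_init() claims a unique id: the BSP gets 0, the first AP 1,
     * and so on.
     */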
    cpu_id = atomic_add(&cpu_woken_up, cpu_id);

    init_per_cpu_state(cpu_id);

    if (check_fsgsbase_avail()) {
        x86_set_cr4(x86_get_cr4() | X86_CR4_FSGSBASE);
    }

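    /*
     * Each 64-bit TSS descriptor occupies 16 bytes in the GDT, so per-CPU
     * TSS selectors are spaced 16 (1 << 4) bytes apart starting at
     * TSS_SELECTOR.
     */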
    sel = (seg_sel_t)(cpu_id << 4);
    sel += TSS_SELECTOR;

    /* enable caches here for now */
    clear_in_cr0(X86_CR0_NW | X86_CR0_CD);

    set_global_desc(sel,
                    &system_tss[cpu_id],
                    sizeof(tss_t),
                    1,
                    0,
                    0,
                    SEG_TYPE_TSS,
                    0,
                    0);
    x86_ltr(sel);

    x86_mmu_early_init();
    platform_init_mmu_mappings();
}

void arch_init(void)
{
    x86_mmu_init();

    set_tss_segment_percpu();
    setup_syscall_percpu();

#ifdef X86_WITH_FPU
    fpu_init();
#endif
}

void arch_chain_load(void *ep, ulong arg0, ulong arg1, ulong arg2, ulong arg3)
{
    PANIC_UNIMPLEMENTED;
}

void arch_enter_uspace(vaddr_t ep,
                       vaddr_t stack,
                       vaddr_t shadow_stack_base,
                       uint32_t flags,
                       ulong arg0)
{
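    /*
     * Align the initial user RSP so that RSP % 16 == 8 on entry, which is
     * what the SysV AMD64 ABI expects at a function entry point (as if a
     * return address had just been pushed).
     */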
    register uint64_t sp_usr = round_down(stack + 8, 16) - 8;
    register uint64_t entry = ep;
    register uint64_t code_seg = USER_CODE_64_SELECTOR | USER_RPL;
    register uint64_t data_seg = USER_DATA_64_SELECTOR | USER_RPL;
    register uint64_t usr_flags = USER_EFLAGS;

    //DEBUG_ASSERT(shadow_stack_base == 0);

    /*
     * Clear all general purpose registers except RDI, since RDI carries the
     * parameter passed to user space.
     *
     * The IRETQ instruction is used here to perform the inter-privilege
     * level return. The input parameter 'flags' is ignored when entering
     * level 3.
     *
     * The LK kernel runs in IA-32e mode; when IRETQ is invoked, the
     * processor performs:
     *
     * 1. The IA-32e-mode operation steps, popping RIP/CS/tempRFLAGS:
     *        RIP         <- POP()  -- entry
     *        CS.Selector <- POP()  -- code_seg
     *        tempRFLAGS  <- POP()  -- usr_flags
     * 2. Since CS.RPL (3) > CPL (0), it takes the
     *    return-to-outer-privilege-level path:
     *        RSP    <- POP()       -- sp_usr
     *        SS     <- POP()       -- data_seg
     *        RFLAGS <- tempRFLAGS
     *        CPL    <- CS.RPL
     *
     * After IRETQ executes, the processor runs at RIP in 64-bit mode at
     * level 3.
     *
     * For more details, refer to "IRET/IRETD -- Interrupt Return" in the
     * Intel SDM Vol. 2 (Instruction Set Reference).
     */
    __asm__ __volatile__ (
        "pushq %0 \n"
        "pushq %1 \n"
        "pushq %2 \n"
        "pushq %3 \n"
        "pushq %4 \n"
        "pushq %5 \n"
        "swapgs \n"
        "xorq %%r15, %%r15 \n"
        "xorq %%r14, %%r14 \n"
        "xorq %%r13, %%r13 \n"
        "xorq %%r12, %%r12 \n"
        "xorq %%r11, %%r11 \n"
        "xorq %%r10, %%r10 \n"
        "xorq %%r9, %%r9 \n"
        "xorq %%r8, %%r8 \n"
        "xorq %%rbp, %%rbp \n"
        "xorq %%rdx, %%rdx \n"
        "xorq %%rcx, %%rcx \n"
        "xorq %%rbx, %%rbx \n"
        "xorq %%rax, %%rax \n"
        "xorq %%rsi, %%rsi \n"
        "popq %%rdi \n"
        "iretq"
        :
        : "r" (data_seg), "r" (sp_usr), "r" (usr_flags),
          "r" (code_seg), "r" (entry), "r" (arg0));

    __UNREACHABLE;
}

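/*
 * On x86-64, user-space TLS is addressed through FS: record the base in the
 * thread struct (presumably so a context switch can restore it) and load it
 * into FS_BASE for the currently running thread.
 */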
void arch_set_user_tls(vaddr_t tls_ptr)
{
    thread_t *cur_thread = get_current_thread();

    cur_thread->arch.fs_base = tls_ptr;
    write_msr(X86_MSR_FS_BASE, tls_ptr);
}