/*
 * Copyright (c) 2009 Corey Tabaka
 * Copyright (c) 2015-2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files
 * (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge,
 * publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <debug.h>
#include <arch.h>
#include <arch/ops.h>
#include <arch/x86.h>
#include <arch/x86/mmu.h>
#include <arch/x86/mp.h>
#include <arch/x86/descriptor.h>
#include <arch/fpu.h>
#include <arch/mmu.h>
#include <assert.h>
#include <platform.h>
#include <sys/types.h>
#include <string.h>

/* early stack */
uint8_t _kstack[PAGE_SIZE] __ALIGNED(8);
uint8_t _tss_start[SMP_MAX_CPUS][PAGE_SIZE] __ALIGNED(8);
uint8_t _double_fault_stack[SMP_MAX_CPUS][PAGE_SIZE] __ALIGNED(8);

/* save a pointer to the multiboot information coming in from whoever called us */
/* make sure it lives in .data to avoid it being wiped out by bss clearing */
__SECTION(".data") void *_multiboot_info;

/* main tss */
tss_t system_tss[SMP_MAX_CPUS];
x86_per_cpu_states_t per_cpu_states[SMP_MAX_CPUS];

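/*
 * Incremented atomically by each CPU as it enters arch_early_init(); the
 * value returned by the atomic add is used as that CPU's id, so the BSP
 * (first to arrive) observes 0 and each AP gets the next id.
 */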
volatile int cpu_woken_up = 0;

static void init_per_cpu_state(uint cpu)
{
    x86_per_cpu_states_t *states;

    /*
     * At this point, the BSP has already set up its current thread in
     * global state, so initialize the global state of the AP(s) only.
     */
    if (0 != cpu) {
        states = &per_cpu_states[cpu];

        states->cur_thread    = NULL;
        states->syscall_stack = 0;

        write_msr(X86_MSR_GS_BASE, (uint64_t)states);
    }
}
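/*
 * Once X86_MSR_GS_BASE points at this CPU's x86_per_cpu_states_t, per-cpu
 * fields are reached with GS-relative accesses, e.g. the
 * x86_write_gs_with_offset(SYSCALL_STACK_OFF, ...) call in
 * set_tss_segment_percpu() below.
 */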

static void set_tss_segment_percpu(void)
{
    uint64_t addr;

    tss_t *tss_base = get_tss_base();
    uint cpu_id = arch_curr_cpu_num();
    ASSERT(tss_base);

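    /*
     * Stacks grow down, so use the top of this CPU's page: the address of
     * _tss_start[cpu_id + 1]. For the last CPU this is the one-past-the-end
     * address of the array, which is still a valid pointer to form in C.
     */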
    addr = (uint64_t)&_tss_start[cpu_id + 1];

    /*
     * Only privilege level 0 matters, since privilege levels 1 and 2 are
     * unused. This stack is used on an inter-privilege transition from
     * level 3 to level 0, for instance when an interrupt is raised while
     * running at level 3.
     */
    tss_base->rsp0 = addr;

    /* Syscalls use the same stack as RSP0 in the TSS */
    x86_write_gs_with_offset(SYSCALL_STACK_OFF, addr);

    /*
     * Exception and interrupt handlers share their stack with the kernel
     * context. If the kernel stack is corrupted or misused, an exception
     * handler would continue to use the corrupted stack, which makes the
     * error hard to trace, especially in the Page Fault handler.
     *
     * To ensure the Page Fault handler cannot fall into an infinite loop,
     * Interrupt Stack Table entry 1 (IST1) is dedicated to the Double Fault
     * handler. With this dedicated double fault stack, a Page Fault taken
     * while the stack pointer is invalid triggers a Double Fault, which can
     * then exit cleanly.
     */
    addr = (uint64_t)&_double_fault_stack[cpu_id + 1];
    tss_base->ist1 = addr;
}

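/* Weak default; a real syscall dispatcher is expected to override this symbol. */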
__WEAK void x86_syscall(void)
{
    panic("unhandled syscall\n");
}

static void setup_syscall_percpu(void)
{
    /*
     * The SYSENTER instruction executes a fast syscall into a privilege
     * level 0 system routine from level 3. According to the SYSENTER
     * description in the Intel SDM, Vol. 2, if all condition checks pass,
     * then:
     *      RSP          <-  SYSENTER_ESP_MSR
     *      RIP          <-  SYSENTER_EIP_MSR
     *      CS.Selector  <-  SYSENTER_CS_MSR[15:0] & 0xFFFC
     *      SS.Selector  <-  CS.Selector + 8
     */
    write_msr(SYSENTER_CS_MSR, CODE_64_SELECTOR);
    write_msr(SYSENTER_ESP_MSR, x86_read_gs_with_offset(SYSCALL_STACK_OFF));
    write_msr(SYSENTER_EIP_MSR, (uint64_t)(x86_syscall));
}
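
/*
 * For illustration only, not part of this file: a minimal sketch of how a
 * level-3 user stub could enter the kernel through SYSENTER, assuming the
 * common convention that the kernel returns via SYSEXIT with RIP <- RDX and
 * RSP <- RCX. The actual user-space stub and register convention for this
 * kernel are defined elsewhere.
 *
 *     mov  %rsp, %rcx          // save user RSP for the SYSEXIT return
 *     lea  1f(%rip), %rdx      // save return RIP for the SYSEXIT return
 *     sysenter                 // enter x86_syscall() at level 0
 * 1:                           // execution resumes here after SYSEXIT
 */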

void arch_early_init(void)
{
    seg_sel_t sel = 0;
    uint cpu_id = 1;

    cpu_id = atomic_add(&cpu_woken_up, cpu_id);

    init_per_cpu_state(cpu_id);

    if (check_fsgsbase_avail()) {
        x86_set_cr4(x86_get_cr4() | X86_CR4_FSGSBASE);
    }

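    /*
     * In long mode a TSS descriptor occupies 16 bytes in the GDT, so the
     * per-cpu TSS selectors start at TSS_SELECTOR and are spaced 16 bytes
     * apart (cpu_id << 4).
     */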
    sel = (seg_sel_t)(cpu_id << 4);
    sel += TSS_SELECTOR;

    /* enable caches here for now */
    clear_in_cr0(X86_CR0_NW | X86_CR0_CD);

    set_global_desc(sel,
            &system_tss[cpu_id],
            sizeof(tss_t),
            1,
            0,
            0,
            SEG_TYPE_TSS,
            0,
            0);
    x86_ltr(sel);

    x86_mmu_early_init();
    platform_init_mmu_mappings();
}

void arch_init(void)
{
    x86_mmu_init();

    set_tss_segment_percpu();
    setup_syscall_percpu();

#ifdef X86_WITH_FPU
    fpu_init();
#endif
}

void arch_chain_load(void *ep, ulong arg0, ulong arg1, ulong arg2, ulong arg3)
{
    PANIC_UNIMPLEMENTED;
}

void arch_enter_uspace(vaddr_t ep,
                       vaddr_t stack,
                       vaddr_t shadow_stack_base,
                       uint32_t flags,
                       ulong arg0)
{
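    /*
     * Align the initial user stack per the SysV AMD64 ABI: at function
     * entry RSP must equal 8 mod 16 (16-byte aligned before the call
     * pushed the return address), so land the user RSP on such a boundary.
     */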
    register uint64_t sp_usr = round_down(stack + 8, 16) - 8;
    register uint64_t entry = ep;
    register uint64_t code_seg = USER_CODE_64_SELECTOR | USER_RPL;
    register uint64_t data_seg = USER_DATA_64_SELECTOR | USER_RPL;
    register uint64_t usr_flags = USER_EFLAGS;

    //DEBUG_ASSERT(shadow_stack_base == 0);

    /*
     * Clear all general purpose registers except RDI, since RDI carries
     * the parameter to user space.
     *
     * The IRETQ instruction performs the inter-privilege-level return.
     * The input parameter 'flags' is ignored when entering level 3.
     *
     * The LK kernel runs in IA-32e mode; when the iretq instruction is
     * invoked, the processor performs:
     *
     * 1. The IA-32e-mode operation steps, popping RIP/CS/tempRFLAGS:
     *      RIP          <- POP()       --  entry
     *      CS.Selector  <- POP()       --  code_seg
     *      tempRFLAGS   <- POP()       --  usr_flags
     * 2. Since CS.RPL(3) > CPL(0), it continues with the
     *    return-to-outer-privilege-level steps:
     *      RSP          <- POP()       --  sp_usr
     *      SS           <- POP()       --  data_seg
     *      RFLAGS       <- tempRFLAGS
     *      CPL          <- CS.RPL
     *
     * After IRETQ executes, the processor runs at the new RIP in 64-bit
     * mode at privilege level 3.
     *
     * For more details, refer to "IRET/IRETD -- Interrupt Return" in the
     * Intel SDM, Vol. 2 (Instruction Set Reference).
     */
    __asm__ __volatile__ (
            "pushq %0   \n"
            "pushq %1   \n"
            "pushq %2   \n"
            "pushq %3   \n"
            "pushq %4   \n"
            "pushq %5   \n"
            "swapgs \n"
            "xorq %%r15, %%r15 \n"
            "xorq %%r14, %%r14 \n"
            "xorq %%r13, %%r13 \n"
            "xorq %%r12, %%r12 \n"
            "xorq %%r11, %%r11 \n"
            "xorq %%r10, %%r10 \n"
            "xorq %%r9, %%r9 \n"
            "xorq %%r8, %%r8 \n"
            "xorq %%rbp, %%rbp \n"
            "xorq %%rdx, %%rdx \n"
            "xorq %%rcx, %%rcx \n"
            "xorq %%rbx, %%rbx \n"
            "xorq %%rax, %%rax \n"
            "xorq %%rsi, %%rsi \n"
            "popq %%rdi \n"
            "iretq"
            :
            :"r" (data_seg), "r" (sp_usr), "r" (usr_flags),
             "r" (code_seg), "r"(entry), "r" (arg0));

    __UNREACHABLE;
}

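/*
 * x86-64 ELF TLS addresses thread-local data relative to the FS segment,
 * so the user TLS pointer is installed in FS_BASE. It is also saved in the
 * thread struct, presumably so the context-switch path (which lives
 * elsewhere) can restore it when this thread runs again.
 */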
void arch_set_user_tls(vaddr_t tls_ptr)
{
    thread_t *cur_thread = get_current_thread();

    cur_thread->arch.fs_base = tls_ptr;
    write_msr(X86_MSR_FS_BASE, tls_ptr);
}