/*
 * Copyright (c) 2023 Institute of Parallel And Distributed Systems (IPADS), Shanghai Jiao Tong University (SJTU)
 * Licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *     http://license.coscl.org.cn/MulanPSL2
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
 * PURPOSE.
 * See the Mulan PSL v2 for more details.
 */
#include <object/thread.h>
#include <sched/context.h>
#include <sched/sched.h>
#include <mm/kmalloc.h>

struct aarch64_fpu_area {
        /* 32 FPU (SIMD) registers, 16 bytes each */
        u8 fpu[32 * 16];

        /* FP control register (fpcr) and FP status register (fpsr) */
        u32 fpcr;
        u32 fpsr;
};

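/*
 * Allocate and initialize the per-thread FPU save area.
 * is_fpu_owner is set to -1 to indicate that this thread does not own the
 * FPU state of any CPU yet; in lazy FPU mode it later records the id of the
 * CPU whose FPU state the thread owns.
 */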
void arch_init_thread_fpu(struct thread_ctx *ctx)
{
        struct aarch64_fpu_area *buf;

        /*
         * To avoid leaking data from previously freed memory, clear this
         * area (use kzalloc instead of kmalloc).
         */
        ctx->fpu_state = kzalloc(sizeof(struct aarch64_fpu_area));
        ctx->is_fpu_owner = -1;
        buf = (struct aarch64_fpu_area *)ctx->fpu_state;

        buf->fpcr = 0;
        buf->fpsr = 0;
}

void arch_free_thread_fpu(struct thread_ctx *ctx)
{
        kfree(ctx->fpu_state);
}

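/*
 * Save the full FPU/SIMD context (q0-q31, fpcr, fpsr) of @thread into its
 * per-thread fpu_state buffer. Only threads whose type is above TYPE_KERNEL
 * carry FPU state; other threads are skipped.
 */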
void save_fpu_state(struct thread *thread)
{
        if (likely((thread) && (thread->thread_ctx->type > TYPE_KERNEL))) {
                struct aarch64_fpu_area *buf;
                u64 fpcr, fpsr;

                buf = (struct aarch64_fpu_area *)thread->thread_ctx->fpu_state;

                __asm__ volatile("stp q0, q1, [%0, #(0 * 32)]\n"
                                 "stp q2, q3, [%0, #(1 * 32)]\n"
                                 "stp q4, q5, [%0, #(2 * 32)]\n"
                                 "stp q6, q7, [%0, #(3 * 32)]\n"
                                 "stp q8, q9, [%0, #(4 * 32)]\n"
                                 "stp q10, q11, [%0, #(5 * 32)]\n"
                                 "stp q12, q13, [%0, #(6 * 32)]\n"
                                 "stp q14, q15, [%0, #(7 * 32)]\n"
                                 "stp q16, q17, [%0, #(8 * 32)]\n"
                                 "stp q18, q19, [%0, #(9 * 32)]\n"
                                 "stp q20, q21, [%0, #(10 * 32)]\n"
                                 "stp q22, q23, [%0, #(11 * 32)]\n"
                                 "stp q24, q25, [%0, #(12 * 32)]\n"
                                 "stp q26, q27, [%0, #(13 * 32)]\n"
                                 "stp q28, q29, [%0, #(14 * 32)]\n"
                                 "stp q30, q31, [%0, #(15 * 32)]\n"
                                 :
                                 : "r"(buf->fpu)
                                 : "memory");

                /*
                 * fpcr and fpsr are 32-bit registers, but the mrs
                 * instruction always uses a 64-bit destination register.
                 */
                __asm__("mrs %0, fpcr\n" : "=r"(fpcr));
                __asm__("mrs %0, fpsr\n" : "=r"(fpsr));

                buf->fpcr = (u32)fpcr;
                buf->fpsr = (u32)fpsr;
        }
}

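/*
 * Restore the FPU/SIMD context (q0-q31, fpcr, fpsr) of @thread from its
 * per-thread fpu_state buffer. The same TYPE_KERNEL check as in
 * save_fpu_state applies.
 */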
void restore_fpu_state(struct thread *thread)
{
        if (likely((thread) && (thread->thread_ctx->type > TYPE_KERNEL))) {
                struct aarch64_fpu_area *buf;

                buf = (struct aarch64_fpu_area *)thread->thread_ctx->fpu_state;

                __asm__ volatile(
                        "ldp q0, q1, [%0, #(0 * 32)]\n"
                        "ldp q2, q3, [%0, #(1 * 32)]\n"
                        "ldp q4, q5, [%0, #(2 * 32)]\n"
                        "ldp q6, q7, [%0, #(3 * 32)]\n"
                        "ldp q8, q9, [%0, #(4 * 32)]\n"
                        "ldp q10, q11, [%0, #(5 * 32)]\n"
                        "ldp q12, q13, [%0, #(6 * 32)]\n"
                        "ldp q14, q15, [%0, #(7 * 32)]\n"
                        "ldp q16, q17, [%0, #(8 * 32)]\n"
                        "ldp q18, q19, [%0, #(9 * 32)]\n"
                        "ldp q20, q21, [%0, #(10 * 32)]\n"
                        "ldp q22, q23, [%0, #(11 * 32)]\n"
                        "ldp q24, q25, [%0, #(12 * 32)]\n"
                        "ldp q26, q27, [%0, #(13 * 32)]\n"
                        "ldp q28, q29, [%0, #(14 * 32)]\n"
                        "ldp q30, q31, [%0, #(15 * 32)]\n"
                        "msr fpcr, %1\n"
                        "msr fpsr, %2\n"
                        :
                        : "r"(buf->fpu), "r"((u64)buf->fpcr), "r"((u64)buf->fpsr));
        }
}

#if FPU_SAVING_MODE == LAZY_FPU_MODE

/* CPACR_EL1.FPEN (bits [21:20]) controls trapping of FPU/SIMD instructions */
#define CPACR_EL1_FPEN 20
#define CPACR_EL1_FPEN_MASK 0b11

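/*
 * In lazy FPU mode, the kernel does not save/restore FPU state on every
 * context switch. Instead, user-level FPU accesses are trapped via
 * CPACR_EL1.FPEN, and the trap handler (change_fpu_owner) transfers
 * ownership of this CPU's FPU state only when a different thread actually
 * uses the FPU.
 *
 * disable_fpu_usage() turns the EL0 FPU trap on for the current CPU (if it
 * is not already on), so the next user-level FPU instruction faults into
 * the kernel.
 */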
void disable_fpu_usage(void)
{
        struct per_cpu_info *info;
        u32 cpacr = 0;

        info = get_per_cpu_info();

        if (info->fpu_disable == 0) {
                /* Get the current cpacr value */
                asm volatile("mrs %0, cpacr_el1" : "=r"(cpacr)::"memory");
                /* Clear FPEN to trap FPU accesses */
                cpacr &= ~(CPACR_EL1_FPEN_MASK << CPACR_EL1_FPEN);
                /* FPEN = 0b01: trap FPU accesses at EL0 only; EL1 can still use the FPU */
                cpacr |= (1 << CPACR_EL1_FPEN);
                asm volatile("msr cpacr_el1, %0" ::"r"(cpacr) : "memory");

                info->fpu_disable = 1;
        }
}

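/*
 * enable_fpu_usage() clears the FPU trap for the current CPU so that both
 * EL0 and EL1 can execute FPU instructions without faulting.
 */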
void enable_fpu_usage(void)
{
        struct per_cpu_info *info;
        u32 cpacr = 0;

        /* Get the current cpacr value */
        asm volatile("mrs %0, cpacr_el1" : "=r"(cpacr)::"memory");
        /* FPEN = 0b11: do not trap FPU accesses at EL0 or EL1 */
        cpacr |= (CPACR_EL1_FPEN_MASK << CPACR_EL1_FPEN);
        asm volatile("msr cpacr_el1, %0" ::"r"(cpacr) : "memory");

        info = get_per_cpu_info();
        info->fpu_disable = 0;
}

/*
 * This function is used as the handler for FPU traps: it runs when a thread
 * that is not the current FPU owner of this CPU executes an FPU instruction.
 */
void change_fpu_owner(void)
{
        struct per_cpu_info *info;
        struct thread *fpu_owner;
        u32 cpuid;

        enable_fpu_usage();

        cpuid = smp_get_cpu_id();
        lock(&fpu_owner_locks[cpuid]);

        /* Get the current fpu_owner (per CPU) */
        info = get_per_cpu_info();
        fpu_owner = info->fpu_owner;

        /* Schedule sequence: A (fpu_owner) -> B (not using the FPU) -> A */
        if (fpu_owner == current_thread) {
                unlock(&fpu_owner_locks[cpuid]);
                return;
        }

        /* Save the FPU state of the current owner */
        if (fpu_owner) {
                save_fpu_state(fpu_owner);
                /*
                 * A barrier to make sure that the FPU state has been saved
                 * before is_fpu_owner is set to -1.
                 */
                smp_mb();
                fpu_owner->thread_ctx->is_fpu_owner = -1;
        }

        /* Set current_thread as the new owner */
        info->fpu_owner = current_thread;
        unlock(&fpu_owner_locks[cpuid]);

        restore_fpu_state(current_thread);
        current_thread->thread_ctx->is_fpu_owner = cpuid;
}

/*
 * This function is used by the scheduler
 * when it migrates @thread to another CPU.
 *
 * There is no need to acquire the fpu_owner_lock because
 * @thread is an active thread.
 */
void save_and_release_fpu(struct thread *thread)
{
        struct per_cpu_info *info;
        struct thread *fpu_owner;

        BUG_ON(thread->thread_ctx->thread_exit_state == TE_EXITED);

        /* Get the current fpu_owner (per CPU) */
        info = get_per_cpu_info();
        fpu_owner = info->fpu_owner;

        if (fpu_owner == thread) {
                /*
                 * On aarch64, the OS (EL1) is always allowed to operate the
                 * FPU. Thus, there is no need to invoke enable_fpu_usage
                 * before save_fpu_state, as is done on x86_64.
                 */
                save_fpu_state(thread);
                thread->thread_ctx->is_fpu_owner = -1;
                info->fpu_owner = NULL;
                disable_fpu_usage();
        }
}

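/*
 * Save and release whatever thread currently owns this CPU's FPU state.
 * If there is an owner, its state is saved and the FPU trap is re-enabled.
 * Unlike save_and_release_fpu, the per-CPU fpu_owner_lock is taken here.
 */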
void save_and_release_fpu_owner(void)
{
        struct per_cpu_info *info;
        struct thread *fpu_owner;
        u32 cpuid;

        cpuid = smp_get_cpu_id();
        lock(&fpu_owner_locks[cpuid]);

        /* Get the current fpu_owner (per CPU) */
        info = get_per_cpu_info();
        fpu_owner = info->fpu_owner;

        if (fpu_owner) {
                save_fpu_state(fpu_owner);
                fpu_owner->thread_ctx->is_fpu_owner = -1;
                info->fpu_owner = NULL;
                disable_fpu_usage();
        }

        unlock(&fpu_owner_locks[cpuid]);
}

#endif