/*
 * Copyright (c) 2023 Institute of Parallel And Distributed Systems (IPADS), Shanghai Jiao Tong University (SJTU)
 * Licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *     http://license.coscl.org.cn/MulanPSL2
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
 * PURPOSE.
 * See the Mulan PSL v2 for more details.
 */
#include <object/thread.h>
#include <sched/context.h>
#include <sched/sched.h>
#include <mm/kmalloc.h>

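/*
 * Per-thread FPU save area: the 32 SIMD/FP vector registers (q0-q31)
 * plus the FPCR/FPSR control and status registers.
 */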
struct aarch64_fpu_area {
    /* 32 SIMD/FP registers (q0-q31), 16 bytes each */
    u8 fpu[32 * 16];

    /* FP control and status registers */
    u32 fpcr;
    u32 fpsr;
};

void arch_init_thread_fpu(struct thread_ctx *ctx)
{
    struct aarch64_fpu_area *buf;

    /*
     * To avoid leaking stale data to the new thread, clear this area
     * (use kzalloc instead of kmalloc).
     */
    ctx->fpu_state = kzalloc(sizeof(struct aarch64_fpu_area));
    /* Defensive check: the allocation may fail under memory pressure */
    BUG_ON(ctx->fpu_state == NULL);
    ctx->is_fpu_owner = -1;
    buf = (struct aarch64_fpu_area *)ctx->fpu_state;

    buf->fpcr = 0;
    buf->fpsr = 0;
}

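/* Free the per-thread FPU save area allocated in arch_init_thread_fpu. */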
void arch_free_thread_fpu(struct thread_ctx *ctx)
{
    kfree(ctx->fpu_state);
}

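/*
 * Save q0-q31 and FPCR/FPSR of @thread into its per-thread save area.
 * Only threads with a type above TYPE_KERNEL carry FPU state to save.
 */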
void save_fpu_state(struct thread *thread)
{
    if (likely((thread) && (thread->thread_ctx->type > TYPE_KERNEL))) {
        struct aarch64_fpu_area *buf;
        u64 fpcr, fpsr;

        buf = (struct aarch64_fpu_area *)thread->thread_ctx->fpu_state;

        __asm__ volatile("stp     q0, q1, [%0, #(0 * 32)]\n"
                         "stp     q2, q3, [%0, #(1 * 32)]\n"
                         "stp     q4, q5, [%0, #(2 * 32)]\n"
                         "stp     q6, q7, [%0, #(3 * 32)]\n"
                         "stp     q8, q9, [%0, #(4 * 32)]\n"
                         "stp     q10, q11, [%0, #(5 * 32)]\n"
                         "stp     q12, q13, [%0, #(6 * 32)]\n"
                         "stp     q14, q15, [%0, #(7 * 32)]\n"
                         "stp     q16, q17, [%0, #(8 * 32)]\n"
                         "stp     q18, q19, [%0, #(9 * 32)]\n"
                         "stp     q20, q21, [%0, #(10 * 32)]\n"
                         "stp     q22, q23, [%0, #(11 * 32)]\n"
                         "stp     q24, q25, [%0, #(12 * 32)]\n"
                         "stp     q26, q27, [%0, #(13 * 32)]\n"
                         "stp     q28, q29, [%0, #(14 * 32)]\n"
                         "stp     q30, q31, [%0, #(15 * 32)]\n"
                         :
                         : "r"(buf->fpu)
                         : "memory");

        /*
         * FPCR and FPSR are 32-bit registers, but the mrs
         * instruction always uses a 64-bit destination register,
         * so read them into u64 variables and truncate.
         */
        __asm__("mrs %0, fpcr\n" : "=r"(fpcr));
        __asm__("mrs %0, fpsr\n" : "=r"(fpsr));

        buf->fpcr = (u32)fpcr;
        buf->fpsr = (u32)fpsr;
    }
}

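/* Restore q0-q31 and FPCR/FPSR of @thread from its per-thread save area. */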
void restore_fpu_state(struct thread *thread)
{
    if (likely((thread) && (thread->thread_ctx->type > TYPE_KERNEL))) {
        struct aarch64_fpu_area *buf;

        buf = (struct aarch64_fpu_area *)thread->thread_ctx->fpu_state;

        __asm__ volatile(
            "ldp     q0, q1, [%0, #(0 * 32)]\n"
            "ldp     q2, q3, [%0, #(1 * 32)]\n"
            "ldp     q4, q5, [%0, #(2 * 32)]\n"
            "ldp     q6, q7, [%0, #(3 * 32)]\n"
            "ldp     q8, q9, [%0, #(4 * 32)]\n"
            "ldp     q10, q11, [%0, #(5 * 32)]\n"
            "ldp     q12, q13, [%0, #(6 * 32)]\n"
            "ldp     q14, q15, [%0, #(7 * 32)]\n"
            "ldp     q16, q17, [%0, #(8 * 32)]\n"
            "ldp     q18, q19, [%0, #(9 * 32)]\n"
            "ldp     q20, q21, [%0, #(10 * 32)]\n"
            "ldp     q22, q23, [%0, #(11 * 32)]\n"
            "ldp     q24, q25, [%0, #(12 * 32)]\n"
            "ldp     q26, q27, [%0, #(13 * 32)]\n"
            "ldp     q28, q29, [%0, #(14 * 32)]\n"
            "ldp     q30, q31, [%0, #(15 * 32)]\n"
            "msr     fpcr, %1\n"
            "msr     fpsr, %2\n"
            :
            : "r"(buf->fpu), "r"((u64)buf->fpcr), "r"((u64)buf->fpsr));
    }
}

#if FPU_SAVING_MODE == LAZY_FPU_MODE

#define CPACR_EL1_FPEN      20
#define CPACR_EL1_FPEN_MASK 0b11
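
/*
 * CPACR_EL1.FPEN occupies bits [21:20]: 0b11 disables trapping of
 * FPU/SIMD instructions entirely, while 0b01 traps accesses from EL0
 * but still allows EL1 to use the FPU. In lazy mode, the kernel keeps
 * EL0 accesses trapped until a thread actually touches the FPU; the
 * trap handler (change_fpu_owner) then switches FPU ownership.
 */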
void disable_fpu_usage(void)
{
    struct per_cpu_info *info;
    u64 cpacr = 0; /* mrs/msr transfer via a 64-bit general-purpose register */

    info = get_per_cpu_info();

    if (info->fpu_disable == 0) {
        /* Get the current cpacr_el1 value */
        asm volatile("mrs %0, cpacr_el1" : "=r"(cpacr)::"memory");
        /* Trap FPU accesses from EL0 ... */
        cpacr &= ~(CPACR_EL1_FPEN_MASK << CPACR_EL1_FPEN);
        /* ... but still allow EL1 to use the FPU (FPEN = 0b01) */
        cpacr |= (1 << CPACR_EL1_FPEN);
        asm volatile("msr cpacr_el1, %0" ::"r"(cpacr) : "memory");

        info->fpu_disable = 1;
    }
}

void enable_fpu_usage(void)
{
    struct per_cpu_info *info;
    u64 cpacr = 0; /* mrs/msr transfer via a 64-bit general-purpose register */

    /* Get the current cpacr_el1 value */
    asm volatile("mrs %0, cpacr_el1" : "=r"(cpacr)::"memory");
    /* Enable FPU usage at both EL0 and EL1 (FPEN = 0b11) */
    cpacr |= (CPACR_EL1_FPEN_MASK << CPACR_EL1_FPEN);
    asm volatile("msr cpacr_el1, %0" ::"r"(cpacr) : "memory");

    info = get_per_cpu_info();
    info->fpu_disable = 0;
}

/* This function is used as the handler for FPU traps. */
void change_fpu_owner(void)
{
    struct per_cpu_info *info;
    struct thread *fpu_owner;
    u32 cpuid;

    enable_fpu_usage();

    cpuid = smp_get_cpu_id();
    lock(&fpu_owner_locks[cpuid]);

    /* Get the current fpu_owner (per CPU) */
    info = get_per_cpu_info();
    fpu_owner = info->fpu_owner;

    /* A (fpu_owner) -> B (not using the FPU) -> A */
    if (fpu_owner == current_thread) {
        unlock(&fpu_owner_locks[cpuid]);
        return;
    }

    /* Save the FPU state for the current owner */
    if (fpu_owner) {
        save_fpu_state(fpu_owner);
        /*
         * A barrier to make sure that the FPU state has been saved
         * before is_fpu_owner is set to -1.
         */
        smp_mb();
        fpu_owner->thread_ctx->is_fpu_owner = -1;
    }

    /* Set current_thread as the new owner */
    info->fpu_owner = current_thread;
    unlock(&fpu_owner_locks[cpuid]);

    restore_fpu_state(current_thread);
    current_thread->thread_ctx->is_fpu_owner = cpuid;
}

/*
 * This function is used by the scheduler
 * when it migrates @thread to another CPU.
 *
 * No need to acquire the fpu_owner_lock because
 * @thread is an active thread.
 */
void save_and_release_fpu(struct thread *thread)
{
    struct per_cpu_info *info;
    struct thread *fpu_owner;

    BUG_ON(thread->thread_ctx->thread_exit_state == TE_EXITED);

    /* Get the current fpu_owner (per CPU) */
    info = get_per_cpu_info();
    fpu_owner = info->fpu_owner;

    if (fpu_owner == thread) {
        /*
         * On aarch64, the OS (EL1) is always allowed to use the FPU.
         * Thus, there is no need to invoke enable_fpu_usage before
         * save_fpu_state as is done on x86_64.
         */
        save_fpu_state(thread);
        thread->thread_ctx->is_fpu_owner = -1;
        info->fpu_owner = NULL;
        disable_fpu_usage();
    }
}

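/*
 * Save and release the FPU state of whichever thread currently owns
 * the FPU on this CPU, then disable FPU usage so that the next user
 * traps and triggers change_fpu_owner.
 */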
void save_and_release_fpu_owner(void)
{
    struct per_cpu_info *info;
    struct thread *fpu_owner;
    u32 cpuid;

    cpuid = smp_get_cpu_id();
    lock(&fpu_owner_locks[cpuid]);

    /* Get the current fpu_owner (per CPU) */
    info = get_per_cpu_info();
    fpu_owner = info->fpu_owner;

    if (fpu_owner) {
        save_fpu_state(fpu_owner);
        fpu_owner->thread_ctx->is_fpu_owner = -1;
        info->fpu_owner = NULL;
        disable_fpu_usage();
    }

    unlock(&fpu_owner_locks[cpuid]);
}

#endif