// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/err.h>
#include <asm/asm-prototypes.h>
#include <asm/kup.h>
#include <asm/cputime.h>
#include <asm/hw_irq.h>
#include <asm/kprobes.h>
#include <asm/paca.h>
#include <asm/ptrace.h>
#include <asm/reg.h>
#include <asm/signal.h>
#include <asm/switch_to.h>
#include <asm/syscall.h>
#include <asm/time.h>
#include <asm/unistd.h>

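/*
 * Every handler fetched from the syscall tables is called through this
 * six-argument shape; handlers that take fewer arguments simply ignore
 * the extra registers.
 */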
typedef long (*syscall_fn)(long, long, long, long, long, long);

/* Has to run notrace because it is entered not completely "reconciled" */
notrace long system_call_exception(long r3, long r4, long r5,
				   long r6, long r7, long r8,
				   unsigned long r0, struct pt_regs *regs)
{
	syscall_fn f;

	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);

	trace_hardirqs_off(); /* finish reconciling */

	if (IS_ENABLED(CONFIG_PPC_BOOK3S))
		BUG_ON(!(regs->msr & MSR_RI));
	BUG_ON(!(regs->msr & MSR_PR));
	BUG_ON(!FULL_REGS(regs));
	BUG_ON(regs->softe != IRQS_ENABLED);

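	/*
	 * Debug check that KUAP left the AMR in the locked (user access
	 * blocked) state on entry from userspace.
	 */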
	kuap_check_amr();

	account_cpu_user_entry();

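	/*
	 * On shared-processor LPARs with native vtime accounting, fold in
	 * any stolen time the hypervisor has logged in the dispatch trace
	 * log since we last looked.
	 */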
#ifdef CONFIG_PPC_SPLPAR
	if (IS_ENABLED(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) &&
	    firmware_has_feature(FW_FEATURE_SPLPAR)) {
		struct lppaca *lp = local_paca->lppaca_ptr;

		if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx)))
			accumulate_stolen_time();
	}
#endif

	/*
	 * This is not required for the syscall exit path, but makes the
	 * stack frame look nicer. If this was initialised in the first stack
	 * frame, or if the unwinder was taught the first stack frame always
	 * returns to user with IRQS_ENABLED, this store could be avoided!
	 */
	regs->softe = IRQS_ENABLED;

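	/*
	 * Entry state is now reconciled, so interrupts can be hard-enabled
	 * for the duration of syscall processing.
	 */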
	local_irq_enable();

	if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
		if (unlikely(regs->trap == 0x7ff0)) {
			/* Unsupported scv vector */
			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
			return regs->gpr[3];
		}
		/*
		 * We use the return value of do_syscall_trace_enter() as the
		 * syscall number. If the syscall was rejected for any reason,
		 * do_syscall_trace_enter() returns an invalid syscall number;
		 * the test against NR_syscalls below then fails, and the
		 * return value to hand back is already in regs->gpr[3].
		 */
		r0 = do_syscall_trace_enter(regs);
		if (unlikely(r0 >= NR_syscalls))
			return regs->gpr[3];
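		/* The tracer may have rewritten the arguments; reload them. */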
		r3 = regs->gpr[3];
		r4 = regs->gpr[4];
		r5 = regs->gpr[5];
		r6 = regs->gpr[6];
		r7 = regs->gpr[7];
		r8 = regs->gpr[8];

	} else if (unlikely(r0 >= NR_syscalls)) {
		if (unlikely(regs->trap == 0x7ff0)) {
			/* Unsupported scv vector */
			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
			return regs->gpr[3];
		}
		return -ENOSYS;
	}

	/* May be faster to do array_index_nospec? */
	barrier_nospec();

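	/*
	 * Compat tasks only define the bottom 32 bits of each argument
	 * register; mask out the (unspecified) top halves before calling
	 * the handler.
	 */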
	if (unlikely(is_32bit_task())) {
		f = (void *)compat_sys_call_table[r0];

		r3 &= 0x00000000ffffffffULL;
		r4 &= 0x00000000ffffffffULL;
		r5 &= 0x00000000ffffffffULL;
		r6 &= 0x00000000ffffffffULL;
		r7 &= 0x00000000ffffffffULL;
		r8 &= 0x00000000ffffffffULL;

	} else {
		f = (void *)sys_call_table[r0];
	}

	return f(r3, r4, r5, r6, r7, r8);
}

/*
 * local irqs must be disabled. Returns false if the caller must re-enable
 * them, check for new work, and try again.
 */
static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri)
{
	/* This must be done with RI=1 because tracing may touch vmaps */
	trace_hardirqs_on();

	/* This pattern matches prep_irq_for_idle */
	if (clear_ri)
		__hard_EE_RI_disable();
	else
		__hard_irq_disable();
	if (unlikely(lazy_irq_pending_nocheck())) {
		/* Took an interrupt, may have more exit work to do. */
		if (clear_ri)
			__hard_RI_enable();
		trace_hardirqs_off();
		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;

		return false;
	}
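	/*
	 * Nothing became pending while hard-disabled, so the soft-mask
	 * state can be declared fully enabled for the exit.
	 */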
	local_paca->irq_happened = 0;
	irq_soft_mask_set(IRQS_ENABLED);

	return true;
}

/*
 * This should be called after a syscall returns, with r3 the return value
 * from the syscall. If this function returns non-zero, the system call
 * exit assembly should additionally load all GPRs, as well as CTR and XER,
 * from the interrupt frame.
 *
 * The function graph tracer cannot trace the return side of this function,
 * because RI=0 and the soft-mask state is "unreconciled", so it is marked
 * notrace.
 */
notrace unsigned long syscall_exit_prepare(unsigned long r3,
					   struct pt_regs *regs,
					   long scv)
{
	unsigned long *ti_flagsp = &current_thread_info()->flags;
	unsigned long ti_flags;
	unsigned long ret = 0;

	kuap_check_amr();

	regs->result = r3;

	/* Check whether the syscall is issued inside a restartable sequence */
	rseq_syscall(regs);

	ti_flags = *ti_flagsp;

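	/*
	 * The sc ABI reports errors by setting CR0.SO and returning the
	 * errno as a positive value; scv returns negative errnos directly,
	 * so its return value is left untouched here.
	 */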
	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && !scv) {
		if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
			r3 = -r3;
			regs->ccr |= 0x10000000; /* Set SO bit in CR */
		}
	}

	if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
		if (ti_flags & _TIF_RESTOREALL)
			ret = _TIF_RESTOREALL;
		else
			regs->gpr[3] = r3;
		clear_bits(_TIF_PERSYSCALL_MASK, ti_flagsp);
	} else {
		regs->gpr[3] = r3;
	}

	if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
		do_syscall_trace_leave(regs);
		ret |= _TIF_RESTOREALL;
	}

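	/*
	 * Interrupts must be off while the work flags are sampled and acted
	 * on; loop until no user work remains.
	 */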
again:
	local_irq_disable();
	ti_flags = READ_ONCE(*ti_flagsp);
	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
		local_irq_enable();
		if (ti_flags & _TIF_NEED_RESCHED) {
			schedule();
		} else {
			/*
			 * SIGPENDING must restore signal handler function
			 * argument GPRs, and some non-volatiles (e.g., r1).
			 * Restore all for now. This could be made lighter.
			 */
			if (ti_flags & _TIF_SIGPENDING)
				ret |= _TIF_RESTOREALL;
			do_notify_resume(regs, ti_flags);
		}
		local_irq_disable();
		ti_flags = READ_ONCE(*ti_flagsp);
	}

	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
				unlikely((ti_flags & _TIF_RESTORE_TM))) {
			restore_tm_state(regs);
		} else {
			unsigned long mathflags = MSR_FP;

			if (cpu_has_feature(CPU_FTR_VSX))
				mathflags |= MSR_VEC | MSR_VSX;
			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
				mathflags |= MSR_VEC;

			/*
			 * If the userspace MSR has all available FP bits set,
			 * then they are live and there is no need to restore.
			 * If not, it means the regs were given up, and
			 * restore_math may decide to restore them (to avoid
			 * taking an FP fault).
			 */
			if ((regs->msr & mathflags) != mathflags)
				restore_math(regs);
		}
	}

	/* scv need not set RI=0 because SRRs are not used */
	if (unlikely(!prep_irq_for_enabled_exit(!scv))) {
		local_irq_enable();
		goto again;
	}

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
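	/*
	 * Record the MSR we are returning with, for the benefit of the TM
	 * debugging checks in the low-level exit code.
	 */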
	local_paca->tm_scratch = regs->msr;
#endif

	account_cpu_user_exit();

	return ret;
}

#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */
notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
{
#ifdef CONFIG_PPC_BOOK3E
	struct thread_struct *ts = &current->thread;
#endif
	unsigned long *ti_flagsp = &current_thread_info()->flags;
	unsigned long ti_flags;
	unsigned long flags;
	unsigned long ret = 0;

	if (IS_ENABLED(CONFIG_PPC_BOOK3S))
		BUG_ON(!(regs->msr & MSR_RI));
	BUG_ON(!(regs->msr & MSR_PR));
	BUG_ON(!FULL_REGS(regs));
	BUG_ON(regs->softe != IRQS_ENABLED);

	/*
	 * We don't need to restore AMR on the way back to userspace for KUAP.
	 * AMR can only have been unlocked if we interrupted the kernel.
	 */
	kuap_check_amr();

	local_irq_save(flags);

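	/*
	 * As on the syscall exit path, loop with interrupts disabled until
	 * no reschedule or signal work remains.
	 */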
again:
	ti_flags = READ_ONCE(*ti_flagsp);
	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
		local_irq_enable(); /* returning to user: may enable */
		if (ti_flags & _TIF_NEED_RESCHED) {
			schedule();
		} else {
			if (ti_flags & _TIF_SIGPENDING)
				ret |= _TIF_RESTOREALL;
			do_notify_resume(regs, ti_flags);
		}
		local_irq_disable();
		ti_flags = READ_ONCE(*ti_flagsp);
	}

	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
				unlikely((ti_flags & _TIF_RESTORE_TM))) {
			restore_tm_state(regs);
		} else {
			unsigned long mathflags = MSR_FP;

			if (cpu_has_feature(CPU_FTR_VSX))
				mathflags |= MSR_VEC | MSR_VSX;
			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
				mathflags |= MSR_VEC;

			/* See above restore_math comment */
			if ((regs->msr & mathflags) != mathflags)
				restore_math(regs);
		}
	}

	if (unlikely(!prep_irq_for_enabled_exit(true))) {
		local_irq_enable();
		local_irq_disable();
		goto again;
	}

#ifdef CONFIG_PPC_BOOK3E
	if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) {
		/*
		 * Check to see if the dbcr0 register is set up to debug.
		 * Use the internal debug mode bit to do this.
		 */
		mtmsr(mfmsr() & ~MSR_DE);
		mtspr(SPRN_DBCR0, ts->debug.dbcr0);
		mtspr(SPRN_DBSR, -1);
	}
#endif

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	account_cpu_user_exit();

	return ret;
}

void unrecoverable_exception(struct pt_regs *regs);
void preempt_schedule_irq(void);

notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
{
	unsigned long *ti_flagsp = &current_thread_info()->flags;
	unsigned long flags;
	unsigned long ret = 0;
	unsigned long amr;

	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && unlikely(!(regs->msr & MSR_RI)))
		unrecoverable_exception(regs);
	BUG_ON(regs->msr & MSR_PR);
	BUG_ON(!FULL_REGS(regs));

	amr = kuap_get_and_check_amr();

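	/*
	 * A prior instruction emulation deferred a store to this stack
	 * frame; returning 1 tells the asm exit path to perform it once
	 * the frame is no longer live.
	 */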
	if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) {
		clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp);
		ret = 1;
	}

	local_irq_save(flags);

	if (regs->softe == IRQS_ENABLED) {
		/* Returning to a kernel context with local irqs enabled. */
		WARN_ON_ONCE(!(regs->msr & MSR_EE));
again:
		if (IS_ENABLED(CONFIG_PREEMPT)) {
			/* Return to preemptible kernel context */
			if (unlikely(*ti_flagsp & _TIF_NEED_RESCHED)) {
				if (preempt_count() == 0)
					preempt_schedule_irq();
			}
		}

		if (unlikely(!prep_irq_for_enabled_exit(true))) {
			/*
			 * Can't local_irq_restore to replay if we were in
			 * interrupt context. Must replay directly.
			 */
			if (irqs_disabled_flags(flags)) {
				replay_soft_interrupts();
			} else {
				local_irq_restore(flags);
				local_irq_save(flags);
			}
			/* Took an interrupt, may have more exit work to do. */
			goto again;
		}
	} else {
		/* Returning to a kernel context with local irqs disabled. */
		__hard_EE_RI_disable();
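		/*
		 * Returning to a soft-disabled context that had MSR[EE]=1:
		 * once the interrupt return restores that MSR, interrupts
		 * are hard-enabled again, so PACA_IRQ_HARD_DIS must not
		 * stay set.
		 */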
		if (regs->msr & MSR_EE)
			local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
	}

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	/*
	 * We don't want to mfspr(SPRN_AMR) here, because it comes after the
	 * mtmsr above and would cause a Read-After-Write stall. Instead, use
	 * the AMR value sampled by kuap_get_and_check_amr() above.
	 */
	kuap_restore_amr(regs, amr);

	return ret;
}
#endif