/*
 *  Copyright (C) 1994 Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *  General FPU state handling cleanups
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */
#include <asm/fpu/internal.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/signal.h>
#include <asm/traps.h>

#include <linux/hardirq.h>

/*
 * Represents the initial FPU state. It's mostly (but not completely) zeroes,
 * depending on the FPU hardware format:
 */
union fpregs_state init_fpstate __read_mostly;

/*
 * Track whether the kernel is using the FPU state
 * currently.
 *
 * This flag is used:
 *
 *   - by IRQ context code to potentially use the FPU
 *     if it's unused.
 *
 *   - to debug kernel_fpu_begin()/end() correctness
 */
static DEFINE_PER_CPU(bool, in_kernel_fpu);

/*
 * Track which context is using the FPU on the CPU:
 */
DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);

static void kernel_fpu_disable(void)
{
	WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
	this_cpu_write(in_kernel_fpu, true);
}

static void kernel_fpu_enable(void)
{
	WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
	this_cpu_write(in_kernel_fpu, false);
}

static bool kernel_fpu_disabled(void)
{
	return this_cpu_read(in_kernel_fpu);
}

static bool interrupted_kernel_fpu_idle(void)
{
	return !kernel_fpu_disabled();
}

/*
 * Were we in user mode (or vm86 mode) when we were
 * interrupted?
 *
 * Doing kernel_fpu_begin/end() is ok if we are running
 * in an interrupt context from user mode - we'll just
 * save the FPU state as required.
 */
static bool interrupted_user_mode(void)
{
	struct pt_regs *regs = get_irq_regs();
	return regs && user_mode(regs);
}

/*
 * Can we use the FPU in kernel mode with the
 * whole "kernel_fpu_begin/end()" sequence?
 *
 * It's always ok in process context (ie "not interrupt")
 * but it is sometimes ok even from an irq.
 */
bool irq_fpu_usable(void)
{
	return !in_interrupt() ||
		interrupted_user_mode() ||
		interrupted_kernel_fpu_idle();
}
EXPORT_SYMBOL(irq_fpu_usable);
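
/*
 * Hypothetical usage sketch (not part of this file): interrupt-context
 * code that wants to touch SIMD registers is expected to check
 * irq_fpu_usable() before opening a kernel_fpu_begin()/end() section,
 * and to fall back to an integer-only path otherwise:
 *
 *	if (irq_fpu_usable()) {
 *		kernel_fpu_begin();
 *		...use XMM/YMM registers for the fast path...
 *		kernel_fpu_end();
 *	} else {
 *		...plain integer fallback...
 *	}
 */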

void __kernel_fpu_begin(void)
{
	struct fpu *fpu = &current->thread.fpu;

	WARN_ON_FPU(!irq_fpu_usable());

	kernel_fpu_disable();

	if (fpu->fpregs_active) {
		/*
		 * Ignore return value -- we don't care if reg state
		 * is clobbered.
		 */
		copy_fpregs_to_fpstate(fpu);
	} else {
		this_cpu_write(fpu_fpregs_owner_ctx, NULL);
	}
}
EXPORT_SYMBOL(__kernel_fpu_begin);

void __kernel_fpu_end(void)
{
	struct fpu *fpu = &current->thread.fpu;

	if (fpu->fpregs_active)
		copy_kernel_to_fpregs(&fpu->state);

	kernel_fpu_enable();
}
EXPORT_SYMBOL(__kernel_fpu_end);

void kernel_fpu_begin(void)
{
	preempt_disable();
	__kernel_fpu_begin();
}
EXPORT_SYMBOL_GPL(kernel_fpu_begin);

void kernel_fpu_end(void)
{
	__kernel_fpu_end();
	preempt_enable();
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);
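
/*
 * Illustrative sketch of a (hypothetical) process-context caller, such as
 * a crypto or RAID driver: kernel_fpu_begin()/kernel_fpu_end() bracket the
 * FPU/SIMD section and keep preemption disabled for its duration, so the
 * section must not sleep and should be kept short:
 *
 *	kernel_fpu_begin();
 *	...XOR/checksum the buffers with SSE/AVX instructions...
 *	kernel_fpu_end();
 */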

/*
 * CR0::TS save/restore functions:
 */
int irq_ts_save(void)
{
	/*
	 * If in process context and not atomic, we can take a spurious DNA fault.
	 * Otherwise, doing clts() in process context requires disabling preemption
	 * or some heavy lifting like kernel_fpu_begin()
	 */
	if (!in_atomic())
		return 0;

	if (read_cr0() & X86_CR0_TS) {
		clts();
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(irq_ts_save);

void irq_ts_restore(int TS_state)
{
	if (TS_state)
		stts();
}
EXPORT_SYMBOL_GPL(irq_ts_restore);
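
/*
 * Hypothetical example (not from this file): an atomic-context user that
 * only needs CR0.TS cleared around a short FPU access, rather than a full
 * kernel_fpu_begin() section, pairs the two helpers like this:
 *
 *	int ts_state;
 *
 *	ts_state = irq_ts_save();
 *	...touch FPU state without risking a Device Not Available fault...
 *	irq_ts_restore(ts_state);
 */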

/*
 * Save the FPU state (mark it for reload if necessary):
 *
 * This only ever gets called for the current task.
 */
void fpu__save(struct fpu *fpu)
{
	WARN_ON_FPU(fpu != &current->thread.fpu);

	preempt_disable();
	if (fpu->fpregs_active) {
		if (!copy_fpregs_to_fpstate(fpu)) {
			copy_kernel_to_fpregs(&fpu->state);
		}
	}
	preempt_enable();
}
EXPORT_SYMBOL_GPL(fpu__save);

/*
 * Legacy x87 fpstate state init:
 */
static inline void fpstate_init_fstate(struct fregs_state *fp)
{
	fp->cwd = 0xffff037fu;
	fp->swd = 0xffff0000u;
	fp->twd = 0xffffffffu;
	fp->fos = 0xffff0000u;
}

void fpstate_init(union fpregs_state *state)
{
	if (!cpu_has_fpu) {
		fpstate_init_soft(&state->soft);
		return;
	}

	memset(state, 0, xstate_size);

	if (cpu_has_fxsr)
		fpstate_init_fxstate(&state->fxsave);
	else
		fpstate_init_fstate(&state->fsave);
}
EXPORT_SYMBOL_GPL(fpstate_init);

/*
 * Copy the current task's FPU state to a new task's FPU context.
 *
 * In both the 'eager' and the 'lazy' case we save hardware registers
 * directly to the destination buffer.
 */
static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
{
	WARN_ON_FPU(src_fpu != &current->thread.fpu);

	/*
	 * Don't let 'init optimized' areas of the XSAVE area
	 * leak into the child task:
	 */
	memset(&dst_fpu->state.xsave, 0, xstate_size);

	/*
	 * Save current FPU registers directly into the child
	 * FPU context, without any memory-to-memory copying.
	 *
	 * If the FPU context got destroyed in the process (FNSAVE
	 * done on old CPUs) then copy it back into the source
	 * context and mark the current task for lazy restore.
	 *
	 * We have to do all this with preemption disabled,
	 * mostly because of the FNSAVE case, because in that
	 * case we must not allow preemption in the window
	 * between the FNSAVE and us marking the context lazy.
	 *
	 * It shouldn't be an issue as even FNSAVE is plenty
	 * fast in terms of critical section length.
	 */
	preempt_disable();
	if (!copy_fpregs_to_fpstate(dst_fpu)) {
		memcpy(&src_fpu->state, &dst_fpu->state, xstate_size);

		copy_kernel_to_fpregs(&src_fpu->state);
	}
	preempt_enable();
}

int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
{
	dst_fpu->fpregs_active = 0;
	dst_fpu->last_cpu = -1;

	if (src_fpu->fpstate_active && cpu_has_fpu)
		fpu_copy(dst_fpu, src_fpu);

	return 0;
}

/*
 * Activate the current task's in-memory FPU context,
 * if it has not been used before:
 */
void fpu__activate_curr(struct fpu *fpu)
{
	WARN_ON_FPU(fpu != &current->thread.fpu);

	if (!fpu->fpstate_active) {
		fpstate_init(&fpu->state);

		/* Safe to do for the current task: */
		fpu->fpstate_active = 1;
	}
}
EXPORT_SYMBOL_GPL(fpu__activate_curr);

/*
 * This function must be called before we read a task's fpstate.
 *
 * If the task has not used the FPU before then initialize its
 * fpstate.
 *
 * If the task has used the FPU before then save it.
 */
void fpu__activate_fpstate_read(struct fpu *fpu)
{
	/*
	 * If fpregs are active (in the current CPU), then
	 * copy them to the fpstate:
	 */
	if (fpu->fpregs_active) {
		fpu__save(fpu);
	} else {
		if (!fpu->fpstate_active) {
			fpstate_init(&fpu->state);

			/* Safe to do for current and for stopped child tasks: */
			fpu->fpstate_active = 1;
		}
	}
}

/*
 * This function must be called before we write a task's fpstate.
 *
 * If the task has used the FPU before then unlazy it.
 * If the task has not used the FPU before then initialize its fpstate.
 *
 * After this function call, after registers in the fpstate are
 * modified and the child task has woken up, the child task will
 * restore the modified FPU state from the modified context. If we
 * didn't clear its lazy status here then the lazy in-registers
 * state pending on its former CPU could be restored, corrupting
 * the modifications.
 */
void fpu__activate_fpstate_write(struct fpu *fpu)
{
	/*
	 * Only stopped child tasks can be used to modify the FPU
	 * state in the fpstate buffer:
	 */
	WARN_ON_FPU(fpu == &current->thread.fpu);

	if (fpu->fpstate_active) {
		/* Invalidate any lazy state: */
		fpu->last_cpu = -1;
	} else {
		fpstate_init(&fpu->state);

		/* Safe to do for stopped child tasks: */
		fpu->fpstate_active = 1;
	}
}

/*
 * 'fpu__restore()' is called to copy FPU registers from
 * the FPU fpstate to the live hw registers and to activate
 * access to the hardware registers, so that FPU instructions
 * can be used afterwards.
 *
 * Must be called with kernel preemption disabled (for example
 * with local interrupts disabled, as it is in the case of
 * do_device_not_available()).
 */
void fpu__restore(struct fpu *fpu)
{
	fpu__activate_curr(fpu);

	/* Avoid __kernel_fpu_begin() right after fpregs_activate() */
	kernel_fpu_disable();
	fpregs_activate(fpu);
	copy_kernel_to_fpregs(&fpu->state);
	kernel_fpu_enable();
}
EXPORT_SYMBOL_GPL(fpu__restore);

/*
 * Drops current FPU state: deactivates the fpregs and
 * the fpstate. NOTE: it still leaves previous contents
 * in the fpregs in the eager-FPU case.
 *
 * This function can be used in cases where we know that
 * a state-restore is coming: either an explicit one,
 * or a reschedule.
 */
void fpu__drop(struct fpu *fpu)
{
	preempt_disable();

	if (fpu->fpregs_active) {
		/* Ignore delayed exceptions from user space */
		asm volatile("1: fwait\n"
			     "2:\n"
			     _ASM_EXTABLE(1b, 2b));
		fpregs_deactivate(fpu);
	}

	fpu->fpstate_active = 0;

	preempt_enable();
}

/*
 * Clear FPU registers by setting them up from
 * the init fpstate:
 */
static inline void copy_init_fpstate_to_fpregs(void)
{
	if (use_xsave())
		copy_kernel_to_xregs(&init_fpstate.xsave, -1);
	else if (static_cpu_has(X86_FEATURE_FXSR))
		copy_kernel_to_fxregs(&init_fpstate.fxsave);
	else
		copy_kernel_to_fregs(&init_fpstate.fsave);
}

/*
 * Clear the FPU state back to init state.
 *
 * Called by sys_execve(), by the signal handler code and by various
 * error paths.
 */
void fpu__clear(struct fpu *fpu)
{
	WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */

	if (!static_cpu_has(X86_FEATURE_FPU)) {
		/* FPU state will be reallocated lazily at the first use. */
		fpu__drop(fpu);
	} else {
		if (!fpu->fpstate_active) {
			fpu__activate_curr(fpu);
			user_fpu_begin();
		}
		copy_init_fpstate_to_fpregs();
	}
}

/*
 * x87 math exception handling:
 */

static inline unsigned short get_fpu_cwd(struct fpu *fpu)
{
	if (cpu_has_fxsr) {
		return fpu->state.fxsave.cwd;
	} else {
		return (unsigned short)fpu->state.fsave.cwd;
	}
}

static inline unsigned short get_fpu_swd(struct fpu *fpu)
{
	if (cpu_has_fxsr) {
		return fpu->state.fxsave.swd;
	} else {
		return (unsigned short)fpu->state.fsave.swd;
	}
}

static inline unsigned short get_fpu_mxcsr(struct fpu *fpu)
{
	if (cpu_has_xmm) {
		return fpu->state.fxsave.mxcsr;
	} else {
		return MXCSR_DEFAULT;
	}
}

int fpu__exception_code(struct fpu *fpu, int trap_nr)
{
	int err;

	if (trap_nr == X86_TRAP_MF) {
		unsigned short cwd, swd;
		/*
		 * (~cwd & swd) will mask out exceptions that are not set to unmasked
		 * status.  0x3f is the exception bits in these regs, 0x200 is the
		 * C1 reg you need in case of a stack fault, 0x040 is the stack
		 * fault bit.  We should only be taking one exception at a time,
		 * so if this combination doesn't produce any single exception,
		 * then we have a bad program that isn't synchronizing its FPU usage
		 * and it will suffer the consequences since we won't be able to
		 * fully reproduce the context of the exception
		 */
		cwd = get_fpu_cwd(fpu);
		swd = get_fpu_swd(fpu);

		err = swd & ~cwd;
	} else {
		/*
		 * The SIMD FPU exceptions are handled a little differently, as there
		 * is only a single status/control register.  Thus, to determine which
		 * unmasked exception was caught we must mask the exception mask bits
		 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
		 */
		unsigned short mxcsr = get_fpu_mxcsr(fpu);
		err = ~(mxcsr >> 7) & mxcsr;
	}

	if (err & 0x001) {	/* Invalid op */
		/*
		 * swd & 0x240 == 0x040: Stack Underflow
		 * swd & 0x240 == 0x240: Stack Overflow
		 * User must clear the SF bit (0x40) if set
		 */
		return FPE_FLTINV;
	} else if (err & 0x004) { /* Divide by Zero */
		return FPE_FLTDIV;
	} else if (err & 0x008) { /* Overflow */
		return FPE_FLTOVF;
	} else if (err & 0x012) { /* Denormal, Underflow */
		return FPE_FLTUND;
	} else if (err & 0x020) { /* Precision */
		return FPE_FLTRES;
	}

	/*
	 * If we're using IRQ 13, or supposedly even some trap
	 * X86_TRAP_MF implementations, it's possible
	 * we get a spurious trap, which is not an error.
	 */
	return 0;
}
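
/*
 * Worked example of the masking logic in fpu__exception_code() (the values
 * below are illustrative, not taken from a real trap):
 *
 * x87 (#MF) case: starting from the default control word 0x037f a program
 * unmasks divide-by-zero, so cwd = 0x037b.  A divide-by-zero then sets ZE
 * in the status word, e.g. swd = 0x0004.  err = swd & ~cwd has bit 0x004
 * set, so the function returns FPE_FLTDIV.
 *
 * SIMD (#XF) case: MXCSR = 0x1d84 (default 0x1f80 with ZM cleared and ZE
 * set).  mxcsr >> 7 = 0x3b aligns the mask bits with the flag bits, and
 * err = ~0x3b & 0x1d84 leaves flag bit 0x004 set, again yielding
 * FPE_FLTDIV.
 */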