/*
 *  Copyright (C) 1994 Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *  General FPU state handling cleanups
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */
#include <linux/module.h>
#include <linux/regset.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include <asm/sigcontext.h>
#include <asm/processor.h>
#include <asm/math_emu.h>
#include <asm/uaccess.h>
#include <asm/ptrace.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/user.h>

/*
 * Were we in an interrupt that interrupted kernel mode?
 *
 * If so, we can do a kernel_fpu_begin/end() pair *ONLY* if that
 * pair does nothing at all: the thread must not have fpu (so
 * that we don't try to save the FPU state), and TS must
 * be set (so that the clts/stts pair does nothing that is
 * visible in the interrupted kernel thread).
 *
 * Except for the eagerfpu case when we return true unless we've already
 * been eager and saved the state in kernel_fpu_begin().
 */
static inline bool interrupted_kernel_fpu_idle(void)
{
	if (use_eager_fpu())
		return __thread_has_fpu(current);

	return !__thread_has_fpu(current) &&
		(read_cr0() & X86_CR0_TS);
}

/*
 * Were we in user mode (or vm86 mode) when we were
 * interrupted?
 *
 * Doing kernel_fpu_begin/end() is ok if we are running
 * in an interrupt context from user mode - we'll just
 * save the FPU state as required.
 */
static inline bool interrupted_user_mode(void)
{
	struct pt_regs *regs = get_irq_regs();
	return regs && user_mode_vm(regs);
}

/*
 * Can we use the FPU in kernel mode with the
 * whole "kernel_fpu_begin/end()" sequence?
 *
 * It's always ok in process context (ie "not interrupt")
 * but it is sometimes ok even from an irq.
 */
bool irq_fpu_usable(void)
{
	return !in_interrupt() ||
		interrupted_user_mode() ||
		interrupted_kernel_fpu_idle();
}
EXPORT_SYMBOL(irq_fpu_usable);
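
/*
 * Editor's sketch (not part of the original file): the usual caller
 * pattern guards kernel-mode FPU/SIMD use with the check above and
 * falls back to integer code when the FPU is off limits.  The routine
 * my_simd_copy() is hypothetical; real examples of this pattern live
 * under arch/x86/crypto.
 *
 *	if (irq_fpu_usable()) {
 *		kernel_fpu_begin();
 *		my_simd_copy(dst, src, len);
 *		kernel_fpu_end();
 *	} else {
 *		memcpy(dst, src, len);
 *	}
 */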

void __kernel_fpu_begin(void)
{
	struct task_struct *me = current;

	if (__thread_has_fpu(me)) {
		__thread_clear_has_fpu(me);
		__save_init_fpu(me);
		/* We do 'stts()' in __kernel_fpu_end() */
	} else if (!use_eager_fpu()) {
		this_cpu_write(fpu_owner_task, NULL);
		clts();
	}
}
EXPORT_SYMBOL(__kernel_fpu_begin);

void __kernel_fpu_end(void)
{
	if (use_eager_fpu())
		math_state_restore();
	else
		stts();
}
EXPORT_SYMBOL(__kernel_fpu_end);

void unlazy_fpu(struct task_struct *tsk)
{
	preempt_disable();
	if (__thread_has_fpu(tsk)) {
		__save_init_fpu(tsk);
		__thread_fpu_end(tsk);
	} else
		tsk->fpu_counter = 0;
	preempt_enable();
}
EXPORT_SYMBOL(unlazy_fpu);

unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
unsigned int xstate_size;
EXPORT_SYMBOL_GPL(xstate_size);
static struct i387_fxsave_struct fx_scratch __cpuinitdata;

static void __cpuinit mxcsr_feature_mask_init(void)
{
	unsigned long mask = 0;

	if (cpu_has_fxsr) {
		memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
		asm volatile("fxsave %0" : : "m" (fx_scratch));
		mask = fx_scratch.mxcsr_mask;
		if (mask == 0)
			mask = 0x0000ffbf;
	}
	mxcsr_feature_mask &= mask;
}
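
/*
 * Editor's note (illustration, not in the original file): the mask
 * computed above is ANDed into every user-supplied MXCSR value, see
 * xfpregs_set() and xstateregs_set() below.  When the fxsave image
 * reports mxcsr_mask == 0, the architectural default 0x0000ffbf is
 * assumed; it leaves bit 6 (DAZ) clear because early SSE parts do not
 * implement denormals-are-zero.  For example:
 *
 *	0xffffffffu & 0x0000ffbf == 0x0000ffbf	(reserved bits and DAZ
 *						 forced off)
 */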

static void __cpuinit init_thread_xstate(void)
{
	/*
	 * Note that xstate_size might be overwritten later during
	 * xsave_init().
	 */

	if (!HAVE_HWFP) {
		/*
		 * Disable xsave as we do not support it if i387
		 * emulation is enabled.
		 */
		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
		xstate_size = sizeof(struct i387_soft_struct);
		return;
	}

	if (cpu_has_fxsr)
		xstate_size = sizeof(struct i387_fxsave_struct);
	else
		xstate_size = sizeof(struct i387_fsave_struct);
}

/*
 * Called at bootup to set up the initial FPU state that is later cloned
 * into all processes.
 */

void __cpuinit fpu_init(void)
{
	unsigned long cr0;
	unsigned long cr4_mask = 0;

	if (cpu_has_fxsr)
		cr4_mask |= X86_CR4_OSFXSR;
	if (cpu_has_xmm)
		cr4_mask |= X86_CR4_OSXMMEXCPT;
	if (cr4_mask)
		set_in_cr4(cr4_mask);

	cr0 = read_cr0();
	cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
	if (!HAVE_HWFP)
		cr0 |= X86_CR0_EM;
	write_cr0(cr0);

	/*
	 * init_thread_xstate is only called once to avoid overriding
	 * xstate_size during boot time or during CPU hotplug.
	 */
	if (xstate_size == 0)
		init_thread_xstate();

	mxcsr_feature_mask_init();
	xsave_init();
	eager_fpu_init();
}
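
/*
 * Editor's note (illustration, not in the original file): the control
 * register bits touched above determine how the CPU exposes the FPU:
 * CR4.OSFXSR enables fxsave/fxrstor and SSE, CR4.OSXMMEXCPT routes
 * unmasked SIMD FP exceptions to #XM, CR0.TS makes the next FPU insn
 * trap with #NM (the lazy-restore hook), and CR0.EM forces every FPU
 * insn to trap so math emulation can run.
 */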

void fpu_finit(struct fpu *fpu)
{
	if (!HAVE_HWFP) {
		finit_soft_fpu(&fpu->state->soft);
		return;
	}

	if (cpu_has_fxsr) {
		fx_finit(&fpu->state->fxsave);
	} else {
		struct i387_fsave_struct *fp = &fpu->state->fsave;
		memset(fp, 0, xstate_size);
		fp->cwd = 0xffff037fu;
		fp->swd = 0xffff0000u;
		fp->twd = 0xffffffffu;
		fp->fos = 0xffff0000u;
	}
}
EXPORT_SYMBOL_GPL(fpu_finit);
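
/*
 * Editor's note (illustration, not in the original file): the fsave
 * constants above mirror the FNINIT reset state: cwd 0x037f (all
 * exceptions masked, extended precision, round to nearest), swd 0
 * (empty stack, no exception flags) and twd 0xffff (all eight tags
 * "empty").  The all-ones high halves follow this file's convention
 * (see convert_from_fxsr()) of setting the unused upper 16 bits of
 * the 32-bit fsave slots.
 */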

/*
 * The _current_ task is using the FPU for the first time, so
 * initialize it: set the mxcsr to its default value at reset if
 * we support XMM instructions, and then remember that the
 * current task has used the FPU.
 */
int init_fpu(struct task_struct *tsk)
{
	int ret;

	if (tsk_used_math(tsk)) {
		if (HAVE_HWFP && tsk == current)
			unlazy_fpu(tsk);
		tsk->thread.fpu.last_cpu = ~0;
		return 0;
	}

	/*
	 * Memory allocation at the first usage of the FPU and other state.
	 */
	ret = fpu_alloc(&tsk->thread.fpu);
	if (ret)
		return ret;

	fpu_finit(&tsk->thread.fpu);

	set_stopped_child_used_math(tsk);
	return 0;
}
EXPORT_SYMBOL_GPL(init_fpu);

/*
 * The xstateregs_active() routine is the same as the fpregs_active() routine,
 * as the "regset->n" for the xstate regset will be updated based on the feature
 * capabilities supported by the xsave.
 */
int fpregs_active(struct task_struct *target, const struct user_regset *regset)
{
	return tsk_used_math(target) ? regset->n : 0;
}

int xfpregs_active(struct task_struct *target, const struct user_regset *regset)
{
	return (cpu_has_fxsr && tsk_used_math(target)) ? regset->n : 0;
}

int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
		unsigned int pos, unsigned int count,
		void *kbuf, void __user *ubuf)
{
	int ret;

	if (!cpu_has_fxsr)
		return -ENODEV;

	ret = init_fpu(target);
	if (ret)
		return ret;

	sanitize_i387_state(target);

	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
				   &target->thread.fpu.state->fxsave, 0, -1);
}

int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
		unsigned int pos, unsigned int count,
		const void *kbuf, const void __user *ubuf)
{
	int ret;

	if (!cpu_has_fxsr)
		return -ENODEV;

	ret = init_fpu(target);
	if (ret)
		return ret;

	sanitize_i387_state(target);

	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
				 &target->thread.fpu.state->fxsave, 0, -1);

	/*
	 * mxcsr reserved bits must be masked to zero for security reasons.
	 */
	target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;

	/*
	 * update the header bits in the xsave header, indicating the
	 * presence of FP and SSE state.
	 */
	if (cpu_has_xsave)
		target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;

	return ret;
}

int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
		unsigned int pos, unsigned int count,
		void *kbuf, void __user *ubuf)
{
	int ret;

	if (!cpu_has_xsave)
		return -ENODEV;

	ret = init_fpu(target);
	if (ret)
		return ret;

	/*
	 * Copy the 48 bytes defined by the software first into the xstate
	 * memory layout in the thread struct, so that we can copy the entire
	 * xstateregs to the user using one user_regset_copyout().
	 */
	memcpy(&target->thread.fpu.state->fxsave.sw_reserved,
	       xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));

	/*
	 * Copy the xstate memory layout.
	 */
	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
				  &target->thread.fpu.state->xsave, 0, -1);
	return ret;
}

int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
		  unsigned int pos, unsigned int count,
		  const void *kbuf, const void __user *ubuf)
{
	int ret;
	struct xsave_hdr_struct *xsave_hdr;

	if (!cpu_has_xsave)
		return -ENODEV;

	ret = init_fpu(target);
	if (ret)
		return ret;

	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
				 &target->thread.fpu.state->xsave, 0, -1);

	/*
	 * mxcsr reserved bits must be masked to zero for security reasons.
	 */
	target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;

	xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr;

	xsave_hdr->xstate_bv &= pcntxt_mask;
	/*
	 * These bits must be zero.
	 */
	xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;

	return ret;
}
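
/*
 * Editor's note (illustration, not in the original file): xstate_bv is
 * clamped to pcntxt_mask (the feature bits the kernel enabled in XCR0)
 * because xrstor raises #GP when the header claims a state component
 * the processor was not set up for; the reserved header words must be
 * zero for the same reason.
 */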

#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION

/*
 * FPU tag word conversions.
 */

static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
{
	unsigned int tmp; /* to avoid 16 bit prefixes in the code */

	/* Transform each pair of bits into 01 (valid) or 00 (empty) */
	tmp = ~twd;
	tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
	/* and move the valid bits to the lower byte. */
	tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
	tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
	tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */

	return tmp;
}
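
/*
 * Editor's worked example (not in the original file): each 2-bit i387
 * tag (00 valid, 01 zero, 10 special, 11 empty) collapses into one
 * fxsr bit (1 = in use, 0 = empty).  For twd == 0xfff0, i.e. ST0/ST1
 * in use and the rest empty:
 *
 *	~twd			  -> low 16 bits 0x000f
 *	(tmp | tmp >> 1) & 0x5555  = 0x0005
 *	(tmp | tmp >> 1) & 0x3333  = 0x0003
 *	(tmp | tmp >> 2) & 0x0f0f  = 0x0003
 *	(tmp | tmp >> 4) & 0x00ff  = 0x0003
 *
 * so the function returns 0x03: bits 0 and 1 set, one per live register.
 */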

#define FPREG_ADDR(f, n)	((void *)&(f)->st_space + (n) * 16)
#define FP_EXP_TAG_VALID	0
#define FP_EXP_TAG_ZERO		1
#define FP_EXP_TAG_SPECIAL	2
#define FP_EXP_TAG_EMPTY	3

static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
{
	struct _fpxreg *st;
	u32 tos = (fxsave->swd >> 11) & 7;
	u32 twd = (unsigned long) fxsave->twd;
	u32 tag;
	u32 ret = 0xffff0000u;
	int i;

	for (i = 0; i < 8; i++, twd >>= 1) {
		if (twd & 0x1) {
			st = FPREG_ADDR(fxsave, (i - tos) & 7);

			switch (st->exponent & 0x7fff) {
			case 0x7fff:
				tag = FP_EXP_TAG_SPECIAL;
				break;
			case 0x0000:
				if (!st->significand[0] &&
				    !st->significand[1] &&
				    !st->significand[2] &&
				    !st->significand[3])
					tag = FP_EXP_TAG_ZERO;
				else
					tag = FP_EXP_TAG_SPECIAL;
				break;
			default:
				if (st->significand[3] & 0x8000)
					tag = FP_EXP_TAG_VALID;
				else
					tag = FP_EXP_TAG_SPECIAL;
				break;
			}
		} else {
			tag = FP_EXP_TAG_EMPTY;
		}
		ret |= tag << (2 * i);
	}
	return ret;
}
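
/*
 * Editor's note (illustration, not in the original file): the reverse
 * mapping cannot be table-driven, since a single fxsr bit cannot say
 * whether a live register is valid, zero or special; the loop above
 * re-derives the 2-bit tag from the register contents.  Two quick
 * checks: fxsave->twd == 0 (all empty) yields 0xffffffff, and a
 * register whose exponent field is 0x7fff (NaN or infinity) is tagged
 * FP_EXP_TAG_SPECIAL.
 */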

/*
 * FXSR floating point environment conversions.
 */

void
convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
{
	struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
	struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
	struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
	int i;

	env->cwd = fxsave->cwd | 0xffff0000u;
	env->swd = fxsave->swd | 0xffff0000u;
	env->twd = twd_fxsr_to_i387(fxsave);

#ifdef CONFIG_X86_64
	env->fip = fxsave->rip;
	env->foo = fxsave->rdp;
	/*
	 * These should actually be ds/cs at FPU exception time, but
	 * that information is not available in 64-bit mode.
	 */
	env->fcs = task_pt_regs(tsk)->cs;
	if (tsk == current) {
		savesegment(ds, env->fos);
	} else {
		env->fos = tsk->thread.ds;
	}
	env->fos |= 0xffff0000;
#else
	env->fip = fxsave->fip;
	env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16);
	env->foo = fxsave->foo;
	env->fos = fxsave->fos;
#endif

	for (i = 0; i < 8; ++i)
		memcpy(&to[i], &from[i], sizeof(to[0]));
}

void convert_to_fxsr(struct task_struct *tsk,
		     const struct user_i387_ia32_struct *env)
{
	struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
	struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
	struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
	int i;

	fxsave->cwd = env->cwd;
	fxsave->swd = env->swd;
	fxsave->twd = twd_i387_to_fxsr(env->twd);
	fxsave->fop = (u16) ((u32) env->fcs >> 16);
#ifdef CONFIG_X86_64
	fxsave->rip = env->fip;
	fxsave->rdp = env->foo;
	/* cs and ds ignored */
#else
	fxsave->fip = env->fip;
	fxsave->fcs = (env->fcs & 0xffff);
	fxsave->foo = env->foo;
	fxsave->fos = env->fos;
#endif

	for (i = 0; i < 8; ++i)
		memcpy(&to[i], &from[i], sizeof(from[0]));
}

int fpregs_get(struct task_struct *target, const struct user_regset *regset,
	       unsigned int pos, unsigned int count,
	       void *kbuf, void __user *ubuf)
{
	struct user_i387_ia32_struct env;
	int ret;

	ret = init_fpu(target);
	if (ret)
		return ret;

	if (!HAVE_HWFP)
		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);

	if (!cpu_has_fxsr) {
		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
					   &target->thread.fpu.state->fsave, 0,
					   -1);
	}

	sanitize_i387_state(target);

	if (kbuf && pos == 0 && count == sizeof(env)) {
		convert_from_fxsr(kbuf, target);
		return 0;
	}

	convert_from_fxsr(&env, target);

	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
}

int fpregs_set(struct task_struct *target, const struct user_regset *regset,
	       unsigned int pos, unsigned int count,
	       const void *kbuf, const void __user *ubuf)
{
	struct user_i387_ia32_struct env;
	int ret;

	ret = init_fpu(target);
	if (ret)
		return ret;

	sanitize_i387_state(target);

	if (!HAVE_HWFP)
		return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);

	if (!cpu_has_fxsr) {
		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
					  &target->thread.fpu.state->fsave, 0, -1);
	}

	if (pos > 0 || count < sizeof(env))
		convert_from_fxsr(&env, target);

	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
	if (!ret)
		convert_to_fxsr(target, &env);

	/*
	 * update the header bit in the xsave header, indicating the
	 * presence of FP.
	 */
	if (cpu_has_xsave)
		target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP;
	return ret;
}

/*
 * FPU state for core dumps.
 * This is only used for a.out dumps now.
 * It is declared generically using elf_fpregset_t (which is
 * struct user_i387_struct) but is in fact only used for 32-bit
 * dumps, so on 64-bit it is really struct user_i387_ia32_struct.
 */
int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
{
	struct task_struct *tsk = current;
	int fpvalid;

	fpvalid = !!used_math();
	if (fpvalid)
		fpvalid = !fpregs_get(tsk, NULL,
				      0, sizeof(struct user_i387_ia32_struct),
				      fpu, NULL);

	return fpvalid;
}
EXPORT_SYMBOL(dump_fpu);

#endif	/* CONFIG_X86_32 || CONFIG_IA32_EMULATION */