/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>

#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/fpu/internal.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
#include <asm/vdso.h>

__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);

/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip);
	printk(KERN_DEFAULT "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
			regs->sp, regs->flags);
	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = __read_cr4();

	printk(KERN_DEFAULT "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_DEFAULT "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
			es, cr0);
	printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
			cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);

	/* Only print out debug registers if they are in their non-default state. */
	if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
	    (d6 == DR6_RESERVED) && (d7 == 0x400))) {
		printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n",
		       d0, d1, d2);
		printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n",
		       d3, d6, d7);
	}

	if (boot_cpu_has(X86_FEATURE_OSPKE))
		printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
		if (dead_task->mm->context.ldt) {
			pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
				dead_task->comm,
				dead_task->mm->context.ldt->entries,
				dead_task->mm->context.ldt->size);
			BUG();
		}
#endif
	}
}

enum which_selector {
	FS,
	GS
};

/*
 * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are
 * not available.  The goal is to be reasonably fast on non-FSGSBASE systems.
 * It's forcibly inlined because it'll generate better code and this function
 * is hot.
 */
static __always_inline void save_base_legacy(struct task_struct *prev_p,
					     unsigned short selector,
					     enum which_selector which)
{
	if (likely(selector == 0)) {
		/*
		 * On Intel (without X86_BUG_NULL_SEG), the segment base could
		 * be the pre-existing saved base or it could be zero.  On AMD
		 * (with X86_BUG_NULL_SEG), the segment base could be almost
		 * anything.
		 *
		 * This branch is very hot (it's hit twice on almost every
		 * context switch between 64-bit programs), and avoiding
		 * the RDMSR helps a lot, so we just assume that whatever
		 * value is already saved is correct.  This matches historical
		 * Linux behavior, so it won't break existing applications.
		 *
		 * To avoid leaking state, on non-X86_BUG_NULL_SEG CPUs, if we
		 * report that the base is zero, it needs to actually be zero:
		 * see the corresponding logic in load_seg_legacy.
		 */
	} else {
		/*
		 * If the selector is 1, 2, or 3, then the base is zero on
		 * !X86_BUG_NULL_SEG CPUs and could be anything on
		 * X86_BUG_NULL_SEG CPUs.  In the latter case, Linux
		 * has never attempted to preserve the base across context
		 * switches.
		 *
		 * If selector > 3, then it refers to a real segment, and
		 * saving the base isn't necessary.
		 */
		if (which == FS)
			prev_p->thread.fsbase = 0;
		else
			prev_p->thread.gsbase = 0;
	}
}

static __always_inline void save_fsgs(struct task_struct *task)
{
	savesegment(fs, task->thread.fsindex);
	savesegment(gs, task->thread.gsindex);
	save_base_legacy(task, task->thread.fsindex, FS);
	save_base_legacy(task, task->thread.gsindex, GS);
}

static __always_inline void loadseg(enum which_selector which,
				    unsigned short sel)
{
	if (which == FS)
		loadsegment(fs, sel);
	else
		load_gs_index(sel);
}

static __always_inline void load_seg_legacy(unsigned short prev_index,
					    unsigned long prev_base,
					    unsigned short next_index,
					    unsigned long next_base,
					    enum which_selector which)
{
	if (likely(next_index <= 3)) {
		/*
		 * The next task is using 64-bit TLS, is not using this
		 * segment at all, or is having fun with arcane CPU features.
		 */
		if (next_base == 0) {
			/*
			 * Nasty case: on AMD CPUs, we need to forcibly zero
			 * the base.
			 */
			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
				loadseg(which, __USER_DS);
				loadseg(which, next_index);
			} else {
				/*
				 * We could try to exhaustively detect cases
				 * under which we can skip the segment load,
				 * but there's really only one case that matters
				 * for performance: if both the previous and
				 * next states are fully zeroed, we can skip
				 * the load.
				 *
				 * (This assumes that prev_base == 0 has no
				 * false positives.  This is the case on
				 * Intel-style CPUs.)
				 */
				if (likely(prev_index | next_index | prev_base))
					loadseg(which, next_index);
			}
		} else {
			if (prev_index != next_index)
				loadseg(which, next_index);
			wrmsrl(which == FS ? MSR_FS_BASE : MSR_KERNEL_GS_BASE,
			       next_base);
		}
	} else {
		/*
		 * The next task is using a real segment.  Loading the selector
		 * is sufficient.
		 */
		loadseg(which, next_index);
	}
}

int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
		unsigned long arg, struct task_struct *p, unsigned long tls)
{
	int err;
	struct pt_regs *childregs;
	struct fork_frame *fork_frame;
	struct inactive_task_frame *frame;
	struct task_struct *me = current;

	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
	childregs = task_pt_regs(p);
	fork_frame = container_of(childregs, struct fork_frame, regs);
	frame = &fork_frame->frame;
	frame->bp = 0;
	frame->ret_addr = (unsigned long) ret_from_fork;
	p->thread.sp = (unsigned long) fork_frame;
	p->thread.io_bitmap_ptr = NULL;

	savesegment(gs, p->thread.gsindex);
	p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
	savesegment(fs, p->thread.fsindex);
	p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);
	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

	if (unlikely(p->flags & PF_KTHREAD)) {
		/* kernel thread */
		memset(childregs, 0, sizeof(struct pt_regs));
		frame->bx = sp;		/* function */
		frame->r12 = arg;
		return 0;
	}
	frame->bx = 0;
	*childregs = *current_pt_regs();

	childregs->ax = 0;
	if (sp)
		childregs->sp = sp;

	err = -ENOMEM;
	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
						  IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (in_ia32_syscall())
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)tls, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, tls);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}

	return err;
}

static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
		    unsigned long new_sp,
		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
	WARN_ON_ONCE(regs != current_pt_regs());

	if (static_cpu_has(X86_BUG_NULL_SEG)) {
		/* Loading zero below won't clear the base. */
		loadsegment(fs, __USER_DS);
		load_gs_index(__USER_DS);
	}

	loadsegment(fs, 0);
	loadsegment(es, _ds);
	loadsegment(ds, _ds);
	load_gs_index(0);

	regs->ip		= new_ip;
	regs->sp		= new_sp;
	regs->cs		= _cs;
	regs->ss		= _ss;
	regs->flags		= X86_EFLAGS_IF;
	force_iret();
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    __USER_CS, __USER_DS, 0);
}

#ifdef CONFIG_COMPAT
void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    test_thread_flag(TIF_X32)
			    ? __USER_CS : __USER32_CS,
			    __USER_DS, __USER_DS);
}
#endif

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * Function graph tracer is not supported either.
 */
__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	struct fpu *prev_fpu = &prev->fpu;
	struct fpu *next_fpu = &next->fpu;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
	fpu_switch_t fpu_switch;

	fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);

	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	save_fsgs(prev_p);

	/*
	 * Load TLS before restoring any segments so that segment loads
	 * reference the correct GDT entries.
	 */
	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.  This
	 * must be done after loading TLS entries in the GDT but before
	 * loading segments that might reference them, and it must
	 * be done before fpu__restore(), so the TS bit is up to
	 * date.
	 */
	arch_end_context_switch(next_p);

	/* Switch DS and ES.
	 *
	 * Reading them only returns the selectors, but writing them (if
	 * nonzero) loads the full descriptor from the GDT or LDT.  The
	 * LDT for next is loaded in switch_mm, and the GDT is loaded
	 * above.
	 *
	 * We therefore need to write new values to the segment
	 * registers on every context switch unless both the new and old
	 * values are zero.
	 *
	 * Note that we don't need to do anything for CS and SS, as
	 * those are saved and restored as part of pt_regs.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	load_seg_legacy(prev->fsindex, prev->fsbase,
			next->fsindex, next->fsbase, FS);
	load_seg_legacy(prev->gsindex, prev->gsbase,
			next->gsindex, next->gsbase, GS);

	switch_fpu_finish(next_fpu, fpu_switch);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	this_cpu_write(current_task, next_p);

	/* Reload esp0 and ss1.  This changes current_thread_info(). */
	load_sp0(tss, next);

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

#ifdef CONFIG_XEN
	/*
	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
	 * current_pt_regs()->flags may not match the current task's
	 * intended IOPL.  We need to switch it manually.
	 */
	if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
		     prev->iopl != next->iopl))
		xen_set_iopl_mask(next->iopl);
#endif

	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
		/*
		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
		 * does not update the cached descriptor.  As a result, if we
		 * do SYSRET while SS is NULL, we'll end up in user mode with
		 * SS apparently equal to __USER_DS but actually unusable.
		 *
		 * The straightforward workaround would be to fix it up just
		 * before SYSRET, but that would slow down the system call
		 * fast paths.  Instead, we ensure that SS is never NULL in
		 * system call context.  We do this by replacing NULL SS
		 * selectors at every context switch.  SYSCALL sets up a valid
		 * SS, so the only way to get NULL is to re-enter the kernel
		 * from CPL 3 through an interrupt.  Since that can't happen
		 * in the same task as a running syscall, we are guaranteed to
		 * context switch between every interrupt vector entry and a
		 * subsequent SYSRET.
		 *
		 * We read SS first because SS reads are much faster than
		 * writes.  Out of caution, we force SS to __KERNEL_DS even if
		 * it previously had a different non-NULL value.
		 */
		unsigned short ss_sel;
		savesegment(ss, ss_sel);
		if (ss_sel != __KERNEL_DS)
			loadsegment(ss, __KERNEL_DS);
	}

	return prev_p;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_ADDR32);
	clear_thread_flag(TIF_X32);

	/* Ensure the corresponding mm is not marked. */
	if (current->mm)
		current->mm->context.ia32_compat = 0;

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}
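
/*
 * Illustrative sketch, not part of this file: READ_IMPLIES_EXEC, which the
 * code above clears for 64-bit tasks, is an ordinary personality bit that
 * user space can inspect through the personality(2) syscall.  This assumes
 * glibc's <sys/personality.h>; passing 0xffffffff queries the current
 * persona without changing it.
 *
 *	#include <stdio.h>
 *	#include <sys/personality.h>
 *
 *	int main(void)
 *	{
 *		unsigned int persona = personality(0xffffffff);
 *
 *		printf("READ_IMPLIES_EXEC is %s\n",
 *		       (persona & READ_IMPLIES_EXEC) ? "set" : "clear");
 *		return 0;
 *	}
 */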

void set_personality_ia32(bool x32)
{
	/* inherit personality from parent */

	/* Make sure to be in 32bit mode */
	set_thread_flag(TIF_ADDR32);

	/* Mark the associated mm as containing 32-bit tasks. */
	if (x32) {
		clear_thread_flag(TIF_IA32);
		set_thread_flag(TIF_X32);
		if (current->mm)
			current->mm->context.ia32_compat = TIF_X32;
		current->personality &= ~READ_IMPLIES_EXEC;
		/* in_compat_syscall() uses the presence of the x32
		   syscall bit flag to determine compat status */
		current_thread_info()->status &= ~TS_COMPAT;
	} else {
		set_thread_flag(TIF_IA32);
		clear_thread_flag(TIF_X32);
		if (current->mm)
			current->mm->context.ia32_compat = TIF_IA32;
		current->personality |= force_personality32;
		/* Prepare the first "return" to user space */
		current_thread_info()->status |= TS_COMPAT;
	}
}
EXPORT_SYMBOL_GPL(set_personality_ia32);

#ifdef CONFIG_CHECKPOINT_RESTORE
static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
{
	int ret;

	ret = map_vdso_once(image, addr);
	if (ret)
		return ret;

	return (long)image->size;
}
#endif

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_MAX)
			return -EPERM;
		cpu = get_cpu();
		task->thread.gsindex = 0;
		task->thread.gsbase = addr;
		if (doit) {
			load_gs_index(0);
			ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_MAX)
			return -EPERM;
		cpu = get_cpu();
		task->thread.fsindex = 0;
		task->thread.fsbase = addr;
		if (doit) {
			/* set the selector to 0 to not confuse __switch_to */
			loadsegment(fs, 0);
			ret = wrmsrl_safe(MSR_FS_BASE, addr);
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fsbase;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		if (doit)
			rdmsrl(MSR_KERNEL_GS_BASE, base);
		else
			base = task->thread.gsbase;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

#ifdef CONFIG_CHECKPOINT_RESTORE
# ifdef CONFIG_X86_X32_ABI
	case ARCH_MAP_VDSO_X32:
		return prctl_map_vdso(&vdso_image_x32, addr);
# endif
# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
	case ARCH_MAP_VDSO_32:
		return prctl_map_vdso(&vdso_image_32, addr);
# endif
	case ARCH_MAP_VDSO_64:
		return prctl_map_vdso(&vdso_image_64, addr);
#endif

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}
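
/*
 * Illustrative sketch, not part of this file: driving do_arch_prctl() above
 * from user space.  This assumes an x86-64 Linux system where <asm/prctl.h>
 * provides the ARCH_GET_FS/ARCH_SET_GS/ARCH_GET_GS codes and the raw syscall
 * is reached through glibc's syscall() wrapper as SYS_arch_prctl.
 *
 *	#include <stdio.h>
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <asm/prctl.h>
 *
 *	int main(void)
 *	{
 *		static unsigned long scratch[64];
 *		unsigned long fsbase, gsbase;
 *
 *		// ARCH_GET_FS copies the FS base out via put_user().
 *		if (syscall(SYS_arch_prctl, ARCH_GET_FS, &fsbase))
 *			return 1;
 *		printf("FS base: %#lx\n", fsbase);
 *
 *		// ARCH_SET_GS makes the kernel write MSR_KERNEL_GS_BASE,
 *		// which becomes the user GS base on return to user mode.
 *		if (syscall(SYS_arch_prctl, ARCH_SET_GS, (unsigned long)scratch))
 *			return 1;
 *		syscall(SYS_arch_prctl, ARCH_GET_GS, &gsbase);
 *		printf("GS base: %#lx\n", gsbase);
 *		return 0;
 *	}
 */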

unsigned long KSTK_ESP(struct task_struct *task)
{
	return task_pt_regs(task)->sp;
}