1/*
2 *  Copyright (C) 1991,1992  Linus Torvalds
3 *
4 * entry_32.S contains the system-call and low-level fault and trap handling routines.
5 *
6 * Stack layout while running C code:
7 *	ptrace needs to have all registers on the stack.
8 *	If the order here is changed, it needs to be
9 *	updated in fork.c:copy_process(), signal.c:do_signal(),
10 *	ptrace.c and ptrace.h
11 *
12 *	 0(%esp) - %ebx
13 *	 4(%esp) - %ecx
14 *	 8(%esp) - %edx
15 *	 C(%esp) - %esi
16 *	10(%esp) - %edi
17 *	14(%esp) - %ebp
18 *	18(%esp) - %eax
19 *	1C(%esp) - %ds
20 *	20(%esp) - %es
21 *	24(%esp) - %fs
22 *	28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
23 *	2C(%esp) - orig_eax
24 *	30(%esp) - %eip
25 *	34(%esp) - %cs
26 *	38(%esp) - %eflags
27 *	3C(%esp) - %oldesp
28 *	40(%esp) - %oldss
29 */
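/*
 * For reference, the layout above is what C code sees as struct pt_regs
 * (arch/x86/include/asm/ptrace.h).  A simplified sketch -- consult the
 * header for the authoritative field types:
 *
 *	struct pt_regs {
 *		unsigned long bx, cx, dx, si, di, bp, ax;
 *		unsigned long ds, es, fs, gs;
 *		unsigned long orig_ax;
 *		unsigned long ip, cs, flags, sp, ss;
 *	};
 */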
30
31#include <linux/linkage.h>
32#include <linux/err.h>
33#include <asm/thread_info.h>
34#include <asm/irqflags.h>
35#include <asm/errno.h>
36#include <asm/segment.h>
37#include <asm/smp.h>
38#include <asm/page_types.h>
39#include <asm/percpu.h>
40#include <asm/processor-flags.h>
41#include <asm/ftrace.h>
42#include <asm/irq_vectors.h>
43#include <asm/cpufeatures.h>
44#include <asm/alternative-asm.h>
45#include <asm/asm.h>
46#include <asm/smap.h>
47#include <asm/export.h>
48#include <asm/nospec-branch.h>
49
50	.section .entry.text, "ax"
51
52/*
53 * We use macros for low-level operations which need to be overridden
54 * for paravirtualization.  The following will never clobber any registers:
55 *   INTERRUPT_RETURN (aka. "iret")
56 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
57 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
58 *
59 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
60 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
61 * Allowing a register to be clobbered can shrink the paravirt replacement
62 * enough to patch inline, increasing performance.
63 */
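/*
 * For reference, the native (non-paravirt) definitions in asm/irqflags.h
 * are expected to boil down to the plain instructions, roughly:
 *
 *	#define DISABLE_INTERRUPTS(clobbers)	cli
 *	#define ENABLE_INTERRUPTS(clobbers)	sti
 *	#define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
 *	#define INTERRUPT_RETURN		iret
 *
 * (a sketch; the exact spellings live in the headers.)
 */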
64
65#ifdef CONFIG_PREEMPT
66# define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
67#else
68# define preempt_stop(clobbers)
69# define resume_kernel		restore_all
70#endif
71
72.macro TRACE_IRQS_IRET
73#ifdef CONFIG_TRACE_IRQFLAGS
74	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)     # interrupts off?
75	jz	1f
76	TRACE_IRQS_ON
771:
78#endif
79.endm
80
81/*
82 * User gs save/restore
83 *
84 * %gs is used for userland TLS and kernel only uses it for stack
85 * canary which is required to be at %gs:20 by gcc.  Read the comment
86 * at the top of stackprotector.h for more info.
87 *
88 * Local labels 98 and 99 are used.
89 */
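/*
 * For illustration, a stack-protected function compiled for 32-bit x86
 * references the canary through %gs roughly like this (compiler output
 * sketch; exact offsets and the failure path are compiler-dependent):
 *
 *	movl	%gs:20, %eax		# prologue: load the canary
 *	movl	%eax, -12(%ebp)		# store it in this frame
 *	...
 *	movl	-12(%ebp), %edx		# epilogue: re-check it
 *	xorl	%gs:20, %edx
 *	jne	.Lstack_chk_fail	# mismatch -> call __stack_chk_fail
 */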
90#ifdef CONFIG_X86_32_LAZY_GS
91
92 /* unfortunately push/pop can't be no-op */
93.macro PUSH_GS
94	pushl	$0
95.endm
96.macro POP_GS pop=0
97	addl	$(4 + \pop), %esp
98.endm
99.macro POP_GS_EX
100.endm
101
102 /* all the rest are no-op */
103.macro PTGS_TO_GS
104.endm
105.macro PTGS_TO_GS_EX
106.endm
107.macro GS_TO_REG reg
108.endm
109.macro REG_TO_PTGS reg
110.endm
111.macro SET_KERNEL_GS reg
112.endm
113
114#else	/* CONFIG_X86_32_LAZY_GS */
115
116.macro PUSH_GS
117	pushl	%gs
118.endm
119
120.macro POP_GS pop=0
12198:	popl	%gs
122  .if \pop <> 0
123	add	$\pop, %esp
124  .endif
125.endm
126.macro POP_GS_EX
127.pushsection .fixup, "ax"
12899:	movl	$0, (%esp)
129	jmp	98b
130.popsection
131	_ASM_EXTABLE(98b, 99b)
132.endm
133
134.macro PTGS_TO_GS
13598:	mov	PT_GS(%esp), %gs
136.endm
137.macro PTGS_TO_GS_EX
138.pushsection .fixup, "ax"
13999:	movl	$0, PT_GS(%esp)
140	jmp	98b
141.popsection
142	_ASM_EXTABLE(98b, 99b)
143.endm
144
145.macro GS_TO_REG reg
146	movl	%gs, \reg
147.endm
148.macro REG_TO_PTGS reg
149	movl	\reg, PT_GS(%esp)
150.endm
151.macro SET_KERNEL_GS reg
152	movl	$(__KERNEL_STACK_CANARY), \reg
153	movl	\reg, %gs
154.endm
155
156#endif /* CONFIG_X86_32_LAZY_GS */
157
158.macro SAVE_ALL pt_regs_ax=%eax
159	cld
160	PUSH_GS
161	pushl	%fs
162	pushl	%es
163	pushl	%ds
164	pushl	\pt_regs_ax
165	pushl	%ebp
166	pushl	%edi
167	pushl	%esi
168	pushl	%edx
169	pushl	%ecx
170	pushl	%ebx
171	movl	$(__USER_DS), %edx
172	movl	%edx, %ds
173	movl	%edx, %es
174	movl	$(__KERNEL_PERCPU), %edx
175	movl	%edx, %fs
176	SET_KERNEL_GS %edx
177.endm
178
179.macro RESTORE_INT_REGS
180	popl	%ebx
181	popl	%ecx
182	popl	%edx
183	popl	%esi
184	popl	%edi
185	popl	%ebp
186	popl	%eax
187.endm
188
189.macro RESTORE_REGS pop=0
190	RESTORE_INT_REGS
1911:	popl	%ds
1922:	popl	%es
1933:	popl	%fs
194	POP_GS \pop
195.pushsection .fixup, "ax"
1964:	movl	$0, (%esp)
197	jmp	1b
1985:	movl	$0, (%esp)
199	jmp	2b
2006:	movl	$0, (%esp)
201	jmp	3b
202.popsection
203	_ASM_EXTABLE(1b, 4b)
204	_ASM_EXTABLE(2b, 5b)
205	_ASM_EXTABLE(3b, 6b)
206	POP_GS_EX
207.endm
208
209/*
210 * %eax: prev task
211 * %edx: next task
212 */
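/*
 * The frame pushed below is what C code reads back as struct
 * inactive_task_frame (asm/switch_to.h).  A 32-bit sketch -- the header
 * has the authoritative definition:
 *
 *	struct inactive_task_frame {
 *		unsigned long si;
 *		unsigned long di;
 *		unsigned long bx;
 *		unsigned long bp;
 *		unsigned long ret_addr;
 *	};
 */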
213ENTRY(__switch_to_asm)
214	/*
215	 * Save callee-saved registers
216	 * This must match the order in struct inactive_task_frame
217	 */
218	pushl	%ebp
219	pushl	%ebx
220	pushl	%edi
221	pushl	%esi
222
223	/* switch stack */
224	movl	%esp, TASK_threadsp(%eax)
225	movl	TASK_threadsp(%edx), %esp
226
227#ifdef CONFIG_CC_STACKPROTECTOR
228	movl	TASK_stack_canary(%edx), %ebx
229	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
230#endif
231
232#ifdef CONFIG_RETPOLINE
233	/*
234	 * When switching from a shallower to a deeper call stack
235	 * the RSB may either underflow or use entries populated
236	 * with userspace addresses. On CPUs where those concerns
237	 * exist, overwrite the RSB with entries which capture
238	 * speculative execution to prevent attack.
239	 */
240	FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
241#endif
242
243	/* restore callee-saved registers */
244	popl	%esi
245	popl	%edi
246	popl	%ebx
247	popl	%ebp
248
249	jmp	__switch_to
250END(__switch_to_asm)
251
252/*
253 * A newly forked process directly context switches into this address.
254 *
255 * eax: prev task we switched from
256 * ebx: kernel thread func (NULL for user thread)
257 * edi: kernel thread arg
258 */
259ENTRY(ret_from_fork)
260	pushl	%eax
261	call	schedule_tail
262	popl	%eax
263
264	testl	%ebx, %ebx
265	jnz	1f		/* kernel threads are uncommon */
266
2672:
268	/* When we fork, we trace the syscall return in the child, too. */
269	movl    %esp, %eax
270	call    syscall_return_slowpath
271	jmp     restore_all
272
273	/* kernel thread */
2741:	movl	%edi, %eax
275	CALL_NOSPEC %ebx
276	/*
277	 * A kernel thread is allowed to return here after successfully
278	 * calling do_execve().  Exit to userspace to complete the execve()
279	 * syscall.
280	 */
281	movl	$0, PT_EAX(%esp)
282	jmp	2b
283END(ret_from_fork)
284
285/*
286 * Return to user mode is not as complex as all this looks,
287 * but we want the default path for a system call return to
288 * go as quickly as possible which is why some of this is
289 * go as quickly as possible, which is why some of this is
290 */
291
292	# userspace resumption stub bypassing syscall exit tracing
293	ALIGN
294ret_from_exception:
295	preempt_stop(CLBR_ANY)
296ret_from_intr:
297#ifdef CONFIG_VM86
298	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS and CS
299	movb	PT_CS(%esp), %al
300	andl	$(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
301#else
302	/*
303	 * We can be coming here from child spawned by kernel_thread().
304	 */
305	movl	PT_CS(%esp), %eax
306	andl	$SEGMENT_RPL_MASK, %eax
307#endif
308	cmpl	$USER_RPL, %eax
309	jb	resume_kernel			# not returning to v8086 or userspace
310
311ENTRY(resume_userspace)
312	DISABLE_INTERRUPTS(CLBR_ANY)
313	TRACE_IRQS_OFF
314	movl	%esp, %eax
315	call	prepare_exit_to_usermode
316	jmp	restore_all
317END(ret_from_exception)
318
319#ifdef CONFIG_PREEMPT
320ENTRY(resume_kernel)
321	DISABLE_INTERRUPTS(CLBR_ANY)
322need_resched:
323	cmpl	$0, PER_CPU_VAR(__preempt_count)
324	jnz	restore_all
325	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off (exception path) ?
326	jz	restore_all
327	call	preempt_schedule_irq
328	jmp	need_resched
329END(resume_kernel)
330#endif
331
332GLOBAL(__begin_SYSENTER_singlestep_region)
333/*
334 * All code from here through __end_SYSENTER_singlestep_region is subject
335 * to being single-stepped if a user program sets TF and executes SYSENTER.
336 * There is absolutely nothing that we can do to prevent this from happening
337 * (thanks Intel!).  To keep our handling of this situation as simple as
338 * possible, we handle TF just like AC and NT, except that our #DB handler
339 * will ignore all of the single-step traps generated in this range.
340 */
341
342#ifdef CONFIG_XEN
343/*
344 * Xen doesn't set %esp to be precisely what the normal SYSENTER
345 * entry point expects, so fix it up before using the normal path.
346 */
347ENTRY(xen_sysenter_target)
348	addl	$5*4, %esp			/* remove xen-provided frame */
349	jmp	sysenter_past_esp
350#endif
351
352/*
353 * 32-bit SYSENTER entry.
354 *
355 * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
356 * if X86_FEATURE_SEP is available.  This is the preferred system call
357 * entry on 32-bit systems.
358 *
359 * The SYSENTER instruction, in principle, should *only* occur in the
360 * vDSO.  In practice, a small number of Android devices were shipped
361 * with a copy of Bionic that inlined a SYSENTER instruction.  This
362 * never happened in any of Google's Bionic versions -- it only happened
363 * in a narrow range of Intel-provided versions.
364 *
365 * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
366 * IF and VM in EFLAGS are cleared (IOW: interrupts are off).
367 * SYSENTER does not save anything on the stack,
368 * and does not save old EIP (!!!), ESP, or EFLAGS.
369 *
370 * To avoid losing track of EFLAGS.VM (and thus potentially corrupting
371 * user and/or vm86 state), we explicitly disable the SYSENTER
372 * instruction in vm86 mode by reprogramming the MSRs.
373 *
374 * Arguments:
375 * eax  system call number
376 * ebx  arg1
377 * ecx  arg2
378 * edx  arg3
379 * esi  arg4
380 * edi  arg5
381 * ebp  user stack
382 * 0(%ebp) arg6
383 */
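/*
 * For context, the vDSO's __kernel_vsyscall is expected to look roughly
 * like this before control reaches the kernel (a sketch, not the
 * authoritative vDSO source; see vdso32/system_call.S):
 *
 *	push	%ecx
 *	push	%edx
 *	push	%ebp
 *	movl	%esp, %ebp		# ebp = user stack, 0(%ebp) = arg6
 *	sysenter			# or int $0x80 without X86_FEATURE_SEP
 *	...				# sysexit lands back here
 *	pop	%ebp
 *	pop	%edx
 *	pop	%ecx
 *	ret
 */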
384ENTRY(entry_SYSENTER_32)
385	movl	TSS_sysenter_sp0(%esp), %esp
386sysenter_past_esp:
387	pushl	$__USER_DS		/* pt_regs->ss */
388	pushl	%ebp			/* pt_regs->sp (stashed in bp) */
389	pushfl				/* pt_regs->flags (except IF = 0) */
390	orl	$X86_EFLAGS_IF, (%esp)	/* Fix IF */
391	pushl	$__USER_CS		/* pt_regs->cs */
392	pushl	$0			/* pt_regs->ip = 0 (placeholder) */
393	pushl	%eax			/* pt_regs->orig_ax */
394	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */
395
396	/*
397	 * SYSENTER doesn't filter flags, so we need to clear NT, AC
398	 * and TF ourselves.  To save a few cycles, we can check whether
399	 * any of them is set instead of doing an unconditional popfl.
400	 * This needs to happen before enabling interrupts so that
401	 * we don't get preempted with NT set.
402	 *
403	 * If TF is set, we will single-step all the way to here -- do_debug
404	 * will ignore all the traps.  (Yes, this is slow, but so is
405	 * single-stepping in general.  This allows us to avoid having
406	 * more complicated code to handle the case where a user program
407	 * forces us to single-step through the SYSENTER entry code.)
408	 *
409	 * NB.: .Lsysenter_fix_flags is a label with the code under it moved
410	 * out-of-line as an optimization: NT is unlikely to be set in the
411	 * majority of the cases and instead of polluting the I$ unnecessarily,
412	 * we're keeping that code behind a branch which will predict as
413	 * not-taken and therefore its instructions won't be fetched.
414	 */
415	testl	$X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
416	jnz	.Lsysenter_fix_flags
417.Lsysenter_flags_fixed:
418
419	/*
420	 * User mode is traced as though IRQs are on, and SYSENTER
421	 * turned them off.
422	 */
423	TRACE_IRQS_OFF
424
425	movl	%esp, %eax
426	call	do_fast_syscall_32
427	/* XEN PV guests always use IRET path */
428	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
429		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
430
431/* Opportunistic SYSEXIT */
432	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
433	movl	PT_EIP(%esp), %edx	/* pt_regs->ip */
434	movl	PT_OLDESP(%esp), %ecx	/* pt_regs->sp */
4351:	mov	PT_FS(%esp), %fs
436	PTGS_TO_GS
437	popl	%ebx			/* pt_regs->bx */
438	addl	$2*4, %esp		/* skip pt_regs->cx and pt_regs->dx */
439	popl	%esi			/* pt_regs->si */
440	popl	%edi			/* pt_regs->di */
441	popl	%ebp			/* pt_regs->bp */
442	popl	%eax			/* pt_regs->ax */
443
444	/*
445	 * Restore all flags except IF. (We restore IF separately because
446	 * STI gives a one-instruction window in which we won't be interrupted,
447	 * whereas POPF does not.)
448	 */
449	addl	$PT_EFLAGS-PT_DS, %esp	/* point esp at pt_regs->flags */
450	btr	$X86_EFLAGS_IF_BIT, (%esp)
451	popfl
452
453	/*
454	 * Return back to the vDSO, which will pop ecx and edx.
455	 * Don't bother with DS and ES (they already contain __USER_DS).
456	 */
457	sti
458	sysexit
459
460.pushsection .fixup, "ax"
4612:	movl	$0, PT_FS(%esp)
462	jmp	1b
463.popsection
464	_ASM_EXTABLE(1b, 2b)
465	PTGS_TO_GS_EX
466
467.Lsysenter_fix_flags:
468	pushl	$X86_EFLAGS_FIXED
469	popfl
470	jmp	.Lsysenter_flags_fixed
471GLOBAL(__end_SYSENTER_singlestep_region)
472ENDPROC(entry_SYSENTER_32)
473
474/*
475 * 32-bit legacy system call entry.
476 *
477 * 32-bit x86 Linux system calls traditionally used the INT $0x80
478 * instruction.  INT $0x80 lands here.
479 *
480 * This entry point can be used by any 32-bit program to perform system calls.
481 * Instances of INT $0x80 can be found inline in various programs and
482 * libraries.  It is also used by the vDSO's __kernel_vsyscall
483 * fallback for hardware that doesn't support a faster entry method.
484 * Restarted 32-bit system calls also fall back to INT $0x80
485 * regardless of what instruction was originally used to do the system
486 * call.  (64-bit programs can use INT $0x80 as well, but they can
487 * only run on 64-bit kernels and therefore land in
488 * entry_INT80_compat.)
489 *
490 * This is considered a slow path.  It is not used by most libc
491 * implementations on modern hardware except during process startup.
492 *
493 * Arguments:
494 * eax  system call number
495 * ebx  arg1
496 * ecx  arg2
497 * edx  arg3
498 * esi  arg4
499 * edi  arg5
500 * ebp  arg6
501 */
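/*
 * For illustration, a hand-rolled write(1, buf, len) through this entry
 * point looks something like this (user-space code; buf/len are
 * placeholders for your data):
 *
 *	movl	$4, %eax		# __NR_write on 32-bit x86
 *	movl	$1, %ebx		# fd
 *	movl	$buf, %ecx		# buffer
 *	movl	$len, %edx		# count
 *	int	$0x80			# result (or -errno) returns in %eax
 */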
502ENTRY(entry_INT80_32)
503	ASM_CLAC
504	pushl	%eax			/* pt_regs->orig_ax */
505	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */
506
507	/*
508	 * User mode is traced as though IRQs are on, and the interrupt gate
509	 * turned them off.
510	 */
511	TRACE_IRQS_OFF
512
513	movl	%esp, %eax
514	call	do_int80_syscall_32
515.Lsyscall_32_done:
516
517restore_all:
518	TRACE_IRQS_IRET
519restore_all_notrace:
520#ifdef CONFIG_X86_ESPFIX32
521	ALTERNATIVE	"jmp restore_nocheck", "", X86_BUG_ESPFIX
522
523	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
524	/*
525	 * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
526	 * are returning to the kernel.
527	 * See comments in process.c:copy_thread() for details.
528	 */
529	movb	PT_OLDSS(%esp), %ah
530	movb	PT_CS(%esp), %al
531	andl	$(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
532	cmpl	$((SEGMENT_LDT << 8) | USER_RPL), %eax
533	je ldt_ss				# returning to user-space with LDT SS
534#endif
535restore_nocheck:
536	RESTORE_REGS 4				# skip orig_eax/error_code
537irq_return:
538	INTERRUPT_RETURN
539.section .fixup, "ax"
540ENTRY(iret_exc	)
541	pushl	$0				# no error code
542	pushl	$do_iret_error
543	jmp	error_code
544.previous
545	_ASM_EXTABLE(irq_return, iret_exc)
546
547#ifdef CONFIG_X86_ESPFIX32
548ldt_ss:
549/*
550 * Setup and switch to ESPFIX stack
551 *
552 * We're returning to userspace with a 16-bit stack. The CPU will not
553 * restore the high word of ESP for us on executing iret... This is an
554 * "official" bug of all the x86-compatible CPUs, which we can work
555 * around to make dosemu and wine happy. We do this by preloading the
556 * high word of ESP with the high word of the userspace ESP while
557 * compensating for the offset by changing to the ESPFIX segment with
558 * a base address that accounts for the difference.
559 */
560#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
561	mov	%esp, %edx			/* load kernel esp */
562	mov	PT_OLDESP(%esp), %eax		/* load userspace esp */
563	mov	%dx, %ax			/* eax: new kernel esp */
564	sub	%eax, %edx			/* offset (low word is 0) */
565	shr	$16, %edx
566	mov	%dl, GDT_ESPFIX_SS + 4		/* bits 16..23 */
567	mov	%dh, GDT_ESPFIX_SS + 7		/* bits 24..31 */
568	pushl	$__ESPFIX_SS
569	pushl	%eax				/* new kernel esp */
570	/*
571	 * Disable interrupts, but do not irqtrace this section: we
572	 * will soon execute iret and the tracer was already set to
573	 * the irqstate after the IRET:
574	 */
575	DISABLE_INTERRUPTS(CLBR_EAX)
576	lss	(%esp), %esp			/* switch to espfix segment */
577	jmp	restore_nocheck
578#endif
579ENDPROC(entry_INT80_32)
580
581.macro FIXUP_ESPFIX_STACK
582/*
583 * Switch back from the ESPFIX stack to the normal zero-based stack
584 *
585 * We can't call C functions using the ESPFIX stack. This code reads
586 * the high word of the segment base from the GDT and switches to the
587 * normal stack and adjusts ESP with the matching offset.
588 */
589#ifdef CONFIG_X86_ESPFIX32
590	/* fixup the stack */
591	mov	GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
592	mov	GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
593	shl	$16, %eax
594	addl	%esp, %eax			/* the adjusted stack pointer */
595	pushl	$__KERNEL_DS
596	pushl	%eax
597	lss	(%esp), %esp			/* switch to the normal stack segment */
598#endif
599.endm
600.macro UNWIND_ESPFIX_STACK
601#ifdef CONFIG_X86_ESPFIX32
602	movl	%ss, %eax
603	/* see if on espfix stack */
604	cmpw	$__ESPFIX_SS, %ax
605	jne	27f
606	movl	$__KERNEL_DS, %eax
607	movl	%eax, %ds
608	movl	%eax, %es
609	/* switch to normal stack */
610	FIXUP_ESPFIX_STACK
61127:
612#endif
613.endm
614
615/*
616 * Build the entry stubs with some assembler magic.
617 * We pack 1 stub into every 8-byte block.
618 */
619	.align 8
620ENTRY(irq_entries_start)
621    vector=FIRST_EXTERNAL_VECTOR
622    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
623	pushl	$(~vector+0x80)			/* Note: always in signed byte range */
624    vector=vector+1
625	jmp	common_interrupt
626	.align	8
627    .endr
628END(irq_entries_start)
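/*
 * Worked example of the encoding above, assuming FIRST_EXTERNAL_VECTOR is
 * 0x20: the stub for vector 0x20 pushes ~0x20 + 0x80 = 0x5f, which fits a
 * sign-extended 8-bit immediate, so the pushl plus the jmp stay within the
 * 8-byte slot.  common_interrupt below subtracts 0x80 again, leaving
 * ~vector (a value in [-256, -1]) in orig_eax for do_IRQ to invert.
 */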
629
630/*
631 * The CPU automatically disables interrupts when executing an IRQ vector,
632 * so IRQ-flags tracing has to follow that:
633 */
634	.p2align CONFIG_X86_L1_CACHE_SHIFT
635common_interrupt:
636	ASM_CLAC
637	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
638	SAVE_ALL
639	TRACE_IRQS_OFF
640	movl	%esp, %eax
641	call	do_IRQ
642	jmp	ret_from_intr
643ENDPROC(common_interrupt)
644
645#define BUILD_INTERRUPT3(name, nr, fn)	\
646ENTRY(name)				\
647	ASM_CLAC;			\
648	pushl	$~(nr);			\
649	SAVE_ALL;			\
650	TRACE_IRQS_OFF			\
651	movl	%esp, %eax;		\
652	call	fn;			\
653	jmp	ret_from_intr;		\
654ENDPROC(name)
655
656
657#ifdef CONFIG_TRACING
658# define TRACE_BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
659#else
660# define TRACE_BUILD_INTERRUPT(name, nr)
661#endif
662
663#define BUILD_INTERRUPT(name, nr)		\
664	BUILD_INTERRUPT3(name, nr, smp_##name);	\
665	TRACE_BUILD_INTERRUPT(name, nr)
666
667/* The include is where all of the SMP etc. interrupts come from */
668#include <asm/entry_arch.h>
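/*
 * As an example of how the macros above are used, entry_arch.h contains
 * invocations along the lines of (sketch):
 *
 *	BUILD_INTERRUPT(reschedule_interrupt, RESCHEDULE_VECTOR)
 *
 * which builds an entry stub named reschedule_interrupt that saves
 * registers and calls smp_reschedule_interrupt() with a pt_regs pointer.
 */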
669
670ENTRY(coprocessor_error)
671	ASM_CLAC
672	pushl	$0
673	pushl	$do_coprocessor_error
674	jmp	error_code
675END(coprocessor_error)
676
677ENTRY(simd_coprocessor_error)
678	ASM_CLAC
679	pushl	$0
680#ifdef CONFIG_X86_INVD_BUG
681	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
682	ALTERNATIVE "pushl	$do_general_protection",	\
683		    "pushl	$do_simd_coprocessor_error",	\
684		    X86_FEATURE_XMM
685#else
686	pushl	$do_simd_coprocessor_error
687#endif
688	jmp	error_code
689END(simd_coprocessor_error)
690
691ENTRY(device_not_available)
692	ASM_CLAC
693	pushl	$-1				# mark this as an int
694	pushl	$do_device_not_available
695	jmp	error_code
696END(device_not_available)
697
698#ifdef CONFIG_PARAVIRT
699ENTRY(native_iret)
700	iret
701	_ASM_EXTABLE(native_iret, iret_exc)
702END(native_iret)
703#endif
704
705ENTRY(overflow)
706	ASM_CLAC
707	pushl	$0
708	pushl	$do_overflow
709	jmp	error_code
710END(overflow)
711
712ENTRY(bounds)
713	ASM_CLAC
714	pushl	$0
715	pushl	$do_bounds
716	jmp	error_code
717END(bounds)
718
719ENTRY(invalid_op)
720	ASM_CLAC
721	pushl	$0
722	pushl	$do_invalid_op
723	jmp	error_code
724END(invalid_op)
725
726ENTRY(coprocessor_segment_overrun)
727	ASM_CLAC
728	pushl	$0
729	pushl	$do_coprocessor_segment_overrun
730	jmp	error_code
731END(coprocessor_segment_overrun)
732
733ENTRY(invalid_TSS)
734	ASM_CLAC
735	pushl	$do_invalid_TSS
736	jmp	error_code
737END(invalid_TSS)
738
739ENTRY(segment_not_present)
740	ASM_CLAC
741	pushl	$do_segment_not_present
742	jmp	error_code
743END(segment_not_present)
744
745ENTRY(stack_segment)
746	ASM_CLAC
747	pushl	$do_stack_segment
748	jmp	error_code
749END(stack_segment)
750
751ENTRY(alignment_check)
752	ASM_CLAC
753	pushl	$do_alignment_check
754	jmp	error_code
755END(alignment_check)
756
757ENTRY(divide_error)
758	ASM_CLAC
759	pushl	$0				# no error code
760	pushl	$do_divide_error
761	jmp	error_code
762END(divide_error)
763
764#ifdef CONFIG_X86_MCE
765ENTRY(machine_check)
766	ASM_CLAC
767	pushl	$0
768	pushl	machine_check_vector
769	jmp	error_code
770END(machine_check)
771#endif
772
773ENTRY(spurious_interrupt_bug)
774	ASM_CLAC
775	pushl	$0
776	pushl	$do_spurious_interrupt_bug
777	jmp	error_code
778END(spurious_interrupt_bug)
779
780#ifdef CONFIG_XEN
781ENTRY(xen_hypervisor_callback)
782	pushl	$-1				/* orig_ax = -1 => not a system call */
783	SAVE_ALL
784	TRACE_IRQS_OFF
785
786	/*
787	 * Check to see if we got the event in the critical
788	 * region in xen_iret_direct, after we've reenabled
789	 * events and checked for pending events.  This simulates
790	 * the iret instruction's behaviour, where it delivers a
791	 * pending interrupt when enabling interrupts:
792	 */
793	movl	PT_EIP(%esp), %eax
794	cmpl	$xen_iret_start_crit, %eax
795	jb	1f
796	cmpl	$xen_iret_end_crit, %eax
797	jae	1f
798
799	jmp	xen_iret_crit_fixup
800
801ENTRY(xen_do_upcall)
8021:	mov	%esp, %eax
803	call	xen_evtchn_do_upcall
804#ifndef CONFIG_PREEMPT
805	call	xen_maybe_preempt_hcall
806#endif
807	jmp	ret_from_intr
808ENDPROC(xen_hypervisor_callback)
809
810/*
811 * Hypervisor uses this for application faults while it executes.
812 * We get here for two reasons:
813 *  1. Fault while reloading DS, ES, FS or GS
814 *  2. Fault while executing IRET
815 * Category 1 we fix up by reattempting the load, and zeroing the segment
816 * register if the load fails.
817 * Category 2 we fix up by jumping to do_iret_error. We cannot use the
818 * normal Linux return path in this case because if we use the IRET hypercall
819 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
820 * We distinguish between categories by maintaining a status value in EAX.
821 */
822ENTRY(xen_failsafe_callback)
823	pushl	%eax
824	movl	$1, %eax
8251:	mov	4(%esp), %ds
8262:	mov	8(%esp), %es
8273:	mov	12(%esp), %fs
8284:	mov	16(%esp), %gs
829	/* EAX == 0 => Category 1 (Bad segment)
830	   EAX != 0 => Category 2 (Bad IRET) */
831	testl	%eax, %eax
832	popl	%eax
833	lea	16(%esp), %esp
834	jz	5f
835	jmp	iret_exc
8365:	pushl	$-1				/* orig_ax = -1 => not a system call */
837	SAVE_ALL
838	jmp	ret_from_exception
839
840.section .fixup, "ax"
8416:	xorl	%eax, %eax
842	movl	%eax, 4(%esp)
843	jmp	1b
8447:	xorl	%eax, %eax
845	movl	%eax, 8(%esp)
846	jmp	2b
8478:	xorl	%eax, %eax
848	movl	%eax, 12(%esp)
849	jmp	3b
8509:	xorl	%eax, %eax
851	movl	%eax, 16(%esp)
852	jmp	4b
853.previous
854	_ASM_EXTABLE(1b, 6b)
855	_ASM_EXTABLE(2b, 7b)
856	_ASM_EXTABLE(3b, 8b)
857	_ASM_EXTABLE(4b, 9b)
858ENDPROC(xen_failsafe_callback)
859
860BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
861		xen_evtchn_do_upcall)
862
863#endif /* CONFIG_XEN */
864
865#if IS_ENABLED(CONFIG_HYPERV)
866
867BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
868	hyperv_vector_handler)
869
870#endif /* CONFIG_HYPERV */
871
872#ifdef CONFIG_FUNCTION_TRACER
873#ifdef CONFIG_DYNAMIC_FTRACE
874
875ENTRY(mcount)
876	ret
877END(mcount)
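/*
 * Background for the stub above: with CONFIG_FUNCTION_TRACER the kernel is
 * compiled with -pg, so every traceable function starts roughly with:
 *
 *	push	%ebp
 *	movl	%esp, %ebp
 *	call	mcount
 *
 * Under CONFIG_DYNAMIC_FTRACE those call sites are turned into NOPs at boot
 * and patched to call ftrace_caller/ftrace_regs_caller only while tracing
 * is enabled, which is why mcount itself can be a bare "ret" here.
 */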
878
879ENTRY(ftrace_caller)
880	pushl	%eax
881	pushl	%ecx
882	pushl	%edx
883	pushl	$0				/* Pass NULL as regs pointer */
884	movl	4*4(%esp), %eax
885	movl	0x4(%ebp), %edx
886	movl	function_trace_op, %ecx
887	subl	$MCOUNT_INSN_SIZE, %eax
888
889.globl ftrace_call
890ftrace_call:
891	call	ftrace_stub
892
893	addl	$4, %esp			/* skip NULL pointer */
894	popl	%edx
895	popl	%ecx
896	popl	%eax
897ftrace_ret:
898#ifdef CONFIG_FUNCTION_GRAPH_TRACER
899.globl ftrace_graph_call
900ftrace_graph_call:
901	jmp	ftrace_stub
902#endif
903
904/* This is weak to keep gas from relaxing the jumps */
905WEAK(ftrace_stub)
906	ret
907END(ftrace_caller)
908
909ENTRY(ftrace_regs_caller)
910	pushf	/* push flags before compare (in cs location) */
911
912	/*
913	 * i386 does not save SS and ESP when coming from kernel.
914	 * Instead, to get sp, &regs->sp is used (see ptrace.h).
915	 * Unfortunately, that means eflags must be at the same location
916	 * as the current return ip is. We move the return ip into the
917	 * ip location, and move flags into the return ip location.
918	 */
919	pushl	4(%esp)				/* save return ip into ip slot */
920
921	pushl	$0				/* Load 0 into orig_ax */
922	pushl	%gs
923	pushl	%fs
924	pushl	%es
925	pushl	%ds
926	pushl	%eax
927	pushl	%ebp
928	pushl	%edi
929	pushl	%esi
930	pushl	%edx
931	pushl	%ecx
932	pushl	%ebx
933
934	movl	13*4(%esp), %eax		/* Get the saved flags */
935	movl	%eax, 14*4(%esp)		/* Move saved flags into regs->flags location */
936						/* clobbering return ip */
937	movl	$__KERNEL_CS, 13*4(%esp)
938
939	movl	12*4(%esp), %eax		/* Load ip (1st parameter) */
940	subl	$MCOUNT_INSN_SIZE, %eax		/* Adjust ip */
941	movl	0x4(%ebp), %edx			/* Load parent ip (2nd parameter) */
942	movl	function_trace_op, %ecx		/* Save ftrace_pos in 3rd parameter */
943	pushl	%esp				/* Save pt_regs as 4th parameter */
944
945GLOBAL(ftrace_regs_call)
946	call	ftrace_stub
947
948	addl	$4, %esp			/* Skip pt_regs */
949	movl	14*4(%esp), %eax		/* Move flags back into cs */
950	movl	%eax, 13*4(%esp)		/* Needed to keep addl	from modifying flags */
951	movl	12*4(%esp), %eax		/* Get return ip from regs->ip */
952	movl	%eax, 14*4(%esp)		/* Put return ip back for ret */
953
954	popl	%ebx
955	popl	%ecx
956	popl	%edx
957	popl	%esi
958	popl	%edi
959	popl	%ebp
960	popl	%eax
961	popl	%ds
962	popl	%es
963	popl	%fs
964	popl	%gs
965	addl	$8, %esp			/* Skip orig_ax and ip */
966	popf					/* Pop flags at end (no addl to corrupt flags) */
967	jmp	ftrace_ret
968
969	popf
970	jmp	ftrace_stub
971#else /* ! CONFIG_DYNAMIC_FTRACE */
972
973ENTRY(mcount)
974	cmpl	$__PAGE_OFFSET, %esp
975	jb	ftrace_stub			/* Paging not enabled yet? */
976
977	cmpl	$ftrace_stub, ftrace_trace_function
978	jnz	trace
979#ifdef CONFIG_FUNCTION_GRAPH_TRACER
980	cmpl	$ftrace_stub, ftrace_graph_return
981	jnz	ftrace_graph_caller
982
983	cmpl	$ftrace_graph_entry_stub, ftrace_graph_entry
984	jnz	ftrace_graph_caller
985#endif
986.globl ftrace_stub
987ftrace_stub:
988	ret
989
990	/* taken from glibc */
991trace:
992	pushl	%eax
993	pushl	%ecx
994	pushl	%edx
995	movl	0xc(%esp), %eax
996	movl	0x4(%ebp), %edx
997	subl	$MCOUNT_INSN_SIZE, %eax
998
999	movl    ftrace_trace_function, %ecx
1000	CALL_NOSPEC %ecx
1001
1002	popl	%edx
1003	popl	%ecx
1004	popl	%eax
1005	jmp	ftrace_stub
1006END(mcount)
1007#endif /* CONFIG_DYNAMIC_FTRACE */
1008EXPORT_SYMBOL(mcount)
1009#endif /* CONFIG_FUNCTION_TRACER */
1010
1011#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1012ENTRY(ftrace_graph_caller)
1013	pushl	%eax
1014	pushl	%ecx
1015	pushl	%edx
1016	movl	0xc(%esp), %eax
1017	lea	0x4(%ebp), %edx
1018	movl	(%ebp), %ecx
1019	subl	$MCOUNT_INSN_SIZE, %eax
1020	call	prepare_ftrace_return
1021	popl	%edx
1022	popl	%ecx
1023	popl	%eax
1024	ret
1025END(ftrace_graph_caller)
1026
1027.globl return_to_handler
1028return_to_handler:
1029	pushl	%eax
1030	pushl	%edx
1031	movl	%ebp, %eax
1032	call	ftrace_return_to_handler
1033	movl	%eax, %ecx
1034	popl	%edx
1035	popl	%eax
1036	JMP_NOSPEC %ecx
1037#endif
1038
1039#ifdef CONFIG_TRACING
1040ENTRY(trace_page_fault)
1041	ASM_CLAC
1042	pushl	$trace_do_page_fault
1043	jmp	error_code
1044END(trace_page_fault)
1045#endif
1046
1047ENTRY(page_fault)
1048	ASM_CLAC
1049	pushl	$do_page_fault
1050	ALIGN
1051error_code:
1052	/* the function address is in %gs's slot on the stack */
1053	pushl	%fs
1054	pushl	%es
1055	pushl	%ds
1056	pushl	%eax
1057	pushl	%ebp
1058	pushl	%edi
1059	pushl	%esi
1060	pushl	%edx
1061	pushl	%ecx
1062	pushl	%ebx
1063	cld
1064	movl	$(__KERNEL_PERCPU), %ecx
1065	movl	%ecx, %fs
1066	UNWIND_ESPFIX_STACK
1067	GS_TO_REG %ecx
1068	movl	PT_GS(%esp), %edi		# get the function address
1069	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
1070	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
1071	REG_TO_PTGS %ecx
1072	SET_KERNEL_GS %ecx
1073	movl	$(__USER_DS), %ecx
1074	movl	%ecx, %ds
1075	movl	%ecx, %es
1076	TRACE_IRQS_OFF
1077	movl	%esp, %eax			# pt_regs pointer
1078	CALL_NOSPEC %edi
1079	jmp	ret_from_exception
1080END(page_fault)
1081
1082ENTRY(debug)
1083	/*
1084	 * #DB can happen at the first instruction of
1085	 * entry_SYSENTER_32 or in Xen's SYSENTER prologue.  If this
1086	 * happens, then we will be running on a very small stack.  We
1087	 * need to detect this condition and switch to the thread
1088	 * stack before calling any C code at all.
1089	 *
1090	 * If you edit this code, keep in mind that NMIs can happen in here.
1091	 */
1092	ASM_CLAC
1093	pushl	$-1				# mark this as an int
1094	SAVE_ALL
1095	xorl	%edx, %edx			# error code 0
1096	movl	%esp, %eax			# pt_regs pointer
1097
1098	/* Are we currently on the SYSENTER stack? */
1099	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
1100	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
1101	cmpl	$SIZEOF_SYSENTER_stack, %ecx
1102	jb	.Ldebug_from_sysenter_stack
1103
1104	TRACE_IRQS_OFF
1105	call	do_debug
1106	jmp	ret_from_exception
1107
1108.Ldebug_from_sysenter_stack:
1109	/* We're on the SYSENTER stack.  Switch off. */
1110	movl	%esp, %ebp
1111	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
1112	TRACE_IRQS_OFF
1113	call	do_debug
1114	movl	%ebp, %esp
1115	jmp	ret_from_exception
1116END(debug)
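/*
 * The "are we on the SYSENTER stack?" test used in debug above and nmi
 * below is, in C terms, roughly the following (a sketch; the helper name
 * is illustrative and the trick relies on unsigned wraparound):
 *
 *	bool on_sysenter_stack(unsigned long esp, unsigned long stack_top)
 *	{
 *		return (stack_top - esp) < SIZEOF_SYSENTER_stack;
 *	}
 *
 * If esp lies below the stack or above its top, the subtraction yields a
 * value >= the stack size (possibly after wrapping), so the test fails.
 */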
1117
1118/*
1119 * NMI is doubly nasty.  It can happen on the first instruction of
1120 * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
1121 * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
1122 * switched stacks.  We handle both conditions by simply checking whether we
1123 * interrupted kernel code running on the SYSENTER stack.
1124 */
1125ENTRY(nmi)
1126	ASM_CLAC
1127#ifdef CONFIG_X86_ESPFIX32
1128	pushl	%eax
1129	movl	%ss, %eax
1130	cmpw	$__ESPFIX_SS, %ax
1131	popl	%eax
1132	je	nmi_espfix_stack
1133#endif
1134
1135	pushl	%eax				# pt_regs->orig_ax
1136	SAVE_ALL
1137	xorl	%edx, %edx			# zero error code
1138	movl	%esp, %eax			# pt_regs pointer
1139
1140	/* Are we currently on the SYSENTER stack? */
1141	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
1142	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
1143	cmpl	$SIZEOF_SYSENTER_stack, %ecx
1144	jb	.Lnmi_from_sysenter_stack
1145
1146	/* Not on SYSENTER stack. */
1147	call	do_nmi
1148	jmp	restore_all_notrace
1149
1150.Lnmi_from_sysenter_stack:
1151	/*
1152	 * We're on the SYSENTER stack.  Switch off.  No one (not even debug)
1153	 * is using the thread stack right now, so it's safe for us to use it.
1154	 */
1155	movl	%esp, %ebp
1156	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
1157	call	do_nmi
1158	movl	%ebp, %esp
1159	jmp	restore_all_notrace
1160
1161#ifdef CONFIG_X86_ESPFIX32
1162nmi_espfix_stack:
1163	/*
1164	 * create the SS:ESP pointer used by lss to switch back later
1165	 */
1166	pushl	%ss
1167	pushl	%esp
1168	addl	$4, (%esp)
1169	/* copy the iret frame of 12 bytes */
1170	.rept 3
1171	pushl	16(%esp)
1172	.endr
1173	pushl	%eax
1174	SAVE_ALL
1175	FIXUP_ESPFIX_STACK			# %eax == %esp
1176	xorl	%edx, %edx			# zero error code
1177	call	do_nmi
1178	RESTORE_REGS
1179	lss	12+4(%esp), %esp		# back to espfix stack
1180	jmp	irq_return
1181#endif
1182END(nmi)
1183
1184ENTRY(int3)
1185	ASM_CLAC
1186	pushl	$-1				# mark this as an int
1187	SAVE_ALL
1188	TRACE_IRQS_OFF
1189	xorl	%edx, %edx			# zero error code
1190	movl	%esp, %eax			# pt_regs pointer
1191	call	do_int3
1192	jmp	ret_from_exception
1193END(int3)
1194
1195ENTRY(general_protection)
1196	pushl	$do_general_protection
1197	jmp	error_code
1198END(general_protection)
1199
1200#ifdef CONFIG_KVM_GUEST
1201ENTRY(async_page_fault)
1202	ASM_CLAC
1203	pushl	$do_async_page_fault
1204	jmp	error_code
1205END(async_page_fault)
1206#endif
1207
1208ENTRY(rewind_stack_do_exit)
1209	/* Prevent any naive code from trying to unwind to our caller. */
1210	xorl	%ebp, %ebp
1211
1212	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esi
1213	leal	-TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
1214
1215	call	do_exit
12161:	jmp 1b
1217END(rewind_stack_do_exit)
1218