1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 *
4 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
5 *
6 * Derived from book3s_rmhandlers.S and other files, which are:
7 *
8 * Copyright SUSE Linux Products GmbH 2009
9 *
10 * Authors: Alexander Graf <agraf@suse.de>
11 */
12
13#include <asm/ppc_asm.h>
14#include <asm/code-patching-asm.h>
15#include <asm/kvm_asm.h>
16#include <asm/reg.h>
17#include <asm/mmu.h>
18#include <asm/page.h>
19#include <asm/ptrace.h>
20#include <asm/hvcall.h>
21#include <asm/asm-offsets.h>
22#include <asm/exception-64s.h>
23#include <asm/kvm_book3s_asm.h>
24#include <asm/book3s/64/mmu-hash.h>
25#include <asm/export.h>
26#include <asm/tm.h>
27#include <asm/opal.h>
28#include <asm/xive-regs.h>
29#include <asm/thread_info.h>
30#include <asm/asm-compat.h>
31#include <asm/feature-fixups.h>
32#include <asm/cpuidle.h>
33#include <asm/ultravisor-api.h>
34
35/* Sign-extend HDEC if not on POWER9 */
36#define EXTEND_HDEC(reg)			\
37BEGIN_FTR_SECTION;				\
38	extsw	reg, reg;			\
39END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
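/*
 * Rough C-level sketch of what EXTEND_HDEC does (illustrative only):
 * before ISA v3.00 the HDEC is a 32-bit quantity, so the value read
 * with mfspr still needs sign extension; on POWER9 and later it is
 * already full-width and the extsw is patched out.
 *
 *	if (!cpu_has_feature(CPU_FTR_ARCH_300))
 *		reg = (s64)(s32)reg;
 */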
40
41/* Values in HSTATE_NAPPING(r13) */
42#define NAPPING_CEDE	1
43#define NAPPING_NOVCPU	2
44#define NAPPING_UNSPLIT	3
45
46/* Stack frame offsets for kvmppc_hv_entry */
47#define SFS			208
48#define STACK_SLOT_TRAP		(SFS-4)
49#define STACK_SLOT_SHORT_PATH	(SFS-8)
50#define STACK_SLOT_TID		(SFS-16)
51#define STACK_SLOT_PSSCR	(SFS-24)
52#define STACK_SLOT_PID		(SFS-32)
53#define STACK_SLOT_IAMR		(SFS-40)
54#define STACK_SLOT_CIABR	(SFS-48)
55#define STACK_SLOT_DAWR		(SFS-56)
56#define STACK_SLOT_DAWRX	(SFS-64)
57#define STACK_SLOT_HFSCR	(SFS-72)
58#define STACK_SLOT_AMR		(SFS-80)
59#define STACK_SLOT_UAMOR	(SFS-88)
60/* the following is used by the P9 short path */
61#define STACK_SLOT_NVGPRS	(SFS-152)	/* 18 gprs */
62
63/*
64 * Call kvmppc_hv_entry in real mode.
65 * Must be called with interrupts hard-disabled.
66 *
67 * Input Registers:
68 *
69 * LR = return address to continue at after eventually re-enabling MMU
70 */
71_GLOBAL_TOC(kvmppc_hv_entry_trampoline)
72	mflr	r0
73	std	r0, PPC_LR_STKOFF(r1)
74	stdu	r1, -112(r1)
75	mfmsr	r10
76	std	r10, HSTATE_HOST_MSR(r13)
77	LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
78	li	r0,MSR_RI
79	andc	r0,r10,r0
80	li	r6,MSR_IR | MSR_DR
81	andc	r6,r10,r6
82	mtmsrd	r0,1		/* clear RI in MSR */
83	mtsrr0	r5
84	mtsrr1	r6
85	RFI_TO_KERNEL
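	/*
	 * A hedged C-level sketch of the trampoline above (names are
	 * illustrative, not real helpers): remember the host MSR, drop
	 * RI so the sequence is non-recoverable, then rfid into
	 * kvmppc_call_hv_entry with IR/DR clear, i.e. in real mode.
	 *
	 *	host_msr = mfmsr();
	 *	hstate->host_msr = host_msr;
	 *	mtmsrd(host_msr & ~MSR_RI, 1);
	 *	srr0 = kvmppc_call_hv_entry;
	 *	srr1 = host_msr & ~(MSR_IR | MSR_DR);
	 *	rfid();		// continues at kvmppc_call_hv_entry, MMU off
	 */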
86
87kvmppc_call_hv_entry:
88BEGIN_FTR_SECTION
89	/* On P9, do LPCR setting, if necessary */
90	ld	r3, HSTATE_SPLIT_MODE(r13)
91	cmpdi	r3, 0
92	beq	46f
93	lwz	r4, KVM_SPLIT_DO_SET(r3)
94	cmpwi	r4, 0
95	beq	46f
96	bl	kvmhv_p9_set_lpcr
97	nop
9846:
99END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
100
101	ld	r4, HSTATE_KVM_VCPU(r13)
102	bl	kvmppc_hv_entry
103
104	/* Back from guest - restore host state and return to caller */
105
106BEGIN_FTR_SECTION
107	/* Restore host DABR and DABRX */
108	ld	r5,HSTATE_DABR(r13)
109	li	r6,7
110	mtspr	SPRN_DABR,r5
111	mtspr	SPRN_DABRX,r6
112END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
113
114	/* Restore SPRG3 */
115	ld	r3,PACA_SPRG_VDSO(r13)
116	mtspr	SPRN_SPRG_VDSO_WRITE,r3
117
118	/* Reload the host's PMU registers */
119	bl	kvmhv_load_host_pmu
120
121	/*
122	 * Reload DEC.  HDEC interrupts were disabled when
123	 * we reloaded the host's LPCR value.
124	 */
125	ld	r3, HSTATE_DECEXP(r13)
126	mftb	r4
127	subf	r4, r4, r3
128	mtspr	SPRN_DEC, r4
129
130	/* hwthread_req may have been set by cede or the no-vcpu case, so clear it */
131	li	r0, 0
132	stb	r0, HSTATE_HWTHREAD_REQ(r13)
133
134	/*
135	 * For external interrupts we need to call the Linux
136	 * handler to process the interrupt. We do that by jumping
137	 * to absolute address 0x500 for external interrupts.
138	 * The [h]rfid at the end of the handler will return to
139	 * the book3s_hv_interrupts.S code. For other interrupts
140	 * we do the rfid to get back to the book3s_hv_interrupts.S
141	 * code here.
142	 */
143	ld	r8, 112+PPC_LR_STKOFF(r1)
144	addi	r1, r1, 112
145	ld	r7, HSTATE_HOST_MSR(r13)
146
147	/* Return the trap number on this thread as the return value */
148	mr	r3, r12
149
150	/*
151	 * If we came back from the guest via a relocation-on interrupt,
152	 * we will be in virtual mode at this point, which makes it a
153	 * little easier to get back to the caller.
154	 */
155	mfmsr	r0
156	andi.	r0, r0, MSR_IR		/* in real mode? */
157	bne	.Lvirt_return
158
159	/* RFI into the highmem handler */
160	mfmsr	r6
161	li	r0, MSR_RI
162	andc	r6, r6, r0
163	mtmsrd	r6, 1			/* Clear RI in MSR */
164	mtsrr0	r8
165	mtsrr1	r7
166	RFI_TO_KERNEL
167
168	/* Virtual-mode return */
169.Lvirt_return:
170	mtlr	r8
171	blr
172
173kvmppc_primary_no_guest:
174	/* We handle this much like a ceded vcpu */
175	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
176	/* HDEC may be larger than DEC for arch >= v3.00, but since the */
177	/* HDEC value came from DEC in the first place, it will fit */
178	mfspr	r3, SPRN_HDEC
179	mtspr	SPRN_DEC, r3
180	/*
181	 * Make sure the primary has finished the MMU switch.
182	 * We should never get here on a secondary thread, but
183	 * check it for robustness' sake.
184	 */
185	ld	r5, HSTATE_KVM_VCORE(r13)
18665:	lbz	r0, VCORE_IN_GUEST(r5)
187	cmpwi	r0, 0
188	beq	65b
189	/* Set LPCR. */
190	ld	r8,VCORE_LPCR(r5)
191	mtspr	SPRN_LPCR,r8
192	isync
193	/* set our bit in napping_threads */
194	ld	r5, HSTATE_KVM_VCORE(r13)
195	lbz	r7, HSTATE_PTID(r13)
196	li	r0, 1
197	sld	r0, r0, r7
198	addi	r6, r5, VCORE_NAPPING_THREADS
1991:	lwarx	r3, 0, r6
200	or	r3, r3, r0
201	stwcx.	r3, 0, r6
202	bne	1b
203	/* order napping_threads update vs testing entry_exit_map */
204	isync
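	/*
	 * The lwarx/stwcx. loop above is, roughly (illustrative C;
	 * store_conditional() is just a stand-in for the stwcx. retry):
	 *
	 *	do {
	 *		old = vc->napping_threads;		// lwarx
	 *	} while (!store_conditional(&vc->napping_threads,
	 *				    old | (1u << ptid)));	// stwcx.
	 *	isync();	// order vs the entry_exit_map test below
	 */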
205	li	r12, 0
206	lwz	r7, VCORE_ENTRY_EXIT(r5)
207	cmpwi	r7, 0x100
208	bge	kvm_novcpu_exit	/* another thread already exiting */
209	li	r3, NAPPING_NOVCPU
210	stb	r3, HSTATE_NAPPING(r13)
211
212	li	r3, 0		/* Don't wake on privileged (OS) doorbell */
213	b	kvm_do_nap
214
215/*
216 * kvm_novcpu_wakeup
217 *	Entered from kvm_start_guest if kvm_hstate.napping is set
218 *	to NAPPING_NOVCPU
219 *		r2 = kernel TOC
220 *		r13 = paca
221 */
222kvm_novcpu_wakeup:
223	ld	r1, HSTATE_HOST_R1(r13)
224	ld	r5, HSTATE_KVM_VCORE(r13)
225	li	r0, 0
226	stb	r0, HSTATE_NAPPING(r13)
227
228	/* check the wake reason */
229	bl	kvmppc_check_wake_reason
230
231	/*
232	 * Restore volatile registers since we could have called
233	 * a C routine in kvmppc_check_wake_reason.
234	 *	r5 = VCORE
235	 */
236	ld	r5, HSTATE_KVM_VCORE(r13)
237
238	/* see if any other thread is already exiting */
239	lwz	r0, VCORE_ENTRY_EXIT(r5)
240	cmpwi	r0, 0x100
241	bge	kvm_novcpu_exit
242
243	/* clear our bit in napping_threads */
244	lbz	r7, HSTATE_PTID(r13)
245	li	r0, 1
246	sld	r0, r0, r7
247	addi	r6, r5, VCORE_NAPPING_THREADS
2484:	lwarx	r7, 0, r6
249	andc	r7, r7, r0
250	stwcx.	r7, 0, r6
251	bne	4b
252
253	/* See if the wake reason means we need to exit */
254	cmpdi	r3, 0
255	bge	kvm_novcpu_exit
256
257	/* See if our timeslice has expired (HDEC is negative) */
258	mfspr	r0, SPRN_HDEC
259	EXTEND_HDEC(r0)
260	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
261	cmpdi	r0, 0
262	blt	kvm_novcpu_exit
263
264	/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
265	ld	r4, HSTATE_KVM_VCPU(r13)
266	cmpdi	r4, 0
267	beq	kvmppc_primary_no_guest
268
269#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
270	addi	r3, r4, VCPU_TB_RMENTRY
271	bl	kvmhv_start_timing
272#endif
273	b	kvmppc_got_guest
274
275kvm_novcpu_exit:
276#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
277	ld	r4, HSTATE_KVM_VCPU(r13)
278	cmpdi	r4, 0
279	beq	13f
280	addi	r3, r4, VCPU_TB_RMEXIT
281	bl	kvmhv_accumulate_time
282#endif
28313:	mr	r3, r12
284	stw	r12, STACK_SLOT_TRAP(r1)
285	bl	kvmhv_commence_exit
286	nop
287	b	kvmhv_switch_to_host
288
289/*
290 * We come in here when woken from the Linux offline idle code.
291 * Relocation is off.
292 * r3 contains the SRR1 wakeup value; SRR1 itself is trashed.
293 */
294_GLOBAL(idle_kvm_start_guest)
295	mfcr	r5
296	mflr	r0
297	std	r5, 8(r1)	// Save CR in caller's frame
298	std	r0, 16(r1)	// Save LR in caller's frame
299	// Create frame on emergency stack
300	ld	r4, PACAEMERGSP(r13)
301	stdu	r1, -SWITCH_FRAME_SIZE(r4)
302	// Switch to new frame on emergency stack
303	mr	r1, r4
304	std	r3, 32(r1)	// Save SRR1 wakeup value
305	SAVE_NVGPRS(r1)
306
307	/*
308	 * Could avoid this and pass it through in r3. For now,
309	 * code expects it to be in SRR1.
310	 */
311	mtspr	SPRN_SRR1,r3
312
313	li	r0,0
314	stb	r0,PACA_FTRACE_ENABLED(r13)
315
316	li	r0,KVM_HWTHREAD_IN_KVM
317	stb	r0,HSTATE_HWTHREAD_STATE(r13)
318
319	/* kvm cede / napping does not come through here */
320	lbz	r0,HSTATE_NAPPING(r13)
321	twnei	r0,0
322
323	b	1f
324
325kvm_unsplit_wakeup:
326	li	r0, 0
327	stb	r0, HSTATE_NAPPING(r13)
328
3291:
330
331	/*
332	 * We weren't napping due to cede, so this must be a secondary
333	 * thread being woken up to run a guest, or being woken up due
334	 * to a stray IPI.  (Or due to some machine check or hypervisor
335	 * maintenance interrupt while the core is in KVM.)
336	 */
337
338	/* Check the wake reason in SRR1 to see why we got here */
339	bl	kvmppc_check_wake_reason
340	/*
341	 * kvmppc_check_wake_reason could have invoked a C routine, but
342	 * there is no volatile register state we need to restore here.
343	 */
344
345	cmpdi	r3, 0
346	bge	kvm_no_guest
347
348	/* get vcore pointer, NULL if we have nothing to run */
349	ld	r5,HSTATE_KVM_VCORE(r13)
350	cmpdi	r5,0
351	/* if we have no vcore to run, go back to sleep */
352	beq	kvm_no_guest
353
354kvm_secondary_got_guest:
355
356	// About to go to guest, clear saved SRR1
357	li	r0, 0
358	std	r0, 32(r1)
359
360	/* Set HSTATE_DSCR(r13) to something sensible */
361	ld	r6, PACA_DSCR_DEFAULT(r13)
362	std	r6, HSTATE_DSCR(r13)
363
364	/* On thread 0 of a subcore, set HDEC to max */
365	lbz	r4, HSTATE_PTID(r13)
366	cmpwi	r4, 0
367	bne	63f
368	LOAD_REG_ADDR(r6, decrementer_max)
369	ld	r6, 0(r6)
370	mtspr	SPRN_HDEC, r6
371	/* and set per-LPAR registers, if doing dynamic micro-threading */
372	ld	r6, HSTATE_SPLIT_MODE(r13)
373	cmpdi	r6, 0
374	beq	63f
375BEGIN_FTR_SECTION
376	ld	r0, KVM_SPLIT_RPR(r6)
377	mtspr	SPRN_RPR, r0
378	ld	r0, KVM_SPLIT_PMMAR(r6)
379	mtspr	SPRN_PMMAR, r0
380	ld	r0, KVM_SPLIT_LDBAR(r6)
381	mtspr	SPRN_LDBAR, r0
382	isync
383FTR_SECTION_ELSE
384	/* On P9 we use the split_info for coordinating LPCR changes */
385	lwz	r4, KVM_SPLIT_DO_SET(r6)
386	cmpwi	r4, 0
387	beq	1f
388	mr	r3, r6
389	bl	kvmhv_p9_set_lpcr
390	nop
3911:
392ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
39363:
394	/* Order load of vcpu after load of vcore */
395	lwsync
396	ld	r4, HSTATE_KVM_VCPU(r13)
397	bl	kvmppc_hv_entry
398
399	/* Back from the guest, go back to nap */
400	/* Clear our vcpu and vcore pointers so we don't come back in early */
401	li	r0, 0
402	std	r0, HSTATE_KVM_VCPU(r13)
403	/*
404	 * Once we clear HSTATE_KVM_VCORE(r13), the code in
405	 * kvmppc_run_core() is going to assume that all our vcpu
406	 * state is visible in memory.  This lwsync makes sure
407	 * that that is true.
408	 */
409	lwsync
410	std	r0, HSTATE_KVM_VCORE(r13)
411
412	/*
413	 * All secondaries exiting guest will fall through this path.
414	 * Before proceeding, check whether we got an HMI interrupt and,
415	 * if so, invoke the OPAL HMI handler. By now we are sure that the
416	 * primary thread on this core/subcore has already done the partition
417	 * switch/TB resync and we are good to call the OPAL HMI handler.
418	 */
419	cmpwi	r12, BOOK3S_INTERRUPT_HMI
420	bne	kvm_no_guest
421
422	li	r3,0			/* NULL argument */
423	bl	hmi_exception_realmode
424/*
425 * At this point we have finished executing in the guest.
426 * We need to wait for hwthread_req to become zero, since
427 * we may not turn on the MMU while hwthread_req is non-zero.
428 * While waiting we also need to check if we get given a vcpu to run.
429 */
430kvm_no_guest:
431	lbz	r3, HSTATE_HWTHREAD_REQ(r13)
432	cmpwi	r3, 0
433	bne	53f
434	HMT_MEDIUM
435	li	r0, KVM_HWTHREAD_IN_KERNEL
436	stb	r0, HSTATE_HWTHREAD_STATE(r13)
437	/* need to recheck hwthread_req after a barrier, to avoid race */
438	sync
439	lbz	r3, HSTATE_HWTHREAD_REQ(r13)
440	cmpwi	r3, 0
441	bne	54f
442
443	/*
444	 * Jump to idle_return_gpr_loss, which returns to the
445	 * idle_kvm_start_guest caller.
446	 */
447	li	r3, LPCR_PECE0
448	mfspr	r4, SPRN_LPCR
449	rlwimi	r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
450	mtspr	SPRN_LPCR, r4
451	// Return SRR1 wakeup value, or 0 if we went into the guest
452	ld	r3, 32(r1)
453	REST_NVGPRS(r1)
454	ld	r1, 0(r1)	// Switch back to caller stack
455	ld	r0, 16(r1)	// Reload LR
456	ld	r5, 8(r1)	// Reload CR
457	mtlr	r0
458	mtcr	r5
459	blr
460
46153:	HMT_LOW
462	ld	r5, HSTATE_KVM_VCORE(r13)
463	cmpdi	r5, 0
464	bne	60f
465	ld	r3, HSTATE_SPLIT_MODE(r13)
466	cmpdi	r3, 0
467	beq	kvm_no_guest
468	lwz	r0, KVM_SPLIT_DO_SET(r3)
469	cmpwi	r0, 0
470	bne	kvmhv_do_set
471	lwz	r0, KVM_SPLIT_DO_RESTORE(r3)
472	cmpwi	r0, 0
473	bne	kvmhv_do_restore
474	lbz	r0, KVM_SPLIT_DO_NAP(r3)
475	cmpwi	r0, 0
476	beq	kvm_no_guest
477	HMT_MEDIUM
478	b	kvm_unsplit_nap
47960:	HMT_MEDIUM
480	b	kvm_secondary_got_guest
481
48254:	li	r0, KVM_HWTHREAD_IN_KVM
483	stb	r0, HSTATE_HWTHREAD_STATE(r13)
484	b	kvm_no_guest
485
486kvmhv_do_set:
487	/* Set LPCR, LPIDR etc. on P9 */
488	HMT_MEDIUM
489	bl	kvmhv_p9_set_lpcr
490	nop
491	b	kvm_no_guest
492
493kvmhv_do_restore:
494	HMT_MEDIUM
495	bl	kvmhv_p9_restore_lpcr
496	nop
497	b	kvm_no_guest
498
499/*
500 * Here the primary thread is trying to return the core to
501 * whole-core mode, so we need to nap.
502 */
503kvm_unsplit_nap:
504	/*
505	 * When secondaries are napping in kvm_unsplit_nap() with
506	 * hwthread_req = 1, an HMI is ignored even though the subcores have
507	 * already exited the guest. The HMI then keeps waking the secondaries
508	 * from nap in a loop, and they always go back to nap since no vcore
509	 * is assigned to them. This makes it impossible for the primary
510	 * thread to get hold of the secondary threads, resulting in a soft
511	 * lockup in the KVM path.
512	 *
513	 * So check whether an HMI is pending and handle it before we nap.
514	 */
515	cmpwi	r12, BOOK3S_INTERRUPT_HMI
516	bne	55f
517	li	r3, 0			/* NULL argument */
518	bl	hmi_exception_realmode
51955:
520	/*
521	 * Ensure that the secondary doesn't nap while it has
522	 * its vcore pointer set.
523	 */
524	sync		/* matches smp_mb() before setting split_info.do_nap */
525	ld	r0, HSTATE_KVM_VCORE(r13)
526	cmpdi	r0, 0
527	bne	kvm_no_guest
528	/* clear any pending message */
529BEGIN_FTR_SECTION
530	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
531	PPC_MSGCLR(6)
532END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
533	/* Set kvm_split_mode.napped[tid] = 1 */
534	ld	r3, HSTATE_SPLIT_MODE(r13)
535	li	r0, 1
536	lbz	r4, HSTATE_TID(r13)
537	addi	r4, r4, KVM_SPLIT_NAPPED
538	stbx	r0, r3, r4
539	/* Check the do_nap flag again after setting napped[] */
540	sync
541	lbz	r0, KVM_SPLIT_DO_NAP(r3)
542	cmpwi	r0, 0
543	beq	57f
544	li	r3, NAPPING_UNSPLIT
545	stb	r3, HSTATE_NAPPING(r13)
546	li	r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
547	mfspr	r5, SPRN_LPCR
548	rlwimi	r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
549	b	kvm_nap_sequence
550
55157:	li	r0, 0
552	stbx	r0, r3, r4
553	b	kvm_no_guest
554
555/******************************************************************************
556 *                                                                            *
557 *                               Entry code                                   *
558 *                                                                            *
559 *****************************************************************************/
560
561.global kvmppc_hv_entry
562kvmppc_hv_entry:
563
564	/* Required state:
565	 *
566	 * R4 = vcpu pointer (or NULL)
567	 * MSR = ~IR|DR
568	 * R13 = PACA
569	 * R1 = host R1
570	 * R2 = TOC
571	 * all other volatile GPRS = free
572	 * Does not preserve non-volatile GPRs or CR fields
573	 */
574	mflr	r0
575	std	r0, PPC_LR_STKOFF(r1)
576	stdu	r1, -SFS(r1)
577
578	/* Save R1 in the PACA */
579	std	r1, HSTATE_HOST_R1(r13)
580
581	li	r6, KVM_GUEST_MODE_HOST_HV
582	stb	r6, HSTATE_IN_GUEST(r13)
583
584#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
585	/* Store initial timestamp */
586	cmpdi	r4, 0
587	beq	1f
588	addi	r3, r4, VCPU_TB_RMENTRY
589	bl	kvmhv_start_timing
5901:
591#endif
592
593	ld	r5, HSTATE_KVM_VCORE(r13)
594	ld	r9, VCORE_KVM(r5)	/* pointer to struct kvm */
595
596	/*
597	 * POWER7/POWER8 host -> guest partition switch code.
598	 * We don't have to lock against concurrent tlbies,
599	 * but we do have to coordinate across hardware threads.
600	 */
601	/* Set bit in entry map iff exit map is zero. */
602	li	r7, 1
603	lbz	r6, HSTATE_PTID(r13)
604	sld	r7, r7, r6
605	addi	r8, r5, VCORE_ENTRY_EXIT
60621:	lwarx	r3, 0, r8
607	cmpwi	r3, 0x100		/* any threads starting to exit? */
608	bge	secondary_too_late	/* if so we're too late to the party */
609	or	r3, r3, r7
610	stwcx.	r3, 0, r8
611	bne	21b
612
613	/* Primary thread switches to guest partition. */
614	cmpwi	r6,0
615	bne	10f
616
617	lwz	r7,KVM_LPID(r9)
618BEGIN_FTR_SECTION
619	ld	r6,KVM_SDR1(r9)
620	li	r0,LPID_RSVD		/* switch to reserved LPID */
621	mtspr	SPRN_LPID,r0
622	ptesync
623	mtspr	SPRN_SDR1,r6		/* switch to partition page table */
624END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
625	mtspr	SPRN_LPID,r7
626	isync
627
628	/* See if we need to flush the TLB. */
629	mr	r3, r9			/* kvm pointer */
630	lhz	r4, PACAPACAINDEX(r13)	/* physical cpu number */
631	li	r5, 0			/* nested vcpu pointer */
632	bl	kvmppc_check_need_tlb_flush
633	nop
634	ld	r5, HSTATE_KVM_VCORE(r13)
635
636	/* Add timebase offset onto timebase */
63722:	ld	r8,VCORE_TB_OFFSET(r5)
638	cmpdi	r8,0
639	beq	37f
640	std	r8, VCORE_TB_OFFSET_APPL(r5)
641	mftb	r6		/* current host timebase */
642	add	r8,r8,r6
643	mtspr	SPRN_TBU40,r8	/* update upper 40 bits */
644	mftb	r7		/* check if lower 24 bits overflowed */
645	clrldi	r6,r6,40
646	clrldi	r7,r7,40
647	cmpld	r7,r6
648	bge	37f
649	addis	r8,r8,0x100	/* if so, increment upper 40 bits */
650	mtspr	SPRN_TBU40,r8
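	/*
	 * In rough C terms (illustrative; mttbu40() stands in for
	 * mtspr SPRN_TBU40, which replaces only the upper 40 bits and
	 * leaves the low 24 bits of the timebase ticking):
	 *
	 *	if (vc->tb_offset) {
	 *		vc->tb_offset_applied = vc->tb_offset;
	 *		old = mftb();
	 *		new = old + vc->tb_offset;
	 *		mttbu40(new);
	 *		if ((mftb() & 0xffffff) < (old & 0xffffff))
	 *			mttbu40(new + 0x1000000);	// low bits wrapped
	 *	}
	 */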
651
652	/* Load guest PCR value to select appropriate compat mode */
65337:	ld	r7, VCORE_PCR(r5)
654	LOAD_REG_IMMEDIATE(r6, PCR_MASK)
655	cmpld	r7, r6
656	beq	38f
657	or	r7, r7, r6
658	mtspr	SPRN_PCR, r7
65938:
660
661BEGIN_FTR_SECTION
662	/* DPDES and VTB are shared between threads */
663	ld	r8, VCORE_DPDES(r5)
664	ld	r7, VCORE_VTB(r5)
665	mtspr	SPRN_DPDES, r8
666	mtspr	SPRN_VTB, r7
667END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
668
669	/* Mark the subcore state as inside guest */
670	bl	kvmppc_subcore_enter_guest
671	nop
672	ld	r5, HSTATE_KVM_VCORE(r13)
673	ld	r4, HSTATE_KVM_VCPU(r13)
674	li	r0,1
675	stb	r0,VCORE_IN_GUEST(r5)	/* signal secondaries to continue */
676
677	/* Do we have a guest vcpu to run? */
67810:	cmpdi	r4, 0
679	beq	kvmppc_primary_no_guest
680kvmppc_got_guest:
681	/* Increment yield count if they have a VPA */
682	ld	r3, VCPU_VPA(r4)
683	cmpdi	r3, 0
684	beq	25f
685	li	r6, LPPACA_YIELDCOUNT
686	LWZX_BE	r5, r3, r6
687	addi	r5, r5, 1
688	STWX_BE	r5, r3, r6
689	li	r6, 1
690	stb	r6, VCPU_VPA_DIRTY(r4)
69125:
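	/*
	 * Illustrative C for the yield count update above (field names
	 * are approximate; the count is stored big-endian in the VPA):
	 *
	 *	if (vcpu->arch.vpa) {
	 *		vpa->yield_count =
	 *			cpu_to_be32(be32_to_cpu(vpa->yield_count) + 1);
	 *		vcpu->arch.vpa_dirty = 1;
	 *	}
	 */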
692
693	/* Save purr/spurr */
694	mfspr	r5,SPRN_PURR
695	mfspr	r6,SPRN_SPURR
696	std	r5,HSTATE_PURR(r13)
697	std	r6,HSTATE_SPURR(r13)
698	ld	r7,VCPU_PURR(r4)
699	ld	r8,VCPU_SPURR(r4)
700	mtspr	SPRN_PURR,r7
701	mtspr	SPRN_SPURR,r8
702
703	/* Save host values of some registers */
704BEGIN_FTR_SECTION
705	mfspr	r5, SPRN_TIDR
706	mfspr	r6, SPRN_PSSCR
707	mfspr	r7, SPRN_PID
708	std	r5, STACK_SLOT_TID(r1)
709	std	r6, STACK_SLOT_PSSCR(r1)
710	std	r7, STACK_SLOT_PID(r1)
711	mfspr	r5, SPRN_HFSCR
712	std	r5, STACK_SLOT_HFSCR(r1)
713END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
714BEGIN_FTR_SECTION
715	mfspr	r5, SPRN_CIABR
716	mfspr	r6, SPRN_DAWR0
717	mfspr	r7, SPRN_DAWRX0
718	mfspr	r8, SPRN_IAMR
719	std	r5, STACK_SLOT_CIABR(r1)
720	std	r6, STACK_SLOT_DAWR(r1)
721	std	r7, STACK_SLOT_DAWRX(r1)
722	std	r8, STACK_SLOT_IAMR(r1)
723END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
724
725	mfspr	r5, SPRN_AMR
726	std	r5, STACK_SLOT_AMR(r1)
727	mfspr	r6, SPRN_UAMOR
728	std	r6, STACK_SLOT_UAMOR(r1)
729
730BEGIN_FTR_SECTION
731	/* Set partition DABR */
732	/* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
733	lwz	r5,VCPU_DABRX(r4)
734	ld	r6,VCPU_DABR(r4)
735	mtspr	SPRN_DABRX,r5
736	mtspr	SPRN_DABR,r6
737	isync
738END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
739
740#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
741/*
742 * Branch around the call if both CPU_FTR_TM and
743 * CPU_FTR_P9_TM_HV_ASSIST are off.
744 */
745BEGIN_FTR_SECTION
746	b	91f
747END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
748	/*
749	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
750	 */
751	mr      r3, r4
752	ld      r4, VCPU_MSR(r3)
753	li	r5, 0			/* don't preserve non-vol regs */
754	bl	kvmppc_restore_tm_hv
755	nop
756	ld	r4, HSTATE_KVM_VCPU(r13)
75791:
758#endif
759
760	/* Load guest PMU registers; r4 = vcpu pointer here */
761	mr	r3, r4
762	bl	kvmhv_load_guest_pmu
763
764	/* Load up FP, VMX and VSX registers */
765	ld	r4, HSTATE_KVM_VCPU(r13)
766	bl	kvmppc_load_fp
767
768	ld	r14, VCPU_GPR(R14)(r4)
769	ld	r15, VCPU_GPR(R15)(r4)
770	ld	r16, VCPU_GPR(R16)(r4)
771	ld	r17, VCPU_GPR(R17)(r4)
772	ld	r18, VCPU_GPR(R18)(r4)
773	ld	r19, VCPU_GPR(R19)(r4)
774	ld	r20, VCPU_GPR(R20)(r4)
775	ld	r21, VCPU_GPR(R21)(r4)
776	ld	r22, VCPU_GPR(R22)(r4)
777	ld	r23, VCPU_GPR(R23)(r4)
778	ld	r24, VCPU_GPR(R24)(r4)
779	ld	r25, VCPU_GPR(R25)(r4)
780	ld	r26, VCPU_GPR(R26)(r4)
781	ld	r27, VCPU_GPR(R27)(r4)
782	ld	r28, VCPU_GPR(R28)(r4)
783	ld	r29, VCPU_GPR(R29)(r4)
784	ld	r30, VCPU_GPR(R30)(r4)
785	ld	r31, VCPU_GPR(R31)(r4)
786
787	/* Switch DSCR to guest value */
788	ld	r5, VCPU_DSCR(r4)
789	mtspr	SPRN_DSCR, r5
790
791BEGIN_FTR_SECTION
792	/* Skip next section on POWER7 */
793	b	8f
794END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
795	/* Load up POWER8-specific registers */
796	ld	r5, VCPU_IAMR(r4)
797	lwz	r6, VCPU_PSPB(r4)
798	ld	r7, VCPU_FSCR(r4)
799	mtspr	SPRN_IAMR, r5
800	mtspr	SPRN_PSPB, r6
801	mtspr	SPRN_FSCR, r7
802	/*
803	 * Handle broken DAWR case by not writing it. This means we
804	 * can still store the DAWR register for migration.
805	 */
806	LOAD_REG_ADDR(r5, dawr_force_enable)
807	lbz	r5, 0(r5)
808	cmpdi	r5, 0
809	beq	1f
810	ld	r5, VCPU_DAWR(r4)
811	ld	r6, VCPU_DAWRX(r4)
812	mtspr	SPRN_DAWR0, r5
813	mtspr	SPRN_DAWRX0, r6
8141:
815	ld	r7, VCPU_CIABR(r4)
816	ld	r8, VCPU_TAR(r4)
817	mtspr	SPRN_CIABR, r7
818	mtspr	SPRN_TAR, r8
819	ld	r5, VCPU_IC(r4)
820	ld	r8, VCPU_EBBHR(r4)
821	mtspr	SPRN_IC, r5
822	mtspr	SPRN_EBBHR, r8
823	ld	r5, VCPU_EBBRR(r4)
824	ld	r6, VCPU_BESCR(r4)
825	lwz	r7, VCPU_GUEST_PID(r4)
826	ld	r8, VCPU_WORT(r4)
827	mtspr	SPRN_EBBRR, r5
828	mtspr	SPRN_BESCR, r6
829	mtspr	SPRN_PID, r7
830	mtspr	SPRN_WORT, r8
831BEGIN_FTR_SECTION
832	/* POWER8-only registers */
833	ld	r5, VCPU_TCSCR(r4)
834	ld	r6, VCPU_ACOP(r4)
835	ld	r7, VCPU_CSIGR(r4)
836	ld	r8, VCPU_TACR(r4)
837	mtspr	SPRN_TCSCR, r5
838	mtspr	SPRN_ACOP, r6
839	mtspr	SPRN_CSIGR, r7
840	mtspr	SPRN_TACR, r8
841	nop
842FTR_SECTION_ELSE
843	/* POWER9-only registers */
844	ld	r5, VCPU_TID(r4)
845	ld	r6, VCPU_PSSCR(r4)
846	lbz	r8, HSTATE_FAKE_SUSPEND(r13)
847	oris	r6, r6, PSSCR_EC@h	/* This makes stop trap to HV */
848	rldimi	r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG
849	ld	r7, VCPU_HFSCR(r4)
850	mtspr	SPRN_TIDR, r5
851	mtspr	SPRN_PSSCR, r6
852	mtspr	SPRN_HFSCR, r7
853ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
8548:
855
856	ld	r5, VCPU_SPRG0(r4)
857	ld	r6, VCPU_SPRG1(r4)
858	ld	r7, VCPU_SPRG2(r4)
859	ld	r8, VCPU_SPRG3(r4)
860	mtspr	SPRN_SPRG0, r5
861	mtspr	SPRN_SPRG1, r6
862	mtspr	SPRN_SPRG2, r7
863	mtspr	SPRN_SPRG3, r8
864
865	/* Load up DAR and DSISR */
866	ld	r5, VCPU_DAR(r4)
867	lwz	r6, VCPU_DSISR(r4)
868	mtspr	SPRN_DAR, r5
869	mtspr	SPRN_DSISR, r6
870
871	/* Restore AMR and UAMOR, set AMOR to all 1s */
872	ld	r5,VCPU_AMR(r4)
873	ld	r6,VCPU_UAMOR(r4)
874	li	r7,-1
875	mtspr	SPRN_AMR,r5
876	mtspr	SPRN_UAMOR,r6
877	mtspr	SPRN_AMOR,r7
878
879	/* Restore state of CTRL run bit; assume 1 on entry */
880	lwz	r5,VCPU_CTRL(r4)
881	andi.	r5,r5,1
882	bne	4f
883	mfspr	r6,SPRN_CTRLF
884	clrrdi	r6,r6,1
885	mtspr	SPRN_CTRLT,r6
8864:
887	/* Secondary threads wait for primary to have done partition switch */
888	ld	r5, HSTATE_KVM_VCORE(r13)
889	lbz	r6, HSTATE_PTID(r13)
890	cmpwi	r6, 0
891	beq	21f
892	lbz	r0, VCORE_IN_GUEST(r5)
893	cmpwi	r0, 0
894	bne	21f
895	HMT_LOW
89620:	lwz	r3, VCORE_ENTRY_EXIT(r5)
897	cmpwi	r3, 0x100
898	bge	no_switch_exit
899	lbz	r0, VCORE_IN_GUEST(r5)
900	cmpwi	r0, 0
901	beq	20b
902	HMT_MEDIUM
90321:
904	/* Set LPCR. */
905	ld	r8,VCORE_LPCR(r5)
906	mtspr	SPRN_LPCR,r8
907	isync
908
909	/*
910	 * Set the decrementer to the guest decrementer.
911	 */
912	ld	r8,VCPU_DEC_EXPIRES(r4)
913	/* r8 is a host timebase value here, convert to guest TB */
914	ld	r5,HSTATE_KVM_VCORE(r13)
915	ld	r6,VCORE_TB_OFFSET_APPL(r5)
916	add	r8,r8,r6
917	mftb	r7
918	subf	r3,r7,r8
919	mtspr	SPRN_DEC,r3
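	/*
	 * Roughly (illustrative C): dec_expires is kept in host timebase
	 * units, so convert to guest timebase before programming DEC:
	 *
	 *	expires = vcpu->arch.dec_expires + vc->tb_offset_applied;
	 *	mtspr(SPRN_DEC, expires - mftb());
	 */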
920
921	/* Check if HDEC expires soon */
922	mfspr	r3, SPRN_HDEC
923	EXTEND_HDEC(r3)
924	cmpdi	r3, 512		/* 1 microsecond */
925	blt	hdec_soon
926
927	/* For hash guest, clear out and reload the SLB */
928	ld	r6, VCPU_KVM(r4)
929	lbz	r0, KVM_RADIX(r6)
930	cmpwi	r0, 0
931	bne	9f
932	li	r6, 0
933	slbmte	r6, r6
934	slbia
935	ptesync
936
937	/* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
938	lwz	r5,VCPU_SLB_MAX(r4)
939	cmpwi	r5,0
940	beq	9f
941	mtctr	r5
942	addi	r6,r4,VCPU_SLB
9431:	ld	r8,VCPU_SLB_E(r6)
944	ld	r9,VCPU_SLB_V(r6)
945	slbmte	r9,r8
946	addi	r6,r6,VCPU_SLB_SIZE
947	bdnz	1b
9489:
949
950#ifdef CONFIG_KVM_XICS
951	/* We are entering the guest on that thread, push VCPU to XIVE */
952	ld	r11, VCPU_XIVE_SAVED_STATE(r4)
953	li	r9, TM_QW1_OS
954	lwz	r8, VCPU_XIVE_CAM_WORD(r4)
955	cmpwi	r8, 0
956	beq	no_xive
957	li	r7, TM_QW1_OS + TM_WORD2
958	mfmsr	r0
959	andi.	r0, r0, MSR_DR		/* in real mode? */
960	beq	2f
961	ld	r10, HSTATE_XIVE_TIMA_VIRT(r13)
962	cmpldi	cr1, r10, 0
963	beq     cr1, no_xive
964	eieio
965	stdx	r11,r9,r10
966	stwx	r8,r7,r10
967	b	3f
9682:	ld	r10, HSTATE_XIVE_TIMA_PHYS(r13)
969	cmpldi	cr1, r10, 0
970	beq	cr1, no_xive
971	eieio
972	stdcix	r11,r9,r10
973	stwcix	r8,r7,r10
9743:	li	r9, 1
975	stb	r9, VCPU_XIVE_PUSHED(r4)
976	eieio
977
978	/*
979	 * We clear the irq_pending flag. There is a small chance of a
980	 * race vs. the escalation interrupt happening on another
981	 * processor setting it again, but the only consequence is to
982	 * cause a spurious wakeup on the next H_CEDE which is not an
983	 * issue.
984	 */
985	li	r0,0
986	stb	r0, VCPU_IRQ_PENDING(r4)
987
988	/*
989	 * In single escalation mode, if the escalation interrupt is
990	 * on, we mask it.
991	 */
992	lbz	r0, VCPU_XIVE_ESC_ON(r4)
993	cmpwi	cr1, r0,0
994	beq	cr1, 1f
995	li	r9, XIVE_ESB_SET_PQ_01
996	beq	4f			/* in real mode? */
997	ld	r10, VCPU_XIVE_ESC_VADDR(r4)
998	ldx	r0, r10, r9
999	b	5f
10004:	ld	r10, VCPU_XIVE_ESC_RADDR(r4)
1001	ldcix	r0, r10, r9
10025:	sync
1003
1004	/* We have a possible subtle race here: The escalation interrupt might
1005	 * have fired and be on its way to the host queue while we mask it,
1006	 * and if we unmask it early enough (re-cede right away), there is
1007	 * a theoretical possibility that it fires again, thus landing in the
1008	 * target queue more than once which is a big no-no.
1009	 *
1010	 * Fortunately, solving this is rather easy. If the above load setting
1011	 * PQ to 01 returns a previous value where P is set, then we know the
1012	 * escalation interrupt is somewhere on its way to the host. In that
1013	 * case we simply don't clear the xive_esc_on flag below. It will be
1014	 * eventually cleared by the handler for the escalation interrupt.
1015	 *
1016	 * Then, when doing a cede, we check that flag again before re-enabling
1017	 * the escalation interrupt, and if set, we abort the cede.
1018	 */
1019	andi.	r0, r0, XIVE_ESB_VAL_P
1020	bne-	1f
1021
1022	/* Now P is 0, we can clear the flag */
1023	li	r0, 0
1024	stb	r0, VCPU_XIVE_ESC_ON(r4)
10251:
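	/*
	 * A hedged C-level sketch of the escalation masking above
	 * (esb_load() and esc_esb_addr stand in for the virtual/real-mode
	 * ESB access done by the ldx/ldcix pair):
	 *
	 *	if (vcpu->arch.xive_esc_on) {
	 *		old = esb_load(esc_esb_addr + XIVE_ESB_SET_PQ_01);
	 *		if (!(old & XIVE_ESB_VAL_P))	// not already in flight
	 *			vcpu->arch.xive_esc_on = 0;
	 *	}
	 */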
1026no_xive:
1027#endif /* CONFIG_KVM_XICS */
1028
1029	li	r0, 0
1030	stw	r0, STACK_SLOT_SHORT_PATH(r1)
1031
1032deliver_guest_interrupt:	/* r4 = vcpu, r13 = paca */
1033	/* Check if we can deliver an external or decrementer interrupt now */
1034	ld	r0, VCPU_PENDING_EXC(r4)
1035BEGIN_FTR_SECTION
1036	/* On POWER9, also check for emulated doorbell interrupt */
1037	lbz	r3, VCPU_DBELL_REQ(r4)
1038	or	r0, r0, r3
1039END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1040	cmpdi	r0, 0
1041	beq	71f
1042	mr	r3, r4
1043	bl	kvmppc_guest_entry_inject_int
1044	ld	r4, HSTATE_KVM_VCPU(r13)
104571:
1046	ld	r6, VCPU_SRR0(r4)
1047	ld	r7, VCPU_SRR1(r4)
1048	mtspr	SPRN_SRR0, r6
1049	mtspr	SPRN_SRR1, r7
1050
1051fast_guest_entry_c:
1052	ld	r10, VCPU_PC(r4)
1053	ld	r11, VCPU_MSR(r4)
1054	/* r11 = vcpu->arch.msr & ~MSR_HV */
1055	rldicl	r11, r11, 63 - MSR_HV_LG, 1
1056	rotldi	r11, r11, 1 + MSR_HV_LG
1057	ori	r11, r11, MSR_ME
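	/*
	 * i.e. roughly (illustrative):
	 *
	 *	hsrr1 = (vcpu->arch.msr & ~MSR_HV) | MSR_ME;
	 *
	 * HV is always cleared and ME forced on for the guest.
	 */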
1058
1059	ld	r6, VCPU_CTR(r4)
1060	ld	r7, VCPU_XER(r4)
1061	mtctr	r6
1062	mtxer	r7
1063
1064/*
1065 * Required state:
1066 * R4 = vcpu
1067 * R10: value for HSRR0
1068 * R11: value for HSRR1
1069 * R13 = PACA
1070 */
1071fast_guest_return:
1072	li	r0,0
1073	stb	r0,VCPU_CEDED(r4)	/* cancel cede */
1074	mtspr	SPRN_HSRR0,r10
1075	mtspr	SPRN_HSRR1,r11
1076
1077	/* Activate guest mode, so faults get handled by KVM */
1078	li	r9, KVM_GUEST_MODE_GUEST_HV
1079	stb	r9, HSTATE_IN_GUEST(r13)
1080
1081#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
1082	/* Accumulate timing */
1083	addi	r3, r4, VCPU_TB_GUEST
1084	bl	kvmhv_accumulate_time
1085#endif
1086
1087	/* Enter guest */
1088
1089BEGIN_FTR_SECTION
1090	ld	r5, VCPU_CFAR(r4)
1091	mtspr	SPRN_CFAR, r5
1092END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
1093BEGIN_FTR_SECTION
1094	ld	r0, VCPU_PPR(r4)
1095END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1096
1097	ld	r5, VCPU_LR(r4)
1098	mtlr	r5
1099
1100	ld	r1, VCPU_GPR(R1)(r4)
1101	ld	r5, VCPU_GPR(R5)(r4)
1102	ld	r8, VCPU_GPR(R8)(r4)
1103	ld	r9, VCPU_GPR(R9)(r4)
1104	ld	r10, VCPU_GPR(R10)(r4)
1105	ld	r11, VCPU_GPR(R11)(r4)
1106	ld	r12, VCPU_GPR(R12)(r4)
1107	ld	r13, VCPU_GPR(R13)(r4)
1108
1109BEGIN_FTR_SECTION
1110	mtspr	SPRN_PPR, r0
1111END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1112
1113/* Move canary into DSISR to check for later */
1114BEGIN_FTR_SECTION
1115	li	r0, 0x7fff
1116	mtspr	SPRN_HDSISR, r0
1117END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1118
1119	ld	r6, VCPU_KVM(r4)
1120	lbz	r7, KVM_SECURE_GUEST(r6)
1121	cmpdi	r7, 0
1122	ld	r6, VCPU_GPR(R6)(r4)
1123	ld	r7, VCPU_GPR(R7)(r4)
1124	bne	ret_to_ultra
1125
1126	ld	r0, VCPU_CR(r4)
1127	mtcr	r0
1128
1129	ld	r0, VCPU_GPR(R0)(r4)
1130	ld	r2, VCPU_GPR(R2)(r4)
1131	ld	r3, VCPU_GPR(R3)(r4)
1132	ld	r4, VCPU_GPR(R4)(r4)
1133	HRFI_TO_GUEST
1134	b	.
1135/*
1136 * Use UV_RETURN ultracall to return control back to the Ultravisor after
1137 * processing a hypercall or interrupt that was forwarded (a.k.a. reflected)
1138 * to the Hypervisor.
1139 *
1140 * All registers have already been loaded, except:
1141 *   R0 = hcall result
1142 *   R2 = SRR1, so UV can detect a synthesized interrupt (if any)
1143 *   R3 = UV_RETURN
1144 */
1145ret_to_ultra:
1146	ld	r0, VCPU_CR(r4)
1147	mtcr	r0
1148
1149	ld	r0, VCPU_GPR(R3)(r4)
1150	mfspr	r2, SPRN_SRR1
1151	li	r3, 0
1152	ori	r3, r3, UV_RETURN
1153	ld	r4, VCPU_GPR(R4)(r4)
1154	sc	2
1155
1156/*
1157 * Enter the guest on a P9 or later system where we have exactly
1158 * one vcpu per vcore and we don't need to go to real mode
1159 * (which implies that host and guest are both using radix MMU mode).
1160 * r3 = vcpu pointer
1161 * Most SPRs and all the VSRs have been loaded already.
1162 */
1163_GLOBAL(__kvmhv_vcpu_entry_p9)
1164EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9)
1165	mflr	r0
1166	std	r0, PPC_LR_STKOFF(r1)
1167	stdu	r1, -SFS(r1)
1168
1169	li	r0, 1
1170	stw	r0, STACK_SLOT_SHORT_PATH(r1)
1171
1172	std	r3, HSTATE_KVM_VCPU(r13)
1173	mfcr	r4
1174	stw	r4, SFS+8(r1)
1175
1176	std	r1, HSTATE_HOST_R1(r13)
1177
1178	reg = 14
1179	.rept	18
1180	std	reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
1181	reg = reg + 1
1182	.endr
1183
1184	reg = 14
1185	.rept	18
1186	ld	reg, __VCPU_GPR(reg)(r3)
1187	reg = reg + 1
1188	.endr
1189
1190	mfmsr	r10
1191	std	r10, HSTATE_HOST_MSR(r13)
1192
1193	mr	r4, r3
1194	b	fast_guest_entry_c
1195guest_exit_short_path:
1196
1197	li	r0, KVM_GUEST_MODE_NONE
1198	stb	r0, HSTATE_IN_GUEST(r13)
1199
1200	reg = 14
1201	.rept	18
1202	std	reg, __VCPU_GPR(reg)(r9)
1203	reg = reg + 1
1204	.endr
1205
1206	reg = 14
1207	.rept	18
1208	ld	reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
1209	reg = reg + 1
1210	.endr
1211
1212	lwz	r4, SFS+8(r1)
1213	mtcr	r4
1214
1215	mr	r3, r12		/* trap number */
1216
1217	addi	r1, r1, SFS
1218	ld	r0, PPC_LR_STKOFF(r1)
1219	mtlr	r0
1220
1221	/* If we are in real mode, do a rfid to get back to the caller */
1222	mfmsr	r4
1223	andi.	r5, r4, MSR_IR
1224	bnelr
1225	rldicl	r5, r4, 64 - MSR_TS_S_LG, 62	/* extract TS field */
1226	mtspr	SPRN_SRR0, r0
1227	ld	r10, HSTATE_HOST_MSR(r13)
1228	rldimi	r10, r5, MSR_TS_S_LG, 63 - MSR_TS_T_LG
1229	mtspr	SPRN_SRR1, r10
1230	RFI_TO_KERNEL
1231	b	.
1232
1233secondary_too_late:
1234	li	r12, 0
1235	stw	r12, STACK_SLOT_TRAP(r1)
1236	cmpdi	r4, 0
1237	beq	11f
1238	stw	r12, VCPU_TRAP(r4)
1239#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
1240	addi	r3, r4, VCPU_TB_RMEXIT
1241	bl	kvmhv_accumulate_time
1242#endif
124311:	b	kvmhv_switch_to_host
1244
1245no_switch_exit:
1246	HMT_MEDIUM
1247	li	r12, 0
1248	b	12f
1249hdec_soon:
1250	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
125112:	stw	r12, VCPU_TRAP(r4)
1252	mr	r9, r4
1253#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
1254	addi	r3, r4, VCPU_TB_RMEXIT
1255	bl	kvmhv_accumulate_time
1256#endif
1257	b	guest_bypass
1258
1259/******************************************************************************
1260 *                                                                            *
1261 *                               Exit code                                    *
1262 *                                                                            *
1263 *****************************************************************************/
1264
1265/*
1266 * We come here from the first-level interrupt handlers.
1267 */
1268	.globl	kvmppc_interrupt_hv
1269kvmppc_interrupt_hv:
1270	/*
1271	 * Register contents:
1272	 * R12		= (guest CR << 32) | interrupt vector
1273	 * R13		= PACA
1274	 * guest R12 saved in shadow VCPU SCRATCH0
1275	 * guest R13 saved in SPRN_SCRATCH0
1276	 */
1277	std	r9, HSTATE_SCRATCH2(r13)
1278	lbz	r9, HSTATE_IN_GUEST(r13)
1279	cmpwi	r9, KVM_GUEST_MODE_HOST_HV
1280	beq	kvmppc_bad_host_intr
1281#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
1282	cmpwi	r9, KVM_GUEST_MODE_GUEST
1283	ld	r9, HSTATE_SCRATCH2(r13)
1284	beq	kvmppc_interrupt_pr
1285#endif
1286	/* We're now back in the host but in guest MMU context */
1287	li	r9, KVM_GUEST_MODE_HOST_HV
1288	stb	r9, HSTATE_IN_GUEST(r13)
1289
1290	ld	r9, HSTATE_KVM_VCPU(r13)
1291
1292	/* Save registers */
1293
1294	std	r0, VCPU_GPR(R0)(r9)
1295	std	r1, VCPU_GPR(R1)(r9)
1296	std	r2, VCPU_GPR(R2)(r9)
1297	std	r3, VCPU_GPR(R3)(r9)
1298	std	r4, VCPU_GPR(R4)(r9)
1299	std	r5, VCPU_GPR(R5)(r9)
1300	std	r6, VCPU_GPR(R6)(r9)
1301	std	r7, VCPU_GPR(R7)(r9)
1302	std	r8, VCPU_GPR(R8)(r9)
1303	ld	r0, HSTATE_SCRATCH2(r13)
1304	std	r0, VCPU_GPR(R9)(r9)
1305	std	r10, VCPU_GPR(R10)(r9)
1306	std	r11, VCPU_GPR(R11)(r9)
1307	ld	r3, HSTATE_SCRATCH0(r13)
1308	std	r3, VCPU_GPR(R12)(r9)
1309	/* CR is in the high half of r12 */
1310	srdi	r4, r12, 32
1311	std	r4, VCPU_CR(r9)
1312BEGIN_FTR_SECTION
1313	ld	r3, HSTATE_CFAR(r13)
1314	std	r3, VCPU_CFAR(r9)
1315END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
1316BEGIN_FTR_SECTION
1317	ld	r4, HSTATE_PPR(r13)
1318	std	r4, VCPU_PPR(r9)
1319END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1320
1321	/* Restore R1/R2 so we can handle faults */
1322	ld	r1, HSTATE_HOST_R1(r13)
1323	ld	r2, PACATOC(r13)
1324
1325	mfspr	r10, SPRN_SRR0
1326	mfspr	r11, SPRN_SRR1
1327	std	r10, VCPU_SRR0(r9)
1328	std	r11, VCPU_SRR1(r9)
1329	/* trap is in the low half of r12, clear CR from the high half */
1330	clrldi	r12, r12, 32
1331	andi.	r0, r12, 2		/* need to read HSRR0/1? */
1332	beq	1f
1333	mfspr	r10, SPRN_HSRR0
1334	mfspr	r11, SPRN_HSRR1
1335	clrrdi	r12, r12, 2
13361:	std	r10, VCPU_PC(r9)
1337	std	r11, VCPU_MSR(r9)
1338
1339	GET_SCRATCH0(r3)
1340	mflr	r4
1341	std	r3, VCPU_GPR(R13)(r9)
1342	std	r4, VCPU_LR(r9)
1343
1344	stw	r12,VCPU_TRAP(r9)
1345
1346	/*
1347	 * Now that we have saved away SRR0/1 and HSRR0/1,
1348	 * interrupts are recoverable in principle, so set MSR_RI.
1349	 * This becomes important for relocation-on interrupts from
1350	 * the guest, which we can get in radix mode on POWER9.
1351	 */
1352	li	r0, MSR_RI
1353	mtmsrd	r0, 1
1354
1355#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
1356	addi	r3, r9, VCPU_TB_RMINTR
1357	mr	r4, r9
1358	bl	kvmhv_accumulate_time
1359	ld	r5, VCPU_GPR(R5)(r9)
1360	ld	r6, VCPU_GPR(R6)(r9)
1361	ld	r7, VCPU_GPR(R7)(r9)
1362	ld	r8, VCPU_GPR(R8)(r9)
1363#endif
1364
1365	/* Save HEIR (HV emulation assist reg) in emul_inst
1366	 * if this is an HEI (HV emulation interrupt, e40) */
1367	li	r3,KVM_INST_FETCH_FAILED
1368	stw	r3,VCPU_LAST_INST(r9)
1369	cmpwi	r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
1370	bne	11f
1371	mfspr	r3,SPRN_HEIR
137211:	stw	r3,VCPU_HEIR(r9)
1373
1374	/* these are volatile across C function calls */
1375	mfctr	r3
1376	mfxer	r4
1377	std	r3, VCPU_CTR(r9)
1378	std	r4, VCPU_XER(r9)
1379
1380	/* Save more register state  */
1381	mfdar	r3
1382	mfdsisr	r4
1383	std	r3, VCPU_DAR(r9)
1384	stw	r4, VCPU_DSISR(r9)
1385
1386	/* If this is a page table miss then see if it's theirs or ours */
1387	cmpwi	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
1388	beq	kvmppc_hdsi
1389	std	r3, VCPU_FAULT_DAR(r9)
1390	stw	r4, VCPU_FAULT_DSISR(r9)
1391	cmpwi	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
1392	beq	kvmppc_hisi
1393
1394#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1395	/* For softpatch interrupt, go off and do TM instruction emulation */
1396	cmpwi	r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
1397	beq	kvmppc_tm_emul
1398#endif
1399
1400	/* See if this is a leftover HDEC interrupt */
1401	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
1402	bne	2f
1403	mfspr	r3,SPRN_HDEC
1404	EXTEND_HDEC(r3)
1405	cmpdi	r3,0
1406	mr	r4,r9
1407	bge	fast_guest_return
14082:
1409	/* See if this is an hcall we can handle in real mode */
1410	cmpwi	r12,BOOK3S_INTERRUPT_SYSCALL
1411	beq	hcall_try_real_mode
1412
1413	/* Hypervisor doorbell - exit only if host IPI flag set */
1414	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
1415	bne	3f
1416BEGIN_FTR_SECTION
1417	PPC_MSGSYNC
1418	lwsync
1419	/* always exit if we're running a nested guest */
1420	ld	r0, VCPU_NESTED(r9)
1421	cmpdi	r0, 0
1422	bne	guest_exit_cont
1423END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1424	lbz	r0, HSTATE_HOST_IPI(r13)
1425	cmpwi	r0, 0
1426	beq	maybe_reenter_guest
1427	b	guest_exit_cont
14283:
1429	/* If it's a hypervisor facility unavailable interrupt, save HFSCR */
1430	cmpwi	r12, BOOK3S_INTERRUPT_H_FAC_UNAVAIL
1431	bne	14f
1432	mfspr	r3, SPRN_HFSCR
1433	std	r3, VCPU_HFSCR(r9)
1434	b	guest_exit_cont
143514:
1436	/* External interrupt ? */
1437	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
1438	beq	kvmppc_guest_external
1439	/* See if it is a machine check */
1440	cmpwi	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
1441	beq	machine_check_realmode
1442	/* Or a hypervisor maintenance interrupt */
1443	cmpwi	r12, BOOK3S_INTERRUPT_HMI
1444	beq	hmi_realmode
1445
1446guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
1447
1448#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
1449	addi	r3, r9, VCPU_TB_RMEXIT
1450	mr	r4, r9
1451	bl	kvmhv_accumulate_time
1452#endif
1453#ifdef CONFIG_KVM_XICS
1454	/* We are exiting, pull the VP from the XIVE */
1455	lbz	r0, VCPU_XIVE_PUSHED(r9)
1456	cmpwi	cr0, r0, 0
1457	beq	1f
1458	li	r7, TM_SPC_PULL_OS_CTX
1459	li	r6, TM_QW1_OS
1460	mfmsr	r0
1461	andi.	r0, r0, MSR_DR		/* in real mode? */
1462	beq	2f
1463	ld	r10, HSTATE_XIVE_TIMA_VIRT(r13)
1464	cmpldi	cr0, r10, 0
1465	beq	1f
1466	/* First load to pull the context, we ignore the value */
1467	eieio
1468	lwzx	r11, r7, r10
1469	/* Second load to recover the context state (Words 0 and 1) */
1470	ldx	r11, r6, r10
1471	b	3f
14722:	ld	r10, HSTATE_XIVE_TIMA_PHYS(r13)
1473	cmpldi	cr0, r10, 0
1474	beq	1f
1475	/* First load to pull the context, we ignore the value */
1476	eieio
1477	lwzcix	r11, r7, r10
1478	/* Second load to recover the context state (Words 0 and 1) */
1479	ldcix	r11, r6, r10
14803:	std	r11, VCPU_XIVE_SAVED_STATE(r9)
1481	/* Fixup some of the state for the next load */
1482	li	r10, 0
1483	li	r0, 0xff
1484	stb	r10, VCPU_XIVE_PUSHED(r9)
1485	stb	r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
1486	stb	r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
1487	eieio
14881:
1489#endif /* CONFIG_KVM_XICS */
1490
1491	/*
1492	 * Possibly flush the link stack here, before we do a blr in
1493	 * guest_exit_short_path.
1494	 */
14951:	nop
1496	patch_site 1b patch__call_kvm_flush_link_stack
1497
1498	/* If we came in through the P9 short path, go back out to C now */
1499	lwz	r0, STACK_SLOT_SHORT_PATH(r1)
1500	cmpwi	r0, 0
1501	bne	guest_exit_short_path
1502
1503	/* For hash guest, read the guest SLB and save it away */
1504	ld	r5, VCPU_KVM(r9)
1505	lbz	r0, KVM_RADIX(r5)
1506	li	r5, 0
1507	cmpwi	r0, 0
1508	bne	3f			/* for radix, save 0 entries */
1509	lwz	r0,VCPU_SLB_NR(r9)	/* number of entries in SLB */
1510	mtctr	r0
1511	li	r6,0
1512	addi	r7,r9,VCPU_SLB
15131:	slbmfee	r8,r6
1514	andis.	r0,r8,SLB_ESID_V@h
1515	beq	2f
1516	add	r8,r8,r6		/* put index in */
1517	slbmfev	r3,r6
1518	std	r8,VCPU_SLB_E(r7)
1519	std	r3,VCPU_SLB_V(r7)
1520	addi	r7,r7,VCPU_SLB_SIZE
1521	addi	r5,r5,1
15222:	addi	r6,r6,1
1523	bdnz	1b
1524	/* Finally clear out the SLB */
1525	li	r0,0
1526	slbmte	r0,r0
1527	slbia
1528	ptesync
15293:	stw	r5,VCPU_SLB_MAX(r9)
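	/*
	 * Hedged C sketch of the SLB save above (field names are
	 * approximate; for a radix guest slb_max simply ends up 0):
	 *
	 *	n = 0;
	 *	for (i = 0; i < vcpu->arch.slb_nr; i++) {
	 *		esid = slbmfee(i);
	 *		if (esid & SLB_ESID_V) {
	 *			vcpu->arch.slb[n].orige = esid | i;
	 *			vcpu->arch.slb[n].origv = slbmfev(i);
	 *			n++;
	 *		}
	 *	}
	 *	vcpu->arch.slb_max = n;		// SLB then cleared with slbia
	 */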
1530
1531	/* load host SLB entries */
1532BEGIN_MMU_FTR_SECTION
1533	b	0f
1534END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
1535	ld	r8,PACA_SLBSHADOWPTR(r13)
1536
1537	.rept	SLB_NUM_BOLTED
1538	li	r3, SLBSHADOW_SAVEAREA
1539	LDX_BE	r5, r8, r3
1540	addi	r3, r3, 8
1541	LDX_BE	r6, r8, r3
1542	andis.	r7,r5,SLB_ESID_V@h
1543	beq	1f
1544	slbmte	r6,r5
15451:	addi	r8,r8,16
1546	.endr
15470:
1548
1549guest_bypass:
1550	stw	r12, STACK_SLOT_TRAP(r1)
1551
1552	/* Save DEC */
1553	/* Do this before kvmhv_commence_exit so we know TB is guest TB */
1554	ld	r3, HSTATE_KVM_VCORE(r13)
1555	mfspr	r5,SPRN_DEC
1556	mftb	r6
1557	/* On P9, if the guest has large decr enabled, don't sign extend */
1558BEGIN_FTR_SECTION
1559	ld	r4, VCORE_LPCR(r3)
1560	andis.	r4, r4, LPCR_LD@h
1561	bne	16f
1562END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1563	extsw	r5,r5
156416:	add	r5,r5,r6
1565	/* r5 is a guest timebase value here, convert to host TB */
1566	ld	r4,VCORE_TB_OFFSET_APPL(r3)
1567	subf	r5,r4,r5
1568	std	r5,VCPU_DEC_EXPIRES(r9)
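	/*
	 * Roughly, in C (illustrative; the sign extension is skipped on
	 * P9 when the guest runs with the large decrementer, LPCR_LD):
	 *
	 *	dec = (s64)(s32)mfspr(SPRN_DEC);
	 *	vcpu->arch.dec_expires = dec + mftb() - vc->tb_offset_applied;
	 */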
1569
1570	/* Increment exit count, poke other threads to exit */
1571	mr 	r3, r12
1572	bl	kvmhv_commence_exit
1573	nop
1574	ld	r9, HSTATE_KVM_VCPU(r13)
1575
1576	/* Stop others sending VCPU interrupts to this physical CPU */
1577	li	r0, -1
1578	stw	r0, VCPU_CPU(r9)
1579	stw	r0, VCPU_THREAD_CPU(r9)
1580
1581	/* Save guest CTRL register, set runlatch to 1 */
1582	mfspr	r6,SPRN_CTRLF
1583	stw	r6,VCPU_CTRL(r9)
1584	andi.	r0,r6,1
1585	bne	4f
1586	ori	r6,r6,1
1587	mtspr	SPRN_CTRLT,r6
15884:
1589	/*
1590	 * Save the guest PURR/SPURR
1591	 */
1592	mfspr	r5,SPRN_PURR
1593	mfspr	r6,SPRN_SPURR
1594	ld	r7,VCPU_PURR(r9)
1595	ld	r8,VCPU_SPURR(r9)
1596	std	r5,VCPU_PURR(r9)
1597	std	r6,VCPU_SPURR(r9)
1598	subf	r5,r7,r5
1599	subf	r6,r8,r6
1600
1601	/*
1602	 * Restore host PURR/SPURR and add guest times
1603	 * so that the time in the guest gets accounted.
1604	 */
1605	ld	r3,HSTATE_PURR(r13)
1606	ld	r4,HSTATE_SPURR(r13)
1607	add	r3,r3,r5
1608	add	r4,r4,r6
1609	mtspr	SPRN_PURR,r3
1610	mtspr	SPRN_SPURR,r4
1611
1612BEGIN_FTR_SECTION
1613	b	8f
1614END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
1615	/* Save POWER8-specific registers */
1616	mfspr	r5, SPRN_IAMR
1617	mfspr	r6, SPRN_PSPB
1618	mfspr	r7, SPRN_FSCR
1619	std	r5, VCPU_IAMR(r9)
1620	stw	r6, VCPU_PSPB(r9)
1621	std	r7, VCPU_FSCR(r9)
1622	mfspr	r5, SPRN_IC
1623	mfspr	r7, SPRN_TAR
1624	std	r5, VCPU_IC(r9)
1625	std	r7, VCPU_TAR(r9)
1626	mfspr	r8, SPRN_EBBHR
1627	std	r8, VCPU_EBBHR(r9)
1628	mfspr	r5, SPRN_EBBRR
1629	mfspr	r6, SPRN_BESCR
1630	mfspr	r7, SPRN_PID
1631	mfspr	r8, SPRN_WORT
1632	std	r5, VCPU_EBBRR(r9)
1633	std	r6, VCPU_BESCR(r9)
1634	stw	r7, VCPU_GUEST_PID(r9)
1635	std	r8, VCPU_WORT(r9)
1636BEGIN_FTR_SECTION
1637	mfspr	r5, SPRN_TCSCR
1638	mfspr	r6, SPRN_ACOP
1639	mfspr	r7, SPRN_CSIGR
1640	mfspr	r8, SPRN_TACR
1641	std	r5, VCPU_TCSCR(r9)
1642	std	r6, VCPU_ACOP(r9)
1643	std	r7, VCPU_CSIGR(r9)
1644	std	r8, VCPU_TACR(r9)
1645FTR_SECTION_ELSE
1646	mfspr	r5, SPRN_TIDR
1647	mfspr	r6, SPRN_PSSCR
1648	std	r5, VCPU_TID(r9)
1649	rldicl	r6, r6, 4, 50		/* r6 &= PSSCR_GUEST_VIS */
1650	rotldi	r6, r6, 60
1651	std	r6, VCPU_PSSCR(r9)
1652	/* Restore host HFSCR value */
1653	ld	r7, STACK_SLOT_HFSCR(r1)
1654	mtspr	SPRN_HFSCR, r7
1655ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
1656	/*
1657	 * Reset various registers to 0, since non-zero values
1658	 * left in them by the guest could disrupt the host.
1659	 */
1660	li	r0, 0
1661	mtspr	SPRN_PSPB, r0
1662	mtspr	SPRN_WORT, r0
1663BEGIN_FTR_SECTION
1664	mtspr	SPRN_TCSCR, r0
1665	/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
1666	li	r0, 1
1667	sldi	r0, r0, 31
1668	mtspr	SPRN_MMCRS, r0
1669END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
1670
1671	/* Save and restore AMR, IAMR and UAMOR before turning on the MMU */
1672	ld	r8, STACK_SLOT_IAMR(r1)
1673	mtspr	SPRN_IAMR, r8
1674
16758:	/* Power7 jumps back in here */
1676	mfspr	r5,SPRN_AMR
1677	mfspr	r6,SPRN_UAMOR
1678	std	r5,VCPU_AMR(r9)
1679	std	r6,VCPU_UAMOR(r9)
1680	ld	r5,STACK_SLOT_AMR(r1)
1681	ld	r6,STACK_SLOT_UAMOR(r1)
1682	mtspr	SPRN_AMR, r5
1683	mtspr	SPRN_UAMOR, r6
1684
1685	/* Switch DSCR back to host value */
1686	mfspr	r8, SPRN_DSCR
1687	ld	r7, HSTATE_DSCR(r13)
1688	std	r8, VCPU_DSCR(r9)
1689	mtspr	SPRN_DSCR, r7
1690
1691	/* Save non-volatile GPRs */
1692	std	r14, VCPU_GPR(R14)(r9)
1693	std	r15, VCPU_GPR(R15)(r9)
1694	std	r16, VCPU_GPR(R16)(r9)
1695	std	r17, VCPU_GPR(R17)(r9)
1696	std	r18, VCPU_GPR(R18)(r9)
1697	std	r19, VCPU_GPR(R19)(r9)
1698	std	r20, VCPU_GPR(R20)(r9)
1699	std	r21, VCPU_GPR(R21)(r9)
1700	std	r22, VCPU_GPR(R22)(r9)
1701	std	r23, VCPU_GPR(R23)(r9)
1702	std	r24, VCPU_GPR(R24)(r9)
1703	std	r25, VCPU_GPR(R25)(r9)
1704	std	r26, VCPU_GPR(R26)(r9)
1705	std	r27, VCPU_GPR(R27)(r9)
1706	std	r28, VCPU_GPR(R28)(r9)
1707	std	r29, VCPU_GPR(R29)(r9)
1708	std	r30, VCPU_GPR(R30)(r9)
1709	std	r31, VCPU_GPR(R31)(r9)
1710
1711	/* Save SPRGs */
1712	mfspr	r3, SPRN_SPRG0
1713	mfspr	r4, SPRN_SPRG1
1714	mfspr	r5, SPRN_SPRG2
1715	mfspr	r6, SPRN_SPRG3
1716	std	r3, VCPU_SPRG0(r9)
1717	std	r4, VCPU_SPRG1(r9)
1718	std	r5, VCPU_SPRG2(r9)
1719	std	r6, VCPU_SPRG3(r9)
1720
1721	/* save FP state */
1722	mr	r3, r9
1723	bl	kvmppc_save_fp
1724
1725#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1726/*
1727 * Branch around the call if both CPU_FTR_TM and
1728 * CPU_FTR_P9_TM_HV_ASSIST are off.
1729 */
1730BEGIN_FTR_SECTION
1731	b	91f
1732END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
1733	/*
1734	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
1735	 */
1736	mr      r3, r9
1737	ld      r4, VCPU_MSR(r3)
1738	li	r5, 0			/* don't preserve non-vol regs */
1739	bl	kvmppc_save_tm_hv
1740	nop
1741	ld	r9, HSTATE_KVM_VCPU(r13)
174291:
1743#endif
1744
1745	/* Increment yield count if they have a VPA */
1746	ld	r8, VCPU_VPA(r9)	/* do they have a VPA? */
1747	cmpdi	r8, 0
1748	beq	25f
1749	li	r4, LPPACA_YIELDCOUNT
1750	LWZX_BE	r3, r8, r4
1751	addi	r3, r3, 1
1752	STWX_BE	r3, r8, r4
1753	li	r3, 1
1754	stb	r3, VCPU_VPA_DIRTY(r9)
175525:
1756	/* Save PMU registers if requested */
1757	/* r8 and cr0.eq are live here */
1758	mr	r3, r9
1759	li	r4, 1
1760	beq	21f			/* if no VPA, save PMU stuff anyway */
1761	lbz	r4, LPPACA_PMCINUSE(r8)
176221:	bl	kvmhv_save_guest_pmu
1763	ld	r9, HSTATE_KVM_VCPU(r13)
1764
1765	/* Restore host values of some registers */
1766BEGIN_FTR_SECTION
1767	ld	r5, STACK_SLOT_CIABR(r1)
1768	ld	r6, STACK_SLOT_DAWR(r1)
1769	ld	r7, STACK_SLOT_DAWRX(r1)
1770	mtspr	SPRN_CIABR, r5
1771	/*
1772	 * If the DAWR doesn't work, it's ok to write these here as
1773	 * these values should always be zero.
1774	 */
1775	mtspr	SPRN_DAWR0, r6
1776	mtspr	SPRN_DAWRX0, r7
1777END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1778BEGIN_FTR_SECTION
1779	ld	r5, STACK_SLOT_TID(r1)
1780	ld	r6, STACK_SLOT_PSSCR(r1)
1781	ld	r7, STACK_SLOT_PID(r1)
1782	mtspr	SPRN_TIDR, r5
1783	mtspr	SPRN_PSSCR, r6
1784	mtspr	SPRN_PID, r7
1785END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1786
1787#ifdef CONFIG_PPC_RADIX_MMU
1788	/*
1789	 * Are we running hash or radix ?
1790	 */
1791	ld	r5, VCPU_KVM(r9)
1792	lbz	r0, KVM_RADIX(r5)
1793	cmpwi	cr2, r0, 0
1794	beq	cr2, 2f
1795
1796	/*
1797	 * Radix: do eieio; tlbsync; ptesync sequence in case we
1798	 * interrupted the guest between a tlbie and a ptesync.
1799	 */
1800	eieio
1801	tlbsync
1802	ptesync
1803
1804BEGIN_FTR_SECTION
1805	/* Radix: Handle the case where the guest used an illegal PID */
1806	LOAD_REG_ADDR(r4, mmu_base_pid)
1807	lwz	r3, VCPU_GUEST_PID(r9)
1808	lwz	r5, 0(r4)
1809	cmpw	cr0,r3,r5
1810	blt	2f
1811
1812	/*
1813	 * Illegal PID, the HW might have prefetched and cached in the TLB
1814	 * some translations for the LPID 0 / guest PID combination which
1815	 * Linux doesn't know about, so we need to flush that PID out of
1816	 * the TLB. First we need to set LPIDR to 0 so tlbiel applies to
1817	 * the right context.
1818	 */
1819	li	r0,0
1820	mtspr	SPRN_LPID,r0
1821	isync
1822
1823	/* Then do a congruence class local flush */
1824	ld	r6,VCPU_KVM(r9)
1825	lwz	r0,KVM_TLB_SETS(r6)
1826	mtctr	r0
1827	li	r7,0x400		/* IS field = 0b01 */
1828	ptesync
1829	sldi	r0,r3,32		/* RS has PID */
18301:	PPC_TLBIEL(7,0,2,1,1)		/* RIC=2, PRS=1, R=1 */
1831	addi	r7,r7,0x1000
1832	bdnz	1b
1833	ptesync
1834END_FTR_SECTION_IFSET(CPU_FTR_P9_RADIX_PREFETCH_BUG)
1835
18362:
1837#endif /* CONFIG_PPC_RADIX_MMU */
1838
1839	/*
1840	 * cp_abort is required if the processor supports local copy-paste
1841	 * to clear the copy buffer that was under control of the guest.
1842	 */
1843BEGIN_FTR_SECTION
1844	PPC_CP_ABORT
1845END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
1846
1847	/*
1848	 * POWER7/POWER8 guest -> host partition switch code.
1849	 * We don't have to lock against tlbies but we do
1850	 * have to coordinate the hardware threads.
1851	 * Here STACK_SLOT_TRAP(r1) contains the trap number.
1852	 */
1853kvmhv_switch_to_host:
1854	/* Secondary threads wait for primary to do partition switch */
1855	ld	r5,HSTATE_KVM_VCORE(r13)
1856	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */
1857	lbz	r3,HSTATE_PTID(r13)
1858	cmpwi	r3,0
1859	beq	15f
1860	HMT_LOW
186113:	lbz	r3,VCORE_IN_GUEST(r5)
1862	cmpwi	r3,0
1863	bne	13b
1864	HMT_MEDIUM
1865	b	16f
1866
1867	/* Primary thread waits for all the secondaries to exit guest */
186815:	lwz	r3,VCORE_ENTRY_EXIT(r5)
1869	rlwinm	r0,r3,32-8,0xff
1870	clrldi	r3,r3,56
1871	cmpw	r3,r0
1872	bne	15b
1873	isync
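	/*
	 * Illustrative C for the wait loop above: entry_exit_map packs
	 * the per-thread entry bits in the low byte and the exit bits in
	 * the byte above it, so spin until every thread that entered the
	 * guest has also exited:
	 *
	 *	while (((vc->entry_exit_map >> 8) & 0xff) !=
	 *	       (vc->entry_exit_map & 0xff))
	 *		cpu_relax();
	 */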
1874
1875	/* Did we actually switch to the guest at all? */
1876	lbz	r6, VCORE_IN_GUEST(r5)
1877	cmpwi	r6, 0
1878	beq	19f
1879
1880	/* Primary thread switches back to host partition */
1881	lwz	r7,KVM_HOST_LPID(r4)
1882BEGIN_FTR_SECTION
1883	ld	r6,KVM_HOST_SDR1(r4)
1884	li	r8,LPID_RSVD		/* switch to reserved LPID */
1885	mtspr	SPRN_LPID,r8
1886	ptesync
1887	mtspr	SPRN_SDR1,r6		/* switch to host page table */
1888END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
1889	mtspr	SPRN_LPID,r7
1890	isync
1891
1892BEGIN_FTR_SECTION
1893	/* DPDES and VTB are shared between threads */
1894	mfspr	r7, SPRN_DPDES
1895	mfspr	r8, SPRN_VTB
1896	std	r7, VCORE_DPDES(r5)
1897	std	r8, VCORE_VTB(r5)
1898	/* clear DPDES so we don't get guest doorbells in the host */
1899	li	r8, 0
1900	mtspr	SPRN_DPDES, r8
1901END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1902
1903	/* Subtract timebase offset from timebase */
1904	ld	r8, VCORE_TB_OFFSET_APPL(r5)
1905	cmpdi	r8,0
1906	beq	17f
1907	li	r0, 0
1908	std	r0, VCORE_TB_OFFSET_APPL(r5)
1909	mftb	r6			/* current guest timebase */
1910	subf	r8,r8,r6
1911	mtspr	SPRN_TBU40,r8		/* update upper 40 bits */
1912	mftb	r7			/* check if lower 24 bits overflowed */
1913	clrldi	r6,r6,40
1914	clrldi	r7,r7,40
1915	cmpld	r7,r6
1916	bge	17f
1917	addis	r8,r8,0x100		/* if so, increment upper 40 bits */
1918	mtspr	SPRN_TBU40,r8
1919
192017:
1921	/*
1922	 * If this is an HMI, we called kvmppc_realmode_hmi_handler
1923	 * above, which may or may not have already called
1924	 * kvmppc_subcore_exit_guest.  Fortunately, all that
1925	 * kvmppc_subcore_exit_guest does is clear a flag, so calling
1926	 * it again here is benign even if kvmppc_realmode_hmi_handler
1927	 * has already called it.
1928	 */
1929	bl	kvmppc_subcore_exit_guest
1930	nop
193130:	ld	r5,HSTATE_KVM_VCORE(r13)
1932	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */
1933
1934	/* Reset PCR */
1935	ld	r0, VCORE_PCR(r5)
1936	LOAD_REG_IMMEDIATE(r6, PCR_MASK)
1937	cmpld	r0, r6
1938	beq	18f
1939	mtspr	SPRN_PCR, r6
194018:
1941	/* Signal secondary CPUs to continue */
1942	li	r0, 0
1943	stb	r0,VCORE_IN_GUEST(r5)
194419:	lis	r8,0x7fff		/* MAX_INT@h */
1945	mtspr	SPRN_HDEC,r8
1946
194716:
1948BEGIN_FTR_SECTION
1949	/* On POWER9 with HPT-on-radix we need to wait for all other threads */
1950	ld	r3, HSTATE_SPLIT_MODE(r13)
1951	cmpdi	r3, 0
1952	beq	47f
1953	lwz	r8, KVM_SPLIT_DO_RESTORE(r3)
1954	cmpwi	r8, 0
1955	beq	47f
1956	bl	kvmhv_p9_restore_lpcr
1957	nop
1958	b	48f
195947:
1960END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1961	ld	r8,KVM_HOST_LPCR(r4)
1962	mtspr	SPRN_LPCR,r8
1963	isync
196448:
1965#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
1966	/* Finish timing, if we have a vcpu */
1967	ld	r4, HSTATE_KVM_VCPU(r13)
1968	cmpdi	r4, 0
1969	li	r3, 0
1970	beq	2f
1971	bl	kvmhv_accumulate_time
19722:
1973#endif
1974	/* Unset guest mode */
1975	li	r0, KVM_GUEST_MODE_NONE
1976	stb	r0, HSTATE_IN_GUEST(r13)
1977
1978	lwz	r12, STACK_SLOT_TRAP(r1)	/* return trap # in r12 */
1979	ld	r0, SFS+PPC_LR_STKOFF(r1)
1980	addi	r1, r1, SFS
1981	mtlr	r0
1982	blr
1983
1984.balign 32
1985.global kvm_flush_link_stack
1986kvm_flush_link_stack:
1987	/* Save LR into r0 */
1988	mflr	r0
1989
1990	/* Flush the link stack. On Power8 it's up to 32 entries in size. */
1991	.rept 32
1992	bl	.+4
1993	.endr
1994
1995	/* And on Power9 it's up to 64. */
1996BEGIN_FTR_SECTION
1997	.rept 32
1998	bl	.+4
1999	.endr
2000END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
2001
2002	/* Restore LR */
2003	mtlr	r0
2004	blr
2005
2006kvmppc_guest_external:
2007	/* External interrupt, first check for host_ipi. If this is
2008	 * set, we know the host wants us out so let's do it now
2009	 */
2010	bl	kvmppc_read_intr
2011
2012	/*
2013	 * Restore the active volatile registers after returning from
2014	 * a C function.
2015	 */
2016	ld	r9, HSTATE_KVM_VCPU(r13)
2017	li	r12, BOOK3S_INTERRUPT_EXTERNAL
2018
2019	/*
2020	 * kvmppc_read_intr return codes:
2021	 *
2022	 * Exit to host (r3 > 0)
2023	 *   1 An interrupt is pending that needs to be handled by the host
2024	 *     Exit guest and return to host by branching to guest_exit_cont
2025	 *
2026	 *   2 Passthrough that needs completion in the host
2027	 *     Exit guest and return to host by branching to guest_exit_cont
2028	 *     However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD
2029	 *     to indicate to the host to complete handling the interrupt
2030	 *
2031	 * Before returning to the guest, we check if any CPU is heading out
2032	 * to the host and if so, we head out also. If no CPUs are heading
2033	 * out, see the return values <= 0 below.
2034	 *
2035	 * Return to guest (r3 <= 0)
2036	 *  0 No external interrupt is pending
2037	 * -1 A guest wakeup IPI (which has now been cleared)
2038	 *    In either case, we return to guest to deliver any pending
2039	 *    guest interrupts.
2040	 *
2041	 * -2 A PCI passthrough external interrupt was handled
2042	 *    (interrupt was delivered directly to guest)
2043	 *    Return to guest to deliver any pending guest interrupts.
2044	 */
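	/*
	 * The dispatch below, as a hedged C-level sketch:
	 *
	 *	if (rc > 1) {			// passthrough, host completes it
	 *		vcpu->arch.trap = BOOK3S_INTERRUPT_HV_RM_HARD;
	 *		goto guest_exit_cont;
	 *	} else if (rc == 1) {		// host interrupt pending
	 *		goto guest_exit_cont;
	 *	} else {			// rc <= 0
	 *		goto maybe_reenter_guest;
	 *	}
	 */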
2045
2046	cmpdi	r3, 1
2047	ble	1f
2048
2049	/* Return code = 2 */
2050	li	r12, BOOK3S_INTERRUPT_HV_RM_HARD
2051	stw	r12, VCPU_TRAP(r9)
2052	b	guest_exit_cont
2053
20541:	/* Return code <= 1 */
2055	cmpdi	r3, 0
2056	bgt	guest_exit_cont
2057
2058	/* Return code <= 0 */
2059maybe_reenter_guest:
2060	ld	r5, HSTATE_KVM_VCORE(r13)
2061	lwz	r0, VCORE_ENTRY_EXIT(r5)
2062	cmpwi	r0, 0x100
2063	mr	r4, r9
2064	blt	deliver_guest_interrupt
2065	b	guest_exit_cont
2066
2067#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2068/*
2069 * Softpatch interrupt for transactional memory emulation cases
2070 * on POWER9 DD2.2.  This is early in the guest exit path - we
2071 * haven't saved registers or done a treclaim yet.
2072 */
2073kvmppc_tm_emul:
2074	/* Save instruction image in HEIR */
2075	mfspr	r3, SPRN_HEIR
2076	stw	r3, VCPU_HEIR(r9)
2077
2078	/*
2079	 * The cases we want to handle here are those where the guest
2080	 * is in real suspend mode and is trying to transition to
2081	 * transactional mode.
2082	 */
2083	lbz	r0, HSTATE_FAKE_SUSPEND(r13)
2084	cmpwi	r0, 0		/* keep exiting guest if in fake suspend */
2085	bne	guest_exit_cont
2086	rldicl	r3, r11, 64 - MSR_TS_S_LG, 62
2087	cmpwi	r3, 1		/* or if not in suspend state */
2088	bne	guest_exit_cont
2089
2090	/* Call C code to do the emulation */
2091	mr	r3, r9
2092	bl	kvmhv_p9_tm_emulation_early
2093	nop
2094	ld	r9, HSTATE_KVM_VCPU(r13)
2095	li	r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
2096	cmpwi	r3, 0
2097	beq	guest_exit_cont		/* continue exiting if not handled */
2098	ld	r10, VCPU_PC(r9)
2099	ld	r11, VCPU_MSR(r9)
2100	b	fast_interrupt_c_return	/* go back to guest if handled */
2101#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
2102
2103/*
2104 * Check whether an HDSI is an HPTE not found fault or something else.
2105 * If it is an HPTE not found fault that is due to the guest accessing
2106 * a page that they have mapped but which we have paged out, then
2107 * we continue on with the guest exit path.  In all other cases,
2108 * reflect the HDSI to the guest as a DSI.
2109 */
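/*
 * In outline, the handling below corresponds to this hedged sketch
 * (the argument list of kvmppc_hpte_hv_fault is elided, and the
 * return-code meanings mirror the cmpdi checks in the asm):
 *
 *	if (kvm_is_radix(kvm)) {
 *		// save DAR/DSISR/ASDR and exit for virtual-mode handling
 *	} else if (!(dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT))) {
 *		// reflect the fault to the guest as a DSI
 *	} else {
 *		long r = kvmppc_hpte_hv_fault(vcpu, ...);
 *		if (r == 0)
 *			;	// retry the instruction
 *		else if (r == -1)
 *			;	// exit and handle in kernel mode
 *		else if (r == -2)
 *			;	// load the instruction word for MMIO emulation
 *		else
 *			;	// synthesize a DSI for the guest, DSISR = r
 *	}
 */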
2110kvmppc_hdsi:
2111	ld	r3, VCPU_KVM(r9)
2112	lbz	r0, KVM_RADIX(r3)
2113	mfspr	r4, SPRN_HDAR
2114	mfspr	r6, SPRN_HDSISR
2115BEGIN_FTR_SECTION
2116	/* Look for DSISR canary. If we find it, retry instruction */
2117	cmpdi	r6, 0x7fff
2118	beq	6f
2119END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
2120	cmpwi	r0, 0
2121	bne	.Lradix_hdsi		/* on radix, just save DAR/DSISR/ASDR */
2122	/* HPTE not found fault or protection fault? */
2123	andis.	r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
2124	beq	1f			/* if not, send it to the guest */
2125	andi.	r0, r11, MSR_DR		/* data relocation enabled? */
2126	beq	3f
2127BEGIN_FTR_SECTION
2128	mfspr	r5, SPRN_ASDR		/* on POWER9, use ASDR to get VSID */
2129	b	4f
2130END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
2131	clrrdi	r0, r4, 28
2132	PPC_SLBFEE_DOT(R5, R0)		/* if so, look up SLB */
2133	li	r0, BOOK3S_INTERRUPT_DATA_SEGMENT
2134	bne	7f			/* if no SLB entry found */
21354:	std	r4, VCPU_FAULT_DAR(r9)
2136	stw	r6, VCPU_FAULT_DSISR(r9)
2137
2138	/* Search the hash table. */
2139	mr	r3, r9			/* vcpu pointer */
2140	li	r7, 1			/* data fault */
2141	bl	kvmppc_hpte_hv_fault
2142	ld	r9, HSTATE_KVM_VCPU(r13)
2143	ld	r10, VCPU_PC(r9)
2144	ld	r11, VCPU_MSR(r9)
2145	li	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
2146	cmpdi	r3, 0			/* retry the instruction */
2147	beq	6f
2148	cmpdi	r3, -1			/* handle in kernel mode */
2149	beq	guest_exit_cont
2150	cmpdi	r3, -2			/* MMIO emulation; need instr word */
2151	beq	2f
2152
2153	/* Synthesize a DSI (or DSegI) for the guest */
2154	ld	r4, VCPU_FAULT_DAR(r9)
2155	mr	r6, r3
21561:	li	r0, BOOK3S_INTERRUPT_DATA_STORAGE
2157	mtspr	SPRN_DSISR, r6
21587:	mtspr	SPRN_DAR, r4
2159	mtspr	SPRN_SRR0, r10
2160	mtspr	SPRN_SRR1, r11
2161	mr	r10, r0
2162	bl	kvmppc_msr_interrupt
2163fast_interrupt_c_return:
21646:	ld	r7, VCPU_CTR(r9)
2165	ld	r8, VCPU_XER(r9)
2166	mtctr	r7
2167	mtxer	r8
2168	mr	r4, r9
2169	b	fast_guest_return
2170
21713:	ld	r5, VCPU_KVM(r9)	/* not relocated, use VRMA */
2172	ld	r5, KVM_VRMA_SLB_V(r5)
2173	b	4b
2174
2175	/* If this is for emulated MMIO, load the instruction word */
21762:	li	r8, KVM_INST_FETCH_FAILED	/* In case lwz faults */
2177
2178	/* Set guest mode to 'jump over instruction' so if lwz faults
2179	 * we'll just continue at the next IP. */
2180	li	r0, KVM_GUEST_MODE_SKIP
2181	stb	r0, HSTATE_IN_GUEST(r13)
2182
2183	/* Do the access with MSR:DR enabled */
2184	mfmsr	r3
2185	ori	r4, r3, MSR_DR		/* Enable paging for data */
2186	mtmsrd	r4
2187	lwz	r8, 0(r10)
2188	mtmsrd	r3
2189
2190	/* Store the result */
2191	stw	r8, VCPU_LAST_INST(r9)
2192
2193	/* Unset guest mode. */
2194	li	r0, KVM_GUEST_MODE_HOST_HV
2195	stb	r0, HSTATE_IN_GUEST(r13)
2196	b	guest_exit_cont
2197
2198.Lradix_hdsi:
2199	std	r4, VCPU_FAULT_DAR(r9)
2200	stw	r6, VCPU_FAULT_DSISR(r9)
2201.Lradix_hisi:
2202	mfspr	r5, SPRN_ASDR
2203	std	r5, VCPU_FAULT_GPA(r9)
2204	b	guest_exit_cont
2205
2206/*
2207 * Similarly for an HISI, reflect it to the guest as an ISI unless
2208 * it is an HPTE not found fault for a page that we have paged out.
2209 */
2210kvmppc_hisi:
2211	ld	r3, VCPU_KVM(r9)
2212	lbz	r0, KVM_RADIX(r3)
2213	cmpwi	r0, 0
2214	bne	.Lradix_hisi		/* for radix, just save ASDR */
2215	andis.	r0, r11, SRR1_ISI_NOPT@h
2216	beq	1f
2217	andi.	r0, r11, MSR_IR		/* instruction relocation enabled? */
2218	beq	3f
2219BEGIN_FTR_SECTION
2220	mfspr	r5, SPRN_ASDR		/* on POWER9, use ASDR to get VSID */
2221	b	4f
2222END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
2223	clrrdi	r0, r10, 28
2224	PPC_SLBFEE_DOT(R5, R0)		/* if so, look up SLB */
2225	li	r0, BOOK3S_INTERRUPT_INST_SEGMENT
2226	bne	7f			/* if no SLB entry found */
22274:
2228	/* Search the hash table. */
2229	mr	r3, r9			/* vcpu pointer */
2230	mr	r4, r10
2231	mr	r6, r11
2232	li	r7, 0			/* instruction fault */
2233	bl	kvmppc_hpte_hv_fault
2234	ld	r9, HSTATE_KVM_VCPU(r13)
2235	ld	r10, VCPU_PC(r9)
2236	ld	r11, VCPU_MSR(r9)
2237	li	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
2238	cmpdi	r3, 0			/* retry the instruction */
2239	beq	fast_interrupt_c_return
2240	cmpdi	r3, -1			/* handle in kernel mode */
2241	beq	guest_exit_cont
2242
2243	/* Synthesize an ISI (or ISegI) for the guest */
2244	mr	r11, r3
22451:	li	r0, BOOK3S_INTERRUPT_INST_STORAGE
22467:	mtspr	SPRN_SRR0, r10
2247	mtspr	SPRN_SRR1, r11
2248	mr	r10, r0
2249	bl	kvmppc_msr_interrupt
2250	b	fast_interrupt_c_return
2251
22523:	ld	r6, VCPU_KVM(r9)	/* not relocated, use VRMA */
2253	ld	r5, KVM_VRMA_SLB_V(r6)
2254	b	4b
2255
2256/*
2257 * Try to handle an hcall in real mode.
2258 * Returns to the guest if we handle it, or continues on up to
2259 * the kernel if we can't (i.e. if we don't have a handler for
2260 * it, or if the handler returns H_TOO_HARD).
2261 *
2262 * r5 - r8 contain hcall args,
2263 * r9 = vcpu, r10 = pc, r11 = msr, r12 = trap, r13 = paca
2264 */
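/*
 * A hedged C approximation of the lookup below (hcall_real_table holds
 * 32-bit offsets from the table base; "enabled_hcalls" stands for the
 * bitmap behind KVM_ENABLED_HCALLS; types and argument passing are
 * illustrative only):
 *
 *	unsigned long nr = req & ~3UL;	// hcall numbers are multiples of 4
 *	if (nr >= hcall_real_table_end - hcall_real_table)
 *		goto guest_exit_cont;	// out of range
 *	if (!test_bit(nr / 4, kvm->arch.enabled_hcalls))
 *		goto guest_exit_cont;	// not enabled for in-kernel handling
 *	s32 off = hcall_real_table[nr / 4];
 *	if (!off)
 *		goto guest_exit_cont;	// no real-mode handler
 *	long (*fn)(struct kvm_vcpu *vcpu, ...) =
 *		(void *)((char *)hcall_real_table + off);
 *	ret = fn(vcpu, ...);
 *	if (ret == H_TOO_HARD)
 *		goto hcall_real_fallback;	// punt to the virtual-mode path
 */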
2265hcall_try_real_mode:
2266	ld	r3,VCPU_GPR(R3)(r9)
2267	andi.	r0,r11,MSR_PR
2268	/* sc 1 from userspace - reflect to guest syscall */
2269	bne	sc_1_fast_return
2270	/* sc 1 from nested guest - give it to L1 to handle */
2271	ld	r0, VCPU_NESTED(r9)
2272	cmpdi	r0, 0
2273	bne	guest_exit_cont
2274	clrrdi	r3,r3,2
2275	cmpldi	r3,hcall_real_table_end - hcall_real_table
2276	bge	guest_exit_cont
2277	/* See if this hcall is enabled for in-kernel handling */
2278	ld	r4, VCPU_KVM(r9)
2279	srdi	r0, r3, 8	/* r0 = (r3 / 4) >> 6 */
2280	sldi	r0, r0, 3	/* index into kvm->arch.enabled_hcalls[] */
2281	add	r4, r4, r0
2282	ld	r0, KVM_ENABLED_HCALLS(r4)
2283	rlwinm	r4, r3, 32-2, 0x3f	/* r4 = (r3 / 4) & 0x3f */
2284	srd	r0, r0, r4
2285	andi.	r0, r0, 1
2286	beq	guest_exit_cont
2287	/* Get pointer to handler, if any, and call it */
2288	LOAD_REG_ADDR(r4, hcall_real_table)
2289	lwax	r3,r3,r4
2290	cmpwi	r3,0
2291	beq	guest_exit_cont
2292	add	r12,r3,r4
2293	mtctr	r12
2294	mr	r3,r9		/* get vcpu pointer */
2295	ld	r4,VCPU_GPR(R4)(r9)
2296	bctrl
2297	cmpdi	r3,H_TOO_HARD
2298	beq	hcall_real_fallback
2299	ld	r4,HSTATE_KVM_VCPU(r13)
2300	std	r3,VCPU_GPR(R3)(r4)
2301	ld	r10,VCPU_PC(r4)
2302	ld	r11,VCPU_MSR(r4)
2303	b	fast_guest_return
2304
2305sc_1_fast_return:
2306	mtspr	SPRN_SRR0,r10
2307	mtspr	SPRN_SRR1,r11
2308	li	r10, BOOK3S_INTERRUPT_SYSCALL
2309	bl	kvmppc_msr_interrupt
2310	mr	r4,r9
2311	b	fast_guest_return
2312
2313	/* We've attempted a real mode hcall, but it has been punted back
2314	 * to userspace.  We need to restore some clobbered volatiles
2315	 * before resuming the pass-it-to-qemu path */
2316hcall_real_fallback:
2317	li	r12,BOOK3S_INTERRUPT_SYSCALL
2318	ld	r9, HSTATE_KVM_VCPU(r13)
2319
2320	b	guest_exit_cont
2321
2322	.globl	hcall_real_table
2323hcall_real_table:
2324	.long	0		/* 0 - unused */
2325	.long	DOTSYM(kvmppc_h_remove) - hcall_real_table
2326	.long	DOTSYM(kvmppc_h_enter) - hcall_real_table
2327	.long	DOTSYM(kvmppc_h_read) - hcall_real_table
2328	.long	DOTSYM(kvmppc_h_clear_mod) - hcall_real_table
2329	.long	DOTSYM(kvmppc_h_clear_ref) - hcall_real_table
2330	.long	DOTSYM(kvmppc_h_protect) - hcall_real_table
2331#ifdef CONFIG_SPAPR_TCE_IOMMU
2332	.long	DOTSYM(kvmppc_h_get_tce) - hcall_real_table
2333	.long	DOTSYM(kvmppc_rm_h_put_tce) - hcall_real_table
2334#else
2335	.long	0		/* 0x1c */
2336	.long	0		/* 0x20 */
2337#endif
2338	.long	0		/* 0x24 - H_SET_SPRG0 */
2339	.long	DOTSYM(kvmppc_h_set_dabr) - hcall_real_table
2340	.long	DOTSYM(kvmppc_rm_h_page_init) - hcall_real_table
2341	.long	0		/* 0x30 */
2342	.long	0		/* 0x34 */
2343	.long	0		/* 0x38 */
2344	.long	0		/* 0x3c */
2345	.long	0		/* 0x40 */
2346	.long	0		/* 0x44 */
2347	.long	0		/* 0x48 */
2348	.long	0		/* 0x4c */
2349	.long	0		/* 0x50 */
2350	.long	0		/* 0x54 */
2351	.long	0		/* 0x58 */
2352	.long	0		/* 0x5c */
2353	.long	0		/* 0x60 */
2354#ifdef CONFIG_KVM_XICS
2355	.long	DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table
2356	.long	DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table
2357	.long	DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table
2358	.long	DOTSYM(kvmppc_rm_h_ipoll) - hcall_real_table
2359	.long	DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table
2360#else
2361	.long	0		/* 0x64 - H_EOI */
2362	.long	0		/* 0x68 - H_CPPR */
2363	.long	0		/* 0x6c - H_IPI */
2364	.long	0		/* 0x70 - H_IPOLL */
2365	.long	0		/* 0x74 - H_XIRR */
2366#endif
2367	.long	0		/* 0x78 */
2368	.long	0		/* 0x7c */
2369	.long	0		/* 0x80 */
2370	.long	0		/* 0x84 */
2371	.long	0		/* 0x88 */
2372	.long	0		/* 0x8c */
2373	.long	0		/* 0x90 */
2374	.long	0		/* 0x94 */
2375	.long	0		/* 0x98 */
2376	.long	0		/* 0x9c */
2377	.long	0		/* 0xa0 */
2378	.long	0		/* 0xa4 */
2379	.long	0		/* 0xa8 */
2380	.long	0		/* 0xac */
2381	.long	0		/* 0xb0 */
2382	.long	0		/* 0xb4 */
2383	.long	0		/* 0xb8 */
2384	.long	0		/* 0xbc */
2385	.long	0		/* 0xc0 */
2386	.long	0		/* 0xc4 */
2387	.long	0		/* 0xc8 */
2388	.long	0		/* 0xcc */
2389	.long	0		/* 0xd0 */
2390	.long	0		/* 0xd4 */
2391	.long	0		/* 0xd8 */
2392	.long	0		/* 0xdc */
2393	.long	DOTSYM(kvmppc_h_cede) - hcall_real_table
2394	.long	DOTSYM(kvmppc_rm_h_confer) - hcall_real_table
2395	.long	0		/* 0xe8 */
2396	.long	0		/* 0xec */
2397	.long	0		/* 0xf0 */
2398	.long	0		/* 0xf4 */
2399	.long	0		/* 0xf8 */
2400	.long	0		/* 0xfc */
2401	.long	0		/* 0x100 */
2402	.long	0		/* 0x104 */
2403	.long	0		/* 0x108 */
2404	.long	0		/* 0x10c */
2405	.long	0		/* 0x110 */
2406	.long	0		/* 0x114 */
2407	.long	0		/* 0x118 */
2408	.long	0		/* 0x11c */
2409	.long	0		/* 0x120 */
2410	.long	DOTSYM(kvmppc_h_bulk_remove) - hcall_real_table
2411	.long	0		/* 0x128 */
2412	.long	0		/* 0x12c */
2413	.long	0		/* 0x130 */
2414	.long	DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
2415#ifdef CONFIG_SPAPR_TCE_IOMMU
2416	.long	DOTSYM(kvmppc_rm_h_stuff_tce) - hcall_real_table
2417	.long	DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table
2418#else
2419	.long	0		/* 0x138 */
2420	.long	0		/* 0x13c */
2421#endif
2422	.long	0		/* 0x140 */
2423	.long	0		/* 0x144 */
2424	.long	0		/* 0x148 */
2425	.long	0		/* 0x14c */
2426	.long	0		/* 0x150 */
2427	.long	0		/* 0x154 */
2428	.long	0		/* 0x158 */
2429	.long	0		/* 0x15c */
2430	.long	0		/* 0x160 */
2431	.long	0		/* 0x164 */
2432	.long	0		/* 0x168 */
2433	.long	0		/* 0x16c */
2434	.long	0		/* 0x170 */
2435	.long	0		/* 0x174 */
2436	.long	0		/* 0x178 */
2437	.long	0		/* 0x17c */
2438	.long	0		/* 0x180 */
2439	.long	0		/* 0x184 */
2440	.long	0		/* 0x188 */
2441	.long	0		/* 0x18c */
2442	.long	0		/* 0x190 */
2443	.long	0		/* 0x194 */
2444	.long	0		/* 0x198 */
2445	.long	0		/* 0x19c */
2446	.long	0		/* 0x1a0 */
2447	.long	0		/* 0x1a4 */
2448	.long	0		/* 0x1a8 */
2449	.long	0		/* 0x1ac */
2450	.long	0		/* 0x1b0 */
2451	.long	0		/* 0x1b4 */
2452	.long	0		/* 0x1b8 */
2453	.long	0		/* 0x1bc */
2454	.long	0		/* 0x1c0 */
2455	.long	0		/* 0x1c4 */
2456	.long	0		/* 0x1c8 */
2457	.long	0		/* 0x1cc */
2458	.long	0		/* 0x1d0 */
2459	.long	0		/* 0x1d4 */
2460	.long	0		/* 0x1d8 */
2461	.long	0		/* 0x1dc */
2462	.long	0		/* 0x1e0 */
2463	.long	0		/* 0x1e4 */
2464	.long	0		/* 0x1e8 */
2465	.long	0		/* 0x1ec */
2466	.long	0		/* 0x1f0 */
2467	.long	0		/* 0x1f4 */
2468	.long	0		/* 0x1f8 */
2469	.long	0		/* 0x1fc */
2470	.long	0		/* 0x200 */
2471	.long	0		/* 0x204 */
2472	.long	0		/* 0x208 */
2473	.long	0		/* 0x20c */
2474	.long	0		/* 0x210 */
2475	.long	0		/* 0x214 */
2476	.long	0		/* 0x218 */
2477	.long	0		/* 0x21c */
2478	.long	0		/* 0x220 */
2479	.long	0		/* 0x224 */
2480	.long	0		/* 0x228 */
2481	.long	0		/* 0x22c */
2482	.long	0		/* 0x230 */
2483	.long	0		/* 0x234 */
2484	.long	0		/* 0x238 */
2485	.long	0		/* 0x23c */
2486	.long	0		/* 0x240 */
2487	.long	0		/* 0x244 */
2488	.long	0		/* 0x248 */
2489	.long	0		/* 0x24c */
2490	.long	0		/* 0x250 */
2491	.long	0		/* 0x254 */
2492	.long	0		/* 0x258 */
2493	.long	0		/* 0x25c */
2494	.long	0		/* 0x260 */
2495	.long	0		/* 0x264 */
2496	.long	0		/* 0x268 */
2497	.long	0		/* 0x26c */
2498	.long	0		/* 0x270 */
2499	.long	0		/* 0x274 */
2500	.long	0		/* 0x278 */
2501	.long	0		/* 0x27c */
2502	.long	0		/* 0x280 */
2503	.long	0		/* 0x284 */
2504	.long	0		/* 0x288 */
2505	.long	0		/* 0x28c */
2506	.long	0		/* 0x290 */
2507	.long	0		/* 0x294 */
2508	.long	0		/* 0x298 */
2509	.long	0		/* 0x29c */
2510	.long	0		/* 0x2a0 */
2511	.long	0		/* 0x2a4 */
2512	.long	0		/* 0x2a8 */
2513	.long	0		/* 0x2ac */
2514	.long	0		/* 0x2b0 */
2515	.long	0		/* 0x2b4 */
2516	.long	0		/* 0x2b8 */
2517	.long	0		/* 0x2bc */
2518	.long	0		/* 0x2c0 */
2519	.long	0		/* 0x2c4 */
2520	.long	0		/* 0x2c8 */
2521	.long	0		/* 0x2cc */
2522	.long	0		/* 0x2d0 */
2523	.long	0		/* 0x2d4 */
2524	.long	0		/* 0x2d8 */
2525	.long	0		/* 0x2dc */
2526	.long	0		/* 0x2e0 */
2527	.long	0		/* 0x2e4 */
2528	.long	0		/* 0x2e8 */
2529	.long	0		/* 0x2ec */
2530	.long	0		/* 0x2f0 */
2531	.long	0		/* 0x2f4 */
2532	.long	0		/* 0x2f8 */
2533#ifdef CONFIG_KVM_XICS
2534	.long	DOTSYM(kvmppc_rm_h_xirr_x) - hcall_real_table
2535#else
2536	.long	0		/* 0x2fc - H_XIRR_X*/
2537#endif
2538	.long	DOTSYM(kvmppc_h_random) - hcall_real_table
2539	.globl	hcall_real_table_end
2540hcall_real_table_end:
2541
2542_GLOBAL_TOC(kvmppc_h_set_xdabr)
2543EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr)
2544	andi.	r0, r5, DABRX_USER | DABRX_KERNEL
2545	beq	6f
2546	li	r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI
2547	andc.	r0, r5, r0
2548	beq	3f
25496:	li	r3, H_PARAMETER
2550	blr
2551
2552_GLOBAL_TOC(kvmppc_h_set_dabr)
2553EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr)
2554	li	r5, DABRX_USER | DABRX_KERNEL
25553:
2556BEGIN_FTR_SECTION
2557	b	2f
2558END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
2559	std	r4,VCPU_DABR(r3)
2560	stw	r5, VCPU_DABRX(r3)
2561	mtspr	SPRN_DABRX, r5
2562	/* Work around P7 bug where DABR can get corrupted on mtspr */
25631:	mtspr	SPRN_DABR,r4
2564	mfspr	r5, SPRN_DABR
2565	cmpd	r4, r5
2566	bne	1b
2567	isync
2568	li	r3,0
2569	blr
2570
25712:
2572	LOAD_REG_ADDR(r11, dawr_force_enable)
2573	lbz	r11, 0(r11)
2574	cmpdi	r11, 0
2575	bne	3f
2576	li	r3, H_HARDWARE
2577	blr
25783:
2579	/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
2580	rlwimi	r5, r4, 5, DAWRX_DR | DAWRX_DW
2581	rlwimi	r5, r4, 2, DAWRX_WT
2582	clrrdi	r4, r4, 3
2583	std	r4, VCPU_DAWR(r3)
2584	std	r5, VCPU_DAWRX(r3)
2585	/*
2586	 * If we came in through the real mode hcall handler then it is necessary
2587	 * to write the registers since the return path won't. Otherwise it is
2588	 * sufficient to store them in the vcpu struct as they will be loaded
2589	 * next time the vcpu is run.
2590	 */
2591	mfmsr	r6
2592	andi.	r6, r6, MSR_DR		/* in real mode? */
2593	bne	4f
2594	mtspr	SPRN_DAWR0, r4
2595	mtspr	SPRN_DAWRX0, r5
25964:	li	r3, 0
2597	blr
2598
2599_GLOBAL(kvmppc_h_cede)		/* r3 = vcpu pointer, r11 = msr, r13 = paca */
2600	ori	r11,r11,MSR_EE
2601	std	r11,VCPU_MSR(r3)
2602	li	r0,1
2603	stb	r0,VCPU_CEDED(r3)
2604	sync			/* order setting ceded vs. testing prodded */
2605	lbz	r5,VCPU_PRODDED(r3)
2606	cmpwi	r5,0
2607	bne	kvm_cede_prodded
2608	li	r12,0		/* set trap to 0 to say hcall is handled */
2609	stw	r12,VCPU_TRAP(r3)
2610	li	r0,H_SUCCESS
2611	std	r0,VCPU_GPR(R3)(r3)
2612
2613	/*
2614	 * Set our bit in the bitmask of napping threads unless all the
2615	 * other threads are already napping, in which case we send this
2616	 * up to the host.
2617	 */
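	/*
	 * A hedged sketch of the lwarx/stwcx. loop below; the bail-out
	 * condition is expressed per the comment above rather than the
	 * exact entry_exit_map encoding, and the helper names are
	 * illustrative, not real kernel primitives:
	 *
	 *	u32 mask = 1u << ptid;
	 *	for (;;) {
	 *		u32 old = load_reserve(&vc->napping_threads);	// lwarx
	 *		if (all_other_entered_threads_napping(old))
	 *			goto kvm_cede_exit;	// hand the core to the host
	 *		if (store_conditional(&vc->napping_threads, old | mask))	// stwcx.
	 *			break;
	 *	}
	 */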
2618	ld	r5,HSTATE_KVM_VCORE(r13)
2619	lbz	r6,HSTATE_PTID(r13)
2620	lwz	r8,VCORE_ENTRY_EXIT(r5)
2621	clrldi	r8,r8,56
2622	li	r0,1
2623	sld	r0,r0,r6
2624	addi	r6,r5,VCORE_NAPPING_THREADS
262531:	lwarx	r4,0,r6
2626	or	r4,r4,r0
2627	cmpw	r4,r8
2628	beq	kvm_cede_exit
2629	stwcx.	r4,0,r6
2630	bne	31b
2631	/* order napping_threads update vs testing entry_exit_map */
2632	isync
2633	li	r0,NAPPING_CEDE
2634	stb	r0,HSTATE_NAPPING(r13)
2635	lwz	r7,VCORE_ENTRY_EXIT(r5)
2636	cmpwi	r7,0x100
2637	bge	33f		/* another thread already exiting */
2638
2639/*
2640 * Although not specifically required by the architecture, POWER7
2641 * preserves the following registers in nap mode, even if an SMT mode
2642 * switch occurs: SLB entries, PURR, SPURR, AMOR, UAMOR, AMR, SPRG0-3,
2643 * DAR, DSISR, DABR, DABRX, DSCR, PMCx, MMCRx, SIAR, SDAR.
2644 */
2645	/* Save non-volatile GPRs */
2646	std	r14, VCPU_GPR(R14)(r3)
2647	std	r15, VCPU_GPR(R15)(r3)
2648	std	r16, VCPU_GPR(R16)(r3)
2649	std	r17, VCPU_GPR(R17)(r3)
2650	std	r18, VCPU_GPR(R18)(r3)
2651	std	r19, VCPU_GPR(R19)(r3)
2652	std	r20, VCPU_GPR(R20)(r3)
2653	std	r21, VCPU_GPR(R21)(r3)
2654	std	r22, VCPU_GPR(R22)(r3)
2655	std	r23, VCPU_GPR(R23)(r3)
2656	std	r24, VCPU_GPR(R24)(r3)
2657	std	r25, VCPU_GPR(R25)(r3)
2658	std	r26, VCPU_GPR(R26)(r3)
2659	std	r27, VCPU_GPR(R27)(r3)
2660	std	r28, VCPU_GPR(R28)(r3)
2661	std	r29, VCPU_GPR(R29)(r3)
2662	std	r30, VCPU_GPR(R30)(r3)
2663	std	r31, VCPU_GPR(R31)(r3)
2664
2665	/* save FP state */
2666	bl	kvmppc_save_fp
2667
2668#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2669/*
2670 * Branch around the call if both CPU_FTR_TM and
2671 * CPU_FTR_P9_TM_HV_ASSIST are off.
2672 */
2673BEGIN_FTR_SECTION
2674	b	91f
2675END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
2676	/*
2677	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
2678	 */
2679	ld	r3, HSTATE_KVM_VCPU(r13)
2680	ld      r4, VCPU_MSR(r3)
2681	li	r5, 0			/* don't preserve non-vol regs */
2682	bl	kvmppc_save_tm_hv
2683	nop
268491:
2685#endif
2686
2687	/*
2688	 * Set DEC to the smaller of DEC and HDEC, so that we wake
2689	 * no later than the end of our timeslice (HDEC interrupts
2690	 * don't wake us from nap).
2691	 */
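	/*
	 * In effect (a hedged sketch; the POWER9 large-decrementer check
	 * and the sign-extension details handled below are elided):
	 *
	 *	s64 dec = mfspr(SPRN_DEC);
	 *	s64 hdec = mfspr(SPRN_HDEC);
	 *	if (dec > hdec)
	 *		mtspr(SPRN_DEC, hdec);
	 */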
2692	mfspr	r3, SPRN_DEC
2693	mfspr	r4, SPRN_HDEC
2694	mftb	r5
2695BEGIN_FTR_SECTION
2696	/* On P9 check whether the guest has large decrementer mode enabled */
2697	ld	r6, HSTATE_KVM_VCORE(r13)
2698	ld	r6, VCORE_LPCR(r6)
2699	andis.	r6, r6, LPCR_LD@h
2700	bne	68f
2701END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
2702	extsw	r3, r3
270368:	EXTEND_HDEC(r4)
2704	cmpd	r3, r4
2705	ble	67f
2706	mtspr	SPRN_DEC, r4
270767:
2708	/* save expiry time of guest decrementer */
2709	add	r3, r3, r5
2710	ld	r4, HSTATE_KVM_VCPU(r13)
2711	ld	r5, HSTATE_KVM_VCORE(r13)
2712	ld	r6, VCORE_TB_OFFSET_APPL(r5)
2713	subf	r3, r6, r3	/* convert to host TB value */
2714	std	r3, VCPU_DEC_EXPIRES(r4)
2715
2716#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
2717	ld	r4, HSTATE_KVM_VCPU(r13)
2718	addi	r3, r4, VCPU_TB_CEDE
2719	bl	kvmhv_accumulate_time
2720#endif
2721
2722	lis	r3, LPCR_PECEDP@h	/* Do wake on privileged doorbell */
2723
2724	/* Go back to host stack */
2725	ld	r1, HSTATE_HOST_R1(r13)
2726
2727	/*
2728	 * Take a nap until a decrementer, external or doorbell interrupt
2729	 * occurs, with PECE1 and PECE0 set in LPCR.
2730	 * On POWER8, set PECEDH, and if we are ceding, also set PECEDP.
2731	 * Also clear the runlatch bit before napping.
2732	 */
2733kvm_do_nap:
2734	mfspr	r0, SPRN_CTRLF
2735	clrrdi	r0, r0, 1
2736	mtspr	SPRN_CTRLT, r0
2737
2738	li	r0,1
2739	stb	r0,HSTATE_HWTHREAD_REQ(r13)
2740	mfspr	r5,SPRN_LPCR
2741	ori	r5,r5,LPCR_PECE0 | LPCR_PECE1
2742BEGIN_FTR_SECTION
2743	ori	r5, r5, LPCR_PECEDH
2744	rlwimi	r5, r3, 0, LPCR_PECEDP
2745END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
2746
2747kvm_nap_sequence:		/* desired LPCR value in r5 */
2748BEGIN_FTR_SECTION
2749	/*
2750	 * PSSCR bits:	exit criterion = 1 (wakeup based on LPCR at sreset)
2751	 *		enable state loss = 1 (allow SMT mode switch)
2752	 *		requested level = 0 (just stop dispatching)
2753	 */
2754	lis	r3, (PSSCR_EC | PSSCR_ESL)@h
2755	/* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
2756	li	r4, LPCR_PECE_HVEE@higher
2757	sldi	r4, r4, 32
2758	or	r5, r5, r4
2759FTR_SECTION_ELSE
2760	li	r3, PNV_THREAD_NAP
2761ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
2762	mtspr	SPRN_LPCR,r5
2763	isync
2764
2765BEGIN_FTR_SECTION
2766	bl	isa300_idle_stop_mayloss
2767FTR_SECTION_ELSE
2768	bl	isa206_idle_insn_mayloss
2769ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
2770
2771	mfspr	r0, SPRN_CTRLF
2772	ori	r0, r0, 1
2773	mtspr	SPRN_CTRLT, r0
2774
2775	mtspr	SPRN_SRR1, r3
2776
2777	li	r0, 0
2778	stb	r0, PACA_FTRACE_ENABLED(r13)
2779
2780	li	r0, KVM_HWTHREAD_IN_KVM
2781	stb	r0, HSTATE_HWTHREAD_STATE(r13)
2782
2783	lbz	r0, HSTATE_NAPPING(r13)
2784	cmpwi	r0, NAPPING_CEDE
2785	beq	kvm_end_cede
2786	cmpwi	r0, NAPPING_NOVCPU
2787	beq	kvm_novcpu_wakeup
2788	cmpwi	r0, NAPPING_UNSPLIT
2789	beq	kvm_unsplit_wakeup
2790	twi	31,0,0 /* Nap state must not be zero */
2791
279233:	mr	r4, r3
2793	li	r3, 0
2794	li	r12, 0
2795	b	34f
2796
2797kvm_end_cede:
2798	/* Woken by external or decrementer interrupt */
2799
2800	/* get vcpu pointer */
2801	ld	r4, HSTATE_KVM_VCPU(r13)
2802
2803#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
2804	addi	r3, r4, VCPU_TB_RMINTR
2805	bl	kvmhv_accumulate_time
2806#endif
2807
2808#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2809/*
2810 * Branch around the call if both CPU_FTR_TM and
2811 * CPU_FTR_P9_TM_HV_ASSIST are off.
2812 */
2813BEGIN_FTR_SECTION
2814	b	91f
2815END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
2816	/*
2817	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
2818	 */
2819	mr      r3, r4
2820	ld      r4, VCPU_MSR(r3)
2821	li	r5, 0			/* don't preserve non-vol regs */
2822	bl	kvmppc_restore_tm_hv
2823	nop
2824	ld	r4, HSTATE_KVM_VCPU(r13)
282591:
2826#endif
2827
2828	/* load up FP state */
2829	bl	kvmppc_load_fp
2830
2831	/* Restore guest decrementer */
2832	ld	r3, VCPU_DEC_EXPIRES(r4)
2833	ld	r5, HSTATE_KVM_VCORE(r13)
2834	ld	r6, VCORE_TB_OFFSET_APPL(r5)
2835	add	r3, r3, r6	/* convert host TB to guest TB value */
2836	mftb	r7
2837	subf	r3, r7, r3
2838	mtspr	SPRN_DEC, r3
2839
2840	/* Load NV GPRS */
2841	ld	r14, VCPU_GPR(R14)(r4)
2842	ld	r15, VCPU_GPR(R15)(r4)
2843	ld	r16, VCPU_GPR(R16)(r4)
2844	ld	r17, VCPU_GPR(R17)(r4)
2845	ld	r18, VCPU_GPR(R18)(r4)
2846	ld	r19, VCPU_GPR(R19)(r4)
2847	ld	r20, VCPU_GPR(R20)(r4)
2848	ld	r21, VCPU_GPR(R21)(r4)
2849	ld	r22, VCPU_GPR(R22)(r4)
2850	ld	r23, VCPU_GPR(R23)(r4)
2851	ld	r24, VCPU_GPR(R24)(r4)
2852	ld	r25, VCPU_GPR(R25)(r4)
2853	ld	r26, VCPU_GPR(R26)(r4)
2854	ld	r27, VCPU_GPR(R27)(r4)
2855	ld	r28, VCPU_GPR(R28)(r4)
2856	ld	r29, VCPU_GPR(R29)(r4)
2857	ld	r30, VCPU_GPR(R30)(r4)
2858	ld	r31, VCPU_GPR(R31)(r4)
2859
2860	/* Check the wake reason in SRR1 to see why we got here */
2861	bl	kvmppc_check_wake_reason
2862
2863	/*
2864	 * Restore volatile registers since we could have called a
2865	 * C routine in kvmppc_check_wake_reason
2866	 *	r4 = VCPU
2867	 * r3 tells us whether we need to return to host or not
2868	 * WARNING: r3 is checked further down;
2869	 * do not modify it until that check is done.
2870	 */
2871	ld	r4, HSTATE_KVM_VCPU(r13)
2872
2873	/* clear our bit in vcore->napping_threads */
287434:	ld	r5,HSTATE_KVM_VCORE(r13)
2875	lbz	r7,HSTATE_PTID(r13)
2876	li	r0,1
2877	sld	r0,r0,r7
2878	addi	r6,r5,VCORE_NAPPING_THREADS
287932:	lwarx	r7,0,r6
2880	andc	r7,r7,r0
2881	stwcx.	r7,0,r6
2882	bne	32b
2883	li	r0,0
2884	stb	r0,HSTATE_NAPPING(r13)
2885
2886	/* See if the wake reason saved in r3 means we need to exit */
2887	stw	r12, VCPU_TRAP(r4)
2888	mr	r9, r4
2889	cmpdi	r3, 0
2890	bgt	guest_exit_cont
2891	b	maybe_reenter_guest
2892
2893	/* cede when already previously prodded case */
2894kvm_cede_prodded:
2895	li	r0,0
2896	stb	r0,VCPU_PRODDED(r3)
2897	sync			/* order testing prodded vs. clearing ceded */
2898	stb	r0,VCPU_CEDED(r3)
2899	li	r3,H_SUCCESS
2900	blr
2901
2902	/* we've ceded but we want to give control to the host */
2903kvm_cede_exit:
2904	ld	r9, HSTATE_KVM_VCPU(r13)
2905#ifdef CONFIG_KVM_XICS
2906	/* are we using XIVE with single escalation? */
2907	ld	r10, VCPU_XIVE_ESC_VADDR(r9)
2908	cmpdi	r10, 0
2909	beq	3f
2910	li	r6, XIVE_ESB_SET_PQ_00
2911	/*
2912	 * If we still have a pending escalation, abort the cede,
2913	 * and we must set PQ to 10 rather than 00 so that we don't
2914	 * potentially end up with two entries for the escalation
2915	 * interrupt in the XIVE interrupt queue.  In that case
2916	 * we also don't want to set xive_esc_on to 1 here in
2917	 * case we race with xive_esc_irq().
2918	 */
2919	lbz	r5, VCPU_XIVE_ESC_ON(r9)
2920	cmpwi	r5, 0
2921	beq	4f
2922	li	r0, 0
2923	stb	r0, VCPU_CEDED(r9)
2924	/*
2925	 * The escalation interrupts are special as we don't EOI them.
2926	 * There is no need to use the load-after-store ordering offset
2927	 * to set PQ to 10 as we won't use StoreEOI.
2928	 */
2929	li	r6, XIVE_ESB_SET_PQ_10
2930	b	5f
29314:	li	r0, 1
2932	stb	r0, VCPU_XIVE_ESC_ON(r9)
2933	/* make sure store to xive_esc_on is seen before xive_esc_irq runs */
2934	sync
29355:	/* Enable XIVE escalation */
2936	mfmsr	r0
2937	andi.	r0, r0, MSR_DR		/* in real mode? */
2938	beq	1f
2939	ldx	r0, r10, r6
2940	b	2f
29411:	ld	r10, VCPU_XIVE_ESC_RADDR(r9)
2942	ldcix	r0, r10, r6
29432:	sync
2944#endif /* CONFIG_KVM_XICS */
29453:	b	guest_exit_cont
2946
2947	/* Try to do machine check recovery in real mode */
2948machine_check_realmode:
2949	mr	r3, r9		/* get vcpu pointer */
2950	bl	kvmppc_realmode_machine_check
2951	nop
2952	/* all machine checks go to virtual mode for further handling */
2953	ld	r9, HSTATE_KVM_VCPU(r13)
2954	li	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
2955	b	guest_exit_cont
2956
2957/*
2958 * Call C code to handle a HMI in real mode.
2959 * Only the primary thread does the call, secondary threads are handled
2960 * by calling hmi_exception_realmode() after kvmppc_hv_entry returns.
2961 * r9 points to the vcpu on entry
2962 */
2963hmi_realmode:
2964	lbz	r0, HSTATE_PTID(r13)
2965	cmpwi	r0, 0
2966	bne	guest_exit_cont
2967	bl	kvmppc_realmode_hmi_handler
2968	ld	r9, HSTATE_KVM_VCPU(r13)
2969	li	r12, BOOK3S_INTERRUPT_HMI
2970	b	guest_exit_cont
2971
2972/*
2973 * Check the reason we woke from nap, and take appropriate action.
2974 * Returns (in r3):
2975 *	0 if nothing needs to be done
2976 *	1 if something happened that needs to be handled by the host
2977 *	-1 if there was a guest wakeup (IPI or msgsnd)
2978 *	-2 if we handled a PCI passthrough interrupt (returned by
2979 *		kvmppc_read_intr only)
2980 *
2981 * Also sets r12 to the interrupt vector for any interrupt that needs
2982 * to be handled now by the host (0x500 for external interrupt), or zero.
2983 * Modifies all volatile registers (since it may call a C function).
2984 * This routine calls kvmppc_read_intr, a C function, if an external
2985 * interrupt is pending.
2986 */
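/*
 * Schematically (a hedged sketch; the SRR1 wake-reason field extraction
 * differs between P7 and P8/P9, and the numeric cases mirror the cmpwi
 * values used below):
 *
 *	switch (srr1_wake_reason) {
 *	case 8:		// external interrupt
 *		return kvmppc_read_intr();	// may also adjust r12 as described
 *	case 6:		// decrementer
 *		return 0;
 *	case 5:		// privileged doorbell (POWER8 and later)
 *		return 0;
 *	case 3:		// hypervisor doorbell: clear it, then check host IPI
 *		return host_ipi ? 1 : -1;
 *	case 0xa:	// hypervisor maintenance interrupt
 *		return 1;
 *	default:
 *		return 1;
 *	}
 */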
2987kvmppc_check_wake_reason:
2988	mfspr	r6, SPRN_SRR1
2989BEGIN_FTR_SECTION
2990	rlwinm	r6, r6, 45-31, 0xf	/* extract wake reason field (P8) */
2991FTR_SECTION_ELSE
2992	rlwinm	r6, r6, 45-31, 0xe	/* P7 wake reason field is 3 bits */
2993ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
2994	cmpwi	r6, 8			/* was it an external interrupt? */
2995	beq	7f			/* if so, see what it was */
2996	li	r3, 0
2997	li	r12, 0
2998	cmpwi	r6, 6			/* was it the decrementer? */
2999	beq	0f
3000BEGIN_FTR_SECTION
3001	cmpwi	r6, 5			/* privileged doorbell? */
3002	beq	0f
3003	cmpwi	r6, 3			/* hypervisor doorbell? */
3004	beq	3f
3005END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
3006	cmpwi	r6, 0xa			/* Hypervisor maintenance? */
3007	beq	4f
3008	li	r3, 1			/* anything else, return 1 */
30090:	blr
3010
3011	/* hypervisor doorbell */
30123:	li	r12, BOOK3S_INTERRUPT_H_DOORBELL
3013
3014	/*
3015	 * Clear the doorbell as we will invoke the handler
3016	 * explicitly in the guest exit path.
3017	 */
3018	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
3019	PPC_MSGCLR(6)
3020	/* see if it's a host IPI */
3021	li	r3, 1
3022BEGIN_FTR_SECTION
3023	PPC_MSGSYNC
3024	lwsync
3025END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
3026	lbz	r0, HSTATE_HOST_IPI(r13)
3027	cmpwi	r0, 0
3028	bnelr
3029	/* if not, return -1 */
3030	li	r3, -1
3031	blr
3032
3033	/* Woken up due to Hypervisor maintenance interrupt */
30344:	li	r12, BOOK3S_INTERRUPT_HMI
3035	li	r3, 1
3036	blr
3037
3038	/* external interrupt - create a stack frame so we can call C */
30397:	mflr	r0
3040	std	r0, PPC_LR_STKOFF(r1)
3041	stdu	r1, -PPC_MIN_STKFRM(r1)
3042	bl	kvmppc_read_intr
3043	nop
3044	li	r12, BOOK3S_INTERRUPT_EXTERNAL
3045	cmpdi	r3, 1
3046	ble	1f
3047
3048	/*
3049	 * A return code of 2 means a PCI passthrough interrupt, but
3050	 * we need to return to the host to complete handling the
3051	 * interrupt. The trap reason is expected in r12 by the guest
3052	 * exit code.
3053	 */
3054	li	r12, BOOK3S_INTERRUPT_HV_RM_HARD
30551:
3056	ld	r0, PPC_MIN_STKFRM+PPC_LR_STKOFF(r1)
3057	addi	r1, r1, PPC_MIN_STKFRM
3058	mtlr	r0
3059	blr
3060
3061/*
3062 * Save away FP, VMX and VSX registers.
3063 * r3 = vcpu pointer
3064 * N.B. r30 and r31 are volatile across this function,
3065 * thus it is not callable from C.
3066 */
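/*
 * Roughly equivalent C (a sketch; the MSR enabling depends on CPU
 * features, the vcpu member names are approximate, and "mtmsrd" below
 * stands for the privileged instruction issued in the asm):
 *
 *	unsigned long msr = mfmsr() | MSR_FP | MSR_VEC | MSR_VSX;
 *	// mtmsrd msr
 *	store_fp_state(&vcpu->arch.fp);
 *	store_vr_state(&vcpu->arch.vr);
 *	vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
 */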
3067kvmppc_save_fp:
3068	mflr	r30
3069	mr	r31,r3
3070	mfmsr	r5
3071	ori	r8,r5,MSR_FP
3072#ifdef CONFIG_ALTIVEC
3073BEGIN_FTR_SECTION
3074	oris	r8,r8,MSR_VEC@h
3075END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
3076#endif
3077#ifdef CONFIG_VSX
3078BEGIN_FTR_SECTION
3079	oris	r8,r8,MSR_VSX@h
3080END_FTR_SECTION_IFSET(CPU_FTR_VSX)
3081#endif
3082	mtmsrd	r8
3083	addi	r3,r3,VCPU_FPRS
3084	bl	store_fp_state
3085#ifdef CONFIG_ALTIVEC
3086BEGIN_FTR_SECTION
3087	addi	r3,r31,VCPU_VRS
3088	bl	store_vr_state
3089END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
3090#endif
3091	mfspr	r6,SPRN_VRSAVE
3092	stw	r6,VCPU_VRSAVE(r31)
3093	mtlr	r30
3094	blr
3095
3096/*
3097 * Load up FP, VMX and VSX registers
3098 * r4 = vcpu pointer
3099 * N.B. r30 and r31 are volatile across this function,
3100 * thus it is not callable from C.
3101 */
3102kvmppc_load_fp:
3103	mflr	r30
3104	mr	r31,r4
3105	mfmsr	r9
3106	ori	r8,r9,MSR_FP
3107#ifdef CONFIG_ALTIVEC
3108BEGIN_FTR_SECTION
3109	oris	r8,r8,MSR_VEC@h
3110END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
3111#endif
3112#ifdef CONFIG_VSX
3113BEGIN_FTR_SECTION
3114	oris	r8,r8,MSR_VSX@h
3115END_FTR_SECTION_IFSET(CPU_FTR_VSX)
3116#endif
3117	mtmsrd	r8
3118	addi	r3,r4,VCPU_FPRS
3119	bl	load_fp_state
3120#ifdef CONFIG_ALTIVEC
3121BEGIN_FTR_SECTION
3122	addi	r3,r31,VCPU_VRS
3123	bl	load_vr_state
3124END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
3125#endif
3126	lwz	r7,VCPU_VRSAVE(r31)
3127	mtspr	SPRN_VRSAVE,r7
3128	mtlr	r30
3129	mr	r4,r31
3130	blr
3131
3132#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
3133/*
3134 * Save transactional state and TM-related registers.
3135 * Called with r3 pointing to the vcpu struct and r4 containing
3136 * the guest MSR value.
3137 * r5 is non-zero iff non-volatile register state needs to be maintained.
3138 * If r5 == 0, this can modify all checkpointed registers, but
3139 * restores r1 and r2 before exit.
3140 */
3141_GLOBAL_TOC(kvmppc_save_tm_hv)
3142EXPORT_SYMBOL_GPL(kvmppc_save_tm_hv)
3143	/* See if we need to handle fake suspend mode */
3144BEGIN_FTR_SECTION
3145	b	__kvmppc_save_tm
3146END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
3147
3148	lbz	r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */
3149	cmpwi	r0, 0
3150	beq	__kvmppc_save_tm
3151
3152	/* The following code handles the fake_suspend = 1 case */
3153	mflr	r0
3154	std	r0, PPC_LR_STKOFF(r1)
3155	stdu	r1, -TM_FRAME_SIZE(r1)
3156
3157	/* Turn on TM. */
3158	mfmsr	r8
3159	li	r0, 1
3160	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
3161	mtmsrd	r8
3162
3163	rldicl. r8, r8, 64 - MSR_TS_S_LG, 62 /* Did we actually hrfid? */
3164	beq	4f
3165BEGIN_FTR_SECTION
3166	bl	pnv_power9_force_smt4_catch
3167END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
3168	nop
3169
3170	/*
3171	 * It's possible that treclaim. may modify registers, if we have lost
3172	 * track of fake-suspend state in the guest due to it using rfscv.
3173	 * Save and restore registers in case this occurs.
3174	 */
3175	mfspr	r3, SPRN_DSCR
3176	mfspr	r4, SPRN_XER
3177	mfspr	r5, SPRN_AMR
3178	/* SPRN_TAR would need to be saved here if the kernel ever used it */
3179	mfcr	r12
3180	SAVE_NVGPRS(r1)
3181	SAVE_GPR(2, r1)
3182	SAVE_GPR(3, r1)
3183	SAVE_GPR(4, r1)
3184	SAVE_GPR(5, r1)
3185	stw	r12, 8(r1)
3186	std	r1, HSTATE_HOST_R1(r13)
3187
3188	/* We have to treclaim here because that's the only way to do S->N */
3189	li	r3, TM_CAUSE_KVM_RESCHED
3190	TRECLAIM(R3)
3191
3192	GET_PACA(r13)
3193	ld	r1, HSTATE_HOST_R1(r13)
3194	REST_GPR(2, r1)
3195	REST_GPR(3, r1)
3196	REST_GPR(4, r1)
3197	REST_GPR(5, r1)
3198	lwz	r12, 8(r1)
3199	REST_NVGPRS(r1)
3200	mtspr	SPRN_DSCR, r3
3201	mtspr	SPRN_XER, r4
3202	mtspr	SPRN_AMR, r5
3203	mtcr	r12
3204	HMT_MEDIUM
3205
3206	/*
3207	 * We were in fake suspend, so we are not going to save the
3208	 * register state as the guest checkpointed state (since
3209	 * we already have it); therefore we can now use any volatile GPR.
3210	 * In fact, treclaim in fake suspend state doesn't modify
3211	 * any registers.
3212	 */
3213
3214BEGIN_FTR_SECTION
3215	bl	pnv_power9_force_smt4_release
3216END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
3217	nop
3218
32194:
3220	mfspr	r3, SPRN_PSSCR
3221	/* PSSCR_FAKE_SUSPEND is a write-only bit, but clear it anyway */
3222	li	r0, PSSCR_FAKE_SUSPEND
3223	andc	r3, r3, r0
3224	mtspr	SPRN_PSSCR, r3
3225
3226	/* Don't save TEXASR, use value from last exit in real suspend state */
3227	ld	r9, HSTATE_KVM_VCPU(r13)
3228	mfspr	r5, SPRN_TFHAR
3229	mfspr	r6, SPRN_TFIAR
3230	std	r5, VCPU_TFHAR(r9)
3231	std	r6, VCPU_TFIAR(r9)
3232
3233	addi	r1, r1, TM_FRAME_SIZE
3234	ld	r0, PPC_LR_STKOFF(r1)
3235	mtlr	r0
3236	blr
3237
3238/*
3239 * Restore transactional state and TM-related registers.
3240 * Called with r3 pointing to the vcpu struct
3241 * and r4 containing the guest MSR value.
3242 * r5 is non-zero iff non-volatile register state needs to be maintained.
3243 * This potentially modifies all checkpointed registers.
3244 * It restores r1 and r2 from the PACA.
3245 */
3246_GLOBAL_TOC(kvmppc_restore_tm_hv)
3247EXPORT_SYMBOL_GPL(kvmppc_restore_tm_hv)
3248	/*
3249	 * If we are doing TM emulation for the guest on a POWER9 DD2,
3250	 * then we don't actually do a trechkpt -- we either set up
3251	 * fake-suspend mode, or emulate a TM rollback.
3252	 */
3253BEGIN_FTR_SECTION
3254	b	__kvmppc_restore_tm
3255END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
3256	mflr	r0
3257	std	r0, PPC_LR_STKOFF(r1)
3258
3259	li	r0, 0
3260	stb	r0, HSTATE_FAKE_SUSPEND(r13)
3261
3262	/* Turn on TM so we can restore TM SPRs */
3263	mfmsr	r5
3264	li	r0, 1
3265	rldimi	r5, r0, MSR_TM_LG, 63-MSR_TM_LG
3266	mtmsrd	r5
3267
3268	/*
3269	 * The user may change these outside of a transaction, so they must
3270	 * always be context switched.
3271	 */
3272	ld	r5, VCPU_TFHAR(r3)
3273	ld	r6, VCPU_TFIAR(r3)
3274	ld	r7, VCPU_TEXASR(r3)
3275	mtspr	SPRN_TFHAR, r5
3276	mtspr	SPRN_TFIAR, r6
3277	mtspr	SPRN_TEXASR, r7
3278
3279	rldicl. r5, r4, 64 - MSR_TS_S_LG, 62
3280	beqlr		/* TM not active in guest */
3281
3282	/* Make sure the failure summary is set */
3283	oris	r7, r7, (TEXASR_FS)@h
3284	mtspr	SPRN_TEXASR, r7
3285
3286	cmpwi	r5, 1		/* check for suspended state */
3287	bgt	10f
3288	stb	r5, HSTATE_FAKE_SUSPEND(r13)
3289	b	9f		/* and return */
329010:	stdu	r1, -PPC_MIN_STKFRM(r1)
3291	/* guest is in transactional state, so simulate rollback */
3292	bl	kvmhv_emulate_tm_rollback
3293	nop
3294	addi	r1, r1, PPC_MIN_STKFRM
32959:	ld	r0, PPC_LR_STKOFF(r1)
3296	mtlr	r0
3297	blr
3298#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
3299
3300/*
3301 * We come here if we get any exception or interrupt while we are
3302 * executing host real mode code while in guest MMU context.
3303 * r12 is (CR << 32) | vector
3304 * r13 points to our PACA
3305 * r12 is saved in HSTATE_SCRATCH0(r13)
3306 * r9 is saved in HSTATE_SCRATCH2(r13)
3307 * r13 is saved in HSPRG1
3308 * cfar is saved in HSTATE_CFAR(r13)
3309 * ppr is saved in HSTATE_PPR(r13)
3310 */
3311kvmppc_bad_host_intr:
3312	/*
3313	 * Switch to the emergency stack, but start half-way down in
3314	 * case we were already on it.
3315	 */
3316	mr	r9, r1
3317	std	r1, PACAR1(r13)
3318	ld	r1, PACAEMERGSP(r13)
3319	subi	r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE
3320	std	r9, 0(r1)
3321	std	r0, GPR0(r1)
3322	std	r9, GPR1(r1)
3323	std	r2, GPR2(r1)
3324	SAVE_4GPRS(3, r1)
3325	SAVE_2GPRS(7, r1)
3326	srdi	r0, r12, 32
3327	clrldi	r12, r12, 32
3328	std	r0, _CCR(r1)
3329	std	r12, _TRAP(r1)
3330	andi.	r0, r12, 2
3331	beq	1f
3332	mfspr	r3, SPRN_HSRR0
3333	mfspr	r4, SPRN_HSRR1
3334	mfspr	r5, SPRN_HDAR
3335	mfspr	r6, SPRN_HDSISR
3336	b	2f
33371:	mfspr	r3, SPRN_SRR0
3338	mfspr	r4, SPRN_SRR1
3339	mfspr	r5, SPRN_DAR
3340	mfspr	r6, SPRN_DSISR
33412:	std	r3, _NIP(r1)
3342	std	r4, _MSR(r1)
3343	std	r5, _DAR(r1)
3344	std	r6, _DSISR(r1)
3345	ld	r9, HSTATE_SCRATCH2(r13)
3346	ld	r12, HSTATE_SCRATCH0(r13)
3347	GET_SCRATCH0(r0)
3348	SAVE_4GPRS(9, r1)
3349	std	r0, GPR13(r1)
3350	SAVE_NVGPRS(r1)
3351	ld	r5, HSTATE_CFAR(r13)
3352	std	r5, ORIG_GPR3(r1)
3353	mflr	r3
3354	mfctr	r4
3355	mfxer	r5
3356	lbz	r6, PACAIRQSOFTMASK(r13)
3357	std	r3, _LINK(r1)
3358	std	r4, _CTR(r1)
3359	std	r5, _XER(r1)
3360	std	r6, SOFTE(r1)
3361	ld	r2, PACATOC(r13)
3362	LOAD_REG_IMMEDIATE(r3, 0x7265677368657265)
3363	std	r3, STACK_FRAME_OVERHEAD-16(r1)
3364
3365	/*
3366	 * On POWER9 do a minimal restore of the MMU and call C code,
3367	 * which will print a message and panic.
3368	 * XXX On POWER7 and POWER8, we just spin here since we don't
3369	 * know what the other threads are doing (and we don't want to
3370	 * coordinate with them) - but at least we now have register state
3371	 * in memory that we might be able to look at from another CPU.
3372	 */
3373BEGIN_FTR_SECTION
3374	b	.
3375END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
3376	ld	r9, HSTATE_KVM_VCPU(r13)
3377	ld	r10, VCPU_KVM(r9)
3378
3379	li	r0, 0
3380	mtspr	SPRN_AMR, r0
3381	mtspr	SPRN_IAMR, r0
3382	mtspr	SPRN_CIABR, r0
3383	mtspr	SPRN_DAWRX0, r0
3384
3385BEGIN_MMU_FTR_SECTION
3386	b	4f
3387END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
3388
3389	slbmte	r0, r0
3390	slbia
3391	ptesync
3392	ld	r8, PACA_SLBSHADOWPTR(r13)
3393	.rept	SLB_NUM_BOLTED
3394	li	r3, SLBSHADOW_SAVEAREA
3395	LDX_BE	r5, r8, r3
3396	addi	r3, r3, 8
3397	LDX_BE	r6, r8, r3
3398	andis.	r7, r5, SLB_ESID_V@h
3399	beq	3f
3400	slbmte	r6, r5
34013:	addi	r8, r8, 16
3402	.endr
3403
34044:	lwz	r7, KVM_HOST_LPID(r10)
3405	mtspr	SPRN_LPID, r7
3406	mtspr	SPRN_PID, r0
3407	ld	r8, KVM_HOST_LPCR(r10)
3408	mtspr	SPRN_LPCR, r8
3409	isync
3410	li	r0, KVM_GUEST_MODE_NONE
3411	stb	r0, HSTATE_IN_GUEST(r13)
3412
3413	/*
3414	 * Turn on the MMU and jump to C code
3415	 */
3416	bcl	20, 31, .+4
34175:	mflr	r3
3418	addi	r3, r3, 9f - 5b
3419	li	r4, -1
3420	rldimi	r3, r4, 62, 0	/* ensure 0xc000000000000000 bits are set */
3421	ld	r4, PACAKMSR(r13)
3422	mtspr	SPRN_SRR0, r3
3423	mtspr	SPRN_SRR1, r4
3424	RFI_TO_KERNEL
34259:	addi	r3, r1, STACK_FRAME_OVERHEAD
3426	bl	kvmppc_bad_interrupt
3427	b	9b
3428
3429/*
3430 * This mimics the MSR transition on IRQ delivery.  The new guest MSR is taken
3431 * from VCPU_INTR_MSR and is modified based on the required TM state changes.
3432 *   r11 has the guest MSR value (in/out)
3433 *   r9 has a vcpu pointer (in)
3434 *   r0 is used as a scratch register
3435 */
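/*
 * Equivalent logic, roughly (a hedged sketch using the usual MSR_TS_*
 * macros; "intr_msr" stands for the field behind VCPU_INTR_MSR):
 *
 *	unsigned long ts = msr & MSR_TS_MASK;
 *	if (ts == MSR_TS_T)		// transactional -> suspended
 *		ts = MSR_TS_S;
 *	msr = (vcpu->arch.intr_msr & ~MSR_TS_MASK) | ts;
 */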
3436kvmppc_msr_interrupt:
3437	rldicl	r0, r11, 64 - MSR_TS_S_LG, 62
3438	cmpwi	r0, 2 /* Check if we are in transactional state..  */
3439	ld	r11, VCPU_INTR_MSR(r9)
3440	bne	1f
3441	/* ... if transactional, change to suspended */
3442	li	r0, 1
34431:	rldimi	r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
3444	blr
3445
3446/*
3447 * Load up guest PMU state.  R3 points to the vcpu struct.
3448 */
3449_GLOBAL(kvmhv_load_guest_pmu)
3450EXPORT_SYMBOL_GPL(kvmhv_load_guest_pmu)
3451	mr	r4, r3
3452	mflr	r0
3453	li	r3, 1
3454	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
3455	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
3456	isync
3457BEGIN_FTR_SECTION
3458	ld	r3, VCPU_MMCR(r4)
3459	andi.	r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
3460	cmpwi	r5, MMCR0_PMAO
3461	beql	kvmppc_fix_pmao
3462END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
3463	lwz	r3, VCPU_PMC(r4)	/* always load up guest PMU registers */
3464	lwz	r5, VCPU_PMC + 4(r4)	/* to prevent information leak */
3465	lwz	r6, VCPU_PMC + 8(r4)
3466	lwz	r7, VCPU_PMC + 12(r4)
3467	lwz	r8, VCPU_PMC + 16(r4)
3468	lwz	r9, VCPU_PMC + 20(r4)
3469	mtspr	SPRN_PMC1, r3
3470	mtspr	SPRN_PMC2, r5
3471	mtspr	SPRN_PMC3, r6
3472	mtspr	SPRN_PMC4, r7
3473	mtspr	SPRN_PMC5, r8
3474	mtspr	SPRN_PMC6, r9
3475	ld	r3, VCPU_MMCR(r4)
3476	ld	r5, VCPU_MMCR + 8(r4)
3477	ld	r6, VCPU_MMCRA(r4)
3478	ld	r7, VCPU_SIAR(r4)
3479	ld	r8, VCPU_SDAR(r4)
3480	mtspr	SPRN_MMCR1, r5
3481	mtspr	SPRN_MMCRA, r6
3482	mtspr	SPRN_SIAR, r7
3483	mtspr	SPRN_SDAR, r8
3484BEGIN_FTR_SECTION
3485	ld      r5, VCPU_MMCR + 24(r4)
3486	ld      r6, VCPU_SIER + 8(r4)
3487	ld      r7, VCPU_SIER + 16(r4)
3488	mtspr   SPRN_MMCR3, r5
3489	mtspr   SPRN_SIER2, r6
3490	mtspr   SPRN_SIER3, r7
3491END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
3492BEGIN_FTR_SECTION
3493	ld	r5, VCPU_MMCR + 16(r4)
3494	ld	r6, VCPU_SIER(r4)
3495	mtspr	SPRN_MMCR2, r5
3496	mtspr	SPRN_SIER, r6
3497BEGIN_FTR_SECTION_NESTED(96)
3498	lwz	r7, VCPU_PMC + 24(r4)
3499	lwz	r8, VCPU_PMC + 28(r4)
3500	ld	r9, VCPU_MMCRS(r4)
3501	mtspr	SPRN_SPMC1, r7
3502	mtspr	SPRN_SPMC2, r8
3503	mtspr	SPRN_MMCRS, r9
3504END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
3505END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
3506	mtspr	SPRN_MMCR0, r3
3507	isync
3508	mtlr	r0
3509	blr
3510
3511/*
3512 * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu.
3513 */
3514_GLOBAL(kvmhv_load_host_pmu)
3515EXPORT_SYMBOL_GPL(kvmhv_load_host_pmu)
3516	mflr	r0
3517	lbz	r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
3518	cmpwi	r4, 0
3519	beq	23f			/* skip if not */
3520BEGIN_FTR_SECTION
3521	ld	r3, HSTATE_MMCR0(r13)
3522	andi.	r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
3523	cmpwi	r4, MMCR0_PMAO
3524	beql	kvmppc_fix_pmao
3525END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
3526	lwz	r3, HSTATE_PMC1(r13)
3527	lwz	r4, HSTATE_PMC2(r13)
3528	lwz	r5, HSTATE_PMC3(r13)
3529	lwz	r6, HSTATE_PMC4(r13)
3530	lwz	r8, HSTATE_PMC5(r13)
3531	lwz	r9, HSTATE_PMC6(r13)
3532	mtspr	SPRN_PMC1, r3
3533	mtspr	SPRN_PMC2, r4
3534	mtspr	SPRN_PMC3, r5
3535	mtspr	SPRN_PMC4, r6
3536	mtspr	SPRN_PMC5, r8
3537	mtspr	SPRN_PMC6, r9
3538	ld	r3, HSTATE_MMCR0(r13)
3539	ld	r4, HSTATE_MMCR1(r13)
3540	ld	r5, HSTATE_MMCRA(r13)
3541	ld	r6, HSTATE_SIAR(r13)
3542	ld	r7, HSTATE_SDAR(r13)
3543	mtspr	SPRN_MMCR1, r4
3544	mtspr	SPRN_MMCRA, r5
3545	mtspr	SPRN_SIAR, r6
3546	mtspr	SPRN_SDAR, r7
3547BEGIN_FTR_SECTION
3548	ld	r8, HSTATE_MMCR2(r13)
3549	ld	r9, HSTATE_SIER(r13)
3550	mtspr	SPRN_MMCR2, r8
3551	mtspr	SPRN_SIER, r9
3552END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
3553BEGIN_FTR_SECTION
3554	ld      r5, HSTATE_MMCR3(r13)
3555	ld      r6, HSTATE_SIER2(r13)
3556	ld      r7, HSTATE_SIER3(r13)
3557	mtspr   SPRN_MMCR3, r5
3558	mtspr   SPRN_SIER2, r6
3559	mtspr   SPRN_SIER3, r7
3560END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
3561	mtspr	SPRN_MMCR0, r3
3562	isync
3563	mtlr	r0
356423:	blr
3565
3566/*
3567 * Save guest PMU state into the vcpu struct.
3568 * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA)
3569 */
3570_GLOBAL(kvmhv_save_guest_pmu)
3571EXPORT_SYMBOL_GPL(kvmhv_save_guest_pmu)
3572	mr	r9, r3
3573	mr	r8, r4
3574BEGIN_FTR_SECTION
3575	/*
3576	 * POWER8 seems to have a hardware bug where setting
3577	 * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
3578	 * when some counters are already negative doesn't seem
3579	 * to cause a performance monitor alert (and hence interrupt).
3580	 * The effect of this is that when saving the PMU state,
3581	 * if there is no PMU alert pending when we read MMCR0
3582	 * before freezing the counters, but one becomes pending
3583	 * before we read the counters, we lose it.
3584	 * To work around this, we need a way to freeze the counters
3585	 * before reading MMCR0.  Normally, freezing the counters
3586	 * is done by writing MMCR0 (to set MMCR0[FC]) which
3587	 * unavoidably writes MMCR0[PMAO] as well.  On POWER8,
3588	 * we can also freeze the counters using MMCR2, by writing
3589	 * 1s to all the counter freeze condition bits (there are
3590	 * 9 bits each for 6 counters).
3591	 */
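	/*
	 * i.e., roughly (a hedged sketch; MMCR2's freeze-condition bits
	 * occupy the upper 54 bits, hence the clrrdi by 10 below):
	 *
	 *	saved_mmcr2 = mfspr(SPRN_MMCR2);
	 *	mtspr(SPRN_MMCR2, ~0UL << 10);	// set every freeze condition bit
	 *	// isync, then MMCR0 can be read and the counters frozen
	 *	// without losing a late-arriving PMU alert
	 */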
3592	li	r3, -1			/* set all freeze bits */
3593	clrrdi	r3, r3, 10
3594	mfspr	r10, SPRN_MMCR2
3595	mtspr	SPRN_MMCR2, r3
3596	isync
3597END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
3598	li	r3, 1
3599	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
3600	mfspr	r4, SPRN_MMCR0		/* save MMCR0 */
3601	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
3602	mfspr	r6, SPRN_MMCRA
3603	/* Clear MMCRA in order to disable SDAR updates */
3604	li	r7, 0
3605	mtspr	SPRN_MMCRA, r7
3606	isync
3607	cmpwi	r8, 0			/* did they ask for PMU stuff to be saved? */
3608	bne	21f
3609	std	r3, VCPU_MMCR(r9)	/* if not, set saved MMCR0 to FC */
3610	b	22f
361121:	mfspr	r5, SPRN_MMCR1
3612	mfspr	r7, SPRN_SIAR
3613	mfspr	r8, SPRN_SDAR
3614	std	r4, VCPU_MMCR(r9)
3615	std	r5, VCPU_MMCR + 8(r9)
3616	std	r6, VCPU_MMCRA(r9)
3617BEGIN_FTR_SECTION
3618	std	r10, VCPU_MMCR + 16(r9)
3619END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
3620BEGIN_FTR_SECTION
3621	mfspr   r5, SPRN_MMCR3
3622	mfspr   r6, SPRN_SIER2
3623	mfspr   r7, SPRN_SIER3
3624	std     r5, VCPU_MMCR + 24(r9)
3625	std     r6, VCPU_SIER + 8(r9)
3626	std     r7, VCPU_SIER + 16(r9)
3627END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
3628	std	r7, VCPU_SIAR(r9)
3629	std	r8, VCPU_SDAR(r9)
3630	mfspr	r3, SPRN_PMC1
3631	mfspr	r4, SPRN_PMC2
3632	mfspr	r5, SPRN_PMC3
3633	mfspr	r6, SPRN_PMC4
3634	mfspr	r7, SPRN_PMC5
3635	mfspr	r8, SPRN_PMC6
3636	stw	r3, VCPU_PMC(r9)
3637	stw	r4, VCPU_PMC + 4(r9)
3638	stw	r5, VCPU_PMC + 8(r9)
3639	stw	r6, VCPU_PMC + 12(r9)
3640	stw	r7, VCPU_PMC + 16(r9)
3641	stw	r8, VCPU_PMC + 20(r9)
3642BEGIN_FTR_SECTION
3643	mfspr	r5, SPRN_SIER
3644	std	r5, VCPU_SIER(r9)
3645BEGIN_FTR_SECTION_NESTED(96)
3646	mfspr	r6, SPRN_SPMC1
3647	mfspr	r7, SPRN_SPMC2
3648	mfspr	r8, SPRN_MMCRS
3649	stw	r6, VCPU_PMC + 24(r9)
3650	stw	r7, VCPU_PMC + 28(r9)
3651	std	r8, VCPU_MMCRS(r9)
3652	lis	r4, 0x8000
3653	mtspr	SPRN_MMCRS, r4
3654END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
3655END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
365622:	blr
3657
3658/*
3659 * This works around a hardware bug on POWER8E processors, where
3660 * writing a 1 to the MMCR0[PMAO] bit doesn't generate a
3661 * performance monitor interrupt.  Instead, when we need to have
3662 * an interrupt pending, we have to arrange for a counter to overflow.
3663 */
3664kvmppc_fix_pmao:
3665	li	r3, 0
3666	mtspr	SPRN_MMCR2, r3
3667	lis	r3, (MMCR0_PMXE | MMCR0_FCECE)@h
3668	ori	r3, r3, MMCR0_PMCjCE | MMCR0_C56RUN
3669	mtspr	SPRN_MMCR0, r3
3670	lis	r3, 0x7fff
3671	ori	r3, r3, 0xffff
3672	mtspr	SPRN_PMC6, r3
3673	isync
3674	blr
3675
3676#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
3677/*
3678 * Start timing an activity
3679 * r3 = pointer to time accumulation struct, r4 = vcpu
3680 */
3681kvmhv_start_timing:
3682	ld	r5, HSTATE_KVM_VCORE(r13)
3683	ld	r6, VCORE_TB_OFFSET_APPL(r5)
3684	mftb	r5
3685	subf	r5, r6, r5	/* subtract current timebase offset */
3686	std	r3, VCPU_CUR_ACTIVITY(r4)
3687	std	r5, VCPU_ACTIVITY_START(r4)
3688	blr
3689
3690/*
3691 * Accumulate time to one activity and start another.
3692 * r3 = pointer to new time accumulation struct, r4 = vcpu
3693 */
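/*
 * The update below uses a seqcount-style protocol so a reader can detect
 * a torn update (a hedged sketch; the names mirror the TAS_*, VCORE_* and
 * VCPU_* offsets used in the asm rather than exact C structures):
 *
 *	u64 now = mftb() - vc->tb_offset_applied;
 *	u64 delta = now - vcpu_activity_start;
 *	tas->seqcount++;			// odd: update in progress
 *	smp_wmb();				// lwsync
 *	tas->total += delta;
 *	if (tas->seqcount == 1 || delta < tas->min)	// first sample or new minimum
 *		tas->min = delta;
 *	if (delta > tas->max)
 *		tas->max = delta;
 *	smp_wmb();				// lwsync
 *	tas->seqcount++;			// even again: update complete
 */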
3694kvmhv_accumulate_time:
3695	ld	r5, HSTATE_KVM_VCORE(r13)
3696	ld	r8, VCORE_TB_OFFSET_APPL(r5)
3697	ld	r5, VCPU_CUR_ACTIVITY(r4)
3698	ld	r6, VCPU_ACTIVITY_START(r4)
3699	std	r3, VCPU_CUR_ACTIVITY(r4)
3700	mftb	r7
3701	subf	r7, r8, r7	/* subtract current timebase offset */
3702	std	r7, VCPU_ACTIVITY_START(r4)
3703	cmpdi	r5, 0
3704	beqlr
3705	subf	r3, r6, r7
3706	ld	r8, TAS_SEQCOUNT(r5)
3707	cmpdi	r8, 0
3708	addi	r8, r8, 1
3709	std	r8, TAS_SEQCOUNT(r5)
3710	lwsync
3711	ld	r7, TAS_TOTAL(r5)
3712	add	r7, r7, r3
3713	std	r7, TAS_TOTAL(r5)
3714	ld	r6, TAS_MIN(r5)
3715	ld	r7, TAS_MAX(r5)
3716	beq	3f
3717	cmpd	r3, r6
3718	bge	1f
37193:	std	r3, TAS_MIN(r5)
37201:	cmpd	r3, r7
3721	ble	2f
3722	std	r3, TAS_MAX(r5)
37232:	lwsync
3724	addi	r8, r8, 1
3725	std	r8, TAS_SEQCOUNT(r5)
3726	blr
3727#endif
3728