1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Paravirtualization interfaces
3 Copyright (C) 2006 Rusty Russell IBM Corporation
4
5
6 2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc
7 */
8
9 #include <linux/errno.h>
10 #include <linux/init.h>
11 #include <linux/export.h>
12 #include <linux/efi.h>
13 #include <linux/bcd.h>
14 #include <linux/highmem.h>
15 #include <linux/kprobes.h>
16 #include <linux/pgtable.h>
17
18 #include <asm/bug.h>
19 #include <asm/paravirt.h>
20 #include <asm/debugreg.h>
21 #include <asm/desc.h>
22 #include <asm/setup.h>
23 #include <asm/time.h>
24 #include <asm/pgalloc.h>
25 #include <asm/irq.h>
26 #include <asm/delay.h>
27 #include <asm/fixmap.h>
28 #include <asm/apic.h>
29 #include <asm/tlbflush.h>
30 #include <asm/timer.h>
31 #include <asm/special_insns.h>
32 #include <asm/tlb.h>
33 #include <asm/io_bitmap.h>
34
/*
 * nop stub, which must not clobber anything *including the stack* to
 * avoid confusing the entry prologues.
 *
 * The function is written directly in assembly: its body consists solely
 * of the ASM_RET macro expansion, and it is emitted into the .entry.text
 * section as a global symbol of type @function.
 */
extern void _paravirt_nop(void);
asm (".pushsection .entry.text, \"ax\"\n"
     ".global _paravirt_nop\n"
     "_paravirt_nop:\n\t"
     ASM_RET
     ".size _paravirt_nop, . - _paravirt_nop\n\t"
     ".type _paravirt_nop, @function\n\t"
     ".popsection");
47
default_banner(void)48 void __init default_banner(void)
49 {
50 printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
51 pv_info.name);
52 }
53
/*
 * Undefined instruction for dealing with missing ops pointers.
 *
 * These two bytes are what paravirt_patch_default() copies into a patch
 * site whose pv_ops slot is NULL.
 */
static const unsigned char ud2a[] = { 0x0f, 0x0b };
56
/*
 * In-memory image of a one-byte-opcode branch with a 32-bit operand.
 * Packed so that 'delta' directly follows 'opcode' with no padding;
 * paravirt_patch_jmp() overlays this on the instruction buffer.
 */
struct branch {
	unsigned char opcode;	/* instruction opcode byte */
	u32 delta;		/* 32-bit displacement operand */
} __attribute__((packed));
61
paravirt_patch_call(void * insn_buff,const void * target,unsigned long addr,unsigned len)62 static unsigned paravirt_patch_call(void *insn_buff, const void *target,
63 unsigned long addr, unsigned len)
64 {
65 __text_gen_insn(insn_buff, CALL_INSN_OPCODE,
66 (void *)addr, target, CALL_INSN_SIZE);
67 return CALL_INSN_SIZE;
68 }
69
70 #ifdef CONFIG_PARAVIRT_XXL
/*
 * identity function, which can be inlined
 *
 * Returns its 64-bit argument unchanged. paravirt_patch_default() compares
 * pv_ops slots against this function's address to recognise identity ops.
 */
u64 notrace _paravirt_ident_64(u64 x)
{
	return x;
}
76
paravirt_patch_jmp(void * insn_buff,const void * target,unsigned long addr,unsigned len)77 static unsigned paravirt_patch_jmp(void *insn_buff, const void *target,
78 unsigned long addr, unsigned len)
79 {
80 struct branch *b = insn_buff;
81 unsigned long delta = (unsigned long)target - (addr+5);
82
83 if (len < 5) {
84 #ifdef CONFIG_RETPOLINE
85 WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr);
86 #endif
87 return len; /* call too long for patch site */
88 }
89
90 b->opcode = 0xe9; /* jmp */
91 b->delta = delta;
92
93 return 5;
94 }
95 #endif
96
97 DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);
98
native_pv_lock_init(void)99 void __init native_pv_lock_init(void)
100 {
101 if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
102 static_branch_disable(&virt_spin_lock_key);
103 }
104
/*
 * Default patching policy for a paravirt call site.
 *
 * @type selects a slot in pv_ops, @insn_buff receives the replacement
 * instruction bytes, @addr is the address of the patch site and @len the
 * space available there. The return value is whatever the selected
 * generator returns, or 0 for a nop op.
 */
unsigned paravirt_patch_default(u8 type, void *insn_buff,
				unsigned long addr, unsigned len)
{
	/*
	 * Neat trick to map patch type back to the call within the
	 * corresponding structure: pv_ops is treated as an array of
	 * pointers indexed by the patch type.
	 */
	void **ops_table = (void **)&pv_ops;
	void *handler = ops_table[type];

	/* If there's no function, patch it with a ud2a (BUG) */
	if (handler == NULL)
		return paravirt_patch_insns(insn_buff, len, ud2a,
					    ud2a + sizeof(ud2a));

	/* The nop stub needs no replacement bytes at all. */
	if (handler == _paravirt_nop)
		return 0;

#ifdef CONFIG_PARAVIRT_XXL
	/* identity functions just return their single argument */
	if (handler == _paravirt_ident_64)
		return paravirt_patch_ident_64(insn_buff, len);

	/* If operation requires a jmp, then jmp */
	if (type == PARAVIRT_PATCH(cpu.iret) ||
	    type == PARAVIRT_PATCH(cpu.usergs_sysret64))
		return paravirt_patch_jmp(insn_buff, handler, addr, len);
#endif

	/* Otherwise call the function. */
	return paravirt_patch_call(insn_buff, handler, addr, len);
}
137
/*
 * Copy the instruction bytes in [@start, @end) into @insn_buff.
 *
 * @len is the room available at the patch site. It is a BUG if @start is
 * NULL or the sequence does not fit. Returns the number of bytes copied.
 */
unsigned paravirt_patch_insns(void *insn_buff, unsigned len,
			      const char *start, const char *end)
{
	unsigned nbytes = end - start;

	/* Alternative instruction is too large for the patch site and we cannot continue: */
	BUG_ON(start == NULL || nbytes > len);

	memcpy(insn_buff, start, nbytes);

	return nbytes;
}
150
/*
 * Static keys related to steal-time accounting. They are only defined
 * here; nothing in the visible part of this file enables them.
 * NOTE(review): presumably flipped by guest code that installs a real
 * steal clock -- confirm against the users of these keys.
 */
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;

/*
 * Native steal clock: always reports 0, @cpu is ignored. Installed as the
 * default pv_ops.time.steal_clock.
 */
static u64 native_steal_clock(int cpu)
{
	return 0;
}
158
/*
 * These are in entry.S. Their addresses are used in this file as the native
 * defaults for pv_ops.cpu.iret and pv_ops.cpu.usergs_sysret64.
 */
extern void native_iret(void);
extern void native_usergs_sysret64(void);
162
/*
 * Resource spanning I/O ports 0..IO_SPACE_LIMIT, marked busy. Claimed by
 * paravirt_disable_iospace().
 */
static struct resource reserve_ioports = {
	.start = 0,
	.end = IO_SPACE_LIMIT,
	.name = "paravirt-ioport",
	.flags = IORESOURCE_IO | IORESOURCE_BUSY,
};
169
/*
 * Reserve the whole legacy IO space to prevent any legacy drivers
 * from wasting time probing for their hardware. This is a fairly
 * brute-force approach to disabling all non-virtual drivers.
 *
 * Note that this must be called very early to have any effect.
 *
 * Returns the result of request_resource() for the reserve_ioports range
 * under ioport_resource.
 */
int paravirt_disable_iospace(void)
{
	return request_resource(&ioport_resource, &reserve_ioports);
}
181
182 static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
183
enter_lazy(enum paravirt_lazy_mode mode)184 static inline void enter_lazy(enum paravirt_lazy_mode mode)
185 {
186 BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
187
188 this_cpu_write(paravirt_lazy_mode, mode);
189 }
190
leave_lazy(enum paravirt_lazy_mode mode)191 static void leave_lazy(enum paravirt_lazy_mode mode)
192 {
193 BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode);
194
195 this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
196 }
197
/*
 * Mark this CPU as being in lazy MMU mode. BUGs (via enter_lazy()) if a
 * lazy mode is already active on this CPU.
 */
void paravirt_enter_lazy_mmu(void)
{
	enter_lazy(PARAVIRT_LAZY_MMU);
}
202
/*
 * Mark this CPU as having left lazy MMU mode. BUGs (via leave_lazy()) if
 * the CPU is not currently in lazy MMU mode.
 */
void paravirt_leave_lazy_mmu(void)
{
	leave_lazy(PARAVIRT_LAZY_MMU);
}
207
paravirt_flush_lazy_mmu(void)208 void paravirt_flush_lazy_mmu(void)
209 {
210 preempt_disable();
211
212 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
213 arch_leave_lazy_mmu_mode();
214 arch_enter_lazy_mmu_mode();
215 }
216
217 preempt_enable();
218 }
219
220 #ifdef CONFIG_PARAVIRT_XXL
paravirt_start_context_switch(struct task_struct * prev)221 void paravirt_start_context_switch(struct task_struct *prev)
222 {
223 BUG_ON(preemptible());
224
225 if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
226 arch_leave_lazy_mmu_mode();
227 set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
228 }
229 enter_lazy(PARAVIRT_LAZY_CPU);
230 }
231
paravirt_end_context_switch(struct task_struct * next)232 void paravirt_end_context_switch(struct task_struct *next)
233 {
234 BUG_ON(preemptible());
235
236 leave_lazy(PARAVIRT_LAZY_CPU);
237
238 if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
239 arch_enter_lazy_mmu_mode();
240 }
241 #endif
242
paravirt_get_lazy_mode(void)243 enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
244 {
245 if (in_interrupt())
246 return PARAVIRT_LAZY_NONE;
247
248 return this_cpu_read(paravirt_lazy_mode);
249 }
250
/*
 * Default platform description. The name is what default_banner() prints;
 * on CONFIG_PARAVIRT_XXL kernels the extra 64-bit user code segment
 * defaults to __USER_CS.
 */
struct pv_info pv_info = {
	.name = "bare hardware",
#ifdef CONFIG_PARAVIRT_XXL
	.extra_user_64bit_cs = __USER_CS,
#endif
};
257
/*
 * 64-bit pagetable entries: the *_val / make_* conversions are the identity
 * function, wrapped as a callee-save op. Only expanded inside the
 * CONFIG_PARAVIRT_XXL part of pv_ops, where _paravirt_ident_64 is defined.
 */
#define PTE_IDENT	__PV_IS_CALLEE_SAVE(_paravirt_ident_64)
260
/*
 * Default paravirt operations table.
 *
 * The slots named here are filled with native_* implementations, the
 * identity function (PTE_IDENT) or paravirt_nop. paravirt_patch_default()
 * indexes this structure as an array of pointers, using the patch type as
 * the index.
 */
struct paravirt_patch_template pv_ops = {
	/* Init ops. */
	.init.patch		= native_patch,

	/* Time ops. */
	.time.sched_clock	= native_sched_clock,
	.time.steal_clock	= native_steal_clock,

	/* Cpu ops. */
	.cpu.io_delay		= native_io_delay,

#ifdef CONFIG_PARAVIRT_XXL
	.cpu.cpuid		= native_cpuid,
	.cpu.get_debugreg	= native_get_debugreg,
	.cpu.set_debugreg	= native_set_debugreg,
	.cpu.read_cr0		= native_read_cr0,
	.cpu.write_cr0		= native_write_cr0,
	.cpu.write_cr4		= native_write_cr4,
	.cpu.wbinvd		= native_wbinvd,
	.cpu.read_msr		= native_read_msr,
	.cpu.write_msr		= native_write_msr,
	.cpu.read_msr_safe	= native_read_msr_safe,
	.cpu.write_msr_safe	= native_write_msr_safe,
	.cpu.read_pmc		= native_read_pmc,
	.cpu.load_tr_desc	= native_load_tr_desc,
	.cpu.set_ldt		= native_set_ldt,
	.cpu.load_gdt		= native_load_gdt,
	.cpu.load_idt		= native_load_idt,
	.cpu.store_tr		= native_store_tr,
	.cpu.load_tls		= native_load_tls,
	.cpu.load_gs_index	= native_load_gs_index,
	.cpu.write_ldt_entry	= native_write_ldt_entry,
	.cpu.write_gdt_entry	= native_write_gdt_entry,
	.cpu.write_idt_entry	= native_write_idt_entry,

	.cpu.alloc_ldt		= paravirt_nop,
	.cpu.free_ldt		= paravirt_nop,

	.cpu.load_sp0		= native_load_sp0,

	/* These two sites are patched with a jmp rather than a call. */
	.cpu.usergs_sysret64	= native_usergs_sysret64,
	.cpu.iret		= native_iret,

#ifdef CONFIG_X86_IOPL_IOPERM
	.cpu.invalidate_io_bitmap	= native_tss_invalidate_io_bitmap,
	.cpu.update_io_bitmap		= native_tss_update_io_bitmap,
#endif

	.cpu.start_context_switch	= paravirt_nop,
	.cpu.end_context_switch		= paravirt_nop,

	/* Irq ops. */
	.irq.save_fl		= __PV_IS_CALLEE_SAVE(native_save_fl),
	.irq.restore_fl		= __PV_IS_CALLEE_SAVE(native_restore_fl),
	.irq.irq_disable	= __PV_IS_CALLEE_SAVE(native_irq_disable),
	.irq.irq_enable		= __PV_IS_CALLEE_SAVE(native_irq_enable),
	.irq.safe_halt		= native_safe_halt,
	.irq.halt		= native_halt,
#endif /* CONFIG_PARAVIRT_XXL */

	/* Mmu ops. */
	.mmu.flush_tlb_user	= native_flush_tlb_local,
	.mmu.flush_tlb_kernel	= native_flush_tlb_global,
	.mmu.flush_tlb_one_user	= native_flush_tlb_one_user,
	.mmu.flush_tlb_others	= native_flush_tlb_others,
	/* tlb_remove_page is cast to the tlb_remove_table slot's signature. */
	.mmu.tlb_remove_table	=
			(void (*)(struct mmu_gather *, void *))tlb_remove_page,

	.mmu.exit_mmap		= paravirt_nop,

#ifdef CONFIG_PARAVIRT_XXL
	.mmu.read_cr2		= __PV_IS_CALLEE_SAVE(native_read_cr2),
	.mmu.write_cr2		= native_write_cr2,
	.mmu.read_cr3		= __native_read_cr3,
	.mmu.write_cr3		= native_write_cr3,

	.mmu.pgd_alloc		= __paravirt_pgd_alloc,
	.mmu.pgd_free		= paravirt_nop,

	.mmu.alloc_pte		= paravirt_nop,
	.mmu.alloc_pmd		= paravirt_nop,
	.mmu.alloc_pud		= paravirt_nop,
	.mmu.alloc_p4d		= paravirt_nop,
	.mmu.release_pte	= paravirt_nop,
	.mmu.release_pmd	= paravirt_nop,
	.mmu.release_pud	= paravirt_nop,
	.mmu.release_p4d	= paravirt_nop,

	.mmu.set_pte		= native_set_pte,
	.mmu.set_pmd		= native_set_pmd,

	.mmu.ptep_modify_prot_start	= __ptep_modify_prot_start,
	.mmu.ptep_modify_prot_commit	= __ptep_modify_prot_commit,

	.mmu.set_pud		= native_set_pud,

	.mmu.pmd_val		= PTE_IDENT,
	.mmu.make_pmd		= PTE_IDENT,

	.mmu.pud_val		= PTE_IDENT,
	.mmu.make_pud		= PTE_IDENT,

	.mmu.set_p4d		= native_set_p4d,

#if CONFIG_PGTABLE_LEVELS >= 5
	.mmu.p4d_val		= PTE_IDENT,
	.mmu.make_p4d		= PTE_IDENT,

	.mmu.set_pgd		= native_set_pgd,
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */

	.mmu.pte_val		= PTE_IDENT,
	.mmu.pgd_val		= PTE_IDENT,

	.mmu.make_pte		= PTE_IDENT,
	.mmu.make_pgd		= PTE_IDENT,

	.mmu.dup_mmap		= paravirt_nop,
	.mmu.activate_mm	= paravirt_nop,

	.mmu.lazy_mode = {
		.enter		= paravirt_nop,
		.leave		= paravirt_nop,
		.flush		= paravirt_nop,
	},

	.mmu.set_fixmap		= native_set_fixmap,
#endif /* CONFIG_PARAVIRT_XXL */

#if defined(CONFIG_PARAVIRT_SPINLOCKS)
	/* Lock ops. */
#ifdef CONFIG_SMP
	.lock.queued_spin_lock_slowpath	= native_queued_spin_lock_slowpath,
	.lock.queued_spin_unlock	=
				PV_CALLEE_SAVE(__native_queued_spin_unlock),
	.lock.wait			= paravirt_nop,
	.lock.kick			= paravirt_nop,
	.lock.vcpu_is_preempted		=
				PV_CALLEE_SAVE(__native_vcpu_is_preempted),
#endif /* SMP */
#endif
};
403
#ifdef CONFIG_PARAVIRT_XXL
/*
 * At this point, native_get/set_debugreg has real function entries.
 * Exclude them, and native_load_idt, from kprobes.
 */
NOKPROBE_SYMBOL(native_get_debugreg);
NOKPROBE_SYMBOL(native_set_debugreg);
NOKPROBE_SYMBOL(native_load_idt);
#endif

/* pv_ops is exported to all modules, pv_info to GPL-compatible ones only. */
EXPORT_SYMBOL(pv_ops);
EXPORT_SYMBOL_GPL(pv_info);
413