1 /*
2 * QEMU KVM support
3 *
4 * Copyright (C) 2006-2008 Qumranet Technologies
5 * Copyright IBM, Corp. 2008
6 *
7 * Authors:
8 * Anthony Liguori <aliguori@us.ibm.com>
9 *
10 * This work is licensed under the terms of the GNU GPL, version 2 or later.
11 * See the COPYING file in the top-level directory.
12 *
13 */
14
15 #include <sys/types.h>
16 #include <sys/ioctl.h>
17 #include <sys/mman.h>
18
19 #undef __user
20 #define __xuser /* nothing */
21 #include <linux/kvm.h>
22
23 #include "qemu-common.h"
24 #include "sysemu.h"
25 #include "kvm.h"
26 #include "cpu.h"
27 #include "gdbstub.h"
28
29 #ifdef CONFIG_KVM_GS_RESTORE
30 #include "kvm-gs-restore.h"
31 #endif
32
/* Uncomment the next line to enable the dprintf() trace output below. */
//#define DEBUG_KVM

#ifdef DEBUG_KVM
/* Debug build: dprintf() forwards its arguments to fprintf(stderr, ...). */
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
/* Regular build: dprintf() expands to an empty statement. */
#define dprintf(fmt, ...) \
    do { } while (0)
#endif
42
43 #ifdef KVM_CAP_EXT_CPUID
44
/*
 * Query KVM_GET_SUPPORTED_CPUID with a buffer sized for `max` entries.
 *
 * Returns the allocated table on success (the caller releases it with
 * qemu_free()).  Returns NULL when the ioctl reports -E2BIG, or when it
 * succeeds but fills all `max` entries (handled the same way), so the
 * caller can retry with a larger `max`.  Any other ioctl error is printed
 * to stderr and the process exits.
 */
static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
{
    struct kvm_cpuid2 *table;
    int bytes, err;

    bytes = sizeof(*table) + max * sizeof(table->entries[0]);
    table = (struct kvm_cpuid2 *)qemu_mallocz(bytes);
    table->nent = max;

    err = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, table);
    if (err == 0 && table->nent >= max) {
        /* A completely filled buffer is treated as "too small". */
        err = -E2BIG;
    }

    if (err >= 0) {
        return table;
    }

    if (err != -E2BIG) {
        fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n",
                strerror(-err));
        exit(1);
    }

    qemu_free(table);
    return NULL;
}
69
/*
 * Look up one register of a CPUID leaf in the table KVM reports as
 * supported.
 *
 * env      - CPU whose kvm_state is queried
 * function - CPUID leaf number to look for
 * reg      - one of R_EAX, R_EBX, R_ECX, R_EDX
 *
 * Returns -1U when KVM_CAP_EXT_CPUID is unavailable, 0 when no entry (or no
 * known register) matches, otherwise the register value.  If several table
 * entries carry the same leaf number, the last one wins.
 */
uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int reg)
{
    struct kvm_cpuid2 *table;
    uint32_t value = 0;
    int capacity = 1;
    int idx;

    if (!kvm_check_extension(env->kvm_state, KVM_CAP_EXT_CPUID)) {
        return -1U;
    }

    /* Keep doubling the buffer until try_get_cpuid() accepts the size. */
    for (;;) {
        table = try_get_cpuid(env->kvm_state, capacity);
        if (table != NULL) {
            break;
        }
        capacity *= 2;
    }

    for (idx = 0; idx < table->nent; ++idx) {
        if (table->entries[idx].function != function) {
            continue;
        }

        if (reg == R_EAX) {
            value = table->entries[idx].eax;
        } else if (reg == R_EBX) {
            value = table->entries[idx].ebx;
        } else if (reg == R_ECX) {
            value = table->entries[idx].ecx;
        } else if (reg == R_EDX) {
            value = table->entries[idx].edx;
            if (function == 0x80000001) {
                /* On Intel, kvm returns cpuid according to the Intel spec,
                 * so add missing bits according to the AMD spec:
                 */
                uint32_t leaf1_edx =
                    kvm_arch_get_supported_cpuid(env, 1, R_EDX);

                value |= leaf1_edx & 0xdfeff7ff;
            }
        }
    }

    qemu_free(table);

    return value;
}
116
117 #else
118
/*
 * Variant compiled when KVM_CAP_EXT_CPUID is not defined: the supported
 * CPUID table cannot be queried, so every lookup yields -1U.
 */
uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int reg)
{
    const uint32_t unsupported = -1U;

    return unsupported;
}
123
124 #endif
125
126 #ifndef KVM_MP_STATE_RUNNABLE
127 #define KVM_MP_STATE_RUNNABLE 0
128 #endif
129
kvm_arch_init_vcpu(CPUState * env)130 int kvm_arch_init_vcpu(CPUState *env)
131 {
132 struct {
133 struct kvm_cpuid2 cpuid;
134 struct kvm_cpuid_entry2 entries[100];
135 } __attribute__((packed)) cpuid_data;
136 uint32_t limit, i, j, cpuid_i;
137 uint32_t unused;
138
139 env->mp_state = KVM_MP_STATE_RUNNABLE;
140
141 cpuid_i = 0;
142
143 cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
144
145 for (i = 0; i <= limit; i++) {
146 struct kvm_cpuid_entry2 *c = &cpuid_data.entries[cpuid_i++];
147
148 switch (i) {
149 case 2: {
150 /* Keep reading function 2 till all the input is received */
151 int times;
152
153 c->function = i;
154 c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
155 KVM_CPUID_FLAG_STATE_READ_NEXT;
156 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
157 times = c->eax & 0xff;
158
159 for (j = 1; j < times; ++j) {
160 c = &cpuid_data.entries[cpuid_i++];
161 c->function = i;
162 c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
163 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
164 }
165 break;
166 }
167 case 4:
168 case 0xb:
169 case 0xd:
170 for (j = 0; ; j++) {
171 c->function = i;
172 c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
173 c->index = j;
174 cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
175
176 if (i == 4 && c->eax == 0)
177 break;
178 if (i == 0xb && !(c->ecx & 0xff00))
179 break;
180 if (i == 0xd && c->eax == 0)
181 break;
182
183 c = &cpuid_data.entries[cpuid_i++];
184 }
185 break;
186 default:
187 c->function = i;
188 c->flags = 0;
189 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
190 break;
191 }
192 }
193 cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
194
195 for (i = 0x80000000; i <= limit; i++) {
196 struct kvm_cpuid_entry2 *c = &cpuid_data.entries[cpuid_i++];
197
198 c->function = i;
199 c->flags = 0;
200 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
201 }
202
203 cpuid_data.cpuid.nent = cpuid_i;
204
205 return kvm_vcpu_ioctl(env, KVM_SET_CPUID2, &cpuid_data);
206 }
207
/*
 * Report whether KVM lists MSR_STAR among the MSRs it can save/restore.
 *
 * The probe runs once; its outcome is cached in a function-local static
 * (0 = not probed yet, 1 = present, -1 = absent or probe failed).
 *
 * Returns 1 when MSR_STAR is listed, 0 otherwise.
 */
static int kvm_has_msr_star(CPUState *env)
{
    static int has_msr_star;
    int ret;

    /* first time */
    if (has_msr_star == 0) {
        struct kvm_msr_list msr_list, *kvm_msr_list;

        has_msr_star = -1;

        /* Obtain MSR list from KVM.  These are the MSRs that we must
         * save/restore.  The first call passes an empty array purely to
         * learn the required size: KVM answers it with -E2BIG and stores
         * the real count in nmsrs, so -E2BIG is the expected result here
         * and must not be treated as a failure. */
        msr_list.nmsrs = 0;
        ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list);
        if (ret < 0 && ret != -E2BIG) {
            return 0;
        }

        kvm_msr_list = qemu_mallocz(sizeof(msr_list) +
                                    msr_list.nmsrs *
                                    sizeof(msr_list.indices[0]));

        kvm_msr_list->nmsrs = msr_list.nmsrs;
        ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
        if (ret >= 0) {
            int i;

            for (i = 0; i < kvm_msr_list->nmsrs; i++) {
                if (kvm_msr_list->indices[i] == MSR_STAR) {
                    has_msr_star = 1;
                    break;
                }
            }
        }

        /* Allocated with qemu_mallocz(), so release with qemu_free(). */
        qemu_free(kvm_msr_list);
    }

    if (has_msr_star == 1) {
        return 1;
    }
    return 0;
}
249
/*
 * x86-specific VM setup: require KVM_CAP_SET_TSS_ADDR and place the vm86
 * TSS at a fixed guest-physical address.
 *
 * Returns the result of KVM_SET_TSS_ADDR on success, the negative error of
 * the capability check if that ioctl failed, or -ENOSYS when the kernel
 * reports the capability as unsupported.
 */
int kvm_arch_init(KVMState *s, int smp_cpus)
{
    int ret;

    /* create vm86 tss.  KVM uses vm86 mode to emulate 16-bit code
     * directly.  In order to use vm86 mode, a TSS is needed.  Since this
     * must be part of guest physical memory, we need to allocate it.  Older
     * versions of KVM just assumed that it would be at the end of physical
     * memory but that doesn't work with more than 4GB of memory.  We simply
     * refuse to work with those older versions of KVM. */
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
    if (ret <= 0) {
        fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n");
        /* A result of 0 means "unsupported"; returning it unchanged would
         * signal success to the caller, so map it to an error code. */
        return ret < 0 ? ret : -ENOSYS;
    }

    /* this address is 3 pages before the bios, and the bios should present
     * as unavailable memory.  FIXME, need to ensure the e820 map deals with
     * this?
     */
    return kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, 0xfffbd000);
}
272
set_v8086_seg(struct kvm_segment * lhs,const SegmentCache * rhs)273 static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
274 {
275 lhs->selector = rhs->selector;
276 lhs->base = rhs->base;
277 lhs->limit = rhs->limit;
278 lhs->type = 3;
279 lhs->present = 1;
280 lhs->dpl = 3;
281 lhs->db = 0;
282 lhs->s = 1;
283 lhs->l = 0;
284 lhs->g = 0;
285 lhs->avl = 0;
286 lhs->unusable = 0;
287 }
288
set_seg(struct kvm_segment * lhs,const SegmentCache * rhs)289 static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
290 {
291 unsigned flags = rhs->flags;
292 lhs->selector = rhs->selector;
293 lhs->base = rhs->base;
294 lhs->limit = rhs->limit;
295 lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
296 lhs->present = (flags & DESC_P_MASK) != 0;
297 lhs->dpl = rhs->selector & 3;
298 lhs->db = (flags >> DESC_B_SHIFT) & 1;
299 lhs->s = (flags & DESC_S_MASK) != 0;
300 lhs->l = (flags >> DESC_L_SHIFT) & 1;
301 lhs->g = (flags & DESC_G_MASK) != 0;
302 lhs->avl = (flags & DESC_AVL_MASK) != 0;
303 lhs->unusable = 0;
304 }
305
/*
 * Convert a KVM segment descriptor back into a QEMU SegmentCache.
 * Selector, base and limit are copied; the individual attribute fields are
 * packed into lhs->flags at their DESC_* positions.
 */
static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
{
    uint32_t packed;

    /* Multi-bit and shifted single-bit fields. */
    packed = rhs->type << DESC_TYPE_SHIFT;
    packed |= rhs->dpl << DESC_DPL_SHIFT;
    packed |= rhs->db << DESC_B_SHIFT;
    packed |= rhs->l << DESC_L_SHIFT;

    /* Flags expressed through their mask constants. */
    packed |= rhs->present * DESC_P_MASK;
    packed |= rhs->s * DESC_S_MASK;
    packed |= rhs->g * DESC_G_MASK;
    packed |= rhs->avl * DESC_AVL_MASK;

    lhs->flags = packed;
    lhs->limit = rhs->limit;
    lhs->base = rhs->base;
    lhs->selector = rhs->selector;
}
321
/*
 * Copy one register between the KVM and QEMU representations.
 * set != 0 copies QEMU -> KVM; set == 0 copies KVM -> QEMU.
 */
static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set)
{
    if (!set) {
        *qemu_reg = *kvm_reg;
        return;
    }

    *kvm_reg = *qemu_reg;
}
329
/*
 * Transfer the general-purpose registers, RFLAGS and RIP between env and
 * the vcpu.
 *
 * set == 0: read them from KVM (KVM_GET_REGS) into env.
 * set != 0: write them from env into KVM (KVM_SET_REGS).
 *
 * Returns the ioctl result; on a failed KVM_GET_REGS env is left untouched.
 */
static int kvm_getput_regs(CPUState *env, int set)
{
    struct kvm_regs regs;
    int ret = 0;

    if (!set) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
        if (ret < 0) {
            return ret;
        }
    }

    kvm_getput_reg(&regs.rax, &env->regs[R_EAX], set);
    kvm_getput_reg(&regs.rbx, &env->regs[R_EBX], set);
    kvm_getput_reg(&regs.rcx, &env->regs[R_ECX], set);
    kvm_getput_reg(&regs.rdx, &env->regs[R_EDX], set);
    kvm_getput_reg(&regs.rsi, &env->regs[R_ESI], set);
    kvm_getput_reg(&regs.rdi, &env->regs[R_EDI], set);
    kvm_getput_reg(&regs.rsp, &env->regs[R_ESP], set);
    kvm_getput_reg(&regs.rbp, &env->regs[R_EBP], set);
#ifdef TARGET_X86_64
    kvm_getput_reg(&regs.r8, &env->regs[8], set);
    kvm_getput_reg(&regs.r9, &env->regs[9], set);
    kvm_getput_reg(&regs.r10, &env->regs[10], set);
    kvm_getput_reg(&regs.r11, &env->regs[11], set);
    kvm_getput_reg(&regs.r12, &env->regs[12], set);
    kvm_getput_reg(&regs.r13, &env->regs[13], set);
    kvm_getput_reg(&regs.r14, &env->regs[14], set);
    kvm_getput_reg(&regs.r15, &env->regs[15], set);
#endif

    kvm_getput_reg(&regs.rflags, &env->eflags, set);
    kvm_getput_reg(&regs.rip, &env->eip, set);

    if (set) {
        ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
    }

    return ret;
}
368
/*
 * Push the FPU/SSE state held in env to the vcpu via KVM_SET_FPU.
 * The stack-top index (fpstt) is merged into bits 11..13 of the status
 * word, and each ftwx bit is set when the matching fptags[] entry is zero.
 * Returns the ioctl result.
 */
static int kvm_put_fpu(CPUState *env)
{
    struct kvm_fpu state;
    int slot;

    memset(&state, 0, sizeof state);

    state.fcw = env->fpuc;
    state.mxcsr = env->mxcsr;
    state.fsw = (env->fpus & ~(7 << 11)) | ((env->fpstt & 7) << 11);

    for (slot = 0; slot < 8; ++slot) {
        if (!env->fptags[slot]) {
            state.ftwx |= 1 << slot;
        }
    }

    memcpy(state.fpr, env->fpregs, sizeof env->fpregs);
    memcpy(state.xmm, env->xmm_regs, sizeof env->xmm_regs);

    return kvm_vcpu_ioctl(env, KVM_SET_FPU, &state);
}
386
/*
 * Push the special registers held in env (segments, descriptor tables,
 * control registers, EFER, APIC base/TPR, pending-interrupt bitmap) to the
 * vcpu via KVM_SET_SREGS.  Returns the ioctl result.
 */
static int kvm_put_sregs(CPUState *env)
{
    struct kvm_sregs sregs;

    memcpy(sregs.interrupt_bitmap,
           env->interrupt_bitmap,
           sizeof(sregs.interrupt_bitmap));

    if (!(env->eflags & VM_MASK)) {
        /* Not in vm86 mode: decode attributes from the cached flags. */
        set_seg(&sregs.cs, &env->segs[R_CS]);
        set_seg(&sregs.ds, &env->segs[R_DS]);
        set_seg(&sregs.es, &env->segs[R_ES]);
        set_seg(&sregs.fs, &env->segs[R_FS]);
        set_seg(&sregs.gs, &env->segs[R_GS]);
        set_seg(&sregs.ss, &env->segs[R_SS]);

        if (env->cr[0] & CR0_PE_MASK) {
            /* force ss cpl to cs cpl */
            sregs.ss.selector = (sregs.ss.selector & ~3) |
                                (sregs.cs.selector & 3);
            sregs.ss.dpl = sregs.ss.selector & 3;
        }
    } else {
        /* vm86 mode: every data/code segment gets the fixed attributes. */
        set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
        set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
        set_v8086_seg(&sregs.es, &env->segs[R_ES]);
        set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
        set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
        set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
    }

    set_seg(&sregs.tr, &env->tr);
    set_seg(&sregs.ldt, &env->ldt);

    /* Descriptor tables. */
    sregs.gdt.base = env->gdt.base;
    sregs.gdt.limit = env->gdt.limit;
    sregs.idt.base = env->idt.base;
    sregs.idt.limit = env->idt.limit;

    /* Control registers. */
    sregs.cr0 = env->cr[0];
    sregs.cr2 = env->cr[2];
    sregs.cr3 = env->cr[3];
    sregs.cr4 = env->cr[4];

    /* cr8 carries the APIC task priority. */
    sregs.cr8 = cpu_get_apic_tpr(env);
    sregs.apic_base = cpu_get_apic_base(env);

    sregs.efer = env->efer;

    return kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
}
438
kvm_msr_entry_set(struct kvm_msr_entry * entry,uint32_t index,uint64_t value)439 static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
440 uint32_t index, uint64_t value)
441 {
442 entry->index = index;
443 entry->data = value;
444 }
445
/*
 * Push the MSR values tracked in env to the vcpu via KVM_SET_MSRS.
 * MSR_STAR is included only when kvm_has_msr_star() reports it; the
 * CSTAR/KERNELGSBASE/FMASK/LSTAR group is included for TARGET_X86_64 builds.
 * Returns the ioctl result.
 */
static int kvm_put_msrs(CPUState *env)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[100];
    } msr_data;
    struct kvm_msr_entry *next = msr_data.entries;

    kvm_msr_entry_set(next++, MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    kvm_msr_entry_set(next++, MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    kvm_msr_entry_set(next++, MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    if (kvm_has_msr_star(env)) {
        kvm_msr_entry_set(next++, MSR_STAR, env->star);
    }
    kvm_msr_entry_set(next++, MSR_IA32_TSC, env->tsc);
#ifdef TARGET_X86_64
    /* FIXME if lm capable */
    kvm_msr_entry_set(next++, MSR_CSTAR, env->cstar);
    kvm_msr_entry_set(next++, MSR_KERNELGSBASE, env->kernelgsbase);
    kvm_msr_entry_set(next++, MSR_FMASK, env->fmask);
    kvm_msr_entry_set(next++, MSR_LSTAR, env->lstar);
#endif

    /* Number of entries filled in above. */
    msr_data.info.nmsrs = next - msr_data.entries;

    return kvm_vcpu_ioctl(env, KVM_SET_MSRS, &msr_data);
}
473
474
/*
 * Fetch the FPU/SSE state from the vcpu (KVM_GET_FPU) into env.
 * The stack-top index is taken from bits 11..13 of the status word, and
 * fptags[i] is set to the inverse of ftwx bit i.
 * Returns 0 on success or the negative ioctl result.
 */
static int kvm_get_fpu(CPUState *env)
{
    struct kvm_fpu state;
    int slot;
    int err = kvm_vcpu_ioctl(env, KVM_GET_FPU, &state);

    if (err < 0) {
        return err;
    }

    env->fpuc = state.fcw;
    env->fpus = state.fsw;
    env->fpstt = (state.fsw >> 11) & 7;
    env->mxcsr = state.mxcsr;

    for (slot = 0; slot < 8; ++slot) {
        env->fptags[slot] = !((state.ftwx >> slot) & 1);
    }

    memcpy(env->fpregs, state.fpr, sizeof env->fpregs);
    memcpy(env->xmm_regs, state.xmm, sizeof env->xmm_regs);

    return 0;
}
495
/*
 * Fetch the special registers from the vcpu (KVM_GET_SREGS) into env and
 * recompute the hflags bits that depend on them.
 *
 * Copies the pending-interrupt bitmap, segments, TR/LDT, IDT/GDT,
 * CR0/2/3/4, APIC base and EFER.  sregs.cr8 is not propagated to the APIC
 * TPR here.  Afterwards every hflags bit that is excluded by
 * HFLAG_COPY_MASK is rebuilt from the fresh state, while the remaining
 * bits of env->hflags are kept.
 *
 * Returns 0 on success or the negative ioctl result.
 */
static int kvm_get_sregs(CPUState *env)
{
    struct kvm_sregs sregs;
    uint32_t derived;
    int err;

    err = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
    if (err < 0) {
        return err;
    }

    memcpy(env->interrupt_bitmap,
           sregs.interrupt_bitmap,
           sizeof(sregs.interrupt_bitmap));

    get_seg(&env->segs[R_CS], &sregs.cs);
    get_seg(&env->segs[R_DS], &sregs.ds);
    get_seg(&env->segs[R_ES], &sregs.es);
    get_seg(&env->segs[R_FS], &sregs.fs);
    get_seg(&env->segs[R_GS], &sregs.gs);
    get_seg(&env->segs[R_SS], &sregs.ss);

    get_seg(&env->tr, &sregs.tr);
    get_seg(&env->ldt, &sregs.ldt);

    env->gdt.base = sregs.gdt.base;
    env->gdt.limit = sregs.gdt.limit;
    env->idt.base = sregs.idt.base;
    env->idt.limit = sregs.idt.limit;

    env->cr[0] = sregs.cr0;
    env->cr[2] = sregs.cr2;
    env->cr[3] = sregs.cr3;
    env->cr[4] = sregs.cr4;

    cpu_set_apic_base(env, sregs.apic_base);

    env->efer = sregs.efer;

/* Bits of env->hflags that survive; everything listed is recomputed. */
#define HFLAG_COPY_MASK ~( \
    HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
    HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
    HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
    HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)

    /* Privilege level, taken from the DPL field of CS. */
    derived = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
    /* CR0.PE, then CR0.MP/EM/TS moved to their hflags positions. */
    derived |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
    derived |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
               (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
    /* TF, VM and IOPL are masked straight out of eflags. */
    derived |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
    derived |= (env->cr[4] & CR4_OSFXSR_MASK) <<
               (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);

    if (env->efer & MSR_EFER_LMA) {
        derived |= HF_LMA_MASK;
    }

    if ((derived & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
        derived |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
    } else {
        derived |= (env->segs[R_CS].flags & DESC_B_MASK) >>
                   (DESC_B_SHIFT - HF_CS32_SHIFT);
        derived |= (env->segs[R_SS].flags & DESC_B_MASK) >>
                   (DESC_B_SHIFT - HF_SS32_SHIFT);

        if ((env->cr[0] & CR0_PE_MASK) &&
            !(env->eflags & VM_MASK) &&
            (derived & HF_CS32_MASK)) {
            /* ADDSEG is set only if DS, ES or SS has a non-zero base. */
            derived |= ((env->segs[R_DS].base |
                         env->segs[R_ES].base |
                         env->segs[R_SS].base) != 0) <<
                       HF_ADDSEG_SHIFT;
        } else {
            derived |= HF_ADDSEG_MASK;
        }
    }

    env->hflags = (env->hflags & HFLAG_COPY_MASK) | derived;

    return 0;
}
577
/*
 * Fetch the MSRs tracked in env from the vcpu via KVM_GET_MSRS.
 * The request list mirrors kvm_put_msrs(): MSR_STAR only when
 * kvm_has_msr_star() reports it, and the 64-bit group only for
 * TARGET_X86_64 builds.  The ioctl's non-negative return value is used as
 * the number of entries to copy back.
 * Returns 0 on success or the negative ioctl result.
 */
static int kvm_get_msrs(CPUState *env)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[100];
    } msr_data;
    struct kvm_msr_entry *next = msr_data.entries;
    int filled, k;

    (next++)->index = MSR_IA32_SYSENTER_CS;
    (next++)->index = MSR_IA32_SYSENTER_ESP;
    (next++)->index = MSR_IA32_SYSENTER_EIP;
    if (kvm_has_msr_star(env)) {
        (next++)->index = MSR_STAR;
    }
    (next++)->index = MSR_IA32_TSC;
#ifdef TARGET_X86_64
    /* FIXME lm_capable_kernel */
    (next++)->index = MSR_CSTAR;
    (next++)->index = MSR_KERNELGSBASE;
    (next++)->index = MSR_FMASK;
    (next++)->index = MSR_LSTAR;
#endif
    msr_data.info.nmsrs = next - msr_data.entries;

    filled = kvm_vcpu_ioctl(env, KVM_GET_MSRS, &msr_data);
    if (filled < 0) {
        return filled;
    }

    for (k = 0; k < filled; k++) {
        const struct kvm_msr_entry *e = &msr_data.entries[k];

        switch (e->index) {
        case MSR_IA32_SYSENTER_CS:
            env->sysenter_cs = e->data;
            break;
        case MSR_IA32_SYSENTER_ESP:
            env->sysenter_esp = e->data;
            break;
        case MSR_IA32_SYSENTER_EIP:
            env->sysenter_eip = e->data;
            break;
        case MSR_STAR:
            env->star = e->data;
            break;
#ifdef TARGET_X86_64
        case MSR_CSTAR:
            env->cstar = e->data;
            break;
        case MSR_KERNELGSBASE:
            env->kernelgsbase = e->data;
            break;
        case MSR_FMASK:
            env->fmask = e->data;
            break;
        case MSR_LSTAR:
            env->lstar = e->data;
            break;
#endif
        case MSR_IA32_TSC:
            env->tsc = e->data;
            break;
        }
    }

    return 0;
}
642
/*
 * Write the complete register state held in env to the vcpu: general
 * registers, FPU, special registers, MSRs and finally the MP state.
 * Stops at the first step that fails and returns its negative result;
 * returns 0 when every step succeeds.
 */
int kvm_arch_put_registers(CPUState *env)
{
    int ret = kvm_getput_regs(env, 1);

    if (ret >= 0) {
        ret = kvm_put_fpu(env);
    }
    if (ret >= 0) {
        ret = kvm_put_sregs(env);
    }
    if (ret >= 0) {
        ret = kvm_put_msrs(env);
    }
    if (ret >= 0) {
        ret = kvm_put_mp_state(env);
    }
    if (ret >= 0) {
        /* NOTE(review): reading the MP state back right after writing it
         * looks like it belongs in the get path - confirm before moving. */
        ret = kvm_get_mp_state(env);
    }

    return ret < 0 ? ret : 0;
}
673
/*
 * Read the complete register state of the vcpu into env: general
 * registers, FPU, special registers, MSRs and the MP state.
 * Stops at the first step that fails and returns its negative result;
 * returns 0 when every step succeeds.
 */
int kvm_arch_get_registers(CPUState *env)
{
    int ret;

    ret = kvm_getput_regs(env, 0);
    if (ret < 0) {
        return ret;
    }

    ret = kvm_get_fpu(env);
    if (ret < 0) {
        return ret;
    }

    ret = kvm_get_sregs(env);
    if (ret < 0) {
        return ret;
    }

    ret = kvm_get_msrs(env);
    if (ret < 0) {
        return ret;
    }

    /* kvm_arch_put_registers() writes the MP state, so the read path has to
     * fetch it as well; otherwise env->mp_state is never refreshed here. */
    ret = kvm_get_mp_state(env);
    if (ret < 0) {
        return ret;
    }

    return 0;
}
696
/*
 * Enter the guest with KVM_RUN and return the ioctl result.
 * With CONFIG_KVM_GS_RESTORE, the call goes through no_gs_ioctl() on
 * env->kvm_fd whenever gs_need_restore is not KVM_GS_RESTORE_NO.
 */
int kvm_arch_vcpu_run(CPUState *env)
{
#ifdef CONFIG_KVM_GS_RESTORE
    if (gs_need_restore != KVM_GS_RESTORE_NO) {
        return no_gs_ioctl(env->kvm_fd, KVM_RUN, 0);
    }
#endif

    return kvm_vcpu_ioctl(env, KVM_RUN, 0);
}
706
/*
 * Prepare the shared kvm_run area just before entering the guest:
 * inject a pending PIC interrupt if the guest can take it now, request an
 * interrupt-window exit if one is still pending afterwards, and publish
 * the current APIC TPR in run->cr8.  Always returns 0.
 */
int kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
{
    /* Try to inject an interrupt if the guest can accept it */
    if (run->ready_for_interrupt_injection &&
        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->eflags & IF_MASK)) {
        int vector;

        env->interrupt_request &= ~CPU_INTERRUPT_HARD;
        vector = cpu_get_pic_interrupt(env);
        if (vector >= 0) {
            struct kvm_interrupt request;

            request.irq = vector;
            /* FIXME: errors */
            dprintf("injected interrupt %d\n", vector);
            kvm_vcpu_ioctl(env, KVM_INTERRUPT, &request);
        }
    }

    /* If we have an interrupt but the guest is not ready to receive an
     * interrupt, request an interrupt window exit.  This will
     * cause a return to userspace as soon as the guest is ready to
     * receive interrupts. */
    run->request_interrupt_window =
        (env->interrupt_request & CPU_INTERRUPT_HARD) ? 1 : 0;

    dprintf("setting tpr\n");
    run->cr8 = cpu_get_apic_tpr(env);

#ifdef CONFIG_KVM_GS_RESTORE
    gs_base_pre_run();
#endif

    return 0;
}
744
/*
 * Pull state back out of the shared kvm_run area after the guest exits:
 * mirror run->if_flag into EFLAGS.IF and forward cr8 / apic_base to the
 * APIC model.  Always returns 0.
 */
int kvm_arch_post_run(CPUState *env, struct kvm_run *run)
{
#ifdef CONFIG_KVM_GS_RESTORE
    gs_base_post_run();
#endif

    env->eflags = run->if_flag ? (env->eflags | IF_MASK)
                               : (env->eflags & ~IF_MASK);

    cpu_set_apic_tpr(env, run->cr8);
    cpu_set_apic_base(env, run->apic_base);

    return 0;
}
760
/*
 * React to a guest HLT.
 * Returns 1 without halting when an NMI is pending, or when a hard
 * interrupt is pending and EFLAGS.IF is set.  Otherwise marks the CPU as
 * halted (exception_index = EXCP_HLT) and returns 0.
 */
static int kvm_handle_halt(CPUState *env)
{
    int deliverable_irq = (env->interrupt_request & CPU_INTERRUPT_HARD) &&
                          (env->eflags & IF_MASK);

    if (deliverable_irq || (env->interrupt_request & CPU_INTERRUPT_NMI)) {
        return 1;
    }

    env->halted = 1;
    env->exception_index = EXCP_HLT;
    return 0;
}
773
/*
 * Architecture-specific exit dispatch.  Only KVM_EXIT_HLT is handled (via
 * kvm_handle_halt()); every other exit reason yields 0.
 */
int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run)
{
    if (run->exit_reason == KVM_EXIT_HLT) {
        dprintf("handle_hlt\n");
        return kvm_handle_halt(env);
    }

    return 0;
}
787
788 #ifdef KVM_CAP_SET_GUEST_DEBUG
/*
 * Plant a software breakpoint at bp->pc: save the original first byte in
 * bp->saved_insn, then overwrite it with the INT3 opcode (0xcc).
 * Returns 0 on success, -EINVAL if either guest memory access fails.
 */
int kvm_arch_insert_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
{
    static const uint8_t int3 = 0xcc;

    /* Read the byte that is about to be replaced. */
    if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0)) {
        return -EINVAL;
    }

    /* Write the breakpoint opcode in its place. */
    if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&int3, 1, 1)) {
        return -EINVAL;
    }

    return 0;
}
798
/*
 * Remove a software breakpoint: verify that bp->pc still holds the INT3
 * opcode (0xcc) and put the saved original byte back.
 * Returns 0 on success, -EINVAL if a memory access fails or the byte at
 * bp->pc is no longer 0xcc.
 */
int kvm_arch_remove_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
{
    uint8_t current;

    if (cpu_memory_rw_debug(env, bp->pc, &current, 1, 0)) {
        return -EINVAL;
    }
    if (current != 0xcc) {
        return -EINVAL;
    }
    if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) {
        return -EINVAL;
    }

    return 0;
}
808
/*
 * Table of active hardware breakpoints/watchpoints (at most 4 slots).
 * Slots 0 .. nb_hw_breakpoint-1 are in use.
 */
static struct {
    target_ulong addr;  /* address being watched */
    int len;            /* access length; set to 1 for GDB_BREAKPOINT_HW */
    int type;           /* GDB_BREAKPOINT_HW or GDB_WATCHPOINT_* value */
} hw_breakpoint[4];

/* Number of occupied slots in hw_breakpoint[]. */
static int nb_hw_breakpoint;
816
/*
 * Find the active hardware breakpoint matching addr and type.
 * A len of -1 matches any stored length; otherwise the length must be
 * equal as well.  Returns the slot index, or -1 when nothing matches.
 */
static int find_hw_breakpoint(target_ulong addr, int len, int type)
{
    int slot;

    for (slot = 0; slot < nb_hw_breakpoint; slot++) {
        if (hw_breakpoint[slot].addr != addr) {
            continue;
        }
        if (hw_breakpoint[slot].type != type) {
            continue;
        }
        if (len != -1 && hw_breakpoint[slot].len != len) {
            continue;
        }
        return slot;
    }

    return -1;
}
827
/*
 * Register a hardware breakpoint or watchpoint.
 *
 * Execution breakpoints always use length 1.  Write/access watchpoints
 * accept lengths 1, 2, 4 or 8, and for lengths above 1 the address must be
 * aligned to the length.
 *
 * Returns 0 on success, -EINVAL for a bad length or alignment, -ENOSYS for
 * an unsupported type, -ENOBUFS when all 4 slots are taken and -EEXIST
 * when an identical entry is already registered.
 */
int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    if (type == GDB_BREAKPOINT_HW) {
        len = 1;
    } else if (type == GDB_WATCHPOINT_WRITE ||
               type == GDB_WATCHPOINT_ACCESS) {
        if (len != 1) {
            if (len != 2 && len != 4 && len != 8) {
                return -EINVAL;
            }
            if (addr & (len - 1)) {
                return -EINVAL;
            }
        }
    } else {
        return -ENOSYS;
    }

    if (nb_hw_breakpoint == 4) {
        return -ENOBUFS;
    }

    if (find_hw_breakpoint(addr, len, type) >= 0) {
        return -EEXIST;
    }

    hw_breakpoint[nb_hw_breakpoint].type = type;
    hw_breakpoint[nb_hw_breakpoint].len = len;
    hw_breakpoint[nb_hw_breakpoint].addr = addr;
    nb_hw_breakpoint++;

    return 0;
}
867
/*
 * Unregister a hardware breakpoint or watchpoint.  Execution breakpoints
 * are looked up with length 1 regardless of the len argument.  The freed
 * slot is refilled with the last active entry so the table stays dense.
 * Returns 0 on success, -ENOENT when no matching entry exists.
 */
int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    target_ulong match_len = (type == GDB_BREAKPOINT_HW) ? 1 : len;
    int slot = find_hw_breakpoint(addr, match_len, type);

    if (slot < 0) {
        return -ENOENT;
    }

    hw_breakpoint[slot] = hw_breakpoint[--nb_hw_breakpoint];

    return 0;
}
882
kvm_arch_remove_all_hw_breakpoints(void)883 void kvm_arch_remove_all_hw_breakpoints(void)
884 {
885 nb_hw_breakpoint = 0;
886 }
887
888 static CPUWatchpoint hw_watchpoint;
889
kvm_arch_debug(struct kvm_debug_exit_arch * arch_info)890 int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info)
891 {
892 int handle = 0;
893 int n;
894
895 if (arch_info->exception == 1) {
896 if (arch_info->dr6 & (1 << 14)) {
897 if (cpu_single_env->singlestep_enabled)
898 handle = 1;
899 } else {
900 for (n = 0; n < 4; n++)
901 if (arch_info->dr6 & (1 << n))
902 switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) {
903 case 0x0:
904 handle = 1;
905 break;
906 case 0x1:
907 handle = 1;
908 cpu_single_env->watchpoint_hit = &hw_watchpoint;
909 hw_watchpoint.vaddr = hw_breakpoint[n].addr;
910 hw_watchpoint.flags = BP_MEM_WRITE;
911 break;
912 case 0x3:
913 handle = 1;
914 cpu_single_env->watchpoint_hit = &hw_watchpoint;
915 hw_watchpoint.vaddr = hw_breakpoint[n].addr;
916 hw_watchpoint.flags = BP_MEM_ACCESS;
917 break;
918 }
919 }
920 } else if (kvm_find_sw_breakpoint(cpu_single_env, arch_info->pc))
921 handle = 1;
922
923 if (!handle)
924 kvm_update_guest_debug(cpu_single_env,
925 (arch_info->exception == 1) ?
926 KVM_GUESTDBG_INJECT_DB : KVM_GUESTDBG_INJECT_BP);
927
928 return handle;
929 }
930
/*
 * Fill the architecture-specific part of a KVM_SET_GUEST_DEBUG request.
 *
 * Enables software-breakpoint interception when any software breakpoint is
 * active.  When hardware breakpoints are registered, loads their addresses
 * into debugreg[0..3] and builds the DR7 image in debugreg[7]: starting
 * from 0x0600, each slot n contributes its enable bit (2 << 2n), its type
 * code at bit 16 + 4n and its length code at bit 18 + 4n.
 */
void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg)
{
    static const uint8_t type_code[] = {
        [GDB_BREAKPOINT_HW] = 0x0,
        [GDB_WATCHPOINT_WRITE] = 0x1,
        [GDB_WATCHPOINT_ACCESS] = 0x3
    };
    static const uint8_t len_code[] = {
        [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2
    };
    int n;

    if (kvm_sw_breakpoints_active(env)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }

    if (nb_hw_breakpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        dbg->arch.debugreg[7] = 0x0600;
        for (n = 0; n < nb_hw_breakpoint; n++) {
            dbg->arch.debugreg[n] = hw_breakpoint[n].addr;
            /* Shift as uint32_t: for slot 3 the length field reaches bit
             * 31, which as a signed int would overflow and sign-extend
             * into the upper half of the 64-bit debugreg[7]. */
            dbg->arch.debugreg[7] |= (2 << (n * 2)) |
                ((uint32_t)type_code[hw_breakpoint[n].type] << (16 + n*4)) |
                ((uint32_t)len_code[hw_breakpoint[n].len] << (18 + n*4));
        }
    }
}
957 #endif /* KVM_CAP_SET_GUEST_DEBUG */
958