• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * QEMU KVM support
3  *
4  * Copyright (C) 2006-2008 Qumranet Technologies
5  * Copyright IBM, Corp. 2008
6  *
7  * Authors:
8  *  Anthony Liguori   <aliguori@us.ibm.com>
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2 or later.
11  * See the COPYING file in the top-level directory.
12  *
13  */
14 
15 #include <sys/types.h>
16 #include <sys/ioctl.h>
17 #include <sys/mman.h>
18 
19 #undef __user
20 #define __xuser  /* nothing */
21 #include <linux/kvm.h>
22 
23 #include "qemu-common.h"
24 #include "sysemu.h"
25 #include "kvm.h"
26 #include "cpu.h"
27 #include "gdbstub.h"
28 
29 #ifdef CONFIG_KVM_GS_RESTORE
30 #include "kvm-gs-restore.h"
31 #endif
32 
33 //#define DEBUG_KVM
34 
35 #ifdef DEBUG_KVM
36 #define dprintf(fmt, ...) \
37     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
38 #else
39 #define dprintf(fmt, ...) \
40     do { } while (0)
41 #endif
42 
43 #ifdef KVM_CAP_EXT_CPUID
44 
/*
 * Query KVM_GET_SUPPORTED_CPUID with a buffer sized for 'max' entries.
 *
 * Returns the zero-allocated, kernel-filled table on success; the caller
 * releases it with qemu_free().  Returns NULL when the ioctl reports -E2BIG
 * or when the kernel filled every slot (nent >= max), so that the caller can
 * retry with a larger 'max'.  Any other ioctl error is printed to stderr and
 * terminates the process with exit(1).
 */
static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
{
    struct kvm_cpuid2 *table;
    int bytes;
    int rc;

    bytes = sizeof(*table) + max * sizeof(table->entries[0]);
    table = (struct kvm_cpuid2 *)qemu_mallocz(bytes);
    table->nent = max;

    rc = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, table);

    /* A completely full buffer is handled like an explicit "too big". */
    if (rc == 0 && table->nent >= max) {
        rc = -E2BIG;
    }

    if (rc >= 0) {
        return table;
    }

    if (rc != -E2BIG) {
        fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n",
                strerror(-rc));
        exit(1);
    }

    qemu_free(table);
    return NULL;
}
69 
/*
 * Look up one register of a CPUID leaf in the table of host-supported
 * values reported by KVM.
 *
 * env:      CPU whose kvm_state is queried.
 * function: CPUID leaf number to look for.
 * reg:      which register to return (R_EAX, R_EBX, R_ECX or R_EDX).
 *
 * Returns -1U when KVM_CAP_EXT_CPUID is not available, 0 when no entry for
 * 'function' exists (or 'reg' is none of the four registers); otherwise the
 * register value from the last table entry whose function matches.
 */
uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int reg)
{
    struct kvm_cpuid2 *table;
    uint32_t value = 0;
    int capacity = 1;
    int idx;

    if (!kvm_check_extension(env->kvm_state, KVM_CAP_EXT_CPUID)) {
        return -1U;
    }

    /* Double the buffer size until the whole table fits. */
    for (;;) {
        table = try_get_cpuid(env->kvm_state, capacity);
        if (table != NULL) {
            break;
        }
        capacity *= 2;
    }

    for (idx = 0; idx < table->nent; ++idx) {
        const struct kvm_cpuid_entry2 *entry = &table->entries[idx];

        if (entry->function != function) {
            continue;
        }

        if (reg == R_EAX) {
            value = entry->eax;
        } else if (reg == R_EBX) {
            value = entry->ebx;
        } else if (reg == R_ECX) {
            value = entry->ecx;
        } else if (reg == R_EDX) {
            value = entry->edx;
            if (function == 0x80000001) {
                /* On Intel, kvm returns cpuid according to the Intel spec,
                 * so add missing bits according to the AMD spec:
                 */
                uint32_t leaf1_edx =
                    kvm_arch_get_supported_cpuid(env, 1, R_EDX);

                value |= leaf1_edx & 0xdfeff7ff;
            }
        }
    }

    qemu_free(table);

    return value;
}
116 
117 #else
118 
/*
 * Variant built when the kernel headers lack KVM_CAP_EXT_CPUID: the
 * supported-CPUID table cannot be queried, so every lookup yields -1U.
 */
uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int reg)
{
    (void)env;
    (void)function;
    (void)reg;

    return -1U;
}
123 
124 #endif
125 
126 #ifndef KVM_MP_STATE_RUNNABLE
127 #define KVM_MP_STATE_RUNNABLE 0
128 #endif
129 
/*
 * Build the CPUID table for a vcpu from cpu_x86_cpuid() and install it with
 * KVM_SET_CPUID2.  Also marks the vcpu as KVM_MP_STATE_RUNNABLE.
 *
 * The table lives on the stack and has a fixed capacity.  Every slot is
 * claimed only after a capacity check, because the number of entries depends
 * on values returned by cpu_x86_cpuid() (leaf 2 repeat count, number of
 * sub-leaves of leaves 4/0xb/0xd, basic and extended leaf limits).
 *
 * Returns the result of the KVM_SET_CPUID2 ioctl, or -E2BIG (after printing
 * a message) when the table would not fit.
 */
int kvm_arch_init_vcpu(CPUState *env)
{
    struct {
        struct kvm_cpuid2 cpuid;
        struct kvm_cpuid_entry2 entries[100];
    } __attribute__((packed)) cpuid_data;
    const uint32_t max_entries =
        sizeof(cpuid_data.entries) / sizeof(cpuid_data.entries[0]);
    uint32_t limit, i, j, cpuid_i;
    uint32_t unused;

    /* Do not hand uninitialised index/padding fields to the kernel. */
    memset(&cpuid_data, 0, sizeof(cpuid_data));

    env->mp_state = KVM_MP_STATE_RUNNABLE;

    cpuid_i = 0;

    /* Basic leaves: 0 .. limit */
    cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);

    for (i = 0; i <= limit; i++) {
        struct kvm_cpuid_entry2 *c;

        if (cpuid_i >= max_entries) {
            goto too_many;
        }
        c = &cpuid_data.entries[cpuid_i++];

        switch (i) {
        case 2: {
            /* Keep reading function 2 till all the input is received */
            int times;

            c->function = i;
            c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
                       KVM_CPUID_FLAG_STATE_READ_NEXT;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            /* Low byte of EAX is the number of reads needed (up to 255). */
            times = c->eax & 0xff;

            for (j = 1; j < times; ++j) {
                if (cpuid_i >= max_entries) {
                    goto too_many;
                }
                c = &cpuid_data.entries[cpuid_i++];
                c->function = i;
                c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
                cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            }
            break;
        }
        case 4:
        case 0xb:
        case 0xd:
            /* Indexed leaves: one entry per sub-leaf, including the
             * terminating sub-leaf that ends the enumeration. */
            for (j = 0; ; j++) {
                c->function = i;
                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                c->index = j;
                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);

                if (i == 4 && c->eax == 0) {
                    break;
                }
                if (i == 0xb && !(c->ecx & 0xff00)) {
                    break;
                }
                if (i == 0xd && c->eax == 0) {
                    break;
                }

                if (cpuid_i >= max_entries) {
                    goto too_many;
                }
                c = &cpuid_data.entries[cpuid_i++];
            }
            break;
        default:
            c->function = i;
            c->flags = 0;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            break;
        }
    }

    /* Extended leaves: 0x80000000 .. limit */
    cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);

    for (i = 0x80000000; i <= limit; i++) {
        struct kvm_cpuid_entry2 *c;

        if (cpuid_i >= max_entries) {
            goto too_many;
        }
        c = &cpuid_data.entries[cpuid_i++];

        c->function = i;
        c->flags = 0;
        cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
    }

    cpuid_data.cpuid.nent = cpuid_i;

    return kvm_vcpu_ioctl(env, KVM_SET_CPUID2, &cpuid_data);

too_many:
    fprintf(stderr, "kvm: CPUID table needs more than %u entries\n",
            (unsigned)max_entries);
    return -E2BIG;
}
207 
/*
 * Report whether KVM lists MSR_STAR among the MSRs it can save/restore.
 *
 * The answer is computed on the first call and cached in a function-local
 * static: 0 = not probed yet, 1 = present, -1 = absent or probe failed.
 *
 * Returns 1 when MSR_STAR is in the kernel's MSR index list, 0 otherwise.
 */
static int kvm_has_msr_star(CPUState *env)
{
    static int has_msr_star;
    int ret;

    /* first time */
    if (has_msr_star == 0) {
        struct kvm_msr_list msr_list, *kvm_msr_list;

        has_msr_star = -1;

        /* Obtain MSR list from KVM.  These are the MSRs that we must
         * save/restore.
         *
         * The first call is only a size probe: with nmsrs == 0 the kernel
         * is expected to fail with -E2BIG while storing the required count
         * in nmsrs, so that particular error must not abort the detection. */
        msr_list.nmsrs = 0;
        ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list);
        if (ret < 0 && ret != -E2BIG) {
            return 0;
        }

        kvm_msr_list = qemu_mallocz(sizeof(msr_list) +
                                    msr_list.nmsrs * sizeof(msr_list.indices[0]));

        kvm_msr_list->nmsrs = msr_list.nmsrs;
        ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
        if (ret >= 0) {
            uint32_t i;

            for (i = 0; i < kvm_msr_list->nmsrs; i++) {
                if (kvm_msr_list->indices[i] == MSR_STAR) {
                    has_msr_star = 1;
                    break;
                }
            }
        }

        /* Allocated with qemu_mallocz(), so release with qemu_free(). */
        qemu_free(kvm_msr_list);
    }

    return has_msr_star == 1;
}
249 
/*
 * x86-specific VM setup: require KVM_CAP_SET_TSS_ADDR and place the vm86
 * TSS.
 *
 * s:        KVM state of the VM being set up.
 * smp_cpus: not used by this function.
 *
 * Returns the result of the KVM_SET_TSS_ADDR ioctl, or a negative value
 * when the capability is missing (-ENOSYS) or could not be queried (the
 * ioctl's own error).
 */
int kvm_arch_init(KVMState *s, int smp_cpus)
{
    int ret;

    /* create vm86 tss.  KVM uses vm86 mode to emulate 16-bit code
     * directly.  In order to use vm86 mode, a TSS is needed.  Since this
     * must be part of guest physical memory, we need to allocate it.  Older
     * versions of KVM just assumed that it would be at the end of physical
     * memory but that doesn't work with more than 4GB of memory.  We simply
     * refuse to work with those older versions of KVM. */
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
    if (ret <= 0) {
        fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n");
        /* ret == 0 means "capability absent"; returning it unchanged would
         * look like success to the caller, so map it to an error. */
        return ret < 0 ? ret : -ENOSYS;
    }

    /* this address is 3 pages before the bios, and the bios should present
     * as unavailable memory.  FIXME, need to ensure the e820 map deals with
     * this?
     */
    return kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, 0xfffbd000);
}
272 
set_v8086_seg(struct kvm_segment * lhs,const SegmentCache * rhs)273 static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
274 {
275     lhs->selector = rhs->selector;
276     lhs->base = rhs->base;
277     lhs->limit = rhs->limit;
278     lhs->type = 3;
279     lhs->present = 1;
280     lhs->dpl = 3;
281     lhs->db = 0;
282     lhs->s = 1;
283     lhs->l = 0;
284     lhs->g = 0;
285     lhs->avl = 0;
286     lhs->unusable = 0;
287 }
288 
set_seg(struct kvm_segment * lhs,const SegmentCache * rhs)289 static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
290 {
291     unsigned flags = rhs->flags;
292     lhs->selector = rhs->selector;
293     lhs->base = rhs->base;
294     lhs->limit = rhs->limit;
295     lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
296     lhs->present = (flags & DESC_P_MASK) != 0;
297     lhs->dpl = rhs->selector & 3;
298     lhs->db = (flags >> DESC_B_SHIFT) & 1;
299     lhs->s = (flags & DESC_S_MASK) != 0;
300     lhs->l = (flags >> DESC_L_SHIFT) & 1;
301     lhs->g = (flags & DESC_G_MASK) != 0;
302     lhs->avl = (flags & DESC_AVL_MASK) != 0;
303     lhs->unusable = 0;
304 }
305 
/*
 * Convert a KVM segment descriptor into a QEMU segment cache entry.
 *
 * Selector, base and limit are copied; the individual attribute fields of
 * 'rhs' are packed into lhs->flags using the DESC_* shifts and masks.
 */
static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;

    /* Multi-bit fields are shifted into place ... */
    lhs->flags = (rhs->type << DESC_TYPE_SHIFT);
    lhs->flags |= (rhs->dpl << DESC_DPL_SHIFT);
    lhs->flags |= (rhs->db << DESC_B_SHIFT);
    lhs->flags |= (rhs->l << DESC_L_SHIFT);

    /* ... single-bit fields are scaled by their mask. */
    lhs->flags |= (rhs->present * DESC_P_MASK);
    lhs->flags |= (rhs->s * DESC_S_MASK);
    lhs->flags |= (rhs->g * DESC_G_MASK);
    lhs->flags |= (rhs->avl * DESC_AVL_MASK);
}
321 
/*
 * Copy one register between the KVM and QEMU representations.
 * When 'set' is non-zero the QEMU value is written to *kvm_reg; otherwise
 * the KVM value is written to *qemu_reg.
 */
static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set)
{
    if (!set) {
        *qemu_reg = *kvm_reg;
        return;
    }

    *kvm_reg = *qemu_reg;
}
329 
/*
 * Transfer the general-purpose registers, flags and instruction pointer
 * between 'env' and the vcpu.
 *
 * set == 0: read them from KVM (KVM_GET_REGS) into env.
 * set != 0: write them from env into KVM (KVM_SET_REGS).
 *
 * Returns the ioctl result; a negative value means failure.  On a failed
 * read, env is left untouched.
 */
static int kvm_getput_regs(CPUState *env, int set)
{
    struct kvm_regs regs;
    int ret = 0;

    /* On the set path only the registers handled below are filled in; when
     * TARGET_X86_64 is not defined that excludes r8..r15.  Zero the
     * structure so no uninitialised stack contents reach the kernel. */
    memset(&regs, 0, sizeof(regs));

    if (!set) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
        if (ret < 0) {
            return ret;
        }
    }

    kvm_getput_reg(&regs.rax, &env->regs[R_EAX], set);
    kvm_getput_reg(&regs.rbx, &env->regs[R_EBX], set);
    kvm_getput_reg(&regs.rcx, &env->regs[R_ECX], set);
    kvm_getput_reg(&regs.rdx, &env->regs[R_EDX], set);
    kvm_getput_reg(&regs.rsi, &env->regs[R_ESI], set);
    kvm_getput_reg(&regs.rdi, &env->regs[R_EDI], set);
    kvm_getput_reg(&regs.rsp, &env->regs[R_ESP], set);
    kvm_getput_reg(&regs.rbp, &env->regs[R_EBP], set);
#ifdef TARGET_X86_64
    kvm_getput_reg(&regs.r8, &env->regs[8], set);
    kvm_getput_reg(&regs.r9, &env->regs[9], set);
    kvm_getput_reg(&regs.r10, &env->regs[10], set);
    kvm_getput_reg(&regs.r11, &env->regs[11], set);
    kvm_getput_reg(&regs.r12, &env->regs[12], set);
    kvm_getput_reg(&regs.r13, &env->regs[13], set);
    kvm_getput_reg(&regs.r14, &env->regs[14], set);
    kvm_getput_reg(&regs.r15, &env->regs[15], set);
#endif

    kvm_getput_reg(&regs.rflags, &env->eflags, set);
    kvm_getput_reg(&regs.rip, &env->eip, set);

    if (set) {
        ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
    }

    return ret;
}
368 
/*
 * Push the FPU/SSE state held in 'env' to the vcpu via KVM_SET_FPU.
 * Returns the ioctl result.
 */
static int kvm_put_fpu(CPUState *env)
{
    struct kvm_fpu fpu;
    int slot;

    memset(&fpu, 0, sizeof fpu);

    /* Status word: bits 11..13 are replaced by env->fpstt, the remaining
     * bits come from env->fpus. */
    fpu.fsw = (env->fpus & ~(7 << 11)) | ((env->fpstt & 7) << 11);
    fpu.fcw = env->fpuc;
    fpu.mxcsr = env->mxcsr;

    /* ftwx bit i is set when env->fptags[i] is zero. */
    for (slot = 0; slot < 8; ++slot) {
        if (!env->fptags[slot]) {
            fpu.ftwx |= 1 << slot;
        }
    }

    memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
    memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);

    return kvm_vcpu_ioctl(env, KVM_SET_FPU, &fpu);
}
386 
/*
 * Push the special registers held in 'env' (segments, descriptor tables,
 * control registers, EFER, APIC base/TPR and the pending-interrupt bitmap)
 * to the vcpu via KVM_SET_SREGS.  Returns the ioctl result.
 */
static int kvm_put_sregs(CPUState *env)
{
    struct kvm_sregs sregs;
    const int vm86 = (env->eflags & VM_MASK) != 0;
    /* With EFLAGS.VM set the six data/code segments get fixed attributes;
     * otherwise they are translated from the cached descriptor flags. */
    void (*fill_seg)(struct kvm_segment *, const SegmentCache *) =
        vm86 ? set_v8086_seg : set_seg;

    memcpy(sregs.interrupt_bitmap,
           env->interrupt_bitmap,
           sizeof(sregs.interrupt_bitmap));

    fill_seg(&sregs.cs, &env->segs[R_CS]);
    fill_seg(&sregs.ds, &env->segs[R_DS]);
    fill_seg(&sregs.es, &env->segs[R_ES]);
    fill_seg(&sregs.fs, &env->segs[R_FS]);
    fill_seg(&sregs.gs, &env->segs[R_GS]);
    fill_seg(&sregs.ss, &env->segs[R_SS]);

    if (!vm86 && (env->cr[0] & CR0_PE_MASK)) {
        /* force ss cpl to cs cpl */
        sregs.ss.selector = (sregs.ss.selector & ~3) |
                            (sregs.cs.selector & 3);
        sregs.ss.dpl = sregs.ss.selector & 3;
    }

    /* Task register and LDT always use the regular translation. */
    set_seg(&sregs.tr, &env->tr);
    set_seg(&sregs.ldt, &env->ldt);

    sregs.gdt.base = env->gdt.base;
    sregs.gdt.limit = env->gdt.limit;
    sregs.idt.base = env->idt.base;
    sregs.idt.limit = env->idt.limit;

    sregs.cr0 = env->cr[0];
    sregs.cr2 = env->cr[2];
    sregs.cr3 = env->cr[3];
    sregs.cr4 = env->cr[4];
    sregs.efer = env->efer;

    /* CR8 and the APIC base are taken from the APIC model. */
    sregs.cr8 = cpu_get_apic_tpr(env);
    sregs.apic_base = cpu_get_apic_base(env);

    return kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
}
438 
kvm_msr_entry_set(struct kvm_msr_entry * entry,uint32_t index,uint64_t value)439 static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
440                               uint32_t index, uint64_t value)
441 {
442     entry->index = index;
443     entry->data = value;
444 }
445 
/*
 * Push the MSR values held in 'env' to the vcpu via KVM_SET_MSRS.
 * MSR_STAR is included only when kvm_has_msr_star() reports it; the
 * long-mode MSRs are included only in TARGET_X86_64 builds.
 * Returns the ioctl result.
 */
static int kvm_put_msrs(CPUState *env)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[100];
    } msr_data;
    /* Cursor pointing at the next free entry. */
    struct kvm_msr_entry *next = msr_data.entries;

    kvm_msr_entry_set(next++, MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    kvm_msr_entry_set(next++, MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    kvm_msr_entry_set(next++, MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    if (kvm_has_msr_star(env)) {
        kvm_msr_entry_set(next++, MSR_STAR, env->star);
    }
    kvm_msr_entry_set(next++, MSR_IA32_TSC, env->tsc);
#ifdef TARGET_X86_64
    /* FIXME if lm capable */
    kvm_msr_entry_set(next++, MSR_CSTAR, env->cstar);
    kvm_msr_entry_set(next++, MSR_KERNELGSBASE, env->kernelgsbase);
    kvm_msr_entry_set(next++, MSR_FMASK, env->fmask);
    kvm_msr_entry_set(next++, MSR_LSTAR, env->lstar);
#endif
    msr_data.info.nmsrs = next - msr_data.entries;

    return kvm_vcpu_ioctl(env, KVM_SET_MSRS, &msr_data);
}
473 
474 
/*
 * Fetch the FPU/SSE state from the vcpu (KVM_GET_FPU) and store it in
 * 'env'.  Returns 0 on success or the negative ioctl result, in which case
 * 'env' is not modified.
 */
static int kvm_get_fpu(CPUState *env)
{
    struct kvm_fpu fpu;
    int slot;
    int rc = kvm_vcpu_ioctl(env, KVM_GET_FPU, &fpu);

    if (rc < 0) {
        return rc;
    }

    env->fpuc = fpu.fcw;
    env->fpus = fpu.fsw;
    /* Bits 11..13 of the status word are kept separately in fpstt. */
    env->fpstt = (fpu.fsw >> 11) & 7;
    env->mxcsr = fpu.mxcsr;

    /* fptags[i] is the inverse of bit i in ftwx. */
    for (slot = 0; slot < 8; ++slot) {
        env->fptags[slot] = ((fpu.ftwx >> slot) & 1) == 0;
    }

    memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
    memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);

    return 0;
}
495 
/*
 * Fetch the special registers from the vcpu (KVM_GET_SREGS), store them in
 * 'env' and recompute the subset of env->hflags that is derived from them.
 *
 * Returns 0 on success or the negative ioctl result, in which case 'env'
 * is not modified.
 */
static int kvm_get_sregs(CPUState *env)
{
    struct kvm_sregs sregs;
    uint32_t hflags;
    int ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    memcpy(env->interrupt_bitmap,
           sregs.interrupt_bitmap,
           sizeof(sregs.interrupt_bitmap));

    /* Segment, task and LDT registers */
    get_seg(&env->segs[R_CS], &sregs.cs);
    get_seg(&env->segs[R_DS], &sregs.ds);
    get_seg(&env->segs[R_ES], &sregs.es);
    get_seg(&env->segs[R_FS], &sregs.fs);
    get_seg(&env->segs[R_GS], &sregs.gs);
    get_seg(&env->segs[R_SS], &sregs.ss);
    get_seg(&env->tr, &sregs.tr);
    get_seg(&env->ldt, &sregs.ldt);

    /* Descriptor tables */
    env->gdt.base = sregs.gdt.base;
    env->gdt.limit = sregs.gdt.limit;
    env->idt.base = sregs.idt.base;
    env->idt.limit = sregs.idt.limit;

    /* Control registers */
    env->cr[0] = sregs.cr0;
    env->cr[2] = sregs.cr2;
    env->cr[3] = sregs.cr3;
    env->cr[4] = sregs.cr4;

    cpu_set_apic_base(env, sregs.apic_base);

    env->efer = sregs.efer;
    /* The TPR (sregs.cr8) is deliberately not written back here:
     * cpu_set_apic_tpr(env, sregs.cr8) is disabled. */

    /* Bits of env->hflags that are preserved; every flag named in the list
     * is recomputed below. */
#define HFLAG_COPY_MASK ~( \
			HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
			HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
			HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
			HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)

    /* CPL from the DPL field of the CS descriptor flags */
    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
    /* CR0.PE, then CR0.MP/EM/TS in one shifted group */
    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
              (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
    /* TF, VM and IOPL are copied from EFLAGS */
    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
              (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);

    if (env->efer & MSR_EFER_LMA) {
        hflags |= HF_LMA_MASK;
    }

    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
        /* Long mode active with CS.L set */
        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
    } else {
        /* CS32/SS32 follow the B bit of CS and SS respectively. */
        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
                  (DESC_B_SHIFT - HF_CS32_SHIFT);
        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
                  (DESC_B_SHIFT - HF_SS32_SHIFT);

        if ((env->cr[0] & CR0_PE_MASK) &&
            !(env->eflags & VM_MASK) &&
            (hflags & HF_CS32_MASK)) {
            /* 32-bit protected mode: ADDSEG only if DS, ES or SS has a
             * non-zero base. */
            hflags |= ((env->segs[R_DS].base |
                        env->segs[R_ES].base |
                        env->segs[R_SS].base) != 0) <<
                      HF_ADDSEG_SHIFT;
        } else {
            /* Real mode, vm86 mode or 16-bit code segment */
            hflags |= HF_ADDSEG_MASK;
        }
    }

    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;

    return 0;
}
577 
/*
 * Fetch MSR values from the vcpu (KVM_GET_MSRS) and store them in 'env'.
 * The requested set mirrors kvm_put_msrs().  Only the first 'ret' entries,
 * where 'ret' is the ioctl's return value, are consumed.
 *
 * Returns 0 on success or the negative ioctl result.
 */
static int kvm_get_msrs(CPUState *env)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[100];
    } msr_data;
    /* Cursor pointing at the next free request slot. */
    struct kvm_msr_entry *next = msr_data.entries;
    int ret, i;

    (next++)->index = MSR_IA32_SYSENTER_CS;
    (next++)->index = MSR_IA32_SYSENTER_ESP;
    (next++)->index = MSR_IA32_SYSENTER_EIP;
    if (kvm_has_msr_star(env)) {
        (next++)->index = MSR_STAR;
    }
    (next++)->index = MSR_IA32_TSC;
#ifdef TARGET_X86_64
    /* FIXME lm_capable_kernel */
    (next++)->index = MSR_CSTAR;
    (next++)->index = MSR_KERNELGSBASE;
    (next++)->index = MSR_FMASK;
    (next++)->index = MSR_LSTAR;
#endif
    msr_data.info.nmsrs = next - msr_data.entries;

    ret = kvm_vcpu_ioctl(env, KVM_GET_MSRS, &msr_data);
    if (ret < 0) {
        return ret;
    }

    for (i = 0; i < ret; i++) {
        const struct kvm_msr_entry *msr = &msr_data.entries[i];

        switch (msr->index) {
        case MSR_IA32_SYSENTER_CS:
            env->sysenter_cs = msr->data;
            break;
        case MSR_IA32_SYSENTER_ESP:
            env->sysenter_esp = msr->data;
            break;
        case MSR_IA32_SYSENTER_EIP:
            env->sysenter_eip = msr->data;
            break;
        case MSR_STAR:
            env->star = msr->data;
            break;
        case MSR_IA32_TSC:
            env->tsc = msr->data;
            break;
#ifdef TARGET_X86_64
        case MSR_CSTAR:
            env->cstar = msr->data;
            break;
        case MSR_KERNELGSBASE:
            env->kernelgsbase = msr->data;
            break;
        case MSR_FMASK:
            env->fmask = msr->data;
            break;
        case MSR_LSTAR:
            env->lstar = msr->data;
            break;
#endif
        }
    }

    return 0;
}
642 
/*
 * Write the complete register state held in 'env' to the vcpu: general
 * registers, FPU, special registers, MSRs and MP state, in that order.
 * Stops at the first failing step.
 *
 * Returns 0 on success or the negative result of the step that failed.
 */
int kvm_arch_put_registers(CPUState *env)
{
    int ret = kvm_getput_regs(env, 1);

    if (ret >= 0) {
        ret = kvm_put_fpu(env);
    }
    if (ret >= 0) {
        ret = kvm_put_sregs(env);
    }
    if (ret >= 0) {
        ret = kvm_put_msrs(env);
    }
    if (ret >= 0) {
        ret = kvm_put_mp_state(env);
    }
    if (ret >= 0) {
        /* NOTE(review): the MP state is read back right after being
         * written; the reason is not visible in this file - confirm
         * whether this is intentional. */
        ret = kvm_get_mp_state(env);
    }

    return ret < 0 ? ret : 0;
}
673 
/*
 * Read the register state of the vcpu into 'env': general registers, FPU,
 * special registers and MSRs, in that order.  Stops at the first failing
 * step.
 *
 * Returns 0 on success or the negative result of the step that failed.
 */
int kvm_arch_get_registers(CPUState *env)
{
    int ret = kvm_getput_regs(env, 0);

    if (ret >= 0) {
        ret = kvm_get_fpu(env);
    }
    if (ret >= 0) {
        ret = kvm_get_sregs(env);
    }
    if (ret >= 0) {
        ret = kvm_get_msrs(env);
    }

    return ret < 0 ? ret : 0;
}
696 
/*
 * Enter the guest with KVM_RUN and return the ioctl result.
 *
 * In CONFIG_KVM_GS_RESTORE builds, when gs_need_restore is anything other
 * than KVM_GS_RESTORE_NO, the ioctl is issued through no_gs_ioctl() on
 * env->kvm_fd instead of kvm_vcpu_ioctl().
 */
int kvm_arch_vcpu_run(CPUState *env)
{
#ifdef CONFIG_KVM_GS_RESTORE
    if (gs_need_restore != KVM_GS_RESTORE_NO) {
        return no_gs_ioctl(env->kvm_fd, KVM_RUN, 0);
    }
#endif

    return kvm_vcpu_ioctl(env, KVM_RUN, 0);
}
706 
/*
 * Prepare the shared kvm_run area before entering the guest: inject a
 * pending PIC interrupt if possible, request an interrupt-window exit if
 * one is still pending, and publish the APIC TPR as CR8.
 *
 * Always returns 0.
 */
int kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
{
    /* Try to inject an interrupt if the guest can accept it */
    const int can_inject = run->ready_for_interrupt_injection &&
                           (env->interrupt_request & CPU_INTERRUPT_HARD) &&
                           (env->eflags & IF_MASK);

    if (can_inject) {
        int vector;

        env->interrupt_request &= ~CPU_INTERRUPT_HARD;
        vector = cpu_get_pic_interrupt(env);
        if (vector >= 0) {
            struct kvm_interrupt intr;

            intr.irq = vector;
            /* FIXME: errors */
            dprintf("injected interrupt %d\n", vector);
            kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);
        }
    }

    /* If we have an interrupt but the guest is not ready to receive an
     * interrupt, request an interrupt window exit.  This will
     * cause a return to userspace as soon as the guest is ready to
     * receive interrupts. */
    run->request_interrupt_window =
        (env->interrupt_request & CPU_INTERRUPT_HARD) ? 1 : 0;

    dprintf("setting tpr\n");
    run->cr8 = cpu_get_apic_tpr(env);

#ifdef CONFIG_KVM_GS_RESTORE
    gs_base_pre_run();
#endif

    return 0;
}
744 
/*
 * Pick up state from the shared kvm_run area after the guest has exited:
 * mirror the interrupt flag into env->eflags and hand CR8 and the APIC
 * base to the APIC model.
 *
 * Always returns 0.
 */
int kvm_arch_post_run(CPUState *env, struct kvm_run *run)
{
#ifdef CONFIG_KVM_GS_RESTORE
    gs_base_post_run();
#endif

    /* EFLAGS.IF follows run->if_flag. */
    if (!run->if_flag) {
        env->eflags &= ~IF_MASK;
    } else {
        env->eflags |= IF_MASK;
    }

    cpu_set_apic_tpr(env, run->cr8);
    cpu_set_apic_base(env, run->apic_base);

    return 0;
}
760 
/*
 * Handle a guest HLT exit.
 *
 * Returns 1 without halting when an NMI is pending, or when a hard
 * interrupt is pending and EFLAGS.IF is set.  Otherwise marks the CPU as
 * halted, sets exception_index to EXCP_HLT and returns 0.
 */
static int kvm_handle_halt(CPUState *env)
{
    const int irq_deliverable =
        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->eflags & IF_MASK);

    if (irq_deliverable || (env->interrupt_request & CPU_INTERRUPT_NMI)) {
        return 1;
    }

    env->halted = 1;
    env->exception_index = EXCP_HLT;
    return 0;
}
773 
/*
 * Architecture-specific handling of a KVM exit.  Only KVM_EXIT_HLT is
 * handled (via kvm_handle_halt()); every other exit reason yields 0.
 */
int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run)
{
    if (run->exit_reason == KVM_EXIT_HLT) {
        dprintf("handle_hlt\n");
        return kvm_handle_halt(env);
    }

    return 0;
}
787 
788 #ifdef KVM_CAP_SET_GUEST_DEBUG
/*
 * Plant a software breakpoint at bp->pc: save the first byte found there
 * in bp->saved_insn, then overwrite it with 0xcc (int3).
 *
 * Returns 0 on success, -EINVAL if guest memory could not be read or
 * written.
 */
int kvm_arch_insert_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
{
    static const uint8_t int3 = 0xcc;

    /* Remember the byte that is about to be replaced. */
    if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0)) {
        return -EINVAL;
    }

    /* Patch in the breakpoint opcode. */
    if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&int3, 1, 1)) {
        return -EINVAL;
    }

    return 0;
}
798 
/*
 * Remove a software breakpoint at bp->pc by restoring the saved byte.
 *
 * The byte currently at bp->pc must still be 0xcc (int3); otherwise nothing
 * is written.  Returns 0 on success, -EINVAL on a memory access failure or
 * when the int3 opcode is no longer in place.
 */
int kvm_arch_remove_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
{
    uint8_t current;

    if (cpu_memory_rw_debug(env, bp->pc, &current, 1, 0)) {
        return -EINVAL;
    }
    if (current != 0xcc) {
        return -EINVAL;
    }
    if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) {
        return -EINVAL;
    }

    return 0;
}
808 
/*
 * Table of active hardware breakpoints/watchpoints.  Entry n is programmed
 * into debug register n by kvm_arch_update_guest_debug().
 */
static struct {
    target_ulong addr;  /* guest address being watched */
    int len;            /* length in bytes (1, 2, 4 or 8) */
    int type;           /* GDB_BREAKPOINT_HW or GDB_WATCHPOINT_* */
} hw_breakpoint[4];

/* Number of entries of hw_breakpoint[] currently in use (0..4). */
static int nb_hw_breakpoint;
816 
/*
 * Search the active hardware breakpoints for one with the given address
 * and type.  The length must match as well unless 'len' is -1, which
 * matches any length.
 *
 * Returns the index into hw_breakpoint[], or -1 if there is no match.
 */
static int find_hw_breakpoint(target_ulong addr, int len, int type)
{
    int slot = 0;

    while (slot < nb_hw_breakpoint) {
        if (hw_breakpoint[slot].addr == addr &&
            hw_breakpoint[slot].type == type &&
            (hw_breakpoint[slot].len == len || len == -1)) {
            return slot;
        }
        slot++;
    }

    return -1;
}
827 
/*
 * Register a hardware breakpoint or watchpoint.
 *
 * addr: guest address.
 * len:  length in bytes; ignored (forced to 1) for GDB_BREAKPOINT_HW.
 *       Watchpoints accept 1, 2, 4 or 8, and 'addr' must be aligned to it.
 * type: GDB_BREAKPOINT_HW, GDB_WATCHPOINT_WRITE or GDB_WATCHPOINT_ACCESS.
 *
 * Returns 0 on success, -ENOSYS for any other type, -EINVAL for a bad
 * length or alignment, -ENOBUFS when all four slots are in use and -EEXIST
 * when an identical entry is already registered.
 */
int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    if (type == GDB_BREAKPOINT_HW) {
        len = 1;
    } else if (type == GDB_WATCHPOINT_WRITE ||
               type == GDB_WATCHPOINT_ACCESS) {
        if (len != 1 && len != 2 && len != 4 && len != 8) {
            return -EINVAL;
        }
        /* Natural alignment is required (trivially true for len == 1). */
        if (addr & (len - 1)) {
            return -EINVAL;
        }
    } else {
        return -ENOSYS;
    }

    if (nb_hw_breakpoint == 4) {
        return -ENOBUFS;
    }

    if (find_hw_breakpoint(addr, len, type) >= 0) {
        return -EEXIST;
    }

    /* Append to the end of the table. */
    hw_breakpoint[nb_hw_breakpoint].type = type;
    hw_breakpoint[nb_hw_breakpoint].len = len;
    hw_breakpoint[nb_hw_breakpoint].addr = addr;
    nb_hw_breakpoint++;

    return 0;
}
867 
/*
 * Unregister a hardware breakpoint or watchpoint.  For GDB_BREAKPOINT_HW
 * the length used for the lookup is always 1.
 *
 * Returns 0 on success or -ENOENT when no matching entry exists.
 */
int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    int slot = find_hw_breakpoint(addr,
                                  (type == GDB_BREAKPOINT_HW) ? 1 : len,
                                  type);

    if (slot < 0) {
        return -ENOENT;
    }

    /* Keep the table dense: move the last entry into the freed slot. */
    nb_hw_breakpoint--;
    hw_breakpoint[slot] = hw_breakpoint[nb_hw_breakpoint];

    return 0;
}
882 
kvm_arch_remove_all_hw_breakpoints(void)883 void kvm_arch_remove_all_hw_breakpoints(void)
884 {
885     nb_hw_breakpoint = 0;
886 }
887 
/* Scratch watchpoint record that kvm_arch_debug() fills in and publishes
 * through cpu_single_env->watchpoint_hit when a hardware watchpoint fires. */
static CPUWatchpoint hw_watchpoint;
889 
kvm_arch_debug(struct kvm_debug_exit_arch * arch_info)890 int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info)
891 {
892     int handle = 0;
893     int n;
894 
895     if (arch_info->exception == 1) {
896         if (arch_info->dr6 & (1 << 14)) {
897             if (cpu_single_env->singlestep_enabled)
898                 handle = 1;
899         } else {
900             for (n = 0; n < 4; n++)
901                 if (arch_info->dr6 & (1 << n))
902                     switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) {
903                     case 0x0:
904                         handle = 1;
905                         break;
906                     case 0x1:
907                         handle = 1;
908                         cpu_single_env->watchpoint_hit = &hw_watchpoint;
909                         hw_watchpoint.vaddr = hw_breakpoint[n].addr;
910                         hw_watchpoint.flags = BP_MEM_WRITE;
911                         break;
912                     case 0x3:
913                         handle = 1;
914                         cpu_single_env->watchpoint_hit = &hw_watchpoint;
915                         hw_watchpoint.vaddr = hw_breakpoint[n].addr;
916                         hw_watchpoint.flags = BP_MEM_ACCESS;
917                         break;
918                     }
919         }
920     } else if (kvm_find_sw_breakpoint(cpu_single_env, arch_info->pc))
921         handle = 1;
922 
923     if (!handle)
924         kvm_update_guest_debug(cpu_single_env,
925                         (arch_info->exception == 1) ?
926                         KVM_GUESTDBG_INJECT_DB : KVM_GUESTDBG_INJECT_BP);
927 
928     return handle;
929 }
930 
/*
 * Fill in the x86-specific part of a kvm_guest_debug request.
 *
 * Enables software-breakpoint interception when any software breakpoint is
 * active on 'env'.  When hardware breakpoints are registered, loads their
 * addresses into debugreg[0..3] and builds debugreg[7] from a base value of
 * 0x0600 plus, for each slot n: the enable bit (2 << 2n), the type code at
 * bit 16 + 4n and the length code at bit 18 + 4n.
 */
void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg)
{
    /* GDB breakpoint type -> 2-bit type code placed in debugreg[7] */
    const uint8_t type_code[] = {
        [GDB_BREAKPOINT_HW] = 0x0,
        [GDB_WATCHPOINT_WRITE] = 0x1,
        [GDB_WATCHPOINT_ACCESS] = 0x3
    };
    /* length in bytes -> 2-bit length code placed in debugreg[7] */
    const uint8_t len_code[] = {
        [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2
    };
    int n;

    if (kvm_sw_breakpoints_active(env)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }

    if (nb_hw_breakpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        dbg->arch.debugreg[7] = 0x0600;
        for (n = 0; n < nb_hw_breakpoint; n++) {
            dbg->arch.debugreg[n] = hw_breakpoint[n].addr;
            /* Shift as uint32_t: for n == 3 the length code lands in bits
             * 30..31, which overflows a signed int and would turn into a
             * negative value when OR-ed into debugreg[7]. */
            dbg->arch.debugreg[7] |= (2 << (n * 2)) |
                ((uint32_t)type_code[hw_breakpoint[n].type] << (16 + n*4)) |
                ((uint32_t)len_code[hw_breakpoint[n].len] << (18 + n*4));
        }
    }
}
957 #endif /* KVM_CAP_SET_GUEST_DEBUG */
958