/*
 *  KQEMU support
 *
 *  Copyright (c) 2005-2008 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#include "config.h"
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <winioctl.h>
#else
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#endif
#ifdef HOST_SOLARIS
#include <sys/ioccom.h>
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <inttypes.h>

#include "cpu.h"
#include "exec-all.h"
#include "qemu-common.h"

#ifdef USE_KQEMU

#define DEBUG
//#define PROFILE

#include <fcntl.h>
#include "kqemu.h"

#ifdef _WIN32
#define KQEMU_DEVICE "\\\\.\\kqemu"
#else
#define KQEMU_DEVICE "/dev/kqemu"
#endif

static void qpi_init(void);

#ifdef _WIN32
#define KQEMU_INVALID_FD INVALID_HANDLE_VALUE
HANDLE kqemu_fd = KQEMU_INVALID_FD;
#define kqemu_closefd(x) CloseHandle(x)
#else
#define KQEMU_INVALID_FD -1
int kqemu_fd = KQEMU_INVALID_FD;
#define kqemu_closefd(x) close(x)
#endif

/* 0 = not allowed
   1 = user kqemu
   2 = kernel kqemu
*/
int kqemu_allowed = 1;
uint64_t *pages_to_flush;
unsigned int nb_pages_to_flush;
uint64_t *ram_pages_to_update;
unsigned int nb_ram_pages_to_update;
uint64_t *modified_ram_pages;
unsigned int nb_modified_ram_pages;
uint8_t *modified_ram_pages_table;
int qpi_io_memory;
uint32_t kqemu_comm_base; /* physical address of the QPI communication page */

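/* Note (added commentary): the three page arrays above are shared with the
   kqemu kernel module. They are registered once in kqemu_init() and then act
   as batched work queues: QEMU appends page addresses and passes the element
   counts through struct kqemu_cpu_state on each KQEMU_EXEC; when a queue
   would overflow, the count saturates to an *_ALL sentinel
   (e.g. KQEMU_FLUSH_ALL) and the consumer falls back to a full pass. */
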
#define cpuid(index, eax, ebx, ecx, edx) \
  asm volatile ("cpuid" \
                : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \
                : "0" (index))

#ifdef __x86_64__
static int is_cpuid_supported(void)
{
    return 1;
}
#else
static int is_cpuid_supported(void)
{
    int v0, v1;
    /* CPUID is available iff bit 21 (the ID flag) of EFLAGS can be toggled */
    asm volatile ("pushf\n"
                  "popl %0\n"
                  "movl %0, %1\n"
                  "xorl $0x00200000, %0\n"
                  "pushl %0\n"
                  "popf\n"
                  "pushf\n"
                  "popl %0\n"
                  : "=a" (v0), "=d" (v1)
                  :
                  : "cc");
    return (v0 != v1);
}
#endif

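/* Illustrative sketch (not part of the original file): how the cpuid()
   macro and is_cpuid_supported() combine to probe CPUID leaf 1, which is
   exactly what kqemu_update_cpuid() does below. */
#if 0
static void example_probe_features(void)
{
    uint32_t eax, ebx, ecx, edx;

    if (!is_cpuid_supported())
        return;                    /* pre-486 class CPU: no CPUID at all */
    cpuid(1, eax, ebx, ecx, edx);  /* leaf 1: EDX/ECX hold the feature bits */
    if (edx & CPUID_SSE2)
        printf("host supports SSE2\n");
}
#endif
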
static void kqemu_update_cpuid(CPUState *env)
{
    int critical_features_mask, features, ext_features, ext_features_mask;
    uint32_t eax, ebx, ecx, edx;

    /* the following features are kept identical on the host and
       target CPUs because they are important for user code. Strictly
       speaking, only SSE really matters because the OS must support
       it if the user code uses it. */
    critical_features_mask =
        CPUID_CMOV | CPUID_CX8 |
        CPUID_FXSR | CPUID_MMX | CPUID_SSE |
        CPUID_SSE2 | CPUID_SEP;
    ext_features_mask = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR;
    if (!is_cpuid_supported()) {
        features = 0;
        ext_features = 0;
    } else {
        cpuid(1, eax, ebx, ecx, edx);
        features = edx;
        ext_features = ecx;
    }
#ifdef __x86_64__
    /* NOTE: on x86_64 CPUs, SYSENTER is not supported in compatibility
       mode, so for best performance it is better not to use it */
    features &= ~CPUID_SEP;
#endif
    env->cpuid_features = (env->cpuid_features & ~critical_features_mask) |
        (features & critical_features_mask);
    env->cpuid_ext_features = (env->cpuid_ext_features & ~ext_features_mask) |
        (ext_features & ext_features_mask);
    /* XXX: we could update more of the target CPUID state so that the
       non-accelerated code sees exactly the same CPU features as the
       accelerated code */
}

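/* Added commentary: the masking above is a two-way merge. For example, with
   CPUID_SSE2 in critical_features_mask, a host *without* SSE2 clears the bit
   in env->cpuid_features (so guest user code never relies on an instruction
   set the host cannot execute natively), while a host *with* SSE2 sets it,
   keeping guest and host user-level behaviour in sync. */
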
int kqemu_init(CPUState *env)
{
    struct kqemu_init kinit;
    int ret, version;
#ifdef _WIN32
    DWORD temp;
#endif

    if (!kqemu_allowed)
        return -1;

#ifdef _WIN32
    kqemu_fd = CreateFile(KQEMU_DEVICE, GENERIC_WRITE | GENERIC_READ,
                          FILE_SHARE_READ | FILE_SHARE_WRITE,
                          NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
                          NULL);
    if (kqemu_fd == KQEMU_INVALID_FD) {
        fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %lu\n",
                KQEMU_DEVICE, GetLastError());
        return -1;
    }
#else
    kqemu_fd = open(KQEMU_DEVICE, O_RDWR);
    if (kqemu_fd == KQEMU_INVALID_FD) {
        fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %s\n",
                KQEMU_DEVICE, strerror(errno));
        return -1;
    }
#endif
    version = 0;
#ifdef _WIN32
    DeviceIoControl(kqemu_fd, KQEMU_GET_VERSION, NULL, 0,
                    &version, sizeof(version), &temp, NULL);
#else
    ioctl(kqemu_fd, KQEMU_GET_VERSION, &version);
#endif
    if (version != KQEMU_VERSION) {
        fprintf(stderr, "Version mismatch between kqemu module and qemu (%08x %08x) - disabling kqemu use\n",
                version, KQEMU_VERSION);
        goto fail;
    }

    pages_to_flush = qemu_vmalloc(KQEMU_MAX_PAGES_TO_FLUSH *
                                  sizeof(uint64_t));
    if (!pages_to_flush)
        goto fail;

    ram_pages_to_update = qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE *
                                       sizeof(uint64_t));
    if (!ram_pages_to_update)
        goto fail;

    modified_ram_pages = qemu_vmalloc(KQEMU_MAX_MODIFIED_RAM_PAGES *
                                      sizeof(uint64_t));
    if (!modified_ram_pages)
        goto fail;
    modified_ram_pages_table = qemu_mallocz(phys_ram_size >> TARGET_PAGE_BITS);
    if (!modified_ram_pages_table)
        goto fail;

    memset(&kinit, 0, sizeof(kinit)); /* set the paddings to zero */
    kinit.ram_base = phys_ram_base;
    kinit.ram_size = phys_ram_size;
    kinit.ram_dirty = phys_ram_dirty;
    kinit.pages_to_flush = pages_to_flush;
    kinit.ram_pages_to_update = ram_pages_to_update;
    kinit.modified_ram_pages = modified_ram_pages;
#ifdef _WIN32
    ret = DeviceIoControl(kqemu_fd, KQEMU_INIT, &kinit, sizeof(kinit),
                          NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
#else
    ret = ioctl(kqemu_fd, KQEMU_INIT, &kinit);
#endif
    if (ret < 0) {
        fprintf(stderr, "Error %d while initializing QEMU acceleration layer - disabling it for now\n", ret);
    fail:
        kqemu_closefd(kqemu_fd);
        kqemu_fd = KQEMU_INVALID_FD;
        return -1;
    }
    kqemu_update_cpuid(env);
    env->kqemu_enabled = kqemu_allowed;
    nb_pages_to_flush = 0;
    nb_ram_pages_to_update = 0;

    qpi_init();
    return 0;
}

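/* Illustrative sketch (not in the original file): the expected bring-up
   sequence from machine setup code, written as a hypothetical caller. */
#if 0
static void example_accel_bringup(CPUState *env)
{
    if (kqemu_init(env) < 0) {
        /* module missing or version mismatch: fall back to
           dynamic translation only */
        return;
    }
    /* env->kqemu_enabled now mirrors kqemu_allowed:
       1 = accelerate user code only, 2 = accelerate kernel code too */
}
#endif
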
void kqemu_flush_page(CPUState *env, target_ulong addr)
{
#if defined(DEBUG)
    if (loglevel & CPU_LOG_INT) {
        fprintf(logfile, "kqemu_flush_page: addr=" TARGET_FMT_lx "\n", addr);
    }
#endif
    if (nb_pages_to_flush >= KQEMU_MAX_PAGES_TO_FLUSH)
        nb_pages_to_flush = KQEMU_FLUSH_ALL;
    else
        pages_to_flush[nb_pages_to_flush++] = addr;
}

void kqemu_flush(CPUState *env, int global)
{
#ifdef DEBUG
    if (loglevel & CPU_LOG_INT) {
        fprintf(logfile, "kqemu_flush:\n");
    }
#endif
    nb_pages_to_flush = KQEMU_FLUSH_ALL;
}

void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr)
{
#ifdef DEBUG
    if (loglevel & CPU_LOG_INT) {
        fprintf(logfile, "kqemu_set_notdirty: addr=%08lx\n",
                (unsigned long)ram_addr);
    }
#endif
    /* we only track transitions to dirty state */
    if (phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] != 0xff)
        return;
    if (nb_ram_pages_to_update >= KQEMU_MAX_RAM_PAGES_TO_UPDATE)
        nb_ram_pages_to_update = KQEMU_RAM_PAGES_UPDATE_ALL;
    else
        ram_pages_to_update[nb_ram_pages_to_update++] = ram_addr;
}

static void kqemu_reset_modified_ram_pages(void)
{
    int i;
    unsigned long page_index;

    for(i = 0; i < nb_modified_ram_pages; i++) {
        page_index = modified_ram_pages[i] >> TARGET_PAGE_BITS;
        modified_ram_pages_table[page_index] = 0;
    }
    nb_modified_ram_pages = 0;
}

void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr)
{
    unsigned long page_index;
    int ret;
#ifdef _WIN32
    DWORD temp;
#endif

    page_index = ram_addr >> TARGET_PAGE_BITS;
    if (!modified_ram_pages_table[page_index]) {
#if 0
        printf("%d: modify_page=%08lx\n", nb_modified_ram_pages, ram_addr);
#endif
        modified_ram_pages_table[page_index] = 1;
        modified_ram_pages[nb_modified_ram_pages++] = ram_addr;
        if (nb_modified_ram_pages >= KQEMU_MAX_MODIFIED_RAM_PAGES) {
            /* flush */
#ifdef _WIN32
            ret = DeviceIoControl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
                                  &nb_modified_ram_pages,
                                  sizeof(nb_modified_ram_pages),
                                  NULL, 0, &temp, NULL);
#else
            ret = ioctl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
                        &nb_modified_ram_pages);
#endif
            kqemu_reset_modified_ram_pages();
        }
    }
}

void kqemu_set_phys_mem(uint64_t start_addr, ram_addr_t size,
                        ram_addr_t phys_offset)
{
    struct kqemu_phys_mem kphys_mem1, *kphys_mem = &kphys_mem1;
    uint64_t end;
    int ret, io_index;

    end = (start_addr + size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
    start_addr &= TARGET_PAGE_MASK;
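    /* Added commentary: the region is widened to whole target pages. For
       example, with 4 KiB pages, start_addr=0x1234 and size=0x2000 give
       end=(0x3234+0xfff)&~0xfff=0x4000 and start_addr=0x1000, so the
       registered range becomes [0x1000,0x4000). */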
    kphys_mem->phys_addr = start_addr;
    kphys_mem->size = end - start_addr;
    kphys_mem->ram_addr = phys_offset & TARGET_PAGE_MASK;
    io_index = phys_offset & ~TARGET_PAGE_MASK;
    switch(io_index) {
    case IO_MEM_RAM:
        kphys_mem->io_index = KQEMU_IO_MEM_RAM;
        break;
    case IO_MEM_ROM:
        kphys_mem->io_index = KQEMU_IO_MEM_ROM;
        break;
    default:
        if (qpi_io_memory == io_index) {
            kphys_mem->io_index = KQEMU_IO_MEM_COMM;
        } else {
            kphys_mem->io_index = KQEMU_IO_MEM_UNASSIGNED;
        }
        break;
    }
#ifdef _WIN32
    {
        DWORD temp;
        ret = DeviceIoControl(kqemu_fd, KQEMU_SET_PHYS_MEM,
                              kphys_mem, sizeof(*kphys_mem),
                              NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
    }
#else
    ret = ioctl(kqemu_fd, KQEMU_SET_PHYS_MEM, kphys_mem);
#endif
    if (ret < 0) {
        fprintf(stderr, "kqemu: KQEMU_SET_PHYS_MEM error=%d: start_addr=0x%016" PRIx64 " size=0x%08lx phys_offset=0x%08lx\n",
                ret, start_addr,
                (unsigned long)size, (unsigned long)phys_offset);
    }
}

struct fpstate {
    uint16_t fpuc;
    uint16_t dummy1;
    uint16_t fpus;
    uint16_t dummy2;
    uint16_t fptag;
    uint16_t dummy3;

    uint32_t fpip;
    uint32_t fpcs;
    uint32_t fpoo;
    uint32_t fpos;
    uint8_t fpregs1[8 * 10];
};

struct fpxstate {
    uint16_t fpuc;
    uint16_t fpus;
    uint16_t fptag;
    uint16_t fop;
    uint32_t fpuip;
    uint16_t cs_sel;
    uint16_t dummy0;
    uint32_t fpudp;
    uint16_t ds_sel;
    uint16_t dummy1;
    uint32_t mxcsr;
    uint32_t mxcsr_mask;
    uint8_t fpregs1[8 * 16];
    uint8_t xmm_regs[16 * 16];
    uint8_t dummy2[96];
};

static struct fpxstate fpx1 __attribute__((aligned(16)));

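/* Illustrative sketch (not in the original file): the two structs mirror
   the memory images of FSAVE (108 bytes) and FXSAVE (512 bytes, 16-byte
   aligned, hence the attribute on fpx1) exactly, which compile-time
   checks like these would confirm. */
#if 0
typedef char fpstate_size_check[sizeof(struct fpstate) == 108 ? 1 : -1];
typedef char fpxstate_size_check[sizeof(struct fpxstate) == 512 ? 1 : -1];
#endif
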
static void restore_native_fp_frstor(CPUState *env)
{
    int fptag, i, j;
    struct fpstate fp1, *fp = &fp1;

    fp->fpuc = env->fpuc;
    fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    /* build the FSAVE tag word: two bits per register, 11 = empty */
    fptag = 0;
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            /* 00 = valid: the FPU automatically computes the exact tag */
        }
    }
    fp->fptag = fptag;
    j = env->fpstt;
    for(i = 0; i < 8; i++) {
        memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10);
        j = (j + 1) & 7;
    }
    /* frstor reads the memory image, so it is an input operand */
    asm volatile ("frstor %0" : : "m" (*fp));
}

static void save_native_fp_fsave(CPUState *env)
{
    int fptag, i, j;
    uint16_t fpuc;
    struct fpstate fp1, *fp = &fp1;

    /* fsave writes the memory image (and then reinitializes the FPU),
       so the operand is an output */
    asm volatile ("fsave %0" : "=m" (*fp));
    env->fpuc = fp->fpuc;
    env->fpstt = (fp->fpus >> 11) & 7;
    env->fpus = fp->fpus & ~0x3800;
    fptag = fp->fptag;
    for(i = 0; i < 8; i++) {
        env->fptags[i] = ((fptag & 3) == 3);
        fptag >>= 2;
    }
    j = env->fpstt;
    for(i = 0; i < 8; i++) {
        memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10);
        j = (j + 1) & 7;
    }
    /* we must restore the default rounding state */
    fpuc = 0x037f | (env->fpuc & (3 << 10));
    asm volatile("fldcw %0" : : "m" (fpuc));
}

static void restore_native_fp_fxrstor(CPUState *env)
{
    struct fpxstate *fp = &fpx1;
    int i, j, fptag;

    fp->fpuc = env->fpuc;
    fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    /* FXSAVE/FXRSTOR use an abridged tag: one bit per register,
       1 = non-empty, hence the XOR with 0xff */
    fptag = 0;
    for(i = 0; i < 8; i++)
        fptag |= (env->fptags[i] << i);
    fp->fptag = fptag ^ 0xff;

    j = env->fpstt;
    for(i = 0; i < 8; i++) {
        memcpy(&fp->fpregs1[i * 16], &env->fpregs[j].d, 10);
        j = (j + 1) & 7;
    }
    if (env->cpuid_features & CPUID_SSE) {
        fp->mxcsr = env->mxcsr;
        /* XXX: check if DAZ is not available */
        fp->mxcsr_mask = 0xffff;
        memcpy(fp->xmm_regs, env->xmm_regs, CPU_NB_REGS * 16);
    }
    /* fxrstor reads the memory image, so it is an input operand */
    asm volatile ("fxrstor %0" : : "m" (*fp));
}

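/* Illustrative note (not in the original file): the two tag encodings for
   the same register file. With only ST0 and ST1 in use, i.e.
   env->fptags[] = {0,0,1,1,1,1,1,1} (1 = empty):
     restore_native_fp_frstor()  builds the 2-bits-per-register tag word
                                 11 11 11 11 11 11 00 00 = 0xfff0;
     restore_native_fp_fxrstor() builds the abridged 1-bit-per-register
                                 tag (1 = non-empty): 00000011 = 0x03. */
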
static void save_native_fp_fxsave(CPUState *env)
{
    struct fpxstate *fp = &fpx1;
    int fptag, i, j;
    uint16_t fpuc;

    /* fxsave writes the memory image, so the operand is an output */
    asm volatile ("fxsave %0" : "=m" (*fp));
    env->fpuc = fp->fpuc;
    env->fpstt = (fp->fpus >> 11) & 7;
    env->fpus = fp->fpus & ~0x3800;
    fptag = fp->fptag ^ 0xff;
    for(i = 0; i < 8; i++) {
        env->fptags[i] = (fptag >> i) & 1;
    }
    j = env->fpstt;
    for(i = 0; i < 8; i++) {
        memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 16], 10);
        j = (j + 1) & 7;
    }
    if (env->cpuid_features & CPUID_SSE) {
        env->mxcsr = fp->mxcsr;
        memcpy(env->xmm_regs, fp->xmm_regs, CPU_NB_REGS * 16);
    }

    /* we must restore the default rounding state */
    asm volatile ("fninit");
    fpuc = 0x037f | (env->fpuc & (3 << 10));
    asm volatile("fldcw %0" : : "m" (fpuc));
}

static int do_syscall(CPUState *env,
                      struct kqemu_cpu_state *kenv)
{
    int selector;

    selector = (env->star >> 32) & 0xffff;
#ifdef TARGET_X86_64
    if (env->hflags & HF_LMA_MASK) {
        int code64;

        env->regs[R_ECX] = kenv->next_eip;
        env->regs[11] = env->eflags;

        code64 = env->hflags & HF_CS64_MASK;

        cpu_x86_set_cpl(env, 0);
        cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
        cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_W_MASK | DESC_A_MASK);
        env->eflags &= ~env->fmask;
        if (code64)
            env->eip = env->lstar;
        else
            env->eip = env->cstar;
    } else
#endif
    {
        env->regs[R_ECX] = (uint32_t)kenv->next_eip;

        cpu_x86_set_cpl(env, 0);
        cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
        cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_W_MASK | DESC_A_MASK);
        env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
        env->eip = (uint32_t)env->star;
    }
    return 2;
}

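/* Added commentary: do_syscall() emulates the x86 SYSCALL entry path using
   the MSR values held in CPUState:
     STAR[47:32] selects the kernel CS (SS = CS + 8) and STAR[31:0] is the
     legacy-mode entry EIP; in long mode, LSTAR/CSTAR give the 64-bit and
     compatibility-mode entry points, RCX receives the return RIP, R11
     receives RFLAGS, and SFMASK (env->fmask) clears the masked flags.
   The return value 2 follows the same convention as KQEMU_RET_SOFTMMU in
   kqemu_cpu_exec() below: resume with the dynamic translator. */
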
#ifdef CONFIG_PROFILER

#define PC_REC_SIZE 1
#define PC_REC_HASH_BITS 16
#define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS)

typedef struct PCRecord {
    unsigned long pc;
    int64_t count;
    struct PCRecord *next;
} PCRecord;

static PCRecord *pc_rec_hash[PC_REC_HASH_SIZE];
static int nb_pc_records;

static void kqemu_record_pc(unsigned long pc)
{
    unsigned long h;
    PCRecord **pr, *r;

    h = pc / PC_REC_SIZE;
    h = h ^ (h >> PC_REC_HASH_BITS);
    h &= (PC_REC_HASH_SIZE - 1);
    pr = &pc_rec_hash[h];
    for(;;) {
        r = *pr;
        if (r == NULL)
            break;
        if (r->pc == pc) {
            r->count++;
            return;
        }
        pr = &r->next;
    }
    r = malloc(sizeof(PCRecord));
    r->count = 1;
    r->pc = pc;
    r->next = NULL;
    *pr = r;
    nb_pc_records++;
}

static int pc_rec_cmp(const void *p1, const void *p2)
{
    PCRecord *r1 = *(PCRecord **)p1;
    PCRecord *r2 = *(PCRecord **)p2;
    if (r1->count < r2->count)
        return 1;
    else if (r1->count == r2->count)
        return 0;
    else
        return -1;
}

static void kqemu_record_flush(void)
{
    PCRecord *r, *r_next;
    int h;

    for(h = 0; h < PC_REC_HASH_SIZE; h++) {
        for(r = pc_rec_hash[h]; r != NULL; r = r_next) {
            r_next = r->next;
            free(r);
        }
        pc_rec_hash[h] = NULL;
    }
    nb_pc_records = 0;
}

void kqemu_record_dump(void)
{
    PCRecord **pr, *r;
    int i, h;
    FILE *f;
    int64_t total, sum;

    pr = malloc(sizeof(PCRecord *) * nb_pc_records);
    i = 0;
    total = 0;
    for(h = 0; h < PC_REC_HASH_SIZE; h++) {
        for(r = pc_rec_hash[h]; r != NULL; r = r->next) {
            pr[i++] = r;
            total += r->count;
        }
    }
    qsort(pr, nb_pc_records, sizeof(PCRecord *), pc_rec_cmp);

    f = fopen("/tmp/kqemu.stats", "w");
    if (!f) {
        perror("/tmp/kqemu.stats");
        exit(1);
    }
    fprintf(f, "total: %" PRId64 "\n", total);
    sum = 0;
    for(i = 0; i < nb_pc_records; i++) {
        r = pr[i];
        sum += r->count;
        fprintf(f, "%08lx: %" PRId64 " %0.2f%% %0.2f%%\n",
                r->pc,
                r->count,
                (double)r->count / (double)total * 100.0,
                (double)sum / (double)total * 100.0);
    }
    fclose(f);
    free(pr);

    kqemu_record_flush();
}
#endif

static inline void kqemu_load_seg(struct kqemu_segment_cache *ksc,
                                  const SegmentCache *sc)
{
    ksc->selector = sc->selector;
    ksc->flags = sc->flags;
    ksc->limit = sc->limit;
    ksc->base = sc->base;
}

static inline void kqemu_save_seg(SegmentCache *sc,
                                  const struct kqemu_segment_cache *ksc)
{
    sc->selector = ksc->selector;
    sc->flags = ksc->flags;
    sc->limit = ksc->limit;
    sc->base = ksc->base;
}

int kqemu_cpu_exec(CPUState *env)
{
    struct kqemu_cpu_state kcpu_state, *kenv = &kcpu_state;
    int ret, cpl, i;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif
#ifdef _WIN32
    DWORD temp;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
#ifdef DEBUG
    if (loglevel & CPU_LOG_INT) {
        fprintf(logfile, "kqemu: cpu_exec: enter\n");
        cpu_dump_state(env, logfile, fprintf, 0);
    }
#endif
    for(i = 0; i < CPU_NB_REGS; i++)
        kenv->regs[i] = env->regs[i];
    kenv->eip = env->eip;
    kenv->eflags = env->eflags;
    for(i = 0; i < 6; i++)
        kqemu_load_seg(&kenv->segs[i], &env->segs[i]);
    kqemu_load_seg(&kenv->ldt, &env->ldt);
    kqemu_load_seg(&kenv->tr, &env->tr);
    kqemu_load_seg(&kenv->gdt, &env->gdt);
    kqemu_load_seg(&kenv->idt, &env->idt);
    kenv->cr0 = env->cr[0];
    kenv->cr2 = env->cr[2];
    kenv->cr3 = env->cr[3];
    kenv->cr4 = env->cr[4];
    kenv->a20_mask = env->a20_mask;
    kenv->efer = env->efer;
    kenv->tsc_offset = 0;
    kenv->star = env->star;
    kenv->sysenter_cs = env->sysenter_cs;
    kenv->sysenter_esp = env->sysenter_esp;
    kenv->sysenter_eip = env->sysenter_eip;
#ifdef TARGET_X86_64
    kenv->lstar = env->lstar;
    kenv->cstar = env->cstar;
    kenv->fmask = env->fmask;
    kenv->kernelgsbase = env->kernelgsbase;
#endif
    if (env->dr[7] & 0xff) {
        kenv->dr7 = env->dr[7];
        kenv->dr0 = env->dr[0];
        kenv->dr1 = env->dr[1];
        kenv->dr2 = env->dr[2];
        kenv->dr3 = env->dr[3];
    } else {
        kenv->dr7 = 0;
    }
    kenv->dr6 = env->dr[6];
    cpl = (env->hflags & HF_CPL_MASK);
    kenv->cpl = cpl;
    kenv->nb_pages_to_flush = nb_pages_to_flush;
    kenv->user_only = (env->kqemu_enabled == 1);
    kenv->nb_ram_pages_to_update = nb_ram_pages_to_update;
    nb_ram_pages_to_update = 0;
    kenv->nb_modified_ram_pages = nb_modified_ram_pages;

    kqemu_reset_modified_ram_pages();

    if (env->cpuid_features & CPUID_FXSR)
        restore_native_fp_fxrstor(env);
    else
        restore_native_fp_frstor(env);

#ifdef _WIN32
    if (DeviceIoControl(kqemu_fd, KQEMU_EXEC,
                        kenv, sizeof(struct kqemu_cpu_state),
                        kenv, sizeof(struct kqemu_cpu_state),
                        &temp, NULL)) {
        ret = kenv->retval;
    } else {
        ret = -1;
    }
#else
    ioctl(kqemu_fd, KQEMU_EXEC, kenv);
    ret = kenv->retval;
#endif
    if (env->cpuid_features & CPUID_FXSR)
        save_native_fp_fxsave(env);
    else
        save_native_fp_fsave(env);

    for(i = 0; i < CPU_NB_REGS; i++)
        env->regs[i] = kenv->regs[i];
    env->eip = kenv->eip;
    env->eflags = kenv->eflags;
    for(i = 0; i < 6; i++)
        kqemu_save_seg(&env->segs[i], &kenv->segs[i]);
    cpu_x86_set_cpl(env, kenv->cpl);
    kqemu_save_seg(&env->ldt, &kenv->ldt);
    env->cr[0] = kenv->cr0;
    env->cr[4] = kenv->cr4;
    env->cr[3] = kenv->cr3;
    env->cr[2] = kenv->cr2;
    env->dr[6] = kenv->dr6;
#ifdef TARGET_X86_64
    env->kernelgsbase = kenv->kernelgsbase;
#endif

    /* flush pages as indicated by kqemu */
    if (kenv->nb_pages_to_flush >= KQEMU_FLUSH_ALL) {
        tlb_flush(env, 1);
    } else {
        for(i = 0; i < kenv->nb_pages_to_flush; i++) {
            tlb_flush_page(env, pages_to_flush[i]);
        }
    }
    nb_pages_to_flush = 0;

#ifdef CONFIG_PROFILER
    kqemu_time += profile_getclock() - ti;
    kqemu_exec_count++;
#endif

    if (kenv->nb_ram_pages_to_update > 0) {
        cpu_tlb_update_dirty(env);
    }

    if (kenv->nb_modified_ram_pages > 0) {
        for(i = 0; i < kenv->nb_modified_ram_pages; i++) {
            unsigned long addr;
            addr = modified_ram_pages[i];
            tb_invalidate_phys_page_range(addr, addr + TARGET_PAGE_SIZE, 0);
        }
    }

    /* restore the hidden flags */
    {
        unsigned int new_hflags;
#ifdef TARGET_X86_64
        if ((env->hflags & HF_LMA_MASK) &&
            (env->segs[R_CS].flags & DESC_L_MASK)) {
            /* long mode */
            new_hflags = HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
        } else
#endif
        {
            /* legacy / compatibility case */
            new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
                >> (DESC_B_SHIFT - HF_CS32_SHIFT);
            new_hflags |= (env->segs[R_SS].flags & DESC_B_MASK)
                >> (DESC_B_SHIFT - HF_SS32_SHIFT);
            if (!(env->cr[0] & CR0_PE_MASK) ||
                   (env->eflags & VM_MASK) ||
                   !(env->hflags & HF_CS32_MASK)) {
                /* XXX: try to avoid this test. The problem comes from the
                   fact that in real mode or vm86 mode we only modify the
                   'base' and 'selector' fields of the segment cache to go
                   faster. A solution may be to force addseg to one in
                   translate-i386.c. */
                new_hflags |= HF_ADDSEG_MASK;
            } else {
                new_hflags |= ((env->segs[R_DS].base |
                                env->segs[R_ES].base |
                                env->segs[R_SS].base) != 0) <<
                    HF_ADDSEG_SHIFT;
            }
        }
        env->hflags = (env->hflags &
           ~(HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)) |
            new_hflags;
    }
    /* update FPU flags */
    env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
        ((env->cr[0] << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
    if (env->cr[4] & CR4_OSFXSR_MASK)
        env->hflags |= HF_OSFXSR_MASK;
    else
        env->hflags &= ~HF_OSFXSR_MASK;

#ifdef DEBUG
    if (loglevel & CPU_LOG_INT) {
        fprintf(logfile, "kqemu: kqemu_cpu_exec: ret=0x%x\n", ret);
    }
#endif
    if (ret == KQEMU_RET_SYSCALL) {
        /* syscall instruction */
        return do_syscall(env, kenv);
    } else if ((ret & 0xff00) == KQEMU_RET_INT) {
        env->exception_index = ret & 0xff;
        env->error_code = 0;
        env->exception_is_int = 1;
        env->exception_next_eip = kenv->next_eip;
#ifdef CONFIG_PROFILER
        kqemu_ret_int_count++;
#endif
#ifdef DEBUG
        if (loglevel & CPU_LOG_INT) {
            fprintf(logfile, "kqemu: interrupt v=%02x:\n",
                    env->exception_index);
            cpu_dump_state(env, logfile, fprintf, 0);
        }
#endif
        return 1;
    } else if ((ret & 0xff00) == KQEMU_RET_EXCEPTION) {
        env->exception_index = ret & 0xff;
        env->error_code = kenv->error_code;
        env->exception_is_int = 0;
        env->exception_next_eip = 0;
#ifdef CONFIG_PROFILER
        kqemu_ret_excp_count++;
#endif
#ifdef DEBUG
        if (loglevel & CPU_LOG_INT) {
            fprintf(logfile, "kqemu: exception v=%02x e=%04x:\n",
                    env->exception_index, env->error_code);
            cpu_dump_state(env, logfile, fprintf, 0);
        }
#endif
        return 1;
    } else if (ret == KQEMU_RET_INTR) {
#ifdef CONFIG_PROFILER
        kqemu_ret_intr_count++;
#endif
#ifdef DEBUG
        if (loglevel & CPU_LOG_INT) {
            cpu_dump_state(env, logfile, fprintf, 0);
        }
#endif
        return 0;
    } else if (ret == KQEMU_RET_SOFTMMU) {
#ifdef CONFIG_PROFILER
        {
            unsigned long pc = env->eip + env->segs[R_CS].base;
            kqemu_record_pc(pc);
        }
#endif
#ifdef DEBUG
        if (loglevel & CPU_LOG_INT) {
            cpu_dump_state(env, logfile, fprintf, 0);
        }
#endif
        return 2;
    } else {
        cpu_dump_state(env, stderr, fprintf, 0);
        fprintf(stderr, "Unsupported return value: 0x%x\n", ret);
        exit(1);
    }
    return 0;
}

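/* Illustrative sketch (not in the original file): how a caller in the main
   execution loop could interpret kqemu_cpu_exec()'s return value, under the
   convention visible above (0 = interrupted by the host, 1 = guest
   interrupt/exception to deliver, 2 = fall back to the translator). */
#if 0
static void example_dispatch(CPUState *env)
{
    switch (kqemu_cpu_exec(env)) {
    case 0: /* KQEMU_RET_INTR: a host signal is pending, service it */
        break;
    case 1: /* interrupt/exception recorded in env, deliver it */
        break;
    case 2: /* KQEMU_RET_SOFTMMU or SYSCALL: continue with translated code */
        break;
    }
}
#endif
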
void kqemu_cpu_interrupt(CPUState *env)
{
#if defined(_WIN32)
    /* cancelling the I/O request causes KQEMU to finish executing the
       current block and return successfully. */
    CancelIo(kqemu_fd);
#endif
}

/*
   QEMU paravirtualization interface. The current interface only allows
   the IF and IOPL flags to be modified when running in kqemu.

   At this point it is not very satisfactory. I leave it for reference
   as it adds little complexity.
*/

#define QPI_COMM_PAGE_PHYS_ADDR 0xff000000

static uint32_t qpi_mem_readb(void *opaque, target_phys_addr_t addr)
{
    return 0;
}

static uint32_t qpi_mem_readw(void *opaque, target_phys_addr_t addr)
{
    return 0;
}

static void qpi_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
{
}

static void qpi_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
{
}

static uint32_t qpi_mem_readl(void *opaque, target_phys_addr_t addr)
{
    CPUState *env;

    env = cpu_single_env;
    if (!env)
        return 0;
    return env->eflags & (IF_MASK | IOPL_MASK);
}

/* Note: after writing to this address, the guest code must make sure
   it is exiting the current TB. pushf/popf can be used for that
   purpose. */
static void qpi_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    CPUState *env;

    env = cpu_single_env;
    if (!env)
        return;
    env->eflags = (env->eflags & ~(IF_MASK | IOPL_MASK)) |
        (val & (IF_MASK | IOPL_MASK));
}

static CPUReadMemoryFunc *qpi_mem_read[3] = {
    qpi_mem_readb,
    qpi_mem_readw,
    qpi_mem_readl,
};

static CPUWriteMemoryFunc *qpi_mem_write[3] = {
    qpi_mem_writeb,
    qpi_mem_writew,
    qpi_mem_writel,
};

static void qpi_init(void)
{
    kqemu_comm_base = QPI_COMM_PAGE_PHYS_ADDR | 1;
    qpi_io_memory = cpu_register_io_memory(0,
                                           qpi_mem_read,
                                           qpi_mem_write, NULL);
    cpu_register_physical_memory(kqemu_comm_base & ~0xfff,
                                 0x1000, qpi_io_memory);
}
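
/* Illustrative sketch (not in the original file): what a paravirtualized
   guest driver could do with the QPI page, assuming it has mapped
   guest-physical address QPI_COMM_PAGE_PHYS_ADDR (0xff000000) into its
   address space as 'qpi_page'. Hypothetical guest-side code. */
#if 0
static inline uint32_t guest_qpi_get_if_iopl(volatile uint32_t *qpi_page)
{
    return *qpi_page;            /* routed to qpi_mem_readl() */
}

static inline void guest_qpi_set_if_iopl(volatile uint32_t *qpi_page,
                                         uint32_t val)
{
    *qpi_page = val;             /* routed to qpi_mem_writel() */
    /* a pushf/popf pair here would force an exit from the current TB,
       as required by the note above qpi_mem_writel() */
}
#endif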
#endif /* USE_KQEMU */