X-Git-Url: http://vcs.maemo.org/git/?a=blobdiff_plain;f=kqemu.c;h=5ba314f42bf25a4f094dc611e3d83704e9cfeb22;hb=768a4a36a444ef5aef1f103adf42553eadfe4614;hp=4d152502e7f8cbe3de44896e45f2c7f3c565376d;hpb=c45b3c0e1b5125d1588c988dca2174bf3cfa5287;p=qemu diff --git a/kqemu.c b/kqemu.c index 4d15250..5ba314f 100644 --- a/kqemu.c +++ b/kqemu.c @@ -40,15 +40,23 @@ #ifdef USE_KQEMU #define DEBUG +//#define PROFILE #include #include -#include "kqemu/kqemu.h" +#include "kqemu.h" /* compatibility stuff */ #ifndef KQEMU_RET_SYSCALL #define KQEMU_RET_SYSCALL 0x0300 /* syscall insn */ #endif +#ifndef KQEMU_MAX_RAM_PAGES_TO_UPDATE +#define KQEMU_MAX_RAM_PAGES_TO_UPDATE 512 +#define KQEMU_RAM_PAGES_UPDATE_ALL (KQEMU_MAX_RAM_PAGES_TO_UPDATE + 1) +#endif +#ifndef KQEMU_MAX_MODIFIED_RAM_PAGES +#define KQEMU_MAX_MODIFIED_RAM_PAGES 512 +#endif #ifdef _WIN32 #define KQEMU_DEVICE "\\\\.\\kqemu" @@ -66,9 +74,18 @@ int kqemu_fd = KQEMU_INVALID_FD; #define kqemu_closefd(x) close(x) #endif +/* 0 = not allowed + 1 = user kqemu + 2 = kernel kqemu +*/ int kqemu_allowed = 1; unsigned long *pages_to_flush; unsigned int nb_pages_to_flush; +unsigned long *ram_pages_to_update; +unsigned int nb_ram_pages_to_update; +unsigned long *modified_ram_pages; +unsigned int nb_modified_ram_pages; +uint8_t *modified_ram_pages_table; extern uint32_t **l1_phys_map; #define cpuid(index, eax, ebx, ecx, edx) \ @@ -102,7 +119,7 @@ static int is_cpuid_supported(void) static void kqemu_update_cpuid(CPUState *env) { - int critical_features_mask, features; + int critical_features_mask, features, ext_features, ext_features_mask; uint32_t eax, ebx, ecx, edx; /* the following features are kept identical on the host and @@ -112,15 +129,26 @@ static void kqemu_update_cpuid(CPUState *env) critical_features_mask = CPUID_CMOV | CPUID_CX8 | CPUID_FXSR | CPUID_MMX | CPUID_SSE | - CPUID_SSE2; + CPUID_SSE2 | CPUID_SEP; + ext_features_mask = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR; if (!is_cpuid_supported()) { features = 0; + ext_features = 0; } else { cpuid(1, eax, ebx, ecx, edx); features = edx; + ext_features = ecx; } +#ifdef __x86_64__ + /* NOTE: on x86_64 CPUs, SYSENTER is not supported in + compatibility mode, so in order to have the best performances + it is better not to use it */ + features &= ~CPUID_SEP; +#endif env->cpuid_features = (env->cpuid_features & ~critical_features_mask) | (features & critical_features_mask); + env->cpuid_ext_features = (env->cpuid_ext_features & ~ext_features_mask) | + (ext_features & ext_features_mask); /* XXX: we could update more of the target CPUID state so that the non accelerated code sees exactly the same CPU features as the accelerated code */ @@ -167,11 +195,30 @@ int kqemu_init(CPUState *env) if (!pages_to_flush) goto fail; + ram_pages_to_update = qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE * + sizeof(unsigned long)); + if (!ram_pages_to_update) + goto fail; + + modified_ram_pages = qemu_vmalloc(KQEMU_MAX_MODIFIED_RAM_PAGES * + sizeof(unsigned long)); + if (!modified_ram_pages) + goto fail; + modified_ram_pages_table = qemu_mallocz(phys_ram_size >> TARGET_PAGE_BITS); + if (!modified_ram_pages_table) + goto fail; + init.ram_base = phys_ram_base; init.ram_size = phys_ram_size; init.ram_dirty = phys_ram_dirty; init.phys_to_ram_map = l1_phys_map; init.pages_to_flush = pages_to_flush; +#if KQEMU_VERSION >= 0x010200 + init.ram_pages_to_update = ram_pages_to_update; +#endif +#if KQEMU_VERSION >= 0x010300 + init.modified_ram_pages = modified_ram_pages; +#endif #ifdef _WIN32 ret = DeviceIoControl(kqemu_fd, KQEMU_INIT, &init, sizeof(init), NULL, 0, &temp, NULL) == TRUE ? 0 : -1; @@ -186,14 +233,15 @@ int kqemu_init(CPUState *env) return -1; } kqemu_update_cpuid(env); - env->kqemu_enabled = 1; + env->kqemu_enabled = kqemu_allowed; nb_pages_to_flush = 0; + nb_ram_pages_to_update = 0; return 0; } void kqemu_flush_page(CPUState *env, target_ulong addr) { -#ifdef DEBUG +#if defined(DEBUG) if (loglevel & CPU_LOG_INT) { fprintf(logfile, "kqemu_flush_page: addr=" TARGET_FMT_lx "\n", addr); } @@ -214,6 +262,65 @@ void kqemu_flush(CPUState *env, int global) nb_pages_to_flush = KQEMU_FLUSH_ALL; } +void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr) +{ +#ifdef DEBUG + if (loglevel & CPU_LOG_INT) { + fprintf(logfile, "kqemu_set_notdirty: addr=%08lx\n", ram_addr); + } +#endif + /* we only track transitions to dirty state */ + if (phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] != 0xff) + return; + if (nb_ram_pages_to_update >= KQEMU_MAX_RAM_PAGES_TO_UPDATE) + nb_ram_pages_to_update = KQEMU_RAM_PAGES_UPDATE_ALL; + else + ram_pages_to_update[nb_ram_pages_to_update++] = ram_addr; +} + +static void kqemu_reset_modified_ram_pages(void) +{ + int i; + unsigned long page_index; + + for(i = 0; i < nb_modified_ram_pages; i++) { + page_index = modified_ram_pages[i] >> TARGET_PAGE_BITS; + modified_ram_pages_table[page_index] = 0; + } + nb_modified_ram_pages = 0; +} + +void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr) +{ + unsigned long page_index; + int ret; +#ifdef _WIN32 + DWORD temp; +#endif + + page_index = ram_addr >> TARGET_PAGE_BITS; + if (!modified_ram_pages_table[page_index]) { +#if 0 + printf("%d: modify_page=%08lx\n", nb_modified_ram_pages, ram_addr); +#endif + modified_ram_pages_table[page_index] = 1; + modified_ram_pages[nb_modified_ram_pages++] = ram_addr; + if (nb_modified_ram_pages >= KQEMU_MAX_MODIFIED_RAM_PAGES) { + /* flush */ +#ifdef _WIN32 + ret = DeviceIoControl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES, + &nb_modified_ram_pages, + sizeof(nb_modified_ram_pages), + NULL, 0, &temp, NULL); +#else + ret = ioctl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES, + &nb_modified_ram_pages); +#endif + kqemu_reset_modified_ram_pages(); + } + } +} + struct fpstate { uint16_t fpuc; uint16_t dummy1; @@ -363,9 +470,13 @@ static int do_syscall(CPUState *env, selector = (env->star >> 32) & 0xffff; #ifdef __x86_64__ if (env->hflags & HF_LMA_MASK) { + int code64; + env->regs[R_ECX] = kenv->next_eip; env->regs[11] = env->eflags; + code64 = env->hflags & HF_CS64_MASK; + cpu_x86_set_cpl(env, 0); cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc, 0, 0xffffffff, @@ -378,7 +489,7 @@ static int do_syscall(CPUState *env, DESC_S_MASK | DESC_W_MASK | DESC_A_MASK); env->eflags &= ~env->fmask; - if (env->hflags & HF_CS64_MASK) + if (code64) env->eip = env->lstar; else env->eip = env->cstar; @@ -404,14 +515,131 @@ static int do_syscall(CPUState *env, return 2; } +#ifdef CONFIG_PROFILER + +#define PC_REC_SIZE 1 +#define PC_REC_HASH_BITS 16 +#define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS) + +typedef struct PCRecord { + unsigned long pc; + int64_t count; + struct PCRecord *next; +} PCRecord; + +static PCRecord *pc_rec_hash[PC_REC_HASH_SIZE]; +static int nb_pc_records; + +static void kqemu_record_pc(unsigned long pc) +{ + unsigned long h; + PCRecord **pr, *r; + + h = pc / PC_REC_SIZE; + h = h ^ (h >> PC_REC_HASH_BITS); + h &= (PC_REC_HASH_SIZE - 1); + pr = &pc_rec_hash[h]; + for(;;) { + r = *pr; + if (r == NULL) + break; + if (r->pc == pc) { + r->count++; + return; + } + pr = &r->next; + } + r = malloc(sizeof(PCRecord)); + r->count = 1; + r->pc = pc; + r->next = NULL; + *pr = r; + nb_pc_records++; +} + +static int pc_rec_cmp(const void *p1, const void *p2) +{ + PCRecord *r1 = *(PCRecord **)p1; + PCRecord *r2 = *(PCRecord **)p2; + if (r1->count < r2->count) + return 1; + else if (r1->count == r2->count) + return 0; + else + return -1; +} + +static void kqemu_record_flush(void) +{ + PCRecord *r, *r_next; + int h; + + for(h = 0; h < PC_REC_HASH_SIZE; h++) { + for(r = pc_rec_hash[h]; r != NULL; r = r_next) { + r_next = r->next; + free(r); + } + pc_rec_hash[h] = NULL; + } + nb_pc_records = 0; +} + +void kqemu_record_dump(void) +{ + PCRecord **pr, *r; + int i, h; + FILE *f; + int64_t total, sum; + + pr = malloc(sizeof(PCRecord *) * nb_pc_records); + i = 0; + total = 0; + for(h = 0; h < PC_REC_HASH_SIZE; h++) { + for(r = pc_rec_hash[h]; r != NULL; r = r->next) { + pr[i++] = r; + total += r->count; + } + } + qsort(pr, nb_pc_records, sizeof(PCRecord *), pc_rec_cmp); + + f = fopen("/tmp/kqemu.stats", "w"); + if (!f) { + perror("/tmp/kqemu.stats"); + exit(1); + } + fprintf(f, "total: %" PRId64 "\n", total); + sum = 0; + for(i = 0; i < nb_pc_records; i++) { + r = pr[i]; + sum += r->count; + fprintf(f, "%08lx: %" PRId64 " %0.2f%% %0.2f%%\n", + r->pc, + r->count, + (double)r->count / (double)total * 100.0, + (double)sum / (double)total * 100.0); + } + fclose(f); + free(pr); + + kqemu_record_flush(); +} +#endif + int kqemu_cpu_exec(CPUState *env) { struct kqemu_cpu_state kcpu_state, *kenv = &kcpu_state; - int ret; + int ret, cpl, i; +#ifdef CONFIG_PROFILER + int64_t ti; +#endif + #ifdef _WIN32 DWORD temp; #endif +#ifdef CONFIG_PROFILER + ti = profile_getclock(); +#endif #ifdef DEBUG if (loglevel & CPU_LOG_INT) { fprintf(logfile, "kqemu: cpu_exec: enter\n"); @@ -434,6 +662,19 @@ int kqemu_cpu_exec(CPUState *env) #if KQEMU_VERSION >= 0x010100 kenv->efer = env->efer; #endif +#if KQEMU_VERSION >= 0x010300 + kenv->tsc_offset = 0; + kenv->star = env->star; + kenv->sysenter_cs = env->sysenter_cs; + kenv->sysenter_esp = env->sysenter_esp; + kenv->sysenter_eip = env->sysenter_eip; +#ifdef __x86_64__ + kenv->lstar = env->lstar; + kenv->cstar = env->cstar; + kenv->fmask = env->fmask; + kenv->kernelgsbase = env->kernelgsbase; +#endif +#endif if (env->dr[7] & 0xff) { kenv->dr7 = env->dr[7]; kenv->dr0 = env->dr[0]; @@ -444,23 +685,34 @@ int kqemu_cpu_exec(CPUState *env) kenv->dr7 = 0; } kenv->dr6 = env->dr[6]; - kenv->cpl = 3; + cpl = (env->hflags & HF_CPL_MASK); + kenv->cpl = cpl; kenv->nb_pages_to_flush = nb_pages_to_flush; - nb_pages_to_flush = 0; +#if KQEMU_VERSION >= 0x010200 + kenv->user_only = (env->kqemu_enabled == 1); + kenv->nb_ram_pages_to_update = nb_ram_pages_to_update; +#endif + nb_ram_pages_to_update = 0; - if (!(kenv->cr0 & CR0_TS_MASK)) { - if (env->cpuid_features & CPUID_FXSR) - restore_native_fp_fxrstor(env); - else - restore_native_fp_frstor(env); - } +#if KQEMU_VERSION >= 0x010300 + kenv->nb_modified_ram_pages = nb_modified_ram_pages; +#endif + kqemu_reset_modified_ram_pages(); + + if (env->cpuid_features & CPUID_FXSR) + restore_native_fp_fxrstor(env); + else + restore_native_fp_frstor(env); #ifdef _WIN32 - DeviceIoControl(kqemu_fd, KQEMU_EXEC, - kenv, sizeof(struct kqemu_cpu_state), - kenv, sizeof(struct kqemu_cpu_state), - &temp, NULL); - ret = kenv->retval; + if (DeviceIoControl(kqemu_fd, KQEMU_EXEC, + kenv, sizeof(struct kqemu_cpu_state), + kenv, sizeof(struct kqemu_cpu_state), + &temp, NULL)) { + ret = kenv->retval; + } else { + ret = -1; + } #else #if KQEMU_VERSION >= 0x010100 ioctl(kqemu_fd, KQEMU_EXEC, kenv); @@ -469,31 +721,110 @@ int kqemu_cpu_exec(CPUState *env) ret = ioctl(kqemu_fd, KQEMU_EXEC, kenv); #endif #endif - if (!(kenv->cr0 & CR0_TS_MASK)) { - if (env->cpuid_features & CPUID_FXSR) - save_native_fp_fxsave(env); - else - save_native_fp_fsave(env); - } + if (env->cpuid_features & CPUID_FXSR) + save_native_fp_fxsave(env); + else + save_native_fp_fsave(env); memcpy(env->regs, kenv->regs, sizeof(env->regs)); env->eip = kenv->eip; env->eflags = kenv->eflags; memcpy(env->segs, kenv->segs, sizeof(env->segs)); + cpu_x86_set_cpl(env, kenv->cpl); + memcpy(&env->ldt, &kenv->ldt, sizeof(env->ldt)); #if 0 /* no need to restore that */ - memcpy(env->ldt, kenv->ldt, sizeof(env->ldt)); memcpy(env->tr, kenv->tr, sizeof(env->tr)); memcpy(env->gdt, kenv->gdt, sizeof(env->gdt)); memcpy(env->idt, kenv->idt, sizeof(env->idt)); - env->cr[0] = kenv->cr0; - env->cr[3] = kenv->cr3; - env->cr[4] = kenv->cr4; env->a20_mask = kenv->a20_mask; #endif + env->cr[0] = kenv->cr0; + env->cr[4] = kenv->cr4; + env->cr[3] = kenv->cr3; env->cr[2] = kenv->cr2; env->dr[6] = kenv->dr6; +#if KQEMU_VERSION >= 0x010300 +#ifdef __x86_64__ + env->kernelgsbase = kenv->kernelgsbase; +#endif +#endif + /* flush pages as indicated by kqemu */ + if (kenv->nb_pages_to_flush >= KQEMU_FLUSH_ALL) { + tlb_flush(env, 1); + } else { + for(i = 0; i < kenv->nb_pages_to_flush; i++) { + tlb_flush_page(env, pages_to_flush[i]); + } + } + nb_pages_to_flush = 0; + +#ifdef CONFIG_PROFILER + kqemu_time += profile_getclock() - ti; + kqemu_exec_count++; +#endif + +#if KQEMU_VERSION >= 0x010200 + if (kenv->nb_ram_pages_to_update > 0) { + cpu_tlb_update_dirty(env); + } +#endif + +#if KQEMU_VERSION >= 0x010300 + if (kenv->nb_modified_ram_pages > 0) { + for(i = 0; i < kenv->nb_modified_ram_pages; i++) { + unsigned long addr; + addr = modified_ram_pages[i]; + tb_invalidate_phys_page_range(addr, addr + TARGET_PAGE_SIZE, 0); + } + } +#endif + + /* restore the hidden flags */ + { + unsigned int new_hflags; +#ifdef TARGET_X86_64 + if ((env->hflags & HF_LMA_MASK) && + (env->segs[R_CS].flags & DESC_L_MASK)) { + /* long mode */ + new_hflags = HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK; + } else +#endif + { + /* legacy / compatibility case */ + new_hflags = (env->segs[R_CS].flags & DESC_B_MASK) + >> (DESC_B_SHIFT - HF_CS32_SHIFT); + new_hflags |= (env->segs[R_SS].flags & DESC_B_MASK) + >> (DESC_B_SHIFT - HF_SS32_SHIFT); + if (!(env->cr[0] & CR0_PE_MASK) || + (env->eflags & VM_MASK) || + !(env->hflags & HF_CS32_MASK)) { + /* XXX: try to avoid this test. The problem comes from the + fact that is real mode or vm86 mode we only modify the + 'base' and 'selector' fields of the segment cache to go + faster. A solution may be to force addseg to one in + translate-i386.c. */ + new_hflags |= HF_ADDSEG_MASK; + } else { + new_hflags |= ((env->segs[R_DS].base | + env->segs[R_ES].base | + env->segs[R_SS].base) != 0) << + HF_ADDSEG_SHIFT; + } + } + env->hflags = (env->hflags & + ~(HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)) | + new_hflags; + } + /* update FPU flags */ + env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) | + ((env->cr[0] << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)); + if (env->cr[4] & CR4_OSFXSR_MASK) + env->hflags |= HF_OSFXSR_MASK; + else + env->hflags &= ~HF_OSFXSR_MASK; + #ifdef DEBUG if (loglevel & CPU_LOG_INT) { fprintf(logfile, "kqemu: kqemu_cpu_exec: ret=0x%x\n", ret); @@ -508,6 +839,9 @@ int kqemu_cpu_exec(CPUState *env) env->error_code = 0; env->exception_is_int = 1; env->exception_next_eip = kenv->next_eip; +#ifdef CONFIG_PROFILER + kqemu_ret_int_count++; +#endif #ifdef DEBUG if (loglevel & CPU_LOG_INT) { fprintf(logfile, "kqemu: interrupt v=%02x:\n", @@ -521,6 +855,9 @@ int kqemu_cpu_exec(CPUState *env) env->error_code = kenv->error_code; env->exception_is_int = 0; env->exception_next_eip = 0; +#ifdef CONFIG_PROFILER + kqemu_ret_excp_count++; +#endif #ifdef DEBUG if (loglevel & CPU_LOG_INT) { fprintf(logfile, "kqemu: exception v=%02x e=%04x:\n", @@ -530,6 +867,9 @@ int kqemu_cpu_exec(CPUState *env) #endif return 1; } else if (ret == KQEMU_RET_INTR) { +#ifdef CONFIG_PROFILER + kqemu_ret_intr_count++; +#endif #ifdef DEBUG if (loglevel & CPU_LOG_INT) { cpu_dump_state(env, logfile, fprintf, 0); @@ -537,6 +877,17 @@ int kqemu_cpu_exec(CPUState *env) #endif return 0; } else if (ret == KQEMU_RET_SOFTMMU) { +#ifdef CONFIG_PROFILER + { + unsigned long pc = env->eip + env->segs[R_CS].base; + kqemu_record_pc(pc); + } +#endif +#ifdef DEBUG + if (loglevel & CPU_LOG_INT) { + cpu_dump_state(env, logfile, fprintf, 0); + } +#endif return 2; } else { cpu_dump_state(env, stderr, fprintf, 0); @@ -546,4 +897,13 @@ int kqemu_cpu_exec(CPUState *env) return 0; } +void kqemu_cpu_interrupt(CPUState *env) +{ +#if defined(_WIN32) && KQEMU_VERSION >= 0x010101 + /* cancelling the I/O request causes KQEMU to finish executing the + current block and successfully returning. */ + CancelIo(kqemu_fd); +#endif +} + #endif