--- zzzz-none-000/linux-3.10.107/drivers/lguest/x86/core.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/drivers/lguest/x86/core.c 2021-02-04 17:41:59.000000000 +0000 @@ -46,7 +46,8 @@ #include #include #include -#include +#include +#include #include "../lg.h" static int cpu_had_pge; @@ -157,7 +158,7 @@ * stack, then the address of this call. This stack layout happens to * exactly match the stack layout created by an interrupt... */ - asm volatile("pushf; lcall *lguest_entry" + asm volatile("pushf; lcall *%4" /* * This is how we tell GCC that %eax ("a") and %ebx ("b") * are changed by this routine. The "=" means output. @@ -169,7 +170,9 @@ * physical address of the Guest's top-level page * directory. */ - : "0"(pages), "1"(__pa(cpu->lg->pgdirs[cpu->cpu_pgd].pgdir)) + : "0"(pages), + "1"(__pa(cpu->lg->pgdirs[cpu->cpu_pgd].pgdir)), + "m"(lguest_entry) /* * We tell gcc that all these registers could change, * which means we don't have to save and restore them in @@ -179,6 +182,52 @@ } /*:*/ +unsigned long *lguest_arch_regptr(struct lg_cpu *cpu, size_t reg_off, bool any) +{ + switch (reg_off) { + case offsetof(struct pt_regs, bx): + return &cpu->regs->ebx; + case offsetof(struct pt_regs, cx): + return &cpu->regs->ecx; + case offsetof(struct pt_regs, dx): + return &cpu->regs->edx; + case offsetof(struct pt_regs, si): + return &cpu->regs->esi; + case offsetof(struct pt_regs, di): + return &cpu->regs->edi; + case offsetof(struct pt_regs, bp): + return &cpu->regs->ebp; + case offsetof(struct pt_regs, ax): + return &cpu->regs->eax; + case offsetof(struct pt_regs, ip): + return &cpu->regs->eip; + case offsetof(struct pt_regs, sp): + return &cpu->regs->esp; + } + + /* Launcher can read these, but we don't allow any setting. */ + if (any) { + switch (reg_off) { + case offsetof(struct pt_regs, ds): + return &cpu->regs->ds; + case offsetof(struct pt_regs, es): + return &cpu->regs->es; + case offsetof(struct pt_regs, fs): + return &cpu->regs->fs; + case offsetof(struct pt_regs, gs): + return &cpu->regs->gs; + case offsetof(struct pt_regs, cs): + return &cpu->regs->cs; + case offsetof(struct pt_regs, flags): + return &cpu->regs->eflags; + case offsetof(struct pt_regs, ss): + return &cpu->regs->ss; + } + } + + return NULL; +} + /*M:002 * There are hooks in the scheduler which we can register to tell when we * get kicked off the CPU (preempt_notifier_register()). This would allow us @@ -202,7 +251,7 @@ * we set it now, so we can trap and pass that trap to the Guest if it * uses the FPU. */ - if (cpu->ts && user_has_fpu()) + if (cpu->ts && fpregs_active()) stts(); /* @@ -234,7 +283,7 @@ wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); /* Clear the host TS bit if it was set above. */ - if (cpu->ts && user_has_fpu()) + if (cpu->ts && fpregs_active()) clts(); /* @@ -248,12 +297,12 @@ /* * Similarly, if we took a trap because the Guest used the FPU, * we have to restore the FPU it expects to see. - * math_state_restore() may sleep and we may even move off to + * fpu__restore() may sleep and we may even move off to * a different CPU. So all the critical stuff should be done * before this. */ - else if (cpu->regs->trapnum == 7 && !user_has_fpu()) - math_state_restore(); + else if (cpu->regs->trapnum == 7 && !fpregs_active()) + fpu__restore(¤t->thread.fpu); } /*H:130 @@ -266,110 +315,73 @@ * usually attached to a PC. * * When the Guest uses one of these instructions, we get a trap (General - * Protection Fault) and come here. We see if it's one of those troublesome - * instructions and skip over it. We return true if we did. + * Protection Fault) and come here. We queue this to be sent out to the + * Launcher to handle. */ -static int emulate_insn(struct lg_cpu *cpu) -{ - u8 insn; - unsigned int insnlen = 0, in = 0, small_operand = 0; - /* - * The eip contains the *virtual* address of the Guest's instruction: - * walk the Guest's page tables to find the "physical" address. - */ - unsigned long physaddr = guest_pa(cpu, cpu->regs->eip); - /* - * This must be the Guest kernel trying to do something, not userspace! - * The bottom two bits of the CS segment register are the privilege - * level. - */ - if ((cpu->regs->cs & 3) != GUEST_PL) - return 0; +/* + * The eip contains the *virtual* address of the Guest's instruction: + * we copy the instruction here so the Launcher doesn't have to walk + * the page tables to decode it. We handle the case (eg. in a kernel + * module) where the instruction is over two pages, and the pages are + * virtually but not physically contiguous. + * + * The longest possible x86 instruction is 15 bytes, but we don't handle + * anything that strange. + */ +static void copy_from_guest(struct lg_cpu *cpu, + void *dst, unsigned long vaddr, size_t len) +{ + size_t to_page_end = PAGE_SIZE - (vaddr % PAGE_SIZE); + unsigned long paddr; - /* Decoding x86 instructions is icky. */ - insn = lgread(cpu, physaddr, u8); + BUG_ON(len > PAGE_SIZE); - /* - * Around 2.6.33, the kernel started using an emulation for the - * cmpxchg8b instruction in early boot on many configurations. This - * code isn't paravirtualized, and it tries to disable interrupts. - * Ignore it, which will Mostly Work. - */ - if (insn == 0xfa) { - /* "cli", or Clear Interrupt Enable instruction. Skip it. */ - cpu->regs->eip++; - return 1; + /* If it goes over a page, copy in two parts. */ + if (len > to_page_end) { + /* But make sure the next page is mapped! */ + if (__guest_pa(cpu, vaddr + to_page_end, &paddr)) + copy_from_guest(cpu, dst + to_page_end, + vaddr + to_page_end, + len - to_page_end); + else + /* Otherwise fill with zeroes. */ + memset(dst + to_page_end, 0, len - to_page_end); + len = to_page_end; } - /* - * 0x66 is an "operand prefix". It means a 16, not 32 bit in/out. - */ - if (insn == 0x66) { - small_operand = 1; - /* The instruction is 1 byte so far, read the next byte. */ - insnlen = 1; - insn = lgread(cpu, physaddr + insnlen, u8); - } + /* This will kill the guest if it isn't mapped, but that + * shouldn't happen. */ + __lgread(cpu, dst, guest_pa(cpu, vaddr), len); +} - /* - * We can ignore the lower bit for the moment and decode the 4 opcodes - * we need to emulate. - */ - switch (insn & 0xFE) { - case 0xE4: /* in ,%al */ - insnlen += 2; - in = 1; - break; - case 0xEC: /* in (%dx),%al */ - insnlen += 1; - in = 1; - break; - case 0xE6: /* out %al, */ - insnlen += 2; - break; - case 0xEE: /* out %al,(%dx) */ - insnlen += 1; - break; - default: - /* OK, we don't know what this is, can't emulate. */ - return 0; - } - /* - * If it was an "IN" instruction, they expect the result to be read - * into %eax, so we change %eax. We always return all-ones, which - * traditionally means "there's nothing there". - */ - if (in) { - /* Lower bit tells means it's a 32/16 bit access */ - if (insn & 0x1) { - if (small_operand) - cpu->regs->eax |= 0xFFFF; - else - cpu->regs->eax = 0xFFFFFFFF; - } else - cpu->regs->eax |= 0xFF; - } - /* Finally, we've "done" the instruction, so move past it. */ - cpu->regs->eip += insnlen; - /* Success! */ - return 1; +static void setup_emulate_insn(struct lg_cpu *cpu) +{ + cpu->pending.trap = 13; + copy_from_guest(cpu, cpu->pending.insn, cpu->regs->eip, + sizeof(cpu->pending.insn)); +} + +static void setup_iomem_insn(struct lg_cpu *cpu, unsigned long iomem_addr) +{ + cpu->pending.trap = 14; + cpu->pending.addr = iomem_addr; + copy_from_guest(cpu, cpu->pending.insn, cpu->regs->eip, + sizeof(cpu->pending.insn)); } /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */ void lguest_arch_handle_trap(struct lg_cpu *cpu) { + unsigned long iomem_addr; + switch (cpu->regs->trapnum) { case 13: /* We've intercepted a General Protection Fault. */ - /* - * Check if this was one of those annoying IN or OUT - * instructions which we need to emulate. If so, we just go - * back into the Guest after we've done it. - */ + /* Hand to Launcher to emulate those pesky IN and OUT insns */ if (cpu->regs->errcode == 0) { - if (emulate_insn(cpu)) - return; + setup_emulate_insn(cpu); + return; } break; case 14: /* We've intercepted a Page Fault. */ @@ -384,9 +396,16 @@ * whether kernel or userspace code. */ if (demand_page(cpu, cpu->arch.last_pagefault, - cpu->regs->errcode)) + cpu->regs->errcode, &iomem_addr)) return; + /* Was this an access to memory mapped IO? */ + if (iomem_addr) { + /* Tell Launcher, let it handle it. */ + setup_iomem_insn(cpu, iomem_addr); + return; + } + /* * OK, it's really not there (or not OK): the Guest needs to * know. We write out the cr2 value so it knows where the @@ -450,9 +469,9 @@ static void adjust_pge(void *on) { if (on) - write_cr4(read_cr4() | X86_CR4_PGE); + cr4_set_bits(X86_CR4_PGE); else - write_cr4(read_cr4() & ~X86_CR4_PGE); + cr4_clear_bits(X86_CR4_PGE); } /*H:020