--- zzzz-none-000/linux-2.6.19.2/arch/i386/mm/fault.c	2007-01-10 19:10:37.000000000 +0000
+++ davinci-8020-5505/linux-2.6.19.2/arch/i386/mm/fault.c	2007-01-19 14:42:56.000000000 +0000
@@ -22,6 +22,9 @@
 #include <linux/highmem.h>
 #include <linux/module.h>
 #include <linux/kprobes.h>
+#include <linux/unistd.h>
+#include <linux/compiler.h>
+#include <linux/binfmts.h>
 
 #include <asm/system.h>
 #include <asm/desc.h>
@@ -104,7 +107,8 @@
 {
 	unsigned long eip = regs->eip;
 	unsigned seg = regs->xcs & 0xffff;
-	u32 seg_ar, seg_limit, base, *desc;
+	u32 seg_ar, seg_limit, base;
+	struct desc_struct *desc;
 
 	/* Unlikely, but must come before segment checks. */
 	if (unlikely(regs->eflags & VM_MASK)) {
@@ -118,7 +122,7 @@
 
 	/* By far the most common cases. */
 	if (likely(SEGMENT_IS_FLAT_CODE(seg)))
-		return eip;
+		return eip + (seg == __KERNEL_CS ? __KERNEL_TEXT_OFFSET : 0);
 
 	/* Check the segment exists, is within the current LDT/GDT size,
 	   that kernel/user (ring 0..3) has the appropriate privilege,
@@ -136,16 +140,14 @@
 	if (seg & (1<<2)) {
 		/* Must lock the LDT while reading it. */
 		down(&current->mm->context.sem);
-		desc = current->mm->context.ldt;
-		desc = (void *)desc + (seg & ~7);
+		desc = &current->mm->context.ldt[seg >> 3];
 	} else {
 		/* Must disable preemption while reading the GDT. */
-		desc = (u32 *)get_cpu_gdt_table(get_cpu());
-		desc = (void *)desc + (seg & ~7);
+		desc = &get_cpu_gdt_table(get_cpu())[seg >> 3];
 	}
 
 	/* Decode the code segment base from the descriptor */
-	base = get_desc_base((unsigned long *)desc);
+	base = get_desc_base(desc);
 
 	if (seg & (1<<2)) {
 		up(&current->mm->context.sem);
@@ -246,6 +248,30 @@
 
 fastcall void do_invalid_op(struct pt_regs *, unsigned long);
 
+#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC)
+static int pax_handle_fetch_fault(struct pt_regs *regs);
+#endif
+
+#ifdef CONFIG_PAX_PAGEEXEC
+static inline pmd_t * pax_get_pmd(struct mm_struct *mm, unsigned long address)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd))
+		return NULL;
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return NULL;
+	pmd = pmd_offset(pud, address);
+	if (!pmd_present(*pmd))
+		return NULL;
+	return pmd;
+}
+#endif
+
 static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 {
 	unsigned index = pgd_index(address);
@@ -326,14 +352,20 @@
 	struct task_struct *tsk;
 	struct mm_struct *mm;
 	struct vm_area_struct * vma;
-	unsigned long address;
-	unsigned long page;
 	int write, si_code;
 
+#ifdef CONFIG_PAX_PAGEEXEC
+	pmd_t *pmd;
+	pte_t *pte;
+	spinlock_t *ptl;
+	unsigned char pte_mask;
+#endif
+
 	/* get the address */
-	address = read_cr2();
+	const unsigned long address = read_cr2();
 
 	tsk = current;
+	mm = tsk->mm;
 
 	si_code = SEGV_MAPERR;
 
@@ -372,14 +404,12 @@
 	if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
 		local_irq_enable();
 
-	mm = tsk->mm;
-
 	/*
	 * If we're in an interrupt, have no user context or are running in an
	 * atomic region then we must not take the fault..
	 */
 	if (in_atomic() || !mm)
-		goto bad_area_nosemaphore;
+		goto bad_area_nopax;
 
 	/* When running in the kernel we expect faults to occur only to
	 * addresses in user space.  All other faults represent errors in the
@@ -399,10 +429,101 @@
 	if (!down_read_trylock(&mm->mmap_sem)) {
 		if ((error_code & 4) == 0 &&
 		    !search_exception_tables(regs->eip))
-			goto bad_area_nosemaphore;
+			goto bad_area_nopax;
 		down_read(&mm->mmap_sem);
 	}
 
+#ifdef CONFIG_PAX_PAGEEXEC
+	if (unlikely((error_code & 5) != 5 ||
+		     (regs->eflags & X86_EFLAGS_VM) ||
+		     !(mm->pax_flags & MF_PAX_PAGEEXEC)))
+		goto not_pax_fault;
+
+	/* PaX: it's our fault, let's handle it if we can */
+
+	/* PaX: take a look at read faults before acquiring any locks */
+	if (unlikely(!(error_code & 2) && (regs->eip == address))) {
+		/* instruction fetch attempt from a protected page in user mode */
+		up_read(&mm->mmap_sem);
+
+#ifdef CONFIG_PAX_EMUTRAMP
+		switch (pax_handle_fetch_fault(regs)) {
+		case 2:
+			return;
+		}
+#endif
+
+		pax_report_fault(regs, (void*)regs->eip, (void*)regs->esp);
+		do_exit(SIGKILL);
+	}
+
+	pmd = pax_get_pmd(mm, address);
+	if (unlikely(!pmd))
+		goto not_pax_fault;
+
+	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
+	if (unlikely(!(pte_val(*pte) & _PAGE_PRESENT) || pte_user(*pte))) {
+		pte_unmap_unlock(pte, ptl);
+		goto not_pax_fault;
+	}
+
+	if (unlikely((error_code & 2) && !pte_write(*pte))) {
+		/* write attempt to a protected page in user mode */
+		pte_unmap_unlock(pte, ptl);
+		goto not_pax_fault;
+	}
+
+#ifdef CONFIG_SMP
+	if (likely(address > get_limit(regs->xcs) && cpu_isset(smp_processor_id(), mm->context.cpu_user_cs_mask)))
+#else
+	if (likely(address > get_limit(regs->xcs)))
+#endif
+	{
+		set_pte(pte, pte_mkread(*pte));
+		__flush_tlb_one(address);
+		pte_unmap_unlock(pte, ptl);
+		up_read(&mm->mmap_sem);
+		return;
+	}
+
+	pte_mask = _PAGE_ACCESSED | _PAGE_USER | ((error_code & 2) << (_PAGE_BIT_DIRTY-1));
+
+	/*
+	 * PaX: fill DTLB with user rights and retry
+	 */
+	__asm__ __volatile__ (
+		"movw %w4,%%ds\n"
+		"orb %2,%%ss:(%1)\n"
+#if defined(CONFIG_M586) || defined(CONFIG_M586TSC)
+/*
+ * PaX: let this uncommented 'invlpg' remind us on the behaviour of Intel's
+ * (and AMD's) TLBs. namely, they do not cache PTEs that would raise *any*
+ * page fault when examined during a TLB load attempt. this is true not only
+ * for PTEs holding a non-present entry but also present entries that will
+ * raise a page fault (such as those set up by PaX, or the copy-on-write
+ * mechanism). in effect it means that we do *not* need to flush the TLBs
+ * for our target pages since their PTEs are simply not in the TLBs at all.
+
+ * the best thing in omitting it is that we gain around 15-20% speed in the
+ * fast path of the page fault handler and can get rid of tracing since we
+ * can no longer flush unintended entries.
+ */ + "invlpg (%0)\n" +#endif + "testb $0,(%0)\n" + "xorb %3,%%ss:(%1)\n" + "pushl %%ss\n" + "popl %%ds\n" + : + : "q" (address), "r" (pte), "q" (pte_mask), "i" (_PAGE_USER), "r" (__USER_DS) + : "memory", "cc"); + pte_unmap_unlock(pte, ptl); + up_read(&mm->mmap_sem); + return; + +not_pax_fault: +#endif + vma = find_vma(mm, address); if (!vma) goto bad_area; @@ -484,6 +605,36 @@ up_read(&mm->mmap_sem); bad_area_nosemaphore: + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + if (mm && (error_code & 4) && !(regs->eflags & X86_EFLAGS_VM)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && !(error_code & 3) && (regs->eip == address)) { + pax_report_fault(regs, (void*)regs->eip, (void*)regs->esp); + do_exit(SIGKILL); + } +#endif + +#ifdef CONFIG_PAX_SEGMEXEC + if ((mm->pax_flags & MF_PAX_SEGMEXEC) && !(error_code & 3) && (regs->eip + SEGMEXEC_TASK_SIZE == address)) { + +#ifdef CONFIG_PAX_EMUTRAMP + switch (pax_handle_fetch_fault(regs)) { + case 2: + return; + } +#endif + + pax_report_fault(regs, (void*)regs->eip, (void*)regs->esp); + do_exit(SIGKILL); + } +#endif + + } +#endif + +bad_area_nopax: /* User mode accesses just cause a SIGSEGV */ if (error_code & 4) { /* @@ -551,6 +702,22 @@ if (address < PAGE_SIZE) printk(KERN_ALERT "BUG: unable to handle kernel NULL " "pointer dereference"); + +#ifdef CONFIG_PAX_KERNEXEC +#ifdef CONFIG_MODULES + else if (init_mm.start_code <= address && address < (unsigned long)MODULES_END) { +#else + else if (init_mm.start_code <= address && address < init_mm.end_code) { +#endif + if (tsk->signal->curr_ip) + printk(KERN_ERR "PAX: From %u.%u.%u.%u: %s:%d, uid/euid: %u/%u, attempted to modify kernel code", + NIPQUAD(tsk->signal->curr_ip), tsk->comm, tsk->pid, tsk->uid, tsk->euid); + else + printk(KERN_ERR "PAX: %s:%d, uid/euid: %u/%u, attempted to modify kernel code", + tsk->comm, tsk->pid, tsk->uid, tsk->euid); + } +#endif + else printk(KERN_ALERT "BUG: unable to handle kernel paging" " request"); @@ -558,24 +725,34 @@ printk(KERN_ALERT " printing eip:\n"); printk("%08lx\n", regs->eip); } - page = read_cr3(); - page = ((unsigned long *) __va(page))[address >> 22]; - if (oops_may_print()) - printk(KERN_ALERT "*pde = %08lx\n", page); - /* - * We must not directly access the pte in the highpte - * case, the page table might be allocated in highmem. - * And lets rather not kmap-atomic the pte, just in case - * it's allocated already. - */ + + if (oops_may_print()) { + unsigned long index = pgd_index(address); + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = index + (pgd_t *)__va(read_cr3()); + printk(KERN_ALERT "*pgd = %*llx\n", sizeof(*pgd), (unsigned long long)pgd_val(*pgd)); + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, address); + pmd = pmd_offset(pud, address); + printk(KERN_ALERT "*pmd = %*llx\n", sizeof(*pmd), (unsigned long long)pmd_val(*pmd)); + /* + * We must not directly access the pte in the highpte + * case, the page table might be allocated in highmem. + * And lets rather not kmap-atomic the pte, just in case + * it's allocated already. 
+			 */
 #ifndef CONFIG_HIGHPTE
-	if ((page & 1) && oops_may_print()) {
-		page &= PAGE_MASK;
-		address &= 0x003ff000;
-		page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
-		printk(KERN_ALERT "*pte = %08lx\n", page);
-	}
+			if (pmd_present(*pmd) && !pmd_large(*pmd)) {
+				pte = pte_offset_kernel(pmd, address);
+				printk(KERN_ALERT "*pte = %*llx\n", sizeof(*pte), (unsigned long long)pte_val(*pte));
+			}
 #endif
+		}
+	}
 	tsk->thread.cr2 = address;
 	tsk->thread.trap_no = 14;
 	tsk->thread.error_code = error_code;
@@ -652,3 +829,101 @@
 	}
 }
 #endif
+
+#ifdef CONFIG_PAX_EMUTRAMP
+/*
+ * PaX: decide what to do with offenders (regs->eip = fault address)
+ *
+ * returns 1 when task should be killed
+ *         2 when gcc trampoline was detected
+ */
+static int pax_handle_fetch_fault(struct pt_regs *regs)
+{
+
+	static const unsigned char trans[8] = {6, 1, 2, 0, 13, 5, 3, 4};
+	int err;
+
+	if (regs->eflags & X86_EFLAGS_VM)
+		return 1;
+
+	if (!(current->mm->pax_flags & MF_PAX_EMUTRAMP))
+		return 1;
+
+	do { /* PaX: gcc trampoline emulation #1 */
+		unsigned char mov1, mov2;
+		unsigned short jmp;
+		unsigned long addr1, addr2;
+
+		err = get_user(mov1, (unsigned char __user *)regs->eip);
+		err |= get_user(addr1, (unsigned long __user *)(regs->eip + 1));
+		err |= get_user(mov2, (unsigned char __user *)(regs->eip + 5));
+		err |= get_user(addr2, (unsigned long __user *)(regs->eip + 6));
+		err |= get_user(jmp, (unsigned short __user *)(regs->eip + 10));
+
+		if (err)
+			break;
+
+		if ((mov1 & 0xF8) == 0xB8 &&
+		    (mov2 & 0xF8) == 0xB8 &&
+		    (mov1 & 0x07) != (mov2 & 0x07) &&
+		    (jmp & 0xF8FF) == 0xE0FF &&
+		    (mov2 & 0x07) == ((jmp>>8) & 0x07))
+		{
+			((unsigned long *)regs)[trans[mov1 & 0x07]] = addr1;
+			((unsigned long *)regs)[trans[mov2 & 0x07]] = addr2;
+			regs->eip = addr2;
+			return 2;
+		}
+	} while (0);
+
+	do { /* PaX: gcc trampoline emulation #2 */
+		unsigned char mov, jmp;
+		unsigned long addr1, addr2;
+
+		err = get_user(mov, (unsigned char __user *)regs->eip);
+		err |= get_user(addr1, (unsigned long __user *)(regs->eip + 1));
+		err |= get_user(jmp, (unsigned char __user *)(regs->eip + 5));
+		err |= get_user(addr2, (unsigned long __user *)(regs->eip + 6));
+
+		if (err)
+			break;
+
+		if ((mov & 0xF8) == 0xB8 &&
+		    jmp == 0xE9)
+		{
+			((unsigned long *)regs)[trans[mov & 0x07]] = addr1;
+			regs->eip += addr2 + 10;
+			return 2;
+		}
+	} while (0);
+
+	return 1; /* PaX in action */
+}
+#endif
+
+#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC)
+void pax_report_insns(void *pc, void *sp)
+{
+	long i;
+
+	printk(KERN_ERR "PAX: bytes at PC: ");
+	for (i = 0; i < 20; i++) {
+		unsigned char c;
+		if (get_user(c, (unsigned char __user *)pc+i))
+			printk("?? ");
+		else
+			printk("%02x ", c);
+	}
+	printk("\n");
+
+	printk(KERN_ERR "PAX: bytes at SP-4: ");
+	for (i = -1; i < 20; i++) {
+		unsigned long c;
+		if (get_user(c, (unsigned long __user *)sp+i))
+			printk("???????? ");
+		else
+			printk("%08lx ", c);
+	}
+	printk("\n");
+}
+#endif