--- zzzz-none-000/linux-2.6.19.2/mm/mmap.c	2007-01-10 19:10:37.000000000 +0000
+++ davinci-8020-5505/linux-2.6.19.2/mm/mmap.c	2007-01-19 14:42:56.000000000 +0000
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -251,6 +252,7 @@
 	 * not page aligned -Ram Gupta
 	 */
 	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+	gr_learn_resource(current, RLIMIT_DATA, brk - mm->start_data, 1);
 	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
 		goto out;
 
@@ -639,11 +641,17 @@
  * If the vma has a ->close operation then the driver probably needs to release
  * per-vma resources, so we don't attempt to merge those.
  */
+#ifdef CONFIG_PAX_SEGMEXEC
+#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP | VM_MIRROR)
+#else
 #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
+#endif
 
 static inline int is_mergeable_vma(struct vm_area_struct *vma,
 			struct file *file, unsigned long vm_flags)
 {
+	if ((vma->vm_flags | vm_flags) & VM_SPECIAL)
+		return 0;
 	if (vma->vm_flags != vm_flags)
 		return 0;
 	if (vma->vm_file != file)
@@ -868,14 +876,11 @@
 void vm_stat_account(struct mm_struct *mm, unsigned long flags,
 						struct file *file, long pages)
 {
-	const unsigned long stack_flags
-		= VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
-
 	if (file) {
 		mm->shared_vm += pages;
 		if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
 			mm->exec_vm += pages;
-	} else if (flags & stack_flags)
+	} else if (flags & (VM_GROWSUP|VM_GROWSDOWN))
 		mm->stack_vm += pages;
 	if (flags & (VM_RESERVED|VM_IO))
 		mm->reserved_vm += pages;
@@ -886,10 +891,55 @@
  * The caller must hold down_write(current->mm->mmap_sem).
  */
 
+#ifdef CONFIG_PAX_SEGMEXEC
+static unsigned long __do_mmap_pgoff(struct file * file, unsigned long addr,
+	unsigned long len, unsigned long prot,
+	unsigned long flags, unsigned long pgoff);
+
 unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
 			unsigned long len, unsigned long prot,
 			unsigned long flags, unsigned long pgoff)
 {
+	unsigned long ret = -EINVAL;
+
+	if (flags & MAP_MIRROR)
+		return ret;
+
+	if ((current->mm->pax_flags & MF_PAX_SEGMEXEC) &&
+	    (len > SEGMEXEC_TASK_SIZE || (addr > SEGMEXEC_TASK_SIZE-len)))
+		return ret;
+
+	ret = __do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+
+	if ((current->mm->pax_flags & MF_PAX_SEGMEXEC) && ret < TASK_SIZE && ((flags & MAP_TYPE) == MAP_PRIVATE)
+
+#ifdef CONFIG_PAX_MPROTECT
+	    && (!(current->mm->pax_flags & MF_PAX_MPROTECT) || ((prot & PROT_EXEC) && file && !(prot & PROT_WRITE)))
+#endif
+
+	   )
+	{
+		unsigned long ret_m;
+		prot = prot & PROT_EXEC ? prot & ~PROT_WRITE : PROT_NONE;
+		ret_m = __do_mmap_pgoff(NULL, ret + SEGMEXEC_TASK_SIZE, 0UL, prot, flags | MAP_MIRROR | MAP_FIXED, ret);
+		if (ret_m >= TASK_SIZE) {
+			do_munmap(current->mm, ret, len);
+			ret = ret_m;
+		}
+	}
+
+	return ret;
+}
+
+static unsigned long __do_mmap_pgoff(struct file * file, unsigned long addr,
+	unsigned long len, unsigned long prot,
+	unsigned long flags, unsigned long pgoff)
+#else
+unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
+			unsigned long len, unsigned long prot,
+			unsigned long flags, unsigned long pgoff)
+#endif
+{
 	struct mm_struct * mm = current->mm;
 	struct vm_area_struct * vma, * prev;
 	struct inode *inode;
@@ -900,13 +950,35 @@
 	int accountable = 1;
 	unsigned long charged = 0, reqprot = prot;
 
+#ifdef CONFIG_PAX_SEGMEXEC
+	struct vm_area_struct * vma_m = NULL;
+
+	if (flags & MAP_MIRROR) {
+		/* PaX: sanity checks, to be removed when proved to be stable */
+		if (file || len || ((flags & MAP_TYPE) != MAP_PRIVATE))
+			return -EINVAL;
+
+		vma_m = find_vma(mm, pgoff);
+
+		if (!vma_m || is_vm_hugetlb_page(vma_m) ||
+		    vma_m->vm_start != pgoff ||
+		    (vma_m->vm_flags & VM_SPECIAL) ||
+		    (prot & PROT_WRITE))
+			return -EINVAL;
+
+		file = vma_m->vm_file;
+		pgoff = vma_m->vm_pgoff;
+		len = vma_m->vm_end - vma_m->vm_start;
+	}
+#endif
+
 	/*
 	 * Does the application expect PROT_READ to imply PROT_EXEC?
 	 *
 	 * (the exception is when the underlying filesystem is noexec
 	 *  mounted, in which case we dont add PROT_EXEC.)
 	 */
-	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
+	if ((prot & (PROT_READ | PROT_WRITE)) && (current->personality & READ_IMPLIES_EXEC))
 		if (!(file && (file->f_vfsmnt->mnt_flags & MNT_NOEXEC)))
 			prot |= PROT_EXEC;
 
@@ -933,9 +1005,10 @@
 	/* Obtain the address to map to. we verify (or select) it and ensure
 	 * that it represents a valid section of the address space.
 	 */
-	addr = get_unmapped_area(file, addr, len, pgoff, flags);
-	if (addr & ~PAGE_MASK)
+	addr = get_unmapped_area(file, addr, len, pgoff, flags | ((prot & PROT_EXEC) ? MAP_EXECUTABLE : 0));
+	if (addr & ~PAGE_MASK) {
 		return addr;
+	}
 
 	/* Do simple checking here so the lower-level routines won't have
 	 * to. we assume access permissions have been handled by the open
@@ -944,6 +1017,21 @@
 	vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
 			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 
+#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC)
+	if (mm->pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) {
+
+#ifdef CONFIG_PAX_MPROTECT
+		if (mm->pax_flags & MF_PAX_MPROTECT) {
+			if ((prot & (PROT_WRITE | PROT_EXEC)) != PROT_EXEC)
+				vm_flags &= ~(VM_EXEC | VM_MAYEXEC);
+			else
+				vm_flags &= ~(VM_WRITE | VM_MAYWRITE);
+		}
+#endif
+
+	}
+#endif
+
 	if (flags & MAP_LOCKED) {
 		if (!can_do_mlock())
 			return -EPERM;
@@ -956,6 +1044,7 @@
 		locked += mm->locked_vm;
 		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
 		lock_limit >>= PAGE_SHIFT;
+		gr_learn_resource(current, RLIMIT_MEMLOCK, locked << PAGE_SHIFT, 1);
 		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
 			return -EAGAIN;
 	}
@@ -965,7 +1054,7 @@
 	if (file) {
 		switch (flags & MAP_TYPE) {
 		case MAP_SHARED:
-			if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
+			if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE))
 				return -EACCES;
 
 			/*
@@ -1013,6 +1102,11 @@
 			/*
 			 * Set pgoff according to addr for anon_vma.
 			 */
+
+#ifdef CONFIG_PAX_SEGMEXEC
+			if (!(flags & MAP_MIRROR))
+#endif
+
 			pgoff = addr >> PAGE_SHIFT;
 			break;
 		default:
@@ -1021,19 +1115,26 @@
 	}
 
 	error = security_file_mmap(file, reqprot, prot, flags);
-	if (error)
+	if (error) {
 		return error;
+	}
 
+	if (!gr_acl_handle_mmap(file, prot)) {
+		return -EACCES;
+	}
+
 	/* Clear old maps */
 	error = -ENOMEM;
-munmap_back:
 	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
 	if (vma && vma->vm_start < addr + len) {
+
 		if (do_munmap(mm, addr, len))
 			return -ENOMEM;
-		goto munmap_back;
+		vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
+		BUG_ON(vma && vma->vm_start < addr + len);
 	}
 
+	error = -ENOMEM;
 	/* Check against address space limit. */
 	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
 		return -ENOMEM;
@@ -1079,6 +1180,13 @@
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
 	vma->vm_flags = vm_flags;
+
+#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_X86_32)
+	if ((file || !(mm->pax_flags & MF_PAX_PAGEEXEC)) && (vm_flags & (VM_READ|VM_WRITE)))
+		vma->vm_page_prot = protection_map[(vm_flags | VM_EXEC) & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+	else
+#endif
+
 	vma->vm_page_prot = protection_map[vm_flags &
 				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
 	vma->vm_pgoff = pgoff;
@@ -1104,6 +1212,14 @@
 			goto free_vma;
 	}
 
+#ifdef CONFIG_PAX_SEGMEXEC
+	if (flags & MAP_MIRROR) {
+		vma_m->vm_flags |= VM_MIRROR;
+		vma_m->vm_mirror = vma->vm_start - vma_m->vm_start;
+		vma->vm_mirror = vma_m->vm_start - vma->vm_start;
+	}
+#endif
+
 	/* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform
 	 * shmem_zero_setup (perhaps called through /dev/zero's ->mmap)
 	 * that memory reservation must be checked; but that reservation
@@ -1121,9 +1237,17 @@
 	pgoff = vma->vm_pgoff;
 	vm_flags = vma->vm_flags;
 
-	if (vma_wants_writenotify(vma))
+	if (vma_wants_writenotify(vma)) {
+
+#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_X86_32)
+		if ((file || !(mm->pax_flags & MF_PAX_PAGEEXEC)) && (vm_flags & (VM_READ|VM_WRITE)))
+			vma->vm_page_prot = protection_map[(vm_flags | VM_EXEC) & (VM_READ|VM_WRITE|VM_EXEC)];
+		else
+#endif
+
 		vma->vm_page_prot =
 			protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)];
+	}
 
 	if (!file || !vma_merge(mm, prev, addr, vma->vm_end,
 			vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
@@ -1143,10 +1267,12 @@
 out:
 	mm->total_vm += len >> PAGE_SHIFT;
 	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
+	track_exec_limit(mm, addr, addr + len, vm_flags);
 	if (vm_flags & VM_LOCKED) {
 		mm->locked_vm += len >> PAGE_SHIFT;
 		make_pages_present(addr, addr + len);
 	}
+
 	if (flags & MAP_POPULATE) {
 		up_write(&mm->mmap_sem);
 		sys_remap_file_pages(addr, len, 0,
@@ -1197,6 +1323,10 @@
 	if (len > TASK_SIZE)
 		return -ENOMEM;
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
@@ -1207,7 +1337,7 @@
 	if (len > mm->cached_hole_size) {
 		start_addr = addr = mm->free_area_cache;
 	} else {
-		start_addr = addr = TASK_UNMAPPED_BASE;
+		start_addr = addr = mm->mmap_base;
 		mm->cached_hole_size = 0;
 	}
 
@@ -1219,9 +1349,8 @@
 			 * Start a new search - just in case we missed
 			 * some holes.
 			 */
-			if (start_addr != TASK_UNMAPPED_BASE) {
-				addr = TASK_UNMAPPED_BASE;
-				start_addr = addr;
+			if (start_addr != mm->mmap_base) {
+				start_addr = addr = mm->mmap_base;
 				mm->cached_hole_size = 0;
 				goto full_search;
 			}
@@ -1246,7 +1375,7 @@
 	/*
 	 * Is this a new hole at the lowest possible address?
 	 */
-	if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) {
+	if (addr >= mm->mmap_base && addr < mm->free_area_cache) {
 		mm->free_area_cache = addr;
 		mm->cached_hole_size = ~0UL;
 	}
@@ -1264,12 +1393,16 @@
 {
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
-	unsigned long addr = addr0;
+	unsigned long base = mm->mmap_base, addr = addr0;
 
 	/* requested length too big for entire address space */
 	if (len > TASK_SIZE)
 		return -ENOMEM;
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	/* requesting a specific address */
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
@@ -1327,13 +1460,21 @@
 	 * can happen with large stack limits and large mmap()
 	 * allocations.
 	 */
+	mm->mmap_base = TASK_UNMAPPED_BASE;
+
+#ifdef CONFIG_PAX_RANDMMAP
+	if (mm->pax_flags & MF_PAX_RANDMMAP)
+		mm->mmap_base += mm->delta_mmap;
+#endif
+
+	mm->free_area_cache = mm->mmap_base;
 	mm->cached_hole_size = ~0UL;
-	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
 	/*
 	 * Restore the topdown base:
 	 */
-	mm->free_area_cache = mm->mmap_base;
+	mm->mmap_base = base;
+	mm->free_area_cache = base;
 	mm->cached_hole_size = ~0UL;
 
 	return addr;
@@ -1349,8 +1490,10 @@
 		mm->free_area_cache = addr;
 
 	/* dont allow allocations above current base */
-	if (mm->free_area_cache > mm->mmap_base)
+	if (mm->free_area_cache > mm->mmap_base) {
 		mm->free_area_cache = mm->mmap_base;
+		mm->cached_hole_size = ~0UL;
+	}
 }
 
 unsigned long
@@ -1379,7 +1522,7 @@
 		 * Check if the given range is hugepage aligned, and
 		 * can be made suitable for hugepages.
 		 */
-		ret = prepare_hugepage_range(addr, len, pgoff);
+		ret = prepare_hugepage_range(addr, len);
 	} else {
 		/*
 		 * Ensure that a normal request is not falling in a
@@ -1483,6 +1626,7 @@
 		return -ENOMEM;
 
 	/* Stack limit test */
+	gr_learn_resource(current, RLIMIT_STACK, size, 1);
 	if (size > rlim[RLIMIT_STACK].rlim_cur)
 		return -ENOMEM;
 
@@ -1492,6 +1636,7 @@
 		unsigned long limit;
 		locked = mm->locked_vm + grow;
 		limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+		gr_learn_resource(current, RLIMIT_MEMLOCK, locked << PAGE_SHIFT, 1);
 		if (locked > limit && !capable(CAP_IPC_LOCK))
 			return -ENOMEM;
 	}
@@ -1609,13 +1754,49 @@
 	if (address < vma->vm_start) {
 		unsigned long size, grow;
 
+#ifdef CONFIG_PAX_SEGMEXEC
+		struct vm_area_struct *vma_m = NULL;
+		unsigned long address_m = 0UL;
+
+		if (vma->vm_flags & VM_MIRROR) {
+			address_m = vma->vm_start + vma->vm_mirror;
+			vma_m = find_vma(vma->vm_mm, address_m);
+			if (!vma_m || vma_m->vm_start != address_m ||
+			    !(vma_m->vm_flags & VM_MIRROR) ||
+			    vma->vm_end - vma->vm_start !=
+			    vma_m->vm_end - vma_m->vm_start ||
+			    vma->anon_vma != vma_m->anon_vma) {
+				printk(KERN_ERR "PAX: VMMIRROR: expand bug, %08lx, %08lx, %08lx, %08lx, %08lx\n",
+				       address, vma->vm_start, vma_m->vm_start, vma->vm_end, vma_m->vm_end);
+				anon_vma_unlock(vma);
+				return -EFAULT;
+			}
+			address_m = address + vma->vm_mirror;
+		}
+#endif
+
 		size = vma->vm_end - address;
 		grow = (vma->vm_start - address) >> PAGE_SHIFT;
 
+#ifdef CONFIG_PAX_SEGMEXEC
+		if (vma_m)
+			error = acct_stack_growth(vma, size, 2*grow);
+		else
+#endif
+
 		error = acct_stack_growth(vma, size, grow);
 		if (!error) {
 			vma->vm_start = address;
 			vma->vm_pgoff -= grow;
+			track_exec_limit(vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_flags);
+
+#ifdef CONFIG_PAX_SEGMEXEC
+			if (vma_m) {
+				vma_m->vm_start = address_m;
+				vma_m->vm_pgoff -= grow;
+			}
+#endif
+
 		}
 	}
 	anon_vma_unlock(vma);
@@ -1777,8 +1958,25 @@
  * work. This now handles partial unmappings.
  * Jeremy Fitzhardinge
  */
+#ifdef CONFIG_PAX_SEGMEXEC
+static int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len);
+
 int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 {
+	if (mm->pax_flags & MF_PAX_SEGMEXEC) {
+		int ret = __do_munmap(mm, start + SEGMEXEC_TASK_SIZE, len);
+		if (ret)
+			return ret;
+	}
+
+	return __do_munmap(mm, start, len);
+}
+
+static int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
+#else
+int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
+#endif
+{
 	unsigned long end;
 	struct vm_area_struct *vma, *prev, *last;
 
@@ -1831,6 +2029,8 @@
 	/* Fix up all other VM information */
 	remove_vma_list(mm, vma);
 
+	track_exec_limit(mm, start, end, 0UL);
+
 	return 0;
 }
 
@@ -1843,6 +2043,12 @@
 
 	profile_munmap(addr);
 
+#ifdef CONFIG_PAX_SEGMEXEC
+	if ((mm->pax_flags & MF_PAX_SEGMEXEC) &&
+	    (len > SEGMEXEC_TASK_SIZE || addr > SEGMEXEC_TASK_SIZE-len))
+		return -EINVAL;
+#endif
+
 	down_write(&mm->mmap_sem);
 	ret = do_munmap(mm, addr, len);
 	up_write(&mm->mmap_sem);
@@ -1864,11 +2070,35 @@
  * anonymous maps. eventually we may be able to do some
 * brk-specific accounting here.
 */
+#ifdef CONFIG_PAX_SEGMEXEC
+static unsigned long __do_brk(unsigned long addr, unsigned long len);
+
+unsigned long do_brk(unsigned long addr, unsigned long len)
+{
+	unsigned long ret;
+
+	ret = __do_brk(addr, len);
+	if (ret == addr && (current->mm->pax_flags & (MF_PAX_SEGMEXEC | MF_PAX_MPROTECT)) == MF_PAX_SEGMEXEC) {
+		unsigned long ret_m;
+
+		ret_m = __do_mmap_pgoff(NULL, addr + SEGMEXEC_TASK_SIZE, 0UL, PROT_NONE, MAP_PRIVATE | MAP_FIXED | MAP_MIRROR, addr);
+		if (ret_m > TASK_SIZE) {
+			do_munmap(current->mm, addr, len);
+			ret = ret_m;
+		}
+	}
+
+	return ret;
+}
+
+static unsigned long __do_brk(unsigned long addr, unsigned long len)
+#else
 unsigned long do_brk(unsigned long addr, unsigned long len)
+#endif
 {
 	struct mm_struct * mm = current->mm;
 	struct vm_area_struct * vma, * prev;
-	unsigned long flags;
+	unsigned long flags, task_size = TASK_SIZE;
 	struct rb_node ** rb_link, * rb_parent;
 	pgoff_t pgoff = addr >> PAGE_SHIFT;
 	int error;
@@ -1877,14 +2107,28 @@
 	if (!len)
 		return addr;
 
-	if ((addr + len) > TASK_SIZE || (addr + len) < addr)
-		return -EINVAL;
+#ifdef CONFIG_PAX_SEGMEXEC
+	if (mm->pax_flags & MF_PAX_SEGMEXEC)
+		task_size = SEGMEXEC_TASK_SIZE;
+#endif
 
-	if (is_hugepage_only_range(mm, addr, len))
+	if ((addr + len) > task_size || (addr + len) < addr)
 		return -EINVAL;
 
 	flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
 
+#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC)
+	if (mm->pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) {
+		flags &= ~VM_EXEC;
+
+#ifdef CONFIG_PAX_MPROTECT
+		if (mm->pax_flags & MF_PAX_MPROTECT)
+			flags &= ~VM_MAYEXEC;
+#endif
+
+	}
+#endif
+
 	error = arch_mmap_check(addr, len, flags);
 	if (error)
 		return error;
@@ -1898,6 +2142,7 @@
 		locked += mm->locked_vm;
 		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
 		lock_limit >>= PAGE_SHIFT;
+		gr_learn_resource(current, RLIMIT_MEMLOCK, locked << PAGE_SHIFT, 1);
 		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
 			return -EAGAIN;
 	}
@@ -1911,12 +2156,12 @@
 	/*
 	 * Clear old maps.  this also does some error checking for us
 	 */
- munmap_back:
 	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
 	if (vma && vma->vm_start < addr + len) {
 		if (do_munmap(mm, addr, len))
 			return -ENOMEM;
-		goto munmap_back;
+		vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
+		BUG_ON(vma && vma->vm_start < addr + len);
 	}
 
 	/* Check against address space limits *after* clearing old maps... */
@@ -1948,6 +2193,13 @@
 	vma->vm_end = addr + len;
 	vma->vm_pgoff = pgoff;
 	vma->vm_flags = flags;
+
+#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_X86_32)
+	if (!(mm->pax_flags & MF_PAX_PAGEEXEC))
+		vma->vm_page_prot = protection_map[(flags | VM_EXEC) & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+	else
+#endif
+
 	vma->vm_page_prot = protection_map[flags &
 				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
 	vma_link(mm, vma, prev, rb_link, rb_parent);
@@ -1957,6 +2209,7 @@
 		mm->locked_vm += len >> PAGE_SHIFT;
 		make_pages_present(addr, addr + len);
 	}
+	track_exec_limit(mm, addr, addr + len, flags);
 	return addr;
 }
 
@@ -2089,7 +2342,7 @@
 	unsigned long lim;
 
 	lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
-
+	gr_learn_resource(current, RLIMIT_AS, (cur + npages) << PAGE_SHIFT, 1);
 	if (cur + npages > lim)
 		return 0;
 	return 1;