--- zzzz-none-000/linux-3.10.107/drivers/xen/privcmd.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/drivers/xen/privcmd.c 2021-02-04 17:41:59.000000000 +0000 @@ -6,6 +6,8 @@ * Copyright (c) 2002-2004, K A Fraser, B Dragovic */ +#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt + #include #include #include @@ -41,9 +43,10 @@ #define PRIV_VMA_LOCKED ((void *)1) -#ifndef HAVE_ARCH_PRIVCMD_MMAP -static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); -#endif +static int privcmd_vma_range_is_mapped( + struct vm_area_struct *vma, + unsigned long addr, + unsigned long nr_pages); static long privcmd_ioctl_hypercall(void __user *udata) { @@ -53,10 +56,12 @@ if (copy_from_user(&hypercall, udata, sizeof(hypercall))) return -EFAULT; + xen_preemptible_hcall_begin(); ret = privcmd_call(hypercall.op, hypercall.arg[0], hypercall.arg[1], hypercall.arg[2], hypercall.arg[3], hypercall.arg[4]); + xen_preemptible_hcall_end(); return ret; } @@ -154,16 +159,50 @@ return ret; } -struct mmap_mfn_state { +/* + * Similar to traverse_pages, but use each page as a "block" of + * data to be processed as one unit. + */ +static int traverse_pages_block(unsigned nelem, size_t size, + struct list_head *pos, + int (*fn)(void *data, int nr, void *state), + void *state) +{ + void *pagedata; + unsigned pageidx; + int ret = 0; + + BUG_ON(size > PAGE_SIZE); + + pageidx = PAGE_SIZE; + + while (nelem) { + int nr = (PAGE_SIZE/size); + struct page *page; + if (nr > nelem) + nr = nelem; + pos = pos->next; + page = list_entry(pos, struct page, lru); + pagedata = page_address(page); + ret = (*fn)(pagedata, nr, state); + if (ret) + break; + nelem -= nr; + } + + return ret; +} + +struct mmap_gfn_state { unsigned long va; struct vm_area_struct *vma; domid_t domain; }; -static int mmap_mfn_range(void *data, void *state) +static int mmap_gfn_range(void *data, void *state) { struct privcmd_mmap_entry *msg = data; - struct mmap_mfn_state *st = state; + struct mmap_gfn_state *st = state; struct vm_area_struct *vma = st->vma; int rc; @@ -177,7 +216,7 @@ ((msg->va+(msg->npages< vma->vm_end)) return -EINVAL; - rc = xen_remap_domain_mfn_range(vma, + rc = xen_remap_domain_gfn_range(vma, msg->va & PAGE_MASK, msg->mfn, msg->npages, vma->vm_page_prot, @@ -197,7 +236,7 @@ struct vm_area_struct *vma; int rc; LIST_HEAD(pagelist); - struct mmap_mfn_state state; + struct mmap_gfn_state state; /* We only support privcmd_ioctl_mmap_batch for auto translated. */ if (xen_feature(XENFEAT_auto_translated_physmap)) @@ -223,9 +262,9 @@ vma = find_vma(mm, msg->va); rc = -EINVAL; - if (!vma || (msg->va != vma->vm_start) || - !privcmd_enforce_singleshot_mapping(vma)) + if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data) goto out_up; + vma->vm_private_data = PRIV_VMA_LOCKED; } state.va = vma->vm_start; @@ -234,7 +273,7 @@ rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry), &pagelist, - mmap_mfn_range, &state); + mmap_gfn_range, &state); out_up: @@ -260,48 +299,34 @@ int global_error; int version; - /* User-space mfn array to store errors in the second pass for V1. */ - xen_pfn_t __user *user_mfn; + /* User-space gfn array to store errors in the second pass for V1. */ + xen_pfn_t __user *user_gfn; /* User-space int array to store errors in the second pass for V2. */ int __user *user_err; }; -/* auto translated dom0 note: if domU being created is PV, then mfn is - * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP). +/* auto translated dom0 note: if domU being created is PV, then gfn is + * mfn(addr on bus). If it's auto xlated, then gfn is pfn (input to HAP). */ -static int mmap_batch_fn(void *data, void *state) +static int mmap_batch_fn(void *data, int nr, void *state) { - xen_pfn_t *mfnp = data; + xen_pfn_t *gfnp = data; struct mmap_batch_state *st = state; struct vm_area_struct *vma = st->vma; struct page **pages = vma->vm_private_data; - struct page *cur_page = NULL; + struct page **cur_pages = NULL; int ret; if (xen_feature(XENFEAT_auto_translated_physmap)) - cur_page = pages[st->index++]; - - ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, - st->vma->vm_page_prot, st->domain, - &cur_page); + cur_pages = &pages[st->index]; - /* Store error code for second pass. */ - if (st->version == 1) { - if (ret < 0) { - /* - * V1 encodes the error codes in the 32bit top nibble of the - * mfn (with its known limitations vis-a-vis 64 bit callers). - */ - *mfnp |= (ret == -ENOENT) ? - PRIVCMD_MMAPBATCH_PAGED_ERROR : - PRIVCMD_MMAPBATCH_MFN_ERROR; - } - } else { /* st->version == 2 */ - *((int *) mfnp) = ret; - } + BUG_ON(nr < 0); + ret = xen_remap_domain_gfn_array(st->vma, st->va & PAGE_MASK, gfnp, nr, + (int *)gfnp, st->vma->vm_page_prot, + st->domain, cur_pages); - /* And see if it affects the global_error. */ - if (ret < 0) { + /* Adjust the global_error? */ + if (ret != nr) { if (ret == -ENOENT) st->global_error = -ENOENT; else { @@ -310,23 +335,35 @@ st->global_error = 1; } } - st->va += PAGE_SIZE; + st->va += PAGE_SIZE * nr; + st->index += nr; return 0; } -static int mmap_return_errors(void *data, void *state) +static int mmap_return_error(int err, struct mmap_batch_state *st) { - struct mmap_batch_state *st = state; + int ret; if (st->version == 1) { - xen_pfn_t mfnp = *((xen_pfn_t *) data); - if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR) - return __put_user(mfnp, st->user_mfn++); - else - st->user_mfn++; + if (err) { + xen_pfn_t gfn; + + ret = get_user(gfn, st->user_gfn); + if (ret < 0) + return ret; + /* + * V1 encodes the error codes in the 32bit top + * nibble of the gfn (with its known + * limitations vis-a-vis 64 bit callers). + */ + gfn |= (err == -ENOENT) ? + PRIVCMD_MMAPBATCH_PAGED_ERROR : + PRIVCMD_MMAPBATCH_MFN_ERROR; + return __put_user(gfn, st->user_gfn++); + } else + st->user_gfn++; } else { /* st->version == 2 */ - int err = *((int *) data); if (err) return __put_user(err, st->user_err++); else @@ -336,7 +373,22 @@ return 0; } -/* Allocate pfns that are then mapped with gmfns from foreign domid. Update +static int mmap_return_errors(void *data, int nr, void *state) +{ + struct mmap_batch_state *st = state; + int *errs = data; + int i; + int ret; + + for (i = 0; i < nr; i++) { + ret = mmap_return_error(errs[i], st); + if (ret < 0) + return ret; + } + return 0; +} + +/* Allocate pfns that are then mapped with gfns from foreign domid. Update * the vma with the page info to use later. * Returns: 0 if success, otherwise -errno */ @@ -349,20 +401,20 @@ if (pages == NULL) return -ENOMEM; - rc = alloc_xenballooned_pages(numpgs, pages, 0); + rc = alloc_xenballooned_pages(numpgs, pages); if (rc != 0) { pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__, numpgs, rc); kfree(pages); return -ENOMEM; } - BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED); + BUG_ON(vma->vm_private_data != NULL); vma->vm_private_data = pages; return 0; } -static struct vm_operations_struct privcmd_vm_ops; +static const struct vm_operations_struct privcmd_vm_ops; static long privcmd_ioctl_mmap_batch(void __user *udata, int version) { @@ -394,7 +446,7 @@ return -EINVAL; } - nr_pages = m.num; + nr_pages = DIV_ROUND_UP(m.num, XEN_PFN_PER_PAGE); if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) return -EINVAL; @@ -419,19 +471,43 @@ vma = find_vma(mm, m.addr); if (!vma || - vma->vm_ops != &privcmd_vm_ops || - (m.addr != vma->vm_start) || - ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) || - !privcmd_enforce_singleshot_mapping(vma)) { - up_write(&mm->mmap_sem); + vma->vm_ops != &privcmd_vm_ops) { ret = -EINVAL; - goto out; + goto out_unlock; } - if (xen_feature(XENFEAT_auto_translated_physmap)) { - ret = alloc_empty_pages(vma, m.num); - if (ret < 0) { - up_write(&mm->mmap_sem); - goto out; + + /* + * Caller must either: + * + * Map the whole VMA range, which will also allocate all the + * pages required for the auto_translated_physmap case. + * + * Or + * + * Map unmapped holes left from a previous map attempt (e.g., + * because those foreign frames were previously paged out). + */ + if (vma->vm_private_data == NULL) { + if (m.addr != vma->vm_start || + m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) { + ret = -EINVAL; + goto out_unlock; + } + if (xen_feature(XENFEAT_auto_translated_physmap)) { + ret = alloc_empty_pages(vma, nr_pages); + if (ret < 0) + goto out_unlock; + } else + vma->vm_private_data = PRIV_VMA_LOCKED; + } else { + if (m.addr < vma->vm_start || + m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) { + ret = -EINVAL; + goto out_unlock; + } + if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) { + ret = -EINVAL; + goto out_unlock; } } @@ -442,18 +518,19 @@ state.global_error = 0; state.version = version; + BUILD_BUG_ON(((PAGE_SIZE / sizeof(xen_pfn_t)) % XEN_PFN_PER_PAGE) != 0); /* mmap_batch_fn guarantees ret == 0 */ - BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t), - &pagelist, mmap_batch_fn, &state)); + BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t), + &pagelist, mmap_batch_fn, &state)); up_write(&mm->mmap_sem); if (state.global_error) { /* Write back errors in second pass. */ - state.user_mfn = (xen_pfn_t *)m.arr; + state.user_gfn = (xen_pfn_t *)m.arr; state.user_err = m.err; - ret = traverse_pages(m.num, sizeof(xen_pfn_t), - &pagelist, mmap_return_errors, &state); + ret = traverse_pages_block(m.num, sizeof(xen_pfn_t), + &pagelist, mmap_return_errors, &state); } else ret = 0; @@ -464,8 +541,11 @@ out: free_page_list(&pagelist); - return ret; + +out_unlock: + up_write(&mm->mmap_sem); + goto out; } static long privcmd_ioctl(struct file *file, @@ -503,12 +583,18 @@ { struct page **pages = vma->vm_private_data; int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + int numgfns = (vma->vm_end - vma->vm_start) >> XEN_PAGE_SHIFT; + int rc; if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages) return; - xen_unmap_domain_mfn_range(vma, numpgs, pages); - free_xenballooned_pages(numpgs, pages); + rc = xen_unmap_domain_gfn_range(vma, numgfns, pages); + if (rc == 0) + free_xenballooned_pages(numpgs, pages); + else + pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n", + numpgs, rc); kfree(pages); } @@ -521,7 +607,7 @@ return VM_FAULT_SIGBUS; } -static struct vm_operations_struct privcmd_vm_ops = { +static const struct vm_operations_struct privcmd_vm_ops = { .close = privcmd_close, .fault = privcmd_fault }; @@ -538,9 +624,24 @@ return 0; } -static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) +/* + * For MMAPBATCH*. This allows asserting the singleshot mapping + * on a per pfn/pte basis. Mapping calls that fail with ENOENT + * can be then retried until success. + */ +static int is_mapped_fn(pte_t *pte, struct page *pmd_page, + unsigned long addr, void *data) +{ + return pte_none(*pte) ? 0 : -EBUSY; +} + +static int privcmd_vma_range_is_mapped( + struct vm_area_struct *vma, + unsigned long addr, + unsigned long nr_pages) { - return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED); + return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT, + is_mapped_fn, NULL) != 0; } const struct file_operations xen_privcmd_fops = { @@ -565,7 +666,7 @@ err = misc_register(&privcmd_dev); if (err != 0) { - printk(KERN_ERR "Could not register Xen privcmd device\n"); + pr_err("Could not register Xen privcmd device\n"); return err; } return 0;