--- zzzz-none-000/linux-3.10.107/drivers/vfio/pci/vfio_pci_intrs.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/drivers/vfio/pci/vfio_pci_intrs.c 2021-02-04 17:41:59.000000000 +0000 @@ -16,235 +16,22 @@ #include #include #include +#include #include #include -#include #include #include -#include #include #include "vfio_pci_private.h" /* - * IRQfd - generic - */ -struct virqfd { - struct vfio_pci_device *vdev; - struct eventfd_ctx *eventfd; - int (*handler)(struct vfio_pci_device *, void *); - void (*thread)(struct vfio_pci_device *, void *); - void *data; - struct work_struct inject; - wait_queue_t wait; - poll_table pt; - struct work_struct shutdown; - struct virqfd **pvirqfd; -}; - -static struct workqueue_struct *vfio_irqfd_cleanup_wq; - -int __init vfio_pci_virqfd_init(void) -{ - vfio_irqfd_cleanup_wq = - create_singlethread_workqueue("vfio-irqfd-cleanup"); - if (!vfio_irqfd_cleanup_wq) - return -ENOMEM; - - return 0; -} - -void vfio_pci_virqfd_exit(void) -{ - destroy_workqueue(vfio_irqfd_cleanup_wq); -} - -static void virqfd_deactivate(struct virqfd *virqfd) -{ - queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown); -} - -static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) -{ - struct virqfd *virqfd = container_of(wait, struct virqfd, wait); - unsigned long flags = (unsigned long)key; - - if (flags & POLLIN) { - /* An event has been signaled, call function */ - if ((!virqfd->handler || - virqfd->handler(virqfd->vdev, virqfd->data)) && - virqfd->thread) - schedule_work(&virqfd->inject); - } - - if (flags & POLLHUP) { - unsigned long flags; - spin_lock_irqsave(&virqfd->vdev->irqlock, flags); - - /* - * The eventfd is closing, if the virqfd has not yet been - * queued for release, as determined by testing whether the - * vdev pointer to it is still valid, queue it now. As - * with kvm irqfds, we know we won't race against the virqfd - * going away because we hold wqh->lock to get here. - */ - if (*(virqfd->pvirqfd) == virqfd) { - *(virqfd->pvirqfd) = NULL; - virqfd_deactivate(virqfd); - } - - spin_unlock_irqrestore(&virqfd->vdev->irqlock, flags); - } - - return 0; -} - -static void virqfd_ptable_queue_proc(struct file *file, - wait_queue_head_t *wqh, poll_table *pt) -{ - struct virqfd *virqfd = container_of(pt, struct virqfd, pt); - add_wait_queue(wqh, &virqfd->wait); -} - -static void virqfd_shutdown(struct work_struct *work) -{ - struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); - u64 cnt; - - eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt); - flush_work(&virqfd->inject); - eventfd_ctx_put(virqfd->eventfd); - - kfree(virqfd); -} - -static void virqfd_inject(struct work_struct *work) -{ - struct virqfd *virqfd = container_of(work, struct virqfd, inject); - if (virqfd->thread) - virqfd->thread(virqfd->vdev, virqfd->data); -} - -static int virqfd_enable(struct vfio_pci_device *vdev, - int (*handler)(struct vfio_pci_device *, void *), - void (*thread)(struct vfio_pci_device *, void *), - void *data, struct virqfd **pvirqfd, int fd) -{ - struct file *file = NULL; - struct eventfd_ctx *ctx = NULL; - struct virqfd *virqfd; - int ret = 0; - unsigned int events; - - virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL); - if (!virqfd) - return -ENOMEM; - - virqfd->pvirqfd = pvirqfd; - virqfd->vdev = vdev; - virqfd->handler = handler; - virqfd->thread = thread; - virqfd->data = data; - - INIT_WORK(&virqfd->shutdown, virqfd_shutdown); - INIT_WORK(&virqfd->inject, virqfd_inject); - - file = eventfd_fget(fd); - if (IS_ERR(file)) { - ret = PTR_ERR(file); - goto fail; - } - - ctx = eventfd_ctx_fileget(file); - if (IS_ERR(ctx)) { - ret = PTR_ERR(ctx); - goto fail; - } - - virqfd->eventfd = ctx; - - /* - * virqfds can be released by closing the eventfd or directly - * through ioctl. These are both done through a workqueue, so - * we update the pointer to the virqfd under lock to avoid - * pushing multiple jobs to release the same virqfd. - */ - spin_lock_irq(&vdev->irqlock); - - if (*pvirqfd) { - spin_unlock_irq(&vdev->irqlock); - ret = -EBUSY; - goto fail; - } - *pvirqfd = virqfd; - - spin_unlock_irq(&vdev->irqlock); - - /* - * Install our own custom wake-up handling so we are notified via - * a callback whenever someone signals the underlying eventfd. - */ - init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup); - init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc); - - events = file->f_op->poll(file, &virqfd->pt); - - /* - * Check if there was an event already pending on the eventfd - * before we registered and trigger it as if we didn't miss it. - */ - if (events & POLLIN) { - if ((!handler || handler(vdev, data)) && thread) - schedule_work(&virqfd->inject); - } - - /* - * Do not drop the file until the irqfd is fully initialized, - * otherwise we might race against the POLLHUP. - */ - fput(file); - - return 0; - -fail: - if (ctx && !IS_ERR(ctx)) - eventfd_ctx_put(ctx); - - if (file && !IS_ERR(file)) - fput(file); - - kfree(virqfd); - - return ret; -} - -static void virqfd_disable(struct vfio_pci_device *vdev, - struct virqfd **pvirqfd) -{ - unsigned long flags; - - spin_lock_irqsave(&vdev->irqlock, flags); - - if (*pvirqfd) { - virqfd_deactivate(*pvirqfd); - *pvirqfd = NULL; - } - - spin_unlock_irqrestore(&vdev->irqlock, flags); - - /* - * Block until we know all outstanding shutdown jobs have completed. - * Even if we don't queue the job, flush the wq to be sure it's - * been released. - */ - flush_workqueue(vfio_irqfd_cleanup_wq); -} - -/* * INTx */ -static void vfio_send_intx_eventfd(struct vfio_pci_device *vdev, void *unused) +static void vfio_send_intx_eventfd(void *opaque, void *unused) { + struct vfio_pci_device *vdev = opaque; + if (likely(is_intx(vdev) && !vdev->virq_disabled)) eventfd_signal(vdev->ctx[0].trigger, 1); } @@ -287,9 +74,9 @@ * a signal is necessary, which can then be handled via a work queue * or directly depending on the caller. */ -static int vfio_pci_intx_unmask_handler(struct vfio_pci_device *vdev, - void *unused) +static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) { + struct vfio_pci_device *vdev = opaque; struct pci_dev *pdev = vdev->pdev; unsigned long flags; int ret = 0; @@ -442,8 +229,8 @@ static void vfio_intx_disable(struct vfio_pci_device *vdev) { vfio_intx_set_signal(vdev, -1); - virqfd_disable(vdev, &vdev->ctx[0].unmask); - virqfd_disable(vdev, &vdev->ctx[0].mask); + vfio_virqfd_disable(&vdev->ctx[0].unmask); + vfio_virqfd_disable(&vdev->ctx[0].mask); vdev->irq_type = VFIO_PCI_NUM_IRQS; vdev->num_ctx = 0; kfree(vdev->ctx); @@ -468,7 +255,7 @@ if (!is_irq_none(vdev)) return -EINVAL; - vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL); + vdev->ctx = kzalloc(nvec * sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL); if (!vdev->ctx) return -ENOMEM; @@ -485,15 +272,19 @@ for (i = 0; i < nvec; i++) vdev->msix[i].entry = i; - ret = pci_enable_msix(pdev, vdev->msix, nvec); - if (ret) { + ret = pci_enable_msix_range(pdev, vdev->msix, 1, nvec); + if (ret < nvec) { + if (ret > 0) + pci_disable_msix(pdev); kfree(vdev->msix); kfree(vdev->ctx); return ret; } } else { - ret = pci_enable_msi_block(pdev, nvec); - if (ret) { + ret = pci_enable_msi_range(pdev, 1, nvec); + if (ret < nvec) { + if (ret > 0) + pci_disable_msi(pdev); kfree(vdev->ctx); return ret; } @@ -528,6 +319,7 @@ if (vdev->ctx[vector].trigger) { free_irq(irq, vdev->ctx[vector].trigger); + irq_bypass_unregister_producer(&vdev->ctx[vector].producer); kfree(vdev->ctx[vector].name); eventfd_ctx_put(vdev->ctx[vector].trigger); vdev->ctx[vector].trigger = NULL; @@ -547,6 +339,20 @@ return PTR_ERR(trigger); } + /* + * The MSIx vector table resides in device memory which may be cleared + * via backdoor resets. We don't allow direct access to the vector + * table so even if a userspace driver attempts to save/restore around + * such a reset it would be unsuccessful. To avoid this, restore the + * cached value of the message prior to enabling. + */ + if (msix) { + struct msi_msg msg; + + get_cached_msi_msg(irq, &msg); + pci_write_msi_msg(irq, &msg); + } + ret = request_irq(irq, vfio_msihandler, 0, vdev->ctx[vector].name, trigger); if (ret) { @@ -555,6 +361,14 @@ return ret; } + vdev->ctx[vector].producer.token = trigger; + vdev->ctx[vector].producer.irq = irq; + ret = irq_bypass_register_producer(&vdev->ctx[vector].producer); + if (unlikely(ret)) + dev_info(&pdev->dev, + "irq bypass producer (token %p) registration fails: %d\n", + vdev->ctx[vector].producer.token, ret); + vdev->ctx[vector].trigger = trigger; return 0; @@ -589,8 +403,8 @@ vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); for (i = 0; i < vdev->num_ctx; i++) { - virqfd_disable(vdev, &vdev->ctx[i].unmask); - virqfd_disable(vdev, &vdev->ctx[i].mask); + vfio_virqfd_disable(&vdev->ctx[i].unmask); + vfio_virqfd_disable(&vdev->ctx[i].mask); } if (msix) { @@ -623,11 +437,12 @@ } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { int32_t fd = *(int32_t *)data; if (fd >= 0) - return virqfd_enable(vdev, vfio_pci_intx_unmask_handler, - vfio_send_intx_eventfd, NULL, - &vdev->ctx[0].unmask, fd); + return vfio_virqfd_enable((void *) vdev, + vfio_pci_intx_unmask_handler, + vfio_send_intx_eventfd, NULL, + &vdev->ctx[0].unmask, fd); - virqfd_disable(vdev, &vdev->ctx[0].unmask); + vfio_virqfd_disable(&vdev->ctx[0].unmask); } return 0; @@ -747,63 +562,83 @@ return 0; } -static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev, - unsigned index, unsigned start, - unsigned count, uint32_t flags, void *data) +static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx, + unsigned int count, uint32_t flags, + void *data) { - int32_t fd = *(int32_t *)data; - struct pci_dev *pdev = vdev->pdev; - - if ((index != VFIO_PCI_ERR_IRQ_INDEX) || - !(flags & VFIO_IRQ_SET_DATA_TYPE_MASK)) - return -EINVAL; + /* DATA_NONE/DATA_BOOL enables loopback testing */ + if (flags & VFIO_IRQ_SET_DATA_NONE) { + if (*ctx) { + if (count) { + eventfd_signal(*ctx, 1); + } else { + eventfd_ctx_put(*ctx); + *ctx = NULL; + } + return 0; + } + } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { + uint8_t trigger; - /* - * device_lock synchronizes setting and checking of - * err_trigger. The vfio_pci_aer_err_detected() is also - * called with device_lock held. - */ + if (!count) + return -EINVAL; - /* DATA_NONE/DATA_BOOL enables loopback testing */ + trigger = *(uint8_t *)data; + if (trigger && *ctx) + eventfd_signal(*ctx, 1); - if (flags & VFIO_IRQ_SET_DATA_NONE) { - device_lock(&pdev->dev); - if (vdev->err_trigger) - eventfd_signal(vdev->err_trigger, 1); - device_unlock(&pdev->dev); return 0; - } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { - uint8_t trigger = *(uint8_t *)data; - device_lock(&pdev->dev); - if (trigger && vdev->err_trigger) - eventfd_signal(vdev->err_trigger, 1); - device_unlock(&pdev->dev); + } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { + int32_t fd; + + if (!count) + return -EINVAL; + + fd = *(int32_t *)data; + if (fd == -1) { + if (*ctx) + eventfd_ctx_put(*ctx); + *ctx = NULL; + } else if (fd >= 0) { + struct eventfd_ctx *efdctx; + + efdctx = eventfd_ctx_fdget(fd); + if (IS_ERR(efdctx)) + return PTR_ERR(efdctx); + + if (*ctx) + eventfd_ctx_put(*ctx); + + *ctx = efdctx; + } return 0; } - /* Handle SET_DATA_EVENTFD */ + return -EINVAL; +} - if (fd == -1) { - device_lock(&pdev->dev); - if (vdev->err_trigger) - eventfd_ctx_put(vdev->err_trigger); - vdev->err_trigger = NULL; - device_unlock(&pdev->dev); - return 0; - } else if (fd >= 0) { - struct eventfd_ctx *efdctx; - efdctx = eventfd_ctx_fdget(fd); - if (IS_ERR(efdctx)) - return PTR_ERR(efdctx); - device_lock(&pdev->dev); - if (vdev->err_trigger) - eventfd_ctx_put(vdev->err_trigger); - vdev->err_trigger = efdctx; - device_unlock(&pdev->dev); - return 0; - } else +static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev, + unsigned index, unsigned start, + unsigned count, uint32_t flags, void *data) +{ + if (index != VFIO_PCI_ERR_IRQ_INDEX || start != 0 || count > 1) return -EINVAL; + + return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger, + count, flags, data); } + +static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev, + unsigned index, unsigned start, + unsigned count, uint32_t flags, void *data) +{ + if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count > 1) + return -EINVAL; + + return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger, + count, flags, data); +} + int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags, unsigned index, unsigned start, unsigned count, void *data) @@ -845,6 +680,14 @@ func = vfio_pci_set_err_trigger; break; } + break; + case VFIO_PCI_REQ_IRQ_INDEX: + switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { + case VFIO_IRQ_SET_ACTION_TRIGGER: + func = vfio_pci_set_req_trigger; + break; + } + break; } if (!func)