--- zzzz-none-000/linux-3.10.107/virt/kvm/eventfd.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/virt/kvm/eventfd.c 2021-02-04 17:41:59.000000000 +0000 @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -31,72 +32,23 @@ #include #include #include +#include #include +#include +#include +#include -#include "iodev.h" +#include -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING -/* - * -------------------------------------------------------------------- - * irqfd: Allows an fd to be used to inject an interrupt to the guest - * - * Credit goes to Avi Kivity for the original idea. - * -------------------------------------------------------------------- - */ - -/* - * Resampling irqfds are a special variety of irqfds used to emulate - * level triggered interrupts. The interrupt is asserted on eventfd - * trigger. On acknowledgement through the irq ack notifier, the - * interrupt is de-asserted and userspace is notified through the - * resamplefd. All resamplers on the same gsi are de-asserted - * together, so we don't need to track the state of each individual - * user. We can also therefore share the same irq source ID. - */ -struct _irqfd_resampler { - struct kvm *kvm; - /* - * List of resampling struct _irqfd objects sharing this gsi. - * RCU list modified under kvm->irqfds.resampler_lock - */ - struct list_head list; - struct kvm_irq_ack_notifier notifier; - /* - * Entry in list of kvm->irqfd.resampler_list. Use for sharing - * resamplers among irqfds on the same gsi. - * Accessed and modified under kvm->irqfds.resampler_lock - */ - struct list_head link; -}; - -struct _irqfd { - /* Used for MSI fast-path */ - struct kvm *kvm; - wait_queue_t wait; - /* Update side is protected by irqfds.lock */ - struct kvm_kernel_irq_routing_entry __rcu *irq_entry; - /* Used for level IRQ fast-path */ - int gsi; - struct work_struct inject; - /* The resampler used by this irqfd (resampler-only) */ - struct _irqfd_resampler *resampler; - /* Eventfd notified on resample (resampler-only) */ - struct eventfd_ctx *resamplefd; - /* Entry in list of irqfds for a resampler (resampler-only) */ - struct list_head resampler_link; - /* Used for setup/shutdown */ - struct eventfd_ctx *eventfd; - struct list_head list; - poll_table pt; - struct work_struct shutdown; -}; +#ifdef CONFIG_HAVE_KVM_IRQFD static struct workqueue_struct *irqfd_cleanup_wq; static void irqfd_inject(struct work_struct *work) { - struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); + struct kvm_kernel_irqfd *irqfd = + container_of(work, struct kvm_kernel_irqfd, inject); struct kvm *kvm = irqfd->kvm; if (!irqfd->resampler) { @@ -117,32 +69,36 @@ static void irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) { - struct _irqfd_resampler *resampler; - struct _irqfd *irqfd; + struct kvm_kernel_irqfd_resampler *resampler; + struct kvm *kvm; + struct kvm_kernel_irqfd *irqfd; + int idx; - resampler = container_of(kian, struct _irqfd_resampler, notifier); + resampler = container_of(kian, + struct kvm_kernel_irqfd_resampler, notifier); + kvm = resampler->kvm; - kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, + kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, resampler->notifier.gsi, 0, false); - rcu_read_lock(); + idx = srcu_read_lock(&kvm->irq_srcu); list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link) eventfd_signal(irqfd->resamplefd, 1); - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); } static void -irqfd_resampler_shutdown(struct _irqfd *irqfd) +irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd) { - struct _irqfd_resampler *resampler = irqfd->resampler; + struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler; struct kvm *kvm = resampler->kvm; mutex_lock(&kvm->irqfds.resampler_lock); list_del_rcu(&irqfd->resampler_link); - synchronize_rcu(); + synchronize_srcu(&kvm->irq_srcu); if (list_empty(&resampler->list)) { list_del(&resampler->link); @@ -161,7 +117,8 @@ static void irqfd_shutdown(struct work_struct *work) { - struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown); + struct kvm_kernel_irqfd *irqfd = + container_of(work, struct kvm_kernel_irqfd, shutdown); u64 cnt; /* @@ -184,6 +141,9 @@ /* * It is now safe to release the object's resources */ +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS + irq_bypass_unregister_consumer(&irqfd->consumer); +#endif eventfd_ctx_put(irqfd->eventfd); kfree(irqfd); } @@ -191,7 +151,7 @@ /* assumes kvm->irqfds.lock is held */ static bool -irqfd_is_active(struct _irqfd *irqfd) +irqfd_is_active(struct kvm_kernel_irqfd *irqfd) { return list_empty(&irqfd->list) ? false : true; } @@ -202,7 +162,7 @@ * assumes kvm->irqfds.lock is held */ static void -irqfd_deactivate(struct _irqfd *irqfd) +irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) { BUG_ON(!irqfd_is_active(irqfd)); @@ -211,27 +171,41 @@ queue_work(irqfd_cleanup_wq, &irqfd->shutdown); } +int __attribute__((weak)) kvm_arch_set_irq_inatomic( + struct kvm_kernel_irq_routing_entry *irq, + struct kvm *kvm, int irq_source_id, + int level, + bool line_status) +{ + return -EWOULDBLOCK; +} + /* * Called with wqh->lock held and interrupts disabled */ static int irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) { - struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); + struct kvm_kernel_irqfd *irqfd = + container_of(wait, struct kvm_kernel_irqfd, wait); unsigned long flags = (unsigned long)key; - struct kvm_kernel_irq_routing_entry *irq; + struct kvm_kernel_irq_routing_entry irq; struct kvm *kvm = irqfd->kvm; + unsigned seq; + int idx; if (flags & POLLIN) { - rcu_read_lock(); - irq = rcu_dereference(irqfd->irq_entry); + idx = srcu_read_lock(&kvm->irq_srcu); + do { + seq = read_seqcount_begin(&irqfd->irq_entry_sc); + irq = irqfd->irq_entry; + } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); /* An event has been signaled, inject an interrupt */ - if (irq) - kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, - false); - else + if (kvm_arch_set_irq_inatomic(&irq, kvm, + KVM_USERSPACE_IRQ_SOURCE_ID, 1, + false) == -EWOULDBLOCK) schedule_work(&irqfd->inject); - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); } if (flags & POLLHUP) { @@ -262,39 +236,62 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { - struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); + struct kvm_kernel_irqfd *irqfd = + container_of(pt, struct kvm_kernel_irqfd, pt); add_wait_queue(wqh, &irqfd->wait); } /* Must be called under irqfds.lock */ -static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, - struct kvm_irq_routing_table *irq_rt) +static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) { struct kvm_kernel_irq_routing_entry *e; + struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; + int n_entries; - if (irqfd->gsi >= irq_rt->nr_rt_entries) { - rcu_assign_pointer(irqfd->irq_entry, NULL); - return; - } + n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi); - hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) { - /* Only fast-path MSI. */ - if (e->type == KVM_IRQ_ROUTING_MSI) - rcu_assign_pointer(irqfd->irq_entry, e); - else - rcu_assign_pointer(irqfd->irq_entry, NULL); - } + write_seqcount_begin(&irqfd->irq_entry_sc); + + e = entries; + if (n_entries == 1) + irqfd->irq_entry = *e; + else + irqfd->irq_entry.type = 0; + + write_seqcount_end(&irqfd->irq_entry_sc); +} + +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +void __attribute__((weak)) kvm_arch_irq_bypass_stop( + struct irq_bypass_consumer *cons) +{ +} + +void __attribute__((weak)) kvm_arch_irq_bypass_start( + struct irq_bypass_consumer *cons) +{ +} + +int __attribute__((weak)) kvm_arch_update_irqfd_routing( + struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set) +{ + return 0; } +#endif static int kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) { - struct kvm_irq_routing_table *irq_rt; - struct _irqfd *irqfd, *tmp; - struct file *file = NULL; + struct kvm_kernel_irqfd *irqfd, *tmp; + struct fd f; struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; int ret; unsigned int events; + int idx; + + if (!kvm_arch_intc_initialized(kvm)) + return -EAGAIN; irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); if (!irqfd) @@ -305,14 +302,15 @@ INIT_LIST_HEAD(&irqfd->list); INIT_WORK(&irqfd->inject, irqfd_inject); INIT_WORK(&irqfd->shutdown, irqfd_shutdown); + seqcount_init(&irqfd->irq_entry_sc); - file = eventfd_fget(args->fd); - if (IS_ERR(file)) { - ret = PTR_ERR(file); - goto fail; + f = fdget(args->fd); + if (!f.file) { + ret = -EBADF; + goto out; } - eventfd = eventfd_ctx_fileget(file); + eventfd = eventfd_ctx_fileget(f.file); if (IS_ERR(eventfd)) { ret = PTR_ERR(eventfd); goto fail; @@ -321,7 +319,7 @@ irqfd->eventfd = eventfd; if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) { - struct _irqfd_resampler *resampler; + struct kvm_kernel_irqfd_resampler *resampler; resamplefd = eventfd_ctx_fdget(args->resamplefd); if (IS_ERR(resamplefd)) { @@ -363,7 +361,7 @@ } list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list); - synchronize_rcu(); + synchronize_srcu(&kvm->irq_srcu); mutex_unlock(&kvm->irqfds.resampler_lock); } @@ -387,28 +385,39 @@ goto fail; } - irq_rt = rcu_dereference_protected(kvm->irq_routing, - lockdep_is_held(&kvm->irqfds.lock)); - irqfd_update(kvm, irqfd, irq_rt); - - events = file->f_op->poll(file, &irqfd->pt); + idx = srcu_read_lock(&kvm->irq_srcu); + irqfd_update(kvm, irqfd); + srcu_read_unlock(&kvm->irq_srcu, idx); list_add_tail(&irqfd->list, &kvm->irqfds.items); + spin_unlock_irq(&kvm->irqfds.lock); + /* * Check if there was an event already pending on the eventfd * before we registered, and trigger it as if we didn't miss it. */ + events = f.file->f_op->poll(f.file, &irqfd->pt); + if (events & POLLIN) schedule_work(&irqfd->inject); - spin_unlock_irq(&kvm->irqfds.lock); - /* * do not drop the file until the irqfd is fully initialized, otherwise * we might race against the POLLHUP */ - fput(file); + fdput(f); +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS + irqfd->consumer.token = (void *)irqfd->eventfd; + irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer; + irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer; + irqfd->consumer.stop = kvm_arch_irq_bypass_stop; + irqfd->consumer.start = kvm_arch_irq_bypass_start; + ret = irq_bypass_register_consumer(&irqfd->consumer); + if (ret) + pr_info("irq bypass consumer (token %p) registration fails: %d\n", + irqfd->consumer.token, ret); +#endif return 0; @@ -422,18 +431,81 @@ if (eventfd && !IS_ERR(eventfd)) eventfd_ctx_put(eventfd); - if (!IS_ERR(file)) - fput(file); + fdput(f); +out: kfree(irqfd); return ret; } + +bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) { + srcu_read_unlock(&kvm->irq_srcu, idx); + return true; + } + + srcu_read_unlock(&kvm->irq_srcu, idx); + + return false; +} +EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); + +void kvm_notify_acked_gsi(struct kvm *kvm, int gsi) +{ + struct kvm_irq_ack_notifier *kian; + + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); +} + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + int gsi, idx; + + trace_kvm_ack_irq(irqchip, pin); + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + kvm_notify_acked_gsi(kvm, gsi); + srcu_read_unlock(&kvm->irq_srcu, idx); +} + +void kvm_register_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); + mutex_unlock(&kvm->irq_lock); + kvm_vcpu_request_scan_ioapic(kvm); +} + +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_del_init_rcu(&kian->link); + mutex_unlock(&kvm->irq_lock); + synchronize_srcu(&kvm->irq_srcu); + kvm_vcpu_request_scan_ioapic(kvm); +} #endif void kvm_eventfd_init(struct kvm *kvm) { -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD spin_lock_init(&kvm->irqfds.lock); INIT_LIST_HEAD(&kvm->irqfds.items); INIT_LIST_HEAD(&kvm->irqfds.resampler_list); @@ -442,14 +514,14 @@ INIT_LIST_HEAD(&kvm->ioeventfds); } -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD /* * shutdown any irqfd's that match fd+gsi */ static int kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) { - struct _irqfd *irqfd, *tmp; + struct kvm_kernel_irqfd *irqfd, *tmp; struct eventfd_ctx *eventfd; eventfd = eventfd_ctx_fdget(args->fd); @@ -461,14 +533,14 @@ list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) { /* - * This rcu_assign_pointer is needed for when + * This clearing of irq_entry.type is needed for when * another thread calls kvm_irq_routing_update before * we flush workqueue below (we synchronize with * kvm_irq_routing_update using irqfds.lock). - * It is paired with synchronize_rcu done by caller - * of that function. */ - rcu_assign_pointer(irqfd->irq_entry, NULL); + write_seqcount_begin(&irqfd->irq_entry_sc); + irqfd->irq_entry.type = 0; + write_seqcount_end(&irqfd->irq_entry_sc); irqfd_deactivate(irqfd); } } @@ -505,7 +577,7 @@ void kvm_irqfd_release(struct kvm *kvm) { - struct _irqfd *irqfd, *tmp; + struct kvm_kernel_irqfd *irqfd, *tmp; spin_lock_irq(&kvm->irqfds.lock); @@ -523,20 +595,27 @@ } /* - * Change irq_routing and irqfd. - * Caller must invoke synchronize_rcu afterwards. + * Take note of a change in irq routing. + * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards. */ -void kvm_irq_routing_update(struct kvm *kvm, - struct kvm_irq_routing_table *irq_rt) +void kvm_irq_routing_update(struct kvm *kvm) { - struct _irqfd *irqfd; + struct kvm_kernel_irqfd *irqfd; spin_lock_irq(&kvm->irqfds.lock); - rcu_assign_pointer(kvm->irq_routing, irq_rt); + list_for_each_entry(irqfd, &kvm->irqfds.items, list) { + irqfd_update(kvm, irqfd); - list_for_each_entry(irqfd, &kvm->irqfds.items, list) - irqfd_update(kvm, irqfd, irq_rt); +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS + if (irqfd->producer) { + int ret = kvm_arch_update_irqfd_routing( + irqfd->kvm, irqfd->producer->irq, + irqfd->gsi, 1); + WARN_ON(ret); + } +#endif + } spin_unlock_irq(&kvm->irqfds.lock); } @@ -600,7 +679,15 @@ { u64 _val; - if (!(addr == p->addr && len == p->length)) + if (addr != p->addr) + /* address must be precise for a hit */ + return false; + + if (!p->length) + /* length = 0 means only look at the address, so always a hit */ + return true; + + if (len != p->length) /* address-range must be precise for a hit */ return false; @@ -634,8 +721,8 @@ /* MMIO/PIO writes trigger an event if the addr/val match */ static int -ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len, - const void *val) +ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr, + int len, const void *val) { struct _ioeventfd *p = to_ioeventfd(this); @@ -671,9 +758,11 @@ list_for_each_entry(_p, &kvm->ioeventfds, list) if (_p->bus_idx == p->bus_idx && - _p->addr == p->addr && _p->length == p->length && - (_p->wildcard || p->wildcard || - _p->datamatch == p->datamatch)) + _p->addr == p->addr && + (!_p->length || !p->length || + (_p->length == p->length && + (_p->wildcard || p->wildcard || + _p->datamatch == p->datamatch)))) return true; return false; @@ -688,33 +777,14 @@ return KVM_MMIO_BUS; } -static int -kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +static int kvm_assign_ioeventfd_idx(struct kvm *kvm, + enum kvm_bus bus_idx, + struct kvm_ioeventfd *args) { - enum kvm_bus bus_idx; - struct _ioeventfd *p; - struct eventfd_ctx *eventfd; - int ret; - - bus_idx = ioeventfd_bus_from_flags(args->flags); - /* must be natural-word sized */ - switch (args->len) { - case 1: - case 2: - case 4: - case 8: - break; - default: - return -EINVAL; - } - /* check for range overflow */ - if (args->addr + args->len < args->addr) - return -EINVAL; - - /* check for extra flags that we don't understand */ - if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) - return -EINVAL; + struct eventfd_ctx *eventfd; + struct _ioeventfd *p; + int ret; eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) @@ -771,14 +841,13 @@ } static int -kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_ioeventfd *args) { - enum kvm_bus bus_idx; struct _ioeventfd *p, *tmp; struct eventfd_ctx *eventfd; int ret = -ENOENT; - bus_idx = ioeventfd_bus_from_flags(args->flags); eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) return PTR_ERR(eventfd); @@ -813,6 +882,69 @@ return ret; } +static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +{ + enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags); + int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); + + if (!args->len && bus_idx == KVM_MMIO_BUS) + kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); + + return ret; +} + +static int +kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +{ + enum kvm_bus bus_idx; + int ret; + + bus_idx = ioeventfd_bus_from_flags(args->flags); + /* must be natural-word sized, or 0 to ignore length */ + switch (args->len) { + case 0: + case 1: + case 2: + case 4: + case 8: + break; + default: + return -EINVAL; + } + + /* check for range overflow */ + if (args->addr + args->len < args->addr) + return -EINVAL; + + /* check for extra flags that we don't understand */ + if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) + return -EINVAL; + + /* ioeventfd with no length can't be combined with DATAMATCH */ + if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)) + return -EINVAL; + + ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args); + if (ret) + goto fail; + + /* When length is ignored, MMIO is also put on a separate bus, for + * faster lookups. + */ + if (!args->len && bus_idx == KVM_MMIO_BUS) { + ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); + if (ret < 0) + goto fast_fail; + } + + return 0; + +fast_fail: + kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); +fail: + return ret; +} + int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) {