--- zzzz-none-000/linux-3.10.107/drivers/hv/vmbus_drv.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/drivers/hv/vmbus_drv.c 2021-02-04 17:41:59.000000000 +0000 @@ -25,46 +25,88 @@ #include #include #include -#include #include #include #include #include -#include #include -#include #include #include +#include +#include #include #include #include +#include +#include +#include +#include +#include #include "hyperv_vmbus.h" - static struct acpi_device *hv_acpi_dev; static struct tasklet_struct msg_dpc; static struct completion probe_event; static int irq; -struct hv_device_info { - u32 chn_id; - u32 chn_state; - uuid_le chn_type; - uuid_le chn_instance; - - u32 monitor_id; - u32 server_monitor_pending; - u32 server_monitor_latency; - u32 server_monitor_conn_id; - u32 client_monitor_pending; - u32 client_monitor_latency; - u32 client_monitor_conn_id; - struct hv_dev_port_info inbound; - struct hv_dev_port_info outbound; +static void hyperv_report_panic(struct pt_regs *regs) +{ + static bool panic_reported; + + /* + * We prefer to report panic on 'die' chain as we have proper + * registers to report, but if we miss it (e.g. on BUG()) we need + * to report it on 'panic'. + */ + if (panic_reported) + return; + panic_reported = true; + + wrmsrl(HV_X64_MSR_CRASH_P0, regs->ip); + wrmsrl(HV_X64_MSR_CRASH_P1, regs->ax); + wrmsrl(HV_X64_MSR_CRASH_P2, regs->bx); + wrmsrl(HV_X64_MSR_CRASH_P3, regs->cx); + wrmsrl(HV_X64_MSR_CRASH_P4, regs->dx); + + /* + * Let Hyper-V know there is crash data available + */ + wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); +} + +static int hyperv_panic_event(struct notifier_block *nb, unsigned long val, + void *args) +{ + struct pt_regs *regs; + + regs = current_pt_regs(); + + hyperv_report_panic(regs); + return NOTIFY_DONE; +} + +static int hyperv_die_event(struct notifier_block *nb, unsigned long val, + void *args) +{ + struct die_args *die = (struct die_args *)args; + struct pt_regs *regs = die->regs; + + hyperv_report_panic(regs); + return NOTIFY_DONE; +} + +static struct notifier_block hyperv_die_block = { + .notifier_call = hyperv_die_event, +}; +static struct notifier_block hyperv_panic_block = { + .notifier_call = hyperv_panic_event, }; +struct resource *hyperv_mmio; +DEFINE_SEMAPHORE(hyperv_mmio_lock); + static int vmbus_exists(void) { if (hv_acpi_dev == NULL) @@ -73,169 +115,399 @@ return 0; } +#define VMBUS_ALIAS_LEN ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2) +static void print_alias_name(struct hv_device *hv_dev, char *alias_name) +{ + int i; + for (i = 0; i < VMBUS_ALIAS_LEN; i += 2) + sprintf(&alias_name[i], "%02x", hv_dev->dev_type.b[i/2]); +} + +static u8 channel_monitor_group(struct vmbus_channel *channel) +{ + return (u8)channel->offermsg.monitorid / 32; +} -static void get_channel_info(struct hv_device *device, - struct hv_device_info *info) +static u8 channel_monitor_offset(struct vmbus_channel *channel) { - struct vmbus_channel_debug_info debug_info; + return (u8)channel->offermsg.monitorid % 32; +} - if (!device->channel) - return; +static u32 channel_pending(struct vmbus_channel *channel, + struct hv_monitor_page *monitor_page) +{ + u8 monitor_group = channel_monitor_group(channel); + return monitor_page->trigger_group[monitor_group].pending; +} - vmbus_get_debug_info(device->channel, &debug_info); +static u32 channel_latency(struct vmbus_channel *channel, + struct hv_monitor_page *monitor_page) +{ + u8 monitor_group = channel_monitor_group(channel); + u8 monitor_offset = channel_monitor_offset(channel); + 
return monitor_page->latency[monitor_group][monitor_offset]; +} - info->chn_id = debug_info.relid; - info->chn_state = debug_info.state; - memcpy(&info->chn_type, &debug_info.interfacetype, - sizeof(uuid_le)); - memcpy(&info->chn_instance, &debug_info.interface_instance, - sizeof(uuid_le)); +static u32 channel_conn_id(struct vmbus_channel *channel, + struct hv_monitor_page *monitor_page) +{ + u8 monitor_group = channel_monitor_group(channel); + u8 monitor_offset = channel_monitor_offset(channel); + return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id; +} - info->monitor_id = debug_info.monitorid; +static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); - info->server_monitor_pending = debug_info.servermonitor_pending; - info->server_monitor_latency = debug_info.servermonitor_latency; - info->server_monitor_conn_id = debug_info.servermonitor_connectionid; - - info->client_monitor_pending = debug_info.clientmonitor_pending; - info->client_monitor_latency = debug_info.clientmonitor_latency; - info->client_monitor_conn_id = debug_info.clientmonitor_connectionid; - - info->inbound.int_mask = debug_info.inbound.current_interrupt_mask; - info->inbound.read_idx = debug_info.inbound.current_read_index; - info->inbound.write_idx = debug_info.inbound.current_write_index; - info->inbound.bytes_avail_toread = - debug_info.inbound.bytes_avail_toread; - info->inbound.bytes_avail_towrite = - debug_info.inbound.bytes_avail_towrite; - - info->outbound.int_mask = - debug_info.outbound.current_interrupt_mask; - info->outbound.read_idx = debug_info.outbound.current_read_index; - info->outbound.write_idx = debug_info.outbound.current_write_index; - info->outbound.bytes_avail_toread = - debug_info.outbound.bytes_avail_toread; - info->outbound.bytes_avail_towrite = - debug_info.outbound.bytes_avail_towrite; + if (!hv_dev->channel) + return -ENODEV; + return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid); } +static DEVICE_ATTR_RO(id); -#define VMBUS_ALIAS_LEN ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2) -static void print_alias_name(struct hv_device *hv_dev, char *alias_name) +static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr, + char *buf) { - int i; - for (i = 0; i < VMBUS_ALIAS_LEN; i += 2) - sprintf(&alias_name[i], "%02x", hv_dev->dev_type.b[i/2]); + struct hv_device *hv_dev = device_to_hv_device(dev); + + if (!hv_dev->channel) + return -ENODEV; + return sprintf(buf, "%d\n", hv_dev->channel->state); } +static DEVICE_ATTR_RO(state); -/* - * vmbus_show_device_attr - Show the device attribute in sysfs. 
- * - * This is invoked when user does a - * "cat /sys/bus/vmbus/devices//" - */ -static ssize_t vmbus_show_device_attr(struct device *dev, - struct device_attribute *dev_attr, - char *buf) +static ssize_t monitor_id_show(struct device *dev, + struct device_attribute *dev_attr, char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + + if (!hv_dev->channel) + return -ENODEV; + return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid); +} +static DEVICE_ATTR_RO(monitor_id); + +static ssize_t class_id_show(struct device *dev, + struct device_attribute *dev_attr, char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + + if (!hv_dev->channel) + return -ENODEV; + return sprintf(buf, "{%pUl}\n", + hv_dev->channel->offermsg.offer.if_type.b); +} +static DEVICE_ATTR_RO(class_id); + +static ssize_t device_id_show(struct device *dev, + struct device_attribute *dev_attr, char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + + if (!hv_dev->channel) + return -ENODEV; + return sprintf(buf, "{%pUl}\n", + hv_dev->channel->offermsg.offer.if_instance.b); +} +static DEVICE_ATTR_RO(device_id); + +static ssize_t modalias_show(struct device *dev, + struct device_attribute *dev_attr, char *buf) { struct hv_device *hv_dev = device_to_hv_device(dev); - struct hv_device_info *device_info; char alias_name[VMBUS_ALIAS_LEN + 1]; - int ret = 0; - device_info = kzalloc(sizeof(struct hv_device_info), GFP_KERNEL); - if (!device_info) - return ret; + print_alias_name(hv_dev, alias_name); + return sprintf(buf, "vmbus:%s\n", alias_name); +} +static DEVICE_ATTR_RO(modalias); + +static ssize_t server_monitor_pending_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + + if (!hv_dev->channel) + return -ENODEV; + return sprintf(buf, "%d\n", + channel_pending(hv_dev->channel, + vmbus_connection.monitor_pages[1])); +} +static DEVICE_ATTR_RO(server_monitor_pending); + +static ssize_t client_monitor_pending_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + + if (!hv_dev->channel) + return -ENODEV; + return sprintf(buf, "%d\n", + channel_pending(hv_dev->channel, + vmbus_connection.monitor_pages[1])); +} +static DEVICE_ATTR_RO(client_monitor_pending); + +static ssize_t server_monitor_latency_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + + if (!hv_dev->channel) + return -ENODEV; + return sprintf(buf, "%d\n", + channel_latency(hv_dev->channel, + vmbus_connection.monitor_pages[0])); +} +static DEVICE_ATTR_RO(server_monitor_latency); + +static ssize_t client_monitor_latency_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + + if (!hv_dev->channel) + return -ENODEV; + return sprintf(buf, "%d\n", + channel_latency(hv_dev->channel, + vmbus_connection.monitor_pages[1])); +} +static DEVICE_ATTR_RO(client_monitor_latency); + +static ssize_t server_monitor_conn_id_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + + if (!hv_dev->channel) + return -ENODEV; + return sprintf(buf, "%d\n", + channel_conn_id(hv_dev->channel, + vmbus_connection.monitor_pages[0])); +} +static DEVICE_ATTR_RO(server_monitor_conn_id); + +static ssize_t client_monitor_conn_id_show(struct device *dev, + 
struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + + if (!hv_dev->channel) + return -ENODEV; + return sprintf(buf, "%d\n", + channel_conn_id(hv_dev->channel, + vmbus_connection.monitor_pages[1])); +} +static DEVICE_ATTR_RO(client_monitor_conn_id); + +static ssize_t out_intr_mask_show(struct device *dev, + struct device_attribute *dev_attr, char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + struct hv_ring_buffer_debug_info outbound; + + if (!hv_dev->channel) + return -ENODEV; + hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + return sprintf(buf, "%d\n", outbound.current_interrupt_mask); +} +static DEVICE_ATTR_RO(out_intr_mask); + +static ssize_t out_read_index_show(struct device *dev, + struct device_attribute *dev_attr, char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + struct hv_ring_buffer_debug_info outbound; + + if (!hv_dev->channel) + return -ENODEV; + hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + return sprintf(buf, "%d\n", outbound.current_read_index); +} +static DEVICE_ATTR_RO(out_read_index); + +static ssize_t out_write_index_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + struct hv_ring_buffer_debug_info outbound; + + if (!hv_dev->channel) + return -ENODEV; + hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + return sprintf(buf, "%d\n", outbound.current_write_index); +} +static DEVICE_ATTR_RO(out_write_index); + +static ssize_t out_read_bytes_avail_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + struct hv_ring_buffer_debug_info outbound; + + if (!hv_dev->channel) + return -ENODEV; + hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + return sprintf(buf, "%d\n", outbound.bytes_avail_toread); +} +static DEVICE_ATTR_RO(out_read_bytes_avail); + +static ssize_t out_write_bytes_avail_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + struct hv_ring_buffer_debug_info outbound; + + if (!hv_dev->channel) + return -ENODEV; + hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + return sprintf(buf, "%d\n", outbound.bytes_avail_towrite); +} +static DEVICE_ATTR_RO(out_write_bytes_avail); + +static ssize_t in_intr_mask_show(struct device *dev, + struct device_attribute *dev_attr, char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + struct hv_ring_buffer_debug_info inbound; + + if (!hv_dev->channel) + return -ENODEV; + hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + return sprintf(buf, "%d\n", inbound.current_interrupt_mask); +} +static DEVICE_ATTR_RO(in_intr_mask); + +static ssize_t in_read_index_show(struct device *dev, + struct device_attribute *dev_attr, char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + struct hv_ring_buffer_debug_info inbound; + + if (!hv_dev->channel) + return -ENODEV; + hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + return sprintf(buf, "%d\n", inbound.current_read_index); +} +static DEVICE_ATTR_RO(in_read_index); + +static ssize_t in_write_index_show(struct device *dev, + struct device_attribute *dev_attr, char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + struct hv_ring_buffer_debug_info inbound; + + if (!hv_dev->channel) + 
return -ENODEV; + hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + return sprintf(buf, "%d\n", inbound.current_write_index); +} +static DEVICE_ATTR_RO(in_write_index); - get_channel_info(hv_dev, device_info); +static ssize_t in_read_bytes_avail_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + struct hv_ring_buffer_debug_info inbound; - if (!strcmp(dev_attr->attr.name, "class_id")) { - ret = sprintf(buf, "{%pUl}\n", device_info->chn_type.b); - } else if (!strcmp(dev_attr->attr.name, "device_id")) { - ret = sprintf(buf, "{%pUl}\n", device_info->chn_instance.b); - } else if (!strcmp(dev_attr->attr.name, "modalias")) { - print_alias_name(hv_dev, alias_name); - ret = sprintf(buf, "vmbus:%s\n", alias_name); - } else if (!strcmp(dev_attr->attr.name, "state")) { - ret = sprintf(buf, "%d\n", device_info->chn_state); - } else if (!strcmp(dev_attr->attr.name, "id")) { - ret = sprintf(buf, "%d\n", device_info->chn_id); - } else if (!strcmp(dev_attr->attr.name, "out_intr_mask")) { - ret = sprintf(buf, "%d\n", device_info->outbound.int_mask); - } else if (!strcmp(dev_attr->attr.name, "out_read_index")) { - ret = sprintf(buf, "%d\n", device_info->outbound.read_idx); - } else if (!strcmp(dev_attr->attr.name, "out_write_index")) { - ret = sprintf(buf, "%d\n", device_info->outbound.write_idx); - } else if (!strcmp(dev_attr->attr.name, "out_read_bytes_avail")) { - ret = sprintf(buf, "%d\n", - device_info->outbound.bytes_avail_toread); - } else if (!strcmp(dev_attr->attr.name, "out_write_bytes_avail")) { - ret = sprintf(buf, "%d\n", - device_info->outbound.bytes_avail_towrite); - } else if (!strcmp(dev_attr->attr.name, "in_intr_mask")) { - ret = sprintf(buf, "%d\n", device_info->inbound.int_mask); - } else if (!strcmp(dev_attr->attr.name, "in_read_index")) { - ret = sprintf(buf, "%d\n", device_info->inbound.read_idx); - } else if (!strcmp(dev_attr->attr.name, "in_write_index")) { - ret = sprintf(buf, "%d\n", device_info->inbound.write_idx); - } else if (!strcmp(dev_attr->attr.name, "in_read_bytes_avail")) { - ret = sprintf(buf, "%d\n", - device_info->inbound.bytes_avail_toread); - } else if (!strcmp(dev_attr->attr.name, "in_write_bytes_avail")) { - ret = sprintf(buf, "%d\n", - device_info->inbound.bytes_avail_towrite); - } else if (!strcmp(dev_attr->attr.name, "monitor_id")) { - ret = sprintf(buf, "%d\n", device_info->monitor_id); - } else if (!strcmp(dev_attr->attr.name, "server_monitor_pending")) { - ret = sprintf(buf, "%d\n", device_info->server_monitor_pending); - } else if (!strcmp(dev_attr->attr.name, "server_monitor_latency")) { - ret = sprintf(buf, "%d\n", device_info->server_monitor_latency); - } else if (!strcmp(dev_attr->attr.name, "server_monitor_conn_id")) { - ret = sprintf(buf, "%d\n", - device_info->server_monitor_conn_id); - } else if (!strcmp(dev_attr->attr.name, "client_monitor_pending")) { - ret = sprintf(buf, "%d\n", device_info->client_monitor_pending); - } else if (!strcmp(dev_attr->attr.name, "client_monitor_latency")) { - ret = sprintf(buf, "%d\n", device_info->client_monitor_latency); - } else if (!strcmp(dev_attr->attr.name, "client_monitor_conn_id")) { - ret = sprintf(buf, "%d\n", - device_info->client_monitor_conn_id); + if (!hv_dev->channel) + return -ENODEV; + hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + return sprintf(buf, "%d\n", inbound.bytes_avail_toread); +} +static DEVICE_ATTR_RO(in_read_bytes_avail); + +static ssize_t 
in_write_bytes_avail_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + struct hv_ring_buffer_debug_info inbound; + + if (!hv_dev->channel) + return -ENODEV; + hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + return sprintf(buf, "%d\n", inbound.bytes_avail_towrite); +} +static DEVICE_ATTR_RO(in_write_bytes_avail); + +static ssize_t channel_vp_mapping_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + struct vmbus_channel *channel = hv_dev->channel, *cur_sc; + unsigned long flags; + int buf_size = PAGE_SIZE, n_written, tot_written; + struct list_head *cur; + + if (!channel) + return -ENODEV; + + tot_written = snprintf(buf, buf_size, "%u:%u\n", + channel->offermsg.child_relid, channel->target_cpu); + + spin_lock_irqsave(&channel->lock, flags); + + list_for_each(cur, &channel->sc_list) { + if (tot_written >= buf_size - 1) + break; + + cur_sc = list_entry(cur, struct vmbus_channel, sc_list); + n_written = scnprintf(buf + tot_written, + buf_size - tot_written, + "%u:%u\n", + cur_sc->offermsg.child_relid, + cur_sc->target_cpu); + tot_written += n_written; } - kfree(device_info); - return ret; + spin_unlock_irqrestore(&channel->lock, flags); + + return tot_written; } +static DEVICE_ATTR_RO(channel_vp_mapping); /* Set up per device attributes in /sys/bus/vmbus/devices/ */ -static struct device_attribute vmbus_device_attrs[] = { - __ATTR(id, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(state, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(class_id, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(device_id, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(monitor_id, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(modalias, S_IRUGO, vmbus_show_device_attr, NULL), - - __ATTR(server_monitor_pending, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(server_monitor_latency, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(server_monitor_conn_id, S_IRUGO, vmbus_show_device_attr, NULL), - - __ATTR(client_monitor_pending, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(client_monitor_latency, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(client_monitor_conn_id, S_IRUGO, vmbus_show_device_attr, NULL), - - __ATTR(out_intr_mask, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(out_read_index, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(out_write_index, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(out_read_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(out_write_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL), - - __ATTR(in_intr_mask, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(in_read_index, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(in_write_index, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(in_read_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(in_write_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR_NULL +static struct attribute *vmbus_attrs[] = { + &dev_attr_id.attr, + &dev_attr_state.attr, + &dev_attr_monitor_id.attr, + &dev_attr_class_id.attr, + &dev_attr_device_id.attr, + &dev_attr_modalias.attr, + &dev_attr_server_monitor_pending.attr, + &dev_attr_client_monitor_pending.attr, + &dev_attr_server_monitor_latency.attr, + &dev_attr_client_monitor_latency.attr, + &dev_attr_server_monitor_conn_id.attr, + &dev_attr_client_monitor_conn_id.attr, + &dev_attr_out_intr_mask.attr, + &dev_attr_out_read_index.attr, + &dev_attr_out_write_index.attr, + 
&dev_attr_out_read_bytes_avail.attr, + &dev_attr_out_write_bytes_avail.attr, + &dev_attr_in_intr_mask.attr, + &dev_attr_in_read_index.attr, + &dev_attr_in_write_index.attr, + &dev_attr_in_read_bytes_avail.attr, + &dev_attr_in_write_bytes_avail.attr, + &dev_attr_channel_vp_mapping.attr, + NULL, }; - +ATTRIBUTE_GROUPS(vmbus); /* * vmbus_uevent - add uevent for our device @@ -259,7 +531,7 @@ return ret; } -static uuid_le null_guid; +static const uuid_le null_guid; static inline bool is_null_guid(const __u8 *guid) { @@ -274,7 +546,7 @@ */ static const struct hv_vmbus_device_id *hv_vmbus_get_id( const struct hv_vmbus_device_id *id, - __u8 *guid) + const __u8 *guid) { for (; !is_null_guid(id->guid); id++) if (!memcmp(&id->guid, guid, sizeof(uuid_le))) @@ -330,14 +602,14 @@ */ static int vmbus_remove(struct device *child_device) { - struct hv_driver *drv = drv_to_hv_drv(child_device->driver); + struct hv_driver *drv; struct hv_device *dev = device_to_hv_device(child_device); - if (drv->remove) - drv->remove(dev); - else - pr_err("remove not set for driver %s\n", - dev_name(child_device)); + if (child_device->driver) { + drv = drv_to_hv_drv(child_device->driver); + if (drv->remove) + drv->remove(dev); + } return 0; } @@ -371,7 +643,10 @@ static void vmbus_device_release(struct device *device) { struct hv_device *hv_dev = device_to_hv_device(device); + struct vmbus_channel *channel = hv_dev->channel; + hv_process_channel_removal(channel, + channel->offermsg.child_relid); kfree(hv_dev); } @@ -384,12 +659,9 @@ .remove = vmbus_remove, .probe = vmbus_probe, .uevent = vmbus_uevent, - .dev_attrs = vmbus_device_attrs, + .dev_groups = vmbus_groups, }; -static const char *driver_name = "hyperv"; - - struct onmessage_work_context { struct work_struct work; struct hv_message msg; @@ -399,33 +671,80 @@ { struct onmessage_work_context *ctx; + /* Do not process messages if we're in DISCONNECTED state */ + if (vmbus_connection.conn_state == DISCONNECTED) + return; + ctx = container_of(work, struct onmessage_work_context, work); vmbus_onmessage(&ctx->msg); kfree(ctx); } +static void hv_process_timer_expiration(struct hv_message *msg, int cpu) +{ + struct clock_event_device *dev = hv_context.clk_evt[cpu]; + + if (dev->event_handler) + dev->event_handler(dev); + + msg->header.message_type = HVMSG_NONE; + + /* + * Make sure the write to MessageType (ie set to + * HVMSG_NONE) happens before we read the + * MessagePending and EOMing. 
Otherwise, the EOMing + * will not deliver any more messages since there is + * no empty slot + */ + mb(); + + if (msg->header.message_flags.msg_pending) { + /* + * This will cause message queue rescan to + * possibly deliver another msg from the + * hypervisor + */ + wrmsrl(HV_X64_MSR_EOM, 0); + } +} + static void vmbus_on_msg_dpc(unsigned long data) { int cpu = smp_processor_id(); void *page_addr = hv_context.synic_message_page[cpu]; struct hv_message *msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; + struct vmbus_channel_message_header *hdr; + struct vmbus_channel_message_table_entry *entry; struct onmessage_work_context *ctx; while (1) { - if (msg->header.message_type == HVMSG_NONE) { + if (msg->header.message_type == HVMSG_NONE) /* no msg */ break; - } else { + + hdr = (struct vmbus_channel_message_header *)msg->u.payload; + + if (hdr->msgtype >= CHANNELMSG_COUNT) { + WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype); + goto msg_handled; + } + + entry = &channel_message_table[hdr->msgtype]; + if (entry->handler_type == VMHT_BLOCKING) { ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC); if (ctx == NULL) continue; + INIT_WORK(&ctx->work, vmbus_onmessage_work); memcpy(&ctx->msg, msg, sizeof(*msg)); + queue_work(vmbus_connection.work_queue, &ctx->work); - } + } else + entry->message_handler(hdr); +msg_handled: msg->header.message_type = HVMSG_NONE; /* @@ -448,7 +767,7 @@ } } -static irqreturn_t vmbus_isr(int irq, void *dev_id) +static void vmbus_isr(void) { int cpu = smp_processor_id(); void *page_addr; @@ -458,7 +777,7 @@ page_addr = hv_context.synic_event_page[cpu]; if (page_addr == NULL) - return IRQ_NONE; + return; event = (union hv_synic_event_flags *)page_addr + VMBUS_MESSAGE_SINT; @@ -495,61 +814,15 @@ /* Check if there are actual msgs to be processed */ if (msg->header.message_type != HVMSG_NONE) { - handled = true; - tasklet_schedule(&msg_dpc); + if (msg->header.message_type == HVMSG_TIMER_EXPIRED) + hv_process_timer_expiration(msg, cpu); + else + tasklet_schedule(&msg_dpc); } - if (handled) - return IRQ_HANDLED; - else - return IRQ_NONE; -} - -/* - * vmbus interrupt flow handler: - * vmbus interrupts can concurrently occur on multiple CPUs and - * can be handled concurrently. - */ - -static void vmbus_flow_handler(unsigned int irq, struct irq_desc *desc) -{ - kstat_incr_irqs_this_cpu(irq, desc); - - desc->action->handler(irq, desc->action->dev_id); -} - -#ifdef CONFIG_HOTPLUG_CPU -static int hyperv_cpu_disable(void) -{ - return -ENOSYS; + add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0); } -static void hv_cpu_hotplug_quirk(bool vmbus_loaded) -{ - static void *previous_cpu_disable; - - /* - * Offlining a CPU when running on newer hypervisors (WS2012R2, Win8, - * ...) is not supported at this moment as channel interrupts are - * distributed across all of them. - */ - - if ((vmbus_proto_version == VERSION_WS2008) || - (vmbus_proto_version == VERSION_WIN7)) - return; - - if (vmbus_loaded) { - previous_cpu_disable = smp_ops.cpu_disable; - smp_ops.cpu_disable = hyperv_cpu_disable; - pr_notice("CPU offlining is not supported by hypervisor\n"); - } else if (previous_cpu_disable) - smp_ops.cpu_disable = previous_cpu_disable; -} -#else -static void hv_cpu_hotplug_quirk(bool vmbus_loaded) -{ -} -#endif /* * vmbus_bus_init -Main vmbus driver initialization routine. 
@@ -577,26 +850,11 @@ if (ret) goto err_cleanup; - ret = request_irq(irq, vmbus_isr, 0, driver_name, hv_acpi_dev); - - if (ret != 0) { - pr_err("Unable to request IRQ %d\n", - irq); - goto err_unregister; - } - - /* - * Vmbus interrupts can be handled concurrently on - * different CPUs. Establish an appropriate interrupt flow - * handler that can support this model. - */ - irq_set_handler(irq, vmbus_flow_handler); - - /* - * Register our interrupt handler. - */ - hv_register_vmbus_handler(irq, vmbus_isr); + hv_setup_vmbus_irq(vmbus_isr); + ret = hv_synic_alloc(); + if (ret) + goto err_alloc; /* * Initialize the per-cpu interrupt state and * connect to the host. @@ -604,17 +862,30 @@ on_each_cpu(hv_synic_init, NULL, 1); ret = vmbus_connect(); if (ret) - goto err_irq; + goto err_connect; + + if (vmbus_proto_version > VERSION_WIN7) + cpu_hotplug_disable(); + + /* + * Only register if the crash MSRs are available + */ + if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + register_die_notifier(&hyperv_die_block); + atomic_notifier_chain_register(&panic_notifier_list, + &hyperv_panic_block); + } - hv_cpu_hotplug_quirk(true); vmbus_request_offers(); return 0; -err_irq: - free_irq(irq, hv_acpi_dev); +err_connect: + on_each_cpu(hv_synic_cleanup, NULL, 1); +err_alloc: + hv_synic_free(); + hv_remove_vmbus_irq(); -err_unregister: bus_unregister(&hv_bus); err_cleanup: @@ -624,8 +895,8 @@ } /** - * __vmbus_child_driver_register - Register a vmbus's driver - * @drv: Pointer to driver structure you want to register + * __vmbus_child_driver_register() - Register a vmbus's driver + * @hv_driver: Pointer to driver structure you want to register * @owner: owner module of the drv * @mod_name: module name string * @@ -657,7 +928,8 @@ /** * vmbus_driver_unregister() - Unregister a vmbus's driver - * @drv: Pointer to driver structure you want to un-register + * @hv_driver: Pointer to driver structure you want to + * un-register * * Un-register the given driver that was previous registered with a call to * vmbus_driver_register() @@ -675,9 +947,9 @@ * vmbus_device_create - Creates and registers a new child device * on the vmbus. */ -struct hv_device *vmbus_device_create(uuid_le *type, - uuid_le *instance, - struct vmbus_channel *channel) +struct hv_device *vmbus_device_create(const uuid_le *type, + const uuid_le *instance, + struct vmbus_channel *channel) { struct hv_device *child_device_obj; @@ -703,10 +975,8 @@ { int ret = 0; - static atomic_t device_num = ATOMIC_INIT(0); - - dev_set_name(&child_device_obj->device, "vmbus_0_%d", - atomic_inc_return(&device_num)); + dev_set_name(&child_device_obj->device, "vmbus_%d", + child_device_obj->channel->id); child_device_obj->device.bus = &hv_bus; child_device_obj->device.parent = &hv_acpi_dev->dev; @@ -745,38 +1015,219 @@ /* - * VMBUS is an acpi enumerated device. Get the the IRQ information - * from DSDT. + * VMBUS is an acpi enumerated device. Get the information we + * need from DSDT. 
*/ - -static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *irq) +#define VTPM_BASE_ADDRESS 0xfed40000 +static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx) { + resource_size_t start = 0; + resource_size_t end = 0; + struct resource *new_res; + struct resource **old_res = &hyperv_mmio; + struct resource **prev_res = NULL; + + switch (res->type) { + case ACPI_RESOURCE_TYPE_IRQ: + irq = res->data.irq.interrupts[0]; + return AE_OK; - if (res->type == ACPI_RESOURCE_TYPE_IRQ) { - struct acpi_resource_irq *irqp; - irqp = &res->data.irq; + /* + * "Address" descriptors are for bus windows. Ignore + * "memory" descriptors, which are for registers on + * devices. + */ + case ACPI_RESOURCE_TYPE_ADDRESS32: + start = res->data.address32.address.minimum; + end = res->data.address32.address.maximum; + break; + + case ACPI_RESOURCE_TYPE_ADDRESS64: + start = res->data.address64.address.minimum; + end = res->data.address64.address.maximum; + break; + + default: + /* Unused resource type */ + return AE_OK; - *((unsigned int *)irq) = irqp->interrupts[0]; } + /* + * Ignore ranges that are below 1MB, as they're not + * necessary or useful here. + */ + if (end < 0x100000) + return AE_OK; + + new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC); + if (!new_res) + return AE_NO_MEMORY; + + /* If this range overlaps the virtual TPM, truncate it. */ + if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS) + end = VTPM_BASE_ADDRESS; + + new_res->name = "hyperv mmio"; + new_res->flags = IORESOURCE_MEM; + new_res->start = start; + new_res->end = end; + + do { + if (!*old_res) { + *old_res = new_res; + break; + } + + if ((*old_res)->end < new_res->start) { + new_res->sibling = *old_res; + if (prev_res) + (*prev_res)->sibling = new_res; + *old_res = new_res; + break; + } + + prev_res = old_res; + old_res = &(*old_res)->sibling; + + } while (1); return AE_OK; } +static int vmbus_acpi_remove(struct acpi_device *device) +{ + struct resource *cur_res; + struct resource *next_res; + + if (hyperv_mmio) { + for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) { + next_res = cur_res->sibling; + kfree(cur_res); + } + } + + return 0; +} + +/** + * vmbus_allocate_mmio() - Pick a memory-mapped I/O range. + * @new: If successful, supplies a pointer to the + * allocated MMIO space. + * @device_obj: Identifies the caller + * @min: Minimum guest physical address of the + * allocation + * @max: Maximum guest physical address + * @size: Size of the range to be allocated + * @align: Alignment of the range to be allocated + * @fb_overlap_ok: Whether this allocation can be allowed + * to overlap the video frame buffer. + * + * This function walks the resources granted to VMBus by the + * _CRS object in the ACPI namespace underneath the parent + * "bridge", whether that's a root PCI bus in the Generation 1 + * case or a Module Device in the Generation 2 case. It then + * attempts to allocate from the global MMIO pool in a way that + * matches the constraints supplied in these parameters and by + * that _CRS. 
+ * + * Return: 0 on success, -errno on failure + */ +int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, + resource_size_t min, resource_size_t max, + resource_size_t size, resource_size_t align, + bool fb_overlap_ok) +{ + struct resource *iter; + resource_size_t range_min, range_max, start, local_min, local_max; + const char *dev_n = dev_name(&device_obj->device); + u32 fb_end = screen_info.lfb_base + (screen_info.lfb_size << 1); + int i, retval; + + retval = -ENXIO; + down(&hyperv_mmio_lock); + + for (iter = hyperv_mmio; iter; iter = iter->sibling) { + if ((iter->start >= max) || (iter->end <= min)) + continue; + + range_min = iter->start; + range_max = iter->end; + + /* If this range overlaps the frame buffer, split it into + two tries. */ + for (i = 0; i < 2; i++) { + local_min = range_min; + local_max = range_max; + if (fb_overlap_ok || (range_min >= fb_end) || + (range_max <= screen_info.lfb_base)) { + i++; + } else { + if ((range_min <= screen_info.lfb_base) && + (range_max >= screen_info.lfb_base)) { + /* + * The frame buffer is in this window, + * so trim this into the part that + * precedes the frame buffer. + */ + local_max = screen_info.lfb_base - 1; + range_min = fb_end; + } else { + range_min = fb_end; + continue; + } + } + + start = (local_min + align - 1) & ~(align - 1); + for (; start + size - 1 <= local_max; start += align) { + *new = request_mem_region_exclusive(start, size, + dev_n); + if (*new) { + retval = 0; + goto exit; + } + } + } + } + +exit: + up(&hyperv_mmio_lock); + return retval; +} +EXPORT_SYMBOL_GPL(vmbus_allocate_mmio); + static int vmbus_acpi_add(struct acpi_device *device) { acpi_status result; + int ret_val = -ENODEV; + struct acpi_device *ancestor; hv_acpi_dev = device; result = acpi_walk_resources(device->handle, METHOD_NAME__CRS, - vmbus_walk_resources, &irq); + vmbus_walk_resources, NULL); - if (ACPI_FAILURE(result)) { - complete(&probe_event); - return -ENODEV; + if (ACPI_FAILURE(result)) + goto acpi_walk_err; + /* + * Some ancestor of the vmbus acpi device (Gen1 or Gen2 + * firmware) is the VMOD that has the mmio ranges. Get that. + */ + for (ancestor = device->parent; ancestor; ancestor = ancestor->parent) { + result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS, + vmbus_walk_resources, NULL); + + if (ACPI_FAILURE(result)) + continue; + if (hyperv_mmio) + break; } + ret_val = 0; + +acpi_walk_err: complete(&probe_event); - return 0; + if (ret_val) + vmbus_acpi_remove(device); + return ret_val; } static const struct acpi_device_id vmbus_acpi_device_ids[] = { @@ -791,9 +1242,33 @@ .ids = vmbus_acpi_device_ids, .ops = { .add = vmbus_acpi_add, + .remove = vmbus_acpi_remove, }, }; +static void hv_kexec_handler(void) +{ + int cpu; + + hv_synic_clockevents_cleanup(); + vmbus_initiate_unload(); + for_each_online_cpu(cpu) + smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1); + hv_cleanup(false); +} + +static void hv_crash_handler(struct pt_regs *regs) +{ + vmbus_initiate_unload(); + /* + * In the crash handler we can't schedule the synic cleanup on all + * CPUs, so do the cleanup for the current CPU only. This should be + * sufficient for kdump. + */ + hv_synic_cleanup(NULL); + hv_cleanup(true); +} + static int __init hv_acpi_init(void) { int ret, t; @@ -806,7 +1281,6 @@ /* * Get irq resources first. 
*/ - ret = acpi_bus_register_driver(&vmbus_acpi_driver); if (ret) @@ -827,6 +1301,9 @@ if (ret) goto cleanup; + hv_setup_kexec_handler(hv_kexec_handler); + hv_setup_crash_handler(hv_crash_handler); + return 0; cleanup: @@ -837,18 +1314,35 @@ static void __exit vmbus_exit(void) { + int cpu; - free_irq(irq, hv_acpi_dev); + hv_remove_kexec_handler(); + hv_remove_crash_handler(); + vmbus_connection.conn_state = DISCONNECTED; + hv_synic_clockevents_cleanup(); + vmbus_disconnect(); + hv_remove_vmbus_irq(); + tasklet_kill(&msg_dpc); vmbus_free_channels(); + if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + unregister_die_notifier(&hyperv_die_block); + atomic_notifier_chain_unregister(&panic_notifier_list, + &hyperv_panic_block); + } bus_unregister(&hv_bus); hv_cleanup(false); + for_each_online_cpu(cpu) { + tasklet_kill(hv_context.event_dpc[cpu]); + smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1); + } + hv_synic_free(); acpi_bus_unregister_driver(&vmbus_acpi_driver); - hv_cpu_hotplug_quirk(false); + if (vmbus_proto_version > VERSION_WIN7) + cpu_hotplug_enable(); } MODULE_LICENSE("GPL"); -MODULE_VERSION(HV_DRV_VERSION); subsys_initcall(hv_acpi_init); module_exit(vmbus_exit);
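The patch above wires Hyper-V crash reporting into both the 'die' and 'panic' notifier chains, preferring 'die' because it carries the faulting pt_regs. Below is a minimal, self-contained kernel-module sketch of that same notifier pattern; the module and symbol names are invented, it assumes x86 (regs->ip), and it only logs instead of writing the HV_X64_MSR_CRASH_* registers.

    /*
     * Sketch of the die-notifier pattern used by hyperv_die_block in the
     * patch. Illustration only: logs the faulting instruction pointer.
     */
    #include <linux/module.h>
    #include <linux/notifier.h>
    #include <linux/kdebug.h>
    #include <linux/ptrace.h>

    static int demo_die_event(struct notifier_block *nb, unsigned long val,
                              void *args)
    {
            struct die_args *die = args;

            pr_info("die event %lu, ip=%lx\n", val,
                    die->regs ? die->regs->ip : 0UL);
            return NOTIFY_DONE;     /* let other die handlers run too */
    }

    static struct notifier_block demo_die_block = {
            .notifier_call = demo_die_event,
    };

    static int __init demo_init(void)
    {
            register_die_notifier(&demo_die_block);
            return 0;
    }

    static void __exit demo_exit(void)
    {
            unregister_die_notifier(&demo_die_block);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");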
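channel_monitor_group() and channel_monitor_offset() in the patch split the 8-bit monitor ID into a trigger-group index (id / 32) and a slot offset within that group (id % 32), which then index the shared monitor page. A standalone sketch of that arithmetic, with made-up IDs:

    /* Decompose sample monitor IDs the way the patch does. */
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint8_t monitor_ids[] = { 0, 31, 32, 77, 127 };
            size_t i;

            for (i = 0; i < sizeof(monitor_ids); i++) {
                    uint8_t id = monitor_ids[i];
                    uint8_t group = id / 32;   /* trigger_group[] index */
                    uint8_t offset = id % 32;  /* slot within the group */

                    printf("monitor_id %3u -> group %u, offset %2u\n",
                           (unsigned)id, (unsigned)group, (unsigned)offset);
            }
            return 0;
    }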
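print_alias_name() in the patch expands the 16 raw bytes of the device-type GUID into the 32-hex-character body of the "vmbus:..." modalias reported by modalias_show() and vmbus_uevent(). The same transformation as a standalone program, with an arbitrary example GUID:

    #include <stdio.h>

    int main(void)
    {
            /* Arbitrary example bytes; the kernel reads hv_dev->dev_type.b[]. */
            const unsigned char guid[16] = {
                    0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
                    0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff
            };
            char alias[2 * 16 + 1];
            int i;

            /* Same loop shape as print_alias_name(): two hex digits per byte. */
            for (i = 0; i < 32; i += 2)
                    sprintf(&alias[i], "%02x", guid[i / 2]);

            printf("MODALIAS=vmbus:%s\n", alias);
            return 0;
    }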
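channel_vp_mapping_show() in the patch emits one "child_relid:target_cpu" line for the primary channel followed by one per sub-channel. A userspace sketch that parses those lines; the device name vmbus_1 is a placeholder for whatever dev_set_name("vmbus_%d", ...) produced on a given system:

    #include <stdio.h>

    int main(void)
    {
            /* Placeholder device path; adjust to a real vmbus device. */
            FILE *f = fopen("/sys/bus/vmbus/devices/vmbus_1/channel_vp_mapping",
                            "r");
            unsigned int relid, cpu;

            if (!f) {
                    perror("fopen");
                    return 1;
            }
            while (fscanf(f, "%u:%u", &relid, &cpu) == 2)
                    printf("channel %u -> virtual CPU %u\n", relid, cpu);
            fclose(f);
            return 0;
    }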
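The inner loop of vmbus_allocate_mmio() rounds the window start up to the requested alignment and then advances in align-sized steps, claiming the first range that request_mem_region_exclusive() grants. The address arithmetic in isolation, with invented window, size, and alignment values:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            /* Invented values standing in for one hyperv_mmio window and
             * the caller-supplied size/align constraints. */
            uint64_t local_min = 0xf8000123, local_max = 0xf80fffff;
            uint64_t size = 0x10000, align = 0x10000;
            uint64_t start = (local_min + align - 1) & ~(align - 1);

            for (; start + size - 1 <= local_max; start += align) {
                    /* The kernel tries request_mem_region_exclusive() here
                     * and stops at the first success; this demo just prints
                     * the first candidate. */
                    printf("candidate: 0x%llx-0x%llx\n",
                           (unsigned long long)start,
                           (unsigned long long)(start + size - 1));
                    break;
            }
            return 0;
    }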