--- zzzz-none-000/linux-3.10.107/drivers/hv/channel.c	2017-06-27 09:49:32.000000000 +0000
+++ scorpion-7490-727/linux-3.10.107/drivers/hv/channel.c	2021-02-04 17:41:59.000000000 +0000
@@ -27,6 +27,8 @@
 #include
 #include
 #include
+#include
+#include
 
 #include "hyperv_vmbus.h"
 
@@ -47,8 +49,8 @@
 		(unsigned long *) vmbus_connection.send_int_page +
 		(channel->offermsg.child_relid >> 5));
 
-	monitorpage = vmbus_connection.monitor_pages;
-	monitorpage++; /* Get the child to parent monitor page */
+	/* Get the child to parent monitor page */
+	monitorpage = vmbus_connection.monitor_pages[1];
 
 	sync_set_bit(channel->monitor_bit,
 		(unsigned long *)&monitorpage->trigger_group
@@ -60,50 +62,6 @@
 }
 
 /*
- * vmbus_get_debug_info -Retrieve various channel debug info
- */
-void vmbus_get_debug_info(struct vmbus_channel *channel,
-			  struct vmbus_channel_debug_info *debuginfo)
-{
-	struct hv_monitor_page *monitorpage;
-	u8 monitor_group = (u8)channel->offermsg.monitorid / 32;
-	u8 monitor_offset = (u8)channel->offermsg.monitorid % 32;
-
-	debuginfo->relid = channel->offermsg.child_relid;
-	debuginfo->state = channel->state;
-	memcpy(&debuginfo->interfacetype,
-	       &channel->offermsg.offer.if_type, sizeof(uuid_le));
-	memcpy(&debuginfo->interface_instance,
-	       &channel->offermsg.offer.if_instance,
-	       sizeof(uuid_le));
-
-	monitorpage = (struct hv_monitor_page *)vmbus_connection.monitor_pages;
-
-	debuginfo->monitorid = channel->offermsg.monitorid;
-
-	debuginfo->servermonitor_pending =
-		monitorpage->trigger_group[monitor_group].pending;
-	debuginfo->servermonitor_latency =
-		monitorpage->latency[monitor_group][monitor_offset];
-	debuginfo->servermonitor_connectionid =
-		monitorpage->parameter[monitor_group]
-			[monitor_offset].connectionid.u.id;
-
-	monitorpage++;
-
-	debuginfo->clientmonitor_pending =
-		monitorpage->trigger_group[monitor_group].pending;
-	debuginfo->clientmonitor_latency =
-		monitorpage->latency[monitor_group][monitor_offset];
-	debuginfo->clientmonitor_connectionid =
-		monitorpage->parameter[monitor_group]
-			[monitor_offset].connectionid.u.id;
-
-	hv_ringbuffer_get_debuginfo(&channel->inbound, &debuginfo->inbound);
-	hv_ringbuffer_get_debuginfo(&channel->outbound, &debuginfo->outbound);
-}
-
-/*
  * vmbus_open - Open the specified channel.
  */
 int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
@@ -115,17 +73,38 @@
 	void *in, *out;
 	unsigned long flags;
 	int ret, err = 0;
+	unsigned long t;
+	struct page *page;
+
+	spin_lock_irqsave(&newchannel->lock, flags);
+	if (newchannel->state == CHANNEL_OPEN_STATE) {
+		newchannel->state = CHANNEL_OPENING_STATE;
+	} else {
+		spin_unlock_irqrestore(&newchannel->lock, flags);
+		return -EINVAL;
+	}
+	spin_unlock_irqrestore(&newchannel->lock, flags);
 
 	newchannel->onchannel_callback = onchannelcallback;
 	newchannel->channel_callback_context = context;
 
 	/* Allocate the ring buffer */
-	out = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
-		get_order(send_ringbuffer_size + recv_ringbuffer_size));
-
-	if (!out)
-		return -ENOMEM;
+	page = alloc_pages_node(cpu_to_node(newchannel->target_cpu),
+				GFP_KERNEL|__GFP_ZERO,
+				get_order(send_ringbuffer_size +
				recv_ringbuffer_size));
+
+	if (!page)
+		out = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
			       get_order(send_ringbuffer_size +
			       recv_ringbuffer_size));
+	else
+		out = (void *)page_address(page);
+	if (!out) {
+		err = -ENOMEM;
+		goto error0;
+	}
 
 
 	in = (void *)((unsigned long)out + send_ringbuffer_size);
 
@@ -204,18 +183,24 @@
 		goto error1;
 	}
 
-	wait_for_completion(&open_info->waitevent);
-
-
-	if (open_info->response.open_result.status)
-		err = open_info->response.open_result.status;
+	t = wait_for_completion_timeout(&open_info->waitevent, 5*HZ);
+	if (t == 0) {
+		err = -ETIMEDOUT;
+		goto error1;
+	}
 
 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 	list_del(&open_info->msglistentry);
 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 
+	if (open_info->response.open_result.status) {
+		err = -EAGAIN;
+		goto error_gpadl;
+	}
+
+	newchannel->state = CHANNEL_OPENED_STATE;
 	kfree(open_info);
-	return err;
+	return 0;
 
 error1:
 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
@@ -229,6 +214,7 @@
 	free_pages((unsigned long)out,
 		get_order(send_ringbuffer_size + recv_ringbuffer_size));
 	kfree(open_info);
+	newchannel->state = CHANNEL_OPEN_STATE;
 	return err;
 }
 EXPORT_SYMBOL_GPL(vmbus_open);
@@ -242,7 +228,6 @@
 {
 	int i;
 	int pagecount;
-	unsigned long long pfn;
 	struct vmbus_channel_gpadl_header *gpadl_header;
 	struct vmbus_channel_gpadl_body *gpadl_body;
 	struct vmbus_channel_msginfo *msgheader;
@@ -252,7 +237,6 @@
 	int pfnsum, pfncount, pfnleft, pfncurr, pfnsize;
 
 	pagecount = size >> PAGE_SHIFT;
-	pfn = virt_to_phys(kbuffer) >> PAGE_SHIFT;
 
 	/* do we need a gpadl body msg */
 	pfnsize = MAX_SIZE_CHANNEL_MESSAGE -
@@ -281,7 +265,8 @@
 		gpadl_header->range[0].byte_offset = 0;
 		gpadl_header->range[0].byte_count = size;
 		for (i = 0; i < pfncount; i++)
-			gpadl_header->range[0].pfn_array[i] = pfn+i;
+			gpadl_header->range[0].pfn_array[i] = slow_virt_to_phys(
+				kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT;
 		*msginfo = msgheader;
 		*messagecount = 1;
 
@@ -334,7 +319,9 @@
 			 * so the hypervisor gurantees that this is ok.
 			 */
 			for (i = 0; i < pfncurr; i++)
-				gpadl_body->pfn[i] = pfn + pfnsum + i;
+				gpadl_body->pfn[i] = slow_virt_to_phys(
+					kbuffer + PAGE_SIZE * (pfnsum + i)) >>
+					PAGE_SHIFT;
 
 			/* add to msg header */
 			list_add_tail(&msgbody->msglistentry,
@@ -360,7 +347,8 @@
 		gpadl_header->range[0].byte_offset = 0;
 		gpadl_header->range[0].byte_count = size;
 		for (i = 0; i < pagecount; i++)
-			gpadl_header->range[0].pfn_array[i] = pfn+i;
+			gpadl_header->range[0].pfn_array[i] = slow_virt_to_phys(
+				kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT;
 		*msginfo = msgheader;
 		*messagecount = 1;
 
@@ -377,7 +365,7 @@
  * vmbus_establish_gpadl - Estabish a GPADL for the specified buffer
  *
  * @channel: a channel
- * @kbuffer: from kmalloc
+ * @kbuffer: from kmalloc or vmalloc
  * @size: page-size multiple
  * @gpadl_handle: some funky thing
  */
@@ -387,15 +375,15 @@
 	struct vmbus_channel_gpadl_header *gpadlmsg;
 	struct vmbus_channel_gpadl_body *gpadl_body;
 	struct vmbus_channel_msginfo *msginfo = NULL;
-	struct vmbus_channel_msginfo *submsginfo, *tmp;
+	struct vmbus_channel_msginfo *submsginfo;
 	u32 msgcount;
 	struct list_head *curr;
 	u32 next_gpadl_handle;
 	unsigned long flags;
 	int ret = 0;
 
-	next_gpadl_handle = atomic_read(&vmbus_connection.next_gpadl_handle);
-	atomic_inc(&vmbus_connection.next_gpadl_handle);
+	next_gpadl_handle =
+		(atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);
 
 	ret = create_gpadl_header(kbuffer, size, &msginfo, &msgcount);
 	if (ret)
@@ -449,13 +437,6 @@
 	list_del(&msginfo->msglistentry);
 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 
-	if (msgcount > 1) {
-		list_for_each_entry_safe(submsginfo, tmp, &msginfo->submsglist,
-			 msglistentry) {
-			kfree(submsginfo);
-		}
-	}
-
 	kfree(msginfo);
 	return ret;
 }
@@ -506,19 +487,42 @@
 }
 EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);
 
-/*
- * vmbus_close - Close the specified channel
- */
-void vmbus_close(struct vmbus_channel *channel)
+static void reset_channel_cb(void *arg)
+{
+	struct vmbus_channel *channel = arg;
+
+	channel->onchannel_callback = NULL;
+}
+
+static int vmbus_close_internal(struct vmbus_channel *channel)
 {
 	struct vmbus_channel_close_channel *msg;
+	struct tasklet_struct *tasklet;
 	int ret;
-	unsigned long flags;
 
+	/*
+	 * process_chn_event(), running in the tasklet, can race
+	 * with vmbus_close_internal() in the case of SMP guest, e.g., when
+	 * the former is accessing channel->inbound.ring_buffer, the latter
+	 * could be freeing the ring_buffer pages.
+	 *
+	 * To resolve the race, we can serialize them by disabling the
+	 * tasklet when the latter is running here.
+	 */
+	tasklet = hv_context.event_dpc[channel->target_cpu];
+	tasklet_disable(tasklet);
+
+	channel->state = CHANNEL_OPEN_STATE;
+	channel->sc_creation_callback = NULL;
 	/* Stop callback and cancel the timer asap */
-	spin_lock_irqsave(&channel->inbound_lock, flags);
-	channel->onchannel_callback = NULL;
-	spin_unlock_irqrestore(&channel->inbound_lock, flags);
+	if (channel->target_cpu != get_cpu()) {
+		put_cpu();
+		smp_call_function_single(channel->target_cpu, reset_channel_cb,
+					 channel, true);
+	} else {
+		reset_channel_cb(channel);
+		put_cpu();
+	}
 
 	/* Send a closing message */
 
@@ -529,11 +533,28 @@
 
 	ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_close_channel));
 
-	BUG_ON(ret != 0);
+	if (ret) {
+		pr_err("Close failed: close post msg return is %d\n", ret);
+		/*
+		 * If we failed to post the close msg,
+		 * it is perhaps better to leak memory.
+		 */
+		goto out;
+	}
+
 	/* Tear down the gpadl for the channel's ring buffer */
-	if (channel->ringbuffer_gpadlhandle)
-		vmbus_teardown_gpadl(channel,
-					  channel->ringbuffer_gpadlhandle);
+	if (channel->ringbuffer_gpadlhandle) {
+		ret = vmbus_teardown_gpadl(channel,
+					   channel->ringbuffer_gpadlhandle);
+		if (ret) {
+			pr_err("Close failed: teardown gpadl return %d\n", ret);
+			/*
+			 * If we failed to teardown gpadl,
+			 * it is perhaps better to leak memory.
+			 */
+			goto out;
+		}
+	}
 
 	/* Cleanup the ring buffers for this channel */
 	hv_ringbuffer_cleanup(&channel->outbound);
@@ -542,35 +563,56 @@
 	free_pages((unsigned long)channel->ringbuffer_pages,
 		get_order(channel->ringbuffer_pagecount * PAGE_SIZE));
 
+out:
+	tasklet_enable(tasklet);
+
+	return ret;
 
 }
-EXPORT_SYMBOL_GPL(vmbus_close);
 
-/**
- * vmbus_sendpacket() - Send the specified buffer on the given channel
- * @channel: Pointer to vmbus_channel structure.
- * @buffer: Pointer to the buffer you want to receive the data into.
- * @bufferlen: Maximum size of what the the buffer will hold
- * @requestid: Identifier of the request
- * @type: Type of packet that is being send e.g. negotiate, time
- * packet etc.
- *
- * Sends data in @buffer directly to hyper-v via the vmbus
- * This will send the data unparsed to hyper-v.
- *
- * Mainly used by Hyper-V drivers.
+/*
+ * vmbus_close - Close the specified channel
  */
-int vmbus_sendpacket(struct vmbus_channel *channel, const void *buffer,
+void vmbus_close(struct vmbus_channel *channel)
+{
+	struct list_head *cur, *tmp;
+	struct vmbus_channel *cur_channel;
+
+	if (channel->primary_channel != NULL) {
+		/*
+		 * We will only close sub-channels when
+		 * the primary is closed.
+		 */
+		return;
+	}
+	/*
+	 * Close all the sub-channels first and then close the
+	 * primary channel.
+	 */
+	list_for_each_safe(cur, tmp, &channel->sc_list) {
+		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
+		if (cur_channel->state != CHANNEL_OPENED_STATE)
+			continue;
+		vmbus_close_internal(cur_channel);
+	}
+	/*
+	 * Now close the primary.
+	 */
+	vmbus_close_internal(channel);
+}
+EXPORT_SYMBOL_GPL(vmbus_close);
+
+int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
 			   u32 bufferlen, u64 requestid,
-			   enum vmbus_packet_type type, u32 flags)
+			   enum vmbus_packet_type type, u32 flags, bool kick_q)
 {
 	struct vmpacket_descriptor desc;
 	u32 packetlen = sizeof(struct vmpacket_descriptor) + bufferlen;
 	u32 packetlen_aligned = ALIGN(packetlen, sizeof(u64));
-	struct scatterlist bufferlist[3];
+	struct kvec bufferlist[3];
 	u64 aligned_data = 0;
 	int ret;
 	bool signal = false;
+	int num_vecs = ((bufferlen != 0) ? 3 : 1);
 
 
 	/* Setup the descriptor */
@@ -581,29 +623,80 @@
 	desc.len8 = (u16)(packetlen_aligned >> 3);
 	desc.trans_id = requestid;
 
-	sg_init_table(bufferlist, 3);
-	sg_set_buf(&bufferlist[0], &desc, sizeof(struct vmpacket_descriptor));
-	sg_set_buf(&bufferlist[1], buffer, bufferlen);
-	sg_set_buf(&bufferlist[2], &aligned_data,
-		   packetlen_aligned - packetlen);
+	bufferlist[0].iov_base = &desc;
+	bufferlist[0].iov_len = sizeof(struct vmpacket_descriptor);
+	bufferlist[1].iov_base = buffer;
+	bufferlist[1].iov_len = bufferlen;
+	bufferlist[2].iov_base = &aligned_data;
+	bufferlist[2].iov_len = (packetlen_aligned - packetlen);
 
-	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal);
+	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs,
				  &signal);
 
-	if (ret == 0 && signal)
+	/*
+	 * Signalling the host is conditional on many factors:
+	 * 1. The ring state changed from being empty to non-empty.
+	 *    This is tracked by the variable "signal".
+	 * 2. The variable kick_q tracks if more data will be placed
+	 *    on the ring. We will not signal if more data is
+	 *    to be placed.
+	 *
+	 * Based on the channel signal state, we will decide
+	 * which signaling policy will be applied.
+	 *
+	 * If we cannot write to the ring-buffer; signal the host
+	 * even if we may not have written anything. This is a rare
+	 * enough condition that it should not matter.
+	 */
+
+	if (channel->signal_policy)
+		signal = true;
+	else
+		kick_q = true;
+
+	if (((ret == 0) && kick_q && signal) || (ret))
 		vmbus_setevent(channel);
 
 	return ret;
 }
+EXPORT_SYMBOL(vmbus_sendpacket_ctl);
+
+/**
+ * vmbus_sendpacket() - Send the specified buffer on the given channel
+ * @channel: Pointer to vmbus_channel structure.
+ * @buffer: Pointer to the buffer you want to receive the data into.
+ * @bufferlen: Maximum size of what the the buffer will hold
+ * @requestid: Identifier of the request
+ * @type: Type of packet that is being send e.g. negotiate, time
+ * packet etc.
+ *
+ * Sends data in @buffer directly to hyper-v via the vmbus
+ * This will send the data unparsed to hyper-v.
+ *
+ * Mainly used by Hyper-V drivers.
+ */
+int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer,
+		     u32 bufferlen, u64 requestid,
+		     enum vmbus_packet_type type, u32 flags)
+{
+	return vmbus_sendpacket_ctl(channel, buffer, bufferlen, requestid,
+				    type, flags, true);
+}
 EXPORT_SYMBOL(vmbus_sendpacket);
 
 /*
- * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer
- * packets using a GPADL Direct packet type.
+ * vmbus_sendpacket_pagebuffer_ctl - Send a range of single-page buffer
+ * packets using a GPADL Direct packet type. This interface allows you
+ * to control notifying the host. This will be useful for sending
+ * batched data. Also the sender can control the send flags
+ * explicitly.
  */
-int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
+int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
				    struct hv_page_buffer pagebuffers[],
				    u32 pagecount, void *buffer, u32 bufferlen,
-				    u64 requestid)
+				    u64 requestid,
+				    u32 flags,
+				    bool kick_q)
 {
 	int ret;
 	int i;
@@ -611,7 +704,7 @@
 	u32 descsize;
 	u32 packetlen;
 	u32 packetlen_aligned;
-	struct scatterlist bufferlist[3];
+	struct kvec bufferlist[3];
 	u64 aligned_data = 0;
 	bool signal = false;
 
@@ -631,7 +724,7 @@
 
 	/* Setup the descriptor */
 	desc.type = VM_PKT_DATA_USING_GPA_DIRECT;
-	desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
+	desc.flags = flags;
 	desc.dataoffset8 = descsize >> 3; /* in 8-bytes grandularity */
 	desc.length8 = (u16)(packetlen_aligned >> 3);
 	desc.transactionid = requestid;
@@ -643,24 +736,107 @@
 		desc.range[i].pfn = pagebuffers[i].pfn;
 	}
 
-	sg_init_table(bufferlist, 3);
-	sg_set_buf(&bufferlist[0], &desc, descsize);
-	sg_set_buf(&bufferlist[1], buffer, bufferlen);
-	sg_set_buf(&bufferlist[2], &aligned_data,
-		packetlen_aligned - packetlen);
+	bufferlist[0].iov_base = &desc;
+	bufferlist[0].iov_len = descsize;
+	bufferlist[1].iov_base = buffer;
+	bufferlist[1].iov_len = bufferlen;
+	bufferlist[2].iov_base = &aligned_data;
+	bufferlist[2].iov_len = (packetlen_aligned - packetlen);
 
 	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal);
 
-	if (ret == 0 && signal)
+	/*
+	 * Signalling the host is conditional on many factors:
+	 * 1. The ring state changed from being empty to non-empty.
+	 *    This is tracked by the variable "signal".
+	 * 2. The variable kick_q tracks if more data will be placed
+	 *    on the ring. We will not signal if more data is
+	 *    to be placed.
+	 *
+	 * Based on the channel signal state, we will decide
+	 * which signaling policy will be applied.
+	 *
+	 * If we cannot write to the ring-buffer; signal the host
+	 * even if we may not have written anything. This is a rare
+	 * enough condition that it should not matter.
+	 */
+
+	if (channel->signal_policy)
+		signal = true;
+	else
+		kick_q = true;
+
+	if (((ret == 0) && kick_q && signal) || (ret))
 		vmbus_setevent(channel);
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer_ctl);
+
+/*
+ * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer
+ * packets using a GPADL Direct packet type.
+ */
+int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
				struct hv_page_buffer pagebuffers[],
				u32 pagecount, void *buffer, u32 bufferlen,
				u64 requestid)
+{
+	u32 flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
+	return vmbus_sendpacket_pagebuffer_ctl(channel, pagebuffers, pagecount,
					       buffer, bufferlen, requestid,
					       flags, true);
+
+}
 EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer);
 
 /*
  * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet
  * using a GPADL Direct packet type.
+ * The buffer includes the vmbus descriptor.
+ */
+int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
			      struct vmbus_packet_mpb_array *desc,
			      u32 desc_size,
			      void *buffer, u32 bufferlen, u64 requestid)
+{
+	int ret;
+	u32 packetlen;
+	u32 packetlen_aligned;
+	struct kvec bufferlist[3];
+	u64 aligned_data = 0;
+	bool signal = false;
+
+	packetlen = desc_size + bufferlen;
+	packetlen_aligned = ALIGN(packetlen, sizeof(u64));
+
+	/* Setup the descriptor */
+	desc->type = VM_PKT_DATA_USING_GPA_DIRECT;
+	desc->flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
+	desc->dataoffset8 = desc_size >> 3; /* in 8-bytes grandularity */
+	desc->length8 = (u16)(packetlen_aligned >> 3);
+	desc->transactionid = requestid;
+	desc->rangecount = 1;
+
+	bufferlist[0].iov_base = desc;
+	bufferlist[0].iov_len = desc_size;
+	bufferlist[1].iov_base = buffer;
+	bufferlist[1].iov_len = bufferlen;
+	bufferlist[2].iov_base = &aligned_data;
+	bufferlist[2].iov_len = (packetlen_aligned - packetlen);
+
+	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal);
+
+	if (ret == 0 && signal)
+		vmbus_setevent(channel);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc);
+
+/*
+ * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet
+ * using a GPADL Direct packet type.
  */
 int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
				     struct hv_multipage_buffer *multi_pagebuffer,
@@ -671,14 +847,13 @@
 	u32 descsize;
 	u32 packetlen;
 	u32 packetlen_aligned;
-	struct scatterlist bufferlist[3];
+	struct kvec bufferlist[3];
 	u64 aligned_data = 0;
 	bool signal = false;
 	u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset,
					 multi_pagebuffer->len);
 
-
-	if ((pfncount < 0) || (pfncount > MAX_MULTIPAGE_BUFFER_COUNT))
+	if (pfncount > MAX_MULTIPAGE_BUFFER_COUNT)
 		return -EINVAL;
 
 	/*
@@ -706,11 +881,12 @@
 	memcpy(desc.range.pfn_array, multi_pagebuffer->pfn_array,
	       pfncount * sizeof(u64));
 
-	sg_init_table(bufferlist, 3);
-	sg_set_buf(&bufferlist[0], &desc, descsize);
-	sg_set_buf(&bufferlist[1], buffer, bufferlen);
-	sg_set_buf(&bufferlist[2], &aligned_data,
-		packetlen_aligned - packetlen);
+	bufferlist[0].iov_base = &desc;
+	bufferlist[0].iov_len = descsize;
+	bufferlist[1].iov_base = buffer;
+	bufferlist[1].iov_len = bufferlen;
+	bufferlist[2].iov_base = &aligned_data;
+	bufferlist[2].iov_len = (packetlen_aligned - packetlen);
 
 	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal);
 
@@ -803,12 +979,8 @@
 
 	*buffer_actual_len = packetlen;
 
-	if (packetlen > bufferlen) {
-		pr_err("Buffer too small - needed %d bytes but "
-			"got space for only %d bytes\n",
-			packetlen, bufferlen);
+	if (packetlen > bufferlen)
 		return -ENOBUFS;
-	}
 
 	*requestid = desc.trans_id;
 
@@ -819,6 +991,6 @@
 	if (signal)
 		vmbus_setevent(channel);
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw);
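
For orientation only (not part of the patch): below is a minimal sketch of how a guest driver might exercise the reworked open/send/close path that this diff introduces. It assumes the 3.10-era vmbus_open() prototype and the VM_PKT_DATA_INBAND packet type from <linux/hyperv.h>, plus the vmbus_sendpacket_ctl() export added above; the callback name, ring sizes and payload are hypothetical.

/*
 * Illustrative sketch only; example_onchannelcallback, EXAMPLE_RING_SIZE and
 * the payload are made up, and the vmbus_* prototypes are assumed to be
 * exported from this tree's <linux/hyperv.h>.
 */
#include <linux/hyperv.h>

#define EXAMPLE_RING_SIZE	(4 * PAGE_SIZE)

static void example_onchannelcallback(void *context)
{
	/* A real driver would drain the inbound ring here, e.g. with
	 * vmbus_recvpacket() or vmbus_recvpacket_raw(). */
}

static int example_open_and_send(struct vmbus_channel *channel)
{
	u8 payload[16] = { 0 };		/* made-up request body */
	int ret;

	/* Allocates the ring buffers (NUMA-local when possible after this
	 * patch), posts the open request and waits up to five seconds. */
	ret = vmbus_open(channel, EXAMPLE_RING_SIZE, EXAMPLE_RING_SIZE,
			 NULL, 0, example_onchannelcallback, channel);
	if (ret)
		return ret;

	/* kick_q = false hints that more packets will follow; per the logic
	 * in vmbus_sendpacket_ctl() it only suppresses the host interrupt
	 * when the channel uses an explicit signal policy, otherwise the host
	 * is still signalled on an empty-to-non-empty ring transition. */
	ret = vmbus_sendpacket_ctl(channel, payload, sizeof(payload),
				   1 /* requestid */, VM_PKT_DATA_INBAND,
				   VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED,
				   false);

	vmbus_close(channel);
	return ret;
}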