/******************************************************************************* Intel(R) 82576 Virtual Function Linux driver Copyright(c) 2009 - 2012 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, version 2, as published by the Free Software Foundation. This program is distributed in the hope it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see . The full GNU General Public License is included in this distribution in the file called "COPYING". Contact Information: e1000-devel Mailing List Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 *******************************************************************************/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "igbvf.h" #define DRV_VERSION "2.4.0-k" char igbvf_driver_name[] = "igbvf"; const char igbvf_driver_version[] = DRV_VERSION; static const char igbvf_driver_string[] = "Intel(R) Gigabit Virtual Function Network Driver"; static const char igbvf_copyright[] = "Copyright (c) 2009 - 2012 Intel Corporation."; #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK) static int debug = -1; module_param(debug, int, 0); MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); static int igbvf_poll(struct napi_struct *napi, int budget); static void igbvf_reset(struct igbvf_adapter *); static void igbvf_set_interrupt_capability(struct igbvf_adapter *); static void igbvf_reset_interrupt_capability(struct igbvf_adapter *); static struct igbvf_info igbvf_vf_info = { .mac 
= e1000_vfadapt, .flags = 0, .pba = 10, .init_ops = e1000_init_function_pointers_vf, }; static struct igbvf_info igbvf_i350_vf_info = { .mac = e1000_vfadapt_i350, .flags = 0, .pba = 10, .init_ops = e1000_init_function_pointers_vf, }; static const struct igbvf_info *igbvf_info_tbl[] = { [board_vf] = &igbvf_vf_info, [board_i350_vf] = &igbvf_i350_vf_info, }; /** * igbvf_desc_unused - calculate if we have unused descriptors * @rx_ring: address of receive ring structure **/ static int igbvf_desc_unused(struct igbvf_ring *ring) { if (ring->next_to_clean > ring->next_to_use) return ring->next_to_clean - ring->next_to_use - 1; return ring->count + ring->next_to_clean - ring->next_to_use - 1; } /** * igbvf_receive_skb - helper function to handle Rx indications * @adapter: board private structure * @status: descriptor status field as written by hardware * @vlan: descriptor vlan field as written by hardware (no le/be conversion) * @skb: pointer to sk_buff to be indicated to stack **/ static void igbvf_receive_skb(struct igbvf_adapter *adapter, struct net_device *netdev, struct sk_buff *skb, u32 status, u16 vlan) { u16 vid; if (status & E1000_RXD_STAT_VP) { if ((adapter->flags & IGBVF_FLAG_RX_LB_VLAN_BSWAP) && (status & E1000_RXDEXT_STATERR_LB)) vid = be16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK; else vid = le16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK; if (test_bit(vid, adapter->active_vlans)) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); } napi_gro_receive(&adapter->rx_ring->napi, skb); } static inline void igbvf_rx_checksum_adv(struct igbvf_adapter *adapter, u32 status_err, struct sk_buff *skb) { skb_checksum_none_assert(skb); /* Ignore Checksum bit is set or checksum is disabled through ethtool */ if ((status_err & E1000_RXD_STAT_IXSM) || (adapter->flags & IGBVF_FLAG_RX_CSUM_DISABLED)) return; /* TCP/UDP checksum error bit is set */ if (status_err & (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) { /* let the stack verify checksum errors */ 
adapter->hw_csum_err++; return; } /* It must be a TCP or UDP packet with a valid checksum */ if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) skb->ip_summed = CHECKSUM_UNNECESSARY; adapter->hw_csum_good++; } /** * igbvf_alloc_rx_buffers - Replace used receive buffers; packet split * @rx_ring: address of ring structure to repopulate * @cleaned_count: number of buffers to repopulate **/ static void igbvf_alloc_rx_buffers(struct igbvf_ring *rx_ring, int cleaned_count) { struct igbvf_adapter *adapter = rx_ring->adapter; struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; union e1000_adv_rx_desc *rx_desc; struct igbvf_buffer *buffer_info; struct sk_buff *skb; unsigned int i; int bufsz; i = rx_ring->next_to_use; buffer_info = &rx_ring->buffer_info[i]; if (adapter->rx_ps_hdr_size) bufsz = adapter->rx_ps_hdr_size; else bufsz = adapter->rx_buffer_len; while (cleaned_count--) { rx_desc = IGBVF_RX_DESC_ADV(*rx_ring, i); if (adapter->rx_ps_hdr_size && !buffer_info->page_dma) { if (!buffer_info->page) { buffer_info->page = alloc_page(GFP_ATOMIC); if (!buffer_info->page) { adapter->alloc_rx_buff_failed++; goto no_buffers; } buffer_info->page_offset = 0; } else { buffer_info->page_offset ^= PAGE_SIZE / 2; } buffer_info->page_dma = dma_map_page(&pdev->dev, buffer_info->page, buffer_info->page_offset, PAGE_SIZE / 2, DMA_FROM_DEVICE); if (dma_mapping_error(&pdev->dev, buffer_info->page_dma)) { __free_page(buffer_info->page); buffer_info->page = NULL; dev_err(&pdev->dev, "RX DMA map failed\n"); break; } } if (!buffer_info->skb) { skb = netdev_alloc_skb_ip_align(netdev, bufsz); if (!skb) { adapter->alloc_rx_buff_failed++; goto no_buffers; } buffer_info->skb = skb; buffer_info->dma = dma_map_single(&pdev->dev, skb->data, bufsz, DMA_FROM_DEVICE); if (dma_mapping_error(&pdev->dev, buffer_info->dma)) { dev_kfree_skb(buffer_info->skb); buffer_info->skb = NULL; dev_err(&pdev->dev, "RX DMA map failed\n"); goto no_buffers; } } /* Refresh the desc 
even if buffer_addrs didn't change because * each write-back erases this info. */ if (adapter->rx_ps_hdr_size) { rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->page_dma); rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma); } else { rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma); rx_desc->read.hdr_addr = 0; } i++; if (i == rx_ring->count) i = 0; buffer_info = &rx_ring->buffer_info[i]; } no_buffers: if (rx_ring->next_to_use != i) { rx_ring->next_to_use = i; if (i == 0) i = (rx_ring->count - 1); else i--; /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, * such as IA-64). */ wmb(); writel(i, adapter->hw.hw_addr + rx_ring->tail); } } /** * igbvf_clean_rx_irq - Send received data up the network stack; legacy * @adapter: board private structure * * the return value indicates whether actual cleaning was done, there * is no guarantee that everything was cleaned **/ static bool igbvf_clean_rx_irq(struct igbvf_adapter *adapter, int *work_done, int work_to_do) { struct igbvf_ring *rx_ring = adapter->rx_ring; struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; union e1000_adv_rx_desc *rx_desc, *next_rxd; struct igbvf_buffer *buffer_info, *next_buffer; struct sk_buff *skb; bool cleaned = false; int cleaned_count = 0; unsigned int total_bytes = 0, total_packets = 0; unsigned int i; u32 length, hlen, staterr; i = rx_ring->next_to_clean; rx_desc = IGBVF_RX_DESC_ADV(*rx_ring, i); staterr = le32_to_cpu(rx_desc->wb.upper.status_error); while (staterr & E1000_RXD_STAT_DD) { if (*work_done >= work_to_do) break; (*work_done)++; rmb(); /* read descriptor and rx_buffer_info after status DD */ buffer_info = &rx_ring->buffer_info[i]; /* HW will not DMA in data larger than the given buffer, even * if it parses the (NFS, of course) header to be larger. In * that case, it fills the header buffer and spills the rest * into the page. 
*/ hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info) & E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT; if (hlen > adapter->rx_ps_hdr_size) hlen = adapter->rx_ps_hdr_size; length = le16_to_cpu(rx_desc->wb.upper.length); cleaned = true; cleaned_count++; skb = buffer_info->skb; prefetch(skb->data - NET_IP_ALIGN); buffer_info->skb = NULL; if (!adapter->rx_ps_hdr_size) { dma_unmap_single(&pdev->dev, buffer_info->dma, adapter->rx_buffer_len, DMA_FROM_DEVICE); buffer_info->dma = 0; skb_put(skb, length); goto send_up; } if (!skb_shinfo(skb)->nr_frags) { dma_unmap_single(&pdev->dev, buffer_info->dma, adapter->rx_ps_hdr_size, DMA_FROM_DEVICE); buffer_info->dma = 0; skb_put(skb, hlen); } if (length) { dma_unmap_page(&pdev->dev, buffer_info->page_dma, PAGE_SIZE / 2, DMA_FROM_DEVICE); buffer_info->page_dma = 0; skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, buffer_info->page, buffer_info->page_offset, length); if ((adapter->rx_buffer_len > (PAGE_SIZE / 2)) || (page_count(buffer_info->page) != 1)) buffer_info->page = NULL; else get_page(buffer_info->page); skb->len += length; skb->data_len += length; skb->truesize += PAGE_SIZE / 2; } send_up: i++; if (i == rx_ring->count) i = 0; next_rxd = IGBVF_RX_DESC_ADV(*rx_ring, i); prefetch(next_rxd); next_buffer = &rx_ring->buffer_info[i]; if (!(staterr & E1000_RXD_STAT_EOP)) { buffer_info->skb = next_buffer->skb; buffer_info->dma = next_buffer->dma; next_buffer->skb = skb; next_buffer->dma = 0; goto next_desc; } if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) { dev_kfree_skb_irq(skb); goto next_desc; } total_bytes += skb->len; total_packets++; igbvf_rx_checksum_adv(adapter, staterr, skb); skb->protocol = eth_type_trans(skb, netdev); igbvf_receive_skb(adapter, netdev, skb, staterr, rx_desc->wb.upper.vlan); next_desc: rx_desc->wb.upper.status_error = 0; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IGBVF_RX_BUFFER_WRITE) { igbvf_alloc_rx_buffers(rx_ring, cleaned_count); 
cleaned_count = 0; } /* use prefetched values */ rx_desc = next_rxd; buffer_info = next_buffer; staterr = le32_to_cpu(rx_desc->wb.upper.status_error); } rx_ring->next_to_clean = i; cleaned_count = igbvf_desc_unused(rx_ring); if (cleaned_count) igbvf_alloc_rx_buffers(rx_ring, cleaned_count); adapter->total_rx_packets += total_packets; adapter->total_rx_bytes += total_bytes; adapter->net_stats.rx_bytes += total_bytes; adapter->net_stats.rx_packets += total_packets; return cleaned; } static void igbvf_put_txbuf(struct igbvf_adapter *adapter, struct igbvf_buffer *buffer_info) { if (buffer_info->dma) { if (buffer_info->mapped_as_page) dma_unmap_page(&adapter->pdev->dev, buffer_info->dma, buffer_info->length, DMA_TO_DEVICE); else dma_unmap_single(&adapter->pdev->dev, buffer_info->dma, buffer_info->length, DMA_TO_DEVICE); buffer_info->dma = 0; } if (buffer_info->skb) { dev_kfree_skb_any(buffer_info->skb); buffer_info->skb = NULL; } buffer_info->time_stamp = 0; } /** * igbvf_setup_tx_resources - allocate Tx resources (Descriptors) * @adapter: board private structure * * Return 0 on success, negative on failure **/ int igbvf_setup_tx_resources(struct igbvf_adapter *adapter, struct igbvf_ring *tx_ring) { struct pci_dev *pdev = adapter->pdev; int size; size = sizeof(struct igbvf_buffer) * tx_ring->count; tx_ring->buffer_info = vzalloc(size); if (!tx_ring->buffer_info) goto err; /* round up to nearest 4K */ tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc); tx_ring->size = ALIGN(tx_ring->size, 4096); tx_ring->desc = dma_alloc_coherent(&pdev->dev, tx_ring->size, &tx_ring->dma, GFP_KERNEL); if (!tx_ring->desc) goto err; tx_ring->adapter = adapter; tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; return 0; err: vfree(tx_ring->buffer_info); dev_err(&adapter->pdev->dev, "Unable to allocate memory for the transmit descriptor ring\n"); return -ENOMEM; } /** * igbvf_setup_rx_resources - allocate Rx resources (Descriptors) * @adapter: board private structure * * 
Returns 0 on success, negative on failure **/ int igbvf_setup_rx_resources(struct igbvf_adapter *adapter, struct igbvf_ring *rx_ring) { struct pci_dev *pdev = adapter->pdev; int size, desc_len; size = sizeof(struct igbvf_buffer) * rx_ring->count; rx_ring->buffer_info = vzalloc(size); if (!rx_ring->buffer_info) goto err; desc_len = sizeof(union e1000_adv_rx_desc); /* Round up to nearest 4K */ rx_ring->size = rx_ring->count * desc_len; rx_ring->size = ALIGN(rx_ring->size, 4096); rx_ring->desc = dma_alloc_coherent(&pdev->dev, rx_ring->size, &rx_ring->dma, GFP_KERNEL); if (!rx_ring->desc) goto err; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; rx_ring->adapter = adapter; return 0; err: vfree(rx_ring->buffer_info); rx_ring->buffer_info = NULL; dev_err(&adapter->pdev->dev, "Unable to allocate memory for the receive descriptor ring\n"); return -ENOMEM; } /** * igbvf_clean_tx_ring - Free Tx Buffers * @tx_ring: ring to be cleaned **/ static void igbvf_clean_tx_ring(struct igbvf_ring *tx_ring) { struct igbvf_adapter *adapter = tx_ring->adapter; struct igbvf_buffer *buffer_info; unsigned long size; unsigned int i; if (!tx_ring->buffer_info) return; /* Free all the Tx ring sk_buffs */ for (i = 0; i < tx_ring->count; i++) { buffer_info = &tx_ring->buffer_info[i]; igbvf_put_txbuf(adapter, buffer_info); } size = sizeof(struct igbvf_buffer) * tx_ring->count; memset(tx_ring->buffer_info, 0, size); /* Zero out the descriptor ring */ memset(tx_ring->desc, 0, tx_ring->size); tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; writel(0, adapter->hw.hw_addr + tx_ring->head); writel(0, adapter->hw.hw_addr + tx_ring->tail); } /** * igbvf_free_tx_resources - Free Tx Resources per Queue * @tx_ring: ring to free resources from * * Free all transmit software resources **/ void igbvf_free_tx_resources(struct igbvf_ring *tx_ring) { struct pci_dev *pdev = tx_ring->adapter->pdev; igbvf_clean_tx_ring(tx_ring); vfree(tx_ring->buffer_info); tx_ring->buffer_info = NULL; 
dma_free_coherent(&pdev->dev, tx_ring->size, tx_ring->desc, tx_ring->dma); tx_ring->desc = NULL; } /** * igbvf_clean_rx_ring - Free Rx Buffers per Queue * @adapter: board private structure **/ static void igbvf_clean_rx_ring(struct igbvf_ring *rx_ring) { struct igbvf_adapter *adapter = rx_ring->adapter; struct igbvf_buffer *buffer_info; struct pci_dev *pdev = adapter->pdev; unsigned long size; unsigned int i; if (!rx_ring->buffer_info) return; /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { buffer_info = &rx_ring->buffer_info[i]; if (buffer_info->dma) { if (adapter->rx_ps_hdr_size) { dma_unmap_single(&pdev->dev, buffer_info->dma, adapter->rx_ps_hdr_size, DMA_FROM_DEVICE); } else { dma_unmap_single(&pdev->dev, buffer_info->dma, adapter->rx_buffer_len, DMA_FROM_DEVICE); } buffer_info->dma = 0; } if (buffer_info->skb) { dev_kfree_skb(buffer_info->skb); buffer_info->skb = NULL; } if (buffer_info->page) { if (buffer_info->page_dma) dma_unmap_page(&pdev->dev, buffer_info->page_dma, PAGE_SIZE / 2, DMA_FROM_DEVICE); put_page(buffer_info->page); buffer_info->page = NULL; buffer_info->page_dma = 0; buffer_info->page_offset = 0; } } size = sizeof(struct igbvf_buffer) * rx_ring->count; memset(rx_ring->buffer_info, 0, size); /* Zero out the descriptor ring */ memset(rx_ring->desc, 0, rx_ring->size); rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; writel(0, adapter->hw.hw_addr + rx_ring->head); writel(0, adapter->hw.hw_addr + rx_ring->tail); } /** * igbvf_free_rx_resources - Free Rx Resources * @rx_ring: ring to clean the resources from * * Free all receive software resources **/ void igbvf_free_rx_resources(struct igbvf_ring *rx_ring) { struct pci_dev *pdev = rx_ring->adapter->pdev; igbvf_clean_rx_ring(rx_ring); vfree(rx_ring->buffer_info); rx_ring->buffer_info = NULL; dma_free_coherent(&pdev->dev, rx_ring->size, rx_ring->desc, rx_ring->dma); rx_ring->desc = NULL; } /** * igbvf_update_itr - update the dynamic ITR value based on statistics * 
@adapter: pointer to adapter
 * @itr_setting: current adapter->itr
 * @packets: the number of packets during this measurement interval
 * @bytes: the number of bytes during this measurement interval
 *
 * Stores a new ITR value based on packets and byte counts during the last
 * interrupt. The advantage of per interrupt computation is faster updates
 * and more accurate ITR for the current traffic pattern. Constants in this
 * function were computed based on theoretical maximum wire speed and thresholds
 * were set based on testing data as well as attempting to minimize response
 * time while increasing bulk throughput.
 **/
/* Returns the new latency range; @itr_setting is returned unchanged when
 * @packets is zero or when @itr_setting is not one of the three ranges
 * handled by the switch.  @adapter is not referenced in the body.
 */
static enum latency_range igbvf_update_itr(struct igbvf_adapter *adapter,
					   enum latency_range itr_setting,
					   int packets, int bytes)
{
	enum latency_range retval = itr_setting;

	/* nothing was measured (and avoids dividing by zero below) */
	if (packets == 0)
		goto update_itr_done;

	switch (itr_setting) {
	case lowest_latency:
		/* handle TSO and jumbo frames */
		if (bytes/packets > 8000)
			retval = bulk_latency;
		else if ((packets < 5) && (bytes > 512))
			retval = low_latency;
		break;
	case low_latency:  /* 50 usec aka 20000 ints/s */
		if (bytes > 10000) {
			/* this if handles the TSO accounting */
			if (bytes/packets > 8000)
				retval = bulk_latency;
			else if ((packets < 10) || ((bytes/packets) > 1200))
				retval = bulk_latency;
			else if ((packets > 35))
				retval = lowest_latency;
		} else if (bytes/packets > 2000) {
			retval = bulk_latency;
		} else if (packets <= 2 && bytes < 512) {
			retval = lowest_latency;
		}
		break;
	case bulk_latency: /* 250 usec aka 4000 ints/s */
		if (bytes > 25000) {
			if (packets > 35)
				retval = low_latency;
		} else if (bytes < 6000) {
			retval = low_latency;
		}
		break;
	default:
		break;
	}

update_itr_done:
	return retval;
}

/* igbvf_range_to_itr - map a latency range to its IGBVF_*_ITR constant.
 * Any value other than the three named ranges maps to IGBVF_START_ITR.
 */
static int igbvf_range_to_itr(enum latency_range current_range)
{
	int new_itr;

	switch (current_range) {
	/* counts and packets in update_itr are dependent on these numbers */
	case lowest_latency:
		new_itr = IGBVF_70K_ITR;
		break;
	case low_latency:
		new_itr = IGBVF_20K_ITR;
		break;
	case bulk_latency:
		new_itr = IGBVF_4K_ITR;
		break;
	default:
		new_itr = IGBVF_START_ITR;
		break;
	}
	return new_itr;
}

/* igbvf_set_itr - recompute itr_range / itr_val for the Tx and Rx rings.
 *
 * For each ring: derive a latency range from the adapter's total_*_packets
 * and total_*_bytes counters, translate it to an ITR value and, if that
 * differs from the ring's itr_val, store it and raise set_itr so the MSI-X
 * handlers in this file write it to the ring's itr_register.
 *
 * NOTE(review): the second argument handed to igbvf_update_itr() is itr_val,
 * which this function assigns from igbvf_range_to_itr() (an IGBVF_*_ITR
 * constant), while the callee switches on an enum latency_range.  Confirm
 * whether itr_range was intended.
 */
static void igbvf_set_itr(struct igbvf_adapter *adapter)
{
	u32 new_itr;

	adapter->tx_ring->itr_range =
			igbvf_update_itr(adapter,
					 adapter->tx_ring->itr_val,
					 adapter->total_tx_packets,
					 adapter->total_tx_bytes);

	/* conservative mode (itr 3) eliminates the lowest_latency setting */
	if (adapter->requested_itr == 3 &&
	    adapter->tx_ring->itr_range == lowest_latency)
		adapter->tx_ring->itr_range = low_latency;

	new_itr = igbvf_range_to_itr(adapter->tx_ring->itr_range);

	if (new_itr != adapter->tx_ring->itr_val) {
		u32 current_itr = adapter->tx_ring->itr_val;
		/* this attempts to bias the interrupt rate towards Bulk
		 * by adding intermediate steps when interrupt rate is
		 * increasing
		 */
		new_itr = new_itr > current_itr ?
			  min(current_itr + (new_itr >> 2), new_itr) :
			  new_itr;
		adapter->tx_ring->itr_val = new_itr;

		adapter->tx_ring->set_itr = 1;
	}

	/* same procedure for the Rx ring */
	adapter->rx_ring->itr_range =
			igbvf_update_itr(adapter, adapter->rx_ring->itr_val,
					 adapter->total_rx_packets,
					 adapter->total_rx_bytes);

	if (adapter->requested_itr == 3 &&
	    adapter->rx_ring->itr_range == lowest_latency)
		adapter->rx_ring->itr_range = low_latency;

	new_itr = igbvf_range_to_itr(adapter->rx_ring->itr_range);

	if (new_itr != adapter->rx_ring->itr_val) {
		u32 current_itr = adapter->rx_ring->itr_val;

		new_itr = new_itr > current_itr ?
			  min(current_itr + (new_itr >> 2), new_itr) :
			  new_itr;
		adapter->rx_ring->itr_val = new_itr;

		adapter->rx_ring->set_itr = 1;
	}
}

/**
 * igbvf_clean_tx_irq - Reclaim resources after transmit completes
 * @tx_ring: Tx ring to reclaim completed buffers from
 *
 * Walks the ring from next_to_clean, one packet (first buffer up to its
 * next_to_watch descriptor) at a time, for as long as that end-of-packet
 * descriptor has the DD status bit set.  Wakes the net device queue when
 * enough descriptors are free again and adds the reclaimed byte/packet
 * counts to adapter->net_stats.
 *
 * returns true if fewer than tx_ring->count descriptors were processed
 * (i.e. the walk stopped because no completed work was left)
 **/
static bool igbvf_clean_tx_irq(struct igbvf_ring *tx_ring)
{
	struct igbvf_adapter *adapter = tx_ring->adapter;
	struct net_device *netdev = adapter->netdev;
	struct igbvf_buffer *buffer_info;
	struct sk_buff *skb;
	union e1000_adv_tx_desc *tx_desc, *eop_desc;
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int i, count = 0;
	bool cleaned = false;

	i = tx_ring->next_to_clean;
	buffer_info = &tx_ring->buffer_info[i];
	eop_desc = buffer_info->next_to_watch;

	do {
		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* prevent any other reads prior to eop_desc */
		smp_rmb();

		/* if DD is not set pending work has not been completed */
		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
			break;

		/* clear next_to_watch to prevent false hangs */
		buffer_info->next_to_watch = NULL;

		/* release every buffer of this packet, up to and including
		 * the one belonging to eop_desc
		 */
		for (cleaned = false; !cleaned; count++) {
			tx_desc = IGBVF_TX_DESC_ADV(*tx_ring, i);
			cleaned = (tx_desc == eop_desc);
			skb = buffer_info->skb;

			if (skb) {
				unsigned int segs, bytecount;

				/* gso_segs is currently only valid for tcp */
				segs = skb_shinfo(skb)->gso_segs ?: 1;
				/* multiply data chunks by size of headers */
				bytecount = ((segs - 1) * skb_headlen(skb)) +
					    skb->len;
				total_packets += segs;
				total_bytes += bytecount;
			}

			igbvf_put_txbuf(adapter, buffer_info);
			tx_desc->wb.status = 0;

			i++;
			if (i == tx_ring->count)
				i = 0;

			buffer_info = &tx_ring->buffer_info[i];
		}

		eop_desc = buffer_info->next_to_watch;
	} while (count < tx_ring->count);

	tx_ring->next_to_clean = i;

	if (unlikely(count && netif_carrier_ok(netdev) &&
	    igbvf_desc_unused(tx_ring) >= IGBVF_TX_QUEUE_WAKE)) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (netif_queue_stopped(netdev) &&
		    !(test_bit(__IGBVF_DOWN, &adapter->state))) {
			netif_wake_queue(netdev);
			++adapter->restart_queue;
		}
	}

	adapter->net_stats.tx_bytes += total_bytes;
	adapter->net_stats.tx_packets += total_packets;
	return count < tx_ring->count;
}

/* igbvf_msix_other - MSI-X handler registered for the third vector (see
 * igbvf_request_msix()).  Flags the link status for re-reading, kicks the
 * watchdog timer unless the adapter is marked down, and writes eims_other
 * to EIMS.
 */
static irqreturn_t igbvf_msix_other(int irq, void *data)
{
	struct net_device *netdev = data;
	struct igbvf_adapter *adapter = netdev_priv(netdev);
	/* hw is presumably what the ew32() macro operates on -- TODO confirm */
	struct e1000_hw *hw = &adapter->hw;

	adapter->int_counter1++;

	hw->mac.get_link_status = 1;
	if (!test_bit(__IGBVF_DOWN, &adapter->state))
		mod_timer(&adapter->watchdog_timer, jiffies + 1);

	ew32(EIMS, adapter->eims_other);

	return IRQ_HANDLED;
}

/* igbvf_intr_msix_tx - MSI-X handler for the Tx vector.  Applies a pending
 * ITR update, zeroes the adapter's total_tx_* counters and reclaims
 * completed Tx descriptors directly in the handler.
 *
 * NOTE(review): igbvf_clean_tx_irq() as written only updates net_stats, not
 * total_tx_bytes/total_tx_packets, so within the code visible here those
 * counters stay at zero after this handler runs -- confirm whether they are
 * updated elsewhere.
 */
static irqreturn_t igbvf_intr_msix_tx(int irq, void *data)
{
	struct net_device *netdev = data;
	struct igbvf_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct igbvf_ring *tx_ring = adapter->tx_ring;

	if (tx_ring->set_itr) {
		writel(tx_ring->itr_val,
		       adapter->hw.hw_addr + tx_ring->itr_register);
		adapter->tx_ring->set_itr = 0;
	}

	adapter->total_tx_bytes = 0;
	adapter->total_tx_packets = 0;

	/* auto mask will automatically re-enable the interrupt when we write
	 * EICS
	 */
	if (!igbvf_clean_tx_irq(tx_ring))
		/* Ring was not completely cleaned, so fire another interrupt */
		ew32(EICS, tx_ring->eims_value);
	else
		ew32(EIMS, tx_ring->eims_value);

	return IRQ_HANDLED;
}

/* igbvf_intr_msix_rx - MSI-X handler for the Rx vector.  Applies a pending
 * ITR update and schedules NAPI; the total_rx_* counters are zeroed only
 * when this call is the one that schedules the poll.
 */
static irqreturn_t igbvf_intr_msix_rx(int irq, void *data)
{
	struct net_device *netdev = data;
	struct igbvf_adapter *adapter = netdev_priv(netdev);

	adapter->int_counter0++;

	/* Write the ITR value calculated at the end of the
	 * previous interrupt.
	 */
	if (adapter->rx_ring->set_itr) {
		writel(adapter->rx_ring->itr_val,
		       adapter->hw.hw_addr + adapter->rx_ring->itr_register);
		adapter->rx_ring->set_itr = 0;
	}

	if (napi_schedule_prep(&adapter->rx_ring->napi)) {
		adapter->total_rx_bytes = 0;
		adapter->total_rx_packets = 0;
		__napi_schedule(&adapter->rx_ring->napi);
	}

	return IRQ_HANDLED;
}

/* Passed as rx_queue / tx_queue to igbvf_assign_vector() to mean "no queue
 * of this direction"; the function tests "queue > IGBVF_NO_QUEUE".
 */
#define IGBVF_NO_QUEUE -1

/* igbvf_assign_vector - bind an Rx and/or Tx queue to an MSI-X vector.
 * @adapter: board private structure
 * @rx_queue: Rx queue index, or IGBVF_NO_QUEUE
 * @tx_queue: Tx queue index, or IGBVF_NO_QUEUE
 * @msix_vector: vector number to store in the IVAR0 entry
 *
 * Two queues share one IVAR0 register (index = queue >> 1); the byte lane
 * written depends on the queue's direction and on whether its index is odd.
 * Also records BIT(msix_vector) as the ring's eims_value.
 */
static void igbvf_assign_vector(struct igbvf_adapter *adapter, int rx_queue,
				int tx_queue, int msix_vector)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ivar, index;

	/* 82576 uses a table-based method for assigning vectors.
	 * Each queue has a single entry in the table to which we write
	 * a vector number along with a "valid" bit.  Sadly, the layout
	 * of the table is somewhat counterintuitive.
	 */
	if (rx_queue > IGBVF_NO_QUEUE) {
		index = (rx_queue >> 1);
		ivar = array_er32(IVAR0, index);
		if (rx_queue & 0x1) {
			/* vector goes into third byte of register */
			ivar = ivar & 0xFF00FFFF;
			ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
		} else {
			/* vector goes into low byte of register */
			ivar = ivar & 0xFFFFFF00;
			ivar |= msix_vector | E1000_IVAR_VALID;
		}
		adapter->rx_ring[rx_queue].eims_value = BIT(msix_vector);
		array_ew32(IVAR0, index, ivar);
	}
	if (tx_queue > IGBVF_NO_QUEUE) {
		index = (tx_queue >> 1);
		ivar = array_er32(IVAR0, index);
		if (tx_queue & 0x1) {
			/* vector goes into high byte of register */
			ivar = ivar & 0x00FFFFFF;
			ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
		} else {
			/* vector goes into second byte of register */
			ivar = ivar & 0xFFFF00FF;
			ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
		}
		adapter->tx_ring[tx_queue].eims_value = BIT(msix_vector);
		array_ew32(IVAR0, index, ivar);
	}
}

/**
 * igbvf_configure_msix - Configure MSI-X hardware
 * @adapter: board private structure
 *
 * igbvf_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
**/ static void igbvf_configure_msix(struct igbvf_adapter *adapter) { u32 tmp; struct e1000_hw *hw = &adapter->hw; struct igbvf_ring *tx_ring = adapter->tx_ring; struct igbvf_ring *rx_ring = adapter->rx_ring; int vector = 0; adapter->eims_enable_mask = 0; igbvf_assign_vector(adapter, IGBVF_NO_QUEUE, 0, vector++); adapter->eims_enable_mask |= tx_ring->eims_value; writel(tx_ring->itr_val, hw->hw_addr + tx_ring->itr_register); igbvf_assign_vector(adapter, 0, IGBVF_NO_QUEUE, vector++); adapter->eims_enable_mask |= rx_ring->eims_value; writel(rx_ring->itr_val, hw->hw_addr + rx_ring->itr_register); /* set vector for other causes, i.e. link changes */ tmp = (vector++ | E1000_IVAR_VALID); ew32(IVAR_MISC, tmp); adapter->eims_enable_mask = GENMASK(vector - 1, 0); adapter->eims_other = BIT(vector - 1); e1e_flush(); } static void igbvf_reset_interrupt_capability(struct igbvf_adapter *adapter) { if (adapter->msix_entries) { pci_disable_msix(adapter->pdev); kfree(adapter->msix_entries); adapter->msix_entries = NULL; } } /** * igbvf_set_interrupt_capability - set MSI or MSI-X if supported * @adapter: board private structure * * Attempt to configure interrupts using the best available * capabilities of the hardware and kernel. **/ static void igbvf_set_interrupt_capability(struct igbvf_adapter *adapter) { int err = -ENOMEM; int i; /* we allocate 3 vectors, 1 for Tx, 1 for Rx, one for PF messages */ adapter->msix_entries = kcalloc(3, sizeof(struct msix_entry), GFP_KERNEL); if (adapter->msix_entries) { for (i = 0; i < 3; i++) adapter->msix_entries[i].entry = i; err = pci_enable_msix_range(adapter->pdev, adapter->msix_entries, 3, 3); } if (err < 0) { /* MSI-X failed */ dev_err(&adapter->pdev->dev, "Failed to initialize MSI-X interrupts.\n"); igbvf_reset_interrupt_capability(adapter); } } /** * igbvf_request_msix - Initialize MSI-X interrupts * @adapter: board private structure * * igbvf_request_msix allocates MSI-X vectors and requests interrupts from the * kernel. 
**/ static int igbvf_request_msix(struct igbvf_adapter *adapter) { struct net_device *netdev = adapter->netdev; int err = 0, vector = 0; if (strlen(netdev->name) < (IFNAMSIZ - 5)) { sprintf(adapter->tx_ring->name, "%s-tx-0", netdev->name); sprintf(adapter->rx_ring->name, "%s-rx-0", netdev->name); } else { memcpy(adapter->tx_ring->name, netdev->name, IFNAMSIZ); memcpy(adapter->rx_ring->name, netdev->name, IFNAMSIZ); } err = request_irq(adapter->msix_entries[vector].vector, igbvf_intr_msix_tx, 0, adapter->tx_ring->name, netdev); if (err) goto out; adapter->tx_ring->itr_register = E1000_EITR(vector); adapter->tx_ring->itr_val = adapter->current_itr; vector++; err = request_irq(adapter->msix_entries[vector].vector, igbvf_intr_msix_rx, 0, adapter->rx_ring->name, netdev); if (err) goto out; adapter->rx_ring->itr_register = E1000_EITR(vector); adapter->rx_ring->itr_val = adapter->current_itr; vector++; err = request_irq(adapter->msix_entries[vector].vector, igbvf_msix_other, 0, netdev->name, netdev); if (err) goto out; igbvf_configure_msix(adapter); return 0; out: return err; } /** * igbvf_alloc_queues - Allocate memory for all rings * @adapter: board private structure to initialize **/ static int igbvf_alloc_queues(struct igbvf_adapter *adapter) { struct net_device *netdev = adapter->netdev; adapter->tx_ring = kzalloc(sizeof(struct igbvf_ring), GFP_KERNEL); if (!adapter->tx_ring) return -ENOMEM; adapter->rx_ring = kzalloc(sizeof(struct igbvf_ring), GFP_KERNEL); if (!adapter->rx_ring) { kfree(adapter->tx_ring); return -ENOMEM; } netif_napi_add(netdev, &adapter->rx_ring->napi, igbvf_poll, 64); return 0; } /** * igbvf_request_irq - initialize interrupts * @adapter: board private structure * * Attempts to configure interrupts using the best available * capabilities of the hardware and kernel. 
**/ static int igbvf_request_irq(struct igbvf_adapter *adapter) { int err = -1; /* igbvf supports msi-x only */ if (adapter->msix_entries) err = igbvf_request_msix(adapter); if (!err) return err; dev_err(&adapter->pdev->dev, "Unable to allocate interrupt, Error: %d\n", err); return err; } static void igbvf_free_irq(struct igbvf_adapter *adapter) { struct net_device *netdev = adapter->netdev; int vector; if (adapter->msix_entries) { for (vector = 0; vector < 3; vector++) free_irq(adapter->msix_entries[vector].vector, netdev); } } /** * igbvf_irq_disable - Mask off interrupt generation on the NIC * @adapter: board private structure **/ static void igbvf_irq_disable(struct igbvf_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; ew32(EIMC, ~0); if (adapter->msix_entries) ew32(EIAC, 0); } /** * igbvf_irq_enable - Enable default interrupt generation settings * @adapter: board private structure **/ static void igbvf_irq_enable(struct igbvf_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; ew32(EIAC, adapter->eims_enable_mask); ew32(EIAM, adapter->eims_enable_mask); ew32(EIMS, adapter->eims_enable_mask); } /** * igbvf_poll - NAPI Rx polling callback * @napi: struct associated with this polling callback * @budget: amount of packets driver is allowed to process this poll **/ static int igbvf_poll(struct napi_struct *napi, int budget) { struct igbvf_ring *rx_ring = container_of(napi, struct igbvf_ring, napi); struct igbvf_adapter *adapter = rx_ring->adapter; struct e1000_hw *hw = &adapter->hw; int work_done = 0; igbvf_clean_rx_irq(adapter, &work_done, budget); /* If not enough Rx work done, exit the polling mode */ if (work_done < budget) { napi_complete_done(napi, work_done); if (adapter->requested_itr & 3) igbvf_set_itr(adapter); if (!test_bit(__IGBVF_DOWN, &adapter->state)) ew32(EIMS, adapter->rx_ring->eims_value); } return work_done; } /** * igbvf_set_rlpml - set receive large packet maximum length * @adapter: board private structure * * Configure the maximum 
size of packets that will be received
 */
/* The value handed to e1000_rlpml_set_vf() is adapter->max_frame_size plus
 * VLAN_TAG_SIZE.
 */
static void igbvf_set_rlpml(struct igbvf_adapter *adapter)
{
	int max_frame_size;
	struct e1000_hw *hw = &adapter->hw;

	max_frame_size = adapter->max_frame_size + VLAN_TAG_SIZE;
	e1000_rlpml_set_vf(hw, max_frame_size);
}

/* igbvf_vlan_rx_add_vid - enable reception of a VLAN ID.
 * @netdev: network interface device structure
 * @proto: VLAN protocol (not referenced in the body)
 * @vid: VLAN ID to add
 *
 * Asks the MAC layer (set_vfta) to enable @vid and, on success, marks it in
 * adapter->active_vlans.  Returns 0 on success, -EINVAL if set_vfta reports
 * failure.
 */
static int igbvf_vlan_rx_add_vid(struct net_device *netdev,
				 __be16 proto, u16 vid)
{
	struct igbvf_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;

	if (hw->mac.ops.set_vfta(hw, vid, true)) {
		dev_err(&adapter->pdev->dev, "Failed to add vlan id %d\n", vid);
		return -EINVAL;
	}
	set_bit(vid, adapter->active_vlans);
	return 0;
}

/* igbvf_vlan_rx_kill_vid - disable reception of a VLAN ID.
 * @netdev: network interface device structure
 * @proto: VLAN protocol (not referenced in the body)
 * @vid: VLAN ID to remove
 *
 * Mirror image of igbvf_vlan_rx_add_vid(): clears @vid through set_vfta and,
 * on success, in adapter->active_vlans.  Returns 0 on success, -EINVAL if
 * set_vfta reports failure.
 */
static int igbvf_vlan_rx_kill_vid(struct net_device *netdev,
				  __be16 proto, u16 vid)
{
	struct igbvf_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;

	if (hw->mac.ops.set_vfta(hw, vid, false)) {
		dev_err(&adapter->pdev->dev,
			"Failed to remove vlan id %d\n", vid);
		return -EINVAL;
	}
	clear_bit(vid, adapter->active_vlans);
	return 0;
}

/* igbvf_restore_vlan - re-add every VLAN ID recorded in
 * adapter->active_vlans.  The return value of each add is ignored.
 */
static void igbvf_restore_vlan(struct igbvf_adapter *adapter)
{
	u16 vid;

	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
		igbvf_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid);
}

/**
 * igbvf_configure_tx - Configure Transmit Unit after Reset
 * @adapter: board private structure
 *
 * Configure the Tx unit of the MAC after a reset.
 *
 * Sequence: disable queue 0, program the ring length and base address, zero
 * head and tail, clear the relaxed-ordering write-back bit, re-enable the
 * queue, then set the default descriptor command bits in adapter->txd_cmd.
 **/
static void igbvf_configure_tx(struct igbvf_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct igbvf_ring *tx_ring = adapter->tx_ring;
	u64 tdba;
	u32 txdctl, dca_txctrl;

	/* disable transmits */
	txdctl = er32(TXDCTL(0));
	ew32(TXDCTL(0), txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
	e1e_flush();
	msleep(10);

	/* Setup the HW Tx Head and Tail descriptor pointers */
	ew32(TDLEN(0), tx_ring->count * sizeof(union e1000_adv_tx_desc));
	tdba = tx_ring->dma;
	/* 64-bit ring base address is split across two 32-bit registers */
	ew32(TDBAL(0), (tdba & DMA_BIT_MASK(32)));
	ew32(TDBAH(0), (tdba >> 32));
	ew32(TDH(0), 0);
	ew32(TDT(0), 0);
	/* remember the register offsets for later writel() calls on the ring */
	tx_ring->head = E1000_TDH(0);
	tx_ring->tail = E1000_TDT(0);

	/* Turn off Relaxed Ordering on head write-backs.  The writebacks
	 * MUST be delivered in order or it will completely screw up
	 * our bookkeeping.
	 */
	dca_txctrl = er32(DCA_TXCTRL(0));
	dca_txctrl &= ~E1000_DCA_TXCTRL_TX_WB_RO_EN;
	ew32(DCA_TXCTRL(0), dca_txctrl);

	/* enable transmits */
	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
	ew32(TXDCTL(0), txdctl);

	/* Setup Transmit Descriptor Settings for eop descriptor */
	adapter->txd_cmd = E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_IFCS;

	/* enable Report Status bit */
	adapter->txd_cmd |= E1000_ADVTXD_DCMD_RS;
}

/**
 * igbvf_setup_srrctl - configure the receive control registers
 * @adapter: Board private structure
 *
 * Builds the SRRCTL value for queue 0 from adapter->rx_buffer_len and, as a
 * side effect, sets adapter->rx_ps_hdr_size: 0 (single-buffer descriptors)
 * for buffers below 2048 bytes, 128 (header split) otherwise.
 **/
static void igbvf_setup_srrctl(struct igbvf_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 srrctl = 0;

	/* srrctl is still zero here, so this mask has no effect as written */
	srrctl &= ~(E1000_SRRCTL_DESCTYPE_MASK |
		    E1000_SRRCTL_BSIZEHDR_MASK |
		    E1000_SRRCTL_BSIZEPKT_MASK);

	/* Enable queue drop to avoid head of line blocking */
	srrctl |= E1000_SRRCTL_DROP_EN;

	/* Setup buffer sizes */
	srrctl |= ALIGN(adapter->rx_buffer_len, 1024) >>
		  E1000_SRRCTL_BSIZEPKT_SHIFT;

	if (adapter->rx_buffer_len < 2048) {
		adapter->rx_ps_hdr_size = 0;
		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
	} else {
		adapter->rx_ps_hdr_size = 128;
		srrctl |= adapter->rx_ps_hdr_size <<
			  E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
	}

	ew32(SRRCTL(0), srrctl);
}

/**
 * igbvf_configure_rx - Configure Receive Unit after Reset
 * @adapter: board private structure
 *
 * Configure the Rx unit of the MAC after a reset.
**/ static void igbvf_configure_rx(struct igbvf_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct igbvf_ring *rx_ring = adapter->rx_ring; u64 rdba; u32 rxdctl; /* disable receives */ rxdctl = er32(RXDCTL(0)); ew32(RXDCTL(0), rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE); e1e_flush(); msleep(10); /* Setup the HW Rx Head and Tail Descriptor Pointers and * the Base and Length of the Rx Descriptor Ring */ rdba = rx_ring->dma; ew32(RDBAL(0), (rdba & DMA_BIT_MASK(32))); ew32(RDBAH(0), (rdba >> 32)); ew32(RDLEN(0), rx_ring->count * sizeof(union e1000_adv_rx_desc)); rx_ring->head = E1000_RDH(0); rx_ring->tail = E1000_RDT(0); ew32(RDH(0), 0); ew32(RDT(0), 0); rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; rxdctl &= 0xFFF00000; rxdctl |= IGBVF_RX_PTHRESH; rxdctl |= IGBVF_RX_HTHRESH << 8; rxdctl |= IGBVF_RX_WTHRESH << 16; igbvf_set_rlpml(adapter); /* enable receives */ ew32(RXDCTL(0), rxdctl); } /** * igbvf_set_multi - Multicast and Promiscuous mode set * @netdev: network interface device structure * * The set_multi entry point is called whenever the multicast address * list or the network interface flags are updated. This routine is * responsible for configuring the hardware for proper multicast, * promiscuous mode, and all-multi behavior. **/ static void igbvf_set_multi(struct net_device *netdev) { struct igbvf_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; struct netdev_hw_addr *ha; u8 *mta_list = NULL; int i; if (!netdev_mc_empty(netdev)) { mta_list = kmalloc_array(netdev_mc_count(netdev), ETH_ALEN, GFP_ATOMIC); if (!mta_list) return; } /* prepare a packed array of only addresses. 
*/ i = 0; netdev_for_each_mc_addr(ha, netdev) memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN); hw->mac.ops.update_mc_addr_list(hw, mta_list, i, 0, 0); kfree(mta_list); } /** * igbvf_configure - configure the hardware for Rx and Tx * @adapter: private board structure **/ static void igbvf_configure(struct igbvf_adapter *adapter) { igbvf_set_multi(adapter->netdev); igbvf_restore_vlan(adapter); igbvf_configure_tx(adapter); igbvf_setup_srrctl(adapter); igbvf_configure_rx(adapter); igbvf_alloc_rx_buffers(adapter->rx_ring, igbvf_desc_unused(adapter->rx_ring)); } /* igbvf_reset - bring the hardware into a known good state * @adapter: private board structure * * This function boots the hardware and enables some settings that * require a configuration cycle of the hardware - those cannot be * set/changed during runtime. After reset the device needs to be * properly configured for Rx, Tx etc. */ static void igbvf_reset(struct igbvf_adapter *adapter) { struct e1000_mac_info *mac = &adapter->hw.mac; struct net_device *netdev = adapter->netdev; struct e1000_hw *hw = &adapter->hw; /* Allow time for pending master requests to run */ if (mac->ops.reset_hw(hw)) dev_err(&adapter->pdev->dev, "PF still resetting\n"); mac->ops.init_hw(hw); if (is_valid_ether_addr(adapter->hw.mac.addr)) { memcpy(netdev->dev_addr, adapter->hw.mac.addr, netdev->addr_len); memcpy(netdev->perm_addr, adapter->hw.mac.addr, netdev->addr_len); } adapter->last_reset = jiffies; } int igbvf_up(struct igbvf_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; /* hardware has been reset, we need to reload some things */ igbvf_configure(adapter); clear_bit(__IGBVF_DOWN, &adapter->state); napi_enable(&adapter->rx_ring->napi); if (adapter->msix_entries) igbvf_configure_msix(adapter); /* Clear any pending interrupts. 
*/ er32(EICR); igbvf_irq_enable(adapter); /* start the watchdog */ hw->mac.get_link_status = 1; mod_timer(&adapter->watchdog_timer, jiffies + 1); return 0; } void igbvf_down(struct igbvf_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct e1000_hw *hw = &adapter->hw; u32 rxdctl, txdctl; /* signal that we're down so the interrupt handler does not * reschedule our watchdog timer */ set_bit(__IGBVF_DOWN, &adapter->state); /* disable receives in the hardware */ rxdctl = er32(RXDCTL(0)); ew32(RXDCTL(0), rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE); netif_carrier_off(netdev); netif_stop_queue(netdev); /* disable transmits in the hardware */ txdctl = er32(TXDCTL(0)); ew32(TXDCTL(0), txdctl & ~E1000_TXDCTL_QUEUE_ENABLE); /* flush both disables and wait for them to finish */ e1e_flush(); msleep(10); napi_disable(&adapter->rx_ring->napi); igbvf_irq_disable(adapter); del_timer_sync(&adapter->watchdog_timer); /* record the stats before reset*/ igbvf_update_stats(adapter); adapter->link_speed = 0; adapter->link_duplex = 0; igbvf_reset(adapter); igbvf_clean_tx_ring(adapter->tx_ring); igbvf_clean_rx_ring(adapter->rx_ring); } void igbvf_reinit_locked(struct igbvf_adapter *adapter) { might_sleep(); while (test_and_set_bit(__IGBVF_RESETTING, &adapter->state)) usleep_range(1000, 2000); igbvf_down(adapter); igbvf_up(adapter); clear_bit(__IGBVF_RESETTING, &adapter->state); } /** * igbvf_sw_init - Initialize general software structures (struct igbvf_adapter) * @adapter: board private structure to initialize * * igbvf_sw_init initializes the Adapter private data structure. * Fields are initialized based on PCI device information and * OS network device settings (MTU size). 
**/ static int igbvf_sw_init(struct igbvf_adapter *adapter) { struct net_device *netdev = adapter->netdev; s32 rc; adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN; adapter->rx_ps_hdr_size = 0; adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; adapter->tx_int_delay = 8; adapter->tx_abs_int_delay = 32; adapter->rx_int_delay = 0; adapter->rx_abs_int_delay = 8; adapter->requested_itr = 3; adapter->current_itr = IGBVF_START_ITR; /* Set various function pointers */ adapter->ei->init_ops(&adapter->hw); rc = adapter->hw.mac.ops.init_params(&adapter->hw); if (rc) return rc; rc = adapter->hw.mbx.ops.init_params(&adapter->hw); if (rc) return rc; igbvf_set_interrupt_capability(adapter); if (igbvf_alloc_queues(adapter)) return -ENOMEM; spin_lock_init(&adapter->tx_queue_lock); /* Explicitly disable IRQ since the NIC can be in any state. */ igbvf_irq_disable(adapter); spin_lock_init(&adapter->stats_lock); set_bit(__IGBVF_DOWN, &adapter->state); return 0; } static void igbvf_initialize_last_counter_stats(struct igbvf_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; adapter->stats.last_gprc = er32(VFGPRC); adapter->stats.last_gorc = er32(VFGORC); adapter->stats.last_gptc = er32(VFGPTC); adapter->stats.last_gotc = er32(VFGOTC); adapter->stats.last_mprc = er32(VFMPRC); adapter->stats.last_gotlbc = er32(VFGOTLBC); adapter->stats.last_gptlbc = er32(VFGPTLBC); adapter->stats.last_gorlbc = er32(VFGORLBC); adapter->stats.last_gprlbc = er32(VFGPRLBC); adapter->stats.base_gprc = er32(VFGPRC); adapter->stats.base_gorc = er32(VFGORC); adapter->stats.base_gptc = er32(VFGPTC); adapter->stats.base_gotc = er32(VFGOTC); adapter->stats.base_mprc = er32(VFMPRC); adapter->stats.base_gotlbc = er32(VFGOTLBC); adapter->stats.base_gptlbc = er32(VFGPTLBC); adapter->stats.base_gorlbc = er32(VFGORLBC); adapter->stats.base_gprlbc = er32(VFGPRLBC); } /** * igbvf_open - Called when a network interface is made active * 
@netdev: network interface device structure
 *
 * Returns 0 on success, negative value on failure
 *
 * The open entry point is called when a network interface is made
 * active by the system (IFF_UP).  At this point all resources needed
 * for transmit and receive operations are allocated, the interrupt
 * handler is registered with the OS, the watchdog timer is started,
 * and the stack is notified that the interface is ready.
 **/
static int igbvf_open(struct net_device *netdev)
{
	struct igbvf_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int err;

	/* disallow open during test */
	if (test_bit(__IGBVF_TESTING, &adapter->state))
		return -EBUSY;

	/* allocate transmit descriptors */
	err = igbvf_setup_tx_resources(adapter, adapter->tx_ring);
	if (err)
		goto err_reset;

	/* allocate receive descriptors */
	err = igbvf_setup_rx_resources(adapter, adapter->rx_ring);
	if (err)
		goto err_free_tx;

	/* before we allocate an interrupt, we must be ready to handle it.
	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
	 * as soon as we call pci_request_irq, so we have to setup our
	 * clean_rx handler before we do so.
	 */
	igbvf_configure(adapter);

	err = igbvf_request_irq(adapter);
	if (err)
		goto err_free_rx;

	/* From here on the code is the same as igbvf_up() */
	clear_bit(__IGBVF_DOWN, &adapter->state);

	napi_enable(&adapter->rx_ring->napi);

	/* clear any pending interrupts */
	er32(EICR);

	igbvf_irq_enable(adapter);

	/* start the watchdog */
	hw->mac.get_link_status = 1;
	mod_timer(&adapter->watchdog_timer, jiffies + 1);

	return 0;

	/* unwind in the reverse order of setup */
err_free_rx:
	igbvf_free_rx_resources(adapter->rx_ring);
err_free_tx:
	igbvf_free_tx_resources(adapter->tx_ring);
err_reset:
	igbvf_reset(adapter);

	return err;
}

/**
 * igbvf_close - Disables a network interface
 * @netdev: network interface device structure
 *
 * Returns 0, this is not allowed to fail
 *
 * The close entry point is called when an interface is de-activated
 * by the OS.
The hardware is still under the drivers control, but * needs to be disabled. A global MAC reset is issued to stop the * hardware, and all transmit and receive resources are freed. **/ static int igbvf_close(struct net_device *netdev) { struct igbvf_adapter *adapter = netdev_priv(netdev); WARN_ON(test_bit(__IGBVF_RESETTING, &adapter->state)); igbvf_down(adapter); igbvf_free_irq(adapter); igbvf_free_tx_resources(adapter->tx_ring); igbvf_free_rx_resources(adapter->rx_ring); return 0; } /** * igbvf_set_mac - Change the Ethernet Address of the NIC * @netdev: network interface device structure * @p: pointer to an address structure * * Returns 0 on success, negative on failure **/ static int igbvf_set_mac(struct net_device *netdev, void *p) { struct igbvf_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; struct sockaddr *addr = p; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); hw->mac.ops.rar_set(hw, hw->mac.addr, 0); if (!ether_addr_equal(addr->sa_data, hw->mac.addr)) return -EADDRNOTAVAIL; memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); return 0; } #define UPDATE_VF_COUNTER(reg, name) \ { \ u32 current_counter = er32(reg); \ if (current_counter < adapter->stats.last_##name) \ adapter->stats.name += 0x100000000LL; \ adapter->stats.last_##name = current_counter; \ adapter->stats.name &= 0xFFFFFFFF00000000LL; \ adapter->stats.name |= current_counter; \ } /** * igbvf_update_stats - Update the board statistics counters * @adapter: board private structure **/ void igbvf_update_stats(struct igbvf_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; /* Prevent stats update while adapter is being reset, link is down * or if the pci connection is down. 
*/ if (adapter->link_speed == 0) return; if (test_bit(__IGBVF_RESETTING, &adapter->state)) return; if (pci_channel_offline(pdev)) return; UPDATE_VF_COUNTER(VFGPRC, gprc); UPDATE_VF_COUNTER(VFGORC, gorc); UPDATE_VF_COUNTER(VFGPTC, gptc); UPDATE_VF_COUNTER(VFGOTC, gotc); UPDATE_VF_COUNTER(VFMPRC, mprc); UPDATE_VF_COUNTER(VFGOTLBC, gotlbc); UPDATE_VF_COUNTER(VFGPTLBC, gptlbc); UPDATE_VF_COUNTER(VFGORLBC, gorlbc); UPDATE_VF_COUNTER(VFGPRLBC, gprlbc); /* Fill out the OS statistics structure */ adapter->net_stats.multicast = adapter->stats.mprc; } static void igbvf_print_link_info(struct igbvf_adapter *adapter) { dev_info(&adapter->pdev->dev, "Link is Up %d Mbps %s Duplex\n", adapter->link_speed, adapter->link_duplex == FULL_DUPLEX ? "Full" : "Half"); } static bool igbvf_has_link(struct igbvf_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; s32 ret_val = E1000_SUCCESS; bool link_active; /* If interface is down, stay link down */ if (test_bit(__IGBVF_DOWN, &adapter->state)) return false; ret_val = hw->mac.ops.check_for_link(hw); link_active = !hw->mac.get_link_status; /* if check for link returns error we will need to reset */ if (ret_val && time_after(jiffies, adapter->last_reset + (10 * HZ))) schedule_work(&adapter->reset_task); return link_active; } /** * igbvf_watchdog - Timer Call-back * @data: pointer to adapter cast into an unsigned long **/ static void igbvf_watchdog(unsigned long data) { struct igbvf_adapter *adapter = (struct igbvf_adapter *)data; /* Do the rest outside of interrupt context */ schedule_work(&adapter->watchdog_task); } static void igbvf_watchdog_task(struct work_struct *work) { struct igbvf_adapter *adapter = container_of(work, struct igbvf_adapter, watchdog_task); struct net_device *netdev = adapter->netdev; struct e1000_mac_info *mac = &adapter->hw.mac; struct igbvf_ring *tx_ring = adapter->tx_ring; struct e1000_hw *hw = &adapter->hw; u32 link; int tx_pending = 0; link = igbvf_has_link(adapter); if (link) { if (!netif_carrier_ok(netdev)) 
{ mac->ops.get_link_up_info(&adapter->hw, &adapter->link_speed, &adapter->link_duplex); igbvf_print_link_info(adapter); netif_carrier_on(netdev); netif_wake_queue(netdev); } } else { if (netif_carrier_ok(netdev)) { adapter->link_speed = 0; adapter->link_duplex = 0; dev_info(&adapter->pdev->dev, "Link is Down\n"); netif_carrier_off(netdev); netif_stop_queue(netdev); } } if (netif_carrier_ok(netdev)) { igbvf_update_stats(adapter); } else { tx_pending = (igbvf_desc_unused(tx_ring) + 1 < tx_ring->count); if (tx_pending) { /* We've lost link, so the controller stops DMA, * but we've got queued Tx work that's never going * to get done, so reset controller to flush Tx. * (Do the reset outside of interrupt context). */ adapter->tx_timeout_count++; schedule_work(&adapter->reset_task); } } /* Cause software interrupt to ensure Rx ring is cleaned */ ew32(EICS, adapter->rx_ring->eims_value); /* Reset the timer */ if (!test_bit(__IGBVF_DOWN, &adapter->state)) mod_timer(&adapter->watchdog_timer, round_jiffies(jiffies + (2 * HZ))); } #define IGBVF_TX_FLAGS_CSUM 0x00000001 #define IGBVF_TX_FLAGS_VLAN 0x00000002 #define IGBVF_TX_FLAGS_TSO 0x00000004 #define IGBVF_TX_FLAGS_IPV4 0x00000008 #define IGBVF_TX_FLAGS_VLAN_MASK 0xffff0000 #define IGBVF_TX_FLAGS_VLAN_SHIFT 16 static void igbvf_tx_ctxtdesc(struct igbvf_ring *tx_ring, u32 vlan_macip_lens, u32 type_tucmd, u32 mss_l4len_idx) { struct e1000_adv_tx_context_desc *context_desc; struct igbvf_buffer *buffer_info; u16 i = tx_ring->next_to_use; context_desc = IGBVF_TX_CTXTDESC_ADV(*tx_ring, i); buffer_info = &tx_ring->buffer_info[i]; i++; tx_ring->next_to_use = (i < tx_ring->count) ? 
i : 0; /* set bits to identify this as an advanced context descriptor */ type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT; context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); context_desc->seqnum_seed = 0; context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); buffer_info->time_stamp = jiffies; buffer_info->dma = 0; } static int igbvf_tso(struct igbvf_ring *tx_ring, struct sk_buff *skb, u32 tx_flags, u8 *hdr_len) { u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; union { struct iphdr *v4; struct ipv6hdr *v6; unsigned char *hdr; } ip; union { struct tcphdr *tcp; unsigned char *hdr; } l4; u32 paylen, l4_offset; int err; if (skb->ip_summed != CHECKSUM_PARTIAL) return 0; if (!skb_is_gso(skb)) return 0; err = skb_cow_head(skb, 0); if (err < 0) return err; ip.hdr = skb_network_header(skb); l4.hdr = skb_checksum_start(skb); /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; /* initialize outer IP header fields */ if (ip.v4->version == 4) { unsigned char *csum_start = skb_checksum_start(skb); unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4); /* IP header will have to cancel out any data that * is not a part of the outer IP header */ ip.v4->check = csum_fold(csum_partial(trans_start, csum_start - trans_start, 0)); type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; ip.v4->tot_len = 0; } else { ip.v6->payload_len = 0; } /* determine offset of inner transport header */ l4_offset = l4.hdr - skb->data; /* compute length of segmentation header */ *hdr_len = (l4.tcp->doff * 4) + l4_offset; /* remove payload length from inner checksum */ paylen = skb->len - l4_offset; csum_replace_by_diff(&l4.tcp->check, htonl(paylen)); /* MSS L4LEN IDX */ mss_l4len_idx = (*hdr_len - l4_offset) << E1000_ADVTXD_L4LEN_SHIFT; mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT; /* VLAN MACLEN IPLEN */ vlan_macip_lens = l4.hdr - ip.hdr; vlan_macip_lens |= (ip.hdr - skb->data) << 
E1000_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= tx_flags & IGBVF_TX_FLAGS_VLAN_MASK; igbvf_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); return 1; } static inline bool igbvf_ipv6_csum_is_sctp(struct sk_buff *skb) { unsigned int offset = 0; ipv6_find_hdr(skb, &offset, IPPROTO_SCTP, NULL, NULL); return offset == skb_checksum_start_offset(skb); } static bool igbvf_tx_csum(struct igbvf_ring *tx_ring, struct sk_buff *skb, u32 tx_flags, __be16 protocol) { u32 vlan_macip_lens = 0; u32 type_tucmd = 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { csum_failed: if (!(tx_flags & IGBVF_TX_FLAGS_VLAN)) return false; goto no_csum; } switch (skb->csum_offset) { case offsetof(struct tcphdr, check): type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; /* fall through */ case offsetof(struct udphdr, check): break; case offsetof(struct sctphdr, checksum): /* validate that this is actually an SCTP request */ if (((protocol == htons(ETH_P_IP)) && (ip_hdr(skb)->protocol == IPPROTO_SCTP)) || ((protocol == htons(ETH_P_IPV6)) && igbvf_ipv6_csum_is_sctp(skb))) { type_tucmd = E1000_ADVTXD_TUCMD_L4T_SCTP; break; } default: skb_checksum_help(skb); goto csum_failed; } vlan_macip_lens = skb_checksum_start_offset(skb) - skb_network_offset(skb); no_csum: vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= tx_flags & IGBVF_TX_FLAGS_VLAN_MASK; igbvf_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, 0); return true; } static int igbvf_maybe_stop_tx(struct net_device *netdev, int size) { struct igbvf_adapter *adapter = netdev_priv(netdev); /* there is enough descriptors then we don't need to worry */ if (igbvf_desc_unused(adapter->tx_ring) >= size) return 0; netif_stop_queue(netdev); /* Herbert's original patch had: * smp_mb__after_netif_stop_queue(); * but since that doesn't exist yet, just open code it. 
*/ smp_mb(); /* We need to check again just in case room has been made available */ if (igbvf_desc_unused(adapter->tx_ring) < size) return -EBUSY; netif_wake_queue(netdev); ++adapter->restart_queue; return 0; } #define IGBVF_MAX_TXD_PWR 16 #define IGBVF_MAX_DATA_PER_TXD (1u << IGBVF_MAX_TXD_PWR) static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter, struct igbvf_ring *tx_ring, struct sk_buff *skb) { struct igbvf_buffer *buffer_info; struct pci_dev *pdev = adapter->pdev; unsigned int len = skb_headlen(skb); unsigned int count = 0, i; unsigned int f; i = tx_ring->next_to_use; buffer_info = &tx_ring->buffer_info[i]; BUG_ON(len >= IGBVF_MAX_DATA_PER_TXD); buffer_info->length = len; /* set time_stamp *before* dma to help avoid a possible race */ buffer_info->time_stamp = jiffies; buffer_info->mapped_as_page = false; buffer_info->dma = dma_map_single(&pdev->dev, skb->data, len, DMA_TO_DEVICE); if (dma_mapping_error(&pdev->dev, buffer_info->dma)) goto dma_error; for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) { const struct skb_frag_struct *frag; count++; i++; if (i == tx_ring->count) i = 0; frag = &skb_shinfo(skb)->frags[f]; len = skb_frag_size(frag); buffer_info = &tx_ring->buffer_info[i]; BUG_ON(len >= IGBVF_MAX_DATA_PER_TXD); buffer_info->length = len; buffer_info->time_stamp = jiffies; buffer_info->mapped_as_page = true; buffer_info->dma = skb_frag_dma_map(&pdev->dev, frag, 0, len, DMA_TO_DEVICE); if (dma_mapping_error(&pdev->dev, buffer_info->dma)) goto dma_error; } tx_ring->buffer_info[i].skb = skb; return ++count; dma_error: dev_err(&pdev->dev, "TX DMA map failed\n"); /* clear timestamp and dma mappings for failed buffer_info mapping */ buffer_info->dma = 0; buffer_info->time_stamp = 0; buffer_info->length = 0; buffer_info->mapped_as_page = false; if (count) count--; /* clear timestamp and dma mappings for remaining portion of packet */ while (count--) { if (i == 0) i += tx_ring->count; i--; buffer_info = &tx_ring->buffer_info[i]; 
igbvf_put_txbuf(adapter, buffer_info); } return 0; } static inline void igbvf_tx_queue_adv(struct igbvf_adapter *adapter, struct igbvf_ring *tx_ring, int tx_flags, int count, unsigned int first, u32 paylen, u8 hdr_len) { union e1000_adv_tx_desc *tx_desc = NULL; struct igbvf_buffer *buffer_info; u32 olinfo_status = 0, cmd_type_len; unsigned int i; cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT); if (tx_flags & IGBVF_TX_FLAGS_VLAN) cmd_type_len |= E1000_ADVTXD_DCMD_VLE; if (tx_flags & IGBVF_TX_FLAGS_TSO) { cmd_type_len |= E1000_ADVTXD_DCMD_TSE; /* insert tcp checksum */ olinfo_status |= E1000_TXD_POPTS_TXSM << 8; /* insert ip checksum */ if (tx_flags & IGBVF_TX_FLAGS_IPV4) olinfo_status |= E1000_TXD_POPTS_IXSM << 8; } else if (tx_flags & IGBVF_TX_FLAGS_CSUM) { olinfo_status |= E1000_TXD_POPTS_TXSM << 8; } olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT); i = tx_ring->next_to_use; while (count--) { buffer_info = &tx_ring->buffer_info[i]; tx_desc = IGBVF_TX_DESC_ADV(*tx_ring, i); tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma); tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type_len | buffer_info->length); tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); i++; if (i == tx_ring->count) i = 0; } tx_desc->read.cmd_type_len |= cpu_to_le32(adapter->txd_cmd); /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, * such as IA-64). 
*/ wmb(); tx_ring->buffer_info[first].next_to_watch = tx_desc; tx_ring->next_to_use = i; writel(i, adapter->hw.hw_addr + tx_ring->tail); /* we need this if more than one processor can write to our tail * at a time, it synchronizes IO on IA64/Altix systems */ mmiowb(); } static netdev_tx_t igbvf_xmit_frame_ring_adv(struct sk_buff *skb, struct net_device *netdev, struct igbvf_ring *tx_ring) { struct igbvf_adapter *adapter = netdev_priv(netdev); unsigned int first, tx_flags = 0; u8 hdr_len = 0; int count = 0; int tso = 0; __be16 protocol = vlan_get_protocol(skb); if (test_bit(__IGBVF_DOWN, &adapter->state)) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; } if (skb->len <= 0) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; } /* need: count + 4 desc gap to keep tail from touching * + 2 desc gap to keep tail from touching head, * + 1 desc for skb->data, * + 1 desc for context descriptor, * head, otherwise try next time */ if (igbvf_maybe_stop_tx(netdev, skb_shinfo(skb)->nr_frags + 4)) { /* this is a hard error */ return NETDEV_TX_BUSY; } if (skb_vlan_tag_present(skb)) { tx_flags |= IGBVF_TX_FLAGS_VLAN; tx_flags |= (skb_vlan_tag_get(skb) << IGBVF_TX_FLAGS_VLAN_SHIFT); } if (protocol == htons(ETH_P_IP)) tx_flags |= IGBVF_TX_FLAGS_IPV4; first = tx_ring->next_to_use; tso = igbvf_tso(tx_ring, skb, tx_flags, &hdr_len); if (unlikely(tso < 0)) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; } if (tso) tx_flags |= IGBVF_TX_FLAGS_TSO; else if (igbvf_tx_csum(tx_ring, skb, tx_flags, protocol) && (skb->ip_summed == CHECKSUM_PARTIAL)) tx_flags |= IGBVF_TX_FLAGS_CSUM; /* count reflects descriptors mapped, if 0 then mapping error * has occurred and we need to rewind the descriptor queue */ count = igbvf_tx_map_adv(adapter, tx_ring, skb); if (count) { igbvf_tx_queue_adv(adapter, tx_ring, tx_flags, count, first, skb->len, hdr_len); /* Make sure there is space in the ring for the next send. 
*/ igbvf_maybe_stop_tx(netdev, MAX_SKB_FRAGS + 4); } else { dev_kfree_skb_any(skb); tx_ring->buffer_info[first].time_stamp = 0; tx_ring->next_to_use = first; } return NETDEV_TX_OK; } static netdev_tx_t igbvf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct igbvf_adapter *adapter = netdev_priv(netdev); struct igbvf_ring *tx_ring; if (test_bit(__IGBVF_DOWN, &adapter->state)) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; } tx_ring = &adapter->tx_ring[0]; return igbvf_xmit_frame_ring_adv(skb, netdev, tx_ring); } /** * igbvf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ static void igbvf_tx_timeout(struct net_device *netdev) { struct igbvf_adapter *adapter = netdev_priv(netdev); /* Do the reset outside of interrupt context */ adapter->tx_timeout_count++; schedule_work(&adapter->reset_task); } static void igbvf_reset_task(struct work_struct *work) { struct igbvf_adapter *adapter; adapter = container_of(work, struct igbvf_adapter, reset_task); igbvf_reinit_locked(adapter); } /** * igbvf_get_stats - Get System Network Statistics * @netdev: network interface device structure * * Returns the address of the device statistics structure. * The statistics are actually updated from the timer callback. 
**/ static struct net_device_stats *igbvf_get_stats(struct net_device *netdev) { struct igbvf_adapter *adapter = netdev_priv(netdev); /* only return the current stats */ return &adapter->net_stats; } /** * igbvf_change_mtu - Change the Maximum Transfer Unit * @netdev: network interface device structure * @new_mtu: new value for maximum frame size * * Returns 0 on success, negative on failure **/ static int igbvf_change_mtu(struct net_device *netdev, int new_mtu) { struct igbvf_adapter *adapter = netdev_priv(netdev); int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; if (new_mtu < 68 || new_mtu > INT_MAX - ETH_HLEN - ETH_FCS_LEN || max_frame > MAX_JUMBO_FRAME_SIZE) return -EINVAL; #define MAX_STD_JUMBO_FRAME_SIZE 9234 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) { dev_err(&adapter->pdev->dev, "MTU > 9216 not supported.\n"); return -EINVAL; } while (test_and_set_bit(__IGBVF_RESETTING, &adapter->state)) usleep_range(1000, 2000); /* igbvf_down has a dependency on max_frame_size */ adapter->max_frame_size = max_frame; if (netif_running(netdev)) igbvf_down(adapter); /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN * means we reserve 2 more, this pushes us to allocate from the next * larger slab size. * i.e. 
RXBUFFER_2048 --> size-4096 slab * However with the new *_jumbo_rx* routines, jumbo receives will use * fragmented skbs */ if (max_frame <= 1024) adapter->rx_buffer_len = 1024; else if (max_frame <= 2048) adapter->rx_buffer_len = 2048; else #if (PAGE_SIZE / 2) > 16384 adapter->rx_buffer_len = 16384; #else adapter->rx_buffer_len = PAGE_SIZE / 2; #endif /* adjust allocation if LPE protects us, and we aren't using SBP */ if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN) || (max_frame == ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN)) adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN; dev_info(&adapter->pdev->dev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); netdev->mtu = new_mtu; if (netif_running(netdev)) igbvf_up(adapter); else igbvf_reset(adapter); clear_bit(__IGBVF_RESETTING, &adapter->state); return 0; } static int igbvf_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { switch (cmd) { default: return -EOPNOTSUPP; } } static int igbvf_suspend(struct pci_dev *pdev, pm_message_t state) { struct net_device *netdev = pci_get_drvdata(pdev); struct igbvf_adapter *adapter = netdev_priv(netdev); #ifdef CONFIG_PM int retval = 0; #endif netif_device_detach(netdev); if (netif_running(netdev)) { WARN_ON(test_bit(__IGBVF_RESETTING, &adapter->state)); igbvf_down(adapter); igbvf_free_irq(adapter); } #ifdef CONFIG_PM retval = pci_save_state(pdev); if (retval) return retval; #endif pci_disable_device(pdev); return 0; } #ifdef CONFIG_PM static int igbvf_resume(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct igbvf_adapter *adapter = netdev_priv(netdev); u32 err; pci_restore_state(pdev); err = pci_enable_device_mem(pdev); if (err) { dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n"); return err; } pci_set_master(pdev); if (netif_running(netdev)) { err = igbvf_request_irq(adapter); if (err) return err; } igbvf_reset(adapter); if (netif_running(netdev)) igbvf_up(adapter); netif_device_attach(netdev); return 0; } 
#endif static void igbvf_shutdown(struct pci_dev *pdev) { igbvf_suspend(pdev, PMSG_SUSPEND); } #ifdef CONFIG_NET_POLL_CONTROLLER /* Polling 'interrupt' - used by things like netconsole to send skbs * without having to re-enable interrupts. It's not called while * the interrupt routine is executing. */ static void igbvf_netpoll(struct net_device *netdev) { struct igbvf_adapter *adapter = netdev_priv(netdev); disable_irq(adapter->pdev->irq); igbvf_clean_tx_irq(adapter->tx_ring); enable_irq(adapter->pdev->irq); } #endif /** * igbvf_io_error_detected - called when PCI error is detected * @pdev: Pointer to PCI device * @state: The current pci connection state * * This function is called after a PCI bus error affecting * this device has been detected. */ static pci_ers_result_t igbvf_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state) { struct net_device *netdev = pci_get_drvdata(pdev); struct igbvf_adapter *adapter = netdev_priv(netdev); netif_device_detach(netdev); if (state == pci_channel_io_perm_failure) return PCI_ERS_RESULT_DISCONNECT; if (netif_running(netdev)) igbvf_down(adapter); pci_disable_device(pdev); /* Request a slot slot reset. */ return PCI_ERS_RESULT_NEED_RESET; } /** * igbvf_io_slot_reset - called after the pci bus has been reset. * @pdev: Pointer to PCI device * * Restart the card from scratch, as if from a cold-boot. Implementation * resembles the first-half of the igbvf_resume routine. */ static pci_ers_result_t igbvf_io_slot_reset(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct igbvf_adapter *adapter = netdev_priv(netdev); if (pci_enable_device_mem(pdev)) { dev_err(&pdev->dev, "Cannot re-enable PCI device after reset.\n"); return PCI_ERS_RESULT_DISCONNECT; } pci_set_master(pdev); igbvf_reset(adapter); return PCI_ERS_RESULT_RECOVERED; } /** * igbvf_io_resume - called when traffic can start flowing again. 
* @pdev: Pointer to PCI device * * This callback is called when the error recovery driver tells us that * its OK to resume normal operation. Implementation resembles the * second-half of the igbvf_resume routine. */ static void igbvf_io_resume(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct igbvf_adapter *adapter = netdev_priv(netdev); if (netif_running(netdev)) { if (igbvf_up(adapter)) { dev_err(&pdev->dev, "can't bring device back up after reset\n"); return; } } netif_device_attach(netdev); } static void igbvf_print_device_info(struct igbvf_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; if (hw->mac.type == e1000_vfadapt_i350) dev_info(&pdev->dev, "Intel(R) I350 Virtual Function\n"); else dev_info(&pdev->dev, "Intel(R) 82576 Virtual Function\n"); dev_info(&pdev->dev, "Address: %pM\n", netdev->dev_addr); } static int igbvf_set_features(struct net_device *netdev, netdev_features_t features) { struct igbvf_adapter *adapter = netdev_priv(netdev); if (features & NETIF_F_RXCSUM) adapter->flags &= ~IGBVF_FLAG_RX_CSUM_DISABLED; else adapter->flags |= IGBVF_FLAG_RX_CSUM_DISABLED; return 0; } #define IGBVF_MAX_MAC_HDR_LEN 127 #define IGBVF_MAX_NETWORK_HDR_LEN 511 static netdev_features_t igbvf_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { unsigned int network_hdr_len, mac_hdr_len; /* Make certain the headers can be described by a context descriptor */ mac_hdr_len = skb_network_header(skb) - skb->data; if (unlikely(mac_hdr_len > IGBVF_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_TSO | NETIF_F_TSO6); network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb); if (unlikely(network_hdr_len > IGBVF_MAX_NETWORK_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | NETIF_F_TSO | NETIF_F_TSO6); /* We can only support IPV4 TSO in 
tunnels if we can mangle the * inner IP ID field, so strip TSO if MANGLEID is not supported. */ if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) features &= ~NETIF_F_TSO; return features; } static const struct net_device_ops igbvf_netdev_ops = { .ndo_open = igbvf_open, .ndo_stop = igbvf_close, .ndo_start_xmit = igbvf_xmit_frame, .ndo_get_stats = igbvf_get_stats, .ndo_set_rx_mode = igbvf_set_multi, .ndo_set_mac_address = igbvf_set_mac, .ndo_change_mtu = igbvf_change_mtu, .ndo_do_ioctl = igbvf_ioctl, .ndo_tx_timeout = igbvf_tx_timeout, .ndo_vlan_rx_add_vid = igbvf_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = igbvf_vlan_rx_kill_vid, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = igbvf_netpoll, #endif .ndo_set_features = igbvf_set_features, .ndo_features_check = igbvf_features_check, }; /** * igbvf_probe - Device Initialization Routine * @pdev: PCI device information struct * @ent: entry in igbvf_pci_tbl * * Returns 0 on success, negative on failure * * igbvf_probe initializes an adapter identified by a pci_dev structure. * The OS initialization, configuring of the adapter private structure, * and a hardware reset occur. 
**/
static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	struct net_device *netdev;
	struct igbvf_adapter *adapter;
	struct e1000_hw *hw;
	/* board description selected by the matching igbvf_pci_tbl entry */
	const struct igbvf_info *ei = igbvf_info_tbl[ent->driver_data];

	/* counts probed devices; used below to number adapters */
	static int cards_found;
	int err, pci_using_dac;

	err = pci_enable_device_mem(pdev);
	if (err)
		return err;

	/* Prefer a 64-bit DMA mask and fall back to 32-bit; pci_using_dac
	 * remembers which one was accepted so NETIF_F_HIGHDMA can be
	 * advertised further down.
	 */
	pci_using_dac = 0;
	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (!err) {
		pci_using_dac = 1;
	} else {
		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev,
				"No usable DMA configuration, aborting\n");
			goto err_dma;
		}
	}

	err = pci_request_regions(pdev, igbvf_driver_name);
	if (err)
		goto err_pci_reg;

	pci_set_master(pdev);

	/* preset so the unwind path reports -ENOMEM if allocation fails */
	err = -ENOMEM;
	netdev = alloc_etherdev(sizeof(struct igbvf_adapter));
	if (!netdev)
		goto err_alloc_etherdev;

	SET_NETDEV_DEV(netdev, &pdev->dev);

	/* link pci_dev <-> net_device <-> adapter and copy board defaults */
	pci_set_drvdata(pdev, netdev);
	adapter = netdev_priv(netdev);
	hw = &adapter->hw;
	adapter->netdev = netdev;
	adapter->pdev = pdev;
	adapter->ei = ei;
	adapter->pba = ei->pba;
	adapter->flags = ei->flags;
	adapter->hw.back = adapter;
	adapter->hw.mac.type = ei->mac;
	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);

	/* PCI config space info */

	hw->vendor_id = pdev->vendor;
	hw->device_id = pdev->device;
	hw->subsystem_vendor_id = pdev->subsystem_vendor;
	hw->subsystem_device_id = pdev->subsystem_device;
	hw->revision_id = pdev->revision;

	/* preset so the unwind path reports -EIO if BAR 0 cannot be mapped */
	err = -EIO;
	adapter->hw.hw_addr = ioremap(pci_resource_start(pdev, 0),
				      pci_resource_len(pdev, 0));

	if (!adapter->hw.hw_addr)
		goto err_ioremap;

	/* optional per-board hook */
	if (ei->get_variants) {
		err = ei->get_variants(adapter);
		if (err)
			goto err_get_variants;
	}

	/* setup adapter struct */
	err = igbvf_sw_init(adapter);
	if (err)
		goto err_sw_init;

	/* construct the net_device struct */
	netdev->netdev_ops = &igbvf_netdev_ops;

	igbvf_set_ethtool_ops(netdev);
	netdev->watchdog_timeo = 5 * HZ;
	/* Temporary name taken from the PCI device name; it is replaced by
	 * "eth%d" right before register_netdev() below.
	 * NOTE(review): presumably so early messages identify the device -
	 * confirm.
	 */
	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);

	adapter->bd_number = cards_found++;

	netdev->hw_features = NETIF_F_SG |
			      NETIF_F_TSO |
			      NETIF_F_TSO6 |
			      NETIF_F_RXCSUM |
			      NETIF_F_HW_CSUM |
			      NETIF_F_SCTP_CRC;

#define IGBVF_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
				    NETIF_F_GSO_GRE_CSUM | \
				    NETIF_F_GSO_IPXIP4 | \
				    NETIF_F_GSO_IPXIP6 | \
				    NETIF_F_GSO_UDP_TUNNEL | \
				    NETIF_F_GSO_UDP_TUNNEL_CSUM)

	netdev->gso_partial_features = IGBVF_GSO_PARTIAL_FEATURES;
	netdev->hw_features |= NETIF_F_GSO_PARTIAL |
			       IGBVF_GSO_PARTIAL_FEATURES;

	/* everything user-toggleable starts out enabled */
	netdev->features = netdev->hw_features;

	if (pci_using_dac)
		netdev->features |= NETIF_F_HIGHDMA;

	netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
	netdev->mpls_features |= NETIF_F_HW_CSUM;
	netdev->hw_enc_features |= netdev->vlan_features;

	/* set this bit last since it cannot be part of vlan_features */
	netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
			    NETIF_F_HW_VLAN_CTAG_RX |
			    NETIF_F_HW_VLAN_CTAG_TX;

	/* reset the controller to put the device in a known good state;
	 * a failure here is only logged, probing continues
	 */
	err = hw->mac.ops.reset_hw(hw);
	if (err) {
		dev_info(&pdev->dev,
			 "PF still in reset state. Is the PF interface up?\n");
	} else {
		err = hw->mac.ops.read_mac_addr(hw);
		if (err)
			dev_info(&pdev->dev, "Error reading MAC address.\n");
		else if (is_zero_ether_addr(adapter->hw.mac.addr))
			dev_info(&pdev->dev,
				 "MAC address not assigned by administrator.\n");
		/* copied whether or not the read reported an error */
		memcpy(netdev->dev_addr, adapter->hw.mac.addr,
		       netdev->addr_len);
	}

	/* no usable address so far: generate one and keep hw in sync */
	if (!is_valid_ether_addr(netdev->dev_addr)) {
		dev_info(&pdev->dev, "Assigning random MAC address.\n");
		eth_hw_addr_random(netdev);
		memcpy(adapter->hw.mac.addr, netdev->dev_addr,
		       netdev->addr_len);
	}

	setup_timer(&adapter->watchdog_timer, &igbvf_watchdog,
		    (unsigned long)adapter);

	INIT_WORK(&adapter->reset_task, igbvf_reset_task);
	INIT_WORK(&adapter->watchdog_task, igbvf_watchdog_task);

	/* ring size defaults
	 * NOTE(review): rx_ring/tx_ring are presumably allocated by
	 * igbvf_sw_init() above - confirm.
	 */
	adapter->rx_ring->count = 1024;
	adapter->tx_ring->count = 1024;

	/* reset the hardware with the new settings */
	igbvf_reset(adapter);

	/* set hardware-specific flags */
	if (adapter->hw.mac.type == e1000_vfadapt_i350)
		adapter->flags |= IGBVF_FLAG_RX_LB_VLAN_BSWAP;

	/* name template handed to register_netdev() */
	strcpy(netdev->name, "eth%d");
	err = register_netdev(netdev);
	if (err)
		goto err_hw_init;

	/* tell the stack to leave us alone until igbvf_open() is called */
	netif_carrier_off(netdev);
	netif_stop_queue(netdev);

	igbvf_print_device_info(adapter);

	igbvf_initialize_last_counter_stats(adapter);

	return 0;

	/* Error unwind: each label releases what was set up before the
	 * corresponding failure point and falls through to the next.
	 */
err_hw_init:
	/* delete the NAPI struct before freeing the ring that embeds it */
	netif_napi_del(&adapter->rx_ring->napi);
	kfree(adapter->tx_ring);
	kfree(adapter->rx_ring);
err_sw_init:
	igbvf_reset_interrupt_capability(adapter);
err_get_variants:
	iounmap(adapter->hw.hw_addr);
err_ioremap:
	free_netdev(netdev);
err_alloc_etherdev:
	pci_release_regions(pdev);
err_pci_reg:
err_dma:
	pci_disable_device(pdev);
	return err;
}

/**
 * igbvf_remove - Device Removal Routine
 * @pdev: PCI device information struct
 *
 * igbvf_remove is called by the PCI subsystem to alert the driver
 * that it should release a PCI device.  This could be caused by a
 * Hot-Plug event, or because the driver is going to be removed from
 * memory.
 **/
static void igbvf_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igbvf_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;

	/* The watchdog timer may be rescheduled, so explicitly
	 * disable it from being rescheduled.
	 */
	set_bit(__IGBVF_DOWN, &adapter->state);
	del_timer_sync(&adapter->watchdog_timer);

	/* wait for any queued reset/watchdog work to finish */
	cancel_work_sync(&adapter->reset_task);
	cancel_work_sync(&adapter->watchdog_task);

	unregister_netdev(netdev);

	igbvf_reset_interrupt_capability(adapter);

	/* it is important to delete the NAPI struct prior to freeing the
	 * Rx ring so that you do not end up with null pointer refs
	 */
	netif_napi_del(&adapter->rx_ring->napi);
	kfree(adapter->tx_ring);
	kfree(adapter->rx_ring);

	iounmap(hw->hw_addr);
	/* only unmapped when a flash mapping was actually set */
	if (hw->flash_address)
		iounmap(hw->flash_address);
	pci_release_regions(pdev);

	free_netdev(netdev);

	pci_disable_device(pdev);
}

/* PCI Error Recovery (ERS) */
static const struct pci_error_handlers igbvf_err_handler = {
	.error_detected = igbvf_io_error_detected,
	.slot_reset = igbvf_io_slot_reset,
	.resume = igbvf_io_resume,
};

/* Supported devices; the board_* value of the matching entry reaches
 * igbvf_probe() as ent->driver_data and indexes igbvf_info_tbl.
 */
static const struct pci_device_id igbvf_pci_tbl[] = {
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_VF), board_vf },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_VF), board_i350_vf },
	{ } /* terminate list */
};
MODULE_DEVICE_TABLE(pci, igbvf_pci_tbl);

/* PCI Device API Driver */
static struct pci_driver igbvf_driver = {
	.name		= igbvf_driver_name,
	.id_table	= igbvf_pci_tbl,
	.probe		= igbvf_probe,
	.remove		= igbvf_remove,
#ifdef CONFIG_PM
	/* Power Management Hooks */
	.suspend	= igbvf_suspend,
	.resume		= igbvf_resume,
#endif
	.shutdown	= igbvf_shutdown,
	.err_handler	= &igbvf_err_handler
};

/**
 * igbvf_init_module - Driver Registration Routine
 *
 * igbvf_init_module is the first routine called when the driver is
 * loaded.  It logs the driver banner and copyright, then registers
 * with the PCI subsystem.
 *
 * Returns the result of pci_register_driver().
 **/
static int __init igbvf_init_module(void)
{
	int ret;

	pr_info("%s - version %s\n", igbvf_driver_string, igbvf_driver_version);
	pr_info("%s\n", igbvf_copyright);

	ret = pci_register_driver(&igbvf_driver);

	return ret;
}
module_init(igbvf_init_module);

/**
 * igbvf_exit_module - Driver Exit Cleanup Routine
 *
 * igbvf_exit_module is called just before the driver is removed
 * from memory.  It unregisters the driver from the PCI subsystem.
 **/
static void __exit igbvf_exit_module(void)
{
	pci_unregister_driver(&igbvf_driver);
}
module_exit(igbvf_exit_module);

MODULE_AUTHOR("Intel Corporation, ");
MODULE_DESCRIPTION("Intel(R) Gigabit Virtual Function Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);

/* netdev.c */