--- zzzz-none-000/linux-3.10.107/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c 2021-02-04 17:41:59.000000000 +0000 @@ -1,12 +1,14 @@ -/* bnx2x_cmn.c: Broadcom Everest network driver. +/* bnx2x_cmn.c: QLogic Everest network driver. * * Copyright (c) 2007-2013 Broadcom Corporation + * Copyright (c) 2014 QLogic Corporation + * All rights reserved * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation. * - * Maintained by: Eilon Greenstein + * Maintained by: Ariel Elior * Written by: Eliezer Tamir * Based on code from Michael Chan's bnx2 driver * UDP CSUM errata workaround by Arik Gendelman @@ -21,14 +23,57 @@ #include #include #include +#include #include #include #include +#include #include #include "bnx2x_cmn.h" #include "bnx2x_init.h" #include "bnx2x_sp.h" +static void bnx2x_free_fp_mem_cnic(struct bnx2x *bp); +static int bnx2x_alloc_fp_mem_cnic(struct bnx2x *bp); +static int bnx2x_alloc_fp_mem(struct bnx2x *bp); +static int bnx2x_poll(struct napi_struct *napi, int budget); + +static void bnx2x_add_all_napi_cnic(struct bnx2x *bp) +{ + int i; + + /* Add NAPI objects */ + for_each_rx_queue_cnic(bp, i) { + netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi), + bnx2x_poll, NAPI_POLL_WEIGHT); + napi_hash_add(&bnx2x_fp(bp, i, napi)); + } +} + +static void bnx2x_add_all_napi(struct bnx2x *bp) +{ + int i; + + /* Add NAPI objects */ + for_each_eth_queue(bp, i) { + netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi), + bnx2x_poll, NAPI_POLL_WEIGHT); + napi_hash_add(&bnx2x_fp(bp, i, napi)); + } +} + +static int bnx2x_calc_num_queues(struct bnx2x *bp) +{ + int nq = bnx2x_num_queues ? : netif_get_num_default_rss_queues(); + + /* Reduce memory usage in kdump environment by using only one queue */ + if (is_kdump_kernel()) + nq = 1; + + nq = clamp(nq, 1, BNX2X_MAX_QUEUES(bp)); + return nq; +} + /** * bnx2x_move_fp - move content of the fastpath structure. * @@ -52,6 +97,7 @@ struct bnx2x_fp_stats *to_fp_stats = &bp->fp_stats[to]; int old_max_eth_txqs, new_max_eth_txqs; int old_txdata_index = 0, new_txdata_index = 0; + struct bnx2x_agg_info *old_tpa_info = to_fp->tpa_info; /* Copy the NAPI object as it has been already initialized */ from_fp->napi = to_fp->napi; @@ -60,6 +106,11 @@ memcpy(to_fp, from_fp, sizeof(*to_fp)); to_fp->index = to; + /* Retain the tpa_info of the original `to' version as we don't want + * 2 FPs to contain the same tpa_info pointer. + */ + to_fp->tpa_info = old_tpa_info; + /* move sp_objs contents as well, as their indices match fp ones */ memcpy(to_sp_objs, from_sp_objs, sizeof(*to_sp_objs)); @@ -124,7 +175,7 @@ int i, cos, old_eth_num = BNX2X_NUM_ETH_QUEUES(bp); /* Queue pointer cannot be re-set on an fp-basis, as moving pointer - * backward along the array could cause memory to be overriden + * backward along the array could cause memory to be overridden */ for (cos = 1; cos < bp->max_cos; cos++) { for (i = 0; i < old_eth_num - delta; i++) { @@ -138,7 +189,7 @@ } } -int load_count[2][3] = { {0} }; /* per-path: 0-common, 1-port0, 2-port1 */ +int bnx2x_load_count[2][3] = { {0} }; /* per-path: 0-common, 1-port0, 2-port1 */ /* free skb in the packet ring at pos idx * return idx of last bd freed @@ -163,7 +214,6 @@ tx_start_bd = &txdata->tx_desc_ring[bd_idx].start_bd; - nbd = le16_to_cpu(tx_start_bd->nbd) - 1; #ifdef BNX2X_STOP_ON_ERROR if ((nbd - 1) > (MAX_SKB_FRAGS + 2)) { @@ -214,9 +264,9 @@ if (likely(skb)) { (*pkts_compl)++; (*bytes_compl) += skb->len; + dev_kfree_skb_any(skb); } - dev_kfree_skb_any(skb); tx_buf->first_bd = 0; tx_buf->skb = NULL; @@ -270,7 +320,7 @@ smp_mb(); if (unlikely(netif_tx_queue_stopped(txq))) { - /* Taking tx_lock() is needed to prevent reenabling the queue + /* Taking tx_lock() is needed to prevent re-enabling the queue * while it's empty. This could have happen if rx_action() gets * suspended in bnx2x_tx_int() after the condition before * netif_tx_wake_queue(), while tx_action (bnx2x_start_xmit()): @@ -359,7 +409,7 @@ */ static u32 bnx2x_get_rxhash(const struct bnx2x *bp, const struct eth_fast_path_rx_cqe *cqe, - bool *l4_rxhash) + enum pkt_hash_types *rxhash_type) { /* Get Toeplitz hash from CQE */ if ((bp->dev->features & NETIF_F_RXHASH) && @@ -367,11 +417,13 @@ enum eth_rss_hash_type htype; htype = cqe->status_flags & ETH_FAST_PATH_RX_CQE_RSS_HASH_TYPE; - *l4_rxhash = (htype == TCP_IPV4_HASH_TYPE) || - (htype == TCP_IPV6_HASH_TYPE); + *rxhash_type = ((htype == TCP_IPV4_HASH_TYPE) || + (htype == TCP_IPV6_HASH_TYPE)) ? + PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3; + return le32_to_cpu(cqe->rss_hash_result); } - *l4_rxhash = false; + *rxhash_type = PKT_HASH_TYPE_NONE; return 0; } @@ -425,7 +477,7 @@ tpa_info->tpa_state = BNX2X_TPA_START; tpa_info->len_on_bd = le16_to_cpu(cqe->len_on_bd); tpa_info->placement_offset = cqe->placement_offset; - tpa_info->rxhash = bnx2x_get_rxhash(bp, cqe, &tpa_info->l4_rxhash); + tpa_info->rxhash = bnx2x_get_rxhash(bp, cqe, &tpa_info->rxhash_type); if (fp->mode == TPA_MODE_GRO) { u16 gro_size = le16_to_cpu(cqe->pkt_len_or_gro_seg_len); tpa_info->full_page = SGE_PAGES / gro_size * gro_size; @@ -434,11 +486,7 @@ #ifdef BNX2X_STOP_ON_ERROR fp->tpa_queue_used |= (1 << queue); -#ifdef _ASM_GENERIC_INT_L64_H - DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%lx\n", -#else DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%llx\n", -#endif fp->tpa_queue_used); #endif } @@ -495,33 +543,49 @@ NAPI_GRO_CB(skb)->count = num_of_coalesced_segs; } -static int bnx2x_alloc_rx_sge(struct bnx2x *bp, - struct bnx2x_fastpath *fp, u16 index) +static int bnx2x_alloc_rx_sge(struct bnx2x *bp, struct bnx2x_fastpath *fp, + u16 index, gfp_t gfp_mask) { - struct page *page = alloc_pages(GFP_ATOMIC, PAGES_PER_SGE_SHIFT); struct sw_rx_page *sw_buf = &fp->rx_page_ring[index]; struct eth_rx_sge *sge = &fp->rx_sge_ring[index]; + struct bnx2x_alloc_pool *pool = &fp->page_pool; dma_addr_t mapping; - if (unlikely(page == NULL)) { - BNX2X_ERR("Can't alloc sge\n"); - return -ENOMEM; + if (!pool->page || (PAGE_SIZE - pool->offset) < SGE_PAGE_SIZE) { + + /* put page reference used by the memory pool, since we + * won't be using this page as the mempool anymore. + */ + if (pool->page) + put_page(pool->page); + + pool->page = alloc_pages(gfp_mask, PAGES_PER_SGE_SHIFT); + if (unlikely(!pool->page)) { + BNX2X_ERR("Can't alloc sge\n"); + return -ENOMEM; + } + + pool->offset = 0; } - mapping = dma_map_page(&bp->pdev->dev, page, 0, - SGE_PAGES, DMA_FROM_DEVICE); + mapping = dma_map_page(&bp->pdev->dev, pool->page, + pool->offset, SGE_PAGE_SIZE, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) { - __free_pages(page, PAGES_PER_SGE_SHIFT); BNX2X_ERR("Can't map sge\n"); return -ENOMEM; } - sw_buf->page = page; + get_page(pool->page); + sw_buf->page = pool->page; + sw_buf->offset = pool->offset; + dma_unmap_addr_set(sw_buf, mapping, mapping); sge->addr_hi = cpu_to_le32(U64_HI(mapping)); sge->addr_lo = cpu_to_le32(U64_LO(mapping)); + pool->offset += SGE_PAGE_SIZE; + return 0; } @@ -577,26 +641,28 @@ /* If we fail to allocate a substitute page, we simply stop where we are and drop the whole packet */ - err = bnx2x_alloc_rx_sge(bp, fp, sge_idx); + err = bnx2x_alloc_rx_sge(bp, fp, sge_idx, GFP_ATOMIC); if (unlikely(err)) { bnx2x_fp_qstats(bp, fp)->rx_skb_alloc_failed++; return err; } - /* Unmap the page as we r going to pass it to the stack */ dma_unmap_page(&bp->pdev->dev, dma_unmap_addr(&old_rx_pg, mapping), - SGE_PAGES, DMA_FROM_DEVICE); + SGE_PAGE_SIZE, DMA_FROM_DEVICE); /* Add one frag and update the appropriate fields in the skb */ if (fp->mode == TPA_MODE_LRO) - skb_fill_page_desc(skb, j, old_rx_pg.page, 0, frag_len); + skb_fill_page_desc(skb, j, old_rx_pg.page, + old_rx_pg.offset, frag_len); else { /* GRO */ int rem; int offset = 0; for (rem = frag_len; rem > 0; rem -= gro_size) { int len = rem > gro_size ? gro_size : rem; skb_fill_page_desc(skb, frag_id++, - old_rx_pg.page, offset, len); + old_rx_pg.page, + old_rx_pg.offset + offset, + len); if (offset) get_page(old_rx_pg.page); offset += len; @@ -616,17 +682,22 @@ static void bnx2x_frag_free(const struct bnx2x_fastpath *fp, void *data) { if (fp->rx_frag_size) - put_page(virt_to_head_page(data)); + skb_free_frag(data); else kfree(data); } -static void *bnx2x_frag_alloc(const struct bnx2x_fastpath *fp) +static void *bnx2x_frag_alloc(const struct bnx2x_fastpath *fp, gfp_t gfp_mask) { - if (fp->rx_frag_size) + if (fp->rx_frag_size) { + /* GFP_KERNEL allocations are used only during initialization */ + if (unlikely(gfpflags_allow_blocking(gfp_mask))) + return (void *)__get_free_page(gfp_mask); + return netdev_alloc_frag(fp->rx_frag_size); + } - return kmalloc(fp->rx_buf_size + NET_SKB_PAD, GFP_ATOMIC); + return kmalloc(fp->rx_buf_size + NET_SKB_PAD, gfp_mask); } #ifdef CONFIG_INET @@ -707,7 +778,7 @@ goto drop; /* Try to allocate the new data */ - new_data = bnx2x_frag_alloc(fp); + new_data = bnx2x_frag_alloc(fp, GFP_ATOMIC); /* Unmap skb in the pool anyway, as we are going to change pool entry status to BNX2X_TPA_STOP even if new skb allocation fails. */ @@ -728,8 +799,7 @@ skb_reserve(skb, pad + NET_SKB_PAD); skb_put(skb, len); - skb->rxhash = tpa_info->rxhash; - skb->l4_rxhash = tpa_info->l4_rxhash; + skb_set_hash(skb, tpa_info->rxhash, tpa_info->rxhash_type); skb->protocol = eth_type_trans(skb, bp->dev); skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -745,7 +815,6 @@ dev_kfree_skb_any(skb); } - /* put new data in bin */ rx_buf->data = new_data; @@ -760,15 +829,15 @@ bnx2x_fp_stats(bp, fp)->eth_q_stats.rx_skb_alloc_failed++; } -static int bnx2x_alloc_rx_data(struct bnx2x *bp, - struct bnx2x_fastpath *fp, u16 index) +static int bnx2x_alloc_rx_data(struct bnx2x *bp, struct bnx2x_fastpath *fp, + u16 index, gfp_t gfp_mask) { u8 *data; struct sw_rx_bd *rx_buf = &fp->rx_buf_ring[index]; struct eth_rx_bd *rx_bd = &fp->rx_desc_ring[index]; dma_addr_t mapping; - data = bnx2x_frag_alloc(fp); + data = bnx2x_frag_alloc(fp, gfp_mask); if (unlikely(data == NULL)) return -ENOMEM; @@ -814,23 +883,21 @@ skb->ip_summed = CHECKSUM_UNNECESSARY; } -int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget) +static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget) { struct bnx2x *bp = fp->bp; u16 bd_cons, bd_prod, bd_prod_fw, comp_ring_cons; - u16 hw_comp_cons, sw_comp_cons, sw_comp_prod; + u16 sw_comp_cons, sw_comp_prod; int rx_pkt = 0; + union eth_rx_cqe *cqe; + struct eth_fast_path_rx_cqe *cqe_fp; #ifdef BNX2X_STOP_ON_ERROR if (unlikely(bp->panic)) return 0; #endif - - /* CQ "next element" is of the size of the regular element, - that's why it's ok here */ - hw_comp_cons = le16_to_cpu(*fp->rx_cons_sb); - if ((hw_comp_cons & MAX_RCQ_DESC_CNT) == MAX_RCQ_DESC_CNT) - hw_comp_cons++; + if (budget <= 0) + return rx_pkt; bd_cons = fp->rx_bd_cons; bd_prod = fp->rx_bd_prod; @@ -838,37 +905,43 @@ sw_comp_cons = fp->rx_comp_cons; sw_comp_prod = fp->rx_comp_prod; - /* Memory barrier necessary as speculative reads of the rx - * buffer can be ahead of the index in the status block - */ - rmb(); + comp_ring_cons = RCQ_BD(sw_comp_cons); + cqe = &fp->rx_comp_ring[comp_ring_cons]; + cqe_fp = &cqe->fast_path_cqe; DP(NETIF_MSG_RX_STATUS, - "queue[%d]: hw_comp_cons %u sw_comp_cons %u\n", - fp->index, hw_comp_cons, sw_comp_cons); + "queue[%d]: sw_comp_cons %u\n", fp->index, sw_comp_cons); - while (sw_comp_cons != hw_comp_cons) { + while (BNX2X_IS_CQE_COMPLETED(cqe_fp)) { struct sw_rx_bd *rx_buf = NULL; struct sk_buff *skb; - union eth_rx_cqe *cqe; - struct eth_fast_path_rx_cqe *cqe_fp; u8 cqe_fp_flags; enum eth_rx_cqe_type cqe_fp_type; u16 len, pad, queue; u8 *data; - bool l4_rxhash; + u32 rxhash; + enum pkt_hash_types rxhash_type; #ifdef BNX2X_STOP_ON_ERROR if (unlikely(bp->panic)) return 0; #endif - comp_ring_cons = RCQ_BD(sw_comp_cons); bd_prod = RX_BD(bd_prod); bd_cons = RX_BD(bd_cons); - cqe = &fp->rx_comp_ring[comp_ring_cons]; - cqe_fp = &cqe->fast_path_cqe; + /* A rmb() is required to ensure that the CQE is not read + * before it is written by the adapter DMA. PCI ordering + * rules will make sure the other fields are written before + * the marker at the end of struct eth_fast_path_rx_cqe + * but without rmb() a weakly ordered processor can process + * stale data. Without the barrier TPA state-machine might + * enter inconsistent state and kernel stack might be + * provided with incorrect packet description - these lead + * to various kernel crashed. + */ + rmb(); + cqe_fp_flags = cqe_fp->type_error_flags; cqe_fp_type = cqe_fp_flags & ETH_FAST_PATH_RX_CQE_TYPE; @@ -894,10 +967,10 @@ u16 frag_size, pages; #ifdef BNX2X_STOP_ON_ERROR /* sanity check */ - if (fp->disable_tpa && + if (fp->mode == TPA_MODE_DISABLED && (CQE_TYPE_START(cqe_fp_type) || CQE_TYPE_STOP(cqe_fp_type))) - BNX2X_ERR("START/STOP packet while disable_tpa type %x\n", + BNX2X_ERR("START/STOP packet while TPA disabled, type %x\n", CQE_TYPE(cqe_fp_type)); #endif @@ -912,7 +985,6 @@ cqe_fp); goto next_rx; - } queue = cqe->end_agg_cqe.queue_index; tpa_info = &fp->tpa_info[queue]; @@ -963,7 +1035,7 @@ */ if ((bp->dev->mtu > ETH_MAX_PACKET_SIZE) && (len <= RX_COPY_THRESH)) { - skb = netdev_alloc_skb_ip_align(bp->dev, len); + skb = napi_alloc_skb(&fp->napi, len); if (skb == NULL) { DP(NETIF_MSG_RX_ERR | NETIF_MSG_RX_STATUS, "ERROR packet dropped because of alloc failure\n"); @@ -973,7 +1045,8 @@ memcpy(skb->data, data + pad, len); bnx2x_reuse_rx_data(fp, bd_cons, bd_prod); } else { - if (likely(bnx2x_alloc_rx_data(bp, fp, bd_prod) == 0)) { + if (likely(bnx2x_alloc_rx_data(bp, fp, bd_prod, + GFP_ATOMIC) == 0)) { dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(rx_buf, mapping), fp->rx_buf_size, @@ -1000,8 +1073,8 @@ skb->protocol = eth_type_trans(skb, bp->dev); /* Set Toeplitz hash for a none-LRO skb */ - skb->rxhash = bnx2x_get_rxhash(bp, cqe_fp, &l4_rxhash); - skb->l4_rxhash = l4_rxhash; + rxhash = bnx2x_get_rxhash(bp, cqe_fp, &rxhash_type); + skb_set_hash(skb, rxhash, rxhash_type); skb_checksum_none_assert(skb); @@ -1011,13 +1084,22 @@ skb_record_rx_queue(skb, fp->rx_queue); + /* Check if this packet was timestamped */ + if (unlikely(cqe->fast_path_cqe.type_error_flags & + (1 << ETH_FAST_PATH_RX_CQE_PTP_PKT_SHIFT))) + bnx2x_set_rx_ts(bp, skb); + if (le16_to_cpu(cqe_fp->pars_flags.flags) & PARSING_FLAGS_VLAN) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), le16_to_cpu(cqe_fp->vlan_tag)); - napi_gro_receive(&fp->napi, skb); + skb_mark_napi_id(skb, &fp->napi); + if (bnx2x_fp_ll_polling(fp)) + netif_receive_skb(skb); + else + napi_gro_receive(&fp->napi, skb); next_rx: rx_buf->data = NULL; @@ -1029,8 +1111,15 @@ sw_comp_prod = NEXT_RCQ_IDX(sw_comp_prod); sw_comp_cons = NEXT_RCQ_IDX(sw_comp_cons); + /* mark CQE as free */ + BNX2X_SEED_CQE(cqe_fp); + if (rx_pkt == budget) break; + + comp_ring_cons = RCQ_BD(sw_comp_cons); + cqe = &fp->rx_comp_ring[comp_ring_cons]; + cqe_fp = &cqe->fast_path_cqe; } /* while */ fp->rx_bd_cons = bd_cons; @@ -1066,13 +1155,11 @@ #endif /* Handle Rx and Tx according to MSI-X vector */ - prefetch(fp->rx_cons_sb); - for_each_cos_in_tx_queue(fp, cos) prefetch(fp->txdata_ptr[cos]->tx_cons_sb); prefetch(&fp->sb_running_index[SM_RX_ID]); - napi_schedule(&bnx2x_fp(bp, fp->index, napi)); + napi_schedule_irqoff(&bnx2x_fp(bp, fp->index, napi)); return IRQ_HANDLED; } @@ -1103,7 +1190,7 @@ /* Calculate the current MAX line speed limit for the MF * devices */ - if (IS_MF_SI(bp)) + if (IS_MF_PERCENT_BW(bp)) line_speed = (line_speed * maxCfg) / 100; else { /* SD mode */ u16 vn_max_rate = maxCfg * 100; @@ -1127,29 +1214,38 @@ static void bnx2x_fill_report_data(struct bnx2x *bp, struct bnx2x_link_report_data *data) { - u16 line_speed = bnx2x_get_mf_speed(bp); - memset(data, 0, sizeof(*data)); - /* Fill the report data: efective line speed */ - data->line_speed = line_speed; + if (IS_PF(bp)) { + /* Fill the report data: effective line speed */ + data->line_speed = bnx2x_get_mf_speed(bp); - /* Link is down */ - if (!bp->link_vars.link_up || (bp->flags & MF_FUNC_DIS)) - __set_bit(BNX2X_LINK_REPORT_LINK_DOWN, - &data->link_report_flags); - - /* Full DUPLEX */ - if (bp->link_vars.duplex == DUPLEX_FULL) - __set_bit(BNX2X_LINK_REPORT_FD, &data->link_report_flags); - - /* Rx Flow Control is ON */ - if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_RX) - __set_bit(BNX2X_LINK_REPORT_RX_FC_ON, &data->link_report_flags); - - /* Tx Flow Control is ON */ - if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_TX) - __set_bit(BNX2X_LINK_REPORT_TX_FC_ON, &data->link_report_flags); + /* Link is down */ + if (!bp->link_vars.link_up || (bp->flags & MF_FUNC_DIS)) + __set_bit(BNX2X_LINK_REPORT_LINK_DOWN, + &data->link_report_flags); + + if (!BNX2X_NUM_ETH_QUEUES(bp)) + __set_bit(BNX2X_LINK_REPORT_LINK_DOWN, + &data->link_report_flags); + + /* Full DUPLEX */ + if (bp->link_vars.duplex == DUPLEX_FULL) + __set_bit(BNX2X_LINK_REPORT_FD, + &data->link_report_flags); + + /* Rx Flow Control is ON */ + if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_RX) + __set_bit(BNX2X_LINK_REPORT_RX_FC_ON, + &data->link_report_flags); + + /* Tx Flow Control is ON */ + if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_TX) + __set_bit(BNX2X_LINK_REPORT_TX_FC_ON, + &data->link_report_flags); + } else { /* VF */ + *data = bp->vf_link_vars; + } } /** @@ -1174,7 +1270,7 @@ * * @bp: driver handle * - * None atomic inmlementation. + * None atomic implementation. * Should be called under the phy_lock. */ void __bnx2x_link_report(struct bnx2x *bp) @@ -1203,6 +1299,10 @@ */ memcpy(&bp->last_reported_link, &cur_data, sizeof(cur_data)); + /* propagate status to VFs */ + if (IS_PF(bp)) + bnx2x_iov_link_update(bp); + if (test_bit(BNX2X_LINK_REPORT_LINK_DOWN, &cur_data.link_report_flags)) { netif_carrier_off(bp->dev); @@ -1316,20 +1416,21 @@ DP(NETIF_MSG_IFUP, "mtu %d rx_buf_size %d\n", bp->dev->mtu, fp->rx_buf_size); - if (!fp->disable_tpa) { - /* Fill the per-aggregtion pool */ + if (fp->mode != TPA_MODE_DISABLED) { + /* Fill the per-aggregation pool */ for (i = 0; i < MAX_AGG_QS(bp); i++) { struct bnx2x_agg_info *tpa_info = &fp->tpa_info[i]; struct sw_rx_bd *first_buf = &tpa_info->first_buf; - first_buf->data = bnx2x_frag_alloc(fp); + first_buf->data = + bnx2x_frag_alloc(fp, GFP_KERNEL); if (!first_buf->data) { BNX2X_ERR("Failed to allocate TPA skb pool for queue[%d] - disabling TPA on this queue!\n", j); bnx2x_free_tpa_pool(bp, fp, i); - fp->disable_tpa = 1; + fp->mode = TPA_MODE_DISABLED; break; } dma_unmap_addr_set(first_buf, mapping, 0); @@ -1346,7 +1447,8 @@ for (i = 0, ring_prod = 0; i < MAX_RX_SGE_CNT*NUM_RX_SGE_PAGES; i++) { - if (bnx2x_alloc_rx_sge(bp, fp, ring_prod) < 0) { + if (bnx2x_alloc_rx_sge(bp, fp, ring_prod, + GFP_KERNEL) < 0) { BNX2X_ERR("was only able to allocate %d rx sges\n", i); BNX2X_ERR("disabling TPA for queue[%d]\n", @@ -1356,7 +1458,7 @@ ring_prod); bnx2x_free_tpa_pool(bp, fp, MAX_AGG_QS(bp)); - fp->disable_tpa = 1; + fp->mode = TPA_MODE_DISABLED; ring_prod = 0; break; } @@ -1478,12 +1580,12 @@ bnx2x_free_rx_bds(fp); - if (!fp->disable_tpa) + if (fp->mode != TPA_MODE_DISABLED) bnx2x_free_tpa_pool(bp, fp, MAX_AGG_QS(bp)); } } -void bnx2x_free_skbs_cnic(struct bnx2x *bp) +static void bnx2x_free_skbs_cnic(struct bnx2x *bp) { bnx2x_free_tx_skbs_cnic(bp); bnx2x_free_rx_skbs_cnic(bp); @@ -1596,36 +1698,16 @@ DP(BNX2X_MSG_SP, "about to request enable msix with %d vectors\n", msix_vec); - rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], msix_vec); - + rc = pci_enable_msix_range(bp->pdev, &bp->msix_table[0], + BNX2X_MIN_MSIX_VEC_CNT(bp), msix_vec); /* * reconfigure number of tx/rx queues according to available * MSI-X vectors */ - if (rc >= BNX2X_MIN_MSIX_VEC_CNT(bp)) { - /* how less vectors we will have? */ - int diff = msix_vec - rc; - - BNX2X_DEV_INFO("Trying to use less MSI-X vectors: %d\n", rc); - - rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], rc); - - if (rc) { - BNX2X_DEV_INFO("MSI-X is not attainable rc %d\n", rc); - goto no_msix; - } - /* - * decrease number of queues by number of unallocated entries - */ - bp->num_ethernet_queues -= diff; - bp->num_queues = bp->num_ethernet_queues + bp->num_cnic_queues; - - BNX2X_DEV_INFO("New queue configuration set: %d\n", - bp->num_queues); - } else if (rc > 0) { + if (rc == -ENOSPC) { /* Get by with single vector */ - rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], 1); - if (rc) { + rc = pci_enable_msix_range(bp->pdev, &bp->msix_table[0], 1, 1); + if (rc < 0) { BNX2X_DEV_INFO("Single MSI-X is not attainable rc %d\n", rc); goto no_msix; @@ -1638,8 +1720,22 @@ bp->num_ethernet_queues = 1; bp->num_queues = bp->num_ethernet_queues + bp->num_cnic_queues; } else if (rc < 0) { - BNX2X_DEV_INFO("MSI-X is not attainable rc %d\n", rc); + BNX2X_DEV_INFO("MSI-X is not attainable rc %d\n", rc); goto no_msix; + } else if (rc < msix_vec) { + /* how less vectors we will have? */ + int diff = msix_vec - rc; + + BNX2X_DEV_INFO("Trying to use less MSI-X vectors: %d\n", rc); + + /* + * decrease number of queues by number of unallocated entries + */ + bp->num_ethernet_queues -= diff; + bp->num_queues = bp->num_ethernet_queues + bp->num_cnic_queues; + + BNX2X_DEV_INFO("New queue configuration set: %d\n", + bp->num_queues); } bp->flags |= USING_MSIX_FLAG; @@ -1739,7 +1835,7 @@ return request_irq(irq, bnx2x_interrupt, flags, bp->dev->name, bp->dev); } -int bnx2x_setup_irqs(struct bnx2x *bp) +static int bnx2x_setup_irqs(struct bnx2x *bp) { int rc = 0; if (bp->flags & USING_MSIX_FLAG && @@ -1772,32 +1868,42 @@ { int i; - for_each_rx_queue_cnic(bp, i) + for_each_rx_queue_cnic(bp, i) { + bnx2x_fp_busy_poll_init(&bp->fp[i]); napi_enable(&bnx2x_fp(bp, i, napi)); + } } static void bnx2x_napi_enable(struct bnx2x *bp) { int i; - for_each_eth_queue(bp, i) + for_each_eth_queue(bp, i) { + bnx2x_fp_busy_poll_init(&bp->fp[i]); napi_enable(&bnx2x_fp(bp, i, napi)); + } } static void bnx2x_napi_disable_cnic(struct bnx2x *bp) { int i; - for_each_rx_queue_cnic(bp, i) + for_each_rx_queue_cnic(bp, i) { napi_disable(&bnx2x_fp(bp, i, napi)); + while (!bnx2x_fp_ll_disable(&bp->fp[i])) + usleep_range(1000, 2000); + } } static void bnx2x_napi_disable(struct bnx2x *bp) { int i; - for_each_eth_queue(bp, i) + for_each_eth_queue(bp, i) { napi_disable(&bnx2x_fp(bp, i, napi)); + while (!bnx2x_fp_ll_disable(&bp->fp[i])) + usleep_range(1000, 2000); + } } void bnx2x_netif_start(struct bnx2x *bp) @@ -1820,7 +1926,8 @@ bnx2x_napi_disable_cnic(bp); } -u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb) +u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback) { struct bnx2x *bp = netdev_priv(dev); @@ -1842,7 +1949,7 @@ } /* select a non-FCoE queue */ - return __skb_tx_hash(dev, skb, BNX2X_NUM_ETH_QUEUES(bp)); + return fallback(dev, skb) % BNX2X_NUM_ETH_QUEUES(bp); } void bnx2x_set_num_queues(struct bnx2x *bp) @@ -1851,7 +1958,7 @@ bp->num_ethernet_queues = bnx2x_calc_num_queues(bp); /* override in STORAGE SD modes */ - if (IS_MF_STORAGE_SD(bp) || IS_MF_FCOE_AFEX(bp)) + if (IS_MF_STORAGE_ONLY(bp)) bp->num_ethernet_queues = 1; /* Add special queues */ @@ -1875,7 +1982,7 @@ * * If the actual number of Tx queues (for each CoS) is less than 16 then there * will be the holes at the end of each group of 16 ETh L2 indices (0..15, - * 16..31,...) with indicies that are not coupled with any real Tx queue. + * 16..31,...) with indices that are not coupled with any real Tx queue. * * The proper configuration of skb->queue_mapping is handled by * bnx2x_select_queue() and __skb_tx_hash(). @@ -1937,7 +2044,7 @@ ETH_OVREHEAD + mtu + BNX2X_FW_RX_ALIGN_END; - /* Note : rx_buf_size doesnt take into account NET_SKB_PAD */ + /* Note : rx_buf_size doesn't take into account NET_SKB_PAD */ if (fp->rx_buf_size + NET_SKB_PAD <= PAGE_SIZE) fp->rx_frag_size = fp->rx_buf_size + NET_SKB_PAD; else @@ -1945,12 +2052,12 @@ } } -static int bnx2x_init_rss_pf(struct bnx2x *bp) +static int bnx2x_init_rss(struct bnx2x *bp) { int i; u8 num_eth_queues = BNX2X_NUM_ETH_QUEUES(bp); - /* Prepare the initial contents fo the indirection table if RSS is + /* Prepare the initial contents for the indirection table if RSS is * enabled */ for (i = 0; i < sizeof(bp->rss_conf_obj.ind_table); i++) @@ -1969,8 +2076,8 @@ return bnx2x_config_rss_eth(bp, bp->port.pmf || !CHIP_IS_E1x(bp)); } -int bnx2x_config_rss_pf(struct bnx2x *bp, struct bnx2x_rss_config_obj *rss_obj, - bool config_hash) +int bnx2x_rss(struct bnx2x *bp, struct bnx2x_rss_config_obj *rss_obj, + bool config_hash, bool enable) { struct bnx2x_config_rss_params params = {NULL}; @@ -1985,17 +2092,30 @@ __set_bit(RAMROD_COMP_WAIT, ¶ms.ramrod_flags); - __set_bit(BNX2X_RSS_MODE_REGULAR, ¶ms.rss_flags); + if (enable) { + __set_bit(BNX2X_RSS_MODE_REGULAR, ¶ms.rss_flags); + + /* RSS configuration */ + __set_bit(BNX2X_RSS_IPV4, ¶ms.rss_flags); + __set_bit(BNX2X_RSS_IPV4_TCP, ¶ms.rss_flags); + __set_bit(BNX2X_RSS_IPV6, ¶ms.rss_flags); + __set_bit(BNX2X_RSS_IPV6_TCP, ¶ms.rss_flags); + if (rss_obj->udp_rss_v4) + __set_bit(BNX2X_RSS_IPV4_UDP, ¶ms.rss_flags); + if (rss_obj->udp_rss_v6) + __set_bit(BNX2X_RSS_IPV6_UDP, ¶ms.rss_flags); + + if (!CHIP_IS_E1x(bp)) { + /* valid only for TUNN_MODE_VXLAN tunnel mode */ + __set_bit(BNX2X_RSS_IPV4_VXLAN, ¶ms.rss_flags); + __set_bit(BNX2X_RSS_IPV6_VXLAN, ¶ms.rss_flags); - /* RSS configuration */ - __set_bit(BNX2X_RSS_IPV4, ¶ms.rss_flags); - __set_bit(BNX2X_RSS_IPV4_TCP, ¶ms.rss_flags); - __set_bit(BNX2X_RSS_IPV6, ¶ms.rss_flags); - __set_bit(BNX2X_RSS_IPV6_TCP, ¶ms.rss_flags); - if (rss_obj->udp_rss_v4) - __set_bit(BNX2X_RSS_IPV4_UDP, ¶ms.rss_flags); - if (rss_obj->udp_rss_v6) - __set_bit(BNX2X_RSS_IPV6_UDP, ¶ms.rss_flags); + /* valid only for TUNN_MODE_GRE tunnel mode */ + __set_bit(BNX2X_RSS_TUNN_INNER_HDRS, ¶ms.rss_flags); + } + } else { + __set_bit(BNX2X_RSS_MODE_DISABLED, ¶ms.rss_flags); + } /* Hash bits */ params.rss_result_mask = MULTI_MASK; @@ -2004,11 +2124,14 @@ if (config_hash) { /* RSS keys */ - prandom_bytes(params.rss_key, sizeof(params.rss_key)); + netdev_rss_key_fill(params.rss_key, T_ETH_RSS_KEY * 4); __set_bit(BNX2X_RSS_SET_SRCH, ¶ms.rss_flags); } - return bnx2x_config_rss(bp, ¶ms); + if (IS_PF(bp)) + return bnx2x_config_rss(bp, ¶ms); + else + return bnx2x_vfpf_config_rss(bp, ¶ms); } static int bnx2x_init_hw(struct bnx2x *bp, u32 load_code) @@ -2028,7 +2151,7 @@ /* * Cleans the object that have internal lists without sending - * ramrods. Should be run when interrutps are disabled. + * ramrods. Should be run when interrupts are disabled. */ void bnx2x_squeeze_objects(struct bnx2x *bp) { @@ -2063,7 +2186,11 @@ rparam.mcast_obj = &bp->mcast_obj; __set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags); - /* Add a DEL command... */ + /* Add a DEL command... - Since we're doing a driver cleanup only, + * we take a lock surrounding both the initial send and the CONTs, + * as we don't want a true completion to disrupt us in the middle. + */ + netif_addr_lock_bh(bp->dev); rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL); if (rc < 0) BNX2X_ERR("Failed to add a new DEL command to a multi-cast object: %d\n", @@ -2075,11 +2202,13 @@ if (rc < 0) { BNX2X_ERR("Failed to clean multi-cast object: %d\n", rc); + netif_addr_unlock_bh(bp->dev); return; } rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_CONT); } + netif_addr_unlock_bh(bp->dev); } #ifndef BNX2X_STOP_ON_ERROR @@ -2168,8 +2297,10 @@ sizeof(struct per_queue_stats) * num_queue_stats + sizeof(struct stats_counter); - BNX2X_PCI_ALLOC(bp->fw_stats, &bp->fw_stats_mapping, - bp->fw_stats_data_sz + bp->fw_stats_req_sz); + bp->fw_stats = BNX2X_PCI_ALLOC(&bp->fw_stats_mapping, + bp->fw_stats_data_sz + bp->fw_stats_req_sz); + if (!bp->fw_stats) + goto alloc_mem_err; /* Set shortcuts */ bp->fw_stats_req = (struct bnx2x_fw_stats_req *)bp->fw_stats; @@ -2179,10 +2310,10 @@ bp->fw_stats_data_mapping = bp->fw_stats_mapping + bp->fw_stats_req_sz; - DP(BNX2X_MSG_SP, "statistics request base address set to %x %x", + DP(BNX2X_MSG_SP, "statistics request base address set to %x %x\n", U64_HI(bp->fw_stats_req_mapping), U64_LO(bp->fw_stats_req_mapping)); - DP(BNX2X_MSG_SP, "statistics data base address set to %x %x", + DP(BNX2X_MSG_SP, "statistics data base address set to %x %x\n", U64_HI(bp->fw_stats_data_mapping), U64_LO(bp->fw_stats_data_mapping)); return 0; @@ -2196,6 +2327,8 @@ /* send load request to mcp and analyze response */ static int bnx2x_nic_load_request(struct bnx2x *bp, u32 *load_code) { + u32 param; + /* init fw_seq */ bp->fw_seq = (SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) & @@ -2208,9 +2341,13 @@ DRV_PULSE_SEQ_MASK); BNX2X_DEV_INFO("drv_pulse 0x%x\n", bp->fw_drv_pulse_wr_seq); + param = DRV_MSG_CODE_LOAD_REQ_WITH_LFA; + + if (IS_MF_SD(bp) && bnx2x_port_after_undi(bp)) + param |= DRV_MSG_CODE_LOAD_REQ_FORCE_LFA; + /* load request */ - (*load_code) = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_REQ, - DRV_MSG_CODE_LOAD_REQ_WITH_LFA); + (*load_code) = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_REQ, param); /* if mcp fails to respond we must abort */ if (!(*load_code)) { @@ -2232,7 +2369,7 @@ * virtualized environments a pf from another VM may have already * initialized the device including loading FW */ -int bnx2x_nic_load_analyze_req(struct bnx2x *bp, u32 load_code) +int bnx2x_compare_fw_ver(struct bnx2x *bp, u32 load_code, bool print_err) { /* is another pf loaded on this engine? */ if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP && @@ -2251,8 +2388,12 @@ /* abort nic load if version mismatch */ if (my_fw != loaded_fw) { - BNX2X_ERR("bnx2x with FW %x was already loaded which mismatches my %x FW. aborting\n", - loaded_fw, my_fw); + if (print_err) + BNX2X_ERR("bnx2x with FW %x was already loaded which mismatches my %x FW. Aborting\n", + loaded_fw, my_fw); + else + BNX2X_DEV_INFO("bnx2x with FW %x was already loaded which mismatches my %x FW, possibly due to MF UNDI\n", + loaded_fw, my_fw); return -EBUSY; } } @@ -2265,16 +2406,16 @@ int path = BP_PATH(bp); DP(NETIF_MSG_IFUP, "NO MCP - load counts[%d] %d, %d, %d\n", - path, load_count[path][0], load_count[path][1], - load_count[path][2]); - load_count[path][0]++; - load_count[path][1 + port]++; + path, bnx2x_load_count[path][0], bnx2x_load_count[path][1], + bnx2x_load_count[path][2]); + bnx2x_load_count[path][0]++; + bnx2x_load_count[path][1 + port]++; DP(NETIF_MSG_IFUP, "NO MCP - new load counts[%d] %d, %d, %d\n", - path, load_count[path][0], load_count[path][1], - load_count[path][2]); - if (load_count[path][0] == 1) + path, bnx2x_load_count[path][0], bnx2x_load_count[path][1], + bnx2x_load_count[path][2]); + if (bnx2x_load_count[path][0] == 1) return FW_MSG_CODE_DRV_LOAD_COMMON; - else if (load_count[path][1 + port] == 1) + else if (bnx2x_load_count[path][1 + port] == 1) return FW_MSG_CODE_DRV_LOAD_PORT; else return FW_MSG_CODE_DRV_LOAD_FUNCTION; @@ -2329,10 +2470,10 @@ static void bnx2x_bz_fp(struct bnx2x *bp, int index) { struct bnx2x_fastpath *fp = &bp->fp[index]; - int cos; struct napi_struct orig_napi = fp->napi; struct bnx2x_agg_info *orig_tpa_info = fp->tpa_info; + /* bzero bnx2x_fastpath contents */ if (fp->tpa_info) memset(fp->tpa_info, 0, ETH_MAX_AGGREGATION_QUEUES_E1H_E2 * @@ -2358,21 +2499,36 @@ fp->txdata_ptr[cos] = &bp->bnx2x_txq[cos * BNX2X_NUM_ETH_QUEUES(bp) + index]; - /* - * set the tpa flag for each queue. The tpa flag determines the queue + /* set the tpa flag for each queue. The tpa flag determines the queue * minimal size so it must be set prior to queue memory allocation */ - fp->disable_tpa = !(bp->flags & TPA_ENABLE_FLAG || - (bp->flags & GRO_ENABLE_FLAG && - bnx2x_mtu_allows_gro(bp->dev->mtu))); - if (bp->flags & TPA_ENABLE_FLAG) + if (bp->dev->features & NETIF_F_LRO) fp->mode = TPA_MODE_LRO; - else if (bp->flags & GRO_ENABLE_FLAG) + else if (bp->dev->features & NETIF_F_GRO && + bnx2x_mtu_allows_gro(bp->dev->mtu)) fp->mode = TPA_MODE_GRO; + else + fp->mode = TPA_MODE_DISABLED; - /* We don't want TPA on an FCoE L2 ring */ - if (IS_FCOE_FP(fp)) - fp->disable_tpa = 1; + /* We don't want TPA if it's disabled in bp + * or if this is an FCoE L2 ring. + */ + if (bp->disable_tpa || IS_FCOE_FP(fp)) + fp->mode = TPA_MODE_DISABLED; +} + +void bnx2x_set_os_driver_state(struct bnx2x *bp, u32 state) +{ + u32 cur; + + if (!IS_MF_BD(bp) || !SHMEM2_HAS(bp, os_driver_state) || IS_VF(bp)) + return; + + cur = SHMEM2_RD(bp, os_driver_state[BP_FW_MB_IDX(bp)]); + DP(NETIF_MSG_IFUP, "Driver state %08x-->%08x\n", + cur, state); + + SHMEM2_WR(bp, os_driver_state[BP_FW_MB_IDX(bp)], state); } int bnx2x_load_cnic(struct bnx2x *bp) @@ -2430,9 +2586,7 @@ } /* Initialize Rx filter. */ - netif_addr_lock_bh(bp->dev); - bnx2x_set_rx_mode(bp->dev); - netif_addr_unlock_bh(bp->dev); + bnx2x_set_rx_mode_inner(bp); /* re-read iscsi info */ bnx2x_get_iscsi_info(bp); @@ -2442,7 +2596,6 @@ if (bp->state == BNX2X_STATE_OPEN) bnx2x_cnic_notify(bp, CNIC_CTL_START_CMD); - DP(NETIF_MSG_IFUP, "Ending successfully CNIC-related load\n"); return 0; @@ -2455,8 +2608,7 @@ load_error_cnic1: bnx2x_napi_disable_cnic(bp); /* Update the number of queues without the cnic queues */ - rc = bnx2x_set_real_num_queues(bp, 0); - if (rc) + if (bnx2x_set_real_num_queues(bp, 0)) BNX2X_ERR("Unable to set real_num_queues not including cnic\n"); load_error_cnic0: BNX2X_ERR("CNIC-related load failed\n"); @@ -2485,6 +2637,7 @@ bp->state = BNX2X_STATE_OPENING_WAIT4_LOAD; + /* zero the structure w/o any lock, before SP handler is initialized */ memset(&bp->last_reported_link, 0, sizeof(bp->last_reported_link)); __set_bit(BNX2X_LINK_REPORT_LINK_DOWN, &bp->last_reported_link.link_report_flags); @@ -2496,7 +2649,7 @@ /* * Zero fastpath structures preserving invariants like napi, which are * allocated only once, fp index, max_cos, bp pointer. - * Also set fp->disable_tpa and txdata_ptr. + * Also set fp->mode and txdata_ptr. */ DP(NETIF_MSG_IFUP, "num queues: %d", bp->num_queues); for_each_queue(bp, i) @@ -2518,10 +2671,6 @@ } } - /* Allocated memory for FW statistics */ - if (bnx2x_alloc_fw_stats_mem(bp)) - LOAD_ERROR_EXIT(bp, load_error0); - /* need to be done after alloc mem, since it's self adjusting to amount * of memory available for RSS queues */ @@ -2531,6 +2680,10 @@ LOAD_ERROR_EXIT(bp, load_error0); } + /* Allocated memory for FW statistics */ + if (bnx2x_alloc_fw_stats_mem(bp)) + LOAD_ERROR_EXIT(bp, load_error0); + /* request pf to initialize status blocks */ if (IS_VF(bp)) { rc = bnx2x_vfpf_init(bp); @@ -2549,8 +2702,8 @@ } /* configure multi cos mappings in kernel. - * this configuration may be overriden by a multi class queue discipline - * or by a dcbx negotiation result. + * this configuration may be overridden by a multi class queue + * discipline or by a dcbx negotiation result. */ bnx2x_setup_tc(bp->dev, bp->max_cos); @@ -2571,7 +2724,7 @@ LOAD_ERROR_EXIT(bp, load_error1); /* what did mcp say? */ - rc = bnx2x_nic_load_analyze_req(bp, load_code); + rc = bnx2x_compare_fw_ver(bp, load_code, true); if (rc) { bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0); LOAD_ERROR_EXIT(bp, load_error2); @@ -2639,38 +2792,32 @@ /* initialize FW coalescing state machines in RAM */ bnx2x_update_coalesce(bp); + } - /* setup the leading queue */ - rc = bnx2x_setup_leading(bp); - if (rc) { - BNX2X_ERR("Setup leading failed!\n"); - LOAD_ERROR_EXIT(bp, load_error3); - } - - /* set up the rest of the queues */ - for_each_nondefault_eth_queue(bp, i) { - rc = bnx2x_setup_queue(bp, &bp->fp[i], 0); - if (rc) { - BNX2X_ERR("Queue setup failed\n"); - LOAD_ERROR_EXIT(bp, load_error3); - } - } + /* setup the leading queue */ + rc = bnx2x_setup_leading(bp); + if (rc) { + BNX2X_ERR("Setup leading failed!\n"); + LOAD_ERROR_EXIT(bp, load_error3); + } - /* setup rss */ - rc = bnx2x_init_rss_pf(bp); + /* set up the rest of the queues */ + for_each_nondefault_eth_queue(bp, i) { + if (IS_PF(bp)) + rc = bnx2x_setup_queue(bp, &bp->fp[i], false); + else /* VF */ + rc = bnx2x_vfpf_setup_q(bp, &bp->fp[i], false); if (rc) { - BNX2X_ERR("PF RSS init failed\n"); + BNX2X_ERR("Queue %d setup failed\n", i); LOAD_ERROR_EXIT(bp, load_error3); } + } - } else { /* vf */ - for_each_eth_queue(bp, i) { - rc = bnx2x_vfpf_setup_q(bp, i); - if (rc) { - BNX2X_ERR("Queue setup failed\n"); - LOAD_ERROR_EXIT(bp, load_error3); - } - } + /* setup rss */ + rc = bnx2x_init_rss(bp); + if (rc) { + BNX2X_ERR("PF RSS init failed\n"); + LOAD_ERROR_EXIT(bp, load_error3); } /* Now when Clients are configured we are ready to work */ @@ -2701,21 +2848,28 @@ /* Start fast path */ + /* Re-configure vlan filters */ + rc = bnx2x_vlan_reconfigure_vid(bp); + if (rc) + LOAD_ERROR_EXIT(bp, load_error3); + /* Initialize Rx filter. */ - netif_addr_lock_bh(bp->dev); - bnx2x_set_rx_mode(bp->dev); - netif_addr_unlock_bh(bp->dev); + bnx2x_set_rx_mode_inner(bp); - /* Start the Tx */ + if (bp->flags & PTP_SUPPORTED) { + bnx2x_init_ptp(bp); + bnx2x_configure_ptp_filters(bp); + } + /* Start Tx */ switch (load_mode) { case LOAD_NORMAL: - /* Tx queue should be only reenabled */ + /* Tx queue should be only re-enabled */ netif_tx_wake_all_queues(bp->dev); break; case LOAD_OPEN: netif_tx_start_all_queues(bp->dev); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); break; case LOAD_DIAG: @@ -2738,10 +2892,15 @@ if (CNIC_ENABLED(bp)) bnx2x_load_cnic(bp); + if (IS_PF(bp)) + bnx2x_schedule_sp_rtnl(bp, BNX2X_SP_RTNL_GET_DRV_VERSION, 0); + if (IS_PF(bp) && SHMEM2_HAS(bp, drv_capabilities_flag)) { /* mark driver is loaded in shmem2 */ u32 val; val = SHMEM2_RD(bp, drv_capabilities_flag[BP_FW_MB_IDX(bp)]); + val &= ~DRV_FLAGS_MTU_MASK; + val |= (bp->dev->mtu << DRV_FLAGS_MTU_SHIFT); SHMEM2_WR(bp, drv_capabilities_flag[BP_FW_MB_IDX(bp)], val | DRV_FLAGS_CAPABILITIES_LOADED_SUPPORTED | DRV_FLAGS_CAPABILITIES_LOADED_L2); @@ -2754,10 +2913,17 @@ return -EBUSY; } + /* Update driver data for On-Chip MFW dump. */ + if (IS_PF(bp)) + bnx2x_update_mfw_dump(bp); + /* If PMF - send ADMIN DCBX msg to MFW to initiate DCBX FSM */ if (bp->port.pmf && (bp->state != BNX2X_STATE_DIAG)) bnx2x_dcbx_init(bp, false); + if (!IS_MF_SD_STORAGE_PERSONALITY_ONLY(bp)) + bnx2x_set_os_driver_state(bp, OS_DRIVER_STATE_ACTIVE); + DP(NETIF_MSG_IFUP, "Ending successfully NIC load\n"); return 0; @@ -2793,8 +2959,8 @@ if (IS_PF(bp)) bnx2x_clear_pf_load(bp); load_error0: - bnx2x_free_fp_mem(bp); bnx2x_free_fw_stats_mem(bp); + bnx2x_free_fp_mem(bp); bnx2x_free_mem(bp); return rc; @@ -2825,6 +2991,9 @@ DP(NETIF_MSG_IFUP, "Starting NIC unload\n"); + if (!IS_MF_SD_STORAGE_PERSONALITY_ONLY(bp)) + bnx2x_set_os_driver_state(bp, OS_DRIVER_STATE_DISABLED); + /* mark driver is unloaded in shmem2 */ if (IS_PF(bp) && SHMEM2_HAS(bp, drv_capabilities_flag)) { u32 val; @@ -2854,7 +3023,7 @@ } /* Nothing to do during unload if previous bnx2x_nic_load() - * have not completed succesfully - all resourses are released. + * have not completed successfully - all resources are released. * * we can get here only after unsuccessful ndo_* callback, during which * dev->IFF_UP flag is still on. @@ -2869,6 +3038,9 @@ bp->state = BNX2X_STATE_CLOSING_WAIT4_HALT; smp_mb(); + /* indicate to VFs that the PF is going down */ + bnx2x_iov_channel_down(bp); + if (CNIC_LOADED(bp)) bnx2x_cnic_notify(bp, CNIC_CTL_STOP_CMD); @@ -2903,10 +3075,9 @@ /* Send the UNLOAD_REQUEST to the MCP */ bnx2x_send_unload_req(bp, unload_mode); - /* - * Prevent transactions to host from the functions on the + /* Prevent transactions to host from the functions on the * engine that doesn't reset global blocks in case of global - * attention once gloabl blocks are reset and gates are opened + * attention once global blocks are reset and gates are opened * (the engine which leader will perform the recovery * last). */ @@ -2927,7 +3098,7 @@ } /* - * At this stage no more interrupts will arrive so we may safly clean + * At this stage no more interrupts will arrive so we may safely clean * the queueable objects here in case they failed to get cleaned so far. */ if (IS_PF(bp)) @@ -2938,6 +3109,10 @@ bp->port.pmf = 0; + /* clear pending work in rtnl task */ + bp->sp_rtnl_state = 0; + smp_mb(); + /* Free SKBs, SGEs, TPA pool and driver internals */ bnx2x_free_skbs(bp); if (CNIC_LOADED(bp)) @@ -2952,11 +3127,16 @@ if (IS_PF(bp)) { if (CNIC_LOADED(bp)) bnx2x_free_mem_cnic(bp); - bnx2x_free_mem(bp); } + bnx2x_free_mem(bp); + bp->state = BNX2X_STATE_CLOSED; bp->cnic_loaded = false; + /* Clear driver version indication in shmem */ + if (IS_PF(bp)) + bnx2x_update_mng_version(bp); + /* Check if there are pending parity attentions. If there are - set * RECOVERY_IN_PROGRESS. */ @@ -2968,7 +3148,6 @@ bnx2x_set_reset_global(bp); } - /* The last driver must disable a "close the gate" if there is no * parity attention or "process kill" pending. */ @@ -2987,16 +3166,16 @@ u16 pmcsr; /* If there is no power capability, silently succeed */ - if (!bp->pm_cap) { + if (!bp->pdev->pm_cap) { BNX2X_DEV_INFO("No power capability. Breaking.\n"); return 0; } - pci_read_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL, &pmcsr); + pci_read_config_word(bp->pdev, bp->pdev->pm_cap + PCI_PM_CTRL, &pmcsr); switch (state) { case PCI_D0: - pci_write_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL, + pci_write_config_word(bp->pdev, bp->pdev->pm_cap + PCI_PM_CTRL, ((pmcsr & ~PCI_PM_CTRL_STATE_MASK) | PCI_PM_CTRL_PME_STATUS)); @@ -3020,7 +3199,7 @@ if (bp->wol) pmcsr |= PCI_PM_CTRL_PME_ENABLE; - pci_write_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL, + pci_write_config_word(bp->pdev, bp->pdev->pm_cap + PCI_PM_CTRL, pmcsr); /* No more memory access after this point until @@ -3038,7 +3217,7 @@ /* * net_device service functions */ -int bnx2x_poll(struct napi_struct *napi, int budget) +static int bnx2x_poll(struct napi_struct *napi, int budget) { int work_done = 0; u8 cos; @@ -3053,6 +3232,8 @@ return 0; } #endif + if (!bnx2x_fp_lock_napi(fp)) + return budget; for_each_cos_in_tx_queue(fp, cos) if (bnx2x_tx_queue_has_work(fp->txdata_ptr[cos])) @@ -3062,10 +3243,14 @@ work_done += bnx2x_rx_int(fp, budget - work_done); /* must not complete if we consumed full budget */ - if (work_done >= budget) + if (work_done >= budget) { + bnx2x_fp_unlock_napi(fp); break; + } } + bnx2x_fp_unlock_napi(fp); + /* Fall out from the NAPI loop if needed */ if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) { @@ -3109,6 +3294,32 @@ return work_done; } +#ifdef CONFIG_NET_RX_BUSY_POLL +/* must be called with local_bh_disable()d */ +int bnx2x_low_latency_recv(struct napi_struct *napi) +{ + struct bnx2x_fastpath *fp = container_of(napi, struct bnx2x_fastpath, + napi); + struct bnx2x *bp = fp->bp; + int found = 0; + + if ((bp->state == BNX2X_STATE_CLOSED) || + (bp->state == BNX2X_STATE_ERROR) || + (bp->dev->features & (NETIF_F_LRO | NETIF_F_GRO))) + return LL_FLUSH_FAILED; + + if (!bnx2x_fp_lock_poll(fp)) + return LL_FLUSH_BUSY; + + if (bnx2x_has_rx_work(fp)) + found = bnx2x_rx_int(fp, 4); + + bnx2x_fp_unlock_poll(fp); + + return found; +} +#endif + /* we split the first BD into headers and data BDs * to ease the pain of our fellow microcode engineers * we use one mapping for both BDs @@ -3204,38 +3415,44 @@ if (prot == IPPROTO_TCP) rc |= XMIT_CSUM_TCP; - if (skb_is_gso_v6(skb)) { - rc |= (XMIT_GSO_V6 | XMIT_CSUM_TCP); - if (rc & XMIT_CSUM_ENC) - rc |= XMIT_GSO_ENC_V6; - } else if (skb_is_gso(skb)) { - rc |= (XMIT_GSO_V4 | XMIT_CSUM_TCP); - if (rc & XMIT_CSUM_ENC) - rc |= XMIT_GSO_ENC_V4; + if (skb_is_gso(skb)) { + if (skb_is_gso_v6(skb)) { + rc |= (XMIT_GSO_V6 | XMIT_CSUM_TCP); + if (rc & XMIT_CSUM_ENC) + rc |= XMIT_GSO_ENC_V6; + } else { + rc |= (XMIT_GSO_V4 | XMIT_CSUM_TCP); + if (rc & XMIT_CSUM_ENC) + rc |= XMIT_GSO_ENC_V4; + } } return rc; } -#if (MAX_SKB_FRAGS >= MAX_FETCH_BD - 3) +/* VXLAN: 4 = 1 (for linear data BD) + 3 (2 for PBD and last BD) */ +#define BNX2X_NUM_VXLAN_TSO_WIN_SUB_BDS 4 + +/* Regular: 3 = 1 (for linear data BD) + 2 (for PBD and last BD) */ +#define BNX2X_NUM_TSO_WIN_SUB_BDS 3 + +#if (MAX_SKB_FRAGS >= MAX_FETCH_BD - BDS_PER_TX_PKT) /* check if packet requires linearization (packet is too fragmented) no need to check fragmentation if page size > 8K (there will be no violation to FW restrictions) */ static int bnx2x_pkt_req_lin(struct bnx2x *bp, struct sk_buff *skb, u32 xmit_type) { - int to_copy = 0; - int hlen = 0; - int first_bd_sz = 0; + int first_bd_sz = 0, num_tso_win_sub = BNX2X_NUM_TSO_WIN_SUB_BDS; + int to_copy = 0, hlen = 0; - /* 3 = 1 (for linear data BD) + 2 (for PBD and last BD) */ - if (skb_shinfo(skb)->nr_frags >= (MAX_FETCH_BD - 3)) { + if (xmit_type & XMIT_GSO_ENC) + num_tso_win_sub = BNX2X_NUM_VXLAN_TSO_WIN_SUB_BDS; + if (skb_shinfo(skb)->nr_frags >= (MAX_FETCH_BD - num_tso_win_sub)) { if (xmit_type & XMIT_GSO) { unsigned short lso_mss = skb_shinfo(skb)->gso_size; - /* Check if LSO packet needs to be copied: - 3 = 1 (for headers BD) + 2 (for PBD and last BD) */ - int wnd_size = MAX_FETCH_BD - 3; + int wnd_size = MAX_FETCH_BD - num_tso_win_sub; /* Number of windows to check */ int num_wnds = skb_shinfo(skb)->nr_frags - wnd_size; int wnd_idx = 0; @@ -3243,8 +3460,13 @@ u32 wnd_sum = 0; /* Headers length */ - hlen = (int)(skb_transport_header(skb) - skb->data) + - tcp_hdrlen(skb); + if (xmit_type & XMIT_GSO_ENC) + hlen = (int)(skb_inner_transport_header(skb) - + skb->data) + + inner_tcp_hdrlen(skb); + else + hlen = (int)(skb_transport_header(skb) - + skb->data) + tcp_hdrlen(skb); /* Amount of data (w/o headers) on linear part of SKB*/ first_bd_sz = skb_headlen(skb) - hlen; @@ -3297,26 +3519,6 @@ } #endif -static void bnx2x_set_pbd_gso_e2(struct sk_buff *skb, u32 *parsing_data, - u32 xmit_type) -{ - struct ipv6hdr *ipv6; - - *parsing_data |= (skb_shinfo(skb)->gso_size << - ETH_TX_PARSE_BD_E2_LSO_MSS_SHIFT) & - ETH_TX_PARSE_BD_E2_LSO_MSS; - - if (xmit_type & XMIT_GSO_ENC_V6) - ipv6 = inner_ipv6_hdr(skb); - else if (xmit_type & XMIT_GSO_V6) - ipv6 = ipv6_hdr(skb); - else - ipv6 = NULL; - - if (ipv6 && ipv6->nexthdr == NEXTHDR_IPV6) - *parsing_data |= ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR; -} - /** * bnx2x_set_pbd_gso - update PBD in GSO case. * @@ -3326,7 +3528,6 @@ */ static void bnx2x_set_pbd_gso(struct sk_buff *skb, struct eth_tx_parse_bd_e1x *pbd, - struct eth_tx_start_bd *tx_start_bd, u32 xmit_type) { pbd->lso_mss = cpu_to_le16(skb_shinfo(skb)->gso_size); @@ -3339,9 +3540,6 @@ bswab16(~csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0)); - - /* GSO on 57710/57711 needs FW to calculate IP checksum */ - tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_IP_CSUM; } else { pbd->tcp_pseudo_csum = bswab16(~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, @@ -3509,19 +3707,27 @@ /* outer IP header info */ if (xmit_type & XMIT_CSUM_V4) { struct iphdr *iph = ip_hdr(skb); + u32 csum = (__force u32)(~iph->check) - + (__force u32)iph->tot_len - + (__force u32)iph->frag_off; + + outerip_len = iph->ihl << 1; + pbd2->fw_ip_csum_wo_len_flags_frag = - bswab16(csum_fold((~iph->check) - - iph->tot_len - iph->frag_off)); + bswab16(csum_fold((__force __wsum)csum)); } else { pbd2->fw_ip_hdr_to_payload_w = hlen_w - ((sizeof(struct ipv6hdr)) >> 1); + pbd_e2->data.tunnel_data.flags |= + ETH_TUNNEL_DATA_IPV6_OUTER; } pbd2->tcp_send_seq = bswab32(inner_tcp_hdr(skb)->seq); pbd2->tcp_flags = pbd_tcp_flags(inner_tcp_hdr(skb)); - if (xmit_type & XMIT_GSO_V4) { + /* inner IP header info */ + if (xmit_type & XMIT_CSUM_ENC_V4) { pbd2->hw_ip_id = bswab16(inner_ip_hdr(skb)->id); pbd_e2->data.tunnel_data.pseudo_csum = @@ -3529,8 +3735,6 @@ inner_ip_hdr(skb)->saddr, inner_ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0)); - - outerip_len = ip_hdr(skb)->ihl << 1; } else { pbd_e2->data.tunnel_data.pseudo_csum = bswab16(~csum_ipv6_magic( @@ -3543,8 +3747,6 @@ *global_data |= outerip_off | - (!!(xmit_type & XMIT_CSUM_V6) << - ETH_TX_PARSE_2ND_BD_IP_HDR_TYPE_OUTER_SHIFT) | (outerip_len << ETH_TX_PARSE_2ND_BD_IP_HDR_LEN_OUTER_W_SHIFT) | ((skb->protocol == cpu_to_be16(ETH_P_8021Q)) << @@ -3556,6 +3758,23 @@ } } +static inline void bnx2x_set_ipv6_ext_e2(struct sk_buff *skb, u32 *parsing_data, + u32 xmit_type) +{ + struct ipv6hdr *ipv6; + + if (!(xmit_type & (XMIT_GSO_ENC_V6 | XMIT_GSO_V6))) + return; + + if (xmit_type & XMIT_GSO_ENC_V6) + ipv6 = inner_ipv6_hdr(skb); + else /* XMIT_GSO_V6 */ + ipv6 = ipv6_hdr(skb); + + if (ipv6->nexthdr == NEXTHDR_IPV6) + *parsing_data |= ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR; +} + /* called with netif_tx_lock * bnx2x_tx_int() runs without netif_tx_lock unless it needs to call * netif_wake_queue() @@ -3599,7 +3818,7 @@ DP(NETIF_MSG_TX_QUEUED, "indices: txq %d, fp %d, txdata %d\n", txq_index, fp_index, txdata_index); */ - /* enable this debug print to view the tranmission details + /* enable this debug print to view the transmission details DP(NETIF_MSG_TX_QUEUED, "transmitting packet cid %d fp index %d txdata_index %d tx_data ptr %p fp pointer %p\n", txdata->cid, fp_index, txdata_index, txdata, fp); */ @@ -3688,6 +3907,20 @@ tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD; + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + if (!(bp->flags & TX_TIMESTAMPING_EN)) { + BNX2X_ERR("Tx timestamping was not enabled, this packet will not be timestamped\n"); + } else if (bp->ptp_tx_skb) { + BNX2X_ERR("The device supports only a single outstanding packet to timestamp, this packet will not be timestamped\n"); + } else { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + /* schedule check for Tx timestamp */ + bp->ptp_tx_skb = skb_get(skb); + bp->ptp_tx_start = jiffies; + schedule_work(&bp->ptp_task); + } + } + /* header nbd: indirectly zero other flags! */ tx_start_bd->general_data = 1 << ETH_TX_START_BD_HDR_NBDS_SHIFT; @@ -3700,21 +3933,25 @@ "sending pkt %u @%p next_idx %u bd %u @%p\n", pkt_prod, tx_buf, txdata->tx_pkt_prod, bd_prod, tx_start_bd); - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { tx_start_bd->vlan_or_ethertype = - cpu_to_le16(vlan_tx_tag_get(skb)); + cpu_to_le16(skb_vlan_tag_get(skb)); tx_start_bd->bd_flags.as_bitfield |= (X_ETH_OUTBAND_VLAN << ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT); } else { /* when transmitting in a vf, start bd must hold the ethertype * for fw to enforce it */ +#ifndef BNX2X_STOP_ON_ERROR if (IS_VF(bp)) +#endif tx_start_bd->vlan_or_ethertype = cpu_to_le16(ntohs(eth->h_proto)); +#ifndef BNX2X_STOP_ON_ERROR else /* used by FW for packet accounting */ tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod); +#endif } nbd = 2; /* start_bd + pbd + frags (updated when pages are mapped) */ @@ -3772,7 +4009,10 @@ xmit_type); } - /* Add the macs to the parsing BD this is a vf */ + bnx2x_set_ipv6_ext_e2(skb, &pbd_e2_parsing_data, xmit_type); + /* Add the macs to the parsing BD if this is a vf or if + * Tx Switching is enabled. + */ if (IS_VF(bp)) { /* override GRE parameters in BD */ bnx2x_set_fw_mac_addr(&pbd_e2->data.mac_addr.src_hi, @@ -3784,6 +4024,22 @@ &pbd_e2->data.mac_addr.dst_mid, &pbd_e2->data.mac_addr.dst_lo, eth->h_dest); + } else { + if (bp->flags & TX_SWITCHING) + bnx2x_set_fw_mac_addr( + &pbd_e2->data.mac_addr.dst_hi, + &pbd_e2->data.mac_addr.dst_mid, + &pbd_e2->data.mac_addr.dst_lo, + eth->h_dest); +#ifdef BNX2X_STOP_ON_ERROR + /* Enforce security is always set in Stop on Error - + * source mac should be present in the parsing BD + */ + bnx2x_set_fw_mac_addr(&pbd_e2->data.mac_addr.src_hi, + &pbd_e2->data.mac_addr.src_mid, + &pbd_e2->data.mac_addr.src_lo, + eth->h_source); +#endif } SET_FLAG(pbd_e2_parsing_data, @@ -3830,10 +4086,12 @@ bd_prod); } if (!CHIP_IS_E1x(bp)) - bnx2x_set_pbd_gso_e2(skb, &pbd_e2_parsing_data, - xmit_type); + pbd_e2_parsing_data |= + (skb_shinfo(skb)->gso_size << + ETH_TX_PARSE_BD_E2_LSO_MSS_SHIFT) & + ETH_TX_PARSE_BD_E2_LSO_MSS; else - bnx2x_set_pbd_gso(skb, pbd_e1x, first_bd, xmit_type); + bnx2x_set_pbd_gso(skb, pbd_e1x, xmit_type); } /* Set the PBD's parsing_data field if not zero @@ -3968,6 +4226,41 @@ return NETDEV_TX_OK; } +void bnx2x_get_c2s_mapping(struct bnx2x *bp, u8 *c2s_map, u8 *c2s_default) +{ + int mfw_vn = BP_FW_MB_IDX(bp); + u32 tmp; + + /* If the shmem shouldn't affect configuration, reflect */ + if (!IS_MF_BD(bp)) { + int i; + + for (i = 0; i < BNX2X_MAX_PRIORITY; i++) + c2s_map[i] = i; + *c2s_default = 0; + + return; + } + + tmp = SHMEM2_RD(bp, c2s_pcp_map_lower[mfw_vn]); + tmp = (__force u32)be32_to_cpu((__force __be32)tmp); + c2s_map[0] = tmp & 0xff; + c2s_map[1] = (tmp >> 8) & 0xff; + c2s_map[2] = (tmp >> 16) & 0xff; + c2s_map[3] = (tmp >> 24) & 0xff; + + tmp = SHMEM2_RD(bp, c2s_pcp_map_upper[mfw_vn]); + tmp = (__force u32)be32_to_cpu((__force __be32)tmp); + c2s_map[4] = tmp & 0xff; + c2s_map[5] = (tmp >> 8) & 0xff; + c2s_map[6] = (tmp >> 16) & 0xff; + c2s_map[7] = (tmp >> 24) & 0xff; + + tmp = SHMEM2_RD(bp, c2s_pcp_map_default[mfw_vn]); + tmp = (__force u32)be32_to_cpu((__force __be32)tmp); + *c2s_default = (tmp >> (8 * mfw_vn)) & 0xff; +} + /** * bnx2x_setup_tc - routine to configure net_device for multi tc * @@ -3978,13 +4271,14 @@ */ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc) { - int cos, prio, count, offset; struct bnx2x *bp = netdev_priv(dev); + u8 c2s_map[BNX2X_MAX_PRIORITY], c2s_def; + int cos, prio, count, offset; /* setup tc must be called under rtnl lock */ ASSERT_RTNL(); - /* no traffic classes requested. aborting */ + /* no traffic classes requested. Aborting */ if (!num_tc) { netdev_reset_tc(dev); return 0; @@ -3992,7 +4286,7 @@ /* requested to support too many traffic classes */ if (num_tc > bp->max_cos) { - BNX2X_ERR("support for too many traffic classes requested: %d. max supported is %d\n", + BNX2X_ERR("support for too many traffic classes requested: %d. Max supported is %d\n", num_tc, bp->max_cos); return -EINVAL; } @@ -4003,16 +4297,19 @@ return -EINVAL; } + bnx2x_get_c2s_mapping(bp, c2s_map, &c2s_def); + /* configure priority to traffic class mapping */ for (prio = 0; prio < BNX2X_MAX_PRIORITY; prio++) { - netdev_set_prio_tc_map(dev, prio, bp->prio_to_cos[prio]); + int outer_prio = c2s_map[prio]; + + netdev_set_prio_tc_map(dev, prio, bp->prio_to_cos[outer_prio]); DP(BNX2X_MSG_SP | NETIF_MSG_IFUP, "mapping priority %d to tc %d\n", - prio, bp->prio_to_cos[prio]); + outer_prio, bp->prio_to_cos[outer_prio]); } - - /* Use this configuration to diffrentiate tc0 from other COSes + /* Use this configuration to differentiate tc0 from other COSes This can be used for ets or pfc, and save the effort of setting up a multio class queue disc or negotiating DCBX with a switch netdev_set_prio_tc_map(dev, 0, 0); @@ -4042,14 +4339,13 @@ struct bnx2x *bp = netdev_priv(dev); int rc = 0; - if (!bnx2x_is_valid_ether_addr(bp, addr->sa_data)) { + if (!is_valid_ether_addr(addr->sa_data)) { BNX2X_ERR("Requested MAC address is not valid\n"); return -EINVAL; } - if ((IS_MF_STORAGE_SD(bp) || IS_MF_FCOE_AFEX(bp)) && - !is_zero_ether_addr(addr->sa_data)) { - BNX2X_ERR("Can't configure non-zero address on iSCSI or FCoE functions in MF-SD mode\n"); + if (IS_MF_STORAGE_ONLY(bp)) { + BNX2X_ERR("Can't change address on STORAGE ONLY function\n"); return -EINVAL; } @@ -4064,6 +4360,9 @@ if (netif_running(dev)) rc = bnx2x_set_eth_mac(bp, true); + if (IS_PF(bp) && SHMEM2_HAS(bp, curr_cfg)) + SHMEM2_WR(bp, curr_cfg, CURR_CFG_MET_OS); + return rc; } @@ -4133,7 +4432,7 @@ /* end of fastpath */ } -void bnx2x_free_fp_mem_cnic(struct bnx2x *bp) +static void bnx2x_free_fp_mem_cnic(struct bnx2x *bp) { int i; for_each_cnic_queue(bp, i) @@ -4178,7 +4477,7 @@ * fp->eth_q_stats.rx_skb_alloc_failed = 0 */ for (i = 0; i < rx_ring_size; i++) { - if (bnx2x_alloc_rx_data(bp, fp, ring_prod) < 0) { + if (bnx2x_alloc_rx_data(bp, fp, ring_prod, GFP_KERNEL) < 0) { failure_cnt++; continue; } @@ -4228,8 +4527,7 @@ u8 cos; int rx_ring_size = 0; - if (!bp->rx_ring_size && - (IS_MF_STORAGE_SD(bp) || IS_MF_FCOE_AFEX(bp))) { + if (!bp->rx_ring_size && IS_MF_STORAGE_ONLY(bp)) { rx_ring_size = MIN_RX_SIZE_NONTPA; bp->rx_ring_size = rx_ring_size; } else if (!bp->rx_ring_size) { @@ -4261,14 +4559,17 @@ if (!IS_FCOE_IDX(index)) { /* status blocks */ - if (!CHIP_IS_E1x(bp)) - BNX2X_PCI_ALLOC(sb->e2_sb, - &bnx2x_fp(bp, index, status_blk_mapping), - sizeof(struct host_hc_status_block_e2)); - else - BNX2X_PCI_ALLOC(sb->e1x_sb, - &bnx2x_fp(bp, index, status_blk_mapping), - sizeof(struct host_hc_status_block_e1x)); + if (!CHIP_IS_E1x(bp)) { + sb->e2_sb = BNX2X_PCI_ALLOC(&bnx2x_fp(bp, index, status_blk_mapping), + sizeof(struct host_hc_status_block_e2)); + if (!sb->e2_sb) + goto alloc_mem_err; + } else { + sb->e1x_sb = BNX2X_PCI_ALLOC(&bnx2x_fp(bp, index, status_blk_mapping), + sizeof(struct host_hc_status_block_e1x)); + if (!sb->e1x_sb) + goto alloc_mem_err; + } } /* FCoE Queue uses Default SB and doesn't ACK the SB, thus no need to @@ -4287,34 +4588,49 @@ "allocating tx memory of fp %d cos %d\n", index, cos); - BNX2X_ALLOC(txdata->tx_buf_ring, - sizeof(struct sw_tx_bd) * NUM_TX_BD); - BNX2X_PCI_ALLOC(txdata->tx_desc_ring, - &txdata->tx_desc_mapping, - sizeof(union eth_tx_bd_types) * NUM_TX_BD); + txdata->tx_buf_ring = kcalloc(NUM_TX_BD, + sizeof(struct sw_tx_bd), + GFP_KERNEL); + if (!txdata->tx_buf_ring) + goto alloc_mem_err; + txdata->tx_desc_ring = BNX2X_PCI_ALLOC(&txdata->tx_desc_mapping, + sizeof(union eth_tx_bd_types) * NUM_TX_BD); + if (!txdata->tx_desc_ring) + goto alloc_mem_err; } } /* Rx */ if (!skip_rx_queue(bp, index)) { /* fastpath rx rings: rx_buf rx_desc rx_comp */ - BNX2X_ALLOC(bnx2x_fp(bp, index, rx_buf_ring), - sizeof(struct sw_rx_bd) * NUM_RX_BD); - BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_desc_ring), - &bnx2x_fp(bp, index, rx_desc_mapping), - sizeof(struct eth_rx_bd) * NUM_RX_BD); - - BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_comp_ring), - &bnx2x_fp(bp, index, rx_comp_mapping), - sizeof(struct eth_fast_path_rx_cqe) * - NUM_RCQ_BD); + bnx2x_fp(bp, index, rx_buf_ring) = + kcalloc(NUM_RX_BD, sizeof(struct sw_rx_bd), GFP_KERNEL); + if (!bnx2x_fp(bp, index, rx_buf_ring)) + goto alloc_mem_err; + bnx2x_fp(bp, index, rx_desc_ring) = + BNX2X_PCI_ALLOC(&bnx2x_fp(bp, index, rx_desc_mapping), + sizeof(struct eth_rx_bd) * NUM_RX_BD); + if (!bnx2x_fp(bp, index, rx_desc_ring)) + goto alloc_mem_err; + + /* Seed all CQEs by 1s */ + bnx2x_fp(bp, index, rx_comp_ring) = + BNX2X_PCI_FALLOC(&bnx2x_fp(bp, index, rx_comp_mapping), + sizeof(struct eth_fast_path_rx_cqe) * NUM_RCQ_BD); + if (!bnx2x_fp(bp, index, rx_comp_ring)) + goto alloc_mem_err; /* SGE ring */ - BNX2X_ALLOC(bnx2x_fp(bp, index, rx_page_ring), - sizeof(struct sw_rx_page) * NUM_RX_SGE); - BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_sge_ring), - &bnx2x_fp(bp, index, rx_sge_mapping), - BCM_PAGE_SIZE * NUM_RX_SGE_PAGES); + bnx2x_fp(bp, index, rx_page_ring) = + kcalloc(NUM_RX_SGE, sizeof(struct sw_rx_page), + GFP_KERNEL); + if (!bnx2x_fp(bp, index, rx_page_ring)) + goto alloc_mem_err; + bnx2x_fp(bp, index, rx_sge_ring) = + BNX2X_PCI_ALLOC(&bnx2x_fp(bp, index, rx_sge_mapping), + BCM_PAGE_SIZE * NUM_RX_SGE_PAGES); + if (!bnx2x_fp(bp, index, rx_sge_ring)) + goto alloc_mem_err; /* RX BD ring */ bnx2x_set_next_page_rx_bd(fp); @@ -4337,7 +4653,7 @@ * In these cases we disable the queue * Min size is different for OOO, TPA and non-TPA queues */ - if (ring_size < (fp->disable_tpa ? + if (ring_size < (fp->mode == TPA_MODE_DISABLED ? MIN_RX_SIZE_NONTPA : MIN_RX_SIZE_TPA)) { /* release memory allocated for this queue */ bnx2x_free_fp_mem_at(bp, index); @@ -4346,7 +4662,7 @@ return 0; } -int bnx2x_alloc_fp_mem_cnic(struct bnx2x *bp) +static int bnx2x_alloc_fp_mem_cnic(struct bnx2x *bp) { if (!NO_FCOE(bp)) /* FCoE */ @@ -4359,7 +4675,7 @@ return 0; } -int bnx2x_alloc_fp_mem(struct bnx2x *bp) +static int bnx2x_alloc_fp_mem(struct bnx2x *bp) { int i; @@ -4488,7 +4804,6 @@ alloc_err: bnx2x_free_mem_bp(bp); return -ENOMEM; - } int bnx2x_reload_if_running(struct net_device *dev) @@ -4530,7 +4845,6 @@ } return sel_phy_idx; - } int bnx2x_get_link_cfg_idx(struct bnx2x *bp) { @@ -4580,6 +4894,11 @@ { struct bnx2x *bp = netdev_priv(dev); + if (pci_num_vf(bp->pdev)) { + DP(BNX2X_MSG_IOV, "VFs are enabled, can not change MTU\n"); + return -EPERM; + } + if (bp->recovery_state != BNX2X_RECOVERY_DONE) { BNX2X_ERR("Can't perform change MTU during parity recovery\n"); return -EAGAIN; @@ -4597,6 +4916,9 @@ */ dev->mtu = new_mtu; + if (IS_PF(bp) && SHMEM2_HAS(bp, curr_cfg)) + SHMEM2_WR(bp, curr_cfg, CURR_CFG_MET_OS); + return bnx2x_reload_if_running(dev); } @@ -4605,8 +4927,25 @@ { struct bnx2x *bp = netdev_priv(dev); + if (pci_num_vf(bp->pdev)) { + netdev_features_t changed = dev->features ^ features; + + /* Revert the requested changes in features if they + * would require internal reload of PF in bnx2x_set_features(). + */ + if (!(features & NETIF_F_RXCSUM) && !bp->disable_tpa) { + features &= ~NETIF_F_RXCSUM; + features |= dev->features & NETIF_F_RXCSUM; + } + + if (changed & NETIF_F_LOOPBACK) { + features &= ~NETIF_F_LOOPBACK; + features |= dev->features & NETIF_F_LOOPBACK; + } + } + /* TPA requires Rx CSUM offloading */ - if (!(features & NETIF_F_RXCSUM) || bp->disable_tpa) { + if (!(features & NETIF_F_RXCSUM)) { features &= ~NETIF_F_LRO; features &= ~NETIF_F_GRO; } @@ -4617,39 +4956,42 @@ int bnx2x_set_features(struct net_device *dev, netdev_features_t features) { struct bnx2x *bp = netdev_priv(dev); - u32 flags = bp->flags; + netdev_features_t changes = features ^ dev->features; bool bnx2x_reload = false; + int rc; - if (features & NETIF_F_LRO) - flags |= TPA_ENABLE_FLAG; - else - flags &= ~TPA_ENABLE_FLAG; - - if (features & NETIF_F_GRO) - flags |= GRO_ENABLE_FLAG; - else - flags &= ~GRO_ENABLE_FLAG; - - if (features & NETIF_F_LOOPBACK) { - if (bp->link_params.loopback_mode != LOOPBACK_BMAC) { - bp->link_params.loopback_mode = LOOPBACK_BMAC; - bnx2x_reload = true; - } - } else { - if (bp->link_params.loopback_mode != LOOPBACK_NONE) { - bp->link_params.loopback_mode = LOOPBACK_NONE; - bnx2x_reload = true; + /* VFs or non SRIOV PFs should be able to change loopback feature */ + if (!pci_num_vf(bp->pdev)) { + if (features & NETIF_F_LOOPBACK) { + if (bp->link_params.loopback_mode != LOOPBACK_BMAC) { + bp->link_params.loopback_mode = LOOPBACK_BMAC; + bnx2x_reload = true; + } + } else { + if (bp->link_params.loopback_mode != LOOPBACK_NONE) { + bp->link_params.loopback_mode = LOOPBACK_NONE; + bnx2x_reload = true; + } } } - if (flags ^ bp->flags) { - bp->flags = flags; + /* if GRO is changed while LRO is enabled, don't force a reload */ + if ((changes & NETIF_F_GRO) && (features & NETIF_F_LRO)) + changes &= ~NETIF_F_GRO; + + /* if GRO is changed while HW TPA is off, don't force a reload */ + if ((changes & NETIF_F_GRO) && bp->disable_tpa) + changes &= ~NETIF_F_GRO; + + if (changes) bnx2x_reload = true; - } if (bnx2x_reload) { - if (bp->recovery_state == BNX2X_RECOVERY_DONE) - return bnx2x_reload_if_running(dev); + if (bp->recovery_state == BNX2X_RECOVERY_DONE) { + dev->features = features; + rc = bnx2x_reload_if_running(dev); + return rc ? rc : 1; + } /* else: bnx2x_nic_load() will be called at end of recovery */ } @@ -4665,12 +5007,8 @@ bnx2x_panic(); #endif - smp_mb__before_clear_bit(); - set_bit(BNX2X_SP_RTNL_TX_TIMEOUT, &bp->sp_rtnl_state); - smp_mb__after_clear_bit(); - /* This allows the netif to be shutdown gracefully before resetting */ - schedule_delayed_work(&bp->sp_rtnl_task, 0); + bnx2x_schedule_sp_rtnl(bp, BNX2X_SP_RTNL_TX_TIMEOUT, 0); } int bnx2x_suspend(struct pci_dev *pdev, pm_message_t state) @@ -4740,10 +5078,14 @@ return rc; } - void bnx2x_set_ctx_validation(struct bnx2x *bp, struct eth_context *cxt, u32 cid) { + if (!cxt) { + BNX2X_ERR("bad context pointer %p\n", cxt); + return; + } + /* ustorm cxt validation */ cxt->ustorm_ag_context.cdu_usage = CDU_RSRVD_VALUE_TYPE_A(HW_CID(bp, cid), @@ -4758,7 +5100,6 @@ u8 fw_sb_id, u8 sb_index, u8 ticks) { - u32 addr = BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_TIMEOUT_OFFSET(fw_sb_id, sb_index); REG_WR8(bp, addr, ticks); @@ -4795,3 +5136,15 @@ disable = disable ? 1 : (usec ? 0 : 1); storm_memset_hc_disable(bp, port, fw_sb_id, sb_index, disable); } + +void bnx2x_schedule_sp_rtnl(struct bnx2x *bp, enum sp_rtnl_flag flag, + u32 verbose) +{ + smp_mb__before_atomic(); + set_bit(flag, &bp->sp_rtnl_state); + smp_mb__after_atomic(); + DP((BNX2X_MSG_SP | verbose), "Scheduling sp_rtnl task [Flag: %d]\n", + flag); + schedule_delayed_work(&bp->sp_rtnl_task, 0); +} +EXPORT_SYMBOL(bnx2x_schedule_sp_rtnl);