--- zzzz-none-000/linux-3.10.107/drivers/net/ethernet/chelsio/cxgb4/sge.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/drivers/net/ethernet/chelsio/cxgb4/sge.c 2021-02-04 17:41:59.000000000 +0000 @@ -1,7 +1,7 @@ /* * This file is part of the Chelsio T4 Ethernet driver for Linux. * - * Copyright (c) 2003-2010 Chelsio Communications, Inc. All rights reserved. + * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -43,8 +43,15 @@ #include #include #include +#ifdef CONFIG_NET_RX_BUSY_POLL +#include +#endif /* CONFIG_NET_RX_BUSY_POLL */ +#ifdef CONFIG_CHELSIO_T4_FCOE +#include +#endif /* CONFIG_CHELSIO_T4_FCOE */ #include "cxgb4.h" #include "t4_regs.h" +#include "t4_values.h" #include "t4_msg.h" #include "t4fw_api.h" @@ -104,12 +111,6 @@ #define NOMEM_TMR_IDX (SGE_NTIMERS - 1) /* - * An FL with <= FL_STARVE_THRES buffers is starving and a periodic timer will - * attempt to refill it. - */ -#define FL_STARVE_THRES 4 - -/* * Suspend an Ethernet Tx queue with fewer available descriptors than this. * This is the same as calc_tx_descs() for a TSO packet with * nr_frags == MAX_SKB_FRAGS. @@ -127,7 +128,7 @@ * Max Tx descriptor space we allow for an Ethernet packet to be inlined * into a WR. */ -#define MAX_IMM_TX_PKT_LEN 128 +#define MAX_IMM_TX_PKT_LEN 256 /* * Max size of a WR sent through a control Tx queue. @@ -193,6 +194,9 @@ RX_LARGE_MTU_BUF = 0x3, /* large MTU buffer */ }; +static int timer_pkt_quota[] = {1, 1, 2, 3, 4, 5}; +#define MIN_NAPI_WORK 1 + static inline dma_addr_t get_buf_addr(const struct rx_sw_desc *d) { return d->dma_addr & ~(dma_addr_t)RX_BUF_FLAGS; @@ -228,9 +232,21 @@ return fl->size - 8; /* 1 descriptor = 8 buffers */ } -static inline bool fl_starving(const struct sge_fl *fl) +/** + * fl_starving - return whether a Free List is starving. 
+ * @adapter: pointer to the adapter + * @fl: the Free List + * + * Tests specified Free List to see whether the number of buffers + * available to the hardware has fallen below our "starvation" + * threshold. + */ +static inline bool fl_starving(const struct adapter *adapter, + const struct sge_fl *fl) { - return fl->avail - fl->pend_cred <= FL_STARVE_THRES; + const struct sge *s = &adapter->sge; + + return fl->avail - fl->pend_cred <= s->fl_starve_thres; } static int map_skb(struct device *dev, const struct sk_buff *skb, @@ -373,7 +389,7 @@ if (d->skb) { /* an SGL is present */ if (unmap) unmap_sgl(dev, d->skb, d->sgl, q); - kfree_skb(d->skb); + dev_consume_skb_any(d->skb); d->skb = NULL; } ++d; @@ -506,14 +522,35 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q) { - u32 val; if (q->pend_cred >= 8) { - val = PIDX(q->pend_cred / 8); - if (!is_t4(adap->chip)) - val |= DBTYPE(1); + u32 val = adap->params.arch.sge_fl_db; + + if (is_t4(adap->params.chip)) + val |= PIDX_V(q->pend_cred / 8); + else + val |= PIDX_T5_V(q->pend_cred / 8); + + /* Make sure all memory writes to the Free List queue are + * committed before we tell the hardware about them. + */ wmb(); - t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), DBPRIO(1) | - QID(q->cntxt_id) | val); + + /* If we don't have access to the new User Doorbell (T5+), use + * the old doorbell mechanism; otherwise use the new BAR2 + * mechanism. + */ + if (unlikely(q->bar2_addr == NULL)) { + t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A), + val | QID_V(q->cntxt_id)); + } else { + writel(val | QID_V(q->bar2_qid), + q->bar2_addr + SGE_UDB_KDOORBELL); + + /* This Write Memory Barrier will force the write to + * the User Doorbell area to be flushed.
+ */ + wmb(); + } q->pend_cred &= 7; } } @@ -548,8 +585,15 @@ unsigned int cred = q->avail; __be64 *d = &q->desc[q->pidx]; struct rx_sw_desc *sd = &q->sdesc[q->pidx]; + int node; + +#ifdef CONFIG_DEBUG_FS + if (test_bit(q->cntxt_id - adap->sge.egr_start, adap->sge.blocked_fl)) + goto out; +#endif - gfp |= __GFP_NOWARN | __GFP_COLD; + gfp |= __GFP_NOWARN; + node = dev_to_node(adap->pdev_dev); if (s->fl_pg_order == 0) goto alloc_small_pages; @@ -558,7 +602,7 @@ * Prefer large buffers */ while (n) { - pg = alloc_pages(gfp | __GFP_COMP, s->fl_pg_order); + pg = alloc_pages_node(node, gfp | __GFP_COMP, s->fl_pg_order); if (unlikely(!pg)) { q->large_alloc_failed++; break; /* fall back to single pages */ @@ -588,7 +632,7 @@ alloc_small_pages: while (n--) { - pg = __skb_alloc_page(gfp, NULL); + pg = alloc_pages_node(node, gfp, 0); if (unlikely(!pg)) { q->alloc_failed++; break; @@ -617,7 +661,7 @@ q->pend_cred += cred; ring_fl_db(adap, q); - if (unlikely(fl_starving(q))) { + if (unlikely(fl_starving(adap, q))) { smp_wmb(); set_bit(q->cntxt_id - adap->sge.egr_start, adap->sge.starving_fl); @@ -684,6 +728,22 @@ */ static inline unsigned int sgl_len(unsigned int n) { + /* A Direct Scatter Gather List uses 32-bit lengths and 64-bit PCI DMA + * addresses. The DSGL Work Request starts off with a 32-bit DSGL + * ULPTX header, then Length0, then Address0, then, for 1 <= i <= N, + * repeated sequences of { Length[i], Length[i+1], Address[i], + * Address[i+1] } (this ensures that all addresses are on 64-bit + * boundaries). If N is even, then Length[N+1] should be set to 0 and + * Address[N+1] is omitted. + * + * The following calculation incorporates all of the above. 
It's + * somewhat hard to follow but, briefly: the "+2" accounts for the + * first two flits which include the DSGL header, Length0 and + * Address0; the "(3*(n-1))/2" covers the main body of list entries (3 + * flits for every pair of the remaining N) +1 if (n-1) is odd; and + * finally the "+((n-1)&1)" adds the one remaining flit needed if + * (n-1) is odd ... + */ n--; return (3 * n) / 2 + (n & 1) + 2; } @@ -706,11 +766,17 @@ * @skb: the packet * * Returns whether an Ethernet packet is small enough to fit as - * immediate data. + * immediate data. Return value corresponds to headroom required. */ static inline int is_eth_imm(const struct sk_buff *skb) { - return skb->len <= MAX_IMM_TX_PKT_LEN - sizeof(struct cpl_tx_pkt); + int hdrlen = skb_shinfo(skb)->gso_size ? + sizeof(struct cpl_tx_pkt_lso_core) : 0; + + hdrlen += sizeof(struct cpl_tx_pkt); + if (skb->len <= MAX_IMM_TX_PKT_LEN - hdrlen) + return hdrlen; + return 0; } /** @@ -723,13 +789,32 @@ static inline unsigned int calc_tx_flits(const struct sk_buff *skb) { unsigned int flits; + int hdrlen = is_eth_imm(skb); - if (is_eth_imm(skb)) - return DIV_ROUND_UP(skb->len + sizeof(struct cpl_tx_pkt), 8); + /* If the skb is small enough, we can pump it out as a work request + * with only immediate data. In that case we just have to have the + * TX Packet header plus the skb data in the Work Request. + */ + + if (hdrlen) + return DIV_ROUND_UP(skb->len + hdrlen, sizeof(__be64)); - flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 4; + /* Otherwise, we're going to have to construct a Scatter gather list + * of the skb body and fragments. We also include the flits necessary + * for the TX Packet Work Request and CPL. We always have a firmware + * Write Header (incorporated as part of the cpl_tx_pkt_lso and + * cpl_tx_pkt structures), followed by either a TX Packet Write CPL + * message or, if we're doing a Large Send Offload, an LSO CPL message + * with an embedded TX Packet Write CPL message. 
+ */ + flits = sgl_len(skb_shinfo(skb)->nr_frags + 1); if (skb_shinfo(skb)->gso_size) - flits += 2; + flits += (sizeof(struct fw_eth_tx_pkt_wr) + + sizeof(struct cpl_tx_pkt_lso_core) + + sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64); + else + flits += (sizeof(struct fw_eth_tx_pkt_wr) + + sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64); return flits; } @@ -782,7 +867,8 @@ sgl->addr0 = cpu_to_be64(addr[1]); } - sgl->cmd_nsge = htonl(ULPTX_CMD(ULP_TX_SC_DSGL) | ULPTX_NSGE(nfrags)); + sgl->cmd_nsge = htonl(ULPTX_CMD_V(ULP_TX_SC_DSGL) | + ULPTX_NSGE_V(nfrags)); if (likely(--nfrags == 0)) return; /* @@ -817,8 +903,8 @@ } /* This function copies 64 byte coalesced work request to - * memory mapped BAR2 space(user space writes). - * For coalesced WR SGE, fetches data from the FIFO instead of from Host. + * memory mapped BAR2 space. For coalesced WR SGE fetches + * data from the FIFO instead of from Host. */ static void cxgb_pio_copy(u64 __iomem *dst, u64 *src) { @@ -842,28 +928,70 @@ */ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) { - unsigned int *wr, index; + /* Make sure that all writes to the TX Descriptors are committed + * before we tell the hardware about them. + */ + wmb(); + + /* If we don't have access to the new User Doorbell (T5+), use the old + * doorbell mechanism; otherwise use the new BAR2 mechanism. + */ + if (unlikely(q->bar2_addr == NULL)) { + u32 val = PIDX_V(n); + unsigned long flags; + + /* For T4 we need to participate in the Doorbell Recovery + * mechanism. 
+ */ + spin_lock_irqsave(&q->db_lock, flags); + if (!q->db_disabled) + t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A), + QID_V(q->cntxt_id) | val); + else + q->db_pidx_inc += n; + q->db_pidx = q->pidx; + spin_unlock_irqrestore(&q->db_lock, flags); + } else { + u32 val = PIDX_T5_V(n); - wmb(); /* write descriptors before telling HW */ - spin_lock(&q->db_lock); - if (!q->db_disabled) { - if (is_t4(adap->chip)) { - t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), - QID(q->cntxt_id) | PIDX(n)); + /* T4 and later chips share the same PIDX field offset within + * the doorbell, but T5 and later shrank the field in order to + * gain a bit for Doorbell Priority. The field was absurdly + * large in the first place (14 bits) so we just use the T5 + * and later limits and warn if a PIDX increment is too large. + */ + WARN_ON(val & DBPRIO_F); + + /* If we're only writing a single TX Descriptor and we can use + * Inferred QID registers, we can use the Write Combining + * Gather Buffer; otherwise we use the simple doorbell. + */ + if (n == 1 && q->bar2_qid == 0) { + int index = (q->pidx + ? (q->pidx - 1) + : (q->size - 1)); + u64 *wr = (u64 *)&q->desc[index]; + + cxgb_pio_copy((u64 __iomem *) + (q->bar2_addr + SGE_UDB_WCDOORBELL), + wr); } else { - if (n == 1) { - index = q->pidx ? (q->pidx - 1) : (q->size - 1); - wr = (unsigned int *)&q->desc[index]; - cxgb_pio_copy((u64 __iomem *) - (adap->bar2 + q->udb + 64), - (u64 *)wr); - } else - writel(n, adap->bar2 + q->udb + 8); - wmb(); + writel(val | QID_V(q->bar2_qid), + q->bar2_addr + SGE_UDB_KDOORBELL); } + + /* This Write Memory Barrier will force the write to the User + * Doorbell area to be flushed. This is needed to prevent + * writes on different CPUs for the same queue from hitting + * the adapter out of order.
This is required when some Work + * Requests take the Write Combine Gather Buffer path (user + * doorbell area offset [SGE_UDB_WCDOORBELL..+63]) and some + * take the traditional path where we simply increment the + * PIDX (User Doorbell area SGE_UDB_KDOORBELL) and have the + * hardware DMA read the actual Work Request. + */ + wmb(); } - q->db_pidx = q->pidx; - spin_unlock(&q->db_lock); } /** @@ -905,7 +1033,7 @@ * Figure out what HW csum a packet wants and return the appropriate control * bits. */ -static u64 hwcsum(const struct sk_buff *skb) +static u64 hwcsum(enum chip_type chip, const struct sk_buff *skb) { int csum_type; const struct iphdr *iph = ip_hdr(skb); @@ -920,7 +1048,7 @@ * unknown protocol, disable HW csum * and hope a bad packet is detected */ - return TXPKT_L4CSUM_DIS; + return TXPKT_L4CSUM_DIS_F; } } else { /* @@ -936,15 +1064,21 @@ goto nocsum; } - if (likely(csum_type >= TX_CSUM_TCPIP)) - return TXPKT_CSUM_TYPE(csum_type) | - TXPKT_IPHDR_LEN(skb_network_header_len(skb)) | - TXPKT_ETHHDR_LEN(skb_network_offset(skb) - ETH_HLEN); - else { + if (likely(csum_type >= TX_CSUM_TCPIP)) { + u64 hdr_len = TXPKT_IPHDR_LEN_V(skb_network_header_len(skb)); + int eth_hdr_len = skb_network_offset(skb) - ETH_HLEN; + + if (CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5) + hdr_len |= TXPKT_ETHHDR_LEN_V(eth_hdr_len); + else + hdr_len |= T6_TXPKT_ETHHDR_LEN_V(eth_hdr_len); + return TXPKT_CSUM_TYPE_V(csum_type) | hdr_len; + } else { int start = skb_transport_offset(skb); - return TXPKT_CSUM_TYPE(csum_type) | TXPKT_CSUM_START(start) | - TXPKT_CSUM_LOC(start + skb->csum_offset); + return TXPKT_CSUM_TYPE_V(csum_type) | + TXPKT_CSUM_START_V(start) | + TXPKT_CSUM_LOC_V(start + skb->csum_offset); } } @@ -962,6 +1096,38 @@ q->pidx -= q->size; } +#ifdef CONFIG_CHELSIO_T4_FCOE +static inline int +cxgb_fcoe_offload(struct sk_buff *skb, struct adapter *adap, + const struct port_info *pi, u64 *cntrl) +{ + const struct cxgb_fcoe *fcoe = &pi->fcoe; + + if (!(fcoe->flags & 
CXGB_FCOE_ENABLED)) + return 0; + + if (skb->protocol != htons(ETH_P_FCOE)) + return 0; + + skb_reset_mac_header(skb); + skb->mac_len = sizeof(struct ethhdr); + + skb_set_network_header(skb, skb->mac_len); + skb_set_transport_header(skb, skb->mac_len + sizeof(struct fcoe_hdr)); + + if (!cxgb_fcoe_sof_eof_supported(adap, skb)) + return -ENOTSUPP; + + /* FC CRC offload */ + *cntrl = TXPKT_CSUM_TYPE_V(TX_CSUM_FCOE) | + TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F | + TXPKT_CSUM_START_V(CXGB_FCOE_TXPKT_CSUM_START) | + TXPKT_CSUM_END_V(CXGB_FCOE_TXPKT_CSUM_END) | + TXPKT_CSUM_LOC_V(CXGB_FCOE_TXPKT_CSUM_END); + return 0; +} +#endif /* CONFIG_CHELSIO_T4_FCOE */ + /** * t4_eth_xmit - add a packet to an Ethernet Tx queue * @skb: the packet @@ -971,7 +1137,7 @@ */ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev) { - u32 wr_mid; + u32 wr_mid, ctrl0; u64 cntrl, *end; int qidx, credits; unsigned int flits, ndesc; @@ -982,22 +1148,41 @@ struct cpl_tx_pkt_core *cpl; const struct skb_shared_info *ssi; dma_addr_t addr[MAX_SKB_FRAGS + 1]; + bool immediate = false; + int len, max_pkt_len; +#ifdef CONFIG_CHELSIO_T4_FCOE + int err; +#endif /* CONFIG_CHELSIO_T4_FCOE */ /* * The chip min packet length is 10 octets but play safe and reject * anything shorter than an Ethernet header. 
*/ if (unlikely(skb->len < ETH_HLEN)) { -out_free: dev_kfree_skb(skb); +out_free: dev_kfree_skb_any(skb); return NETDEV_TX_OK; } + /* Discard the packet if the length is greater than mtu */ + max_pkt_len = ETH_HLEN + dev->mtu; + if (skb_vlan_tag_present(skb)) + max_pkt_len += VLAN_HLEN; + if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len))) + goto out_free; + pi = netdev_priv(dev); adap = pi->adapter; qidx = skb_get_queue_mapping(skb); q = &adap->sge.ethtxq[qidx + pi->first_qset]; reclaim_completed_tx(adap, &q->q, true); + cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F; + +#ifdef CONFIG_CHELSIO_T4_FCOE + err = cxgb_fcoe_offload(skb, adap, pi, &cntrl); + if (unlikely(err == -ENOTSUPP)) + goto out_free; +#endif /* CONFIG_CHELSIO_T4_FCOE */ flits = calc_tx_flits(skb); ndesc = flits_to_desc(flits); @@ -1011,16 +1196,19 @@ return NETDEV_TX_BUSY; } - if (!is_eth_imm(skb) && + if (is_eth_imm(skb)) + immediate = true; + + if (!immediate && unlikely(map_skb(adap->pdev_dev, skb, addr) < 0)) { q->mapping_err++; goto out_free; } - wr_mid = FW_WR_LEN16(DIV_ROUND_UP(flits, 2)); + wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2)); if (unlikely(credits < ETHTXQ_STOP_THRES)) { eth_txq_stop(q); - wr_mid |= FW_WR_EQUEQ | FW_WR_EQUIQ; + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; } wr = (void *)&q->q.desc[q->q.pidx]; @@ -1028,6 +1216,7 @@ wr->r3 = cpu_to_be64(0); end = (u64 *)wr + flits; + len = immediate ? 
skb->len : 0; ssi = skb_shinfo(skb); if (ssi->gso_size) { struct cpl_tx_pkt_lso *lso = (void *)wr; @@ -1035,52 +1224,72 @@ int l3hdr_len = skb_network_header_len(skb); int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN; - wr->op_immdlen = htonl(FW_WR_OP(FW_ETH_TX_PKT_WR) | - FW_WR_IMMDLEN(sizeof(*lso))); - lso->c.lso_ctrl = htonl(LSO_OPCODE(CPL_TX_PKT_LSO) | - LSO_FIRST_SLICE | LSO_LAST_SLICE | - LSO_IPV6(v6) | - LSO_ETHHDR_LEN(eth_xtra_len / 4) | - LSO_IPHDR_LEN(l3hdr_len / 4) | - LSO_TCPHDR_LEN(tcp_hdr(skb)->doff)); + len += sizeof(*lso); + wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) | + FW_WR_IMMDLEN_V(len)); + lso->c.lso_ctrl = htonl(LSO_OPCODE_V(CPL_TX_PKT_LSO) | + LSO_FIRST_SLICE_F | LSO_LAST_SLICE_F | + LSO_IPV6_V(v6) | + LSO_ETHHDR_LEN_V(eth_xtra_len / 4) | + LSO_IPHDR_LEN_V(l3hdr_len / 4) | + LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff)); lso->c.ipid_ofst = htons(0); lso->c.mss = htons(ssi->gso_size); lso->c.seqno_offset = htonl(0); - lso->c.len = htonl(skb->len); + if (is_t4(adap->params.chip)) + lso->c.len = htonl(skb->len); + else + lso->c.len = htonl(LSO_T5_XFER_SIZE_V(skb->len)); cpl = (void *)(lso + 1); - cntrl = TXPKT_CSUM_TYPE(v6 ? TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) | - TXPKT_IPHDR_LEN(l3hdr_len) | - TXPKT_ETHHDR_LEN(eth_xtra_len); + + if (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5) + cntrl = TXPKT_ETHHDR_LEN_V(eth_xtra_len); + else + cntrl = T6_TXPKT_ETHHDR_LEN_V(eth_xtra_len); + + cntrl |= TXPKT_CSUM_TYPE_V(v6 ? + TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) | + TXPKT_IPHDR_LEN_V(l3hdr_len); q->tso++; q->tx_cso += ssi->gso_segs; } else { - int len; - - len = is_eth_imm(skb) ? 
skb->len + sizeof(*cpl) : sizeof(*cpl); - wr->op_immdlen = htonl(FW_WR_OP(FW_ETH_TX_PKT_WR) | - FW_WR_IMMDLEN(len)); + len += sizeof(*cpl); + wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) | + FW_WR_IMMDLEN_V(len)); cpl = (void *)(wr + 1); if (skb->ip_summed == CHECKSUM_PARTIAL) { - cntrl = hwcsum(skb) | TXPKT_IPCSUM_DIS; + cntrl = hwcsum(adap->params.chip, skb) | + TXPKT_IPCSUM_DIS_F; q->tx_cso++; - } else - cntrl = TXPKT_L4CSUM_DIS | TXPKT_IPCSUM_DIS; + } } - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { q->vlan_ins++; - cntrl |= TXPKT_VLAN_VLD | TXPKT_VLAN(vlan_tx_tag_get(skb)); + cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb)); +#ifdef CONFIG_CHELSIO_T4_FCOE + if (skb->protocol == htons(ETH_P_FCOE)) + cntrl |= TXPKT_VLAN_V( + ((skb->priority & 0x7) << VLAN_PRIO_SHIFT)); +#endif /* CONFIG_CHELSIO_T4_FCOE */ } - cpl->ctrl0 = htonl(TXPKT_OPCODE(CPL_TX_PKT_XT) | - TXPKT_INTF(pi->tx_chan) | TXPKT_PF(adap->fn)); + ctrl0 = TXPKT_OPCODE_V(CPL_TX_PKT_XT) | TXPKT_INTF_V(pi->tx_chan) | + TXPKT_PF_V(adap->pf); +#ifdef CONFIG_CHELSIO_T4_DCB + if (is_t4(adap->params.chip)) + ctrl0 |= TXPKT_OVLAN_IDX_V(q->dcb_prio); + else + ctrl0 |= TXPKT_T5_OVLAN_IDX_V(q->dcb_prio); +#endif + cpl->ctrl0 = htonl(ctrl0); cpl->pack = htons(0); cpl->len = htons(skb->len); cpl->ctrl1 = cpu_to_be64(cntrl); - if (is_eth_imm(skb)) { + if (immediate) { inline_tx_skb(skb, &q->q, cpl + 1); - dev_kfree_skb(skb); + dev_consume_skb_any(skb); } else { int last_desc; @@ -1146,7 +1355,7 @@ { reclaim_completed_tx_imm(&q->q); if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) { - wr->lo |= htonl(FW_WR_EQUEQ | FW_WR_EQUIQ); + wr->lo |= htonl(FW_WR_EQUEQ_F | FW_WR_EQUIQ_F); q->q.stops++; q->full = 1; } @@ -1215,18 +1424,17 @@ struct fw_wr_hdr *wr; unsigned int ndesc = skb->priority; /* previously saved */ - /* - * Write descriptors and free skbs outside the lock to limit + written += ndesc; + /* Write descriptors and free skbs outside the lock to limit * wait times. 
q->full is still set so new skbs will be queued. */ + wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx]; + txq_advance(&q->q, ndesc); spin_unlock(&q->sendq.lock); - wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx]; inline_tx_skb(skb, &q->q, wr); kfree_skb(skb); - written += ndesc; - txq_advance(&q->q, ndesc); if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) { unsigned long old = q->q.stops; @@ -1294,7 +1502,7 @@ flits = skb_transport_offset(skb) / 8U; /* headers */ cnt = skb_shinfo(skb)->nr_frags; - if (skb->tail != skb->transport_header) + if (skb_tail_pointer(skb) != skb_transport_header(skb)) cnt++; return flits + sgl_len(cnt); } @@ -1328,7 +1536,7 @@ { struct fw_wr_hdr *wr = (struct fw_wr_hdr *)skb->data; - wr->lo |= htonl(FW_WR_EQUEQ | FW_WR_EQUIQ); + wr->lo |= htonl(FW_WR_EQUEQ_F | FW_WR_EQUIQ_F); q->q.stops++; q->full = 1; } @@ -1467,8 +1675,12 @@ { unsigned int idx = skb_txq(skb); - if (unlikely(is_ctrl_pkt(skb))) + if (unlikely(is_ctrl_pkt(skb))) { + /* Single ctrl queue is a requirement for LE workaround path */ + if (adap->tids.nsftids) + idx = 0; return ctrl_xmit(&adap->sge.ctrlq[idx], skb); + } return ofld_xmit(&adap->sge.ofldtxq[idx], skb); } @@ -1596,7 +1808,7 @@ return 0; } - if (is_t4(adap->chip)) + if (is_t4(adap->params.chip)) __skb_pull(skb, sizeof(struct cpl_trace_pkt)); else __skb_pull(skb, sizeof(struct cpl_t5_trace_pkt)); @@ -1608,11 +1820,34 @@ return 0; } +/** + * cxgb4_sgetim_to_hwtstamp - convert sge time stamp to hw time stamp + * @adap: the adapter + * @hwtstamps: time stamp structure to update + * @sgetstamp: 60bit iqe timestamp + * + * Every ingress queue entry has the 60-bit timestamp, convert that timestamp + * which is in Core Clock ticks into ktime_t and assign it + **/ +static void cxgb4_sgetim_to_hwtstamp(struct adapter *adap, + struct skb_shared_hwtstamps *hwtstamps, + u64 sgetstamp) +{ + u64 ns; + u64 tmp = (sgetstamp * 1000 * 1000 + adap->params.vpd.cclk / 2); + + ns = div_u64(tmp, adap->params.vpd.cclk); + + memset(hwtstamps, 0, 
sizeof(*hwtstamps)); + hwtstamps->hwtstamp = ns_to_ktime(ns); +} + static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl, const struct cpl_rx_pkt *pkt) { struct adapter *adapter = rxq->rspq.adap; struct sge *s = &adapter->sge; + struct port_info *pi; int ret; struct sk_buff *skb; @@ -1629,8 +1864,14 @@ skb->truesize += skb->data_len; skb->ip_summed = CHECKSUM_UNNECESSARY; skb_record_rx_queue(skb, rxq->rspq.idx); + skb_mark_napi_id(skb, &rxq->rspq.napi); + pi = netdev_priv(skb->dev); + if (pi->rxtstamp) + cxgb4_sgetim_to_hwtstamp(adapter, skb_hwtstamps(skb), + gl->sgetstamp); if (rxq->rspq.netdev->features & NETIF_F_RXHASH) - skb->rxhash = (__force u32)pkt->rsshdr.hash_val; + skb_set_hash(skb, (__force u32)pkt->rsshdr.hash_val, + PKT_HASH_TYPE_L3); if (unlikely(pkt->vlan_ex)) { __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(pkt->vlan)); @@ -1661,15 +1902,18 @@ const struct cpl_rx_pkt *pkt; struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq); struct sge *s = &q->adap->sge; - int cpl_trace_pkt = is_t4(q->adap->chip) ? + int cpl_trace_pkt = is_t4(q->adap->params.chip) ? 
CPL_TRACE_PKT : CPL_TRACE_PKT_T5; + struct port_info *pi; if (unlikely(*(u8 *)rsp == cpl_trace_pkt)) return handle_trace_pkt(q->adap, si); pkt = (const struct cpl_rx_pkt *)rsp; - csum_ok = pkt->csum_calc && !pkt->err_vec; - if ((pkt->l2info & htonl(RXF_TCP)) && + csum_ok = pkt->csum_calc && !pkt->err_vec && + (q->netdev->features & NETIF_F_RXCSUM); + if ((pkt->l2info & htonl(RXF_TCP_F)) && + !(cxgb_poll_busy_polling(q)) && (q->netdev->features & NETIF_F_GRO) && csum_ok && !pkt->ip_frag) { do_gro(rxq, si, pkt); return 0; @@ -1686,28 +1930,48 @@ skb->protocol = eth_type_trans(skb, q->netdev); skb_record_rx_queue(skb, q->idx); if (skb->dev->features & NETIF_F_RXHASH) - skb->rxhash = (__force u32)pkt->rsshdr.hash_val; + skb_set_hash(skb, (__force u32)pkt->rsshdr.hash_val, + PKT_HASH_TYPE_L3); rxq->stats.pkts++; - if (csum_ok && (q->netdev->features & NETIF_F_RXCSUM) && - (pkt->l2info & htonl(RXF_UDP | RXF_TCP))) { + pi = netdev_priv(skb->dev); + if (pi->rxtstamp) + cxgb4_sgetim_to_hwtstamp(q->adap, skb_hwtstamps(skb), + si->sgetstamp); + if (csum_ok && (pkt->l2info & htonl(RXF_UDP_F | RXF_TCP_F))) { if (!pkt->ip_frag) { skb->ip_summed = CHECKSUM_UNNECESSARY; rxq->stats.rx_cso++; - } else if (pkt->l2info & htonl(RXF_IP)) { + } else if (pkt->l2info & htonl(RXF_IP_F)) { __sum16 c = (__force __sum16)pkt->csum; skb->csum = csum_unfold(c); skb->ip_summed = CHECKSUM_COMPLETE; rxq->stats.rx_cso++; } - } else + } else { skb_checksum_none_assert(skb); +#ifdef CONFIG_CHELSIO_T4_FCOE +#define CPL_RX_PKT_FLAGS (RXF_PSH_F | RXF_SYN_F | RXF_UDP_F | \ + RXF_TCP_F | RXF_IP_F | RXF_IP6_F | RXF_LRO_F) + + if (!(pkt->l2info & cpu_to_be32(CPL_RX_PKT_FLAGS))) { + if ((pkt->l2info & cpu_to_be32(RXF_FCOE_F)) && + (pi->fcoe.flags & CXGB_FCOE_ENABLED)) { + if (!(pkt->err_vec & cpu_to_be16(RXERR_CSUM_F))) + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + } + +#undef CPL_RX_PKT_FLAGS +#endif /* CONFIG_CHELSIO_T4_FCOE */ + } if (unlikely(pkt->vlan_ex)) { __vlan_hwaccel_put_tag(skb, 
htons(ETH_P_8021Q), ntohs(pkt->vlan)); rxq->stats.vlan_ex++; } + skb_mark_napi_id(skb, &q->napi); netif_receive_skb(skb); return 0; } @@ -1755,7 +2019,7 @@ static inline bool is_new_response(const struct rsp_ctrl *r, const struct sge_rspq *q) { - return RSPD_GEN(r->type_gen) == q->gen; + return (r->type_gen >> RSPD_GEN_S) == q->gen; } /** @@ -1801,20 +2065,20 @@ if (!is_new_response(rc, q)) break; - rmb(); - rsp_type = RSPD_TYPE(rc->type_gen); - if (likely(rsp_type == RSP_TYPE_FLBUF)) { + dma_rmb(); + rsp_type = RSPD_TYPE_G(rc->type_gen); + if (likely(rsp_type == RSPD_TYPE_FLBUF_X)) { struct page_frag *fp; struct pkt_gl si; const struct rx_sw_desc *rsd; u32 len = ntohl(rc->pldbuflen_qid), bufsz, frags; - if (len & RSPD_NEWBUF) { + if (len & RSPD_NEWBUF_F) { if (likely(q->offset > 0)) { free_rx_bufs(q->adap, &rxq->fl, 1); q->offset = 0; } - len = RSPD_LEN(len); + len = RSPD_LEN_G(len); } si.tot_len = len; @@ -1831,6 +2095,8 @@ unmap_rx_buf(q->adap, &rxq->fl); } + si.sgetstamp = SGE_TIMESTAMP_G( + be64_to_cpu(rc->last_flit)); /* * Last buffer remains mapped so explicitly make it * coherent for CPU access. 
@@ -1849,7 +2115,7 @@ q->offset += ALIGN(fp->size, s->fl_align); else restore_rx_bufs(&si, &rxq->fl, frags); - } else if (likely(rsp_type == RSP_TYPE_CPL)) { + } else if (likely(rsp_type == RSPD_TYPE_CPL_X)) { ret = q->handler(q, q->cur_desc, NULL); } else { ret = q->handler(q, (const __be64 *)rc, CXGB4_MSG_AN); @@ -1857,7 +2123,7 @@ if (unlikely(ret)) { /* couldn't process descriptor, back off for recovery */ - q->next_intr_params = QINTR_TIMER_IDX(NOMEM_TMR_IDX); + q->next_intr_params = QINTR_TIMER_IDX_V(NOMEM_TMR_IDX); break; } @@ -1870,6 +2136,38 @@ return budget - budget_left; } +#ifdef CONFIG_NET_RX_BUSY_POLL +int cxgb_busy_poll(struct napi_struct *napi) +{ + struct sge_rspq *q = container_of(napi, struct sge_rspq, napi); + unsigned int params, work_done; + u32 val; + + if (!cxgb_poll_lock_poll(q)) + return LL_FLUSH_BUSY; + + work_done = process_responses(q, 4); + params = QINTR_TIMER_IDX_V(TIMERREG_COUNTER0_X) | QINTR_CNT_EN_V(1); + q->next_intr_params = params; + val = CIDXINC_V(work_done) | SEINTARM_V(params); + + /* If we don't have access to the new User GTS (T5+), use the old + * doorbell mechanism; otherwise use the new BAR2 mechanism. 
+ */ + if (unlikely(!q->bar2_addr)) + t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS_A), + val | INGRESSQID_V((u32)q->cntxt_id)); + else { + writel(val | INGRESSQID_V(q->bar2_qid), + q->bar2_addr + SGE_UDB_GTS); + wmb(); + } + + cxgb_poll_unlock_poll(q); + return work_done; +} +#endif /* CONFIG_NET_RX_BUSY_POLL */ + /** * napi_rx_handler - the NAPI handler for Rx processing * @napi: the napi instance @@ -1885,17 +2183,52 @@ { unsigned int params; struct sge_rspq *q = container_of(napi, struct sge_rspq, napi); - int work_done = process_responses(q, budget); + int work_done; + u32 val; + if (!cxgb_poll_lock_napi(q)) + return budget; + + work_done = process_responses(q, budget); if (likely(work_done < budget)) { + int timer_index; + napi_complete(napi); - params = q->next_intr_params; - q->next_intr_params = q->intr_params; + timer_index = QINTR_TIMER_IDX_G(q->next_intr_params); + + if (q->adaptive_rx) { + if (work_done > max(timer_pkt_quota[timer_index], + MIN_NAPI_WORK)) + timer_index = (timer_index + 1); + else + timer_index = timer_index - 1; + + timer_index = clamp(timer_index, 0, SGE_TIMERREGS - 1); + q->next_intr_params = + QINTR_TIMER_IDX_V(timer_index) | + QINTR_CNT_EN_V(0); + params = q->next_intr_params; + } else { + params = q->next_intr_params; + q->next_intr_params = q->intr_params; + } } else - params = QINTR_TIMER_IDX(7); + params = QINTR_TIMER_IDX_V(7); + + val = CIDXINC_V(work_done) | SEINTARM_V(params); - t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS), CIDXINC(work_done) | - INGRESSQID((u32)q->cntxt_id) | SEINTARM(params)); + /* If we don't have access to the new User GTS (T5+), use the old + * doorbell mechanism; otherwise use the new BAR2 mechanism. 
+ */ + if (unlikely(q->bar2_addr == NULL)) { + t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS_A), + val | INGRESSQID_V((u32)q->cntxt_id)); + } else { + writel(val | INGRESSQID_V(q->bar2_qid), + q->bar2_addr + SGE_UDB_GTS); + wmb(); + } + cxgb_poll_unlock_napi(q); return work_done; } @@ -1919,6 +2252,7 @@ unsigned int credits; const struct rsp_ctrl *rc; struct sge_rspq *q = &adap->sge.intrq; + u32 val; spin_lock(&adap->sge.intrq_lock); for (credits = 0; ; credits++) { @@ -1926,8 +2260,8 @@ if (!is_new_response(rc, q)) break; - rmb(); - if (RSPD_TYPE(rc->type_gen) == RSP_TYPE_INTR) { + dma_rmb(); + if (RSPD_TYPE_G(rc->type_gen) == RSPD_TYPE_INTR_X) { unsigned int qid = ntohl(rc->pldbuflen_qid); qid -= adap->sge.ingr_start; @@ -1937,8 +2271,19 @@ rspq_next(q); } - t4_write_reg(adap, MYPF_REG(SGE_PF_GTS), CIDXINC(credits) | - INGRESSQID(q->cntxt_id) | SEINTARM(q->intr_params)); + val = CIDXINC_V(credits) | SEINTARM_V(q->intr_params); + + /* If we don't have access to the new User GTS (T5+), use the old + * doorbell mechanism; otherwise use the new BAR2 mechanism. 
+ */ + if (unlikely(q->bar2_addr == NULL)) { + t4_write_reg(adap, MYPF_REG(SGE_PF_GTS_A), + val | INGRESSQID_V(q->cntxt_id)); + } else { + writel(val | INGRESSQID_V(q->bar2_qid), + q->bar2_addr + SGE_UDB_GTS); + wmb(); + } spin_unlock(&adap->sge.intrq_lock); return credits; } @@ -1951,7 +2296,8 @@ { struct adapter *adap = cookie; - t4_slow_intr_handler(adap); + if (adap->flags & MASTER_PF) + t4_slow_intr_handler(adap); process_intrq(adap); return IRQ_HANDLED; } @@ -1965,8 +2311,9 @@ { struct adapter *adap = cookie; - t4_write_reg(adap, MYPF_REG(PCIE_PF_CLI), 0); - if (t4_slow_intr_handler(adap) | process_intrq(adap)) + t4_write_reg(adap, MYPF_REG(PCIE_PF_CLI_A), 0); + if (((adap->flags & MASTER_PF) && t4_slow_intr_handler(adap)) | + process_intrq(adap)) return IRQ_HANDLED; return IRQ_NONE; /* probably shared interrupt */ } @@ -1990,20 +2337,20 @@ static void sge_rx_timer_cb(unsigned long data) { unsigned long m; - unsigned int i, cnt[2]; + unsigned int i; struct adapter *adap = (struct adapter *)data; struct sge *s = &adap->sge; - for (i = 0; i < ARRAY_SIZE(s->starving_fl); i++) + for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++) for (m = s->starving_fl[i]; m; m &= m - 1) { struct sge_eth_rxq *rxq; unsigned int id = __ffs(m) + i * BITS_PER_LONG; struct sge_fl *fl = s->egr_map[id]; clear_bit(id, s->starving_fl); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); - if (fl_starving(fl)) { + if (fl_starving(adap, fl)) { rxq = container_of(fl, struct sge_eth_rxq, fl); if (napi_reschedule(&rxq->rspq.napi)) fl->starving++; @@ -2011,24 +2358,16 @@ set_bit(id, s->starving_fl); } } + /* The remainder of the SGE RX Timer Callback routine is dedicated to + * global Master PF activities like checking for chip ingress stalls, + * etc. 
+ */ + if (!(adap->flags & MASTER_PF)) + goto done; - t4_write_reg(adap, SGE_DEBUG_INDEX, 13); - cnt[0] = t4_read_reg(adap, SGE_DEBUG_DATA_HIGH); - cnt[1] = t4_read_reg(adap, SGE_DEBUG_DATA_LOW); - - for (i = 0; i < 2; i++) - if (cnt[i] >= s->starve_thres) { - if (s->idma_state[i] || cnt[i] == 0xffffffff) - continue; - s->idma_state[i] = 1; - t4_write_reg(adap, SGE_DEBUG_INDEX, 11); - m = t4_read_reg(adap, SGE_DEBUG_DATA_LOW) >> (i * 16); - dev_warn(adap->pdev_dev, - "SGE idma%u starvation detected for " - "queue %lu\n", i, m & 0xffff); - } else if (s->idma_state[i]) - s->idma_state[i] = 0; + t4_idma_monitor(adap, &s->idma_monitor, HZ, RX_QCHECK_PERIOD); +done: mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD); } @@ -2039,7 +2378,7 @@ struct adapter *adap = (struct adapter *)data; struct sge *s = &adap->sge; - for (i = 0; i < ARRAY_SIZE(s->txq_maperr); i++) + for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++) for (m = s->txq_maperr[i]; m; m &= m - 1) { unsigned long id = __ffs(m) + i * BITS_PER_LONG; struct sge_ofld_txq *txq = s->egr_map[id]; @@ -2076,9 +2415,41 @@ mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2)); } +/** + * bar2_address - return the BAR2 address for an SGE Queue's Registers + * @adapter: the adapter + * @qid: the SGE Queue ID + * @qtype: the SGE Queue Type (Egress or Ingress) + * @pbar2_qid: BAR2 Queue ID or 0 for Queue ID inferred SGE Queues + * + * Returns the BAR2 address for the SGE Queue Registers associated with + * @qid. If BAR2 SGE Registers aren't available, returns NULL. Also + * returns the BAR2 Queue ID to be used with writes to the BAR2 SGE + * Queue Registers. If the BAR2 Queue ID is 0, then "Inferred Queue ID" + * Registers are supported (e.g. the Write Combining Doorbell Buffer). 
+ */ +static void __iomem *bar2_address(struct adapter *adapter, + unsigned int qid, + enum t4_bar2_qtype qtype, + unsigned int *pbar2_qid) +{ + u64 bar2_qoffset; + int ret; + + ret = t4_bar2_sge_qregs(adapter, qid, qtype, 0, + &bar2_qoffset, pbar2_qid); + if (ret) + return NULL; + + return adapter->bar2 + bar2_qoffset; +} + +/* @intr_idx: MSI/MSI-X vector if >=0, -(absolute qid + 1) if < 0 + * @cong: < 0 -> no congestion feedback, >= 0 -> congestion channel map + */ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, struct net_device *dev, int intr_idx, - struct sge_fl *fl, rspq_handler_t hnd) + struct sge_fl *fl, rspq_handler_t hnd, int cong) { int ret, flsz = 0; struct fw_iq_cmd c; @@ -2094,24 +2465,38 @@ return -ENOMEM; memset(&c, 0, sizeof(c)); - c.op_to_vfn = htonl(FW_CMD_OP(FW_IQ_CMD) | FW_CMD_REQUEST | - FW_CMD_WRITE | FW_CMD_EXEC | - FW_IQ_CMD_PFN(adap->fn) | FW_IQ_CMD_VFN(0)); - c.alloc_to_len16 = htonl(FW_IQ_CMD_ALLOC | FW_IQ_CMD_IQSTART(1) | + c.op_to_vfn = htonl(FW_CMD_OP_V(FW_IQ_CMD) | FW_CMD_REQUEST_F | + FW_CMD_WRITE_F | FW_CMD_EXEC_F | + FW_IQ_CMD_PFN_V(adap->pf) | FW_IQ_CMD_VFN_V(0)); + c.alloc_to_len16 = htonl(FW_IQ_CMD_ALLOC_F | FW_IQ_CMD_IQSTART_F | FW_LEN16(c)); - c.type_to_iqandstindex = htonl(FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | - FW_IQ_CMD_IQASYNCH(fwevtq) | FW_IQ_CMD_VIID(pi->viid) | - FW_IQ_CMD_IQANDST(intr_idx < 0) | FW_IQ_CMD_IQANUD(1) | - FW_IQ_CMD_IQANDSTINDEX(intr_idx >= 0 ? intr_idx : + c.type_to_iqandstindex = htonl(FW_IQ_CMD_TYPE_V(FW_IQ_TYPE_FL_INT_CAP) | + FW_IQ_CMD_IQASYNCH_V(fwevtq) | FW_IQ_CMD_VIID_V(pi->viid) | + FW_IQ_CMD_IQANDST_V(intr_idx < 0) | + FW_IQ_CMD_IQANUD_V(UPDATEDELIVERY_INTERRUPT_X) | + FW_IQ_CMD_IQANDSTINDEX_V(intr_idx >= 0 ? 
intr_idx : -intr_idx - 1)); - c.iqdroprss_to_iqesize = htons(FW_IQ_CMD_IQPCIECH(pi->tx_chan) | - FW_IQ_CMD_IQGTSMODE | - FW_IQ_CMD_IQINTCNTTHRESH(iq->pktcnt_idx) | - FW_IQ_CMD_IQESIZE(ilog2(iq->iqe_len) - 4)); + c.iqdroprss_to_iqesize = htons(FW_IQ_CMD_IQPCIECH_V(pi->tx_chan) | + FW_IQ_CMD_IQGTSMODE_F | + FW_IQ_CMD_IQINTCNTTHRESH_V(iq->pktcnt_idx) | + FW_IQ_CMD_IQESIZE_V(ilog2(iq->iqe_len) - 4)); c.iqsize = htons(iq->size); c.iqaddr = cpu_to_be64(iq->phys_addr); + if (cong >= 0) + c.iqns_to_fl0congen = htonl(FW_IQ_CMD_IQFLINTCONGEN_F); if (fl) { + enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip); + + /* Allocate the ring for the hardware free list (with space + * for its status page) along with the associated software + * descriptor ring. The free list size needs to be a multiple + * of the Egress Queue Unit and at least 2 Egress Units larger + * than the SGE's Egress Congestion Threshold + * (fl_starve_thres - 1). + */ + if (fl->size < s->fl_starve_thres - 1 + 2 * 8) + fl->size = s->fl_starve_thres - 1 + 2 * 8; fl->size = roundup(fl->size, 8); fl->desc = alloc_ring(adap->pdev_dev, fl->size, sizeof(__be64), sizeof(struct rx_sw_desc), &fl->addr, @@ -2120,29 +2505,41 @@ goto fl_nomem; flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc); - c.iqns_to_fl0congen = htonl(FW_IQ_CMD_FL0PACKEN(1) | - FW_IQ_CMD_FL0FETCHRO(1) | - FW_IQ_CMD_FL0DATARO(1) | - FW_IQ_CMD_FL0PADEN(1)); - c.fl0dcaen_to_fl0cidxfthresh = htons(FW_IQ_CMD_FL0FBMIN(2) | - FW_IQ_CMD_FL0FBMAX(3)); + c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F | + FW_IQ_CMD_FL0FETCHRO_F | + FW_IQ_CMD_FL0DATARO_F | + FW_IQ_CMD_FL0PADEN_F); + if (cong >= 0) + c.iqns_to_fl0congen |= + htonl(FW_IQ_CMD_FL0CNGCHMAP_V(cong) | + FW_IQ_CMD_FL0CONGCIF_F | + FW_IQ_CMD_FL0CONGEN_F); + c.fl0dcaen_to_fl0cidxfthresh = + htons(FW_IQ_CMD_FL0FBMIN_V(FETCHBURSTMIN_64B_X) | + FW_IQ_CMD_FL0FBMAX_V((chip <= CHELSIO_T5) ? 
+ FETCHBURSTMAX_512B_X : + FETCHBURSTMAX_256B_X)); c.fl0size = htons(flsz); c.fl0addr = cpu_to_be64(fl->addr); } - ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c); + ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c); if (ret) goto err; netif_napi_add(dev, &iq->napi, napi_rx_handler, 64); + napi_hash_add(&iq->napi); iq->cur_desc = iq->desc; iq->cidx = 0; iq->gen = 1; iq->next_intr_params = iq->intr_params; iq->cntxt_id = ntohs(c.iqid); iq->abs_id = ntohs(c.physiqid); + iq->bar2_addr = bar2_address(adap, + iq->cntxt_id, + T4_BAR2_QTYPE_INGRESS, + &iq->bar2_qid); iq->size--; /* subtract status entry */ - iq->adap = adap; iq->netdev = dev; iq->handler = hnd; @@ -2157,8 +2554,51 @@ fl->pidx = fl->cidx = 0; fl->alloc_failed = fl->large_alloc_failed = fl->starving = 0; adap->sge.egr_map[fl->cntxt_id - adap->sge.egr_start] = fl; + + /* Note, we must initialize the BAR2 Free List User Doorbell + * information before refilling the Free List! + */ + fl->bar2_addr = bar2_address(adap, + fl->cntxt_id, + T4_BAR2_QTYPE_EGRESS, + &fl->bar2_qid); refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL); } + + /* For T5 and later we attempt to set up the Congestion Manager values + * of the new RX Ethernet Queue. This should really be handled by + * firmware because it's more complex than any host driver wants to + * get involved with and it's different per chip and this is almost + * certainly wrong. Firmware would be wrong as well, but it would be + * a lot easier to fix in one place ... For now we do something very + * simple (and hopefully less wrong). 
+ */ + if (!is_t4(adap->params.chip) && cong >= 0) { + u32 param, val; + int i; + + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | + FW_PARAMS_PARAM_YZ_V(iq->cntxt_id)); + if (cong == 0) { + val = CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_QUEUE_X); + } else { + val = + CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_CHANNEL_X); + for (i = 0; i < 4; i++) { + if (cong & (1 << i)) + val |= + CONMCTXT_CNGCHMAP_V(1 << (i << 2)); + } + } + ret = t4_set_params(adap, adap->mbox, adap->pf, 0, 1, + &param, &val); + if (ret) + dev_warn(adap->pdev_dev, "Failed to set Congestion" + " Manager Context for Ingress Queue %d: %d\n", + iq->cntxt_id, -ret); + } + return 0; fl_nomem: @@ -2182,22 +2622,10 @@ static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id) { q->cntxt_id = id; - if (!is_t4(adap->chip)) { - unsigned int s_qpp; - unsigned short udb_density; - unsigned long qpshift; - int page; - - s_qpp = QUEUESPERPAGEPF1 * adap->fn; - udb_density = 1 << QUEUESPERPAGEPF0_GET((t4_read_reg(adap, - SGE_EGRESS_QUEUES_PER_PAGE_PF) >> s_qpp)); - qpshift = PAGE_SHIFT - ilog2(udb_density); - q->udb = q->cntxt_id << qpshift; - q->udb &= PAGE_MASK; - page = q->udb / PAGE_SIZE; - q->udb += (q->cntxt_id - (page * udb_density)) * 128; - } - + q->bar2_addr = bar2_address(adap, + q->cntxt_id, + T4_BAR2_QTYPE_EGRESS, + &q->bar2_qid); q->in_use = 0; q->cidx = q->pidx = 0; q->stops = q->restarts = 0; @@ -2226,23 +2654,26 @@ return -ENOMEM; memset(&c, 0, sizeof(c)); - c.op_to_vfn = htonl(FW_CMD_OP(FW_EQ_ETH_CMD) | FW_CMD_REQUEST | - FW_CMD_WRITE | FW_CMD_EXEC | - FW_EQ_ETH_CMD_PFN(adap->fn) | FW_EQ_ETH_CMD_VFN(0)); - c.alloc_to_len16 = htonl(FW_EQ_ETH_CMD_ALLOC | - FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); - c.viid_pkd = htonl(FW_EQ_ETH_CMD_VIID(pi->viid)); - c.fetchszm_to_iqid = htonl(FW_EQ_ETH_CMD_HOSTFCMODE(2) | - FW_EQ_ETH_CMD_PCIECHN(pi->tx_chan) | - FW_EQ_ETH_CMD_FETCHRO(1) | - FW_EQ_ETH_CMD_IQID(iqid)); - c.dcaen_to_eqsize = 
htonl(FW_EQ_ETH_CMD_FBMIN(2) | - FW_EQ_ETH_CMD_FBMAX(3) | - FW_EQ_ETH_CMD_CIDXFTHRESH(5) | - FW_EQ_ETH_CMD_EQSIZE(nentries)); + c.op_to_vfn = htonl(FW_CMD_OP_V(FW_EQ_ETH_CMD) | FW_CMD_REQUEST_F | + FW_CMD_WRITE_F | FW_CMD_EXEC_F | + FW_EQ_ETH_CMD_PFN_V(adap->pf) | + FW_EQ_ETH_CMD_VFN_V(0)); + c.alloc_to_len16 = htonl(FW_EQ_ETH_CMD_ALLOC_F | + FW_EQ_ETH_CMD_EQSTART_F | FW_LEN16(c)); + c.viid_pkd = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F | + FW_EQ_ETH_CMD_VIID_V(pi->viid)); + c.fetchszm_to_iqid = + htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) | + FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) | + FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid)); + c.dcaen_to_eqsize = + htonl(FW_EQ_ETH_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) | + FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) | + FW_EQ_ETH_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) | + FW_EQ_ETH_CMD_EQSIZE_V(nentries)); c.eqaddr = cpu_to_be64(txq->q.phys_addr); - ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c); + ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c); if (ret) { kfree(txq->q.sdesc); txq->q.sdesc = NULL; @@ -2253,7 +2684,7 @@ return ret; } - init_txq(adap, &txq->q, FW_EQ_ETH_CMD_EQID_GET(ntohl(c.eqid_pkd))); + init_txq(adap, &txq->q, FW_EQ_ETH_CMD_EQID_G(ntohl(c.eqid_pkd))); txq->txq = netdevq; txq->tso = txq->tx_cso = txq->vlan_ins = 0; txq->mapping_err = 0; @@ -2274,29 +2705,30 @@ txq->q.desc = alloc_ring(adap->pdev_dev, nentries, sizeof(struct tx_desc), 0, &txq->q.phys_addr, - NULL, 0, NUMA_NO_NODE); + NULL, 0, dev_to_node(adap->pdev_dev)); if (!txq->q.desc) return -ENOMEM; - c.op_to_vfn = htonl(FW_CMD_OP(FW_EQ_CTRL_CMD) | FW_CMD_REQUEST | - FW_CMD_WRITE | FW_CMD_EXEC | - FW_EQ_CTRL_CMD_PFN(adap->fn) | - FW_EQ_CTRL_CMD_VFN(0)); - c.alloc_to_len16 = htonl(FW_EQ_CTRL_CMD_ALLOC | - FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c)); - c.cmpliqid_eqid = htonl(FW_EQ_CTRL_CMD_CMPLIQID(cmplqid)); + c.op_to_vfn = htonl(FW_CMD_OP_V(FW_EQ_CTRL_CMD) | FW_CMD_REQUEST_F | + FW_CMD_WRITE_F | FW_CMD_EXEC_F | + 
FW_EQ_CTRL_CMD_PFN_V(adap->pf) | + FW_EQ_CTRL_CMD_VFN_V(0)); + c.alloc_to_len16 = htonl(FW_EQ_CTRL_CMD_ALLOC_F | + FW_EQ_CTRL_CMD_EQSTART_F | FW_LEN16(c)); + c.cmpliqid_eqid = htonl(FW_EQ_CTRL_CMD_CMPLIQID_V(cmplqid)); c.physeqid_pkd = htonl(0); - c.fetchszm_to_iqid = htonl(FW_EQ_CTRL_CMD_HOSTFCMODE(2) | - FW_EQ_CTRL_CMD_PCIECHN(pi->tx_chan) | - FW_EQ_CTRL_CMD_FETCHRO | - FW_EQ_CTRL_CMD_IQID(iqid)); - c.dcaen_to_eqsize = htonl(FW_EQ_CTRL_CMD_FBMIN(2) | - FW_EQ_CTRL_CMD_FBMAX(3) | - FW_EQ_CTRL_CMD_CIDXFTHRESH(5) | - FW_EQ_CTRL_CMD_EQSIZE(nentries)); + c.fetchszm_to_iqid = + htonl(FW_EQ_CTRL_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) | + FW_EQ_CTRL_CMD_PCIECHN_V(pi->tx_chan) | + FW_EQ_CTRL_CMD_FETCHRO_F | FW_EQ_CTRL_CMD_IQID_V(iqid)); + c.dcaen_to_eqsize = + htonl(FW_EQ_CTRL_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) | + FW_EQ_CTRL_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) | + FW_EQ_CTRL_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) | + FW_EQ_CTRL_CMD_EQSIZE_V(nentries)); c.eqaddr = cpu_to_be64(txq->q.phys_addr); - ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c); + ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c); if (ret) { dma_free_coherent(adap->pdev_dev, nentries * sizeof(struct tx_desc), @@ -2305,7 +2737,7 @@ return ret; } - init_txq(adap, &txq->q, FW_EQ_CTRL_CMD_EQID_GET(ntohl(c.cmpliqid_eqid))); + init_txq(adap, &txq->q, FW_EQ_CTRL_CMD_EQID_G(ntohl(c.cmpliqid_eqid))); txq->adap = adap; skb_queue_head_init(&txq->sendq); tasklet_init(&txq->qresume_tsk, restart_ctrlq, (unsigned long)txq); @@ -2332,23 +2764,24 @@ return -ENOMEM; memset(&c, 0, sizeof(c)); - c.op_to_vfn = htonl(FW_CMD_OP(FW_EQ_OFLD_CMD) | FW_CMD_REQUEST | - FW_CMD_WRITE | FW_CMD_EXEC | - FW_EQ_OFLD_CMD_PFN(adap->fn) | - FW_EQ_OFLD_CMD_VFN(0)); - c.alloc_to_len16 = htonl(FW_EQ_OFLD_CMD_ALLOC | - FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c)); - c.fetchszm_to_iqid = htonl(FW_EQ_OFLD_CMD_HOSTFCMODE(2) | - FW_EQ_OFLD_CMD_PCIECHN(pi->tx_chan) | - FW_EQ_OFLD_CMD_FETCHRO(1) | - FW_EQ_OFLD_CMD_IQID(iqid)); - c.dcaen_to_eqsize = 
htonl(FW_EQ_OFLD_CMD_FBMIN(2) | - FW_EQ_OFLD_CMD_FBMAX(3) | - FW_EQ_OFLD_CMD_CIDXFTHRESH(5) | - FW_EQ_OFLD_CMD_EQSIZE(nentries)); + c.op_to_vfn = htonl(FW_CMD_OP_V(FW_EQ_OFLD_CMD) | FW_CMD_REQUEST_F | + FW_CMD_WRITE_F | FW_CMD_EXEC_F | + FW_EQ_OFLD_CMD_PFN_V(adap->pf) | + FW_EQ_OFLD_CMD_VFN_V(0)); + c.alloc_to_len16 = htonl(FW_EQ_OFLD_CMD_ALLOC_F | + FW_EQ_OFLD_CMD_EQSTART_F | FW_LEN16(c)); + c.fetchszm_to_iqid = + htonl(FW_EQ_OFLD_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) | + FW_EQ_OFLD_CMD_PCIECHN_V(pi->tx_chan) | + FW_EQ_OFLD_CMD_FETCHRO_F | FW_EQ_OFLD_CMD_IQID_V(iqid)); + c.dcaen_to_eqsize = + htonl(FW_EQ_OFLD_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) | + FW_EQ_OFLD_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) | + FW_EQ_OFLD_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) | + FW_EQ_OFLD_CMD_EQSIZE_V(nentries)); c.eqaddr = cpu_to_be64(txq->q.phys_addr); - ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c); + ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c); if (ret) { kfree(txq->q.sdesc); txq->q.sdesc = NULL; @@ -2359,7 +2792,7 @@ return ret; } - init_txq(adap, &txq->q, FW_EQ_OFLD_CMD_EQID_GET(ntohl(c.eqid_pkd))); + init_txq(adap, &txq->q, FW_EQ_OFLD_CMD_EQID_G(ntohl(c.eqid_pkd))); txq->adap = adap; skb_queue_head_init(&txq->sendq); tasklet_init(&txq->qresume_tsk, restart_ofldq, (unsigned long)txq); @@ -2387,10 +2820,11 @@ unsigned int fl_id = fl ? 
fl->cntxt_id : 0xffff; adap->sge.ingr_map[rq->cntxt_id - adap->sge.ingr_start] = NULL; - t4_iq_free(adap, adap->fn, adap->fn, 0, FW_IQ_TYPE_FL_INT_CAP, + t4_iq_free(adap, adap->mbox, adap->pf, 0, FW_IQ_TYPE_FL_INT_CAP, rq->cntxt_id, fl_id, 0xffff); dma_free_coherent(adap->pdev_dev, (rq->size + 1) * rq->iqe_len, rq->desc, rq->phys_addr); + napi_hash_del(&rq->napi); netif_napi_del(&rq->napi); rq->netdev = NULL; rq->cntxt_id = rq->abs_id = 0; @@ -2408,6 +2842,22 @@ } /** + * t4_free_ofld_rxqs - free a block of consecutive Rx queues + * @adap: the adapter + * @n: number of queues + * @q: pointer to first queue + * + * Release the resources of a consecutive block of offload Rx queues. + */ +void t4_free_ofld_rxqs(struct adapter *adap, int n, struct sge_ofld_rxq *q) +{ + for ( ; n; n--, q++) + if (q->rspq.desc) + free_rspq_fl(adap, &q->rspq, + q->fl.size ? &q->fl : NULL); +} + +/** * t4_free_sge_resources - free SGE resources * @adap: the adapter * @@ -2418,14 +2868,14 @@ int i; struct sge_eth_rxq *eq = adap->sge.ethrxq; struct sge_eth_txq *etq = adap->sge.ethtxq; - struct sge_ofld_rxq *oq = adap->sge.ofldrxq; /* clean up Ethernet Tx/Rx queues */ for (i = 0; i < adap->sge.ethqsets; i++, eq++, etq++) { if (eq->rspq.desc) - free_rspq_fl(adap, &eq->rspq, &eq->fl); + free_rspq_fl(adap, &eq->rspq, + eq->fl.size ? 
&eq->fl : NULL); if (etq->q.desc) { - t4_eth_eq_free(adap, adap->fn, adap->fn, 0, + t4_eth_eq_free(adap, adap->mbox, adap->pf, 0, etq->q.cntxt_id); free_tx_desc(adap, &etq->q, etq->q.in_use, true); kfree(etq->q.sdesc); @@ -2434,14 +2884,9 @@ } /* clean up RDMA and iSCSI Rx queues */ - for (i = 0; i < adap->sge.ofldqsets; i++, oq++) { - if (oq->rspq.desc) - free_rspq_fl(adap, &oq->rspq, &oq->fl); - } - for (i = 0, oq = adap->sge.rdmarxq; i < adap->sge.rdmaqs; i++, oq++) { - if (oq->rspq.desc) - free_rspq_fl(adap, &oq->rspq, &oq->fl); - } + t4_free_ofld_rxqs(adap, adap->sge.ofldqsets, adap->sge.ofldrxq); + t4_free_ofld_rxqs(adap, adap->sge.rdmaqs, adap->sge.rdmarxq); + t4_free_ofld_rxqs(adap, adap->sge.rdmaciqs, adap->sge.rdmaciq); /* clean up offload Tx queues */ for (i = 0; i < ARRAY_SIZE(adap->sge.ofldtxq); i++) { @@ -2449,7 +2894,7 @@ if (q->q.desc) { tasklet_kill(&q->qresume_tsk); - t4_ofld_eq_free(adap, adap->fn, adap->fn, 0, + t4_ofld_eq_free(adap, adap->mbox, adap->pf, 0, q->q.cntxt_id); free_tx_desc(adap, &q->q, q->q.in_use, false); kfree(q->q.sdesc); @@ -2464,7 +2909,7 @@ if (cq->q.desc) { tasklet_kill(&cq->qresume_tsk); - t4_ctrl_eq_free(adap, adap->fn, adap->fn, 0, + t4_ctrl_eq_free(adap, adap->mbox, adap->pf, 0, cq->q.cntxt_id); __skb_queue_purge(&cq->sendq); free_txq(adap, &cq->q); @@ -2478,7 +2923,8 @@ free_rspq_fl(adap, &adap->sge.intrq, NULL); /* clear the reverse egress queue map */ - memset(adap->sge.egr_map, 0, sizeof(adap->sge.egr_map)); + memset(adap->sge.egr_map, 0, + adap->sge.egr_sz * sizeof(*adap->sge.egr_map)); } void t4_sge_start(struct adapter *adap) @@ -2524,24 +2970,11 @@ } /** - * t4_sge_init - initialize SGE + * t4_sge_init_soft - grab core SGE values needed by SGE code * @adap: the adapter * - * Performs SGE initialization needed every time after a chip reset. - * We do not initialize any of the queues here, instead the driver - * top-level must request them individually. - * - * Called in two different modes: - * - * 1. 
Perform actual hardware initialization and record hard-coded - * parameters which were used. This gets used when we're the - * Master PF and the Firmware Configuration File support didn't - * work for some reason. - * - * 2. We're not the Master PF or initialization was performed with - * a Firmware Configuration File. In this case we need to grab - * any of the SGE operating parameters that we need to have in - * order to do our job and make sure we can live with them ... + * We need to grab the SGE operating parameters that we need to have + * in order to do our job and make sure we can live with them. */ static int t4_sge_init_soft(struct adapter *adap) @@ -2556,8 +2989,8 @@ * process_responses() and that only packet data is going to the * Free Lists. */ - if ((t4_read_reg(adap, SGE_CONTROL) & RXPKTCPLMODE_MASK) != - RXPKTCPLMODE(X_RXPKTCPLMODE_SPLIT)) { + if ((t4_read_reg(adap, SGE_CONTROL_A) & RXPKTCPLMODE_F) != + RXPKTCPLMODE_V(RXPKTCPLMODE_SPLIT_X)) { dev_err(adap->pdev_dev, "bad SGE CPL MODE\n"); return -EINVAL; } @@ -2571,18 +3004,26 @@ * XXX meet our needs! */ #define READ_FL_BUF(x) \ - t4_read_reg(adap, SGE_FL_BUFFER_SIZE0+(x)*sizeof(u32)) + t4_read_reg(adap, SGE_FL_BUFFER_SIZE0_A+(x)*sizeof(u32)) fl_small_pg = READ_FL_BUF(RX_SMALL_PG_BUF); fl_large_pg = READ_FL_BUF(RX_LARGE_PG_BUF); fl_small_mtu = READ_FL_BUF(RX_SMALL_MTU_BUF); fl_large_mtu = READ_FL_BUF(RX_LARGE_MTU_BUF); + /* We only bother using the Large Page logic if the Large Page Buffer + * is larger than our Page Size Buffer. + */ + if (fl_large_pg <= fl_small_pg) + fl_large_pg = 0; + #undef READ_FL_BUF + /* The Page Size Buffer must be exactly equal to our Page Size and the + * Large Page Size Buffer should be 0 (per above) or a power of 2. 
+ */ if (fl_small_pg != PAGE_SIZE || - (fl_large_pg != 0 && (fl_large_pg <= fl_small_pg || - (fl_large_pg & (fl_large_pg-1)) != 0))) { + (fl_large_pg & (fl_large_pg-1)) != 0) { dev_err(adap->pdev_dev, "bad SGE FL page buffer sizes [%d, %d]\n", fl_small_pg, fl_large_pg); return -EINVAL; @@ -2601,119 +3042,83 @@ * Retrieve our RX interrupt holdoff timer values and counter * threshold values from the SGE parameters. */ - timer_value_0_and_1 = t4_read_reg(adap, SGE_TIMER_VALUE_0_AND_1); - timer_value_2_and_3 = t4_read_reg(adap, SGE_TIMER_VALUE_2_AND_3); - timer_value_4_and_5 = t4_read_reg(adap, SGE_TIMER_VALUE_4_AND_5); + timer_value_0_and_1 = t4_read_reg(adap, SGE_TIMER_VALUE_0_AND_1_A); + timer_value_2_and_3 = t4_read_reg(adap, SGE_TIMER_VALUE_2_AND_3_A); + timer_value_4_and_5 = t4_read_reg(adap, SGE_TIMER_VALUE_4_AND_5_A); s->timer_val[0] = core_ticks_to_us(adap, - TIMERVALUE0_GET(timer_value_0_and_1)); + TIMERVALUE0_G(timer_value_0_and_1)); s->timer_val[1] = core_ticks_to_us(adap, - TIMERVALUE1_GET(timer_value_0_and_1)); + TIMERVALUE1_G(timer_value_0_and_1)); s->timer_val[2] = core_ticks_to_us(adap, - TIMERVALUE2_GET(timer_value_2_and_3)); + TIMERVALUE2_G(timer_value_2_and_3)); s->timer_val[3] = core_ticks_to_us(adap, - TIMERVALUE3_GET(timer_value_2_and_3)); + TIMERVALUE3_G(timer_value_2_and_3)); s->timer_val[4] = core_ticks_to_us(adap, - TIMERVALUE4_GET(timer_value_4_and_5)); + TIMERVALUE4_G(timer_value_4_and_5)); s->timer_val[5] = core_ticks_to_us(adap, - TIMERVALUE5_GET(timer_value_4_and_5)); + TIMERVALUE5_G(timer_value_4_and_5)); - ingress_rx_threshold = t4_read_reg(adap, SGE_INGRESS_RX_THRESHOLD); - s->counter_val[0] = THRESHOLD_0_GET(ingress_rx_threshold); - s->counter_val[1] = THRESHOLD_1_GET(ingress_rx_threshold); - s->counter_val[2] = THRESHOLD_2_GET(ingress_rx_threshold); - s->counter_val[3] = THRESHOLD_3_GET(ingress_rx_threshold); - - return 0; -} - -static int t4_sge_init_hard(struct adapter *adap) -{ - struct sge *s = &adap->sge; - - /* - * Set up our 
basic SGE mode to deliver CPL messages to our Ingress - * Queue and Packet Date to the Free List. - */ - t4_set_reg_field(adap, SGE_CONTROL, RXPKTCPLMODE_MASK, - RXPKTCPLMODE_MASK); - - /* - * Set up to drop DOORBELL writes when the DOORBELL FIFO overflows - * and generate an interrupt when this occurs so we can recover. - */ - if (is_t4(adap->chip)) { - t4_set_reg_field(adap, A_SGE_DBFIFO_STATUS, - V_HP_INT_THRESH(M_HP_INT_THRESH) | - V_LP_INT_THRESH(M_LP_INT_THRESH), - V_HP_INT_THRESH(dbfifo_int_thresh) | - V_LP_INT_THRESH(dbfifo_int_thresh)); - } else { - t4_set_reg_field(adap, A_SGE_DBFIFO_STATUS, - V_LP_INT_THRESH_T5(M_LP_INT_THRESH_T5), - V_LP_INT_THRESH_T5(dbfifo_int_thresh)); - t4_set_reg_field(adap, SGE_DBFIFO_STATUS2, - V_HP_INT_THRESH_T5(M_HP_INT_THRESH_T5), - V_HP_INT_THRESH_T5(dbfifo_int_thresh)); - } - t4_set_reg_field(adap, A_SGE_DOORBELL_CONTROL, F_ENABLE_DROP, - F_ENABLE_DROP); - - /* - * SGE_FL_BUFFER_SIZE0 (RX_SMALL_PG_BUF) is set up by - * t4_fixup_host_params(). - */ - s->fl_pg_order = FL_PG_ORDER; - if (s->fl_pg_order) - t4_write_reg(adap, - SGE_FL_BUFFER_SIZE0+RX_LARGE_PG_BUF*sizeof(u32), - PAGE_SIZE << FL_PG_ORDER); - t4_write_reg(adap, SGE_FL_BUFFER_SIZE0+RX_SMALL_MTU_BUF*sizeof(u32), - FL_MTU_SMALL_BUFSIZE(adap)); - t4_write_reg(adap, SGE_FL_BUFFER_SIZE0+RX_LARGE_MTU_BUF*sizeof(u32), - FL_MTU_LARGE_BUFSIZE(adap)); - - /* - * Note that the SGE Ingress Packet Count Interrupt Threshold and - * Timer Holdoff values must be supplied by our caller. 
- */ - t4_write_reg(adap, SGE_INGRESS_RX_THRESHOLD, - THRESHOLD_0(s->counter_val[0]) | - THRESHOLD_1(s->counter_val[1]) | - THRESHOLD_2(s->counter_val[2]) | - THRESHOLD_3(s->counter_val[3])); - t4_write_reg(adap, SGE_TIMER_VALUE_0_AND_1, - TIMERVALUE0(us_to_core_ticks(adap, s->timer_val[0])) | - TIMERVALUE1(us_to_core_ticks(adap, s->timer_val[1]))); - t4_write_reg(adap, SGE_TIMER_VALUE_2_AND_3, - TIMERVALUE2(us_to_core_ticks(adap, s->timer_val[2])) | - TIMERVALUE3(us_to_core_ticks(adap, s->timer_val[3]))); - t4_write_reg(adap, SGE_TIMER_VALUE_4_AND_5, - TIMERVALUE4(us_to_core_ticks(adap, s->timer_val[4])) | - TIMERVALUE5(us_to_core_ticks(adap, s->timer_val[5]))); + ingress_rx_threshold = t4_read_reg(adap, SGE_INGRESS_RX_THRESHOLD_A); + s->counter_val[0] = THRESHOLD_0_G(ingress_rx_threshold); + s->counter_val[1] = THRESHOLD_1_G(ingress_rx_threshold); + s->counter_val[2] = THRESHOLD_2_G(ingress_rx_threshold); + s->counter_val[3] = THRESHOLD_3_G(ingress_rx_threshold); return 0; } +/** + * t4_sge_init - initialize SGE + * @adap: the adapter + * + * Perform low-level SGE code initialization needed every time after a + * chip reset. + */ int t4_sge_init(struct adapter *adap) { struct sge *s = &adap->sge; - u32 sge_control; - int ret; + u32 sge_control, sge_control2, sge_conm_ctrl; + unsigned int ingpadboundary, ingpackboundary; + int ret, egress_threshold; /* * Ingress Padding Boundary and Egress Status Page Size are set up by * t4_fixup_host_params(). */ - sge_control = t4_read_reg(adap, SGE_CONTROL); - s->pktshift = PKTSHIFT_GET(sge_control); - s->stat_len = (sge_control & EGRSTATUSPAGESIZE_MASK) ? 128 : 64; - s->fl_align = 1 << (INGPADBOUNDARY_GET(sge_control) + - X_INGPADBOUNDARY_SHIFT); + sge_control = t4_read_reg(adap, SGE_CONTROL_A); + s->pktshift = PKTSHIFT_G(sge_control); + s->stat_len = (sge_control & EGRSTATUSPAGESIZE_F) ? 128 : 64; + + /* T4 uses a single control field to specify both the PCIe Padding and + * Packing Boundary. 
T5 introduced the ability to specify these + * separately. The actual Ingress Packet Data alignment boundary + * within Packed Buffer Mode is the maximum of these two + * specifications. (Note that it makes no real practical sense to + * have the Padding Boundary be larger than the Packing Boundary but you + * could set the chip up that way and, in fact, legacy T4 code would + * end up doing this because it would initialize the Padding Boundary and + * leave the Packing Boundary initialized to 0 (16 bytes).) + */ + ingpadboundary = 1 << (INGPADBOUNDARY_G(sge_control) + + INGPADBOUNDARY_SHIFT_X); + if (is_t4(adap->params.chip)) { + s->fl_align = ingpadboundary; + } else { + /* T5 has a different interpretation of one of the PCIe Packing + * Boundary values. + */ + sge_control2 = t4_read_reg(adap, SGE_CONTROL2_A); + ingpackboundary = INGPACKBOUNDARY_G(sge_control2); + if (ingpackboundary == INGPACKBOUNDARY_16B_X) + ingpackboundary = 16; + else + ingpackboundary = 1 << (ingpackboundary + + INGPACKBOUNDARY_SHIFT_X); - if (adap->flags & USING_SOFT_PARAMS) - ret = t4_sge_init_soft(adap); - else - ret = t4_sge_init_hard(adap); + s->fl_align = max(ingpadboundary, ingpackboundary); + } + + ret = t4_sge_init_soft(adap); if (ret < 0) return ret; @@ -2723,15 +3128,27 @@ * SGE's Egress Congestion Threshold. If it isn't, then we can get * stuck waiting for new packets while the SGE is waiting for us to * give it more Free List entries. (Note that the SGE's Egress - * Congestion Threshold is in units of 2 Free List pointers.) + * Congestion Threshold is in units of 2 Free List pointers.) For T4, + * there was only a single field to control this. For T5 there's the + * original field which now only applies to Unpacked Mode Free List + * buffers and a new field which only applies to Packed Mode Free List + * buffers. 
*/ - s->fl_starve_thres - = EGRTHRESHOLD_GET(t4_read_reg(adap, SGE_CONM_CTRL))*2 + 1; + sge_conm_ctrl = t4_read_reg(adap, SGE_CONM_CTRL_A); + if (is_t4(adap->params.chip)) + egress_threshold = EGRTHRESHOLD_G(sge_conm_ctrl); + else + egress_threshold = EGRTHRESHOLDPACKING_G(sge_conm_ctrl); + s->fl_starve_thres = 2*egress_threshold + 1; + t4_idma_monitor_init(adap, &s->idma_monitor); + + /* Set up timers used for recurring callbacks to process RX and TX + * administrative tasks. + */ setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap); setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap); - s->starve_thres = core_ticks_per_usec(adap) * 1000000; /* 1 s */ - s->idma_state[0] = s->idma_state[1] = 0; + spin_lock_init(&s->intrq_lock); return 0;