--- zzzz-none-000/linux-3.10.107/drivers/infiniband/hw/cxgb4/qp.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/drivers/infiniband/hw/cxgb4/qp.c 2021-02-04 17:41:59.000000000 +0000 @@ -58,6 +58,31 @@ module_param(max_fr_immd, int, 0644); MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immedate"); +static int alloc_ird(struct c4iw_dev *dev, u32 ird) +{ + int ret = 0; + + spin_lock_irq(&dev->lock); + if (ird <= dev->avail_ird) + dev->avail_ird -= ird; + else + ret = -ENOMEM; + spin_unlock_irq(&dev->lock); + + if (ret) + dev_warn(&dev->rdev.lldi.pdev->dev, + "device IRD resources exhausted\n"); + + return ret; +} + +static void free_ird(struct c4iw_dev *dev, int ird) +{ + spin_lock_irq(&dev->lock); + dev->avail_ird += ird; + spin_unlock_irq(&dev->lock); +} + static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state) { unsigned long flag; @@ -140,6 +165,33 @@ return 0; } +/* + * Determine the BAR2 virtual address and qid. If pbar2_pa is not NULL, + * then this is a user mapping so compute the page-aligned physical address + * for mapping. + */ +void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, + enum cxgb4_bar2_qtype qtype, + unsigned int *pbar2_qid, u64 *pbar2_pa) +{ + u64 bar2_qoffset; + int ret; + + ret = cxgb4_bar2_sge_qregs(rdev->lldi.ports[0], qid, qtype, + pbar2_pa ? 1 : 0, + &bar2_qoffset, pbar2_qid); + if (ret) + return NULL; + + if (pbar2_pa) + *pbar2_pa = (rdev->bar2_pa + bar2_qoffset) & PAGE_MASK; + + if (is_t4(rdev->lldi.adapter_type)) + return NULL; + + return rdev->bar2_kva + bar2_qoffset; +} + static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct t4_cq *rcq, struct t4_cq *scq, struct c4iw_dev_ucontext *uctx) @@ -180,9 +232,9 @@ } /* - * RQT must be a power of 2. + * RQT must be a power of 2 and at least 16 deep. */ - wq->rq.rqt_size = roundup_pow_of_two(wq->rq.size); + wq->rq.rqt_size = roundup_pow_of_two(max_t(u16, wq->rq.size, 16)); wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); if (!wq->rq.rqt_hwaddr) { ret = -ENOMEM; @@ -211,15 +263,23 @@ dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); wq->db = rdev->lldi.db_reg; - wq->gts = rdev->lldi.gts_reg; - if (user) { - wq->sq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) + - (wq->sq.qid << rdev->qpshift); - wq->sq.udb &= PAGE_MASK; - wq->rq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) + - (wq->rq.qid << rdev->qpshift); - wq->rq.udb &= PAGE_MASK; + + wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS, + &wq->sq.bar2_qid, + user ? &wq->sq.bar2_pa : NULL); + wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, T4_BAR2_QTYPE_EGRESS, + &wq->rq.bar2_qid, + user ? &wq->rq.bar2_pa : NULL); + + /* + * User mode must have bar2 access. + */ + if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) { + pr_warn(MOD "%s: sqid %u or rqid %u not in BAR2 range.\n", + pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid); + goto free_dma; } + wq->rdev = rdev; wq->rq.msn = 1; @@ -236,11 +296,11 @@ res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); memset(res_wr, 0, wr_len); res_wr->op_nres = cpu_to_be32( - FW_WR_OP(FW_RI_RES_WR) | - V_FW_RI_RES_WR_NRES(2) | - FW_WR_COMPL(1)); + FW_WR_OP_V(FW_RI_RES_WR) | + FW_RI_RES_WR_NRES_V(2) | + FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); - res_wr->cookie = (unsigned long) &wr_wait; + res_wr->cookie = (uintptr_t)&wr_wait; res = res_wr->res; res->u.sqrq.restype = FW_RI_RES_TYPE_SQ; res->u.sqrq.op = FW_RI_RES_OP_WRITE; @@ -248,22 +308,23 @@ /* * eqsize is the number of 64B entries plus the status page size. */ - eqsize = wq->sq.size * T4_SQ_NUM_SLOTS + T4_EQ_STATUS_ENTRIES; + eqsize = wq->sq.size * T4_SQ_NUM_SLOTS + + rdev->hw_queue.t4_eq_status_entries; res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( - V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ - V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ - V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ - (t4_sq_onchip(&wq->sq) ? F_FW_RI_RES_WR_ONCHIP : 0) | - V_FW_RI_RES_WR_IQID(scq->cqid)); + FW_RI_RES_WR_HOSTFCMODE_V(0) | /* no host cidx updates */ + FW_RI_RES_WR_CPRIO_V(0) | /* don't keep in chip cache */ + FW_RI_RES_WR_PCIECHN_V(0) | /* set by uP at ri_init time */ + (t4_sq_onchip(&wq->sq) ? FW_RI_RES_WR_ONCHIP_F : 0) | + FW_RI_RES_WR_IQID_V(scq->cqid)); res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( - V_FW_RI_RES_WR_DCAEN(0) | - V_FW_RI_RES_WR_DCACPU(0) | - V_FW_RI_RES_WR_FBMIN(2) | - V_FW_RI_RES_WR_FBMAX(2) | - V_FW_RI_RES_WR_CIDXFTHRESHO(0) | - V_FW_RI_RES_WR_CIDXFTHRESH(0) | - V_FW_RI_RES_WR_EQSIZE(eqsize)); + FW_RI_RES_WR_DCAEN_V(0) | + FW_RI_RES_WR_DCACPU_V(0) | + FW_RI_RES_WR_FBMIN_V(2) | + FW_RI_RES_WR_FBMAX_V(2) | + FW_RI_RES_WR_CIDXFTHRESHO_V(0) | + FW_RI_RES_WR_CIDXFTHRESH_V(0) | + FW_RI_RES_WR_EQSIZE_V(eqsize)); res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid); res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr); res++; @@ -273,20 +334,21 @@ /* * eqsize is the number of 64B entries plus the status page size. */ - eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + T4_EQ_STATUS_ENTRIES; + eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + + rdev->hw_queue.t4_eq_status_entries; res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( - V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ - V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ - V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ - V_FW_RI_RES_WR_IQID(rcq->cqid)); + FW_RI_RES_WR_HOSTFCMODE_V(0) | /* no host cidx updates */ + FW_RI_RES_WR_CPRIO_V(0) | /* don't keep in chip cache */ + FW_RI_RES_WR_PCIECHN_V(0) | /* set by uP at ri_init time */ + FW_RI_RES_WR_IQID_V(rcq->cqid)); res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( - V_FW_RI_RES_WR_DCAEN(0) | - V_FW_RI_RES_WR_DCACPU(0) | - V_FW_RI_RES_WR_FBMIN(2) | - V_FW_RI_RES_WR_FBMAX(2) | - V_FW_RI_RES_WR_CIDXFTHRESHO(0) | - V_FW_RI_RES_WR_CIDXFTHRESH(0) | - V_FW_RI_RES_WR_EQSIZE(eqsize)); + FW_RI_RES_WR_DCAEN_V(0) | + FW_RI_RES_WR_DCACPU_V(0) | + FW_RI_RES_WR_FBMIN_V(2) | + FW_RI_RES_WR_FBMAX_V(2) | + FW_RI_RES_WR_CIDXFTHRESHO_V(0) | + FW_RI_RES_WR_CIDXFTHRESH_V(0) | + FW_RI_RES_WR_EQSIZE_V(eqsize)); res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid); res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); @@ -299,9 +361,9 @@ if (ret) goto free_dma; - PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx\n", + PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p sq_bar2_addr %p rq_bar2_addr %p\n", __func__, wq->sq.qid, wq->rq.qid, wq->db, - (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb); + wq->sq.bar2_va, wq->rq.bar2_va); return 0; free_dma: @@ -406,25 +468,27 @@ case IB_WR_SEND: if (wr->send_flags & IB_SEND_SOLICITED) wqe->send.sendop_pkd = cpu_to_be32( - V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND_WITH_SE)); + FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_SE)); else wqe->send.sendop_pkd = cpu_to_be32( - V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND)); + FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND)); wqe->send.stag_inv = 0; break; case IB_WR_SEND_WITH_INV: if (wr->send_flags & IB_SEND_SOLICITED) wqe->send.sendop_pkd = cpu_to_be32( - V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND_WITH_SE_INV)); + FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_SE_INV)); else wqe->send.sendop_pkd = cpu_to_be32( - V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND_WITH_INV)); + FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_INV)); wqe->send.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); break; default: return -EINVAL; } + wqe->send.r3 = 0; + wqe->send.r4 = 0; plen = 0; if (wr->num_sge) { @@ -468,8 +532,8 @@ if (wr->num_sge > T4_MAX_SEND_SGE) return -EINVAL; wqe->write.r2 = 0; - wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey); - wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr); + wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); + wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); if (wr->num_sge) { if (wr->send_flags & IB_SEND_INLINE) { ret = build_immd(sq, wqe->write.u.immd_src, wr, @@ -506,10 +570,10 @@ if (wr->num_sge > 1) return -EINVAL; if (wr->num_sge) { - wqe->read.stag_src = cpu_to_be32(wr->wr.rdma.rkey); - wqe->read.to_src_hi = cpu_to_be32((u32)(wr->wr.rdma.remote_addr + wqe->read.stag_src = cpu_to_be32(rdma_wr(wr)->rkey); + wqe->read.to_src_hi = cpu_to_be32((u32)(rdma_wr(wr)->remote_addr >> 32)); - wqe->read.to_src_lo = cpu_to_be32((u32)wr->wr.rdma.remote_addr); + wqe->read.to_src_lo = cpu_to_be32((u32)rdma_wr(wr)->remote_addr); wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey); wqe->read.plen = cpu_to_be32(wr->sg_list[0].length); wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr @@ -545,46 +609,41 @@ return 0; } -static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, - struct ib_send_wr *wr, u8 *len16, u8 t5dev) +static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, + struct ib_reg_wr *wr, u8 *len16, u8 t5dev) { - + struct c4iw_mr *mhp = to_c4iw_mr(wr->mr); struct fw_ri_immd *imdp; __be64 *p; int i; - int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32); + int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32); int rem; - if (wr->wr.fast_reg.page_list_len > T4_MAX_FR_DEPTH) + if (mhp->mpl_len > t4_max_fr_depth(use_dsgl)) return -EINVAL; wqe->fr.qpbinde_to_dcacpu = 0; - wqe->fr.pgsz_shift = wr->wr.fast_reg.page_shift - 12; + wqe->fr.pgsz_shift = ilog2(wr->mr->page_size) - 12; wqe->fr.addr_type = FW_RI_VA_BASED_TO; - wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->wr.fast_reg.access_flags); + wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->access); wqe->fr.len_hi = 0; - wqe->fr.len_lo = cpu_to_be32(wr->wr.fast_reg.length); - wqe->fr.stag = cpu_to_be32(wr->wr.fast_reg.rkey); - wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); - wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start & + wqe->fr.len_lo = cpu_to_be32(mhp->ibmr.length); + wqe->fr.stag = cpu_to_be32(wr->key); + wqe->fr.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32); + wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova & 0xffffffff); if (t5dev && use_dsgl && (pbllen > max_fr_immd)) { - struct c4iw_fr_page_list *c4pl = - to_c4iw_fr_page_list(wr->wr.fast_reg.page_list); struct fw_ri_dsgl *sglp; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { - wr->wr.fast_reg.page_list->page_list[i] = (__force u64) - cpu_to_be64((u64) - wr->wr.fast_reg.page_list->page_list[i]); - } + for (i = 0; i < mhp->mpl_len; i++) + mhp->mpl[i] = (__force u64)cpu_to_be64((u64)mhp->mpl[i]); sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1); sglp->op = FW_RI_DATA_DSGL; sglp->r1 = 0; sglp->nsge = cpu_to_be16(1); - sglp->addr0 = cpu_to_be64(c4pl->dma_addr); + sglp->addr0 = cpu_to_be64(mhp->mpl_addr); sglp->len0 = cpu_to_be32(pbllen); *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16); @@ -596,9 +655,8 @@ imdp->immdlen = cpu_to_be32(pbllen); p = (__be64 *)(imdp + 1); rem = pbllen; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { - *p = cpu_to_be64( - (u64)wr->wr.fast_reg.page_list->page_list[i]); + for (i = 0; i < mhp->mpl_len; i++) { + *p = cpu_to_be64((u64)mhp->mpl[i]); rem -= sizeof(*p); if (++p == (__be64 *)&sq->queue[sq->size]) p = (__be64 *)sq->queue; @@ -638,6 +696,46 @@ wake_up(&(to_c4iw_qp(qp)->wait)); } +static void add_to_fc_list(struct list_head *head, struct list_head *entry) +{ + if (list_empty(entry)) + list_add_tail(entry, head); +} + +static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc) +{ + unsigned long flags; + + spin_lock_irqsave(&qhp->rhp->lock, flags); + spin_lock(&qhp->lock); + if (qhp->rhp->db_state == NORMAL) + t4_ring_sq_db(&qhp->wq, inc, NULL); + else { + add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); + qhp->wq.sq.wq_pidx_inc += inc; + } + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&qhp->rhp->lock, flags); + return 0; +} + +static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) +{ + unsigned long flags; + + spin_lock_irqsave(&qhp->rhp->lock, flags); + spin_lock(&qhp->lock); + if (qhp->rhp->db_state == NORMAL) + t4_ring_rq_db(&qhp->wq, inc, NULL); + else { + add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); + qhp->wq.rq.wq_pidx_inc += inc; + } + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&qhp->rhp->lock, flags); + return 0; +} + int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -646,7 +744,7 @@ enum fw_wr_opcodes fw_opcode = 0; enum fw_ri_wr_flags fw_flags; struct c4iw_qp *qhp; - union t4_wr *wqe; + union t4_wr *wqe = NULL; u32 num_wrs; struct t4_swsqe *swsqe; unsigned long flag; @@ -675,7 +773,7 @@ fw_flags = 0; if (wr->send_flags & IB_SEND_SOLICITED) fw_flags |= FW_RI_SOLICITED_EVENT_FLAG; - if (wr->send_flags & IB_SEND_SIGNALED) + if (wr->send_flags & IB_SEND_SIGNALED || qhp->sq_sig_all) fw_flags |= FW_RI_COMPLETION_FLAG; swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; switch (wr->opcode) { @@ -710,13 +808,13 @@ if (!qhp->wq.sq.oldest_read) qhp->wq.sq.oldest_read = swsqe; break; - case IB_WR_FAST_REG_MR: + case IB_WR_REG_MR: fw_opcode = FW_RI_FR_NSMR_WR; swsqe->opcode = FW_RI_FAST_REGISTER; - err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16, - is_t5( - qhp->rhp->rdev.lldi.adapter_type) ? - 1 : 0); + err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), &len16, + is_t5( + qhp->rhp->rdev.lldi.adapter_type) ? + 1 : 0); break; case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) @@ -736,8 +834,15 @@ } swsqe->idx = qhp->wq.sq.pidx; swsqe->complete = 0; - swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED); + swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) || + qhp->sq_sig_all; + swsqe->flushed = 0; swsqe->wr_id = wr->wr_id; + if (c4iw_wr_log) { + swsqe->sge_ts = cxgb4_read_sge_timestamp( + qhp->rhp->rdev.lldi.ports[0]); + getnstimeofday(&swsqe->host_ts); + } init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16); @@ -749,9 +854,13 @@ t4_sq_produce(&qhp->wq, len16); idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); } - if (t4_wq_db_enabled(&qhp->wq)) - t4_ring_sq_db(&qhp->wq, idx); - spin_unlock_irqrestore(&qhp->lock, flag); + if (!qhp->rhp->rdev.status_page->db_off) { + t4_ring_sq_db(&qhp->wq, idx, wqe); + spin_unlock_irqrestore(&qhp->lock, flag); + } else { + spin_unlock_irqrestore(&qhp->lock, flag); + ring_kernel_sq_db(qhp, idx); + } return err; } @@ -760,7 +869,7 @@ { int err = 0; struct c4iw_qp *qhp; - union t4_recv_wr *wqe; + union t4_recv_wr *wqe = NULL; u32 num_wrs; u8 len16 = 0; unsigned long flag; @@ -796,6 +905,13 @@ } qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].wr_id = wr->wr_id; + if (c4iw_wr_log) { + qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].sge_ts = + cxgb4_read_sge_timestamp( + qhp->rhp->rdev.lldi.ports[0]); + getnstimeofday( + &qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].host_ts); + } wqe->recv.opcode = FW_RI_RECV_WR; wqe->recv.r1 = 0; @@ -811,9 +927,13 @@ wr = wr->next; num_wrs--; } - if (t4_wq_db_enabled(&qhp->wq)) - t4_ring_rq_db(&qhp->wq, idx); - spin_unlock_irqrestore(&qhp->lock, flag); + if (!qhp->rhp->rdev.status_page->db_off) { + t4_ring_rq_db(&qhp->wq, idx, wqe); + spin_unlock_irqrestore(&qhp->lock, flag); + } else { + spin_unlock_irqrestore(&qhp->lock, flag); + ring_kernel_rq_db(qhp, idx); + } return err; } @@ -975,10 +1095,10 @@ wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); memset(wqe, 0, sizeof *wqe); - wqe->op_compl = cpu_to_be32(FW_WR_OP(FW_RI_INIT_WR)); + wqe->op_compl = cpu_to_be32(FW_WR_OP_V(FW_RI_INIT_WR)); wqe->flowid_len16 = cpu_to_be32( - FW_WR_FLOWID(qhp->ep->hwtid) | - FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); + FW_WR_FLOWID_V(qhp->ep->hwtid) | + FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16))); wqe->u.terminate.type = FW_RI_TYPE_TERMINATE; wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term); @@ -998,7 +1118,7 @@ struct c4iw_cq *schp) { int count; - int flushed; + int rq_flushed, sq_flushed; unsigned long flag; PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); @@ -1006,29 +1126,50 @@ /* locking hierarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&rchp->lock, flag); spin_lock(&qhp->lock); - c4iw_flush_hw_cq(&rchp->cq); + + if (qhp->wq.flushed) { + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&rchp->lock, flag); + return; + } + qhp->wq.flushed = 1; + + c4iw_flush_hw_cq(rchp); c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); - flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); + rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&rchp->lock, flag); - if (flushed) { - spin_lock_irqsave(&rchp->comp_handler_lock, flag); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); - spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); - } /* locking hierarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&schp->lock, flag); spin_lock(&qhp->lock); - c4iw_flush_hw_cq(&schp->cq); - c4iw_count_scqes(&schp->cq, &qhp->wq, &count); - flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count); + if (schp != rchp) + c4iw_flush_hw_cq(schp); + sq_flushed = c4iw_flush_sq(qhp); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&schp->lock, flag); - if (flushed) { - spin_lock_irqsave(&schp->comp_handler_lock, flag); - (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); - spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + + if (schp == rchp) { + if (t4_clear_cq_armed(&rchp->cq) && + (rq_flushed || sq_flushed)) { + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, + rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + } + } else { + if (t4_clear_cq_armed(&rchp->cq) && rq_flushed) { + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, + rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + } + if (t4_clear_cq_armed(&schp->cq) && sq_flushed) { + spin_lock_irqsave(&schp->comp_handler_lock, flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + } } } @@ -1037,11 +1178,11 @@ struct c4iw_cq *rchp, *schp; unsigned long flag; - rchp = get_chp(qhp->rhp, qhp->attr.rcq); - schp = get_chp(qhp->rhp, qhp->attr.scq); + rchp = to_c4iw_cq(qhp->ibqp.recv_cq); + schp = to_c4iw_cq(qhp->ibqp.send_cq); + t4_set_wq_in_error(&qhp->wq); if (qhp->ibqp.uobject) { - t4_set_wq_in_error(&qhp->wq); t4_set_cq_in_error(&rchp->cq); spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); @@ -1076,12 +1217,12 @@ wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); memset(wqe, 0, sizeof *wqe); wqe->op_compl = cpu_to_be32( - FW_WR_OP(FW_RI_INIT_WR) | - FW_WR_COMPL(1)); + FW_WR_OP_V(FW_RI_INIT_WR) | + FW_WR_COMPL_F); wqe->flowid_len16 = cpu_to_be32( - FW_WR_FLOWID(ep->hwtid) | - FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); - wqe->cookie = (unsigned long) &ep->com.wr_wait; + FW_WR_FLOWID_V(ep->hwtid) | + FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16))); + wqe->cookie = (uintptr_t)&ep->com.wr_wait; wqe->u.fini.type = FW_RI_TYPE_FINI; ret = c4iw_ofld_send(&rhp->rdev, skb); @@ -1126,29 +1267,37 @@ int ret; struct sk_buff *skb; - PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, - qhp->ep->hwtid); + PDBG("%s qhp %p qid 0x%x tid %u ird %u ord %u\n", __func__, qhp, + qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord); skb = alloc_skb(sizeof *wqe, GFP_KERNEL); - if (!skb) - return -ENOMEM; + if (!skb) { + ret = -ENOMEM; + goto out; + } + ret = alloc_ird(rhp, qhp->attr.max_ird); + if (ret) { + qhp->attr.max_ird = 0; + kfree_skb(skb); + goto out; + } set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx); wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); memset(wqe, 0, sizeof *wqe); wqe->op_compl = cpu_to_be32( - FW_WR_OP(FW_RI_INIT_WR) | - FW_WR_COMPL(1)); + FW_WR_OP_V(FW_RI_INIT_WR) | + FW_WR_COMPL_F); wqe->flowid_len16 = cpu_to_be32( - FW_WR_FLOWID(qhp->ep->hwtid) | - FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); + FW_WR_FLOWID_V(qhp->ep->hwtid) | + FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16))); - wqe->cookie = (unsigned long) &qhp->ep->com.wr_wait; + wqe->cookie = (uintptr_t)&qhp->ep->com.wr_wait; wqe->u.init.type = FW_RI_TYPE_INIT; wqe->u.init.mpareqbit_p2ptype = - V_FW_RI_WR_MPAREQBIT(qhp->attr.mpa_attr.initiator) | - V_FW_RI_WR_P2PTYPE(qhp->attr.mpa_attr.p2p_type); + FW_RI_WR_MPAREQBIT_V(qhp->attr.mpa_attr.initiator) | + FW_RI_WR_P2PTYPE_V(qhp->attr.mpa_attr.p2p_type); wqe->u.init.mpa_attrs = FW_RI_MPA_IETF_ENABLE; if (qhp->attr.mpa_attr.recv_marker_enabled) wqe->u.init.mpa_attrs |= FW_RI_MPA_RX_MARKER_ENABLE; @@ -1182,44 +1331,19 @@ ret = c4iw_ofld_send(&rhp->rdev, skb); if (ret) - goto out; + goto err1; ret = c4iw_wait_for_reply(&rhp->rdev, &qhp->ep->com.wr_wait, qhp->ep->hwtid, qhp->wq.sq.qid, __func__); + if (!ret) + goto out; +err1: + free_ird(rhp, qhp->attr.max_ird); out: PDBG("%s ret %d\n", __func__, ret); return ret; } -/* - * Called by the library when the qp has user dbs disabled due to - * a DB_FULL condition. This function will single-thread all user - * DB rings to avoid overflowing the hw db-fifo. - */ -static int ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 inc) -{ - int delay = db_delay_usecs; - - mutex_lock(&qhp->rhp->db_mutex); - do { - - /* - * The interrupt threshold is dbfifo_int_thresh << 6. So - * make sure we don't cross that and generate an interrupt. - */ - if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) < - (qhp->rhp->rdev.lldi.dbfifo_int_thresh << 5)) { - writel(QID(qid) | PIDX(inc), qhp->wq.db); - break; - } - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(usecs_to_jiffies(delay)); - delay = min(delay << 1, 2000); - } while (1); - mutex_unlock(&qhp->rhp->db_mutex); - return 0; -} - int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, enum c4iw_qp_attr_mask mask, struct c4iw_qp_attributes *attrs, @@ -1259,7 +1383,7 @@ newattr.max_ord = attrs->max_ord; } if (mask & C4IW_QP_ATTR_MAX_IRD) { - if (attrs->max_ird > c4iw_max_read_depth) { + if (attrs->max_ird > cur_max_read_depth(rhp)) { ret = -EINVAL; goto out; } @@ -1269,11 +1393,11 @@ } if (mask & C4IW_QP_ATTR_SQ_DB) { - ret = ring_kernel_db(qhp, qhp->wq.sq.qid, attrs->sq_db_inc); + ret = ring_kernel_sq_db(qhp, attrs->sq_db_inc); goto out; } if (mask & C4IW_QP_ATTR_RQ_DB) { - ret = ring_kernel_db(qhp, qhp->wq.rq.qid, attrs->rq_db_inc); + ret = ring_kernel_rq_db(qhp, attrs->rq_db_inc); goto out; } @@ -1323,6 +1447,7 @@ switch (attrs->next_state) { case C4IW_QP_STATE_CLOSING: BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2); + t4_set_wq_in_error(&qhp->wq); set_state(qhp, C4IW_QP_STATE_CLOSING); ep = qhp->ep; if (!internal) { @@ -1330,28 +1455,30 @@ disconnect = 1; c4iw_get_ep(&qhp->ep->com); } - if (qhp->ibqp.uobject) - t4_set_wq_in_error(&qhp->wq); ret = rdma_fini(rhp, qhp, ep); if (ret) goto err; break; case C4IW_QP_STATE_TERMINATE: + t4_set_wq_in_error(&qhp->wq); set_state(qhp, C4IW_QP_STATE_TERMINATE); qhp->attr.layer_etype = attrs->layer_etype; qhp->attr.ecode = attrs->ecode; - if (qhp->ibqp.uobject) - t4_set_wq_in_error(&qhp->wq); ep = qhp->ep; - if (!internal) + if (!internal) { + c4iw_get_ep(&qhp->ep->com); terminate = 1; - disconnect = 1; - c4iw_get_ep(&qhp->ep->com); + disconnect = 1; + } else { + terminate = qhp->attr.send_term; + ret = rdma_fini(rhp, qhp, ep); + if (ret) + goto err; + } break; case C4IW_QP_STATE_ERROR: + t4_set_wq_in_error(&qhp->wq); set_state(qhp, C4IW_QP_STATE_ERROR); - if (qhp->ibqp.uobject) - t4_set_wq_in_error(&qhp->wq); if (!internal) { abort = 1; disconnect = 1; @@ -1424,9 +1551,9 @@ set_state(qhp, C4IW_QP_STATE_ERROR); free = 1; abort = 1; - wake_up(&qhp->wait); BUG_ON(!ep); flush_qp(qhp); + wake_up(&qhp->wait); out: mutex_unlock(&qhp->mutex); @@ -1454,14 +1581,6 @@ return ret; } -static int enable_qp_db(int id, void *p, void *data) -{ - struct c4iw_qp *qp = p; - - t4_enable_wq_db(&qp->wq); - return 0; -} - int c4iw_destroy_qp(struct ib_qp *ib_qp) { struct c4iw_dev *rhp; @@ -1479,22 +1598,16 @@ c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); wait_event(qhp->wait, !qhp->ep); - spin_lock_irq(&rhp->lock); - remove_handle_nolock(rhp, &rhp->qpidr, qhp->wq.sq.qid); - rhp->qpcnt--; - BUG_ON(rhp->qpcnt < 0); - if (rhp->qpcnt <= db_fc_threshold && rhp->db_state == FLOW_CONTROL) { - rhp->rdev.stats.db_state_transitions++; - rhp->db_state = NORMAL; - idr_for_each(&rhp->qpidr, enable_qp_db, NULL); - } - if (db_coalescing_threshold >= 0) - if (rhp->qpcnt <= db_coalescing_threshold) - cxgb4_enable_db_coalescing(rhp->rdev.lldi.ports[0]); - spin_unlock_irq(&rhp->lock); + remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); atomic_dec(&qhp->refcnt); wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); + spin_lock_irq(&rhp->lock); + if (!list_empty(&qhp->db_fc_entry)) + list_del_init(&qhp->db_fc_entry); + spin_unlock_irq(&rhp->lock); + free_ird(rhp, qhp->attr.max_ird); + ucontext = ib_qp->uobject ? to_c4iw_ucontext(ib_qp->uobject->context) : NULL; destroy_qp(&rhp->rdev, &qhp->wq, @@ -1505,14 +1618,6 @@ return 0; } -static int disable_qp_db(int id, void *p, void *data) -{ - struct c4iw_qp *qp = p; - - t4_disable_wq_db(&qp->wq); - return 0; -} - struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct ib_udata *udata) { @@ -1522,7 +1627,7 @@ struct c4iw_cq *schp; struct c4iw_cq *rchp; struct c4iw_create_qp_resp uresp; - int sqsize, rqsize; + unsigned int sqsize, rqsize; struct c4iw_ucontext *ucontext; int ret; struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL; @@ -1542,33 +1647,38 @@ if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE) return ERR_PTR(-EINVAL); - rqsize = roundup(attrs->cap.max_recv_wr + 1, 16); - if (rqsize > T4_MAX_RQ_SIZE) + if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size) return ERR_PTR(-E2BIG); + rqsize = attrs->cap.max_recv_wr + 1; + if (rqsize < 8) + rqsize = 8; - sqsize = roundup(attrs->cap.max_send_wr + 1, 16); - if (sqsize > T4_MAX_SQ_SIZE) + if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size) return ERR_PTR(-E2BIG); + sqsize = attrs->cap.max_send_wr + 1; + if (sqsize < 8) + sqsize = 8; ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL; - qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); if (!qhp) return ERR_PTR(-ENOMEM); qhp->wq.sq.size = sqsize; - qhp->wq.sq.memsize = (sqsize + 1) * sizeof *qhp->wq.sq.queue; + qhp->wq.sq.memsize = + (sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * + sizeof(*qhp->wq.sq.queue) + 16 * sizeof(__be64); + qhp->wq.sq.flush_cidx = -1; qhp->wq.rq.size = rqsize; - qhp->wq.rq.memsize = (rqsize + 1) * sizeof *qhp->wq.rq.queue; + qhp->wq.rq.memsize = + (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * + sizeof(*qhp->wq.rq.queue); if (ucontext) { qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE); qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE); } - PDBG("%s sqsize %u sqmemsize %zu rqsize %u rqmemsize %zu\n", - __func__, sqsize, qhp->wq.sq.memsize, rqsize, qhp->wq.rq.memsize); - ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); if (ret) @@ -1592,27 +1702,15 @@ qhp->attr.enable_rdma_read = 1; qhp->attr.enable_rdma_write = 1; qhp->attr.enable_bind = 1; - qhp->attr.max_ord = 1; - qhp->attr.max_ird = 1; + qhp->attr.max_ord = 0; + qhp->attr.max_ird = 0; + qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR; spin_lock_init(&qhp->lock); mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); atomic_set(&qhp->refcnt, 1); - spin_lock_irq(&rhp->lock); - if (rhp->db_state != NORMAL) - t4_disable_wq_db(&qhp->wq); - rhp->qpcnt++; - if (rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) { - rhp->rdev.stats.db_state_transitions++; - rhp->db_state = FLOW_CONTROL; - idr_for_each(&rhp->qpidr, disable_qp_db, NULL); - } - if (db_coalescing_threshold >= 0) - if (rhp->qpcnt > db_coalescing_threshold) - cxgb4_disable_db_coalescing(rhp->rdev.lldi.ports[0]); - ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); - spin_unlock_irq(&rhp->lock); + ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); if (ret) goto err2; @@ -1657,6 +1755,8 @@ if (mm5) { uresp.ma_sync_key = ucontext->key; ucontext->key += PAGE_SIZE; + } else { + uresp.ma_sync_key = 0; } uresp.sq_key = ucontext->key; ucontext->key += PAGE_SIZE; @@ -1679,26 +1779,29 @@ mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize); insert_mmap(ucontext, mm2); mm3->key = uresp.sq_db_gts_key; - mm3->addr = qhp->wq.sq.udb; + mm3->addr = (__force unsigned long)qhp->wq.sq.bar2_pa; mm3->len = PAGE_SIZE; insert_mmap(ucontext, mm3); mm4->key = uresp.rq_db_gts_key; - mm4->addr = qhp->wq.rq.udb; + mm4->addr = (__force unsigned long)qhp->wq.rq.bar2_pa; mm4->len = PAGE_SIZE; insert_mmap(ucontext, mm4); if (mm5) { mm5->key = uresp.ma_sync_key; mm5->addr = (pci_resource_start(rhp->rdev.lldi.pdev, 0) - + A_PCIE_MA_SYNC) & PAGE_MASK; + + PCIE_MA_SYNC_A) & PAGE_MASK; mm5->len = PAGE_SIZE; insert_mmap(ucontext, mm5); } } qhp->ibqp.qp_num = qhp->wq.sq.qid; init_timer(&(qhp->timer)); - PDBG("%s qhp %p sq_num_entries %d, rq_num_entries %d qpid 0x%0x\n", - __func__, qhp, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, - qhp->wq.sq.qid); + INIT_LIST_HEAD(&qhp->db_fc_entry); + PDBG("%s sq id %u size %u memsize %zu num_entries %u " + "rq id %u size %u memsize %zu num_entries %u\n", __func__, + qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize, + attrs->cap.max_send_wr, qhp->wq.rq.qid, qhp->wq.rq.size, + qhp->wq.rq.memsize, attrs->cap.max_recv_wr); return &qhp->ibqp; err8: kfree(mm5); @@ -1759,11 +1862,15 @@ /* * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for * ringing the queue db when we're in DB_FULL mode. + * Only allow this on T4 devices. */ attrs.sq_db_inc = attr->sq_psn; attrs.rq_db_inc = attr->rq_psn; mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0; mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0; + if (!is_t4(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) && + (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB))) + return -EINVAL; return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0); } @@ -1782,5 +1889,11 @@ memset(attr, 0, sizeof *attr); memset(init_attr, 0, sizeof *init_attr); attr->qp_state = to_ib_qp_state(qhp->attr.state); + init_attr->cap.max_send_wr = qhp->attr.sq_num_entries; + init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries; + init_attr->cap.max_send_sge = qhp->attr.sq_max_sges; + init_attr->cap.max_recv_sge = qhp->attr.sq_max_sges; + init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE; + init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0; return 0; }