--- zzzz-none-000/linux-3.10.107/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c 2021-02-04 17:41:59.000000000 +0000 @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #include #include @@ -31,6 +46,7 @@ #include #include #include +#include #include "ocrdma.h" #include "ocrdma_hw.h" @@ -49,6 +65,7 @@ int ocrdma_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *sgid) { + int ret; struct ocrdma_dev *dev; dev = get_ocrdma_dev(ibdev); @@ -56,39 +73,64 @@ if (index >= OCRDMA_MAX_SGID) return -EINVAL; - memcpy(sgid, &dev->sgid_tbl[index], sizeof(*sgid)); + ret = ib_get_cached_gid(ibdev, port, index, sgid, NULL); + if (ret == -EAGAIN) { + memcpy(sgid, &zgid, sizeof(*sgid)); + return 0; + } + return ret; +} + +int ocrdma_add_gid(struct ib_device *device, + u8 port_num, + unsigned int index, + const union ib_gid *gid, + const struct ib_gid_attr *attr, + void **context) { + return 0; +} + +int ocrdma_del_gid(struct ib_device *device, + u8 port_num, + unsigned int index, + void **context) { return 0; } -int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) +int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, + struct ib_udata *uhw) { struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + memset(attr, 0, sizeof *attr); memcpy(&attr->fw_ver, &dev->attr.fw_ver[0], min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver))); ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid); - attr->max_mr_size = ~0ull; + attr->max_mr_size = dev->attr.max_mr_size; attr->page_size_cap = 0xffff000; attr->vendor_id = dev->nic_info.pdev->vendor; attr->vendor_part_id = dev->nic_info.pdev->device; - attr->hw_ver = 0; + attr->hw_ver = dev->asic_id; attr->max_qp = dev->attr.max_qp; - attr->max_ah = dev->attr.max_qp; + attr->max_ah = OCRDMA_MAX_AH; attr->max_qp_wr = dev->attr.max_wqe; attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID | - IB_DEVICE_LOCAL_DMA_LKEY; + IB_DEVICE_LOCAL_DMA_LKEY | + IB_DEVICE_MEM_MGT_EXTENSIONS; attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_srq_sge); attr->max_sge_rd = 0; attr->max_cq = dev->attr.max_cq; attr->max_cqe = dev->attr.max_cqe; attr->max_mr = dev->attr.max_mr; - attr->max_mw = 0; + attr->max_mw = dev->attr.max_mw; attr->max_pd = dev->attr.max_pd; attr->atomic_cap = 0; attr->max_fmr = 0; @@ -96,15 +138,71 @@ attr->max_qp_rd_atom = min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp); attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp; - attr->max_srq = (dev->attr.max_qp - 1); + attr->max_srq = dev->attr.max_srq; attr->max_srq_sge = dev->attr.max_srq_sge; attr->max_srq_wr = dev->attr.max_rqe; attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay; - attr->max_fast_reg_page_list_len = 0; + attr->max_fast_reg_page_list_len = dev->attr.max_pages_per_frmr; attr->max_pkeys = 1; return 0; } +struct net_device *ocrdma_get_netdev(struct ib_device *ibdev, u8 port_num) +{ + struct ocrdma_dev *dev; + struct net_device *ndev = NULL; + + rcu_read_lock(); + + dev = get_ocrdma_dev(ibdev); + if (dev) + ndev = dev->nic_info.netdev; + if (ndev) + dev_hold(ndev); + + rcu_read_unlock(); + + return ndev; +} + +static inline void get_link_speed_and_width(struct ocrdma_dev *dev, + u8 *ib_speed, u8 *ib_width) +{ + int status; + u8 speed; + + status = ocrdma_mbx_get_link_speed(dev, &speed, NULL); + if (status) + speed = OCRDMA_PHYS_LINK_SPEED_ZERO; + + switch (speed) { + case OCRDMA_PHYS_LINK_SPEED_1GBPS: + *ib_speed = IB_SPEED_SDR; + *ib_width = IB_WIDTH_1X; + break; + + case OCRDMA_PHYS_LINK_SPEED_10GBPS: + *ib_speed = IB_SPEED_QDR; + *ib_width = IB_WIDTH_1X; + break; + + case OCRDMA_PHYS_LINK_SPEED_20GBPS: + *ib_speed = IB_SPEED_DDR; + *ib_width = IB_WIDTH_4X; + break; + + case OCRDMA_PHYS_LINK_SPEED_40GBPS: + *ib_speed = IB_SPEED_QDR; + *ib_width = IB_WIDTH_4X; + break; + + default: + /* Unsupported */ + *ib_speed = IB_SPEED_SDR; + *ib_width = IB_WIDTH_1X; + } +} + int ocrdma_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { @@ -114,8 +212,8 @@ dev = get_ocrdma_dev(ibdev); if (port > 1) { - ocrdma_err("%s(%d) invalid_port=0x%x\n", __func__, - dev->id, port); + pr_err("%s(%d) invalid_port=0x%x\n", __func__, + dev->id, port); return -EINVAL; } netdev = dev->nic_info.netdev; @@ -136,13 +234,14 @@ props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | - IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP; + IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP | + IB_PORT_IP_BASED_GIDS; props->gid_tbl_len = OCRDMA_MAX_SGID; props->pkey_tbl_len = 1; props->bad_pkey_cntr = 0; props->qkey_viol_cntr = 0; - props->active_width = IB_WIDTH_1X; - props->active_speed = 4; + get_link_speed_and_width(dev, &props->active_speed, + &props->active_width); props->max_msg_sz = 0x80000000; props->max_vl_num = 4; return 0; @@ -155,8 +254,7 @@ dev = get_ocrdma_dev(ibdev); if (port > 1) { - ocrdma_err("%s(%d) invalid_port=0x%x\n", __func__, - dev->id, port); + pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port); return -EINVAL; } return 0; @@ -187,7 +285,7 @@ mutex_lock(&uctx->mm_list_lock); list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) { - if (len != mm->key.len || phy_addr != mm->key.phy_addr) + if (len != mm->key.len && phy_addr != mm->key.phy_addr) continue; list_del(&mm->entry); @@ -205,7 +303,7 @@ mutex_lock(&uctx->mm_list_lock); list_for_each_entry(mm, &uctx->mm_head, entry) { - if (len != mm->key.len || phy_addr != mm->key.phy_addr) + if (len != mm->key.len && phy_addr != mm->key.phy_addr) continue; found = true; @@ -215,6 +313,226 @@ return found; } + +static u16 _ocrdma_pd_mgr_get_bitmap(struct ocrdma_dev *dev, bool dpp_pool) +{ + u16 pd_bitmap_idx = 0; + const unsigned long *pd_bitmap; + + if (dpp_pool) { + pd_bitmap = dev->pd_mgr->pd_dpp_bitmap; + pd_bitmap_idx = find_first_zero_bit(pd_bitmap, + dev->pd_mgr->max_dpp_pd); + __set_bit(pd_bitmap_idx, dev->pd_mgr->pd_dpp_bitmap); + dev->pd_mgr->pd_dpp_count++; + if (dev->pd_mgr->pd_dpp_count > dev->pd_mgr->pd_dpp_thrsh) + dev->pd_mgr->pd_dpp_thrsh = dev->pd_mgr->pd_dpp_count; + } else { + pd_bitmap = dev->pd_mgr->pd_norm_bitmap; + pd_bitmap_idx = find_first_zero_bit(pd_bitmap, + dev->pd_mgr->max_normal_pd); + __set_bit(pd_bitmap_idx, dev->pd_mgr->pd_norm_bitmap); + dev->pd_mgr->pd_norm_count++; + if (dev->pd_mgr->pd_norm_count > dev->pd_mgr->pd_norm_thrsh) + dev->pd_mgr->pd_norm_thrsh = dev->pd_mgr->pd_norm_count; + } + return pd_bitmap_idx; +} + +static int _ocrdma_pd_mgr_put_bitmap(struct ocrdma_dev *dev, u16 pd_id, + bool dpp_pool) +{ + u16 pd_count; + u16 pd_bit_index; + + pd_count = dpp_pool ? dev->pd_mgr->pd_dpp_count : + dev->pd_mgr->pd_norm_count; + if (pd_count == 0) + return -EINVAL; + + if (dpp_pool) { + pd_bit_index = pd_id - dev->pd_mgr->pd_dpp_start; + if (pd_bit_index >= dev->pd_mgr->max_dpp_pd) { + return -EINVAL; + } else { + __clear_bit(pd_bit_index, dev->pd_mgr->pd_dpp_bitmap); + dev->pd_mgr->pd_dpp_count--; + } + } else { + pd_bit_index = pd_id - dev->pd_mgr->pd_norm_start; + if (pd_bit_index >= dev->pd_mgr->max_normal_pd) { + return -EINVAL; + } else { + __clear_bit(pd_bit_index, dev->pd_mgr->pd_norm_bitmap); + dev->pd_mgr->pd_norm_count--; + } + } + + return 0; +} + +static u8 ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id, + bool dpp_pool) +{ + int status; + + mutex_lock(&dev->dev_lock); + status = _ocrdma_pd_mgr_put_bitmap(dev, pd_id, dpp_pool); + mutex_unlock(&dev->dev_lock); + return status; +} + +static int ocrdma_get_pd_num(struct ocrdma_dev *dev, struct ocrdma_pd *pd) +{ + u16 pd_idx = 0; + int status = 0; + + mutex_lock(&dev->dev_lock); + if (pd->dpp_enabled) { + /* try allocating DPP PD, if not available then normal PD */ + if (dev->pd_mgr->pd_dpp_count < dev->pd_mgr->max_dpp_pd) { + pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, true); + pd->id = dev->pd_mgr->pd_dpp_start + pd_idx; + pd->dpp_page = dev->pd_mgr->dpp_page_index + pd_idx; + } else if (dev->pd_mgr->pd_norm_count < + dev->pd_mgr->max_normal_pd) { + pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, false); + pd->id = dev->pd_mgr->pd_norm_start + pd_idx; + pd->dpp_enabled = false; + } else { + status = -EINVAL; + } + } else { + if (dev->pd_mgr->pd_norm_count < dev->pd_mgr->max_normal_pd) { + pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, false); + pd->id = dev->pd_mgr->pd_norm_start + pd_idx; + } else { + status = -EINVAL; + } + } + mutex_unlock(&dev->dev_lock); + return status; +} + +static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, + struct ocrdma_ucontext *uctx, + struct ib_udata *udata) +{ + struct ocrdma_pd *pd = NULL; + int status = 0; + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + if (udata && uctx && dev->attr.max_dpp_pds) { + pd->dpp_enabled = + ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R; + pd->num_dpp_qp = + pd->dpp_enabled ? (dev->nic_info.db_page_size / + dev->attr.wqe_size) : 0; + } + + if (dev->pd_mgr->pd_prealloc_valid) { + status = ocrdma_get_pd_num(dev, pd); + if (status == 0) { + return pd; + } else { + kfree(pd); + return ERR_PTR(status); + } + } + +retry: + status = ocrdma_mbx_alloc_pd(dev, pd); + if (status) { + if (pd->dpp_enabled) { + pd->dpp_enabled = false; + pd->num_dpp_qp = 0; + goto retry; + } else { + kfree(pd); + return ERR_PTR(status); + } + } + + return pd; +} + +static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx, + struct ocrdma_pd *pd) +{ + return (uctx->cntxt_pd == pd ? true : false); +} + +static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev, + struct ocrdma_pd *pd) +{ + int status = 0; + + if (dev->pd_mgr->pd_prealloc_valid) + status = ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled); + else + status = ocrdma_mbx_dealloc_pd(dev, pd); + + kfree(pd); + return status; +} + +static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev *dev, + struct ocrdma_ucontext *uctx, + struct ib_udata *udata) +{ + int status = 0; + + uctx->cntxt_pd = _ocrdma_alloc_pd(dev, uctx, udata); + if (IS_ERR(uctx->cntxt_pd)) { + status = PTR_ERR(uctx->cntxt_pd); + uctx->cntxt_pd = NULL; + goto err; + } + + uctx->cntxt_pd->uctx = uctx; + uctx->cntxt_pd->ibpd.device = &dev->ibdev; +err: + return status; +} + +static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx) +{ + struct ocrdma_pd *pd = uctx->cntxt_pd; + struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device); + + if (uctx->pd_in_use) { + pr_err("%s(%d) Freeing in use pdid=0x%x.\n", + __func__, dev->id, pd->id); + } + uctx->cntxt_pd = NULL; + (void)_ocrdma_dealloc_pd(dev, pd); + return 0; +} + +static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx) +{ + struct ocrdma_pd *pd = NULL; + + mutex_lock(&uctx->mm_list_lock); + if (!uctx->pd_in_use) { + uctx->pd_in_use = true; + pd = uctx->cntxt_pd; + } + mutex_unlock(&uctx->mm_list_lock); + + return pd; +} + +static void ocrdma_release_ucontext_pd(struct ocrdma_ucontext *uctx) +{ + mutex_lock(&uctx->mm_list_lock); + uctx->pd_in_use = false; + mutex_unlock(&uctx->mm_list_lock); +} + struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { @@ -230,7 +548,6 @@ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return ERR_PTR(-ENOMEM); - ctx->dev = dev; INIT_LIST_HEAD(&ctx->mm_head); mutex_init(&ctx->mm_list_lock); @@ -243,18 +560,23 @@ memset(ctx->ah_tbl.va, 0, map_len); ctx->ah_tbl.len = map_len; + memset(&resp, 0, sizeof(resp)); resp.ah_tbl_len = ctx->ah_tbl.len; - resp.ah_tbl_page = ctx->ah_tbl.pa; + resp.ah_tbl_page = virt_to_phys(ctx->ah_tbl.va); status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len); if (status) goto map_err; + + status = ocrdma_alloc_ucontext_pd(dev, ctx, udata); + if (status) + goto pd_err; + resp.dev_id = dev->id; resp.max_inline_data = dev->attr.max_inline_data; resp.wqe_size = dev->attr.wqe_size; resp.rqe_size = dev->attr.rqe_size; resp.dpp_wqe_size = dev->attr.wqe_size; - resp.rsvd = 0; memcpy(resp.fw_ver, dev->attr.fw_ver, sizeof(resp.fw_ver)); status = ib_copy_to_udata(udata, &resp, sizeof(resp)); @@ -263,6 +585,7 @@ return &ctx->ibucontext; cpy_err: +pd_err: ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len); map_err: dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va, @@ -273,9 +596,13 @@ int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx) { + int status = 0; struct ocrdma_mm *mm, *tmp; struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx); - struct pci_dev *pdev = uctx->dev->nic_info.pdev; + struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device); + struct pci_dev *pdev = dev->nic_info.pdev; + + status = ocrdma_dealloc_ucontext_pd(uctx); ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len); dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va, @@ -286,13 +613,13 @@ kfree(mm); } kfree(uctx); - return 0; + return status; } int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { struct ocrdma_ucontext *ucontext = get_ocrdma_ucontext(context); - struct ocrdma_dev *dev = ucontext->dev; + struct ocrdma_dev *dev = get_ocrdma_dev(context->device); unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT; u64 unmapped_db = (u64) dev->nic_info.unmapped_db; unsigned long len = (vma->vm_end - vma->vm_start); @@ -308,7 +635,10 @@ if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db + dev->nic_info.db_total_size)) && (len <= dev->nic_info.db_page_size)) { - /* doorbell mapping */ + if (vma->vm_flags & VM_READ) + return -EPERM; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len, vma->vm_page_prot); } else if (dev->nic_info.dpp_unmapped_len && @@ -316,19 +646,20 @@ (vm_page <= (u64) (dev->nic_info.dpp_unmapped_addr + dev->nic_info.dpp_unmapped_len)) && (len <= dev->nic_info.dpp_unmapped_len)) { - /* dpp area mapping */ + if (vma->vm_flags & VM_READ) + return -EPERM; + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len, vma->vm_page_prot); } else { - /* queue memory mapping */ status = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len, vma->vm_page_prot); } return status; } -static int ocrdma_copy_pd_uresp(struct ocrdma_pd *pd, +static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd, struct ib_ucontext *ib_ctx, struct ib_udata *udata) { @@ -339,21 +670,21 @@ struct ocrdma_alloc_pd_uresp rsp; struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx); + memset(&rsp, 0, sizeof(rsp)); rsp.id = pd->id; rsp.dpp_enabled = pd->dpp_enabled; - db_page_addr = pd->dev->nic_info.unmapped_db + - (pd->id * pd->dev->nic_info.db_page_size); - db_page_size = pd->dev->nic_info.db_page_size; + db_page_addr = ocrdma_get_db_addr(dev, pd->id); + db_page_size = dev->nic_info.db_page_size; status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size); if (status) return status; if (pd->dpp_enabled) { - dpp_page_addr = pd->dev->nic_info.dpp_unmapped_addr + - (pd->id * OCRDMA_DPP_PAGE_SIZE); + dpp_page_addr = dev->nic_info.dpp_unmapped_addr + + (pd->id * PAGE_SIZE); status = ocrdma_add_mmap(uctx, dpp_page_addr, - OCRDMA_DPP_PAGE_SIZE); + PAGE_SIZE); if (status) goto dpp_map_err; rsp.dpp_page_addr_hi = upper_32_bits(dpp_page_addr); @@ -369,7 +700,7 @@ ucopy_err: if (pd->dpp_enabled) - ocrdma_del_mmap(pd->uctx, dpp_page_addr, OCRDMA_DPP_PAGE_SIZE); + ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE); dpp_map_err: ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size); return status; @@ -381,84 +712,74 @@ { struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); struct ocrdma_pd *pd; + struct ocrdma_ucontext *uctx = NULL; int status; + u8 is_uctx_pd = false; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - pd->dev = dev; if (udata && context) { - pd->dpp_enabled = (dev->nic_info.dev_family == - OCRDMA_GEN2_FAMILY) ? true : false; - pd->num_dpp_qp = - pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0; + uctx = get_ocrdma_ucontext(context); + pd = ocrdma_get_ucontext_pd(uctx); + if (pd) { + is_uctx_pd = true; + goto pd_mapping; + } } - status = ocrdma_mbx_alloc_pd(dev, pd); - if (status) { - kfree(pd); - return ERR_PTR(status); + + pd = _ocrdma_alloc_pd(dev, uctx, udata); + if (IS_ERR(pd)) { + status = PTR_ERR(pd); + goto exit; } - atomic_set(&pd->use_cnt, 0); +pd_mapping: if (udata && context) { - status = ocrdma_copy_pd_uresp(pd, context, udata); + status = ocrdma_copy_pd_uresp(dev, pd, context, udata); if (status) goto err; } return &pd->ibpd; err: - ocrdma_dealloc_pd(&pd->ibpd); + if (is_uctx_pd) { + ocrdma_release_ucontext_pd(uctx); + } else { + status = _ocrdma_dealloc_pd(dev, pd); + } +exit: return ERR_PTR(status); } int ocrdma_dealloc_pd(struct ib_pd *ibpd) { struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); - struct ocrdma_dev *dev = pd->dev; - int status; + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); + struct ocrdma_ucontext *uctx = NULL; + int status = 0; u64 usr_db; - if (atomic_read(&pd->use_cnt)) { - ocrdma_err("%s(%d) pd=0x%x is in use.\n", - __func__, dev->id, pd->id); - status = -EFAULT; - goto dealloc_err; - } - status = ocrdma_mbx_dealloc_pd(dev, pd); - if (pd->uctx) { + uctx = pd->uctx; + if (uctx) { u64 dpp_db = dev->nic_info.dpp_unmapped_addr + - (pd->id * OCRDMA_DPP_PAGE_SIZE); + (pd->id * PAGE_SIZE); if (pd->dpp_enabled) - ocrdma_del_mmap(pd->uctx, dpp_db, OCRDMA_DPP_PAGE_SIZE); - usr_db = dev->nic_info.unmapped_db + - (pd->id * dev->nic_info.db_page_size); + ocrdma_del_mmap(pd->uctx, dpp_db, PAGE_SIZE); + usr_db = ocrdma_get_db_addr(dev, pd->id); ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size); + + if (is_ucontext_pd(uctx, pd)) { + ocrdma_release_ucontext_pd(uctx); + return status; + } } - kfree(pd); -dealloc_err: + status = _ocrdma_dealloc_pd(dev, pd); return status; } -static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd, - int acc, u32 num_pbls, - u32 addr_check) +static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr, + u32 pdid, int acc, u32 num_pbls, u32 addr_check) { int status; - struct ocrdma_mr *mr; - struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); - struct ocrdma_dev *dev = pd->dev; - if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) { - ocrdma_err("%s(%d) leaving err, invalid access rights\n", - __func__, dev->id); - return ERR_PTR(-EINVAL); - } - - mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) - return ERR_PTR(-ENOMEM); - mr->hwmr.dev = dev; mr->hwmr.fr_mr = 0; mr->hwmr.local_rd = 1; mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; @@ -468,26 +789,38 @@ mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; mr->hwmr.num_pbls = num_pbls; - status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pd->id, addr_check); - if (status) { - kfree(mr); - return ERR_PTR(-ENOMEM); - } - mr->pd = pd; - atomic_inc(&pd->use_cnt); + status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pdid, addr_check); + if (status) + return status; + mr->ibmr.lkey = mr->hwmr.lkey; if (mr->hwmr.remote_wr || mr->hwmr.remote_rd) mr->ibmr.rkey = mr->hwmr.lkey; - return mr; + return 0; } struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *ibpd, int acc) { + int status; struct ocrdma_mr *mr; + struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); + + if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) { + pr_err("%s err, invalid access rights\n", __func__); + return ERR_PTR(-EINVAL); + } + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); - mr = ocrdma_alloc_lkey(ibpd, acc, 0, OCRDMA_ADDR_CHECK_DISABLE); - if (IS_ERR(mr)) - return ERR_CAST(mr); + status = ocrdma_alloc_lkey(dev, mr, pd->id, acc, 0, + OCRDMA_ADDR_CHECK_DISABLE); + if (status) { + kfree(mr); + return ERR_PTR(status); + } return &mr->ibmr; } @@ -511,7 +844,8 @@ } } -static int ocrdma_get_pbl_info(struct ocrdma_mr *mr, u32 num_pbes) +static int ocrdma_get_pbl_info(struct ocrdma_dev *dev, struct ocrdma_mr *mr, + u32 num_pbes) { u32 num_pbls = 0; u32 idx = 0; @@ -527,7 +861,7 @@ num_pbls = roundup(num_pbes, (pbl_size / sizeof(u64))); num_pbls = num_pbls / (pbl_size / sizeof(u64)); idx++; - } while (num_pbls >= mr->hwmr.dev->attr.max_num_mr_pbl); + } while (num_pbls >= dev->attr.max_num_mr_pbl); mr->hwmr.num_pbes = num_pbes; mr->hwmr.num_pbls = num_pbls; @@ -568,10 +902,10 @@ u32 num_pbes) { struct ocrdma_pbe *pbe; - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table; struct ib_umem *umem = mr->umem; - int i, shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0; + int shift, pg_cnt, pages, pbe_cnt, entry, total_num_pbes = 0; if (!mr->hwmr.num_pbes) return; @@ -581,39 +915,37 @@ shift = ilog2(umem->page_size); - list_for_each_entry(chunk, &umem->chunk_list, list) { - /* get all the dma regions from the chunk. */ - for (i = 0; i < chunk->nmap; i++) { - pages = sg_dma_len(&chunk->page_list[i]) >> shift; - for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) { - /* store the page address in pbe */ - pbe->pa_lo = - cpu_to_le32(sg_dma_address - (&chunk->page_list[i]) + - (umem->page_size * pg_cnt)); - pbe->pa_hi = - cpu_to_le32(upper_32_bits - ((sg_dma_address - (&chunk->page_list[i]) + - umem->page_size * pg_cnt))); - pbe_cnt += 1; - total_num_pbes += 1; - pbe++; - - /* if done building pbes, issue the mbx cmd. */ - if (total_num_pbes == num_pbes) - return; + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + pages = sg_dma_len(sg) >> shift; + for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) { + /* store the page address in pbe */ + pbe->pa_lo = + cpu_to_le32(sg_dma_address + (sg) + + (umem->page_size * pg_cnt)); + pbe->pa_hi = + cpu_to_le32(upper_32_bits + ((sg_dma_address + (sg) + + umem->page_size * pg_cnt))); + pbe_cnt += 1; + total_num_pbes += 1; + pbe++; + + /* if done building pbes, issue the mbx cmd. */ + if (total_num_pbes == num_pbes) + return; - /* if the given pbl is full storing the pbes, - * move to next pbl. - */ - if (pbe_cnt == - (mr->hwmr.pbl_size / sizeof(u64))) { - pbl_tbl++; - pbe = (struct ocrdma_pbe *)pbl_tbl->va; - pbe_cnt = 0; - } + /* if the given pbl is full storing the pbes, + * move to next pbl. + */ + if (pbe_cnt == + (mr->hwmr.pbl_size / sizeof(u64))) { + pbl_tbl++; + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + pbe_cnt = 0; } + } } } @@ -622,13 +954,12 @@ u64 usr_addr, int acc, struct ib_udata *udata) { int status = -ENOMEM; - struct ocrdma_dev *dev; + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); struct ocrdma_mr *mr; struct ocrdma_pd *pd; u32 num_pbes; pd = get_ocrdma_pd(ibpd); - dev = pd->dev; if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) return ERR_PTR(-EINVAL); @@ -636,19 +967,18 @@ mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(status); - mr->hwmr.dev = dev; mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0); if (IS_ERR(mr->umem)) { status = -EFAULT; goto umem_err; } num_pbes = ib_umem_page_count(mr->umem); - status = ocrdma_get_pbl_info(mr, num_pbes); + status = ocrdma_get_pbl_info(dev, mr, num_pbes); if (status) goto umem_err; mr->hwmr.pbe_size = mr->umem->page_size; - mr->hwmr.fbo = mr->umem->offset; + mr->hwmr.fbo = ib_umem_offset(mr->umem); mr->hwmr.va = usr_addr; mr->hwmr.len = len; mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; @@ -663,8 +993,6 @@ status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc); if (status) goto mbx_err; - mr->pd = pd; - atomic_inc(&pd->use_cnt); mr->ibmr.lkey = mr->hwmr.lkey; if (mr->hwmr.remote_wr || mr->hwmr.remote_rd) mr->ibmr.rkey = mr->hwmr.lkey; @@ -681,44 +1009,49 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr) { struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr); - struct ocrdma_dev *dev = mr->hwmr.dev; - int status; + struct ocrdma_dev *dev = get_ocrdma_dev(ib_mr->device); - status = ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey); + (void) ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey); - if (mr->hwmr.fr_mr == 0) - ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); + kfree(mr->pages); + ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); - atomic_dec(&mr->pd->use_cnt); /* it could be user registered memory. */ if (mr->umem) ib_umem_release(mr->umem); kfree(mr); - return status; + + /* Don't stop cleanup, in case FW is unresponsive */ + if (dev->mqe_ctx.fw_error_state) { + pr_err("%s(%d) fw not responding.\n", + __func__, dev->id); + } + return 0; } -static int ocrdma_copy_cq_uresp(struct ocrdma_cq *cq, struct ib_udata *udata, +static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq, + struct ib_udata *udata, struct ib_ucontext *ib_ctx) { int status; - struct ocrdma_ucontext *uctx; + struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx); struct ocrdma_create_cq_uresp uresp; + memset(&uresp, 0, sizeof(uresp)); uresp.cq_id = cq->id; - uresp.page_size = cq->len; + uresp.page_size = PAGE_ALIGN(cq->len); uresp.num_pages = 1; uresp.max_hw_cqe = cq->max_hw_cqe; - uresp.page_addr[0] = cq->pa; - uresp.db_page_addr = cq->dev->nic_info.unmapped_db; - uresp.db_page_size = cq->dev->nic_info.db_page_size; + uresp.page_addr[0] = virt_to_phys(cq->va); + uresp.db_page_addr = ocrdma_get_db_addr(dev, uctx->cntxt_pd->id); + uresp.db_page_size = dev->nic_info.db_page_size; uresp.phase_change = cq->phase_change ? 1 : 0; status = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (status) { - ocrdma_err("%s(%d) copy error cqid=0x%x.\n", - __func__, cq->dev->id, cq->id); + pr_err("%s(%d) copy error cqid=0x%x.\n", + __func__, dev->id, cq->id); goto err; } - uctx = get_ocrdma_ucontext(ib_ctx); status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size); if (status) goto err; @@ -732,15 +1065,22 @@ return status; } -struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, +struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, struct ib_ucontext *ib_ctx, struct ib_udata *udata) { + int entries = attr->cqe; struct ocrdma_cq *cq; struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); + struct ocrdma_ucontext *uctx = NULL; + u16 pd_id = 0; int status; struct ocrdma_create_cq_ureq ureq; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (udata) { if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) return ERR_PTR(-EFAULT); @@ -752,25 +1092,27 @@ spin_lock_init(&cq->cq_lock); spin_lock_init(&cq->comp_handler_lock); - atomic_set(&cq->use_cnt, 0); INIT_LIST_HEAD(&cq->sq_head); INIT_LIST_HEAD(&cq->rq_head); - cq->dev = dev; + cq->first_arm = true; - status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq); + if (ib_ctx) { + uctx = get_ocrdma_ucontext(ib_ctx); + pd_id = uctx->cntxt_pd->id; + } + + status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id); if (status) { kfree(cq); return ERR_PTR(status); } if (ib_ctx) { - status = ocrdma_copy_cq_uresp(cq, udata, ib_ctx); + status = ocrdma_copy_cq_uresp(dev, cq, udata, ib_ctx); if (status) goto ctx_err; } cq->phase = OCRDMA_CQE_VALID; - cq->arm_needed = true; dev->cq_tbl[cq->id] = cq; - return &cq->ibcq; ctx_err: @@ -793,26 +1135,62 @@ return status; } +static void ocrdma_flush_cq(struct ocrdma_cq *cq) +{ + int cqe_cnt; + int valid_count = 0; + unsigned long flags; + + struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device); + struct ocrdma_cqe *cqe = NULL; + + cqe = cq->va; + cqe_cnt = cq->cqe_cnt; + + /* Last irq might have scheduled a polling thread + * sync-up with it before hard flushing. + */ + spin_lock_irqsave(&cq->cq_lock, flags); + while (cqe_cnt) { + if (is_cqe_valid(cq, cqe)) + valid_count++; + cqe++; + cqe_cnt--; + } + ocrdma_ring_cq_db(dev, cq->id, false, false, valid_count); + spin_unlock_irqrestore(&cq->cq_lock, flags); +} + int ocrdma_destroy_cq(struct ib_cq *ibcq) { - int status; struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); - struct ocrdma_dev *dev = cq->dev; + struct ocrdma_eq *eq = NULL; + struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); + int pdid = 0; + u32 irq, indx; - if (atomic_read(&cq->use_cnt)) - return -EINVAL; + dev->cq_tbl[cq->id] = NULL; + indx = ocrdma_get_eq_table_index(dev, cq->eqn); + if (indx == -EINVAL) + BUG(); - status = ocrdma_mbx_destroy_cq(dev, cq); + eq = &dev->eq_tbl[indx]; + irq = ocrdma_get_irq(dev, eq); + synchronize_irq(irq); + ocrdma_flush_cq(cq); + (void)ocrdma_mbx_destroy_cq(dev, cq); if (cq->ucontext) { - ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, cq->len); - ocrdma_del_mmap(cq->ucontext, dev->nic_info.unmapped_db, + pdid = cq->ucontext->cntxt_pd->id; + ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, + PAGE_ALIGN(cq->len)); + ocrdma_del_mmap(cq->ucontext, + ocrdma_get_db_addr(dev, pdid), dev->nic_info.db_page_size); } - dev->cq_tbl[cq->id] = NULL; kfree(cq); - return status; + return 0; } static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp) @@ -834,70 +1212,70 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev, struct ib_qp_init_attr *attrs) { - if (attrs->qp_type != IB_QPT_GSI && - attrs->qp_type != IB_QPT_RC && - attrs->qp_type != IB_QPT_UD) { - ocrdma_err("%s(%d) unsupported qp type=0x%x requested\n", - __func__, dev->id, attrs->qp_type); + if ((attrs->qp_type != IB_QPT_GSI) && + (attrs->qp_type != IB_QPT_RC) && + (attrs->qp_type != IB_QPT_UC) && + (attrs->qp_type != IB_QPT_UD)) { + pr_err("%s(%d) unsupported qp type=0x%x requested\n", + __func__, dev->id, attrs->qp_type); return -EINVAL; } - if (attrs->cap.max_send_wr > dev->attr.max_wqe) { - ocrdma_err("%s(%d) unsupported send_wr=0x%x requested\n", - __func__, dev->id, attrs->cap.max_send_wr); - ocrdma_err("%s(%d) supported send_wr=0x%x\n", - __func__, dev->id, dev->attr.max_wqe); + /* Skip the check for QP1 to support CM size of 128 */ + if ((attrs->qp_type != IB_QPT_GSI) && + (attrs->cap.max_send_wr > dev->attr.max_wqe)) { + pr_err("%s(%d) unsupported send_wr=0x%x requested\n", + __func__, dev->id, attrs->cap.max_send_wr); + pr_err("%s(%d) supported send_wr=0x%x\n", + __func__, dev->id, dev->attr.max_wqe); return -EINVAL; } if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) { - ocrdma_err("%s(%d) unsupported recv_wr=0x%x requested\n", - __func__, dev->id, attrs->cap.max_recv_wr); - ocrdma_err("%s(%d) supported recv_wr=0x%x\n", - __func__, dev->id, dev->attr.max_rqe); + pr_err("%s(%d) unsupported recv_wr=0x%x requested\n", + __func__, dev->id, attrs->cap.max_recv_wr); + pr_err("%s(%d) supported recv_wr=0x%x\n", + __func__, dev->id, dev->attr.max_rqe); return -EINVAL; } if (attrs->cap.max_inline_data > dev->attr.max_inline_data) { - ocrdma_err("%s(%d) unsupported inline data size=0x%x" - " requested\n", __func__, dev->id, - attrs->cap.max_inline_data); - ocrdma_err("%s(%d) supported inline data size=0x%x\n", - __func__, dev->id, dev->attr.max_inline_data); + pr_err("%s(%d) unsupported inline data size=0x%x requested\n", + __func__, dev->id, attrs->cap.max_inline_data); + pr_err("%s(%d) supported inline data size=0x%x\n", + __func__, dev->id, dev->attr.max_inline_data); return -EINVAL; } if (attrs->cap.max_send_sge > dev->attr.max_send_sge) { - ocrdma_err("%s(%d) unsupported send_sge=0x%x requested\n", - __func__, dev->id, attrs->cap.max_send_sge); - ocrdma_err("%s(%d) supported send_sge=0x%x\n", - __func__, dev->id, dev->attr.max_send_sge); + pr_err("%s(%d) unsupported send_sge=0x%x requested\n", + __func__, dev->id, attrs->cap.max_send_sge); + pr_err("%s(%d) supported send_sge=0x%x\n", + __func__, dev->id, dev->attr.max_send_sge); return -EINVAL; } if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) { - ocrdma_err("%s(%d) unsupported recv_sge=0x%x requested\n", - __func__, dev->id, attrs->cap.max_recv_sge); - ocrdma_err("%s(%d) supported recv_sge=0x%x\n", - __func__, dev->id, dev->attr.max_recv_sge); + pr_err("%s(%d) unsupported recv_sge=0x%x requested\n", + __func__, dev->id, attrs->cap.max_recv_sge); + pr_err("%s(%d) supported recv_sge=0x%x\n", + __func__, dev->id, dev->attr.max_recv_sge); return -EINVAL; } /* unprivileged user space cannot create special QP */ if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) { - ocrdma_err + pr_err ("%s(%d) Userspace can't create special QPs of type=0x%x\n", __func__, dev->id, attrs->qp_type); return -EINVAL; } /* allow creating only one GSI type of QP */ if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) { - ocrdma_err("%s(%d) GSI special QPs already created.\n", - __func__, dev->id); + pr_err("%s(%d) GSI special QPs already created.\n", + __func__, dev->id); return -EINVAL; } /* verify consumer QPs are not trying to use GSI QP's CQ */ if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) { if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) || - (dev->gsi_sqcq == get_ocrdma_cq(attrs->recv_cq)) || - (dev->gsi_rqcq == get_ocrdma_cq(attrs->send_cq)) || - (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) { - ocrdma_err("%s(%d) Consumer QP cannot use GSI CQs.\n", - __func__, dev->id); + (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) { + pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n", + __func__, dev->id); return -EINVAL; } } @@ -911,8 +1289,8 @@ int status = 0; u64 usr_db; struct ocrdma_create_qp_uresp uresp; - struct ocrdma_dev *dev = qp->dev; struct ocrdma_pd *pd = qp->pd; + struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device); memset(&uresp, 0, sizeof(uresp)); usr_db = dev->nic_info.unmapped_db + @@ -920,28 +1298,21 @@ uresp.qp_id = qp->id; uresp.sq_dbid = qp->sq.dbid; uresp.num_sq_pages = 1; - uresp.sq_page_size = qp->sq.len; - uresp.sq_page_addr[0] = qp->sq.pa; + uresp.sq_page_size = PAGE_ALIGN(qp->sq.len); + uresp.sq_page_addr[0] = virt_to_phys(qp->sq.va); uresp.num_wqe_allocated = qp->sq.max_cnt; if (!srq) { uresp.rq_dbid = qp->rq.dbid; uresp.num_rq_pages = 1; - uresp.rq_page_size = qp->rq.len; - uresp.rq_page_addr[0] = qp->rq.pa; + uresp.rq_page_size = PAGE_ALIGN(qp->rq.len); + uresp.rq_page_addr[0] = virt_to_phys(qp->rq.va); uresp.num_rqe_allocated = qp->rq.max_cnt; } uresp.db_page_addr = usr_db; uresp.db_page_size = dev->nic_info.db_page_size; - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { - uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET; - uresp.db_rq_offset = ((qp->id & 0xFFFF) < 128) ? - OCRDMA_DB_GEN2_RQ1_OFFSET : OCRDMA_DB_GEN2_RQ2_OFFSET; - uresp.db_shift = (qp->id < 128) ? 24 : 16; - } else { - uresp.db_sq_offset = OCRDMA_DB_SQ_OFFSET; - uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET; - uresp.db_shift = 16; - } + uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET; + uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET; + uresp.db_shift = OCRDMA_DB_RQ_SHIFT; if (qp->dpp_enabled) { uresp.dpp_credit = dpp_credit_lmt; @@ -949,7 +1320,7 @@ } status = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (status) { - ocrdma_err("%s(%d) user copy error.\n", __func__, dev->id); + pr_err("%s(%d) user copy error.\n", __func__, dev->id); goto err; } status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0], @@ -973,14 +1344,13 @@ static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp, struct ocrdma_pd *pd) { - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { qp->sq_db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size) + OCRDMA_DB_GEN2_SQ_OFFSET; qp->rq_db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size) + - ((qp->id < 128) ? - OCRDMA_DB_GEN2_RQ1_OFFSET : OCRDMA_DB_GEN2_RQ2_OFFSET); + OCRDMA_DB_GEN2_RQ_OFFSET; } else { qp->sq_db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size) + @@ -1021,16 +1391,7 @@ qp->sq.max_sges = attrs->cap.max_send_sge; qp->rq.max_sges = attrs->cap.max_recv_sge; qp->state = OCRDMA_QPS_RST; -} - -static void ocrdma_set_qp_use_cnt(struct ocrdma_qp *qp, struct ocrdma_pd *pd) -{ - atomic_inc(&pd->use_cnt); - atomic_inc(&qp->sq_cq->use_cnt); - atomic_inc(&qp->rq_cq->use_cnt); - if (qp->srq) - atomic_inc(&qp->srq->use_cnt); - qp->ibqp.qp_num = qp->id; + qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false; } static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev, @@ -1050,7 +1411,7 @@ int status; struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_qp *qp; - struct ocrdma_dev *dev = pd->dev; + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); struct ocrdma_create_qp_ureq ureq; u16 dpp_credit_lmt, dpp_offset; @@ -1068,8 +1429,10 @@ status = -ENOMEM; goto gen_err; } - qp->dev = dev; ocrdma_set_qp_init_params(qp, pd, attrs); + if (udata == NULL) + qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 | + OCRDMA_QP_FAST_REG); mutex_lock(&dev->dev_lock); status = ocrdma_mbx_create_qp(qp, attrs, ureq.enable_dpp_cq, @@ -1080,8 +1443,6 @@ /* user space QP's wr_id table are managed in library */ if (udata == NULL) { - qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 | - OCRDMA_QP_FAST_REG); status = ocrdma_alloc_wr_id_tbl(qp); if (status) goto map_err; @@ -1099,7 +1460,7 @@ goto cpy_err; } ocrdma_store_gsi_qp_cq(dev, attrs); - ocrdma_set_qp_use_cnt(qp, pd); + qp->ibqp.qp_num = qp->id; mutex_unlock(&dev->dev_lock); return &qp->ibqp; @@ -1112,7 +1473,7 @@ kfree(qp->wqe_wr_id_tbl); kfree(qp->rqe_wr_id_tbl); kfree(qp); - ocrdma_err("%s(%d) error=%d\n", __func__, dev->id, status); + pr_err("%s(%d) error=%d\n", __func__, dev->id, status); gen_err: return ERR_PTR(status); } @@ -1126,15 +1487,16 @@ enum ib_qp_state old_qps; qp = get_ocrdma_qp(ibqp); - dev = qp->dev; + dev = get_ocrdma_dev(ibqp->device); if (attr_mask & IB_QP_STATE) - status = ocrdma_qp_state_machine(qp, attr->qp_state, &old_qps); + status = ocrdma_qp_state_change(qp, attr->qp_state, &old_qps); /* if new and previous states are same hw doesn't need to * know about it. */ if (status < 0) return status; - status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask, old_qps); + status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask); + return status; } @@ -1148,7 +1510,7 @@ enum ib_qp_state old_qps, new_qps; qp = get_ocrdma_qp(ibqp); - dev = qp->dev; + dev = get_ocrdma_dev(ibqp->device); /* syncronize with multiple context trying to change, retrive qps */ mutex_lock(&dev->dev_lock); @@ -1161,11 +1523,12 @@ new_qps = old_qps; spin_unlock_irqrestore(&qp->q_lock, flags); - if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) { - ocrdma_err("%s(%d) invalid attribute mask=0x%x specified for " - "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n", - __func__, dev->id, attr_mask, qp->id, ibqp->qp_type, - old_qps, new_qps); + if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask, + IB_LINK_LAYER_ETHERNET)) { + pr_err("%s(%d) invalid attribute mask=0x%x specified for\n" + "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n", + __func__, dev->id, attr_mask, qp->id, ibqp->qp_type, + old_qps, new_qps); goto param_err; } @@ -1214,7 +1577,7 @@ u32 qp_state; struct ocrdma_qp_params params; struct ocrdma_qp *qp = get_ocrdma_qp(ibqp); - struct ocrdma_dev *dev = qp->dev; + struct ocrdma_dev *dev = get_ocrdma_dev(ibqp->device); memset(¶ms, 0, sizeof(params)); mutex_lock(&dev->dev_lock); @@ -1222,8 +1585,8 @@ mutex_unlock(&dev->dev_lock); if (status) goto mbx_err; - qp_attr->qp_state = get_ibqp_state(IB_QPS_INIT); - qp_attr->cur_qp_state = get_ibqp_state(IB_QPS_INIT); + if (qp->qp_type == IB_QPT_UD) + qp_attr->qkey = params.qkey; qp_attr->path_mtu = ocrdma_mtu_int_to_enum(params.path_mtu_pkey_indx & OCRDMA_QP_PARAMS_PATH_MTU_MASK) >> @@ -1239,7 +1602,7 @@ qp_attr->cap.max_recv_wr = qp->rq.max_cnt - 1; qp_attr->cap.max_send_sge = qp->sq.max_sges; qp_attr->cap.max_recv_sge = qp->rq.max_sges; - qp_attr->cap.max_inline_data = dev->attr.max_inline_data; + qp_attr->cap.max_inline_data = qp->max_inline_data; qp_init_attr->cap = qp_attr->cap; memcpy(&qp_attr->ah_attr.grh.dgid, ¶ms.dgid[0], sizeof(params.dgid)); @@ -1250,7 +1613,7 @@ OCRDMA_QP_PARAMS_HOP_LMT_MASK) >> OCRDMA_QP_PARAMS_HOP_LMT_SHIFT; qp_attr->ah_attr.grh.traffic_class = (params.tclass_sq_psn & - OCRDMA_QP_PARAMS_SQ_PSN_MASK) >> + OCRDMA_QP_PARAMS_TCLASS_MASK) >> OCRDMA_QP_PARAMS_TCLASS_SHIFT; qp_attr->ah_attr.ah_flags = IB_AH_GRH; @@ -1278,6 +1641,8 @@ memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr)); qp_state = (params.max_sge_recv_flags & OCRDMA_QP_PARAMS_STATE_MASK) >> OCRDMA_QP_PARAMS_STATE_SHIFT; + qp_attr->qp_state = get_ibqp_state(qp_state); + qp_attr->cur_qp_state = qp_attr->qp_state; qp_attr->sq_draining = (qp_state == OCRDMA_QPS_SQ_DRAINING) ? 1 : 0; qp_attr->max_dest_rd_atomic = params.max_ord_ird >> OCRDMA_QP_PARAMS_MAX_ORD_SHIFT; @@ -1285,40 +1650,33 @@ params.max_ord_ird & OCRDMA_QP_PARAMS_MAX_IRD_MASK; qp_attr->en_sqd_async_notify = (params.max_sge_recv_flags & OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC) ? 1 : 0; + /* Sync driver QP state with FW */ + ocrdma_qp_state_change(qp, qp_attr->qp_state, NULL); mbx_err: return status; } -static void ocrdma_srq_toggle_bit(struct ocrdma_srq *srq, int idx) +static void ocrdma_srq_toggle_bit(struct ocrdma_srq *srq, unsigned int idx) { - int i = idx / 32; - unsigned int mask = (1 << (idx % 32)); + unsigned int i = idx / 32; + u32 mask = (1U << (idx % 32)); - if (srq->idx_bit_fields[i] & mask) - srq->idx_bit_fields[i] &= ~mask; - else - srq->idx_bit_fields[i] |= mask; + srq->idx_bit_fields[i] ^= mask; } static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q) { - int free_cnt; - if (q->head >= q->tail) - free_cnt = (q->max_cnt - q->head) + q->tail; - else - free_cnt = q->tail - q->head; - return free_cnt; + return ((q->max_wqe_idx - q->head) + q->tail) % q->max_cnt; } static int is_hw_sq_empty(struct ocrdma_qp *qp) { - return (qp->sq.tail == qp->sq.head && - ocrdma_hwq_free_cnt(&qp->sq) ? 1 : 0); + return (qp->sq.tail == qp->sq.head); } static int is_hw_rq_empty(struct ocrdma_qp *qp) { - return (qp->rq.tail == qp->rq.head) ? 1 : 0; + return (qp->rq.tail == qp->rq.head); } static void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info *q) @@ -1350,7 +1708,7 @@ int discard_cnt = 0; u32 cur_getp, stop_getp; struct ocrdma_cqe *cqe; - u32 qpn = 0; + u32 qpn = 0, wqe_idx = 0; spin_lock_irqsave(&cq->cq_lock, cq_flags); @@ -1379,34 +1737,40 @@ if (qpn == 0 || qpn != qp->id) goto skip_cqe; - /* mark cqe discarded so that it is not picked up later - * in the poll_cq(). - */ - discard_cnt += 1; - cqe->cmn.qpn = 0; - if (is_cqe_for_sq(cqe)) + if (is_cqe_for_sq(cqe)) { ocrdma_hwq_inc_tail(&qp->sq); - else { + } else { if (qp->srq) { + wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >> + OCRDMA_CQE_BUFTAG_SHIFT) & + qp->srq->rq.max_wqe_idx; + if (wqe_idx < 1) + BUG(); spin_lock_irqsave(&qp->srq->q_lock, flags); ocrdma_hwq_inc_tail(&qp->srq->rq); - ocrdma_srq_toggle_bit(qp->srq, cur_getp); + ocrdma_srq_toggle_bit(qp->srq, wqe_idx - 1); spin_unlock_irqrestore(&qp->srq->q_lock, flags); - } else + } else { ocrdma_hwq_inc_tail(&qp->rq); + } } + /* mark cqe discarded so that it is not picked up later + * in the poll_cq(). + */ + discard_cnt += 1; + cqe->cmn.qpn = 0; skip_cqe: cur_getp = (cur_getp + 1) % cq->max_hw_cqe; } while (cur_getp != stop_getp); spin_unlock_irqrestore(&cq->cq_lock, cq_flags); } -static void ocrdma_del_flush_qp(struct ocrdma_qp *qp) +void ocrdma_del_flush_qp(struct ocrdma_qp *qp) { int found = false; unsigned long flags; - struct ocrdma_dev *dev = qp->dev; + struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device); /* sync with any active CQ poll */ spin_lock_irqsave(&dev->flush_q_lock, flags); @@ -1423,29 +1787,30 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp) { - int status; struct ocrdma_pd *pd; struct ocrdma_qp *qp; struct ocrdma_dev *dev; struct ib_qp_attr attrs; - int attr_mask = IB_QP_STATE; + int attr_mask; unsigned long flags; qp = get_ocrdma_qp(ibqp); - dev = qp->dev; + dev = get_ocrdma_dev(ibqp->device); - attrs.qp_state = IB_QPS_ERR; pd = qp->pd; /* change the QP state to ERROR */ - _ocrdma_modify_qp(ibqp, &attrs, attr_mask); - + if (qp->state != OCRDMA_QPS_RST) { + attrs.qp_state = IB_QPS_ERR; + attr_mask = IB_QP_STATE; + _ocrdma_modify_qp(ibqp, &attrs, attr_mask); + } /* ensure that CQEs for newly created QP (whose id may be same with * one which just getting destroyed are same), dont get * discarded until the old CQEs are discarded. */ mutex_lock(&dev->dev_lock); - status = ocrdma_mbx_destroy_qp(dev, qp); + (void) ocrdma_mbx_destroy_qp(dev, qp); /* * acquire CQ lock while destroy is in progress, in order to @@ -1468,39 +1833,38 @@ mutex_unlock(&dev->dev_lock); if (pd->uctx) { - ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa, qp->sq.len); + ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa, + PAGE_ALIGN(qp->sq.len)); if (!qp->srq) - ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa, qp->rq.len); + ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa, + PAGE_ALIGN(qp->rq.len)); } ocrdma_del_flush_qp(qp); - atomic_dec(&qp->pd->use_cnt); - atomic_dec(&qp->sq_cq->use_cnt); - atomic_dec(&qp->rq_cq->use_cnt); - if (qp->srq) - atomic_dec(&qp->srq->use_cnt); kfree(qp->wqe_wr_id_tbl); kfree(qp->rqe_wr_id_tbl); kfree(qp); - return status; + return 0; } -static int ocrdma_copy_srq_uresp(struct ocrdma_srq *srq, struct ib_udata *udata) +static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq, + struct ib_udata *udata) { int status; struct ocrdma_create_srq_uresp uresp; + memset(&uresp, 0, sizeof(uresp)); uresp.rq_dbid = srq->rq.dbid; uresp.num_rq_pages = 1; - uresp.rq_page_addr[0] = srq->rq.pa; + uresp.rq_page_addr[0] = virt_to_phys(srq->rq.va); uresp.rq_page_size = srq->rq.len; - uresp.db_page_addr = srq->dev->nic_info.unmapped_db + - (srq->pd->id * srq->dev->nic_info.db_page_size); - uresp.db_page_size = srq->dev->nic_info.db_page_size; + uresp.db_page_addr = dev->nic_info.unmapped_db + + (srq->pd->id * dev->nic_info.db_page_size); + uresp.db_page_size = dev->nic_info.db_page_size; uresp.num_rqe_allocated = srq->rq.max_cnt; - if (srq->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { - uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ1_OFFSET; + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { + uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET; uresp.db_shift = 24; } else { uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET; @@ -1523,7 +1887,7 @@ { int status = -ENOMEM; struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); - struct ocrdma_dev *dev = pd->dev; + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); struct ocrdma_srq *srq; if (init_attr->attr.max_sge > dev->attr.max_recv_sge) @@ -1536,10 +1900,9 @@ return ERR_PTR(status); spin_lock_init(&srq->q_lock); - srq->dev = dev; srq->pd = pd; srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size); - status = ocrdma_mbx_create_srq(srq, init_attr, pd); + status = ocrdma_mbx_create_srq(dev, srq, init_attr, pd); if (status) goto err; @@ -1565,14 +1928,12 @@ goto arm_err; } - atomic_set(&srq->use_cnt, 0); if (udata) { - status = ocrdma_copy_srq_uresp(srq, udata); + status = ocrdma_copy_srq_uresp(dev, srq, udata); if (status) goto arm_err; } - atomic_inc(&pd->use_cnt); return &srq->ibsrq; arm_err: @@ -1614,22 +1975,16 @@ { int status; struct ocrdma_srq *srq; - struct ocrdma_dev *dev; + struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device); srq = get_ocrdma_srq(ibsrq); - dev = srq->dev; - if (atomic_read(&srq->use_cnt)) { - ocrdma_err("%s(%d) err, srq=0x%x in use\n", - __func__, dev->id, srq->id); - return -EAGAIN; - } status = ocrdma_mbx_destroy_srq(dev, srq); if (srq->pd->uctx) - ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, srq->rq.len); + ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, + PAGE_ALIGN(srq->rq.len)); - atomic_dec(&srq->pd->use_cnt); kfree(srq->idx_bit_fields); kfree(srq->rqe_wr_id_tbl); kfree(srq); @@ -1643,14 +1998,16 @@ { struct ocrdma_ewqe_ud_hdr *ud_hdr = (struct ocrdma_ewqe_ud_hdr *)(hdr + 1); - struct ocrdma_ah *ah = get_ocrdma_ah(wr->wr.ud.ah); + struct ocrdma_ah *ah = get_ocrdma_ah(ud_wr(wr)->ah); - ud_hdr->rsvd_dest_qpn = wr->wr.ud.remote_qpn; + ud_hdr->rsvd_dest_qpn = ud_wr(wr)->remote_qpn; if (qp->qp_type == IB_QPT_GSI) ud_hdr->qkey = qp->qkey; else - ud_hdr->qkey = wr->wr.ud.remote_qkey; + ud_hdr->qkey = ud_wr(wr)->remote_qkey; ud_hdr->rsvd_ahid = ah->id; + if (ah->av->valid & OCRDMA_AV_VLAN_VALID) + hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT); } static void ocrdma_build_sges(struct ocrdma_hdr_wqe *hdr, @@ -1670,23 +2027,43 @@ memset(sge, 0, sizeof(*sge)); } +static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge) +{ + uint32_t total_len = 0, i; + + for (i = 0; i < num_sge; i++) + total_len += sg_list[i].length; + return total_len; +} + + static int ocrdma_build_inline_sges(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, struct ocrdma_sge *sge, struct ib_send_wr *wr, u32 wqe_size) { - if (wr->send_flags & IB_SEND_INLINE) { - if (wr->sg_list[0].length > qp->max_inline_data) { - ocrdma_err("%s() supported_len=0x%x," - " unspported len req=0x%x\n", __func__, - qp->max_inline_data, wr->sg_list[0].length); + int i; + char *dpp_addr; + + if (wr->send_flags & IB_SEND_INLINE && qp->qp_type != IB_QPT_UD) { + hdr->total_len = ocrdma_sglist_len(wr->sg_list, wr->num_sge); + if (unlikely(hdr->total_len > qp->max_inline_data)) { + pr_err("%s() supported_len=0x%x,\n" + " unsupported len req=0x%x\n", __func__, + qp->max_inline_data, hdr->total_len); return -EINVAL; } - memcpy(sge, - (void *)(unsigned long)wr->sg_list[0].addr, - wr->sg_list[0].length); - hdr->total_len = wr->sg_list[0].length; + dpp_addr = (char *)sge; + for (i = 0; i < wr->num_sge; i++) { + memcpy(dpp_addr, + (void *)(unsigned long)wr->sg_list[i].addr, + wr->sg_list[i].length); + dpp_addr += wr->sg_list[i].length; + } + wqe_size += roundup(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES); + if (0 == hdr->total_len) + wqe_size += sizeof(struct ocrdma_sge); hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT); } else { ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list); @@ -1711,8 +2088,9 @@ ocrdma_build_ud_hdr(qp, hdr, wr); sge = (struct ocrdma_sge *)(hdr + 2); wqe_size += sizeof(struct ocrdma_ewqe_ud_hdr); - } else + } else { sge = (struct ocrdma_sge *)(hdr + 1); + } status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size); return status; @@ -1729,9 +2107,9 @@ status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size); if (status) return status; - ext_rw->addr_lo = wr->wr.rdma.remote_addr; - ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr); - ext_rw->lrkey = wr->wr.rdma.rkey; + ext_rw->addr_lo = rdma_wr(wr)->remote_addr; + ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr); + ext_rw->lrkey = rdma_wr(wr)->rkey; ext_rw->len = hdr->total_len; return 0; } @@ -1749,15 +2127,81 @@ hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT); hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT); - ext_rw->addr_lo = wr->wr.rdma.remote_addr; - ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr); - ext_rw->lrkey = wr->wr.rdma.rkey; + ext_rw->addr_lo = rdma_wr(wr)->remote_addr; + ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr); + ext_rw->lrkey = rdma_wr(wr)->rkey; ext_rw->len = hdr->total_len; } +static int get_encoded_page_size(int pg_sz) +{ + /* Max size is 256M 4096 << 16 */ + int i = 0; + for (; i < 17; i++) + if (pg_sz == (4096 << i)) + break; + return i; +} + +static int ocrdma_build_reg(struct ocrdma_qp *qp, + struct ocrdma_hdr_wqe *hdr, + struct ib_reg_wr *wr) +{ + u64 fbo; + struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1); + struct ocrdma_mr *mr = get_ocrdma_mr(wr->mr); + struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table; + struct ocrdma_pbe *pbe; + u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr); + int num_pbes = 0, i; + + wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES); + + hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT); + hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT); + + if (wr->access & IB_ACCESS_LOCAL_WRITE) + hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR; + if (wr->access & IB_ACCESS_REMOTE_WRITE) + hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR; + if (wr->access & IB_ACCESS_REMOTE_READ) + hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD; + hdr->lkey = wr->key; + hdr->total_len = mr->ibmr.length; + + fbo = mr->ibmr.iova - mr->pages[0]; + + fast_reg->va_hi = upper_32_bits(mr->ibmr.iova); + fast_reg->va_lo = (u32) (mr->ibmr.iova & 0xffffffff); + fast_reg->fbo_hi = upper_32_bits(fbo); + fast_reg->fbo_lo = (u32) fbo & 0xffffffff; + fast_reg->num_sges = mr->npages; + fast_reg->size_sge = get_encoded_page_size(mr->ibmr.page_size); + + pbe = pbl_tbl->va; + for (i = 0; i < mr->npages; i++) { + u64 buf_addr = mr->pages[i]; + + pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK)); + pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr)); + num_pbes += 1; + pbe++; + + /* if the pbl is full storing the pbes, + * move to next pbl. + */ + if (num_pbes == (mr->hwmr.pbl_size/sizeof(u64))) { + pbl_tbl++; + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + } + } + + return 0; +} + static void ocrdma_ring_sq_db(struct ocrdma_qp *qp) { - u32 val = qp->sq.dbid | (1 << 16); + u32 val = qp->sq.dbid | (1 << OCRDMA_DB_SQ_SHIFT); iowrite32(val, qp->sq_db); } @@ -1773,18 +2217,27 @@ spin_lock_irqsave(&qp->q_lock, flags); if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) { spin_unlock_irqrestore(&qp->q_lock, flags); + *bad_wr = wr; return -EINVAL; } while (wr) { + if (qp->qp_type == IB_QPT_UD && + (wr->opcode != IB_WR_SEND && + wr->opcode != IB_WR_SEND_WITH_IMM)) { + *bad_wr = wr; + status = -EINVAL; + break; + } if (ocrdma_hwq_free_cnt(&qp->sq) == 0 || wr->num_sge > qp->sq.max_sges) { + *bad_wr = wr; status = -ENOMEM; break; } hdr = ocrdma_hwq_head(&qp->sq); hdr->cw = 0; - if (wr->send_flags & IB_SEND_SIGNALED) + if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled) hdr->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT); if (wr->send_flags & IB_SEND_FENCE) hdr->cw |= @@ -1814,18 +2267,20 @@ hdr->cw |= (OCRDMA_WRITE << OCRDMA_WQE_OPCODE_SHIFT); status = ocrdma_build_write(qp, hdr, wr); break; - case IB_WR_RDMA_READ_WITH_INV: - hdr->cw |= (OCRDMA_FLAG_INV << OCRDMA_WQE_FLAGS_SHIFT); case IB_WR_RDMA_READ: ocrdma_build_read(qp, hdr, wr); break; case IB_WR_LOCAL_INV: hdr->cw |= (OCRDMA_LKEY_INV << OCRDMA_WQE_OPCODE_SHIFT); - hdr->cw |= (sizeof(struct ocrdma_hdr_wqe) / + hdr->cw |= ((sizeof(struct ocrdma_hdr_wqe) + + sizeof(struct ocrdma_sge)) / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT; hdr->lkey = wr->ex.invalidate_rkey; break; + case IB_WR_REG_MR: + status = ocrdma_build_reg(qp, hdr, reg_wr(wr)); + break; default: status = -EINVAL; break; @@ -1834,7 +2289,7 @@ *bad_wr = wr; break; } - if (wr->send_flags & IB_SEND_SIGNALED) + if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled) qp->wqe_wr_id_tbl[qp->sq.head].signaled = 1; else qp->wqe_wr_id_tbl[qp->sq.head].signaled = 0; @@ -1856,7 +2311,7 @@ static void ocrdma_ring_rq_db(struct ocrdma_qp *qp) { - u32 val = qp->rq.dbid | (1 << OCRDMA_GET_NUM_POSTED_SHIFT_VAL(qp)); + u32 val = qp->rq.dbid | (1 << OCRDMA_DB_RQ_SHIFT); iowrite32(val, qp->rq_db); } @@ -1944,7 +2399,7 @@ if (row == srq->bit_fields_len) BUG(); - return indx; + return indx + 1; /* Use from index 1 */ } static void ocrdma_ring_srq_db(struct ocrdma_srq *srq) @@ -1992,7 +2447,7 @@ static enum ib_wc_status ocrdma_to_ibwc_err(u16 status) { - enum ib_wc_status ibwc_status = IB_WC_GENERAL_ERR; + enum ib_wc_status ibwc_status; switch (status) { case OCRDMA_CQE_GENERAL_ERR: @@ -2061,7 +2516,7 @@ default: ibwc_status = IB_WC_GENERAL_ERR; break; - }; + } return ibwc_status; } @@ -2089,15 +2544,18 @@ case OCRDMA_SEND: ibwc->opcode = IB_WC_SEND; break; + case OCRDMA_FR_MR: + ibwc->opcode = IB_WC_REG_MR; + break; case OCRDMA_LKEY_INV: ibwc->opcode = IB_WC_LOCAL_INV; break; default: ibwc->status = IB_WC_GENERAL_ERR; - ocrdma_err("%s() invalid opcode received = 0x%x\n", - __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK); + pr_err("%s() invalid opcode received = 0x%x\n", + __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK); break; - }; + } } static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp *qp, @@ -2142,7 +2600,7 @@ ibwc->status = ocrdma_to_ibwc_err(status); ocrdma_flush_qp(qp); - ocrdma_qp_state_machine(qp, IB_QPS_ERR, NULL); + ocrdma_qp_state_change(qp, IB_QPS_ERR, NULL); /* if wqe/rqe pending for which cqe needs to be returned, * trigger inflating it. @@ -2179,8 +2637,11 @@ bool *polled, bool *stop) { bool expand; + struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device); int status = (le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT; + if (status < OCRDMA_MAX_CQE_ERR) + atomic_inc(&dev->cqe_err_stats[status]); /* when hw sq is empty, but rq is not empty, so we continue * to keep the cqe in order to get the cq event again. @@ -2203,6 +2664,11 @@ *stop = true; expand = false; } + } else if (is_hw_sq_empty(qp)) { + /* Do nothing */ + expand = false; + *polled = false; + *stop = false; } else { *polled = true; expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status); @@ -2227,7 +2693,8 @@ ocrdma_update_wc(qp, ibwc, tail); *polled = true; } - wqe_idx = le32_to_cpu(cqe->wq.wqeidx) & OCRDMA_CQE_WQEIDX_MASK; + wqe_idx = (le32_to_cpu(cqe->wq.wqeidx) & + OCRDMA_CQE_WQEIDX_MASK) & qp->sq.max_wqe_idx; if (tail != wqe_idx) expand = true; /* Coalesced CQE can't be consumed yet */ @@ -2276,10 +2743,14 @@ u32 wqe_idx; srq = get_ocrdma_srq(qp->ibqp.srq); - wqe_idx = le32_to_cpu(cqe->rq.buftag_qpn) >> OCRDMA_CQE_BUFTAG_SHIFT; + wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >> + OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx; + if (wqe_idx < 1) + BUG(); + ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx]; spin_lock_irqsave(&srq->q_lock, flags); - ocrdma_srq_toggle_bit(srq, wqe_idx); + ocrdma_srq_toggle_bit(srq, wqe_idx - 1); spin_unlock_irqrestore(&srq->q_lock, flags); ocrdma_hwq_inc_tail(&srq->rq); } @@ -2289,6 +2760,10 @@ int status) { bool expand; + struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device); + + if (status < OCRDMA_MAX_CQE_ERR) + atomic_inc(&dev->cqe_err_stats[status]); /* when hw_rq is empty, but wq is not empty, so continue * to keep the cqe to get the cq event again. @@ -2303,6 +2778,11 @@ *stop = true; expand = false; } + } else if (is_hw_rq_empty(qp)) { + /* Do nothing */ + expand = false; + *polled = false; + *stop = false; } else { *polled = true; expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status); @@ -2333,9 +2813,9 @@ ibwc->ex.invalidate_rkey = le32_to_cpu(cqe->rq.lkey_immdt); ibwc->wc_flags |= IB_WC_WITH_INVALIDATE; } - if (qp->ibqp.srq) + if (qp->ibqp.srq) { ocrdma_update_free_srq_cqe(ibwc, cqe, qp); - else { + } else { ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail]; ocrdma_hwq_inc_tail(&qp->rq); } @@ -2348,13 +2828,14 @@ bool expand = false; ibwc->wc_flags = 0; - if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) + if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) { status = (le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT; - else + } else { status = (le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT; + } if (status == OCRDMA_CQE_SUCCESS) { *polled = true; @@ -2372,9 +2853,10 @@ if (cq->phase_change) { if (cur_getp == 0) cq->phase = (~cq->phase & OCRDMA_CQE_VALID); - } else + } else { /* clear valid bit */ cqe->flags_status_srcqpn = 0; + } } static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries, @@ -2385,7 +2867,7 @@ bool expand = false; int polled_hw_cqes = 0; struct ocrdma_qp *qp = NULL; - struct ocrdma_dev *dev = cq->dev; + struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device); struct ocrdma_cqe *cqe; u16 cur_getp; bool polled = false; bool stop = false; @@ -2429,10 +2911,13 @@ } stop_cqe: cq->getp = cur_getp; - if (polled_hw_cqes || expand || stop) { - ocrdma_ring_cq_db(dev, cq->id, cq->armed, cq->solicited, - polled_hw_cqes); + if (cq->deferred_arm || polled_hw_cqes) { + ocrdma_ring_cq_db(dev, cq->id, cq->deferred_arm, + cq->deferred_sol, polled_hw_cqes); + cq->deferred_arm = false; + cq->deferred_sol = false; } + return i; } @@ -2451,8 +2936,9 @@ } else if (!is_hw_rq_empty(qp) && qp->rq_cq == cq) { ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail]; ocrdma_hwq_inc_tail(&qp->rq); - } else + } else { return err_cqes; + } ibwc->byte_len = 0; ibwc->status = IB_WC_WR_FLUSH_ERR; ibwc = ibwc + 1; @@ -2465,14 +2951,11 @@ int ocrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) { int cqes_to_poll = num_entries; - struct ocrdma_cq *cq = NULL; - unsigned long flags; - struct ocrdma_dev *dev; + struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); + struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); int num_os_cqe = 0, err_cqes = 0; struct ocrdma_qp *qp; - - cq = get_ocrdma_cq(ibcq); - dev = cq->dev; + unsigned long flags; /* poll cqes from adapter CQ */ spin_lock_irqsave(&cq->cq_lock, flags); @@ -2503,34 +2986,268 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags) { - struct ocrdma_cq *cq; - unsigned long flags; - struct ocrdma_dev *dev; + struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); + struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); u16 cq_id; - u16 cur_getp; - struct ocrdma_cqe *cqe; + unsigned long flags; + bool arm_needed = false, sol_needed = false; - cq = get_ocrdma_cq(ibcq); cq_id = cq->id; - dev = cq->dev; spin_lock_irqsave(&cq->cq_lock, flags); if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED) - cq->armed = true; + arm_needed = true; if (cq_flags & IB_CQ_SOLICITED) - cq->solicited = true; + sol_needed = true; - cur_getp = cq->getp; - cqe = cq->va + cur_getp; - - /* check whether any valid cqe exist or not, if not then safe to - * arm. If cqe is not yet consumed, then let it get consumed and then - * we arm it to avoid false interrupts. - */ - if (!is_cqe_valid(cq, cqe) || cq->arm_needed) { - cq->arm_needed = false; - ocrdma_ring_cq_db(dev, cq_id, cq->armed, cq->solicited, 0); + if (cq->first_arm) { + ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0); + cq->first_arm = false; } + + cq->deferred_arm = true; + cq->deferred_sol = sol_needed; spin_unlock_irqrestore(&cq->cq_lock, flags); + return 0; } + +struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, + enum ib_mr_type mr_type, + u32 max_num_sg) +{ + int status; + struct ocrdma_mr *mr; + struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); + + if (mr_type != IB_MR_TYPE_MEM_REG) + return ERR_PTR(-EINVAL); + + if (max_num_sg > dev->attr.max_pages_per_frmr) + return ERR_PTR(-EINVAL); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL); + if (!mr->pages) { + status = -ENOMEM; + goto pl_err; + } + + status = ocrdma_get_pbl_info(dev, mr, max_num_sg); + if (status) + goto pbl_err; + mr->hwmr.fr_mr = 1; + mr->hwmr.remote_rd = 0; + mr->hwmr.remote_wr = 0; + mr->hwmr.local_rd = 0; + mr->hwmr.local_wr = 0; + mr->hwmr.mw_bind = 0; + status = ocrdma_build_pbl_tbl(dev, &mr->hwmr); + if (status) + goto pbl_err; + status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, 0); + if (status) + goto mbx_err; + mr->ibmr.rkey = mr->hwmr.lkey; + mr->ibmr.lkey = mr->hwmr.lkey; + dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = + (unsigned long) mr; + return &mr->ibmr; +mbx_err: + ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); +pbl_err: + kfree(mr->pages); +pl_err: + kfree(mr); + return ERR_PTR(-ENOMEM); +} + +#define MAX_KERNEL_PBE_SIZE 65536 +static inline int count_kernel_pbes(struct ib_phys_buf *buf_list, + int buf_cnt, u32 *pbe_size) +{ + u64 total_size = 0; + u64 buf_size = 0; + int i; + *pbe_size = roundup(buf_list[0].size, PAGE_SIZE); + *pbe_size = roundup_pow_of_two(*pbe_size); + + /* find the smallest PBE size that we can have */ + for (i = 0; i < buf_cnt; i++) { + /* first addr may not be page aligned, so ignore checking */ + if ((i != 0) && ((buf_list[i].addr & ~PAGE_MASK) || + (buf_list[i].size & ~PAGE_MASK))) { + return 0; + } + + /* if configured PBE size is greater then the chosen one, + * reduce the PBE size. + */ + buf_size = roundup(buf_list[i].size, PAGE_SIZE); + /* pbe_size has to be even multiple of 4K 1,2,4,8...*/ + buf_size = roundup_pow_of_two(buf_size); + if (*pbe_size > buf_size) + *pbe_size = buf_size; + + total_size += buf_size; + } + *pbe_size = *pbe_size > MAX_KERNEL_PBE_SIZE ? + (MAX_KERNEL_PBE_SIZE) : (*pbe_size); + + /* num_pbes = total_size / (*pbe_size); this is implemented below. */ + + return total_size >> ilog2(*pbe_size); +} + +static void build_kernel_pbes(struct ib_phys_buf *buf_list, int ib_buf_cnt, + u32 pbe_size, struct ocrdma_pbl *pbl_tbl, + struct ocrdma_hw_mr *hwmr) +{ + int i; + int idx; + int pbes_per_buf = 0; + u64 buf_addr = 0; + int num_pbes; + struct ocrdma_pbe *pbe; + int total_num_pbes = 0; + + if (!hwmr->num_pbes) + return; + + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + num_pbes = 0; + + /* go through the OS phy regions & fill hw pbe entries into pbls. */ + for (i = 0; i < ib_buf_cnt; i++) { + buf_addr = buf_list[i].addr; + pbes_per_buf = + roundup_pow_of_two(roundup(buf_list[i].size, PAGE_SIZE)) / + pbe_size; + hwmr->len += buf_list[i].size; + /* number of pbes can be more for one OS buf, when + * buffers are of different sizes. + * split the ib_buf to one or more pbes. + */ + for (idx = 0; idx < pbes_per_buf; idx++) { + /* we program always page aligned addresses, + * first unaligned address is taken care by fbo. + */ + if (i == 0) { + /* for non zero fbo, assign the + * start of the page. + */ + pbe->pa_lo = + cpu_to_le32((u32) (buf_addr & PAGE_MASK)); + pbe->pa_hi = + cpu_to_le32((u32) upper_32_bits(buf_addr)); + } else { + pbe->pa_lo = + cpu_to_le32((u32) (buf_addr & 0xffffffff)); + pbe->pa_hi = + cpu_to_le32((u32) upper_32_bits(buf_addr)); + } + buf_addr += pbe_size; + num_pbes += 1; + total_num_pbes += 1; + pbe++; + + if (total_num_pbes == hwmr->num_pbes) + goto mr_tbl_done; + /* if the pbl is full storing the pbes, + * move to next pbl. + */ + if (num_pbes == (hwmr->pbl_size/sizeof(u64))) { + pbl_tbl++; + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + num_pbes = 0; + } + } + } +mr_tbl_done: + return; +} + +struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *ibpd, + struct ib_phys_buf *buf_list, + int buf_cnt, int acc, u64 *iova_start) +{ + int status = -ENOMEM; + struct ocrdma_mr *mr; + struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); + u32 num_pbes; + u32 pbe_size = 0; + + if ((acc & IB_ACCESS_REMOTE_WRITE) && !(acc & IB_ACCESS_LOCAL_WRITE)) + return ERR_PTR(-EINVAL); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(status); + + num_pbes = count_kernel_pbes(buf_list, buf_cnt, &pbe_size); + if (num_pbes == 0) { + status = -EINVAL; + goto pbl_err; + } + status = ocrdma_get_pbl_info(dev, mr, num_pbes); + if (status) + goto pbl_err; + + mr->hwmr.pbe_size = pbe_size; + mr->hwmr.fbo = *iova_start - (buf_list[0].addr & PAGE_MASK); + mr->hwmr.va = *iova_start; + mr->hwmr.local_rd = 1; + mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; + mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; + mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; + mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0; + + status = ocrdma_build_pbl_tbl(dev, &mr->hwmr); + if (status) + goto pbl_err; + build_kernel_pbes(buf_list, buf_cnt, pbe_size, mr->hwmr.pbl_table, + &mr->hwmr); + status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc); + if (status) + goto mbx_err; + + mr->ibmr.lkey = mr->hwmr.lkey; + if (mr->hwmr.remote_wr || mr->hwmr.remote_rd) + mr->ibmr.rkey = mr->hwmr.lkey; + return &mr->ibmr; + +mbx_err: + ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); +pbl_err: + kfree(mr); + return ERR_PTR(status); +} + +static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct ocrdma_mr *mr = get_ocrdma_mr(ibmr); + + if (unlikely(mr->npages == mr->hwmr.num_pbes)) + return -ENOMEM; + + mr->pages[mr->npages++] = addr; + + return 0; +} + +int ocrdma_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) +{ + struct ocrdma_mr *mr = get_ocrdma_mr(ibmr); + + mr->npages = 0; + + return ib_sg_to_pages(ibmr, sg, sg_nents, ocrdma_set_page); +}