/* * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * * Copyright(c) 2015 Intel Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * BSD LICENSE * * Copyright(c) 2015 Intel Corporation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * - Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ /* * This file contains all of the code that is specific to the HFI chip */ #include #include #include #include #include "hfi.h" #include "trace.h" #include "mad.h" #include "pio.h" #include "sdma.h" #include "eprom.h" #define NUM_IB_PORTS 1 uint kdeth_qp; module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO); MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix"); uint num_vls = HFI1_MAX_VLS_SUPPORTED; module_param(num_vls, uint, S_IRUGO); MODULE_PARM_DESC(num_vls, "Set number of Virtual Lanes to use (1-8)"); /* * Default time to aggregate two 10K packets from the idle state * (timer not running). The timer starts at the end of the first packet, * so only the time for one 10K packet and header plus a bit extra is needed. 
* 10 * 1024 + 64 header byte = 10304 byte * 10304 byte / 12.5 GB/s = 824.32ns */ uint rcv_intr_timeout = (824 + 16); /* 16 is for coalescing interrupt */ module_param(rcv_intr_timeout, uint, S_IRUGO); MODULE_PARM_DESC(rcv_intr_timeout, "Receive interrupt mitigation timeout in ns"); uint rcv_intr_count = 16; /* same as qib */ module_param(rcv_intr_count, uint, S_IRUGO); MODULE_PARM_DESC(rcv_intr_count, "Receive interrupt mitigation count"); ushort link_crc_mask = SUPPORTED_CRCS; module_param(link_crc_mask, ushort, S_IRUGO); MODULE_PARM_DESC(link_crc_mask, "CRCs to use on the link"); uint loopback; module_param_named(loopback, loopback, uint, S_IRUGO); MODULE_PARM_DESC(loopback, "Put into loopback mode (1 = serdes, 3 = external cable"); /* Other driver tunables */ uint rcv_intr_dynamic = 1; /* enable dynamic mode for rcv int mitigation*/ static ushort crc_14b_sideband = 1; static uint use_flr = 1; uint quick_linkup; /* skip LNI */ struct flag_table { u64 flag; /* the flag */ char *str; /* description string */ u16 extra; /* extra information */ u16 unused0; u32 unused1; }; /* str must be a string constant */ #define FLAG_ENTRY(str, extra, flag) {flag, str, extra} #define FLAG_ENTRY0(str, flag) {flag, str, 0} /* Send Error Consequences */ #define SEC_WRITE_DROPPED 0x1 #define SEC_PACKET_DROPPED 0x2 #define SEC_SC_HALTED 0x4 /* per-context only */ #define SEC_SPC_FREEZE 0x8 /* per-HFI only */ #define VL15CTXT 1 #define MIN_KERNEL_KCTXTS 2 #define NUM_MAP_REGS 32 /* Bit offset into the GUID which carries HFI id information */ #define GUID_HFI_INDEX_SHIFT 39 /* extract the emulation revision */ #define emulator_rev(dd) ((dd)->irev >> 8) /* parallel and serial emulation versions are 3 and 4 respectively */ #define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3) #define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4) /* RSM fields */ /* packet type */ #define IB_PACKET_TYPE 2ull #define QW_SHIFT 6ull /* QPN[7..1] */ #define QPN_WIDTH 7ull /* LRH.BTH: QW 0, OFFSET 48 - for 
match */
/*
 * A match/select offset is built as (quad-word index << QW_SHIFT) | bit
 * offset; LRH_BTH_OFFSET() does this for quad-word LRH_BTH_QW.
 */
#define LRH_BTH_QW             0ull
#define LRH_BTH_BIT_OFFSET     48ull
#define LRH_BTH_OFFSET(off)    ((LRH_BTH_QW << QW_SHIFT) | (off))
#define LRH_BTH_MATCH_OFFSET   LRH_BTH_OFFSET(LRH_BTH_BIT_OFFSET)
/* NOTE(review): LRH_BTH_SELECT is defined with an empty expansion */
#define LRH_BTH_SELECT
#define LRH_BTH_MASK           3ull
#define LRH_BTH_VALUE          2ull

/* LRH.SC[3..0] QW 0, OFFSET 56 - for match */
#define LRH_SC_QW              0ull
#define LRH_SC_BIT_OFFSET      56ull
#define LRH_SC_OFFSET(off)     ((LRH_SC_QW << QW_SHIFT) | (off))
#define LRH_SC_MATCH_OFFSET    LRH_SC_OFFSET(LRH_SC_BIT_OFFSET)
#define LRH_SC_MASK            128ull
#define LRH_SC_VALUE           0ull

/* SC[n..0] QW 0, OFFSET 60 - for select */
#define LRH_SC_SELECT_OFFSET  ((LRH_SC_QW << QW_SHIFT) | (60ull))

/* QPN[m+n:1] QW 1, OFFSET 1 */
#define QPN_SELECT_OFFSET      ((1ull << QW_SHIFT) | (1ull))

/*
 * defines to build power on SC2VL table
 *
 * SC2VL_VAL(num, sc, val, ...) takes eight (sc, val) pairs and ORs together
 * each val, cast to u64 and shifted left by SEND_SC2VLT<num>_SC<sc>_SHIFT.
 * The sc arguments are token-pasted into the shift macro name, so they
 * must be literal tokens, not expressions.
 */
#define SC2VL_VAL( \
	num, \
	sc0, sc0val, \
	sc1, sc1val, \
	sc2, sc2val, \
	sc3, sc3val, \
	sc4, sc4val, \
	sc5, sc5val, \
	sc6, sc6val, \
	sc7, sc7val) \
( \
	((u64)(sc0val) << SEND_SC2VLT##num##_SC##sc0##_SHIFT) | \
	((u64)(sc1val) << SEND_SC2VLT##num##_SC##sc1##_SHIFT) | \
	((u64)(sc2val) << SEND_SC2VLT##num##_SC##sc2##_SHIFT) | \
	((u64)(sc3val) << SEND_SC2VLT##num##_SC##sc3##_SHIFT) | \
	((u64)(sc4val) << SEND_SC2VLT##num##_SC##sc4##_SHIFT) | \
	((u64)(sc5val) << SEND_SC2VLT##num##_SC##sc5##_SHIFT) | \
	((u64)(sc6val) << SEND_SC2VLT##num##_SC##sc6##_SHIFT) | \
	((u64)(sc7val) << SEND_SC2VLT##num##_SC##sc7##_SHIFT)   \
)

/*
 * DC_SC_VL_VAL(range, e, val, ...) is the sixteen-pair counterpart: each
 * val is cast to u64 and shifted left by
 * DCC_CFG_SC_VL_TABLE_<range>_ENTRY<e>_SHIFT, and the results are ORed.
 * range and the e arguments are token-pasted and must be literal tokens.
 */
#define DC_SC_VL_VAL( \
	range, \
	e0, e0val, \
	e1, e1val, \
	e2, e2val, \
	e3, e3val, \
	e4, e4val, \
	e5, e5val, \
	e6, e6val, \
	e7, e7val, \
	e8, e8val, \
	e9, e9val, \
	e10, e10val, \
	e11, e11val, \
	e12, e12val, \
	e13, e13val, \
	e14, e14val, \
	e15, e15val) \
( \
	((u64)(e0val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e0##_SHIFT) | \
	((u64)(e1val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e1##_SHIFT) | \
	((u64)(e2val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e2##_SHIFT) | \
	((u64)(e3val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e3##_SHIFT) | \
	((u64)(e4val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e4##_SHIFT) | \
	((u64)(e5val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e5##_SHIFT) | \
	((u64)(e6val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e6##_SHIFT) | \
	((u64)(e7val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e7##_SHIFT) | \
	((u64)(e8val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e8##_SHIFT) | \
	((u64)(e9val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e9##_SHIFT) | \
	((u64)(e10val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e10##_SHIFT) | \
	((u64)(e11val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e11##_SHIFT) | \
	((u64)(e12val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e12##_SHIFT) | \
	((u64)(e13val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e13##_SHIFT) | \
	((u64)(e14val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e14##_SHIFT) | \
	((u64)(e15val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e15##_SHIFT) \
)

/* all CceStatus sub-block freeze bits */
#define ALL_FROZE (CCE_STATUS_SDMA_FROZE_SMASK \
			| CCE_STATUS_RXE_FROZE_SMASK \
			| CCE_STATUS_TXE_FROZE_SMASK \
			| CCE_STATUS_TXE_PIO_FROZE_SMASK)
/* all CceStatus sub-block TXE pause bits */
#define ALL_TXE_PAUSE (CCE_STATUS_TXE_PIO_PAUSED_SMASK \
			| CCE_STATUS_TXE_PAUSED_SMASK \
			| CCE_STATUS_SDMA_PAUSED_SMASK)
/* all CceStatus sub-block RXE pause bits */
#define ALL_RXE_PAUSE CCE_STATUS_RXE_PAUSED_SMASK

/*
 * CCE Error flags.
*/ static struct flag_table cce_err_status_flags[] = { /* 0*/ FLAG_ENTRY0("CceCsrParityErr", CCE_ERR_STATUS_CCE_CSR_PARITY_ERR_SMASK), /* 1*/ FLAG_ENTRY0("CceCsrReadBadAddrErr", CCE_ERR_STATUS_CCE_CSR_READ_BAD_ADDR_ERR_SMASK), /* 2*/ FLAG_ENTRY0("CceCsrWriteBadAddrErr", CCE_ERR_STATUS_CCE_CSR_WRITE_BAD_ADDR_ERR_SMASK), /* 3*/ FLAG_ENTRY0("CceTrgtAsyncFifoParityErr", CCE_ERR_STATUS_CCE_TRGT_ASYNC_FIFO_PARITY_ERR_SMASK), /* 4*/ FLAG_ENTRY0("CceTrgtAccessErr", CCE_ERR_STATUS_CCE_TRGT_ACCESS_ERR_SMASK), /* 5*/ FLAG_ENTRY0("CceRspdDataParityErr", CCE_ERR_STATUS_CCE_RSPD_DATA_PARITY_ERR_SMASK), /* 6*/ FLAG_ENTRY0("CceCli0AsyncFifoParityErr", CCE_ERR_STATUS_CCE_CLI0_ASYNC_FIFO_PARITY_ERR_SMASK), /* 7*/ FLAG_ENTRY0("CceCsrCfgBusParityErr", CCE_ERR_STATUS_CCE_CSR_CFG_BUS_PARITY_ERR_SMASK), /* 8*/ FLAG_ENTRY0("CceCli2AsyncFifoParityErr", CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK), /* 9*/ FLAG_ENTRY0("CceCli1AsyncFifoPioCrdtParityErr", CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_PIO_CRDT_PARITY_ERR_SMASK), /*10*/ FLAG_ENTRY0("CceCli1AsyncFifoPioCrdtParityErr", CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_SDMA_HD_PARITY_ERR_SMASK), /*11*/ FLAG_ENTRY0("CceCli1AsyncFifoRxdmaParityError", CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_RXDMA_PARITY_ERROR_SMASK), /*12*/ FLAG_ENTRY0("CceCli1AsyncFifoDbgParityError", CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_DBG_PARITY_ERROR_SMASK), /*13*/ FLAG_ENTRY0("PcicRetryMemCorErr", CCE_ERR_STATUS_PCIC_RETRY_MEM_COR_ERR_SMASK), /*14*/ FLAG_ENTRY0("PcicRetryMemCorErr", CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_COR_ERR_SMASK), /*15*/ FLAG_ENTRY0("PcicPostHdQCorErr", CCE_ERR_STATUS_PCIC_POST_HD_QCOR_ERR_SMASK), /*16*/ FLAG_ENTRY0("PcicPostHdQCorErr", CCE_ERR_STATUS_PCIC_POST_DAT_QCOR_ERR_SMASK), /*17*/ FLAG_ENTRY0("PcicPostHdQCorErr", CCE_ERR_STATUS_PCIC_CPL_HD_QCOR_ERR_SMASK), /*18*/ FLAG_ENTRY0("PcicCplDatQCorErr", CCE_ERR_STATUS_PCIC_CPL_DAT_QCOR_ERR_SMASK), /*19*/ FLAG_ENTRY0("PcicNPostHQParityErr", CCE_ERR_STATUS_PCIC_NPOST_HQ_PARITY_ERR_SMASK), /*20*/ 
FLAG_ENTRY0("PcicNPostDatQParityErr", CCE_ERR_STATUS_PCIC_NPOST_DAT_QPARITY_ERR_SMASK), /*21*/ FLAG_ENTRY0("PcicRetryMemUncErr", CCE_ERR_STATUS_PCIC_RETRY_MEM_UNC_ERR_SMASK), /*22*/ FLAG_ENTRY0("PcicRetrySotMemUncErr", CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_UNC_ERR_SMASK), /*23*/ FLAG_ENTRY0("PcicPostHdQUncErr", CCE_ERR_STATUS_PCIC_POST_HD_QUNC_ERR_SMASK), /*24*/ FLAG_ENTRY0("PcicPostDatQUncErr", CCE_ERR_STATUS_PCIC_POST_DAT_QUNC_ERR_SMASK), /*25*/ FLAG_ENTRY0("PcicCplHdQUncErr", CCE_ERR_STATUS_PCIC_CPL_HD_QUNC_ERR_SMASK), /*26*/ FLAG_ENTRY0("PcicCplDatQUncErr", CCE_ERR_STATUS_PCIC_CPL_DAT_QUNC_ERR_SMASK), /*27*/ FLAG_ENTRY0("PcicTransmitFrontParityErr", CCE_ERR_STATUS_PCIC_TRANSMIT_FRONT_PARITY_ERR_SMASK), /*28*/ FLAG_ENTRY0("PcicTransmitBackParityErr", CCE_ERR_STATUS_PCIC_TRANSMIT_BACK_PARITY_ERR_SMASK), /*29*/ FLAG_ENTRY0("PcicReceiveParityErr", CCE_ERR_STATUS_PCIC_RECEIVE_PARITY_ERR_SMASK), /*30*/ FLAG_ENTRY0("CceTrgtCplTimeoutErr", CCE_ERR_STATUS_CCE_TRGT_CPL_TIMEOUT_ERR_SMASK), /*31*/ FLAG_ENTRY0("LATriggered", CCE_ERR_STATUS_LA_TRIGGERED_SMASK), /*32*/ FLAG_ENTRY0("CceSegReadBadAddrErr", CCE_ERR_STATUS_CCE_SEG_READ_BAD_ADDR_ERR_SMASK), /*33*/ FLAG_ENTRY0("CceSegWriteBadAddrErr", CCE_ERR_STATUS_CCE_SEG_WRITE_BAD_ADDR_ERR_SMASK), /*34*/ FLAG_ENTRY0("CceRcplAsyncFifoParityErr", CCE_ERR_STATUS_CCE_RCPL_ASYNC_FIFO_PARITY_ERR_SMASK), /*35*/ FLAG_ENTRY0("CceRxdmaConvFifoParityErr", CCE_ERR_STATUS_CCE_RXDMA_CONV_FIFO_PARITY_ERR_SMASK), /*36*/ FLAG_ENTRY0("CceMsixTableCorErr", CCE_ERR_STATUS_CCE_MSIX_TABLE_COR_ERR_SMASK), /*37*/ FLAG_ENTRY0("CceMsixTableUncErr", CCE_ERR_STATUS_CCE_MSIX_TABLE_UNC_ERR_SMASK), /*38*/ FLAG_ENTRY0("CceIntMapCorErr", CCE_ERR_STATUS_CCE_INT_MAP_COR_ERR_SMASK), /*39*/ FLAG_ENTRY0("CceIntMapUncErr", CCE_ERR_STATUS_CCE_INT_MAP_UNC_ERR_SMASK), /*40*/ FLAG_ENTRY0("CceMsixCsrParityErr", CCE_ERR_STATUS_CCE_MSIX_CSR_PARITY_ERR_SMASK), /*41-63 reserved*/ }; /* * Misc Error flags */ #define MES(text) MISC_ERR_STATUS_MISC_##text##_ERR_SMASK static struct 
flag_table misc_err_status_flags[] = {
/* each row: MES(<name>) bit mask and the description string for it */
/* 0*/	FLAG_ENTRY0("CSR_PARITY", MES(CSR_PARITY)),
/* 1*/	FLAG_ENTRY0("CSR_READ_BAD_ADDR", MES(CSR_READ_BAD_ADDR)),
/* 2*/	FLAG_ENTRY0("CSR_WRITE_BAD_ADDR", MES(CSR_WRITE_BAD_ADDR)),
/* 3*/	FLAG_ENTRY0("SBUS_WRITE_FAILED", MES(SBUS_WRITE_FAILED)),
/* 4*/	FLAG_ENTRY0("KEY_MISMATCH", MES(KEY_MISMATCH)),
/* 5*/	FLAG_ENTRY0("FW_AUTH_FAILED", MES(FW_AUTH_FAILED)),
/* 6*/	FLAG_ENTRY0("EFUSE_CSR_PARITY", MES(EFUSE_CSR_PARITY)),
/* 7*/	FLAG_ENTRY0("EFUSE_READ_BAD_ADDR", MES(EFUSE_READ_BAD_ADDR)),
/* 8*/	FLAG_ENTRY0("EFUSE_WRITE", MES(EFUSE_WRITE)),
/* 9*/	FLAG_ENTRY0("EFUSE_DONE_PARITY", MES(EFUSE_DONE_PARITY)),
/*10*/	FLAG_ENTRY0("INVALID_EEP_CMD", MES(INVALID_EEP_CMD)),
/*11*/	FLAG_ENTRY0("MBIST_FAIL", MES(MBIST_FAIL)),
/*12*/	FLAG_ENTRY0("PLL_LOCK_FAIL", MES(PLL_LOCK_FAIL))
};

/*
 * TXE PIO Error flags and consequences
 *
 * The middle FLAG_ENTRY() argument lands in flag_table.extra and is a
 * combination of the SEC_* "Send Error Consequences" values (0 for none).
 */
static struct flag_table pio_err_status_flags[] = {
/* 0*/	FLAG_ENTRY("PioWriteBadCtxt",
	SEC_WRITE_DROPPED,
	SEND_PIO_ERR_STATUS_PIO_WRITE_BAD_CTXT_ERR_SMASK),
/* 1*/	FLAG_ENTRY("PioWriteAddrParity",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK),
/* 2*/	FLAG_ENTRY("PioCsrParity",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK),
/* 3*/	FLAG_ENTRY("PioSbMemFifo0",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK),
/* 4*/	FLAG_ENTRY("PioSbMemFifo1",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK),
/* 5*/	FLAG_ENTRY("PioPccFifoParity",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK),
/* 6*/	FLAG_ENTRY("PioPecFifoParity",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK),
/* 7*/	FLAG_ENTRY("PioSbrdctlCrrelParity",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK),
/* 8*/	FLAG_ENTRY("PioSbrdctrlCrrelFifoParity",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK),
/* 9*/	FLAG_ENTRY("PioPktEvictFifoParityErr",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK),
/*10*/	FLAG_ENTRY("PioSmPktResetParity",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK),
/*11*/	FLAG_ENTRY("PioVlLenMemBank0Unc",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK),
/*12*/	FLAG_ENTRY("PioVlLenMemBank1Unc",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK),
/*13*/	FLAG_ENTRY("PioVlLenMemBank0Cor",
	0,
	SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_COR_ERR_SMASK),
/*14*/	FLAG_ENTRY("PioVlLenMemBank1Cor",
	0,
	SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_COR_ERR_SMASK),
/*15*/	FLAG_ENTRY("PioCreditRetFifoParity",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK),
/*16*/	FLAG_ENTRY("PioPpmcPblFifo",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK),
/*17*/	FLAG_ENTRY("PioInitSmIn",
	0,
	SEND_PIO_ERR_STATUS_PIO_INIT_SM_IN_ERR_SMASK),
/*18*/	FLAG_ENTRY("PioPktEvictSmOrArbSm",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK),
/*19*/	FLAG_ENTRY("PioHostAddrMemUnc",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK),
/*20*/	FLAG_ENTRY("PioHostAddrMemCor",
	0,
	SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_COR_ERR_SMASK),
/*21*/	FLAG_ENTRY("PioWriteDataParity",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK),
/*22*/	FLAG_ENTRY("PioStateMachine",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK),
/*23*/	FLAG_ENTRY("PioWriteQwValidParity",
	SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK),
/*24*/	FLAG_ENTRY("PioBlockQwCountParity",
	SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK),
/*25*/	FLAG_ENTRY("PioVlfVlLenParity",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK),
/*26*/	FLAG_ENTRY("PioVlfSopParity",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK),
/*27*/	FLAG_ENTRY("PioVlFifoParity",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK),
/*28*/	FLAG_ENTRY("PioPpmcBqcMemParity",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK),
/*29*/	FLAG_ENTRY("PioPpmcSopLen",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK),
/*30-31 reserved*/
/*32*/	FLAG_ENTRY("PioCurrentFreeCntParity",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK),
/*33*/	FLAG_ENTRY("PioLastReturnedCntParity",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK),
/*34*/	FLAG_ENTRY("PioPccSopHeadParity",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK),
/*35*/	FLAG_ENTRY("PioPecSopHeadParityErr",
	SEC_SPC_FREEZE,
	SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK),
/*36-63 reserved*/
};

/*
 * TXE PIO errors that cause an SPC freeze
 *
 * This is the OR of every mask whose pio_err_status_flags row carries
 * SEC_SPC_FREEZE; keep the two lists in step when editing either.
 */
#define ALL_PIO_FREEZE_ERR \
	(SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK \
	| SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK)

/*
 * TXE SDMA Error flags
 */
static struct flag_table sdma_err_status_flags[] = {
/* 0*/	FLAG_ENTRY0("SDmaRpyTagErr",
		SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK),
/* 1*/	FLAG_ENTRY0("SDmaCsrParityErr",
		SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK),
/* 2*/	FLAG_ENTRY0("SDmaPcieReqTrackingUncErr",
		SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK),
/* 3*/	FLAG_ENTRY0("SDmaPcieReqTrackingCorErr",
		SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_COR_ERR_SMASK),
/*04-63 reserved*/
};

/* TXE SDMA errors that cause an SPC freeze (all but the correctable one) */
#define ALL_SDMA_FREEZE_ERR  \
		(SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK \
		| SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK \
		| SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK)

/*
 * TXE Egress Error flags
 *
 * SEES(text) expands to SEND_EGRESS_ERR_STATUS_<text>_ERR_SMASK.
 */
#define SEES(text) SEND_EGRESS_ERR_STATUS_##text##_ERR_SMASK
static struct flag_table egress_err_status_flags[] = {
/* 0*/	FLAG_ENTRY0("TxPktIntegrityMemCorErr", SEES(TX_PKT_INTEGRITY_MEM_COR)),
/* 1*/	FLAG_ENTRY0("TxPktIntegrityMemUncErr", SEES(TX_PKT_INTEGRITY_MEM_UNC)),
/* 2 reserved */
/* 3*/	FLAG_ENTRY0("TxEgressFifoUnderrunOrParityErr",
		SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY)),
/* 4*/	FLAG_ENTRY0("TxLinkdownErr", SEES(TX_LINKDOWN)),
/* 5*/	FLAG_ENTRY0("TxIncorrectLinkStateErr", SEES(TX_INCORRECT_LINK_STATE)),
/* 6 reserved */
/* 7*/	FLAG_ENTRY0("TxPioLaunchIntfParityErr",
		SEES(TX_PIO_LAUNCH_INTF_PARITY)),
/* 8*/	FLAG_ENTRY0("TxSdmaLaunchIntfParityErr",
		SEES(TX_SDMA_LAUNCH_INTF_PARITY)),
/* 9-10 reserved */
/*11*/
FLAG_ENTRY0("TxSbrdCtlStateMachineParityErr", SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY)), /*12*/ FLAG_ENTRY0("TxIllegalVLErr", SEES(TX_ILLEGAL_VL)), /*13*/ FLAG_ENTRY0("TxLaunchCsrParityErr", SEES(TX_LAUNCH_CSR_PARITY)), /*14*/ FLAG_ENTRY0("TxSbrdCtlCsrParityErr", SEES(TX_SBRD_CTL_CSR_PARITY)), /*15*/ FLAG_ENTRY0("TxConfigParityErr", SEES(TX_CONFIG_PARITY)), /*16*/ FLAG_ENTRY0("TxSdma0DisallowedPacketErr", SEES(TX_SDMA0_DISALLOWED_PACKET)), /*17*/ FLAG_ENTRY0("TxSdma1DisallowedPacketErr", SEES(TX_SDMA1_DISALLOWED_PACKET)), /*18*/ FLAG_ENTRY0("TxSdma2DisallowedPacketErr", SEES(TX_SDMA2_DISALLOWED_PACKET)), /*19*/ FLAG_ENTRY0("TxSdma3DisallowedPacketErr", SEES(TX_SDMA3_DISALLOWED_PACKET)), /*20*/ FLAG_ENTRY0("TxSdma4DisallowedPacketErr", SEES(TX_SDMA4_DISALLOWED_PACKET)), /*21*/ FLAG_ENTRY0("TxSdma5DisallowedPacketErr", SEES(TX_SDMA5_DISALLOWED_PACKET)), /*22*/ FLAG_ENTRY0("TxSdma6DisallowedPacketErr", SEES(TX_SDMA6_DISALLOWED_PACKET)), /*23*/ FLAG_ENTRY0("TxSdma7DisallowedPacketErr", SEES(TX_SDMA7_DISALLOWED_PACKET)), /*24*/ FLAG_ENTRY0("TxSdma8DisallowedPacketErr", SEES(TX_SDMA8_DISALLOWED_PACKET)), /*25*/ FLAG_ENTRY0("TxSdma9DisallowedPacketErr", SEES(TX_SDMA9_DISALLOWED_PACKET)), /*26*/ FLAG_ENTRY0("TxSdma10DisallowedPacketErr", SEES(TX_SDMA10_DISALLOWED_PACKET)), /*27*/ FLAG_ENTRY0("TxSdma11DisallowedPacketErr", SEES(TX_SDMA11_DISALLOWED_PACKET)), /*28*/ FLAG_ENTRY0("TxSdma12DisallowedPacketErr", SEES(TX_SDMA12_DISALLOWED_PACKET)), /*29*/ FLAG_ENTRY0("TxSdma13DisallowedPacketErr", SEES(TX_SDMA13_DISALLOWED_PACKET)), /*30*/ FLAG_ENTRY0("TxSdma14DisallowedPacketErr", SEES(TX_SDMA14_DISALLOWED_PACKET)), /*31*/ FLAG_ENTRY0("TxSdma15DisallowedPacketErr", SEES(TX_SDMA15_DISALLOWED_PACKET)), /*32*/ FLAG_ENTRY0("TxLaunchFifo0UncOrParityErr", SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY)), /*33*/ FLAG_ENTRY0("TxLaunchFifo1UncOrParityErr", SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY)), /*34*/ FLAG_ENTRY0("TxLaunchFifo2UncOrParityErr", SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY)), /*35*/ 
FLAG_ENTRY0("TxLaunchFifo3UncOrParityErr", SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY)), /*36*/ FLAG_ENTRY0("TxLaunchFifo4UncOrParityErr", SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY)), /*37*/ FLAG_ENTRY0("TxLaunchFifo5UncOrParityErr", SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY)), /*38*/ FLAG_ENTRY0("TxLaunchFifo6UncOrParityErr", SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY)), /*39*/ FLAG_ENTRY0("TxLaunchFifo7UncOrParityErr", SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY)), /*40*/ FLAG_ENTRY0("TxLaunchFifo8UncOrParityErr", SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY)), /*41*/ FLAG_ENTRY0("TxCreditReturnParityErr", SEES(TX_CREDIT_RETURN_PARITY)), /*42*/ FLAG_ENTRY0("TxSbHdrUncErr", SEES(TX_SB_HDR_UNC)), /*43*/ FLAG_ENTRY0("TxReadSdmaMemoryUncErr", SEES(TX_READ_SDMA_MEMORY_UNC)), /*44*/ FLAG_ENTRY0("TxReadPioMemoryUncErr", SEES(TX_READ_PIO_MEMORY_UNC)), /*45*/ FLAG_ENTRY0("TxEgressFifoUncErr", SEES(TX_EGRESS_FIFO_UNC)), /*46*/ FLAG_ENTRY0("TxHcrcInsertionErr", SEES(TX_HCRC_INSERTION)), /*47*/ FLAG_ENTRY0("TxCreditReturnVLErr", SEES(TX_CREDIT_RETURN_VL)), /*48*/ FLAG_ENTRY0("TxLaunchFifo0CorErr", SEES(TX_LAUNCH_FIFO0_COR)), /*49*/ FLAG_ENTRY0("TxLaunchFifo1CorErr", SEES(TX_LAUNCH_FIFO1_COR)), /*50*/ FLAG_ENTRY0("TxLaunchFifo2CorErr", SEES(TX_LAUNCH_FIFO2_COR)), /*51*/ FLAG_ENTRY0("TxLaunchFifo3CorErr", SEES(TX_LAUNCH_FIFO3_COR)), /*52*/ FLAG_ENTRY0("TxLaunchFifo4CorErr", SEES(TX_LAUNCH_FIFO4_COR)), /*53*/ FLAG_ENTRY0("TxLaunchFifo5CorErr", SEES(TX_LAUNCH_FIFO5_COR)), /*54*/ FLAG_ENTRY0("TxLaunchFifo6CorErr", SEES(TX_LAUNCH_FIFO6_COR)), /*55*/ FLAG_ENTRY0("TxLaunchFifo7CorErr", SEES(TX_LAUNCH_FIFO7_COR)), /*56*/ FLAG_ENTRY0("TxLaunchFifo8CorErr", SEES(TX_LAUNCH_FIFO8_COR)), /*57*/ FLAG_ENTRY0("TxCreditOverrunErr", SEES(TX_CREDIT_OVERRUN)), /*58*/ FLAG_ENTRY0("TxSbHdrCorErr", SEES(TX_SB_HDR_COR)), /*59*/ FLAG_ENTRY0("TxReadSdmaMemoryCorErr", SEES(TX_READ_SDMA_MEMORY_COR)), /*60*/ FLAG_ENTRY0("TxReadPioMemoryCorErr", SEES(TX_READ_PIO_MEMORY_COR)), /*61*/ FLAG_ENTRY0("TxEgressFifoCorErr", SEES(TX_EGRESS_FIFO_COR)), /*62*/ 
FLAG_ENTRY0("TxReadSdmaMemoryCsrUncErr", SEES(TX_READ_SDMA_MEMORY_CSR_UNC)), /*63*/ FLAG_ENTRY0("TxReadPioMemoryCsrUncErr", SEES(TX_READ_PIO_MEMORY_CSR_UNC)), }; /* * TXE Egress Error Info flags */ #define SEEI(text) SEND_EGRESS_ERR_INFO_##text##_ERR_SMASK static struct flag_table egress_err_info_flags[] = { /* 0*/ FLAG_ENTRY0("Reserved", 0ull), /* 1*/ FLAG_ENTRY0("VLErr", SEEI(VL)), /* 2*/ FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)), /* 3*/ FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)), /* 4*/ FLAG_ENTRY0("PartitionKeyErr", SEEI(PARTITION_KEY)), /* 5*/ FLAG_ENTRY0("SLIDErr", SEEI(SLID)), /* 6*/ FLAG_ENTRY0("OpcodeErr", SEEI(OPCODE)), /* 7*/ FLAG_ENTRY0("VLMappingErr", SEEI(VL_MAPPING)), /* 8*/ FLAG_ENTRY0("RawErr", SEEI(RAW)), /* 9*/ FLAG_ENTRY0("RawIPv6Err", SEEI(RAW_IPV6)), /*10*/ FLAG_ENTRY0("GRHErr", SEEI(GRH)), /*11*/ FLAG_ENTRY0("BypassErr", SEEI(BYPASS)), /*12*/ FLAG_ENTRY0("KDETHPacketsErr", SEEI(KDETH_PACKETS)), /*13*/ FLAG_ENTRY0("NonKDETHPacketsErr", SEEI(NON_KDETH_PACKETS)), /*14*/ FLAG_ENTRY0("TooSmallIBPacketsErr", SEEI(TOO_SMALL_IB_PACKETS)), /*15*/ FLAG_ENTRY0("TooSmallBypassPacketsErr", SEEI(TOO_SMALL_BYPASS_PACKETS)), /*16*/ FLAG_ENTRY0("PbcTestErr", SEEI(PBC_TEST)), /*17*/ FLAG_ENTRY0("BadPktLenErr", SEEI(BAD_PKT_LEN)), /*18*/ FLAG_ENTRY0("TooLongIBPacketErr", SEEI(TOO_LONG_IB_PACKET)), /*19*/ FLAG_ENTRY0("TooLongBypassPacketsErr", SEEI(TOO_LONG_BYPASS_PACKETS)), /*20*/ FLAG_ENTRY0("PbcStaticRateControlErr", SEEI(PBC_STATIC_RATE_CONTROL)), /*21*/ FLAG_ENTRY0("BypassBadPktLenErr", SEEI(BAD_PKT_LEN)), }; /* TXE Egress errors that cause an SPC freeze */ #define ALL_TXE_EGRESS_FREEZE_ERR \ (SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY) \ | SEES(TX_PIO_LAUNCH_INTF_PARITY) \ | SEES(TX_SDMA_LAUNCH_INTF_PARITY) \ | SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY) \ | SEES(TX_LAUNCH_CSR_PARITY) \ | SEES(TX_SBRD_CTL_CSR_PARITY) \ | SEES(TX_CONFIG_PARITY) \ | SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY) \ | SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY) \ | SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY) \ | 
SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY) \ | SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY) \ | SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY) \ | SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY) \ | SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY) \ | SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY) \ | SEES(TX_CREDIT_RETURN_PARITY)) /* * TXE Send error flags */ #define SES(name) SEND_ERR_STATUS_SEND_##name##_ERR_SMASK static struct flag_table send_err_status_flags[] = { /* 0*/ FLAG_ENTRY0("SDmaRpyTagErr", SES(CSR_PARITY)), /* 1*/ FLAG_ENTRY0("SendCsrReadBadAddrErr", SES(CSR_READ_BAD_ADDR)), /* 2*/ FLAG_ENTRY0("SendCsrWriteBadAddrErr", SES(CSR_WRITE_BAD_ADDR)) }; /* * TXE Send Context Error flags and consequences */ static struct flag_table sc_err_status_flags[] = { /* 0*/ FLAG_ENTRY("InconsistentSop", SEC_PACKET_DROPPED | SEC_SC_HALTED, SEND_CTXT_ERR_STATUS_PIO_INCONSISTENT_SOP_ERR_SMASK), /* 1*/ FLAG_ENTRY("DisallowedPacket", SEC_PACKET_DROPPED | SEC_SC_HALTED, SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK), /* 2*/ FLAG_ENTRY("WriteCrossesBoundary", SEC_WRITE_DROPPED | SEC_SC_HALTED, SEND_CTXT_ERR_STATUS_PIO_WRITE_CROSSES_BOUNDARY_ERR_SMASK), /* 3*/ FLAG_ENTRY("WriteOverflow", SEC_WRITE_DROPPED | SEC_SC_HALTED, SEND_CTXT_ERR_STATUS_PIO_WRITE_OVERFLOW_ERR_SMASK), /* 4*/ FLAG_ENTRY("WriteOutOfBounds", SEC_WRITE_DROPPED | SEC_SC_HALTED, SEND_CTXT_ERR_STATUS_PIO_WRITE_OUT_OF_BOUNDS_ERR_SMASK), /* 5-63 reserved*/ }; /* * RXE Receive Error flags */ #define RXES(name) RCV_ERR_STATUS_RX_##name##_ERR_SMASK static struct flag_table rxe_err_status_flags[] = { /* 0*/ FLAG_ENTRY0("RxDmaCsrCorErr", RXES(DMA_CSR_COR)), /* 1*/ FLAG_ENTRY0("RxDcIntfParityErr", RXES(DC_INTF_PARITY)), /* 2*/ FLAG_ENTRY0("RxRcvHdrUncErr", RXES(RCV_HDR_UNC)), /* 3*/ FLAG_ENTRY0("RxRcvHdrCorErr", RXES(RCV_HDR_COR)), /* 4*/ FLAG_ENTRY0("RxRcvDataUncErr", RXES(RCV_DATA_UNC)), /* 5*/ FLAG_ENTRY0("RxRcvDataCorErr", RXES(RCV_DATA_COR)), /* 6*/ FLAG_ENTRY0("RxRcvQpMapTableUncErr", RXES(RCV_QP_MAP_TABLE_UNC)), /* 7*/ FLAG_ENTRY0("RxRcvQpMapTableCorErr", 
RXES(RCV_QP_MAP_TABLE_COR)),
/* rows continue: RXES(<name>) bit mask and the description string for it */
/* 8*/	FLAG_ENTRY0("RxRcvCsrParityErr", RXES(RCV_CSR_PARITY)),
/* 9*/	FLAG_ENTRY0("RxDcSopEopParityErr", RXES(DC_SOP_EOP_PARITY)),
/*10*/	FLAG_ENTRY0("RxDmaFlagUncErr", RXES(DMA_FLAG_UNC)),
/*11*/	FLAG_ENTRY0("RxDmaFlagCorErr", RXES(DMA_FLAG_COR)),
/*12*/	FLAG_ENTRY0("RxRcvFsmEncodingErr", RXES(RCV_FSM_ENCODING)),
/*13*/	FLAG_ENTRY0("RxRbufFreeListUncErr", RXES(RBUF_FREE_LIST_UNC)),
/*14*/	FLAG_ENTRY0("RxRbufFreeListCorErr", RXES(RBUF_FREE_LIST_COR)),
/*15*/	FLAG_ENTRY0("RxRbufLookupDesRegUncErr", RXES(RBUF_LOOKUP_DES_REG_UNC)),
/*16*/	FLAG_ENTRY0("RxRbufLookupDesRegUncCorErr",
		RXES(RBUF_LOOKUP_DES_REG_UNC_COR)),
/*17*/	FLAG_ENTRY0("RxRbufLookupDesUncErr", RXES(RBUF_LOOKUP_DES_UNC)),
/*18*/	FLAG_ENTRY0("RxRbufLookupDesCorErr", RXES(RBUF_LOOKUP_DES_COR)),
/*19*/	FLAG_ENTRY0("RxRbufBlockListReadUncErr",
		RXES(RBUF_BLOCK_LIST_READ_UNC)),
/*20*/	FLAG_ENTRY0("RxRbufBlockListReadCorErr",
		RXES(RBUF_BLOCK_LIST_READ_COR)),
/*21*/	FLAG_ENTRY0("RxRbufCsrQHeadBufNumParityErr",
		RXES(RBUF_CSR_QHEAD_BUF_NUM_PARITY)),
/*22*/	FLAG_ENTRY0("RxRbufCsrQEntCntParityErr",
		RXES(RBUF_CSR_QENT_CNT_PARITY)),
/*23*/	FLAG_ENTRY0("RxRbufCsrQNextBufParityErr",
		RXES(RBUF_CSR_QNEXT_BUF_PARITY)),
/*24*/	FLAG_ENTRY0("RxRbufCsrQVldBitParityErr",
		RXES(RBUF_CSR_QVLD_BIT_PARITY)),
/*25*/	FLAG_ENTRY0("RxRbufCsrQHdPtrParityErr", RXES(RBUF_CSR_QHD_PTR_PARITY)),
/*26*/	FLAG_ENTRY0("RxRbufCsrQTlPtrParityErr", RXES(RBUF_CSR_QTL_PTR_PARITY)),
/*27*/	FLAG_ENTRY0("RxRbufCsrQNumOfPktParityErr",
		RXES(RBUF_CSR_QNUM_OF_PKT_PARITY)),
/*28*/	FLAG_ENTRY0("RxRbufCsrQEOPDWParityErr", RXES(RBUF_CSR_QEOPDW_PARITY)),
/*29*/	FLAG_ENTRY0("RxRbufCtxIdParityErr", RXES(RBUF_CTX_ID_PARITY)),
/*30*/	FLAG_ENTRY0("RxRBufBadLookupErr", RXES(RBUF_BAD_LOOKUP)),
/*31*/	FLAG_ENTRY0("RxRbufFullErr", RXES(RBUF_FULL)),
/*32*/	FLAG_ENTRY0("RxRbufEmptyErr", RXES(RBUF_EMPTY)),
/*33*/	FLAG_ENTRY0("RxRbufFlRdAddrParityErr", RXES(RBUF_FL_RD_ADDR_PARITY)),
/*34*/	FLAG_ENTRY0("RxRbufFlWrAddrParityErr", RXES(RBUF_FL_WR_ADDR_PARITY)),
/*35*/	FLAG_ENTRY0("RxRbufFlInitdoneParityErr",
		RXES(RBUF_FL_INITDONE_PARITY)),
/*36*/	FLAG_ENTRY0("RxRbufFlInitWrAddrParityErr",
		RXES(RBUF_FL_INIT_WR_ADDR_PARITY)),
/*37*/	FLAG_ENTRY0("RxRbufNextFreeBufUncErr", RXES(RBUF_NEXT_FREE_BUF_UNC)),
/*38*/	FLAG_ENTRY0("RxRbufNextFreeBufCorErr", RXES(RBUF_NEXT_FREE_BUF_COR)),
/*39*/	FLAG_ENTRY0("RxLookupDesPart1UncErr", RXES(LOOKUP_DES_PART1_UNC)),
/*40*/	FLAG_ENTRY0("RxLookupDesPart1UncCorErr",
		RXES(LOOKUP_DES_PART1_UNC_COR)),
/*41*/	FLAG_ENTRY0("RxLookupDesPart2ParityErr",
		RXES(LOOKUP_DES_PART2_PARITY)),
/*42*/	FLAG_ENTRY0("RxLookupRcvArrayUncErr", RXES(LOOKUP_RCV_ARRAY_UNC)),
/*43*/	FLAG_ENTRY0("RxLookupRcvArrayCorErr", RXES(LOOKUP_RCV_ARRAY_COR)),
/*44*/	FLAG_ENTRY0("RxLookupCsrParityErr", RXES(LOOKUP_CSR_PARITY)),
/*45*/	FLAG_ENTRY0("RxHqIntrCsrParityErr", RXES(HQ_INTR_CSR_PARITY)),
/*46*/	FLAG_ENTRY0("RxHqIntrFsmErr", RXES(HQ_INTR_FSM)),
/*47*/	FLAG_ENTRY0("RxRbufDescPart1UncErr", RXES(RBUF_DESC_PART1_UNC)),
/*48*/	FLAG_ENTRY0("RxRbufDescPart1CorErr", RXES(RBUF_DESC_PART1_COR)),
/*49*/	FLAG_ENTRY0("RxRbufDescPart2UncErr", RXES(RBUF_DESC_PART2_UNC)),
/*50*/	FLAG_ENTRY0("RxRbufDescPart2CorErr", RXES(RBUF_DESC_PART2_COR)),
/*51*/	FLAG_ENTRY0("RxDmaHdrFifoRdUncErr", RXES(DMA_HDR_FIFO_RD_UNC)),
/*52*/	FLAG_ENTRY0("RxDmaHdrFifoRdCorErr", RXES(DMA_HDR_FIFO_RD_COR)),
/*53*/	FLAG_ENTRY0("RxDmaDataFifoRdUncErr", RXES(DMA_DATA_FIFO_RD_UNC)),
/*54*/	FLAG_ENTRY0("RxDmaDataFifoRdCorErr", RXES(DMA_DATA_FIFO_RD_COR)),
/*55*/	FLAG_ENTRY0("RxRbufDataUncErr", RXES(RBUF_DATA_UNC)),
/*56*/	FLAG_ENTRY0("RxRbufDataCorErr", RXES(RBUF_DATA_COR)),
/*57*/	FLAG_ENTRY0("RxDmaCsrParityErr", RXES(DMA_CSR_PARITY)),
/*58*/	FLAG_ENTRY0("RxDmaEqFsmEncodingErr", RXES(DMA_EQ_FSM_ENCODING)),
/*59*/	FLAG_ENTRY0("RxDmaDqFsmEncodingErr", RXES(DMA_DQ_FSM_ENCODING)),
/*60*/	FLAG_ENTRY0("RxDmaCsrUncErr", RXES(DMA_CSR_UNC)),
/*61*/	FLAG_ENTRY0("RxCsrReadBadAddrErr", RXES(CSR_READ_BAD_ADDR)),
/*62*/	FLAG_ENTRY0("RxCsrWriteBadAddrErr", RXES(CSR_WRITE_BAD_ADDR)),
/*63*/	FLAG_ENTRY0("RxCsrParityErr", RXES(CSR_PARITY))
};

/*
 * RXE errors that will trigger an SPC freeze
 *
 * The list is spelled with the full RCV_ERR_STATUS_RX_*_ERR_SMASK names,
 * i.e. the same masks RXES() produces for the table above.
 */
#define ALL_RXE_FREEZE_ERR  \
	(RCV_ERR_STATUS_RX_RCV_QP_MAP_TABLE_UNC_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RCV_CSR_PARITY_ERR_SMASK \
	| RCV_ERR_STATUS_RX_DMA_FLAG_UNC_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RCV_FSM_ENCODING_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_FREE_LIST_UNC_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_COR_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_UNC_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_BLOCK_LIST_READ_UNC_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_CSR_QHEAD_BUF_NUM_PARITY_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_CSR_QENT_CNT_PARITY_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_CSR_QNEXT_BUF_PARITY_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_CSR_QVLD_BIT_PARITY_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_CSR_QHD_PTR_PARITY_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_CSR_QTL_PTR_PARITY_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_CSR_QNUM_OF_PKT_PARITY_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_CSR_QEOPDW_PARITY_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_CTX_ID_PARITY_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_BAD_LOOKUP_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_FULL_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_EMPTY_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_FL_RD_ADDR_PARITY_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_FL_WR_ADDR_PARITY_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_FL_INITDONE_PARITY_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_FL_INIT_WR_ADDR_PARITY_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_NEXT_FREE_BUF_UNC_ERR_SMASK \
	| RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_ERR_SMASK \
	| RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_COR_ERR_SMASK \
	| RCV_ERR_STATUS_RX_LOOKUP_DES_PART2_PARITY_ERR_SMASK \
	| RCV_ERR_STATUS_RX_LOOKUP_RCV_ARRAY_UNC_ERR_SMASK \
	| RCV_ERR_STATUS_RX_LOOKUP_CSR_PARITY_ERR_SMASK \
	| RCV_ERR_STATUS_RX_HQ_INTR_CSR_PARITY_ERR_SMASK \
	| RCV_ERR_STATUS_RX_HQ_INTR_FSM_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_DESC_PART1_UNC_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_DESC_PART1_COR_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_DESC_PART2_UNC_ERR_SMASK \
	| RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK \
	| RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK \
	| RCV_ERR_STATUS_RX_RBUF_DATA_UNC_ERR_SMASK \
	| RCV_ERR_STATUS_RX_DMA_CSR_PARITY_ERR_SMASK \
	| RCV_ERR_STATUS_RX_DMA_EQ_FSM_ENCODING_ERR_SMASK \
	| RCV_ERR_STATUS_RX_DMA_DQ_FSM_ENCODING_ERR_SMASK \
	| RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK \
	| RCV_ERR_STATUS_RX_CSR_PARITY_ERR_SMASK)

/*
 * Three uncorrectable RX DMA masks, all of which also appear in
 * ALL_RXE_FREEZE_ERR.  NOTE(review): how this mask is consumed is not
 * visible in this part of the file.
 */
#define RXE_FREEZE_ABORT_MASK \
	(RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK | \
	RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK | \
	RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK)

/*
 * DCC Error Flags
 *
 * DCCE(name) expands to DCC_ERR_FLG_<name>_SMASK.  Unlike the tables
 * above, the rows here carry no bit-position comments.
 */
#define DCCE(name) DCC_ERR_FLG_##name##_SMASK
static struct flag_table dcc_err_flags[] = {
	FLAG_ENTRY0("bad_l2_err", DCCE(BAD_L2_ERR)),
	FLAG_ENTRY0("bad_sc_err", DCCE(BAD_SC_ERR)),
	FLAG_ENTRY0("bad_mid_tail_err", DCCE(BAD_MID_TAIL_ERR)),
	FLAG_ENTRY0("bad_preemption_err", DCCE(BAD_PREEMPTION_ERR)),
	FLAG_ENTRY0("preemption_err", DCCE(PREEMPTION_ERR)),
	FLAG_ENTRY0("preemptionvl15_err", DCCE(PREEMPTIONVL15_ERR)),
	FLAG_ENTRY0("bad_vl_marker_err", DCCE(BAD_VL_MARKER_ERR)),
	FLAG_ENTRY0("bad_dlid_target_err", DCCE(BAD_DLID_TARGET_ERR)),
	FLAG_ENTRY0("bad_lver_err", DCCE(BAD_LVER_ERR)),
	FLAG_ENTRY0("uncorrectable_err", DCCE(UNCORRECTABLE_ERR)),
	FLAG_ENTRY0("bad_crdt_ack_err", DCCE(BAD_CRDT_ACK_ERR)),
	FLAG_ENTRY0("unsup_pkt_type", DCCE(UNSUP_PKT_TYPE)),
	FLAG_ENTRY0("bad_ctrl_flit_err", DCCE(BAD_CTRL_FLIT_ERR)),
	FLAG_ENTRY0("event_cntr_parity_err", DCCE(EVENT_CNTR_PARITY_ERR)),
	FLAG_ENTRY0("event_cntr_rollover_err", DCCE(EVENT_CNTR_ROLLOVER_ERR)),
	FLAG_ENTRY0("link_err", DCCE(LINK_ERR)),
	FLAG_ENTRY0("misc_cntr_rollover_err", DCCE(MISC_CNTR_ROLLOVER_ERR)),
	FLAG_ENTRY0("bad_ctrl_dist_err", DCCE(BAD_CTRL_DIST_ERR)),
	FLAG_ENTRY0("bad_tail_dist_err", DCCE(BAD_TAIL_DIST_ERR)),
	FLAG_ENTRY0("bad_head_dist_err", DCCE(BAD_HEAD_DIST_ERR)),
FLAG_ENTRY0("nonvl15_state_err", DCCE(NONVL15_STATE_ERR)), FLAG_ENTRY0("vl15_multi_err", DCCE(VL15_MULTI_ERR)), FLAG_ENTRY0("bad_pkt_length_err", DCCE(BAD_PKT_LENGTH_ERR)), FLAG_ENTRY0("unsup_vl_err", DCCE(UNSUP_VL_ERR)), FLAG_ENTRY0("perm_nvl15_err", DCCE(PERM_NVL15_ERR)), FLAG_ENTRY0("slid_zero_err", DCCE(SLID_ZERO_ERR)), FLAG_ENTRY0("dlid_zero_err", DCCE(DLID_ZERO_ERR)), FLAG_ENTRY0("length_mtu_err", DCCE(LENGTH_MTU_ERR)), FLAG_ENTRY0("rx_early_drop_err", DCCE(RX_EARLY_DROP_ERR)), FLAG_ENTRY0("late_short_err", DCCE(LATE_SHORT_ERR)), FLAG_ENTRY0("late_long_err", DCCE(LATE_LONG_ERR)), FLAG_ENTRY0("late_ebp_err", DCCE(LATE_EBP_ERR)), FLAG_ENTRY0("fpe_tx_fifo_ovflw_err", DCCE(FPE_TX_FIFO_OVFLW_ERR)), FLAG_ENTRY0("fpe_tx_fifo_unflw_err", DCCE(FPE_TX_FIFO_UNFLW_ERR)), FLAG_ENTRY0("csr_access_blocked_host", DCCE(CSR_ACCESS_BLOCKED_HOST)), FLAG_ENTRY0("csr_access_blocked_uc", DCCE(CSR_ACCESS_BLOCKED_UC)), FLAG_ENTRY0("tx_ctrl_parity_err", DCCE(TX_CTRL_PARITY_ERR)), FLAG_ENTRY0("tx_ctrl_parity_mbe_err", DCCE(TX_CTRL_PARITY_MBE_ERR)), FLAG_ENTRY0("tx_sc_parity_err", DCCE(TX_SC_PARITY_ERR)), FLAG_ENTRY0("rx_ctrl_parity_mbe_err", DCCE(RX_CTRL_PARITY_MBE_ERR)), FLAG_ENTRY0("csr_parity_err", DCCE(CSR_PARITY_ERR)), FLAG_ENTRY0("csr_inval_addr", DCCE(CSR_INVAL_ADDR)), FLAG_ENTRY0("tx_byte_shft_parity_err", DCCE(TX_BYTE_SHFT_PARITY_ERR)), FLAG_ENTRY0("rx_byte_shft_parity_err", DCCE(RX_BYTE_SHFT_PARITY_ERR)), FLAG_ENTRY0("fmconfig_err", DCCE(FMCONFIG_ERR)), FLAG_ENTRY0("rcvport_err", DCCE(RCVPORT_ERR)), }; /* * LCB error flags */ #define LCBE(name) DC_LCB_ERR_FLG_##name##_SMASK static struct flag_table lcb_err_flags[] = { /* 0*/ FLAG_ENTRY0("CSR_PARITY_ERR", LCBE(CSR_PARITY_ERR)), /* 1*/ FLAG_ENTRY0("INVALID_CSR_ADDR", LCBE(INVALID_CSR_ADDR)), /* 2*/ FLAG_ENTRY0("RST_FOR_FAILED_DESKEW", LCBE(RST_FOR_FAILED_DESKEW)), /* 3*/ FLAG_ENTRY0("ALL_LNS_FAILED_REINIT_TEST", LCBE(ALL_LNS_FAILED_REINIT_TEST)), /* 4*/ FLAG_ENTRY0("LOST_REINIT_STALL_OR_TOS", LCBE(LOST_REINIT_STALL_OR_TOS)), /* 
5*/ FLAG_ENTRY0("TX_LESS_THAN_FOUR_LNS", LCBE(TX_LESS_THAN_FOUR_LNS)), /* 6*/ FLAG_ENTRY0("RX_LESS_THAN_FOUR_LNS", LCBE(RX_LESS_THAN_FOUR_LNS)), /* 7*/ FLAG_ENTRY0("SEQ_CRC_ERR", LCBE(SEQ_CRC_ERR)), /* 8*/ FLAG_ENTRY0("REINIT_FROM_PEER", LCBE(REINIT_FROM_PEER)), /* 9*/ FLAG_ENTRY0("REINIT_FOR_LN_DEGRADE", LCBE(REINIT_FOR_LN_DEGRADE)), /*10*/ FLAG_ENTRY0("CRC_ERR_CNT_HIT_LIMIT", LCBE(CRC_ERR_CNT_HIT_LIMIT)), /*11*/ FLAG_ENTRY0("RCLK_STOPPED", LCBE(RCLK_STOPPED)), /*12*/ FLAG_ENTRY0("UNEXPECTED_REPLAY_MARKER", LCBE(UNEXPECTED_REPLAY_MARKER)), /*13*/ FLAG_ENTRY0("UNEXPECTED_ROUND_TRIP_MARKER", LCBE(UNEXPECTED_ROUND_TRIP_MARKER)), /*14*/ FLAG_ENTRY0("ILLEGAL_NULL_LTP", LCBE(ILLEGAL_NULL_LTP)), /*15*/ FLAG_ENTRY0("ILLEGAL_FLIT_ENCODING", LCBE(ILLEGAL_FLIT_ENCODING)), /*16*/ FLAG_ENTRY0("FLIT_INPUT_BUF_OFLW", LCBE(FLIT_INPUT_BUF_OFLW)), /*17*/ FLAG_ENTRY0("VL_ACK_INPUT_BUF_OFLW", LCBE(VL_ACK_INPUT_BUF_OFLW)), /*18*/ FLAG_ENTRY0("VL_ACK_INPUT_PARITY_ERR", LCBE(VL_ACK_INPUT_PARITY_ERR)), /*19*/ FLAG_ENTRY0("VL_ACK_INPUT_WRONG_CRC_MODE", LCBE(VL_ACK_INPUT_WRONG_CRC_MODE)), /*20*/ FLAG_ENTRY0("FLIT_INPUT_BUF_MBE", LCBE(FLIT_INPUT_BUF_MBE)), /*21*/ FLAG_ENTRY0("FLIT_INPUT_BUF_SBE", LCBE(FLIT_INPUT_BUF_SBE)), /*22*/ FLAG_ENTRY0("REPLAY_BUF_MBE", LCBE(REPLAY_BUF_MBE)), /*23*/ FLAG_ENTRY0("REPLAY_BUF_SBE", LCBE(REPLAY_BUF_SBE)), /*24*/ FLAG_ENTRY0("CREDIT_RETURN_FLIT_MBE", LCBE(CREDIT_RETURN_FLIT_MBE)), /*25*/ FLAG_ENTRY0("RST_FOR_LINK_TIMEOUT", LCBE(RST_FOR_LINK_TIMEOUT)), /*26*/ FLAG_ENTRY0("RST_FOR_INCOMPLT_RND_TRIP", LCBE(RST_FOR_INCOMPLT_RND_TRIP)), /*27*/ FLAG_ENTRY0("HOLD_REINIT", LCBE(HOLD_REINIT)), /*28*/ FLAG_ENTRY0("NEG_EDGE_LINK_TRANSFER_ACTIVE", LCBE(NEG_EDGE_LINK_TRANSFER_ACTIVE)), /*29*/ FLAG_ENTRY0("REDUNDANT_FLIT_PARITY_ERR", LCBE(REDUNDANT_FLIT_PARITY_ERR)) }; /* * DC8051 Error Flags */ #define D8E(name) DC_DC8051_ERR_FLG_##name##_SMASK static struct flag_table dc8051_err_flags[] = { FLAG_ENTRY0("SET_BY_8051", D8E(SET_BY_8051)), 
FLAG_ENTRY0("LOST_8051_HEART_BEAT", D8E(LOST_8051_HEART_BEAT)), FLAG_ENTRY0("CRAM_MBE", D8E(CRAM_MBE)), FLAG_ENTRY0("CRAM_SBE", D8E(CRAM_SBE)), FLAG_ENTRY0("DRAM_MBE", D8E(DRAM_MBE)), FLAG_ENTRY0("DRAM_SBE", D8E(DRAM_SBE)), FLAG_ENTRY0("IRAM_MBE", D8E(IRAM_MBE)), FLAG_ENTRY0("IRAM_SBE", D8E(IRAM_SBE)), FLAG_ENTRY0("UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES", D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)), FLAG_ENTRY0("INVALID_CSR_ADDR", D8E(INVALID_CSR_ADDR)), }; /* * DC8051 Information Error flags * * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.ERROR field. */ static struct flag_table dc8051_info_err_flags[] = { FLAG_ENTRY0("Spico ROM check failed", SPICO_ROM_FAILED), FLAG_ENTRY0("Unknown frame received", UNKNOWN_FRAME), FLAG_ENTRY0("Target BER not met", TARGET_BER_NOT_MET), FLAG_ENTRY0("Serdes internal loopback failure", FAILED_SERDES_INTERNAL_LOOPBACK), FLAG_ENTRY0("Failed SerDes init", FAILED_SERDES_INIT), FLAG_ENTRY0("Failed LNI(Polling)", FAILED_LNI_POLLING), FLAG_ENTRY0("Failed LNI(Debounce)", FAILED_LNI_DEBOUNCE), FLAG_ENTRY0("Failed LNI(EstbComm)", FAILED_LNI_ESTBCOMM), FLAG_ENTRY0("Failed LNI(OptEq)", FAILED_LNI_OPTEQ), FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1), FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2), FLAG_ENTRY0("Failed LNI(ConfigLT)", FAILED_LNI_CONFIGLT) }; /* * DC8051 Information Host Information flags * * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG field. 
*/ static struct flag_table dc8051_info_host_msg_flags[] = { FLAG_ENTRY0("Host request done", 0x0001), FLAG_ENTRY0("BC SMA message", 0x0002), FLAG_ENTRY0("BC PWR_MGM message", 0x0004), FLAG_ENTRY0("BC Unknown message (BCC)", 0x0008), FLAG_ENTRY0("BC Unknown message (LCB)", 0x0010), FLAG_ENTRY0("External device config request", 0x0020), FLAG_ENTRY0("VerifyCap all frames received", 0x0040), FLAG_ENTRY0("LinkUp achieved", 0x0080), FLAG_ENTRY0("Link going down", 0x0100), }; static u32 encoded_size(u32 size); static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate); static int set_physical_link_state(struct hfi1_devdata *dd, u64 state); static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management, u8 *continuous); static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z, u8 *vcu, u16 *vl15buf, u8 *crc_sizes); static void read_vc_remote_link_width(struct hfi1_devdata *dd, u8 *remote_tx_rate, u16 *link_widths); static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits, u8 *flag_bits, u16 *link_widths); static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id, u8 *device_rev); static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed); static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx); static int read_tx_settings(struct hfi1_devdata *dd, u8 *enable_lane_tx, u8 *tx_polarity_inversion, u8 *rx_polarity_inversion, u8 *max_rate); static void handle_sdma_eng_err(struct hfi1_devdata *dd, unsigned int context, u64 err_status); static void handle_qsfp_int(struct hfi1_devdata *dd, u32 source, u64 reg); static void handle_dcc_err(struct hfi1_devdata *dd, unsigned int context, u64 err_status); static void handle_lcb_err(struct hfi1_devdata *dd, unsigned int context, u64 err_status); static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg); static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg); static void handle_rxe_err(struct 
hfi1_devdata *dd, u32 unused, u64 reg); static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg); static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg); static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg); static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg); static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg); static void set_partition_keys(struct hfi1_pportdata *); static const char *link_state_name(u32 state); static const char *link_state_reason_name(struct hfi1_pportdata *ppd, u32 state); static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data, u64 *out_data); static int read_idle_sma(struct hfi1_devdata *dd, u64 *data); static int thermal_init(struct hfi1_devdata *dd); static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc); static void handle_temp_err(struct hfi1_devdata *); static void dc_shutdown(struct hfi1_devdata *); static void dc_start(struct hfi1_devdata *); /* * Error interrupt table entry. This is used as input to the interrupt * "clear down" routine used for all second tier error interrupt register. * Second tier interrupt registers have a single bit representing them * in the top-level CceIntStatus. */ struct err_reg_info { u32 status; /* status CSR offset */ u32 clear; /* clear CSR offset */ u32 mask; /* mask CSR offset */ void (*handler)(struct hfi1_devdata *dd, u32 source, u64 reg); const char *desc; }; #define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START) #define NUM_DC_ERRS (IS_DC_END - IS_DC_START) #define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START) /* * Helpers for building HFI and DC error interrupt table entries. Different * helpers are needed because of inconsistent register names. 
*/ #define EE(reg, handler, desc) \ { reg##_STATUS, reg##_CLEAR, reg##_MASK, \ handler, desc } #define DC_EE1(reg, handler, desc) \ { reg##_FLG, reg##_FLG_CLR, reg##_FLG_EN, handler, desc } #define DC_EE2(reg, handler, desc) \ { reg##_FLG, reg##_CLR, reg##_EN, handler, desc } /* * Table of the "misc" grouping of error interrupts. Each entry refers to * another register containing more information. */ static const struct err_reg_info misc_errs[NUM_MISC_ERRS] = { /* 0*/ EE(CCE_ERR, handle_cce_err, "CceErr"), /* 1*/ EE(RCV_ERR, handle_rxe_err, "RxeErr"), /* 2*/ EE(MISC_ERR, handle_misc_err, "MiscErr"), /* 3*/ { 0, 0, 0, NULL }, /* reserved */ /* 4*/ EE(SEND_PIO_ERR, handle_pio_err, "PioErr"), /* 5*/ EE(SEND_DMA_ERR, handle_sdma_err, "SDmaErr"), /* 6*/ EE(SEND_EGRESS_ERR, handle_egress_err, "EgressErr"), /* 7*/ EE(SEND_ERR, handle_txe_err, "TxeErr") /* the rest are reserved */ }; /* * Index into the Various section of the interrupt sources * corresponding to the Critical Temperature interrupt. */ #define TCRIT_INT_SOURCE 4 /* * SDMA error interrupt entry - refers to another register containing more * information. */ static const struct err_reg_info sdma_eng_err = EE(SEND_DMA_ENG_ERR, handle_sdma_eng_err, "SDmaEngErr"); static const struct err_reg_info various_err[NUM_VARIOUS] = { /* 0*/ { 0, 0, 0, NULL }, /* PbcInt */ /* 1*/ { 0, 0, 0, NULL }, /* GpioAssertInt */ /* 2*/ EE(ASIC_QSFP1, handle_qsfp_int, "QSFP1"), /* 3*/ EE(ASIC_QSFP2, handle_qsfp_int, "QSFP2"), /* 4*/ { 0, 0, 0, NULL }, /* TCritInt */ /* rest are reserved */ }; /* * The DC encoding of mtu_cap for 10K MTU in the DCC_CFG_PORT_CONFIG * register can not be derived from the MTU value because 10K is not * a power of 2. Therefore, we need a constant. Everything else can * be calculated. */ #define DCC_CFG_PORT_MTU_CAP_10240 7 /* * Table of the DC grouping of error interrupts. Each entry refers to * another register containing more information. 
*/ static const struct err_reg_info dc_errs[NUM_DC_ERRS] = { /* 0*/ DC_EE1(DCC_ERR, handle_dcc_err, "DCC Err"), /* 1*/ DC_EE2(DC_LCB_ERR, handle_lcb_err, "LCB Err"), /* 2*/ DC_EE2(DC_DC8051_ERR, handle_8051_interrupt, "DC8051 Interrupt"), /* 3*/ /* dc_lbm_int - special, see is_dc_int() */ /* the rest are reserved */ }; struct cntr_entry { /* * counter name */ char *name; /* * csr to read for name (if applicable) */ u64 csr; /* * offset into dd or ppd to store the counter's value */ int offset; /* * flags */ u8 flags; /* * accessor for stat element, context either dd or ppd */ u64 (*rw_cntr)(const struct cntr_entry *, void *context, int vl, int mode, u64 data); }; #define C_RCV_HDR_OVF_FIRST C_RCV_HDR_OVF_0 #define C_RCV_HDR_OVF_LAST C_RCV_HDR_OVF_159 #define CNTR_ELEM(name, csr, offset, flags, accessor) \ { \ name, \ csr, \ offset, \ flags, \ accessor \ } /* 32bit RXE */ #define RXE32_PORT_CNTR_ELEM(name, counter, flags) \ CNTR_ELEM(#name, \ (counter * 8 + RCV_COUNTER_ARRAY32), \ 0, flags | CNTR_32BIT, \ port_access_u32_csr) #define RXE32_DEV_CNTR_ELEM(name, counter, flags) \ CNTR_ELEM(#name, \ (counter * 8 + RCV_COUNTER_ARRAY32), \ 0, flags | CNTR_32BIT, \ dev_access_u32_csr) /* 64bit RXE */ #define RXE64_PORT_CNTR_ELEM(name, counter, flags) \ CNTR_ELEM(#name, \ (counter * 8 + RCV_COUNTER_ARRAY64), \ 0, flags, \ port_access_u64_csr) #define RXE64_DEV_CNTR_ELEM(name, counter, flags) \ CNTR_ELEM(#name, \ (counter * 8 + RCV_COUNTER_ARRAY64), \ 0, flags, \ dev_access_u64_csr) #define OVR_LBL(ctx) C_RCV_HDR_OVF_ ## ctx #define OVR_ELM(ctx) \ CNTR_ELEM("RcvHdrOvr" #ctx, \ (RCV_HDR_OVFL_CNT + ctx*0x100), \ 0, CNTR_NORMAL, port_access_u64_csr) /* 32bit TXE */ #define TXE32_PORT_CNTR_ELEM(name, counter, flags) \ CNTR_ELEM(#name, \ (counter * 8 + SEND_COUNTER_ARRAY32), \ 0, flags | CNTR_32BIT, \ port_access_u32_csr) /* 64bit TXE */ #define TXE64_PORT_CNTR_ELEM(name, counter, flags) \ CNTR_ELEM(#name, \ (counter * 8 + SEND_COUNTER_ARRAY64), \ 0, flags, \ port_access_u64_csr) 
# define TX64_DEV_CNTR_ELEM(name, counter, flags) \ CNTR_ELEM(#name,\ counter * 8 + SEND_COUNTER_ARRAY64, \ 0, \ flags, \ dev_access_u64_csr) /* CCE */ #define CCE_PERF_DEV_CNTR_ELEM(name, counter, flags) \ CNTR_ELEM(#name, \ (counter * 8 + CCE_COUNTER_ARRAY32), \ 0, flags | CNTR_32BIT, \ dev_access_u32_csr) #define CCE_INT_DEV_CNTR_ELEM(name, counter, flags) \ CNTR_ELEM(#name, \ (counter * 8 + CCE_INT_COUNTER_ARRAY32), \ 0, flags | CNTR_32BIT, \ dev_access_u32_csr) /* DC */ #define DC_PERF_CNTR(name, counter, flags) \ CNTR_ELEM(#name, \ counter, \ 0, \ flags, \ dev_access_u64_csr) #define DC_PERF_CNTR_LCB(name, counter, flags) \ CNTR_ELEM(#name, \ counter, \ 0, \ flags, \ dc_access_lcb_cntr) /* ibp counters */ #define SW_IBP_CNTR(name, cntr) \ CNTR_ELEM(#name, \ 0, \ 0, \ CNTR_SYNTH, \ access_ibp_##cntr) u64 read_csr(const struct hfi1_devdata *dd, u32 offset) { u64 val; if (dd->flags & HFI1_PRESENT) { val = readq((void __iomem *)dd->kregbase + offset); return val; } return -1; } void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value) { if (dd->flags & HFI1_PRESENT) writeq(value, (void __iomem *)dd->kregbase + offset); } void __iomem *get_csr_addr( struct hfi1_devdata *dd, u32 offset) { return (void __iomem *)dd->kregbase + offset; } static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr, int mode, u64 value) { u64 ret; if (mode == CNTR_MODE_R) { ret = read_csr(dd, csr); } else if (mode == CNTR_MODE_W) { write_csr(dd, csr, value); ret = value; } else { dd_dev_err(dd, "Invalid cntr register access mode"); return 0; } hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, ret, mode); return ret; } /* Dev Access */ static u64 dev_access_u32_csr(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { struct hfi1_devdata *dd = context; if (vl != CNTR_INVALID_VL) return 0; return read_write_csr(dd, entry->csr, mode, data); } static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context, int vl, int mode, u64 
data) { struct hfi1_devdata *dd = context; u64 val = 0; u64 csr = entry->csr; if (entry->flags & CNTR_VL) { if (vl == CNTR_INVALID_VL) return 0; csr += 8 * vl; } else { if (vl != CNTR_INVALID_VL) return 0; } val = read_write_csr(dd, csr, mode, data); return val; } static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { struct hfi1_devdata *dd = context; u32 csr = entry->csr; int ret = 0; if (vl != CNTR_INVALID_VL) return 0; if (mode == CNTR_MODE_R) ret = read_lcb_csr(dd, csr, &data); else if (mode == CNTR_MODE_W) ret = write_lcb_csr(dd, csr, data); if (ret) { dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr); return 0; } hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, data, mode); return data; } /* Port Access */ static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { struct hfi1_pportdata *ppd = context; if (vl != CNTR_INVALID_VL) return 0; return read_write_csr(ppd->dd, entry->csr, mode, data); } static u64 port_access_u64_csr(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { struct hfi1_pportdata *ppd = context; u64 val; u64 csr = entry->csr; if (entry->flags & CNTR_VL) { if (vl == CNTR_INVALID_VL) return 0; csr += 8 * vl; } else { if (vl != CNTR_INVALID_VL) return 0; } val = read_write_csr(ppd->dd, csr, mode, data); return val; } /* Software defined */ static inline u64 read_write_sw(struct hfi1_devdata *dd, u64 *cntr, int mode, u64 data) { u64 ret; if (mode == CNTR_MODE_R) { ret = *cntr; } else if (mode == CNTR_MODE_W) { *cntr = data; ret = data; } else { dd_dev_err(dd, "Invalid cntr sw access mode"); return 0; } hfi1_cdbg(CNTR, "val 0x%llx mode %d", ret, mode); return ret; } static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { struct hfi1_pportdata *ppd = context; if (vl != CNTR_INVALID_VL) return 0; return read_write_sw(ppd->dd, &ppd->link_downed, mode, 
data); } static u64 access_sw_link_up_cnt(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { struct hfi1_pportdata *ppd = context; if (vl != CNTR_INVALID_VL) return 0; return read_write_sw(ppd->dd, &ppd->link_up, mode, data); } static u64 access_sw_xmit_discards(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { struct hfi1_pportdata *ppd = context; if (vl != CNTR_INVALID_VL) return 0; return read_write_sw(ppd->dd, &ppd->port_xmit_discards, mode, data); } static u64 access_xmit_constraint_errs(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { struct hfi1_pportdata *ppd = context; if (vl != CNTR_INVALID_VL) return 0; return read_write_sw(ppd->dd, &ppd->port_xmit_constraint_errors, mode, data); } static u64 access_rcv_constraint_errs(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { struct hfi1_pportdata *ppd = context; if (vl != CNTR_INVALID_VL) return 0; return read_write_sw(ppd->dd, &ppd->port_rcv_constraint_errors, mode, data); } u64 get_all_cpu_total(u64 __percpu *cntr) { int cpu; u64 counter = 0; for_each_possible_cpu(cpu) counter += *per_cpu_ptr(cntr, cpu); return counter; } static u64 read_write_cpu(struct hfi1_devdata *dd, u64 *z_val, u64 __percpu *cntr, int vl, int mode, u64 data) { u64 ret = 0; if (vl != CNTR_INVALID_VL) return 0; if (mode == CNTR_MODE_R) { ret = get_all_cpu_total(cntr) - *z_val; } else if (mode == CNTR_MODE_W) { /* A write can only zero the counter */ if (data == 0) *z_val = get_all_cpu_total(cntr); else dd_dev_err(dd, "Per CPU cntrs can only be zeroed"); } else { dd_dev_err(dd, "Invalid cntr sw cpu access mode"); return 0; } return ret; } static u64 access_sw_cpu_intr(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { struct hfi1_devdata *dd = context; return read_write_cpu(dd, &dd->z_int_counter, dd->int_counter, vl, mode, data); } static u64 access_sw_cpu_rcv_limit(const struct cntr_entry *entry, void 
*context, int vl, int mode, u64 data) { struct hfi1_devdata *dd = context; return read_write_cpu(dd, &dd->z_rcv_limit, dd->rcv_limit, vl, mode, data); } static u64 access_sw_pio_wait(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { struct hfi1_devdata *dd = context; return dd->verbs_dev.n_piowait; } static u64 access_sw_vtx_wait(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { struct hfi1_devdata *dd = context; return dd->verbs_dev.n_txwait; } static u64 access_sw_kmem_wait(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { struct hfi1_devdata *dd = context; return dd->verbs_dev.n_kmem_wait; } static u64 access_sw_send_schedule(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { struct hfi1_devdata *dd = (struct hfi1_devdata *)context; return dd->verbs_dev.n_send_schedule; } #define def_access_sw_cpu(cntr) \ static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry, \ void *context, int vl, int mode, u64 data) \ { \ struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context; \ return read_write_cpu(ppd->dd, &ppd->ibport_data.z_ ##cntr, \ ppd->ibport_data.cntr, vl, \ mode, data); \ } def_access_sw_cpu(rc_acks); def_access_sw_cpu(rc_qacks); def_access_sw_cpu(rc_delayed_comp); #define def_access_ibp_counter(cntr) \ static u64 access_ibp_##cntr(const struct cntr_entry *entry, \ void *context, int vl, int mode, u64 data) \ { \ struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context; \ \ if (vl != CNTR_INVALID_VL) \ return 0; \ \ return read_write_sw(ppd->dd, &ppd->ibport_data.n_ ##cntr, \ mode, data); \ } def_access_ibp_counter(loop_pkts); def_access_ibp_counter(rc_resends); def_access_ibp_counter(rnr_naks); def_access_ibp_counter(other_naks); def_access_ibp_counter(rc_timeouts); def_access_ibp_counter(pkt_drops); def_access_ibp_counter(dmawait); def_access_ibp_counter(rc_seqnak); def_access_ibp_counter(rc_dupreq); def_access_ibp_counter(rdma_seq); 
def_access_ibp_counter(unaligned);
def_access_ibp_counter(seq_naks);

/*
 * Per-device counter table, indexed by the C_* device counter index and
 * sized by DEV_CNTR_LAST.  Every entry uses a dev_access_*,
 * dc_access_lcb_cntr or access_sw_* accessor, i.e. one that takes the
 * struct hfi1_devdata as its context.
 */
static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
/* RXE 32-bit counters */
[C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
[C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT,
			CNTR_NORMAL),
[C_RX_TID_INVALID] = RXE32_DEV_CNTR_ELEM(RxTIDInvalid, RCV_TID_VALID_ERR_CNT,
			CNTR_NORMAL),
[C_RX_TID_FLGMS] = RXE32_DEV_CNTR_ELEM(RxTidFLGMs,
			RCV_TID_FLOW_GEN_MISMATCH_CNT,
			CNTR_NORMAL),
[C_RX_CTX_RHQS] = RXE32_DEV_CNTR_ELEM(RxCtxRHQS, RCV_CONTEXT_RHQ_STALL,
			CNTR_NORMAL),
[C_RX_CTX_EGRS] = RXE32_DEV_CNTR_ELEM(RxCtxEgrS, RCV_CONTEXT_EGR_STALL,
			CNTR_NORMAL),
[C_RCV_TID_FLSMS] = RXE32_DEV_CNTR_ELEM(RxTidFLSMs,
			RCV_TID_FLOW_SEQ_MISMATCH_CNT, CNTR_NORMAL),
/* CCE performance counters */
[C_CCE_PCI_CR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciCrSt,
			CCE_PCIE_POSTED_CRDT_STALL_CNT, CNTR_NORMAL),
[C_CCE_PCI_TR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciTrSt, CCE_PCIE_TRGT_STALL_CNT,
			CNTR_NORMAL),
[C_CCE_PIO_WR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePioWrSt, CCE_PIO_WR_STALL_CNT,
			CNTR_NORMAL),
/* CCE interrupt counters */
[C_CCE_ERR_INT] = CCE_INT_DEV_CNTR_ELEM(CceErrInt, CCE_ERR_INT_CNT,
			CNTR_NORMAL),
[C_CCE_SDMA_INT] = CCE_INT_DEV_CNTR_ELEM(CceSdmaInt, CCE_SDMA_INT_CNT,
			CNTR_NORMAL),
[C_CCE_MISC_INT] = CCE_INT_DEV_CNTR_ELEM(CceMiscInt, CCE_MISC_INT_CNT,
			CNTR_NORMAL),
[C_CCE_RCV_AV_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvAvInt, CCE_RCV_AVAIL_INT_CNT,
			CNTR_NORMAL),
[C_CCE_RCV_URG_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvUrgInt,
			CCE_RCV_URGENT_INT_CNT,	CNTR_NORMAL),
[C_CCE_SEND_CR_INT] = CCE_INT_DEV_CNTR_ELEM(CceSndCrInt,
			CCE_SEND_CREDIT_INT_CNT, CNTR_NORMAL),
/* DC counters read directly from their CSR; CNTR_VL entries are per VL */
[C_DC_UNC_ERR] = DC_PERF_CNTR(DcUnctblErr, DCC_ERR_UNCORRECTABLE_CNT,
			 CNTR_SYNTH),
[C_DC_RCV_ERR] = DC_PERF_CNTR(DcRecvErr, DCC_ERR_PORTRCV_ERR_CNT, CNTR_SYNTH),
[C_DC_FM_CFG_ERR] = DC_PERF_CNTR(DcFmCfgErr, DCC_ERR_FMCONFIG_ERR_CNT,
			 CNTR_SYNTH),
[C_DC_RMT_PHY_ERR] = DC_PERF_CNTR(DcRmtPhyErr, DCC_ERR_RCVREMOTE_PHY_ERR_CNT,
			 CNTR_SYNTH),
[C_DC_DROPPED_PKT] = DC_PERF_CNTR(DcDroppedPkt, DCC_ERR_DROPPED_PKT_CNT,
			 CNTR_SYNTH),
[C_DC_MC_XMIT_PKTS] = DC_PERF_CNTR(DcMcXmitPkts,
			 DCC_PRF_PORT_XMIT_MULTICAST_CNT, CNTR_SYNTH),
[C_DC_MC_RCV_PKTS] = DC_PERF_CNTR(DcMcRcvPkts,
			 DCC_PRF_PORT_RCV_MULTICAST_PKT_CNT,
			 CNTR_SYNTH),
[C_DC_XMIT_CERR] = DC_PERF_CNTR(DcXmitCorr,
			 DCC_PRF_PORT_XMIT_CORRECTABLE_CNT, CNTR_SYNTH),
[C_DC_RCV_CERR] = DC_PERF_CNTR(DcRcvCorrCnt, DCC_PRF_PORT_RCV_CORRECTABLE_CNT,
			 CNTR_SYNTH),
[C_DC_RCV_FCC] = DC_PERF_CNTR(DcRxFCntl, DCC_PRF_RX_FLOW_CRTL_CNT,
			 CNTR_SYNTH),
[C_DC_XMIT_FCC] = DC_PERF_CNTR(DcXmitFCntl, DCC_PRF_TX_FLOW_CRTL_CNT,
			 CNTR_SYNTH),
[C_DC_XMIT_FLITS] = DC_PERF_CNTR(DcXmitFlits, DCC_PRF_PORT_XMIT_DATA_CNT,
			 CNTR_SYNTH),
[C_DC_RCV_FLITS] = DC_PERF_CNTR(DcRcvFlits, DCC_PRF_PORT_RCV_DATA_CNT,
			 CNTR_SYNTH),
[C_DC_XMIT_PKTS] = DC_PERF_CNTR(DcXmitPkts, DCC_PRF_PORT_XMIT_PKTS_CNT,
			 CNTR_SYNTH),
[C_DC_RCV_PKTS] = DC_PERF_CNTR(DcRcvPkts, DCC_PRF_PORT_RCV_PKTS_CNT,
			 CNTR_SYNTH),
[C_DC_RX_FLIT_VL] = DC_PERF_CNTR(DcRxFlitVl, DCC_PRF_PORT_VL_RCV_DATA_CNT,
			 CNTR_SYNTH | CNTR_VL),
[C_DC_RX_PKT_VL] = DC_PERF_CNTR(DcRxPktVl, DCC_PRF_PORT_VL_RCV_PKTS_CNT,
			 CNTR_SYNTH | CNTR_VL),
[C_DC_RCV_FCN] = DC_PERF_CNTR(DcRcvFcn, DCC_PRF_PORT_RCV_FECN_CNT, CNTR_SYNTH),
[C_DC_RCV_FCN_VL] = DC_PERF_CNTR(DcRcvFcnVl, DCC_PRF_PORT_VL_RCV_FECN_CNT,
			 CNTR_SYNTH | CNTR_VL),
[C_DC_RCV_BCN] = DC_PERF_CNTR(DcRcvBcn, DCC_PRF_PORT_RCV_BECN_CNT, CNTR_SYNTH),
[C_DC_RCV_BCN_VL] = DC_PERF_CNTR(DcRcvBcnVl, DCC_PRF_PORT_VL_RCV_BECN_CNT,
			 CNTR_SYNTH | CNTR_VL),
[C_DC_RCV_BBL] = DC_PERF_CNTR(DcRcvBbl, DCC_PRF_PORT_RCV_BUBBLE_CNT,
			 CNTR_SYNTH),
[C_DC_RCV_BBL_VL] = DC_PERF_CNTR(DcRcvBblVl, DCC_PRF_PORT_VL_RCV_BUBBLE_CNT,
			 CNTR_SYNTH | CNTR_VL),
[C_DC_MARK_FECN] = DC_PERF_CNTR(DcMarkFcn, DCC_PRF_PORT_MARK_FECN_CNT,
			 CNTR_SYNTH),
[C_DC_MARK_FECN_VL] = DC_PERF_CNTR(DcMarkFcnVl, DCC_PRF_PORT_VL_MARK_FECN_CNT,
			 CNTR_SYNTH | CNTR_VL),
/* DC counters accessed through the LCB helpers */
[C_DC_TOTAL_CRC] =
	DC_PERF_CNTR_LCB(DcTotCrc, DC_LCB_ERR_INFO_TOTAL_CRC_ERR,
			 CNTR_SYNTH),
[C_DC_CRC_LN0] = DC_PERF_CNTR_LCB(DcCrcLn0, DC_LCB_ERR_INFO_CRC_ERR_LN0,
			 CNTR_SYNTH),
[C_DC_CRC_LN1] = DC_PERF_CNTR_LCB(DcCrcLn1, DC_LCB_ERR_INFO_CRC_ERR_LN1,
			 CNTR_SYNTH),
[C_DC_CRC_LN2] = DC_PERF_CNTR_LCB(DcCrcLn2, DC_LCB_ERR_INFO_CRC_ERR_LN2,
			 CNTR_SYNTH),
[C_DC_CRC_LN3] = DC_PERF_CNTR_LCB(DcCrcLn3, DC_LCB_ERR_INFO_CRC_ERR_LN3,
			 CNTR_SYNTH),
[C_DC_CRC_MULT_LN] =
	DC_PERF_CNTR_LCB(DcMultLn, DC_LCB_ERR_INFO_CRC_ERR_MULTI_LN,
			 CNTR_SYNTH),
[C_DC_TX_REPLAY] = DC_PERF_CNTR_LCB(DcTxReplay, DC_LCB_ERR_INFO_TX_REPLAY_CNT,
				    CNTR_SYNTH),
[C_DC_RX_REPLAY] = DC_PERF_CNTR_LCB(DcRxReplay, DC_LCB_ERR_INFO_RX_REPLAY_CNT,
				    CNTR_SYNTH),
[C_DC_SEQ_CRC_CNT] =
	DC_PERF_CNTR_LCB(DcLinkSeqCrc, DC_LCB_ERR_INFO_SEQ_CRC_CNT,
			 CNTR_SYNTH),
[C_DC_ESC0_ONLY_CNT] =
	DC_PERF_CNTR_LCB(DcEsc0, DC_LCB_ERR_INFO_ESCAPE_0_ONLY_CNT,
			 CNTR_SYNTH),
[C_DC_ESC0_PLUS1_CNT] =
	DC_PERF_CNTR_LCB(DcEsc1, DC_LCB_ERR_INFO_ESCAPE_0_PLUS1_CNT,
			 CNTR_SYNTH),
[C_DC_ESC0_PLUS2_CNT] =
	DC_PERF_CNTR_LCB(DcEsc0Plus2, DC_LCB_ERR_INFO_ESCAPE_0_PLUS2_CNT,
			 CNTR_SYNTH),
[C_DC_REINIT_FROM_PEER_CNT] =
	DC_PERF_CNTR_LCB(DcReinitPeer, DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT,
			 CNTR_SYNTH),
[C_DC_SBE_CNT] = DC_PERF_CNTR_LCB(DcSbe, DC_LCB_ERR_INFO_SBE_CNT,
				  CNTR_SYNTH),
[C_DC_MISC_FLG_CNT] =
	DC_PERF_CNTR_LCB(DcMiscFlg, DC_LCB_ERR_INFO_MISC_FLG_CNT,
			 CNTR_SYNTH),
[C_DC_PRF_GOOD_LTP_CNT] =
	DC_PERF_CNTR_LCB(DcGoodLTP, DC_LCB_PRF_GOOD_LTP_CNT, CNTR_SYNTH),
[C_DC_PRF_ACCEPTED_LTP_CNT] =
	DC_PERF_CNTR_LCB(DcAccLTP, DC_LCB_PRF_ACCEPTED_LTP_CNT,
			 CNTR_SYNTH),
[C_DC_PRF_RX_FLIT_CNT] =
	DC_PERF_CNTR_LCB(DcPrfRxFlit, DC_LCB_PRF_RX_FLIT_CNT, CNTR_SYNTH),
[C_DC_PRF_TX_FLIT_CNT] =
	DC_PERF_CNTR_LCB(DcPrfTxFlit, DC_LCB_PRF_TX_FLIT_CNT, CNTR_SYNTH),
[C_DC_PRF_CLK_CNTR] =
	DC_PERF_CNTR_LCB(DcPrfClk, DC_LCB_PRF_CLK_CNTR, CNTR_SYNTH),
[C_DC_PG_DBG_FLIT_CRDTS_CNT] =
	DC_PERF_CNTR_LCB(DcFltCrdts, DC_LCB_PG_DBG_FLIT_CRDTS_CNT, CNTR_SYNTH),
[C_DC_PG_STS_PAUSE_COMPLETE_CNT] =
	DC_PERF_CNTR_LCB(DcPauseComp, DC_LCB_PG_STS_PAUSE_COMPLETE_CNT,
			 CNTR_SYNTH),
[C_DC_PG_STS_TX_SBE_CNT] =
	DC_PERF_CNTR_LCB(DcStsTxSbe, DC_LCB_PG_STS_TX_SBE_CNT,
			 CNTR_SYNTH),
[C_DC_PG_STS_TX_MBE_CNT] =
	DC_PERF_CNTR_LCB(DcStsTxMbe, DC_LCB_PG_STS_TX_MBE_CNT,
			 CNTR_SYNTH),
/* Software counters: no CSR (csr == 0), served by access_sw_*() */
[C_SW_CPU_INTR] = CNTR_ELEM("Intr", 0, 0, CNTR_NORMAL,
			    access_sw_cpu_intr),
[C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL,
			    access_sw_cpu_rcv_limit),
[C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL,
			    access_sw_vtx_wait),
[C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
			    access_sw_pio_wait),
[C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
			    access_sw_kmem_wait),
[C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
			    access_sw_send_schedule),
};

/*
 * Per-port counter table, indexed by the C_* port counter index and sized
 * by PORT_CNTR_LAST.
 */
static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
[C_TX_UNSUP_VL] = TXE32_PORT_CNTR_ELEM(TxUnVLErr, SEND_UNSUP_VL_ERR_CNT,
			CNTR_NORMAL),
[C_TX_INVAL_LEN] = TXE32_PORT_CNTR_ELEM(TxInvalLen, SEND_LEN_ERR_CNT,
			CNTR_NORMAL),
[C_TX_MM_LEN_ERR] = TXE32_PORT_CNTR_ELEM(TxMMLenErr, SEND_MAX_MIN_LEN_ERR_CNT,
			CNTR_NORMAL),
[C_TX_UNDERRUN] = TXE32_PORT_CNTR_ELEM(TxUnderrun, SEND_UNDERRUN_CNT,
			CNTR_NORMAL),
[C_TX_FLOW_STALL] = TXE32_PORT_CNTR_ELEM(TxFlowStall, SEND_FLOW_STALL_CNT,
			CNTR_NORMAL),
[C_TX_DROPPED] = TXE32_PORT_CNTR_ELEM(TxDropped, SEND_DROPPED_PKT_CNT,
			CNTR_NORMAL),
[C_TX_HDR_ERR] = TXE32_PORT_CNTR_ELEM(TxHdrErr, SEND_HEADERS_ERR_CNT,
			CNTR_NORMAL),
[C_TX_PKT] = TXE64_PORT_CNTR_ELEM(TxPkt, SEND_DATA_PKT_CNT, CNTR_NORMAL),
[C_TX_WORDS] = TXE64_PORT_CNTR_ELEM(TxWords, SEND_DWORD_CNT, CNTR_NORMAL),
[C_TX_WAIT] = TXE64_PORT_CNTR_ELEM(TxWait, SEND_WAIT_CNT, CNTR_SYNTH),
[C_TX_FLIT_VL] = TXE64_PORT_CNTR_ELEM(TxFlitVL, SEND_DATA_VL0_CNT,
			CNTR_SYNTH | CNTR_VL),
[C_TX_PKT_VL] = TXE64_PORT_CNTR_ELEM(TxPktVL, SEND_DATA_PKT_VL0_CNT,
			CNTR_SYNTH | CNTR_VL),
[C_TX_WAIT_VL] = TXE64_PORT_CNTR_ELEM(TxWaitVL, SEND_WAIT_VL0_CNT,
			CNTR_SYNTH | CNTR_VL),
[C_RX_PKT] = RXE64_PORT_CNTR_ELEM(RxPkt, RCV_DATA_PKT_CNT, CNTR_NORMAL),
[C_RX_WORDS] = RXE64_PORT_CNTR_ELEM(RxWords, RCV_DWORD_CNT, CNTR_NORMAL),
[C_SW_LINK_DOWN] = CNTR_ELEM("SwLinkDown", 0, 0, CNTR_SYNTH | CNTR_32BIT,
			access_sw_link_dn_cnt),
[C_SW_LINK_UP] =
CNTR_ELEM("SwLinkUp", 0, 0, CNTR_SYNTH | CNTR_32BIT, access_sw_link_up_cnt), [C_SW_XMIT_DSCD] = CNTR_ELEM("XmitDscd", 0, 0, CNTR_SYNTH | CNTR_32BIT, access_sw_xmit_discards), [C_SW_XMIT_DSCD_VL] = CNTR_ELEM("XmitDscdVl", 0, 0, CNTR_SYNTH | CNTR_32BIT | CNTR_VL, access_sw_xmit_discards), [C_SW_XMIT_CSTR_ERR] = CNTR_ELEM("XmitCstrErr", 0, 0, CNTR_SYNTH, access_xmit_constraint_errs), [C_SW_RCV_CSTR_ERR] = CNTR_ELEM("RcvCstrErr", 0, 0, CNTR_SYNTH, access_rcv_constraint_errs), [C_SW_IBP_LOOP_PKTS] = SW_IBP_CNTR(LoopPkts, loop_pkts), [C_SW_IBP_RC_RESENDS] = SW_IBP_CNTR(RcResend, rc_resends), [C_SW_IBP_RNR_NAKS] = SW_IBP_CNTR(RnrNak, rnr_naks), [C_SW_IBP_OTHER_NAKS] = SW_IBP_CNTR(OtherNak, other_naks), [C_SW_IBP_RC_TIMEOUTS] = SW_IBP_CNTR(RcTimeOut, rc_timeouts), [C_SW_IBP_PKT_DROPS] = SW_IBP_CNTR(PktDrop, pkt_drops), [C_SW_IBP_DMA_WAIT] = SW_IBP_CNTR(DmaWait, dmawait), [C_SW_IBP_RC_SEQNAK] = SW_IBP_CNTR(RcSeqNak, rc_seqnak), [C_SW_IBP_RC_DUPREQ] = SW_IBP_CNTR(RcDupRew, rc_dupreq), [C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq), [C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned), [C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks), [C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL, access_sw_cpu_rc_acks), [C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL, access_sw_cpu_rc_qacks), [C_SW_CPU_RC_DELAYED_COMP] = CNTR_ELEM("RcDelayComp", 0, 0, CNTR_NORMAL, access_sw_cpu_rc_delayed_comp), [OVR_LBL(0)] = OVR_ELM(0), [OVR_LBL(1)] = OVR_ELM(1), [OVR_LBL(2)] = OVR_ELM(2), [OVR_LBL(3)] = OVR_ELM(3), [OVR_LBL(4)] = OVR_ELM(4), [OVR_LBL(5)] = OVR_ELM(5), [OVR_LBL(6)] = OVR_ELM(6), [OVR_LBL(7)] = OVR_ELM(7), [OVR_LBL(8)] = OVR_ELM(8), [OVR_LBL(9)] = OVR_ELM(9), [OVR_LBL(10)] = OVR_ELM(10), [OVR_LBL(11)] = OVR_ELM(11), [OVR_LBL(12)] = OVR_ELM(12), [OVR_LBL(13)] = OVR_ELM(13), [OVR_LBL(14)] = OVR_ELM(14), [OVR_LBL(15)] = OVR_ELM(15), [OVR_LBL(16)] = OVR_ELM(16), [OVR_LBL(17)] = OVR_ELM(17), [OVR_LBL(18)] = OVR_ELM(18), [OVR_LBL(19)] = OVR_ELM(19), 
[OVR_LBL(20)] = OVR_ELM(20), [OVR_LBL(21)] = OVR_ELM(21), [OVR_LBL(22)] = OVR_ELM(22), [OVR_LBL(23)] = OVR_ELM(23), [OVR_LBL(24)] = OVR_ELM(24), [OVR_LBL(25)] = OVR_ELM(25), [OVR_LBL(26)] = OVR_ELM(26), [OVR_LBL(27)] = OVR_ELM(27), [OVR_LBL(28)] = OVR_ELM(28), [OVR_LBL(29)] = OVR_ELM(29), [OVR_LBL(30)] = OVR_ELM(30), [OVR_LBL(31)] = OVR_ELM(31), [OVR_LBL(32)] = OVR_ELM(32), [OVR_LBL(33)] = OVR_ELM(33), [OVR_LBL(34)] = OVR_ELM(34), [OVR_LBL(35)] = OVR_ELM(35), [OVR_LBL(36)] = OVR_ELM(36), [OVR_LBL(37)] = OVR_ELM(37), [OVR_LBL(38)] = OVR_ELM(38), [OVR_LBL(39)] = OVR_ELM(39), [OVR_LBL(40)] = OVR_ELM(40), [OVR_LBL(41)] = OVR_ELM(41), [OVR_LBL(42)] = OVR_ELM(42), [OVR_LBL(43)] = OVR_ELM(43), [OVR_LBL(44)] = OVR_ELM(44), [OVR_LBL(45)] = OVR_ELM(45), [OVR_LBL(46)] = OVR_ELM(46), [OVR_LBL(47)] = OVR_ELM(47), [OVR_LBL(48)] = OVR_ELM(48), [OVR_LBL(49)] = OVR_ELM(49), [OVR_LBL(50)] = OVR_ELM(50), [OVR_LBL(51)] = OVR_ELM(51), [OVR_LBL(52)] = OVR_ELM(52), [OVR_LBL(53)] = OVR_ELM(53), [OVR_LBL(54)] = OVR_ELM(54), [OVR_LBL(55)] = OVR_ELM(55), [OVR_LBL(56)] = OVR_ELM(56), [OVR_LBL(57)] = OVR_ELM(57), [OVR_LBL(58)] = OVR_ELM(58), [OVR_LBL(59)] = OVR_ELM(59), [OVR_LBL(60)] = OVR_ELM(60), [OVR_LBL(61)] = OVR_ELM(61), [OVR_LBL(62)] = OVR_ELM(62), [OVR_LBL(63)] = OVR_ELM(63), [OVR_LBL(64)] = OVR_ELM(64), [OVR_LBL(65)] = OVR_ELM(65), [OVR_LBL(66)] = OVR_ELM(66), [OVR_LBL(67)] = OVR_ELM(67), [OVR_LBL(68)] = OVR_ELM(68), [OVR_LBL(69)] = OVR_ELM(69), [OVR_LBL(70)] = OVR_ELM(70), [OVR_LBL(71)] = OVR_ELM(71), [OVR_LBL(72)] = OVR_ELM(72), [OVR_LBL(73)] = OVR_ELM(73), [OVR_LBL(74)] = OVR_ELM(74), [OVR_LBL(75)] = OVR_ELM(75), [OVR_LBL(76)] = OVR_ELM(76), [OVR_LBL(77)] = OVR_ELM(77), [OVR_LBL(78)] = OVR_ELM(78), [OVR_LBL(79)] = OVR_ELM(79), [OVR_LBL(80)] = OVR_ELM(80), [OVR_LBL(81)] = OVR_ELM(81), [OVR_LBL(82)] = OVR_ELM(82), [OVR_LBL(83)] = OVR_ELM(83), [OVR_LBL(84)] = OVR_ELM(84), [OVR_LBL(85)] = OVR_ELM(85), [OVR_LBL(86)] = OVR_ELM(86), [OVR_LBL(87)] = OVR_ELM(87), [OVR_LBL(88)] = 
OVR_ELM(88), [OVR_LBL(89)] = OVR_ELM(89), [OVR_LBL(90)] = OVR_ELM(90), [OVR_LBL(91)] = OVR_ELM(91), [OVR_LBL(92)] = OVR_ELM(92), [OVR_LBL(93)] = OVR_ELM(93), [OVR_LBL(94)] = OVR_ELM(94), [OVR_LBL(95)] = OVR_ELM(95), [OVR_LBL(96)] = OVR_ELM(96), [OVR_LBL(97)] = OVR_ELM(97), [OVR_LBL(98)] = OVR_ELM(98), [OVR_LBL(99)] = OVR_ELM(99), [OVR_LBL(100)] = OVR_ELM(100), [OVR_LBL(101)] = OVR_ELM(101), [OVR_LBL(102)] = OVR_ELM(102), [OVR_LBL(103)] = OVR_ELM(103), [OVR_LBL(104)] = OVR_ELM(104), [OVR_LBL(105)] = OVR_ELM(105), [OVR_LBL(106)] = OVR_ELM(106), [OVR_LBL(107)] = OVR_ELM(107), [OVR_LBL(108)] = OVR_ELM(108), [OVR_LBL(109)] = OVR_ELM(109), [OVR_LBL(110)] = OVR_ELM(110), [OVR_LBL(111)] = OVR_ELM(111), [OVR_LBL(112)] = OVR_ELM(112), [OVR_LBL(113)] = OVR_ELM(113), [OVR_LBL(114)] = OVR_ELM(114), [OVR_LBL(115)] = OVR_ELM(115), [OVR_LBL(116)] = OVR_ELM(116), [OVR_LBL(117)] = OVR_ELM(117), [OVR_LBL(118)] = OVR_ELM(118), [OVR_LBL(119)] = OVR_ELM(119), [OVR_LBL(120)] = OVR_ELM(120), [OVR_LBL(121)] = OVR_ELM(121), [OVR_LBL(122)] = OVR_ELM(122), [OVR_LBL(123)] = OVR_ELM(123), [OVR_LBL(124)] = OVR_ELM(124), [OVR_LBL(125)] = OVR_ELM(125), [OVR_LBL(126)] = OVR_ELM(126), [OVR_LBL(127)] = OVR_ELM(127), [OVR_LBL(128)] = OVR_ELM(128), [OVR_LBL(129)] = OVR_ELM(129), [OVR_LBL(130)] = OVR_ELM(130), [OVR_LBL(131)] = OVR_ELM(131), [OVR_LBL(132)] = OVR_ELM(132), [OVR_LBL(133)] = OVR_ELM(133), [OVR_LBL(134)] = OVR_ELM(134), [OVR_LBL(135)] = OVR_ELM(135), [OVR_LBL(136)] = OVR_ELM(136), [OVR_LBL(137)] = OVR_ELM(137), [OVR_LBL(138)] = OVR_ELM(138), [OVR_LBL(139)] = OVR_ELM(139), [OVR_LBL(140)] = OVR_ELM(140), [OVR_LBL(141)] = OVR_ELM(141), [OVR_LBL(142)] = OVR_ELM(142), [OVR_LBL(143)] = OVR_ELM(143), [OVR_LBL(144)] = OVR_ELM(144), [OVR_LBL(145)] = OVR_ELM(145), [OVR_LBL(146)] = OVR_ELM(146), [OVR_LBL(147)] = OVR_ELM(147), [OVR_LBL(148)] = OVR_ELM(148), [OVR_LBL(149)] = OVR_ELM(149), [OVR_LBL(150)] = OVR_ELM(150), [OVR_LBL(151)] = OVR_ELM(151), [OVR_LBL(152)] = OVR_ELM(152), [OVR_LBL(153)] = 
OVR_ELM(153), [OVR_LBL(154)] = OVR_ELM(154), [OVR_LBL(155)] = OVR_ELM(155), [OVR_LBL(156)] = OVR_ELM(156), [OVR_LBL(157)] = OVR_ELM(157), [OVR_LBL(158)] = OVR_ELM(158), [OVR_LBL(159)] = OVR_ELM(159), }; /* ======================================================================== */ /* return true if this is chip revision revision a0 */ int is_a0(struct hfi1_devdata *dd) { return ((dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT) & CCE_REVISION_CHIP_REV_MINOR_MASK) == 0; } /* return true if this is chip revision revision a */ int is_ax(struct hfi1_devdata *dd) { u8 chip_rev_minor = dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT & CCE_REVISION_CHIP_REV_MINOR_MASK; return (chip_rev_minor & 0xf0) == 0; } /* return true if this is chip revision revision b */ int is_bx(struct hfi1_devdata *dd) { u8 chip_rev_minor = dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT & CCE_REVISION_CHIP_REV_MINOR_MASK; return !!(chip_rev_minor & 0x10); } /* * Append string s to buffer buf. Arguments curp and len are the current * position and remaining length, respectively. * * return 0 on success, 1 on out of room */ static int append_str(char *buf, char **curp, int *lenp, const char *s) { char *p = *curp; int len = *lenp; int result = 0; /* success */ char c; /* add a comma, if first in the buffer */ if (p != buf) { if (len == 0) { result = 1; /* out of room */ goto done; } *p++ = ','; len--; } /* copy the string */ while ((c = *s++) != 0) { if (len == 0) { result = 1; /* out of room */ goto done; } *p++ = c; len--; } done: /* write return values */ *curp = p; *lenp = len; return result; } /* * Using the given flag table, print a comma separated string into * the buffer. End in '*' if the buffer is too short. 
*/ static char *flag_string(char *buf, int buf_len, u64 flags, struct flag_table *table, int table_size) { char extra[32]; char *p = buf; int len = buf_len; int no_room = 0; int i; /* make sure there is at least 2 so we can form "*" */ if (len < 2) return ""; len--; /* leave room for a nul */ for (i = 0; i < table_size; i++) { if (flags & table[i].flag) { no_room = append_str(buf, &p, &len, table[i].str); if (no_room) break; flags &= ~table[i].flag; } } /* any undocumented bits left? */ if (!no_room && flags) { snprintf(extra, sizeof(extra), "bits 0x%llx", flags); no_room = append_str(buf, &p, &len, extra); } /* add * if ran out of room */ if (no_room) { /* may need to back up to add space for a '*' */ if (len == 0) --p; *p++ = '*'; } /* add final nul - space already allocated above */ *p = 0; return buf; } /* first 8 CCE error interrupt source names */ static const char * const cce_misc_names[] = { "CceErrInt", /* 0 */ "RxeErrInt", /* 1 */ "MiscErrInt", /* 2 */ "Reserved3", /* 3 */ "PioErrInt", /* 4 */ "SDmaErrInt", /* 5 */ "EgressErrInt", /* 6 */ "TxeErrInt" /* 7 */ }; /* * Return the miscellaneous error interrupt name. */ static char *is_misc_err_name(char *buf, size_t bsize, unsigned int source) { if (source < ARRAY_SIZE(cce_misc_names)) strncpy(buf, cce_misc_names[source], bsize); else snprintf(buf, bsize, "Reserved%u", source + IS_GENERAL_ERR_START); return buf; } /* * Return the SDMA engine error interrupt name. */ static char *is_sdma_eng_err_name(char *buf, size_t bsize, unsigned int source) { snprintf(buf, bsize, "SDmaEngErrInt%u", source); return buf; } /* * Return the send context error interrupt name. */ static char *is_sendctxt_err_name(char *buf, size_t bsize, unsigned int source) { snprintf(buf, bsize, "SendCtxtErrInt%u", source); return buf; } static const char * const various_names[] = { "PbcInt", "GpioAssertInt", "Qsfp1Int", "Qsfp2Int", "TCritInt" }; /* * Return the various interrupt name. 
*/ static char *is_various_name(char *buf, size_t bsize, unsigned int source) { if (source < ARRAY_SIZE(various_names)) strncpy(buf, various_names[source], bsize); else snprintf(buf, bsize, "Reserved%u", source+IS_VARIOUS_START); return buf; } /* * Return the DC interrupt name. */ static char *is_dc_name(char *buf, size_t bsize, unsigned int source) { static const char * const dc_int_names[] = { "common", "lcb", "8051", "lbm" /* local block merge */ }; if (source < ARRAY_SIZE(dc_int_names)) snprintf(buf, bsize, "dc_%s_int", dc_int_names[source]); else snprintf(buf, bsize, "DCInt%u", source); return buf; } static const char * const sdma_int_names[] = { "SDmaInt", "SdmaIdleInt", "SdmaProgressInt", }; /* * Return the SDMA engine interrupt name. */ static char *is_sdma_eng_name(char *buf, size_t bsize, unsigned int source) { /* what interrupt */ unsigned int what = source / TXE_NUM_SDMA_ENGINES; /* which engine */ unsigned int which = source % TXE_NUM_SDMA_ENGINES; if (likely(what < 3)) snprintf(buf, bsize, "%s%u", sdma_int_names[what], which); else snprintf(buf, bsize, "Invalid SDMA interrupt %u", source); return buf; } /* * Return the receive available interrupt name. */ static char *is_rcv_avail_name(char *buf, size_t bsize, unsigned int source) { snprintf(buf, bsize, "RcvAvailInt%u", source); return buf; } /* * Return the receive urgent interrupt name. */ static char *is_rcv_urgent_name(char *buf, size_t bsize, unsigned int source) { snprintf(buf, bsize, "RcvUrgentInt%u", source); return buf; } /* * Return the send credit interrupt name. */ static char *is_send_credit_name(char *buf, size_t bsize, unsigned int source) { snprintf(buf, bsize, "SendCreditInt%u", source); return buf; } /* * Return the reserved interrupt name. 
*/ static char *is_reserved_name(char *buf, size_t bsize, unsigned int source) { snprintf(buf, bsize, "Reserved%u", source + IS_RESERVED_START); return buf; } static char *cce_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, cce_err_status_flags, ARRAY_SIZE(cce_err_status_flags)); } static char *rxe_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, rxe_err_status_flags, ARRAY_SIZE(rxe_err_status_flags)); } static char *misc_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, misc_err_status_flags, ARRAY_SIZE(misc_err_status_flags)); } static char *pio_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, pio_err_status_flags, ARRAY_SIZE(pio_err_status_flags)); } static char *sdma_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, sdma_err_status_flags, ARRAY_SIZE(sdma_err_status_flags)); } static char *egress_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, egress_err_status_flags, ARRAY_SIZE(egress_err_status_flags)); } static char *egress_err_info_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, egress_err_info_flags, ARRAY_SIZE(egress_err_info_flags)); } static char *send_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, send_err_status_flags, ARRAY_SIZE(send_err_status_flags)); } static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg) { char buf[96]; /* * For most these errors, there is nothing that can be done except * report or record it. 
*/ dd_dev_info(dd, "CCE Error: %s\n", cce_err_status_string(buf, sizeof(buf), reg)); if ((reg & CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK) && is_a0(dd) && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) { /* this error requires a manual drop into SPC freeze mode */ /* then a fix up */ start_freeze_handling(dd->pport, FREEZE_SELF); } } /* * Check counters for receive errors that do not have an interrupt * associated with them. */ #define RCVERR_CHECK_TIME 10 static void update_rcverr_timer(unsigned long opaque) { struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque; struct hfi1_pportdata *ppd = dd->pport; u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL); if (dd->rcv_ovfl_cnt < cur_ovfl_cnt && ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) { dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__); set_link_down_reason(ppd, OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0, OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN); queue_work(ppd->hfi1_wq, &ppd->link_bounce_work); } dd->rcv_ovfl_cnt = (u32) cur_ovfl_cnt; mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME); } static int init_rcverr(struct hfi1_devdata *dd) { setup_timer(&dd->rcverr_timer, update_rcverr_timer, (unsigned long)dd); /* Assume the hardware counter has been reset */ dd->rcv_ovfl_cnt = 0; return mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME); } static void free_rcverr(struct hfi1_devdata *dd) { if (dd->rcverr_timer.data) del_timer_sync(&dd->rcverr_timer); dd->rcverr_timer.data = 0; } static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg) { char buf[96]; dd_dev_info(dd, "Receive Error: %s\n", rxe_err_status_string(buf, sizeof(buf), reg)); if (reg & ALL_RXE_FREEZE_ERR) { int flags = 0; /* * Freeze mode recovery is disabled for the errors * in RXE_FREEZE_ABORT_MASK */ if (is_a0(dd) && (reg & RXE_FREEZE_ABORT_MASK)) flags = FREEZE_ABORT; start_freeze_handling(dd->pport, flags); } } static void handle_misc_err(struct hfi1_devdata 
*dd, u32 unused, u64 reg) { char buf[96]; dd_dev_info(dd, "Misc Error: %s", misc_err_status_string(buf, sizeof(buf), reg)); } static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg) { char buf[96]; dd_dev_info(dd, "PIO Error: %s\n", pio_err_status_string(buf, sizeof(buf), reg)); if (reg & ALL_PIO_FREEZE_ERR) start_freeze_handling(dd->pport, 0); } static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg) { char buf[96]; dd_dev_info(dd, "SDMA Error: %s\n", sdma_err_status_string(buf, sizeof(buf), reg)); if (reg & ALL_SDMA_FREEZE_ERR) start_freeze_handling(dd->pport, 0); } static void count_port_inactive(struct hfi1_devdata *dd) { struct hfi1_pportdata *ppd = dd->pport; if (ppd->port_xmit_discards < ~(u64)0) ppd->port_xmit_discards++; } /* * We have had a "disallowed packet" error during egress. Determine the * integrity check which failed, and update relevant error counter, etc. * * Note that the SEND_EGRESS_ERR_INFO register has only a single * bit of state per integrity check, and so we can miss the reason for an * egress error if more than one packet fails the same integrity check * since we cleared the corresponding bit in SEND_EGRESS_ERR_INFO. */ static void handle_send_egress_err_info(struct hfi1_devdata *dd) { struct hfi1_pportdata *ppd = dd->pport; u64 src = read_csr(dd, SEND_EGRESS_ERR_SOURCE); /* read first */ u64 info = read_csr(dd, SEND_EGRESS_ERR_INFO); char buf[96]; /* clear down all observed info as quickly as possible after read */ write_csr(dd, SEND_EGRESS_ERR_INFO, info); dd_dev_info(dd, "Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n", info, egress_err_info_string(buf, sizeof(buf), info), src); /* Eventually add other counters for each bit */ if (info & SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK) { if (ppd->port_xmit_discards < ~(u64)0) ppd->port_xmit_discards++; } } /* * Input value is a bit position within the SEND_EGRESS_ERR_STATUS * register. Does it represent a 'port inactive' error? 
*/ static inline int port_inactive_err(u64 posn) { return (posn >= SEES(TX_LINKDOWN) && posn <= SEES(TX_INCORRECT_LINK_STATE)); } /* * Input value is a bit position within the SEND_EGRESS_ERR_STATUS * register. Does it represent a 'disallowed packet' error? */ static inline int disallowed_pkt_err(u64 posn) { return (posn >= SEES(TX_SDMA0_DISALLOWED_PACKET) && posn <= SEES(TX_SDMA15_DISALLOWED_PACKET)); } static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg) { u64 reg_copy = reg, handled = 0; char buf[96]; if (reg & ALL_TXE_EGRESS_FREEZE_ERR) start_freeze_handling(dd->pport, 0); if (is_a0(dd) && (reg & SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK) && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) start_freeze_handling(dd->pport, 0); while (reg_copy) { int posn = fls64(reg_copy); /* * fls64() returns a 1-based offset, but we generally * want 0-based offsets. */ int shift = posn - 1; if (port_inactive_err(shift)) { count_port_inactive(dd); handled |= (1ULL << shift); } else if (disallowed_pkt_err(shift)) { handle_send_egress_err_info(dd); handled |= (1ULL << shift); } clear_bit(shift, (unsigned long *)®_copy); } reg &= ~handled; if (reg) dd_dev_info(dd, "Egress Error: %s\n", egress_err_status_string(buf, sizeof(buf), reg)); } static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg) { char buf[96]; dd_dev_info(dd, "Send Error: %s\n", send_err_status_string(buf, sizeof(buf), reg)); } /* * The maximum number of times the error clear down will loop before * blocking a repeating error. This value is arbitrary. */ #define MAX_CLEAR_COUNT 20 /* * Clear and handle an error register. All error interrupts are funneled * through here to have a central location to correctly handle single- * or multi-shot errors. * * For non per-context registers, call this routine with a context value * of 0 so the per-context offset is zero. 
* * If the handler loops too many times, assume that something is wrong * and can't be fixed, so mask the error bits. */ static void interrupt_clear_down(struct hfi1_devdata *dd, u32 context, const struct err_reg_info *eri) { u64 reg; u32 count; /* read in a loop until no more errors are seen */ count = 0; while (1) { reg = read_kctxt_csr(dd, context, eri->status); if (reg == 0) break; write_kctxt_csr(dd, context, eri->clear, reg); if (likely(eri->handler)) eri->handler(dd, context, reg); count++; if (count > MAX_CLEAR_COUNT) { u64 mask; dd_dev_err(dd, "Repeating %s bits 0x%llx - masking\n", eri->desc, reg); /* * Read-modify-write so any other masked bits * remain masked. */ mask = read_kctxt_csr(dd, context, eri->mask); mask &= ~reg; write_kctxt_csr(dd, context, eri->mask, mask); break; } } } /* * CCE block "misc" interrupt. Source is < 16. */ static void is_misc_err_int(struct hfi1_devdata *dd, unsigned int source) { const struct err_reg_info *eri = &misc_errs[source]; if (eri->handler) { interrupt_clear_down(dd, 0, eri); } else { dd_dev_err(dd, "Unexpected misc interrupt (%u) - reserved\n", source); } } static char *send_context_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, sc_err_status_flags, ARRAY_SIZE(sc_err_status_flags)); } /* * Send context error interrupt. Source (hw_context) is < 160. * * All send context errors cause the send context to halt. The normal * clear-down mechanism cannot be used because we cannot clear the * error bits until several other long-running items are done first. * This is OK because with the context halted, nothing else is going * to happen on it anyway. 
*/ static void is_sendctxt_err_int(struct hfi1_devdata *dd, unsigned int hw_context) { struct send_context_info *sci; struct send_context *sc; char flags[96]; u64 status; u32 sw_index; sw_index = dd->hw_to_sw[hw_context]; if (sw_index >= dd->num_send_contexts) { dd_dev_err(dd, "out of range sw index %u for send context %u\n", sw_index, hw_context); return; } sci = &dd->send_contexts[sw_index]; sc = sci->sc; if (!sc) { dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__, sw_index, hw_context); return; } /* tell the software that a halt has begun */ sc_stop(sc, SCF_HALTED); status = read_kctxt_csr(dd, hw_context, SEND_CTXT_ERR_STATUS); dd_dev_info(dd, "Send Context %u(%u) Error: %s\n", sw_index, hw_context, send_context_err_status_string(flags, sizeof(flags), status)); if (status & SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK) handle_send_egress_err_info(dd); /* * Automatically restart halted kernel contexts out of interrupt * context. User contexts must ask the driver to restart the context. */ if (sc->type != SC_USER) queue_work(dd->pport->hfi1_wq, &sc->halt_work); } static void handle_sdma_eng_err(struct hfi1_devdata *dd, unsigned int source, u64 status) { struct sdma_engine *sde; sde = &dd->per_sdma[source]; #ifdef CONFIG_SDMA_VERBOSITY dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx, slashstrip(__FILE__), __LINE__, __func__); dd_dev_err(sde->dd, "CONFIG SDMA(%u) source: %u status 0x%llx\n", sde->this_idx, source, (unsigned long long)status); #endif sdma_engine_error(sde, status); } /* * CCE block SDMA error interrupt. Source is < 16. 
*/ static void is_sdma_eng_err_int(struct hfi1_devdata *dd, unsigned int source) { #ifdef CONFIG_SDMA_VERBOSITY struct sdma_engine *sde = &dd->per_sdma[source]; dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx, slashstrip(__FILE__), __LINE__, __func__); dd_dev_err(dd, "CONFIG SDMA(%u) source: %u\n", sde->this_idx, source); sdma_dumpstate(sde); #endif interrupt_clear_down(dd, source, &sdma_eng_err); } /* * CCE block "various" interrupt. Source is < 8. */ static void is_various_int(struct hfi1_devdata *dd, unsigned int source) { const struct err_reg_info *eri = &various_err[source]; /* * TCritInt cannot go through interrupt_clear_down() * because it is not a second tier interrupt. The handler * should be called directly. */ if (source == TCRIT_INT_SOURCE) handle_temp_err(dd); else if (eri->handler) interrupt_clear_down(dd, 0, eri); else dd_dev_info(dd, "%s: Unimplemented/reserved interrupt %d\n", __func__, source); } static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) { /* source is always zero */ struct hfi1_pportdata *ppd = dd->pport; unsigned long flags; u64 qsfp_int_mgmt = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N); if (reg & QSFP_HFI0_MODPRST_N) { dd_dev_info(dd, "%s: ModPresent triggered QSFP interrupt\n", __func__); if (!qsfp_mod_present(ppd)) { ppd->driver_link_ready = 0; /* * Cable removed, reset all our information about the * cache and cable capabilities */ spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags); /* * We don't set cache_refresh_required here as we expect * an interrupt when a cable is inserted */ ppd->qsfp_info.cache_valid = 0; ppd->qsfp_info.qsfp_interrupt_functional = 0; spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags); write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT, qsfp_int_mgmt); if (ppd->host_link_state == HLS_DN_POLL) { /* * The link is still in POLL. This means * that the normal link down processing * will not happen. We have to do it here * before turning the DC off. 
*/ queue_work(ppd->hfi1_wq, &ppd->link_down_work); } } else { spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags); ppd->qsfp_info.cache_valid = 0; ppd->qsfp_info.cache_refresh_required = 1; spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags); qsfp_int_mgmt &= ~(u64)QSFP_HFI0_MODPRST_N; write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT, qsfp_int_mgmt); } } if (reg & QSFP_HFI0_INT_N) { dd_dev_info(dd, "%s: IntN triggered QSFP interrupt\n", __func__); spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags); ppd->qsfp_info.check_interrupt_flags = 1; ppd->qsfp_info.qsfp_interrupt_functional = 1; spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags); } /* Schedule the QSFP work only if there is a cable attached. */ if (qsfp_mod_present(ppd)) queue_work(ppd->hfi1_wq, &ppd->qsfp_info.qsfp_work); } static int request_host_lcb_access(struct hfi1_devdata *dd) { int ret; ret = do_8051_command(dd, HCMD_MISC, (u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT, NULL); if (ret != HCMD_SUCCESS) { dd_dev_err(dd, "%s: command failed with error %d\n", __func__, ret); } return ret == HCMD_SUCCESS ? 0 : -EBUSY; } static int request_8051_lcb_access(struct hfi1_devdata *dd) { int ret; ret = do_8051_command(dd, HCMD_MISC, (u64)HCMD_MISC_GRANT_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT, NULL); if (ret != HCMD_SUCCESS) { dd_dev_err(dd, "%s: command failed with error %d\n", __func__, ret); } return ret == HCMD_SUCCESS ? 0 : -EBUSY; } /* * Set the LCB selector - allow host access. The DCC selector always * points to the host. */ static inline void set_host_lcb_access(struct hfi1_devdata *dd) { write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL, DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK | DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK); } /* * Clear the LCB selector - allow 8051 access. The DCC selector always * points to the host. 
*/ static inline void set_8051_lcb_access(struct hfi1_devdata *dd) { write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL, DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK); } /* * Acquire LCB access from the 8051. If the host already has access, * just increment a counter. Otherwise, inform the 8051 that the * host is taking access. * * Returns: * 0 on success * -EBUSY if the 8051 has control and cannot be disturbed * -errno if unable to acquire access from the 8051 */ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok) { struct hfi1_pportdata *ppd = dd->pport; int ret = 0; /* * Use the host link state lock so the operation of this routine * { link state check, selector change, count increment } can occur * as a unit against a link state change. Otherwise there is a * race between the state change and the count increment. */ if (sleep_ok) { mutex_lock(&ppd->hls_lock); } else { while (!mutex_trylock(&ppd->hls_lock)) udelay(1); } /* this access is valid only when the link is up */ if ((ppd->host_link_state & HLS_UP) == 0) { dd_dev_info(dd, "%s: link state %s not up\n", __func__, link_state_name(ppd->host_link_state)); ret = -EBUSY; goto done; } if (dd->lcb_access_count == 0) { ret = request_host_lcb_access(dd); if (ret) { dd_dev_err(dd, "%s: unable to acquire LCB access, err %d\n", __func__, ret); goto done; } set_host_lcb_access(dd); } dd->lcb_access_count++; done: mutex_unlock(&ppd->hls_lock); return ret; } /* * Release LCB access by decrementing the use count. If the count is moving * from 1 to 0, inform 8051 that it has control back. * * Returns: * 0 on success * -errno if unable to release access to the 8051 */ int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok) { int ret = 0; /* * Use the host link state lock because the acquire needed it. * Here, we only need to keep { selector change, count decrement } * as a unit. 
*/ if (sleep_ok) { mutex_lock(&dd->pport->hls_lock); } else { while (!mutex_trylock(&dd->pport->hls_lock)) udelay(1); } if (dd->lcb_access_count == 0) { dd_dev_err(dd, "%s: LCB access count is zero. Skipping.\n", __func__); goto done; } if (dd->lcb_access_count == 1) { set_8051_lcb_access(dd); ret = request_8051_lcb_access(dd); if (ret) { dd_dev_err(dd, "%s: unable to release LCB access, err %d\n", __func__, ret); /* restore host access if the grant didn't work */ set_host_lcb_access(dd); goto done; } } dd->lcb_access_count--; done: mutex_unlock(&dd->pport->hls_lock); return ret; } /* * Initialize LCB access variables and state. Called during driver load, * after most of the initialization is finished. * * The DC default is LCB access on for the host. The driver defaults to * leaving access to the 8051. Assign access now - this constrains the call * to this routine to be after all LCB set-up is done. In particular, after * hf1_init_dd() -> set_up_interrupts() -> clear_all_interrupts() */ static void init_lcb_access(struct hfi1_devdata *dd) { dd->lcb_access_count = 0; } /* * Write a response back to a 8051 request. */ static void hreq_response(struct hfi1_devdata *dd, u8 return_code, u16 rsp_data) { write_csr(dd, DC_DC8051_CFG_EXT_DEV_0, DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK | (u64)return_code << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT | (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT); } /* * Handle requests from the 8051. 
*/ static void handle_8051_request(struct hfi1_devdata *dd) { u64 reg; u16 data; u8 type; reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1); if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0) return; /* no request */ /* zero out COMPLETED so the response is seen */ write_csr(dd, DC_DC8051_CFG_EXT_DEV_0, 0); /* extract request details */ type = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_SHIFT) & DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_MASK; data = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT) & DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_MASK; switch (type) { case HREQ_LOAD_CONFIG: case HREQ_SAVE_CONFIG: case HREQ_READ_CONFIG: case HREQ_SET_TX_EQ_ABS: case HREQ_SET_TX_EQ_REL: case HREQ_ENABLE: dd_dev_info(dd, "8051 request: request 0x%x not supported\n", type); hreq_response(dd, HREQ_NOT_SUPPORTED, 0); break; case HREQ_CONFIG_DONE: hreq_response(dd, HREQ_SUCCESS, 0); break; case HREQ_INTERFACE_TEST: hreq_response(dd, HREQ_SUCCESS, data); break; default: dd_dev_err(dd, "8051 request: unknown request 0x%x\n", type); hreq_response(dd, HREQ_NOT_SUPPORTED, 0); break; } } static void write_global_credit(struct hfi1_devdata *dd, u8 vau, u16 total, u16 shared) { write_csr(dd, SEND_CM_GLOBAL_CREDIT, ((u64)total << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT) | ((u64)shared << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT) | ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT)); } /* * Set up initial VL15 credits of the remote. Assumes the rest of * the CM credit registers are zero from a previous global or credit reset . */ void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf) { /* leave shared count at zero for both global and VL15 */ write_global_credit(dd, vau, vl15buf, 0); /* We may need some credits for another VL when sending packets * with the snoop interface. Dividing it down the middle for VL15 * and VL0 should suffice. 
*/ if (unlikely(dd->hfi1_snoop.mode_flag == HFI1_PORT_SNOOP_MODE)) { write_csr(dd, SEND_CM_CREDIT_VL15, (u64)(vl15buf >> 1) << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT); write_csr(dd, SEND_CM_CREDIT_VL, (u64)(vl15buf >> 1) << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT); } else { write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT); } } /* * Zero all credit details from the previous connection and * reset the CM manager's internal counters. */ void reset_link_credits(struct hfi1_devdata *dd) { int i; /* remove all previous VL credit limits */ for (i = 0; i < TXE_NUM_DATA_VL; i++) write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0); write_csr(dd, SEND_CM_CREDIT_VL15, 0); write_global_credit(dd, 0, 0, 0); /* reset the CM block */ pio_send_control(dd, PSC_CM_RESET); } /* convert a vCU to a CU */ static u32 vcu_to_cu(u8 vcu) { return 1 << vcu; } /* convert a CU to a vCU */ static u8 cu_to_vcu(u32 cu) { return ilog2(cu); } /* convert a vAU to an AU */ static u32 vau_to_au(u8 vau) { return 8 * (1 << vau); } static void set_linkup_defaults(struct hfi1_pportdata *ppd) { ppd->sm_trap_qp = 0x0; ppd->sa_qp = 0x1; } /* * Graceful LCB shutdown. This leaves the LCB FIFOs in reset. 
*/
/*
 * When abort is zero the DCC reset is released again after a short
 * hold and the saved LCB error enables are written back; when abort is
 * non-zero both are left for the caller to restore later.
 */
static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
{
	u64 reg;

	/* clear lcb run: LCB_CFG_RUN.EN = 0 */
	write_csr(dd, DC_LCB_CFG_RUN, 0);
	/* set tx fifo reset: LCB_CFG_TX_FIFOS_RESET.VAL = 1 */
	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET,
		1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
	/* set dcc reset csr: DCC_CFG_RESET.{reset_lcb,reset_rx_fpe} = 1 */
	/* save the error enables first so they can be written back later */
	dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN);
	reg = read_csr(dd, DCC_CFG_RESET);
	write_csr(dd, DCC_CFG_RESET,
		reg
		| (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT)
		| (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
	(void) read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
	if (!abort) {
		udelay(1);    /* must hold for the longer of 16cclks or 20ns */
		/* put back the reset value read before the bits were set */
		write_csr(dd, DCC_CFG_RESET, reg);
		write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
	}
}

/*
 * This routine should be called after the link has been transitioned to
 * OFFLINE (OFFLINE state has the side effect of putting the SerDes into
 * reset).
 *
 * The expectation is that the caller of this routine would have taken
 * care of properly transitioning the link into the correct state.
 *
 * The dc_shutdown flag, tested and set under dc8051_lock, makes a
 * second call a no-op.
 */
static void dc_shutdown(struct hfi1_devdata *dd)
{
	unsigned long flags;

	spin_lock_irqsave(&dd->dc8051_lock, flags);
	if (dd->dc_shutdown) {
		/* already shut down - nothing to do */
		spin_unlock_irqrestore(&dd->dc8051_lock, flags);
		return;
	}
	dd->dc_shutdown = 1;
	spin_unlock_irqrestore(&dd->dc8051_lock, flags);
	/* Shutdown the LCB */
	lcb_shutdown(dd, 1);
	/*
	 * Going to OFFLINE would have caused the 8051 to put the
	 * SerDes into reset already. Just need to shut down the 8051,
	 * itself.
	 */
	write_csr(dd, DC_DC8051_CFG_RST, 0x1);
}

/* Calling this after the DC has been brought out of reset should not
 * do any damage.
*/ static void dc_start(struct hfi1_devdata *dd) { unsigned long flags; int ret; spin_lock_irqsave(&dd->dc8051_lock, flags); if (!dd->dc_shutdown) goto done; spin_unlock_irqrestore(&dd->dc8051_lock, flags); /* Take the 8051 out of reset */ write_csr(dd, DC_DC8051_CFG_RST, 0ull); /* Wait until 8051 is ready */ ret = wait_fm_ready(dd, TIMEOUT_8051_START); if (ret) { dd_dev_err(dd, "%s: timeout starting 8051 firmware\n", __func__); } /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */ write_csr(dd, DCC_CFG_RESET, 0x10); /* lcb_shutdown() with abort=1 does not restore these */ write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en); spin_lock_irqsave(&dd->dc8051_lock, flags); dd->dc_shutdown = 0; done: spin_unlock_irqrestore(&dd->dc8051_lock, flags); } /* * These LCB adjustments are for the Aurora SerDes core in the FPGA. */ static void adjust_lcb_for_fpga_serdes(struct hfi1_devdata *dd) { u64 rx_radr, tx_radr; u32 version; if (dd->icode != ICODE_FPGA_EMULATION) return; /* * These LCB defaults on emulator _s are good, nothing to do here: * LCB_CFG_TX_FIFOS_RADR * LCB_CFG_RX_FIFOS_RADR * LCB_CFG_LN_DCLK * LCB_CFG_IGNORE_LOST_RCLK */ if (is_emulator_s(dd)) return; /* else this is _p */ version = emulator_rev(dd); if (!is_a0(dd)) version = 0x2d; /* all B0 use 0x2d or higher settings */ if (version <= 0x12) { /* release 0x12 and below */ /* * LCB_CFG_RX_FIFOS_RADR.RST_VAL = 0x9 * LCB_CFG_RX_FIFOS_RADR.OK_TO_JUMP_VAL = 0x9 * LCB_CFG_RX_FIFOS_RADR.DO_NOT_JUMP_VAL = 0xa */ rx_radr = 0xaull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT; /* * LCB_CFG_TX_FIFOS_RADR.ON_REINIT = 0 (default) * LCB_CFG_TX_FIFOS_RADR.RST_VAL = 6 */ tx_radr = 6ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT; } else if (version <= 0x18) { /* release 0x13 up to 0x18 */ /* LCB_CFG_RX_FIFOS_RADR = 0x988 */ rx_radr = 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT | 0x8ull << 
DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT; tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT; } else if (version == 0x19) { /* release 0x19 */ /* LCB_CFG_RX_FIFOS_RADR = 0xa99 */ rx_radr = 0xAull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT; tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT; } else if (version == 0x1a) { /* release 0x1a */ /* LCB_CFG_RX_FIFOS_RADR = 0x988 */ rx_radr = 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT; tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT; write_csr(dd, DC_LCB_CFG_LN_DCLK, 1ull); } else { /* release 0x1b and higher */ /* LCB_CFG_RX_FIFOS_RADR = 0x877 */ rx_radr = 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT; tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT; } write_csr(dd, DC_LCB_CFG_RX_FIFOS_RADR, rx_radr); /* LCB_CFG_IGNORE_LOST_RCLK.EN = 1 */ write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK); write_csr(dd, DC_LCB_CFG_TX_FIFOS_RADR, tx_radr); } /* * Handle a SMA idle message * * This is a work-queue function outside of the interrupt. */ void handle_sma_message(struct work_struct *work) { struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, sma_message_work); struct hfi1_devdata *dd = ppd->dd; u64 msg; int ret; /* msg is bytes 1-4 of the 40-bit idle message - the command code is stripped off */ ret = read_idle_sma(dd, &msg); if (ret) return; dd_dev_info(dd, "%s: SMA message 0x%llx\n", __func__, msg); /* * React to the SMA message. Byte[1] (0 for us) is the command. 
*/ switch (msg & 0xff) { case SMA_IDLE_ARM: /* * See OPAv1 table 9-14 - HFI and External Switch Ports Key * State Transitions * * Only expected in INIT or ARMED, discard otherwise. */ if (ppd->host_link_state & (HLS_UP_INIT | HLS_UP_ARMED)) ppd->neighbor_normal = 1; break; case SMA_IDLE_ACTIVE: /* * See OPAv1 table 9-14 - HFI and External Switch Ports Key * State Transitions * * Can activate the node. Discard otherwise. */ if (ppd->host_link_state == HLS_UP_ARMED && ppd->is_active_optimize_enabled) { ppd->neighbor_normal = 1; ret = set_link_state(ppd, HLS_UP_ACTIVE); if (ret) dd_dev_err( dd, "%s: received Active SMA idle message, couldn't set link to Active\n", __func__); } break; default: dd_dev_err(dd, "%s: received unexpected SMA idle message 0x%llx\n", __func__, msg); break; } } static void adjust_rcvctrl(struct hfi1_devdata *dd, u64 add, u64 clear) { u64 rcvctrl; unsigned long flags; spin_lock_irqsave(&dd->rcvctrl_lock, flags); rcvctrl = read_csr(dd, RCV_CTRL); rcvctrl |= add; rcvctrl &= ~clear; write_csr(dd, RCV_CTRL, rcvctrl); spin_unlock_irqrestore(&dd->rcvctrl_lock, flags); } static inline void add_rcvctrl(struct hfi1_devdata *dd, u64 add) { adjust_rcvctrl(dd, add, 0); } static inline void clear_rcvctrl(struct hfi1_devdata *dd, u64 clear) { adjust_rcvctrl(dd, 0, clear); } /* * Called from all interrupt handlers to start handling an SPC freeze. */ void start_freeze_handling(struct hfi1_pportdata *ppd, int flags) { struct hfi1_devdata *dd = ppd->dd; struct send_context *sc; int i; if (flags & FREEZE_SELF) write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK); /* enter frozen mode */ dd->flags |= HFI1_FROZEN; /* notify all SDMA engines that they are going into a freeze */ sdma_freeze_notify(dd, !!(flags & FREEZE_LINK_DOWN)); /* do halt pre-handling on all enabled send contexts */ for (i = 0; i < dd->num_send_contexts; i++) { sc = dd->send_contexts[i].sc; if (sc && (sc->flags & SCF_ENABLED)) sc_stop(sc, SCF_FROZEN | SCF_HALTED); } /* Send context are frozen. 
Notify user space */ hfi1_set_uevent_bits(ppd, _HFI1_EVENT_FROZEN_BIT); if (flags & FREEZE_ABORT) { dd_dev_err(dd, "Aborted freeze recovery. Please REBOOT system\n"); return; } /* queue non-interrupt handler */ queue_work(ppd->hfi1_wq, &ppd->freeze_work); } /* * Wait until all 4 sub-blocks indicate that they have frozen or unfrozen, * depending on the "freeze" parameter. * * No need to return an error if it times out, our only option * is to proceed anyway. */ static void wait_for_freeze_status(struct hfi1_devdata *dd, int freeze) { unsigned long timeout; u64 reg; timeout = jiffies + msecs_to_jiffies(FREEZE_STATUS_TIMEOUT); while (1) { reg = read_csr(dd, CCE_STATUS); if (freeze) { /* waiting until all indicators are set */ if ((reg & ALL_FROZE) == ALL_FROZE) return; /* all done */ } else { /* waiting until all indicators are clear */ if ((reg & ALL_FROZE) == 0) return; /* all done */ } if (time_after(jiffies, timeout)) { dd_dev_err(dd, "Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing", freeze ? "" : "un", reg & ALL_FROZE, freeze ? ALL_FROZE : 0ull); return; } usleep_range(80, 120); } } /* * Do all freeze handling for the RXE block. */ static void rxe_freeze(struct hfi1_devdata *dd) { int i; /* disable port */ clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); /* disable all receive contexts */ for (i = 0; i < dd->num_rcv_contexts; i++) hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, i); } /* * Unfreeze handling for the RXE block - kernel contexts only. * This will also enable the port. User contexts will do unfreeze * handling on a per-context basis as they call into the driver. * */ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd) { int i; /* enable all kernel contexts */ for (i = 0; i < dd->n_krcv_queues; i++) hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, i); /* enable port */ add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); } /* * Non-interrupt SPC freeze handling. * * This is a work-queue function outside of the triggering interrupt. 
*/
void handle_freeze(struct work_struct *work)
{
	struct hfi1_pportdata *ppd;
	struct hfi1_devdata *dd;

	ppd = container_of(work, struct hfi1_pportdata, freeze_work);
	dd = ppd->dd;

	/* block until every affected block reports frozen */
	dd_dev_info(dd, "Entering SPC freeze\n");
	wait_for_freeze_status(dd, 1);

	/*
	 * SPC is now frozen.  Run the per-block freeze steps:
	 * send PIO, send DMA, then receive.  Send egress has
	 * nothing to do.
	 */
	pio_freeze(dd);
	sdma_freeze(dd);
	rxe_freeze(dd);

	/*
	 * Release the hardware: request the unfreeze and wait for each
	 * block's frozen bit to drop.  The frozen flag itself is cleared
	 * further down.
	 */
	write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
	wait_for_freeze_status(dd, 0);

	/* A0 parts go through one more freeze/unfreeze cycle */
	if (is_a0(dd)) {
		write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
		wait_for_freeze_status(dd, 1);
		write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
		wait_for_freeze_status(dd, 0);
	}

	/*
	 * Per-block unfreeze steps: send PIO (kernel contexts), send DMA,
	 * then receive (kernel contexts).  Send egress has nothing to do.
	 */
	pio_kernel_unfreeze(dd);
	sdma_unfreeze(dd);
	rxe_kernel_unfreeze(dd);

	/*
	 * Disabling and re-enabling RXE above touches global device
	 * registers.  Only mark the device unfrozen once all of that is
	 * finished, so that anything waiting on the unfreeze does not
	 * run out of order.
	 *
	 * In other words, HFI1_FROZEN means "the device went into freeze
	 * mode and freeze handling is still in progress"; it is dropped
	 * here, when freeze processing has completed.
	 */
	dd->flags &= ~HFI1_FROZEN;
	wake_up(&dd->event_queue);

	/* no longer frozen */
	dd_dev_err(dd, "Exiting SPC freeze\n");
}

/*
 * Handle a link up interrupt from the 8051.
 *
 * This is a work-queue function outside of the interrupt.
*/ void handle_link_up(struct work_struct *work) { struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, link_up_work); set_link_state(ppd, HLS_UP_INIT); /* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */ read_ltp_rtt(ppd->dd); /* * OPA specifies that certain counters are cleared on a transition * to link up, so do that. */ clear_linkup_counters(ppd->dd); /* * And (re)set link up default values. */ set_linkup_defaults(ppd); /* enforce link speed enabled */ if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) { /* oops - current speed is not enabled, bounce */ dd_dev_err(ppd->dd, "Link speed active 0x%x is outside enabled 0x%x, downing link\n", ppd->link_speed_active, ppd->link_speed_enabled); set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0, OPA_LINKDOWN_REASON_SPEED_POLICY); set_link_state(ppd, HLS_DN_OFFLINE); start_link(ppd); } } /* Several pieces of LNI information were cached for SMA in ppd. * Reset these on link down */ static void reset_neighbor_info(struct hfi1_pportdata *ppd) { ppd->neighbor_guid = 0; ppd->neighbor_port_number = 0; ppd->neighbor_type = 0; ppd->neighbor_fm_security = 0; } /* * Handle a link down interrupt from the 8051. * * This is a work-queue function outside of the interrupt. */ void handle_link_down(struct work_struct *work) { u8 lcl_reason, neigh_reason = 0; struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, link_down_work); /* go offline first, then deal with reasons */ set_link_state(ppd, HLS_DN_OFFLINE); lcl_reason = 0; read_planned_down_reason_code(ppd->dd, &neigh_reason); /* * If no reason, assume peer-initiated but missed * LinkGoingDown idle flits. */ if (neigh_reason == 0) lcl_reason = OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN; set_link_down_reason(ppd, lcl_reason, neigh_reason, 0); reset_neighbor_info(ppd); /* disable the port */ clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); /* If there is no cable attached, turn the DC off. Otherwise, * start the link bring up. 
*/ if (!qsfp_mod_present(ppd)) dc_shutdown(ppd->dd); else start_link(ppd); } void handle_link_bounce(struct work_struct *work) { struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, link_bounce_work); /* * Only do something if the link is currently up. */ if (ppd->host_link_state & HLS_UP) { set_link_state(ppd, HLS_DN_OFFLINE); start_link(ppd); } else { dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n", __func__, link_state_name(ppd->host_link_state)); } } /* * Mask conversion: Capability exchange to Port LTP. The capability * exchange has an implicit 16b CRC that is mandatory. */ static int cap_to_port_ltp(int cap) { int port_ltp = PORT_LTP_CRC_MODE_16; /* this mode is mandatory */ if (cap & CAP_CRC_14B) port_ltp |= PORT_LTP_CRC_MODE_14; if (cap & CAP_CRC_48B) port_ltp |= PORT_LTP_CRC_MODE_48; if (cap & CAP_CRC_12B_16B_PER_LANE) port_ltp |= PORT_LTP_CRC_MODE_PER_LANE; return port_ltp; } /* * Convert an OPA Port LTP mask to capability mask */ int port_ltp_to_cap(int port_ltp) { int cap_mask = 0; if (port_ltp & PORT_LTP_CRC_MODE_14) cap_mask |= CAP_CRC_14B; if (port_ltp & PORT_LTP_CRC_MODE_48) cap_mask |= CAP_CRC_48B; if (port_ltp & PORT_LTP_CRC_MODE_PER_LANE) cap_mask |= CAP_CRC_12B_16B_PER_LANE; return cap_mask; } /* * Convert a single DC LCB CRC mode to an OPA Port LTP mask. */ static int lcb_to_port_ltp(int lcb_crc) { int port_ltp = 0; if (lcb_crc == LCB_CRC_12B_16B_PER_LANE) port_ltp = PORT_LTP_CRC_MODE_PER_LANE; else if (lcb_crc == LCB_CRC_48B) port_ltp = PORT_LTP_CRC_MODE_48; else if (lcb_crc == LCB_CRC_14B) port_ltp = PORT_LTP_CRC_MODE_14; else port_ltp = PORT_LTP_CRC_MODE_16; return port_ltp; } /* * Our neighbor has indicated that we are allowed to act as a fabric * manager, so place the full management partition key in the second * (0-based) pkey array position (see OPAv1, section 20.2.2.6.8). 
Note * that we should already have the limited management partition key in * array element 1, and also that the port is not yet up when * add_full_mgmt_pkey() is invoked. */ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) { struct hfi1_devdata *dd = ppd->dd; /* Sanity check - ppd->pkeys[2] should be 0 */ if (ppd->pkeys[2] != 0) dd_dev_err(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n", __func__, ppd->pkeys[2], FULL_MGMT_P_KEY); ppd->pkeys[2] = FULL_MGMT_P_KEY; (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); } /* * Convert the given link width to the OPA link width bitmask. */ static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width) { switch (width) { case 0: /* * Simulator and quick linkup do not set the width. * Just set it to 4x without complaint. */ if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR || quick_linkup) return OPA_LINK_WIDTH_4X; return 0; /* no lanes up */ case 1: return OPA_LINK_WIDTH_1X; case 2: return OPA_LINK_WIDTH_2X; case 3: return OPA_LINK_WIDTH_3X; default: dd_dev_info(dd, "%s: invalid width %d, using 4\n", __func__, width); /* fall through */ case 4: return OPA_LINK_WIDTH_4X; } } /* * Do a population count on the bottom nibble. */ static const u8 bit_counts[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; static inline u8 nibble_to_count(u8 nibble) { return bit_counts[nibble & 0xf]; } /* * Read the active lane information from the 8051 registers and return * their widths. 
*
 * Active lane information is found in these 8051 registers:
 *	enable_lane_tx
 *	enable_lane_rx
 */
static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
			    u16 *rx_width)
{
	u8 tx_polarity_inversion;
	u8 rx_polarity_inversion;
	u8 enable_lane_tx;
	u8 enable_lane_rx;
	u8 max_rate;
	u16 tx_lanes, rx_lanes;

	/* fetch the active lane masks */
	read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
				&rx_polarity_inversion, &max_rate);
	read_local_lni(dd, &enable_lane_rx);

	/* lane masks -> lane counts */
	tx_lanes = nibble_to_count(enable_lane_tx);
	rx_lanes = nibble_to_count(enable_lane_rx);

	/*
	 * Set link_speed_active here, overriding what was set in
	 * handle_verify_cap().  The ASIC 8051 firmware does not correctly
	 * set the max_rate field in handle_verify_cap until v0.19.
	 */
	if ((dd->icode == ICODE_RTL_SILICON)
				&& (dd->dc8051_ver < dc8051_ver(0, 19))) {
		/* max_rate: 0 = 12.5G, 1 = 25G, anything else: complain */
		if (max_rate == 0) {
			dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G;
		} else {
			if (max_rate != 1)
				dd_dev_err(dd,
					"%s: unexpected max rate %d, using 25Gb\n",
					__func__, (int)max_rate);
			dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
		}
	}

	dd_dev_info(dd,
		"Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
		enable_lane_tx, tx_lanes, enable_lane_rx, rx_lanes);

	*tx_width = link_width_to_bits(dd, tx_lanes);
	*rx_width = link_width_to_bits(dd, rx_lanes);
}

/*
 * Read verify_cap_local_fm_link_width[1] to obtain the link widths.
 * Valid after the end of VerifyCap and during LinkUp.  Does not change
 * after link up.  I.e. look elsewhere for downgrade information.
 *
 * Bits are:
 *	+ bits [7:4] contain the number of active transmitters
 *	+ bits [3:0] contain the number of active receivers
 * These are numbers 1 through 4 and can be different values if the
 * link is asymmetric.
 *
 * verify_cap_local_fm_link_width[0] retains its original value.
*/ static void get_linkup_widths(struct hfi1_devdata *dd, u16 *tx_width, u16 *rx_width) { u16 widths, tx, rx; u8 misc_bits, local_flags; u16 active_tx, active_rx; read_vc_local_link_width(dd, &misc_bits, &local_flags, &widths); tx = widths >> 12; rx = (widths >> 8) & 0xf; *tx_width = link_width_to_bits(dd, tx); *rx_width = link_width_to_bits(dd, rx); /* print the active widths */ get_link_widths(dd, &active_tx, &active_rx); } /* * Set ppd->link_width_active and ppd->link_width_downgrade_active using * hardware information when the link first comes up. * * The link width is not available until after VerifyCap.AllFramesReceived * (the trigger for handle_verify_cap), so this is outside that routine * and should be called when the 8051 signals linkup. */ void get_linkup_link_widths(struct hfi1_pportdata *ppd) { u16 tx_width, rx_width; /* get end-of-LNI link widths */ get_linkup_widths(ppd->dd, &tx_width, &rx_width); /* use tx_width as the link is supposed to be symmetric on link up */ ppd->link_width_active = tx_width; /* link width downgrade active (LWD.A) starts out matching LW.A */ ppd->link_width_downgrade_tx_active = ppd->link_width_active; ppd->link_width_downgrade_rx_active = ppd->link_width_active; /* per OPA spec, on link up LWD.E resets to LWD.S */ ppd->link_width_downgrade_enabled = ppd->link_width_downgrade_supported; /* cache the active egress rate (units {10^6 bits/sec]) */ ppd->current_egress_rate = active_egress_rate(ppd); } /* * Handle a verify capabilities interrupt from the 8051. * * This is a work-queue function outside of the interrupt. 
 *
 * In order, as the body below shows: move the host link state to
 * HLS_VERIFY_CAP, reset the LCB, read and log the peer's VerifyCap
 * values, set up VL15 credits, pick the LCB CRC mode, determine the
 * active link speed, cache LTP CRC modes and neighbor information, and
 * finally request HLS_GOING_UP.
 */
void handle_verify_cap(struct work_struct *work)
{
	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
								link_vc_work);
	struct hfi1_devdata *dd = ppd->dd;
	u64 reg;
	u8 power_management;
	u8 continious;	/* sic: logged as "continuous updates" */
	u8 vcu;
	u8 vau;
	u8 z;
	u16 vl15buf;
	u16 link_widths;
	u16 crc_mask;
	u16 crc_val;
	u16 device_id;
	u16 active_tx, active_rx;
	u8 partner_supported_crc;
	u8 remote_tx_rate;
	u8 device_rev;

	set_link_state(ppd, HLS_VERIFY_CAP);

	/* abort == 0: the LCB reset is pulsed and released */
	lcb_shutdown(dd, 0);
	adjust_lcb_for_fpga_serdes(dd);

	/*
	 * These are now valid:
	 *	remote VerifyCap fields in the general LNI config
	 *	CSR DC8051_STS_REMOTE_GUID
	 *	CSR DC8051_STS_REMOTE_NODE_TYPE
	 *	CSR DC8051_STS_REMOTE_FM_SECURITY
	 *	CSR DC8051_STS_REMOTE_PORT_NO
	 */

	read_vc_remote_phy(dd, &power_management, &continious);
	read_vc_remote_fabric(
		dd,
		&vau,
		&z,
		&vcu,
		&vl15buf,
		&partner_supported_crc);
	read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths);
	read_remote_device_id(dd, &device_id, &device_rev);
	/*
	 * And the 'MgmtAllowed' information, which is exchanged during
	 * LNI, is also available at this point.
	 */
	read_mgmt_allowed(dd, &ppd->mgmt_allowed);
	/* print the active widths (active_tx/active_rx are not used here) */
	get_link_widths(dd, &active_tx, &active_rx);
	dd_dev_info(dd,
		"Peer PHY: power management 0x%x, continuous updates 0x%x\n",
		(int)power_management, (int)continious);
	dd_dev_info(dd,
		"Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
		(int)vau,
		(int)z,
		(int)vcu,
		(int)vl15buf,
		(int)partner_supported_crc);
	dd_dev_info(dd, "Peer Link Width: tx rate 0x%x, widths 0x%x\n",
		(u32)remote_tx_rate, (u32)link_widths);
	dd_dev_info(dd, "Peer Device ID: 0x%04x, Revision 0x%02x\n",
		(u32)device_id, (u32)device_rev);
	/*
	 * The peer vAU value just read is the peer receiver value.  HFI does
	 * not support a transmit vAU of 0 (AU == 8).  We advertised that
	 * with Z=1 in the fabric capabilities sent to the peer.  The peer
	 * will see our Z=1, and, if it advertised a vAU of 0, will move its
	 * receive to vAU of 1 (AU == 16).  Do the same here.  We do not care
	 * about the peer Z value - our sent vAU is 3 (hardwired) and is not
	 * subject to the Z value exception.
	 */
	if (vau == 0)
		vau = 1;
	set_up_vl15(dd, vau, vl15buf);

	/* set up the LCB CRC mode: only modes both sides allow qualify */
	crc_mask = ppd->port_crc_mode_enabled & partner_supported_crc;

	/* order is important: use the lowest bit in common */
	if (crc_mask & CAP_CRC_14B)
		crc_val = LCB_CRC_14B;
	else if (crc_mask & CAP_CRC_48B)
		crc_val = LCB_CRC_48B;
	else if (crc_mask & CAP_CRC_12B_16B_PER_LANE)
		crc_val = LCB_CRC_12B_16B_PER_LANE;
	else
		crc_val = LCB_CRC_16B;	/* nothing in common */

	dd_dev_info(dd, "Final LCB CRC mode: %d\n", (int)crc_val);
	write_csr(dd, DC_LCB_CFG_CRC_MODE,
		  (u64)crc_val << DC_LCB_CFG_CRC_MODE_TX_VAL_SHIFT);

	/* set (14b only) or clear sideband credit */
	reg = read_csr(dd, SEND_CM_CTRL);
	if (crc_val == LCB_CRC_14B && crc_14b_sideband) {
		write_csr(dd, SEND_CM_CTRL,
			reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
	} else {
		write_csr(dd, SEND_CM_CTRL,
			reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
	}

	/* 0 doubles as "not determined yet" for the fallback check below */
	ppd->link_speed_active = 0;	/* invalid value */
	if (dd->dc8051_ver < dc8051_ver(0, 20)) {
		/* remote_tx_rate: 0 = 12.5G, 1 = 25G */
		switch (remote_tx_rate) {
		case 0:
			ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
			break;
		case 1:
			ppd->link_speed_active = OPA_LINK_SPEED_25G;
			break;
		}
	} else {
		/* actual rate is highest bit of the ANDed rates */
		u8 rate = remote_tx_rate & ppd->local_tx_rate;

		if (rate & 2)
			ppd->link_speed_active = OPA_LINK_SPEED_25G;
		else if (rate & 1)
			ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
	}
	/* no recognised rate above: report it and fall back to 25G */
	if (ppd->link_speed_active == 0) {
		dd_dev_err(dd, "%s: unexpected remote tx rate %d, using 25Gb\n",
			__func__, (int)remote_tx_rate);
		ppd->link_speed_active = OPA_LINK_SPEED_25G;
	}

	/*
	 * Cache the values of the supported, enabled, and active
	 * LTP CRC modes to return in 'portinfo' queries. But the bit
	 * flags that are returned in the portinfo query differ from
	 * what's in the link_crc_mask, crc_sizes, and crc_val
	 * variables. Convert these here.
	 *
	 * Resulting layout of port_ltp_crc_mode, per the shifts below:
	 * supported modes shifted left by 8, enabled modes shifted left
	 * by 4, active mode unshifted.
	 */
	ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8;
		/* supported crc modes */
	ppd->port_ltp_crc_mode |=
		cap_to_port_ltp(ppd->port_crc_mode_enabled) << 4;
		/* enabled crc modes */
	ppd->port_ltp_crc_mode |= lcb_to_port_ltp(crc_val);
		/* active crc mode */

	/* set up the remote credit return table */
	assign_remote_cm_au_table(dd, vcu);

	/*
	 * The LCB is reset on entry to handle_verify_cap(), so this must
	 * be applied on every link up.
	 *
	 * Adjust LCB error kill enable to kill the link if
	 * these RBUF errors are seen:
	 *	REPLAY_BUF_MBE_SMASK
	 *	FLIT_INPUT_BUF_MBE_SMASK
	 */
	if (is_a0(dd)) {			/* fixed in B0 */
		reg = read_csr(dd, DC_LCB_CFG_LINK_KILL_EN);
		reg |= DC_LCB_CFG_LINK_KILL_EN_REPLAY_BUF_MBE_SMASK
			| DC_LCB_CFG_LINK_KILL_EN_FLIT_INPUT_BUF_MBE_SMASK;
		write_csr(dd, DC_LCB_CFG_LINK_KILL_EN, reg);
	}

	/* pull LCB fifos out of reset - all fifo clocks must be stable */
	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);

	/* give 8051 access to the LCB CSRs */
	write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
	set_8051_lcb_access(dd);

	/* cache the neighbor's identity as reported by the 8051 status CSRs */
	ppd->neighbor_guid =
		read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
	ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
					DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
	ppd->neighbor_type =
		read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
		DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
	/*
	 * NOTE(review): the REMOTE_FM_SECURITY register is masked with a
	 * LOCAL_FM_SECURITY mask - presumably the two share a field
	 * layout; confirm against the register definitions.
	 */
	ppd->neighbor_fm_security =
		read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
		DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
	dd_dev_info(dd,
		"Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
		ppd->neighbor_guid, ppd->neighbor_type,
		ppd->mgmt_allowed, ppd->neighbor_fm_security);
	/* a neighbor that allows management gets the full mgmt pkey */
	if (ppd->mgmt_allowed)
		add_full_mgmt_pkey(ppd);

	/* tell the 8051 to go to LinkUp */
	set_link_state(ppd, HLS_GOING_UP);
}

/*
 * Apply the link width downgrade enabled policy against the current active
 * link widths.
 *
 * Called when the enabled policy changes or the active link widths change.
*/ void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths) { int skip = 1; int do_bounce = 0; u16 lwde = ppd->link_width_downgrade_enabled; u16 tx, rx; mutex_lock(&ppd->hls_lock); /* only apply if the link is up */ if (ppd->host_link_state & HLS_UP) skip = 0; mutex_unlock(&ppd->hls_lock); if (skip) return; if (refresh_widths) { get_link_widths(ppd->dd, &tx, &rx); ppd->link_width_downgrade_tx_active = tx; ppd->link_width_downgrade_rx_active = rx; } if (lwde == 0) { /* downgrade is disabled */ /* bounce if not at starting active width */ if ((ppd->link_width_active != ppd->link_width_downgrade_tx_active) || (ppd->link_width_active != ppd->link_width_downgrade_rx_active)) { dd_dev_err(ppd->dd, "Link downgrade is disabled and link has downgraded, downing link\n"); dd_dev_err(ppd->dd, " original 0x%x, tx active 0x%x, rx active 0x%x\n", ppd->link_width_active, ppd->link_width_downgrade_tx_active, ppd->link_width_downgrade_rx_active); do_bounce = 1; } } else if ((lwde & ppd->link_width_downgrade_tx_active) == 0 || (lwde & ppd->link_width_downgrade_rx_active) == 0) { /* Tx or Rx is outside the enabled policy */ dd_dev_err(ppd->dd, "Link is outside of downgrade allowed, downing link\n"); dd_dev_err(ppd->dd, " enabled 0x%x, tx active 0x%x, rx active 0x%x\n", lwde, ppd->link_width_downgrade_tx_active, ppd->link_width_downgrade_rx_active); do_bounce = 1; } if (do_bounce) { set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0, OPA_LINKDOWN_REASON_WIDTH_POLICY); set_link_state(ppd, HLS_DN_OFFLINE); start_link(ppd); } } /* * Handle a link downgrade interrupt from the 8051. * * This is a work-queue function outside of the interrupt. 
*/ void handle_link_downgrade(struct work_struct *work) { struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, link_downgrade_work); dd_dev_info(ppd->dd, "8051: Link width downgrade\n"); apply_link_downgrade_policy(ppd, 1); } static char *dcc_err_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, dcc_err_flags, ARRAY_SIZE(dcc_err_flags)); } static char *lcb_err_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, lcb_err_flags, ARRAY_SIZE(lcb_err_flags)); } static char *dc8051_err_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, dc8051_err_flags, ARRAY_SIZE(dc8051_err_flags)); } static char *dc8051_info_err_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, dc8051_info_err_flags, ARRAY_SIZE(dc8051_info_err_flags)); } static char *dc8051_info_host_msg_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, dc8051_info_host_msg_flags, ARRAY_SIZE(dc8051_info_host_msg_flags)); } static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) { struct hfi1_pportdata *ppd = dd->pport; u64 info, err, host_msg; int queue_link_down = 0; char buf[96]; /* look at the flags */ if (reg & DC_DC8051_ERR_FLG_SET_BY_8051_SMASK) { /* 8051 information set by firmware */ /* read DC8051_DBG_ERR_INFO_SET_BY_8051 for details */ info = read_csr(dd, DC_DC8051_DBG_ERR_INFO_SET_BY_8051); err = (info >> DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_SHIFT) & DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_MASK; host_msg = (info >> DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_SHIFT) & DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_MASK; /* * Handle error flags. */ if (err & FAILED_LNI) { /* * LNI error indications are cleared by the 8051 * only when starting polling. Only pay attention * to them when in the states that occur during * LNI. 
*/ if (ppd->host_link_state & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) { queue_link_down = 1; dd_dev_info(dd, "Link error: %s\n", dc8051_info_err_string(buf, sizeof(buf), err & FAILED_LNI)); } err &= ~(u64)FAILED_LNI; } if (err) { /* report remaining errors, but do not do anything */ dd_dev_err(dd, "8051 info error: %s\n", dc8051_info_err_string(buf, sizeof(buf), err)); } /* * Handle host message flags. */ if (host_msg & HOST_REQ_DONE) { /* * Presently, the driver does a busy wait for * host requests to complete. This is only an * informational message. * NOTE: The 8051 clears the host message * information *on the next 8051 command*. * Therefore, when linkup is achieved, * this flag will still be set. */ host_msg &= ~(u64)HOST_REQ_DONE; } if (host_msg & BC_SMA_MSG) { queue_work(ppd->hfi1_wq, &ppd->sma_message_work); host_msg &= ~(u64)BC_SMA_MSG; } if (host_msg & LINKUP_ACHIEVED) { dd_dev_info(dd, "8051: Link up\n"); queue_work(ppd->hfi1_wq, &ppd->link_up_work); host_msg &= ~(u64)LINKUP_ACHIEVED; } if (host_msg & EXT_DEVICE_CFG_REQ) { handle_8051_request(dd); host_msg &= ~(u64)EXT_DEVICE_CFG_REQ; } if (host_msg & VERIFY_CAP_FRAME) { queue_work(ppd->hfi1_wq, &ppd->link_vc_work); host_msg &= ~(u64)VERIFY_CAP_FRAME; } if (host_msg & LINK_GOING_DOWN) { const char *extra = ""; /* no downgrade action needed if going down */ if (host_msg & LINK_WIDTH_DOWNGRADED) { host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED; extra = " (ignoring downgrade)"; } dd_dev_info(dd, "8051: Link down%s\n", extra); queue_link_down = 1; host_msg &= ~(u64)LINK_GOING_DOWN; } if (host_msg & LINK_WIDTH_DOWNGRADED) { queue_work(ppd->hfi1_wq, &ppd->link_downgrade_work); host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED; } if (host_msg) { /* report remaining messages, but do not do anything */ dd_dev_info(dd, "8051 info host message: %s\n", dc8051_info_host_msg_string(buf, sizeof(buf), host_msg)); } reg &= ~DC_DC8051_ERR_FLG_SET_BY_8051_SMASK; } if (reg & DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK) { /* * Lost 
the 8051 heartbeat. If this happens, we * receive constant interrupts about it. Disable * the interrupt after the first. */ dd_dev_err(dd, "Lost 8051 heartbeat\n"); write_csr(dd, DC_DC8051_ERR_EN, read_csr(dd, DC_DC8051_ERR_EN) & ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK); reg &= ~DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK; } if (reg) { /* report the error, but do not do anything */ dd_dev_err(dd, "8051 error: %s\n", dc8051_err_string(buf, sizeof(buf), reg)); } if (queue_link_down) { /* if the link is already going down or disabled, do not * queue another */ if ((ppd->host_link_state & (HLS_GOING_OFFLINE|HLS_LINK_COOLDOWN)) || ppd->link_enabled == 0) { dd_dev_info(dd, "%s: not queuing link down\n", __func__); } else { queue_work(ppd->hfi1_wq, &ppd->link_down_work); } } } static const char * const fm_config_txt[] = { [0] = "BadHeadDist: Distance violation between two head flits", [1] = "BadTailDist: Distance violation between two tail flits", [2] = "BadCtrlDist: Distance violation between two credit control flits", [3] = "BadCrdAck: Credits return for unsupported VL", [4] = "UnsupportedVLMarker: Received VL Marker", [5] = "BadPreempt: Exceeded the preemption nesting level", [6] = "BadControlFlit: Received unsupported control flit", /* no 7 */ [8] = "UnsupportedVLMarker: Received VL Marker for unconfigured or disabled VL", }; static const char * const port_rcv_txt[] = { [1] = "BadPktLen: Illegal PktLen", [2] = "PktLenTooLong: Packet longer than PktLen", [3] = "PktLenTooShort: Packet shorter than PktLen", [4] = "BadSLID: Illegal SLID (0, using multicast as SLID, does not include security validation of SLID)", [5] = "BadDLID: Illegal DLID (0, doesn't match HFI)", [6] = "BadL2: Illegal L2 opcode", [7] = "BadSC: Unsupported SC", [9] = "BadRC: Illegal RC", [11] = "PreemptError: Preempting with same VL", [12] = "PreemptVL15: Preempting a VL15 packet", }; #define OPA_LDR_FMCONFIG_OFFSET 16 #define OPA_LDR_PORTRCV_OFFSET 0 static void handle_dcc_err(struct hfi1_devdata 
*dd, u32 unused, u64 reg) { u64 info, hdr0, hdr1; const char *extra; char buf[96]; struct hfi1_pportdata *ppd = dd->pport; u8 lcl_reason = 0; int do_bounce = 0; if (reg & DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK) { if (!(dd->err_info_uncorrectable & OPA_EI_STATUS_SMASK)) { info = read_csr(dd, DCC_ERR_INFO_UNCORRECTABLE); dd->err_info_uncorrectable = info & OPA_EI_CODE_SMASK; /* set status bit */ dd->err_info_uncorrectable |= OPA_EI_STATUS_SMASK; } reg &= ~DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK; } if (reg & DCC_ERR_FLG_LINK_ERR_SMASK) { struct hfi1_pportdata *ppd = dd->pport; /* this counter saturates at (2^32) - 1 */ if (ppd->link_downed < (u32)UINT_MAX) ppd->link_downed++; reg &= ~DCC_ERR_FLG_LINK_ERR_SMASK; } if (reg & DCC_ERR_FLG_FMCONFIG_ERR_SMASK) { u8 reason_valid = 1; info = read_csr(dd, DCC_ERR_INFO_FMCONFIG); if (!(dd->err_info_fmconfig & OPA_EI_STATUS_SMASK)) { dd->err_info_fmconfig = info & OPA_EI_CODE_SMASK; /* set status bit */ dd->err_info_fmconfig |= OPA_EI_STATUS_SMASK; } switch (info) { case 0: case 1: case 2: case 3: case 4: case 5: case 6: extra = fm_config_txt[info]; break; case 8: extra = fm_config_txt[info]; if (ppd->port_error_action & OPA_PI_MASK_FM_CFG_UNSUPPORTED_VL_MARKER) { do_bounce = 1; /* * lcl_reason cannot be derived from info * for this error */ lcl_reason = OPA_LINKDOWN_REASON_UNSUPPORTED_VL_MARKER; } break; default: reason_valid = 0; snprintf(buf, sizeof(buf), "reserved%lld", info); extra = buf; break; } if (reason_valid && !do_bounce) { do_bounce = ppd->port_error_action & (1 << (OPA_LDR_FMCONFIG_OFFSET + info)); lcl_reason = info + OPA_LINKDOWN_REASON_BAD_HEAD_DIST; } /* just report this */ dd_dev_info(dd, "DCC Error: fmconfig error: %s\n", extra); reg &= ~DCC_ERR_FLG_FMCONFIG_ERR_SMASK; } if (reg & DCC_ERR_FLG_RCVPORT_ERR_SMASK) { u8 reason_valid = 1; info = read_csr(dd, DCC_ERR_INFO_PORTRCV); hdr0 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR0); hdr1 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR1); if (!(dd->err_info_rcvport.status_and_code & 
OPA_EI_STATUS_SMASK)) { dd->err_info_rcvport.status_and_code = info & OPA_EI_CODE_SMASK; /* set status bit */ dd->err_info_rcvport.status_and_code |= OPA_EI_STATUS_SMASK; /* save first 2 flits in the packet that caused * the error */ dd->err_info_rcvport.packet_flit1 = hdr0; dd->err_info_rcvport.packet_flit2 = hdr1; } switch (info) { case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 9: case 11: case 12: extra = port_rcv_txt[info]; break; default: reason_valid = 0; snprintf(buf, sizeof(buf), "reserved%lld", info); extra = buf; break; } if (reason_valid && !do_bounce) { do_bounce = ppd->port_error_action & (1 << (OPA_LDR_PORTRCV_OFFSET + info)); lcl_reason = info + OPA_LINKDOWN_REASON_RCV_ERROR_0; } /* just report this */ dd_dev_info(dd, "DCC Error: PortRcv error: %s\n", extra); dd_dev_info(dd, " hdr0 0x%llx, hdr1 0x%llx\n", hdr0, hdr1); reg &= ~DCC_ERR_FLG_RCVPORT_ERR_SMASK; } if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK) { /* informative only */ dd_dev_info(dd, "8051 access to LCB blocked\n"); reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK; } if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK) { /* informative only */ dd_dev_info(dd, "host access to LCB blocked\n"); reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK; } /* report any remaining errors */ if (reg) dd_dev_info(dd, "DCC Error: %s\n", dcc_err_string(buf, sizeof(buf), reg)); if (lcl_reason == 0) lcl_reason = OPA_LINKDOWN_REASON_UNKNOWN; if (do_bounce) { dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__); set_link_down_reason(ppd, lcl_reason, 0, lcl_reason); queue_work(ppd->hfi1_wq, &ppd->link_bounce_work); } } static void handle_lcb_err(struct hfi1_devdata *dd, u32 unused, u64 reg) { char buf[96]; dd_dev_info(dd, "LCB Error: %s\n", lcb_err_string(buf, sizeof(buf), reg)); } /* * CCE block DC interrupt. Source is < 8. 
*/ static void is_dc_int(struct hfi1_devdata *dd, unsigned int source) { const struct err_reg_info *eri = &dc_errs[source]; if (eri->handler) { interrupt_clear_down(dd, 0, eri); } else if (source == 3 /* dc_lbm_int */) { /* * This indicates that a parity error has occurred on the * address/control lines presented to the LBM. The error * is a single pulse, there is no associated error flag, * and it is non-maskable. This is because if a parity * error occurs on the request the request is dropped. * This should never occur, but it is nice to know if it * ever does. */ dd_dev_err(dd, "Parity error in DC LBM block\n"); } else { dd_dev_err(dd, "Invalid DC interrupt %u\n", source); } } /* * TX block send credit interrupt. Source is < 160. */ static void is_send_credit_int(struct hfi1_devdata *dd, unsigned int source) { sc_group_release_update(dd, source); } /* * TX block SDMA interrupt. Source is < 48. * * SDMA interrupts are grouped by type: * * 0 - N-1 = SDma * N - 2N-1 = SDmaProgress * 2N - 3N-1 = SDmaIdle */ static void is_sdma_eng_int(struct hfi1_devdata *dd, unsigned int source) { /* what interrupt */ unsigned int what = source / TXE_NUM_SDMA_ENGINES; /* which engine */ unsigned int which = source % TXE_NUM_SDMA_ENGINES; #ifdef CONFIG_SDMA_VERBOSITY dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", which, slashstrip(__FILE__), __LINE__, __func__); sdma_dumpstate(&dd->per_sdma[which]); #endif if (likely(what < 3 && which < dd->num_sdma)) { sdma_engine_interrupt(&dd->per_sdma[which], 1ull << source); } else { /* should not happen */ dd_dev_err(dd, "Invalid SDMA interrupt 0x%x\n", source); } } /* * RX block receive available interrupt. Source is < 160. 
*/ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) { struct hfi1_ctxtdata *rcd; char *err_detail; if (likely(source < dd->num_rcv_contexts)) { rcd = dd->rcd[source]; if (rcd) { if (source < dd->first_user_ctxt) rcd->do_interrupt(rcd, 0); else handle_user_interrupt(rcd); return; /* OK */ } /* received an interrupt, but no rcd */ err_detail = "dataless"; } else { /* received an interrupt, but are not using that context */ err_detail = "out of range"; } dd_dev_err(dd, "unexpected %s receive available context interrupt %u\n", err_detail, source); } /* * RX block receive urgent interrupt. Source is < 160. */ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source) { struct hfi1_ctxtdata *rcd; char *err_detail; if (likely(source < dd->num_rcv_contexts)) { rcd = dd->rcd[source]; if (rcd) { /* only pay attention to user urgent interrupts */ if (source >= dd->first_user_ctxt) handle_user_interrupt(rcd); return; /* OK */ } /* received an interrupt, but no rcd */ err_detail = "dataless"; } else { /* received an interrupt, but are not using that context */ err_detail = "out of range"; } dd_dev_err(dd, "unexpected %s receive urgent context interrupt %u\n", err_detail, source); } /* * Reserved range interrupt. Should not be called in normal operation. 
*/ static void is_reserved_int(struct hfi1_devdata *dd, unsigned int source) { char name[64]; dd_dev_err(dd, "unexpected %s interrupt\n", is_reserved_name(name, sizeof(name), source)); } static const struct is_table is_table[] = { /* start end name func interrupt func */ { IS_GENERAL_ERR_START, IS_GENERAL_ERR_END, is_misc_err_name, is_misc_err_int }, { IS_SDMAENG_ERR_START, IS_SDMAENG_ERR_END, is_sdma_eng_err_name, is_sdma_eng_err_int }, { IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, is_sendctxt_err_name, is_sendctxt_err_int }, { IS_SDMA_START, IS_SDMA_END, is_sdma_eng_name, is_sdma_eng_int }, { IS_VARIOUS_START, IS_VARIOUS_END, is_various_name, is_various_int }, { IS_DC_START, IS_DC_END, is_dc_name, is_dc_int }, { IS_RCVAVAIL_START, IS_RCVAVAIL_END, is_rcv_avail_name, is_rcv_avail_int }, { IS_RCVURGENT_START, IS_RCVURGENT_END, is_rcv_urgent_name, is_rcv_urgent_int }, { IS_SENDCREDIT_START, IS_SENDCREDIT_END, is_send_credit_name, is_send_credit_int}, { IS_RESERVED_START, IS_RESERVED_END, is_reserved_name, is_reserved_int}, }; /* * Interrupt source interrupt - called when the given source has an interrupt. * Source is a bit index into an array of 64-bit integers. */ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source) { const struct is_table *entry; /* avoids a double compare by walking the table in-order */ for (entry = &is_table[0]; entry->is_name; entry++) { if (source < entry->end) { trace_hfi1_interrupt(dd, entry, source); entry->is_int(dd, source - entry->start); return; } } /* fell off the end */ dd_dev_err(dd, "invalid interrupt source %u\n", source); } /* * General interrupt handler. This is able to correctly handle * all interrupts in case INTx is used. 
*/ static irqreturn_t general_interrupt(int irq, void *data) { struct hfi1_devdata *dd = data; u64 regs[CCE_NUM_INT_CSRS]; u32 bit; int i; this_cpu_inc(*dd->int_counter); /* phase 1: scan and clear all handled interrupts */ for (i = 0; i < CCE_NUM_INT_CSRS; i++) { if (dd->gi_mask[i] == 0) { regs[i] = 0; /* used later */ continue; } regs[i] = read_csr(dd, CCE_INT_STATUS + (8 * i)) & dd->gi_mask[i]; /* only clear if anything is set */ if (regs[i]) write_csr(dd, CCE_INT_CLEAR + (8 * i), regs[i]); } /* phase 2: call the appropriate handler */ for_each_set_bit(bit, (unsigned long *)®s[0], CCE_NUM_INT_CSRS*64) { is_interrupt(dd, bit); } return IRQ_HANDLED; } static irqreturn_t sdma_interrupt(int irq, void *data) { struct sdma_engine *sde = data; struct hfi1_devdata *dd = sde->dd; u64 status; #ifdef CONFIG_SDMA_VERBOSITY dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx, slashstrip(__FILE__), __LINE__, __func__); sdma_dumpstate(sde); #endif this_cpu_inc(*dd->int_counter); /* This read_csr is really bad in the hot path */ status = read_csr(dd, CCE_INT_STATUS + (8*(IS_SDMA_START/64))) & sde->imask; if (likely(status)) { /* clear the interrupt(s) */ write_csr(dd, CCE_INT_CLEAR + (8*(IS_SDMA_START/64)), status); /* handle the interrupt(s) */ sdma_engine_interrupt(sde, status); } else dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n", sde->this_idx); return IRQ_HANDLED; } /* * Clear the receive interrupt, forcing the write and making sure * we have data from the chip, pushing everything in front of it * back to the host. 
*/
static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd)
{
	struct hfi1_devdata *dd = rcd->dd;
	/* interrupt clear CSR holding this context's bit */
	u32 addr = CCE_INT_CLEAR + (8 * rcd->ireg);

	mmiowb();	/* make sure everything before is written */
	write_csr(dd, addr, rcd->imask);
	/* force the above write on the chip and get a value back */
	(void)read_csr(dd, addr);
}

/* force the receive interrupt */
static inline void force_recv_intr(struct hfi1_ctxtdata *rcd)
{
	write_csr(rcd->dd, CCE_INT_FORCE + (8 * rcd->ireg), rcd->imask);
}

/*
 * return non-zero if a packet is present
 *
 * Without the DMA_RTAIL capability the check compares the context's
 * expected sequence count against the sequence in the RHF at the current
 * position; with it, the head is compared against the receive header tail.
 */
static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
{
	if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
		return (rcd->seq_cnt ==
				rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));

	/* else is RDMA rtail */
	return (rcd->head != get_rcvhdrtail(rcd));
}

/*
 * Receive packet IRQ handler.  This routine expects to be on its own IRQ.
 * This routine will try to handle packets immediately (latency), but if
 * it finds too many, it will invoke the thread handler (bandwidth).  The
 * chip receive interrupt is *not* cleared down until this or the thread (if
 * invoked) is finished.  The intent is to avoid extra interrupts while we
 * are processing packets anyway.
 */
static irqreturn_t receive_context_interrupt(int irq, void *data)
{
	struct hfi1_ctxtdata *rcd = data;
	struct hfi1_devdata *dd = rcd->dd;
	int disposition;
	int present;

	trace_hfi1_receive_interrupt(dd, rcd->ctxt);
	this_cpu_inc(*dd->int_counter);

	/* receive interrupt remains blocked while processing packets */
	disposition = rcd->do_interrupt(rcd, 0);

	/*
	 * Too many packets were seen while processing packets in this
	 * IRQ handler.  Invoke the handler thread.  The receive interrupt
	 * remains blocked.
	 */
	if (disposition == RCV_PKT_LIMIT)
		return IRQ_WAKE_THREAD;

	/*
	 * The packet processor detected no more packets.  Clear the receive
	 * interrupt and recheck for a packet that may have arrived
	 * after the previous check and interrupt clear.  If a packet arrived,
	 * force another interrupt.
	 */
	clear_recv_intr(rcd);
	present = check_packet_present(rcd);
	if (present)
		force_recv_intr(rcd);

	return IRQ_HANDLED;
}

/*
 * Receive packet thread handler.  This expects to be invoked with the
 * receive interrupt still blocked.
 */
static irqreturn_t receive_context_thread(int irq, void *data)
{
	struct hfi1_ctxtdata *rcd = data;
	int present;

	/* receive interrupt is still blocked from the IRQ handler */
	(void)rcd->do_interrupt(rcd, 1);

	/*
	 * The packet processor will only return if it detected no more
	 * packets.  Hold IRQs here so we can safely clear the interrupt and
	 * recheck for a packet that may have arrived after the previous
	 * check and the interrupt clear.  If a packet arrived, force another
	 * interrupt.
	 */
	local_irq_disable();
	clear_recv_intr(rcd);
	present = check_packet_present(rcd);
	if (present)
		force_recv_intr(rcd);
	local_irq_enable();

	return IRQ_HANDLED;
}

/* ========================================================================= */

/* Return the port field of the DC_DC8051_STS_CUR_STATE CSR. */
u32 read_physical_state(struct hfi1_devdata *dd)
{
	u64 reg;

	reg = read_csr(dd, DC_DC8051_STS_CUR_STATE);
	return (reg >> DC_DC8051_STS_CUR_STATE_PORT_SHIFT)
				& DC_DC8051_STS_CUR_STATE_PORT_MASK;
}

/* Return the link state field of the DCC_CFG_PORT_CONFIG CSR. */
static u32 read_logical_state(struct hfi1_devdata *dd)
{
	u64 reg;

	reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
	return (reg >> DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT)
				& DCC_CFG_PORT_CONFIG_LINK_STATE_MASK;
}

/*
 * Read-modify-write DCC_CFG_PORT_CONFIG so that its link state field
 * holds @chip_lstate; the other bits of the CSR are written back as read.
 */
static void set_logical_state(struct hfi1_devdata *dd, u32 chip_lstate)
{
	u64 reg;

	reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
	/* clear current state, set new state */
	reg &= ~DCC_CFG_PORT_CONFIG_LINK_STATE_SMASK;
	reg |= (u64)chip_lstate << DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT;
	write_csr(dd, DCC_CFG_PORT_CONFIG, reg);
}

/*
 * Use the 8051 to read a LCB CSR.
*/ static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data) { u32 regno; int ret; if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR) { if (acquire_lcb_access(dd, 0) == 0) { *data = read_csr(dd, addr); release_lcb_access(dd, 0); return 0; } return -EBUSY; } /* register is an index of LCB registers: (offset - base) / 8 */ regno = (addr - DC_LCB_CFG_RUN) >> 3; ret = do_8051_command(dd, HCMD_READ_LCB_CSR, regno, data); if (ret != HCMD_SUCCESS) return -EBUSY; return 0; } /* * Read an LCB CSR. Access may not be in host control, so check. * Return 0 on success, -EBUSY on failure. */ int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data) { struct hfi1_pportdata *ppd = dd->pport; /* if up, go through the 8051 for the value */ if (ppd->host_link_state & HLS_UP) return read_lcb_via_8051(dd, addr, data); /* if going up or down, no access */ if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE)) return -EBUSY; /* otherwise, host has access */ *data = read_csr(dd, addr); return 0; } /* * Use the 8051 to write a LCB CSR. */ static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data) { if (acquire_lcb_access(dd, 0) == 0) { write_csr(dd, addr, data); release_lcb_access(dd, 0); return 0; } return -EBUSY; } /* * Write an LCB CSR. Access may not be in host control, so check. * Return 0 on success, -EBUSY on failure. 
*/ int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data) { struct hfi1_pportdata *ppd = dd->pport; /* if up, go through the 8051 for the value */ if (ppd->host_link_state & HLS_UP) return write_lcb_via_8051(dd, addr, data); /* if going up or down, no access */ if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE)) return -EBUSY; /* otherwise, host has access */ write_csr(dd, addr, data); return 0; } /* * Returns: * < 0 = Linux error, not able to get access * > 0 = 8051 command RETURN_CODE */ static int do_8051_command( struct hfi1_devdata *dd, u32 type, u64 in_data, u64 *out_data) { u64 reg, completed; int return_code; unsigned long flags; unsigned long timeout; hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data); /* * Alternative to holding the lock for a long time: * - keep busy wait - have other users bounce off */ spin_lock_irqsave(&dd->dc8051_lock, flags); /* We can't send any commands to the 8051 if it's in reset */ if (dd->dc_shutdown) { return_code = -ENODEV; goto fail; } /* * If an 8051 host command timed out previously, then the 8051 is * stuck. * * On first timeout, attempt to reset and restart the entire DC * block (including 8051). (Is this too big of a hammer?) * * If the 8051 times out a second time, the reset did not bring it * back to healthy life. In that case, fail any subsequent commands. */ if (dd->dc8051_timed_out) { if (dd->dc8051_timed_out > 1) { dd_dev_err(dd, "Previous 8051 host command timed out, skipping command %u\n", type); return_code = -ENXIO; goto fail; } spin_unlock_irqrestore(&dd->dc8051_lock, flags); dc_shutdown(dd); dc_start(dd); spin_lock_irqsave(&dd->dc8051_lock, flags); } /* * If there is no timeout, then the 8051 command interface is * waiting for a command. */ /* * Do two writes: the first to stabilize the type and req_data, the * second to activate. 
*/ reg = ((u64)type & DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_MASK) << DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_SHIFT | (in_data & DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_MASK) << DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_SHIFT; write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg); reg |= DC_DC8051_CFG_HOST_CMD_0_REQ_NEW_SMASK; write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg); /* wait for completion, alternate: interrupt */ timeout = jiffies + msecs_to_jiffies(DC8051_COMMAND_TIMEOUT); while (1) { reg = read_csr(dd, DC_DC8051_CFG_HOST_CMD_1); completed = reg & DC_DC8051_CFG_HOST_CMD_1_COMPLETED_SMASK; if (completed) break; if (time_after(jiffies, timeout)) { dd->dc8051_timed_out++; dd_dev_err(dd, "8051 host command %u timeout\n", type); if (out_data) *out_data = 0; return_code = -ETIMEDOUT; goto fail; } udelay(2); } if (out_data) { *out_data = (reg >> DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_SHIFT) & DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_MASK; if (type == HCMD_READ_LCB_CSR) { /* top 16 bits are in a different register */ *out_data |= (read_csr(dd, DC_DC8051_CFG_EXT_DEV_1) & DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SMASK) << (48 - DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT); } } return_code = (reg >> DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_SHIFT) & DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_MASK; dd->dc8051_timed_out = 0; /* * Clear command for next user. 
*/ write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0); fail: spin_unlock_irqrestore(&dd->dc8051_lock, flags); return return_code; } static int set_physical_link_state(struct hfi1_devdata *dd, u64 state) { return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL); } static int load_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id, u32 config_data) { u64 data; int ret; data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT | (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT | (u64)config_data << LOAD_DATA_DATA_SHIFT; ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL); if (ret != HCMD_SUCCESS) { dd_dev_err(dd, "load 8051 config: field id %d, lane %d, err %d\n", (int)field_id, (int)lane_id, ret); } return ret; } /* * Read the 8051 firmware "registers". Use the RAM directly. Always * set the result, even on error. * Return 0 on success, -errno on failure */ static int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id, u32 *result) { u64 big_data; u32 addr; int ret; /* address start depends on the lane_id */ if (lane_id < 4) addr = (4 * NUM_GENERAL_FIELDS) + (lane_id * 4 * NUM_LANE_FIELDS); else addr = 0; addr += field_id * 4; /* read is in 8-byte chunks, hardware will truncate the address down */ ret = read_8051_data(dd, addr, 8, &big_data); if (ret == 0) { /* extract the 4 bytes we want */ if (addr & 0x4) *result = (u32)(big_data >> 32); else *result = (u32)big_data; } else { *result = 0; dd_dev_err(dd, "%s: direct read failed, lane %d, field %d!\n", __func__, lane_id, field_id); } return ret; } static int write_vc_local_phy(struct hfi1_devdata *dd, u8 power_management, u8 continuous) { u32 frame; frame = continuous << CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT | power_management << POWER_MANAGEMENT_SHIFT; return load_8051_config(dd, VERIFY_CAP_LOCAL_PHY, GENERAL_CONFIG, frame); } static int write_vc_local_fabric(struct hfi1_devdata *dd, u8 vau, u8 z, u8 vcu, u16 vl15buf, u8 crc_sizes) { u32 frame; frame = (u32)vau << VAU_SHIFT | (u32)z << Z_SHIFT | 
(u32)vcu << VCU_SHIFT | (u32)vl15buf << VL15BUF_SHIFT | (u32)crc_sizes << CRC_SIZES_SHIFT; return load_8051_config(dd, VERIFY_CAP_LOCAL_FABRIC, GENERAL_CONFIG, frame); } static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits, u8 *flag_bits, u16 *link_widths) { u32 frame; read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG, &frame); *misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK; *flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK; *link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK; } static int write_vc_local_link_width(struct hfi1_devdata *dd, u8 misc_bits, u8 flag_bits, u16 link_widths) { u32 frame; frame = (u32)misc_bits << MISC_CONFIG_BITS_SHIFT | (u32)flag_bits << LOCAL_FLAG_BITS_SHIFT | (u32)link_widths << LINK_WIDTH_SHIFT; return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG, frame); } static int write_local_device_id(struct hfi1_devdata *dd, u16 device_id, u8 device_rev) { u32 frame; frame = ((u32)device_id << LOCAL_DEVICE_ID_SHIFT) | ((u32)device_rev << LOCAL_DEVICE_REV_SHIFT); return load_8051_config(dd, LOCAL_DEVICE_ID, GENERAL_CONFIG, frame); } static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id, u8 *device_rev) { u32 frame; read_8051_config(dd, REMOTE_DEVICE_ID, GENERAL_CONFIG, &frame); *device_id = (frame >> REMOTE_DEVICE_ID_SHIFT) & REMOTE_DEVICE_ID_MASK; *device_rev = (frame >> REMOTE_DEVICE_REV_SHIFT) & REMOTE_DEVICE_REV_MASK; } void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b) { u32 frame; read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame); *ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK; *ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK; } static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management, u8 *continuous) { u32 frame; read_8051_config(dd, VERIFY_CAP_REMOTE_PHY, GENERAL_CONFIG, &frame); *power_management = (frame >> 
POWER_MANAGEMENT_SHIFT) & POWER_MANAGEMENT_MASK; *continuous = (frame >> CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT) & CONTINIOUS_REMOTE_UPDATE_SUPPORT_MASK; } static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z, u8 *vcu, u16 *vl15buf, u8 *crc_sizes) { u32 frame; read_8051_config(dd, VERIFY_CAP_REMOTE_FABRIC, GENERAL_CONFIG, &frame); *vau = (frame >> VAU_SHIFT) & VAU_MASK; *z = (frame >> Z_SHIFT) & Z_MASK; *vcu = (frame >> VCU_SHIFT) & VCU_MASK; *vl15buf = (frame >> VL15BUF_SHIFT) & VL15BUF_MASK; *crc_sizes = (frame >> CRC_SIZES_SHIFT) & CRC_SIZES_MASK; } static void read_vc_remote_link_width(struct hfi1_devdata *dd, u8 *remote_tx_rate, u16 *link_widths) { u32 frame; read_8051_config(dd, VERIFY_CAP_REMOTE_LINK_WIDTH, GENERAL_CONFIG, &frame); *remote_tx_rate = (frame >> REMOTE_TX_RATE_SHIFT) & REMOTE_TX_RATE_MASK; *link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK; } static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx) { u32 frame; read_8051_config(dd, LOCAL_LNI_INFO, GENERAL_CONFIG, &frame); *enable_lane_rx = (frame >> ENABLE_LANE_RX_SHIFT) & ENABLE_LANE_RX_MASK; } static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed) { u32 frame; read_8051_config(dd, REMOTE_LNI_INFO, GENERAL_CONFIG, &frame); *mgmt_allowed = (frame >> MGMT_ALLOWED_SHIFT) & MGMT_ALLOWED_MASK; } static void read_last_local_state(struct hfi1_devdata *dd, u32 *lls) { read_8051_config(dd, LAST_LOCAL_STATE_COMPLETE, GENERAL_CONFIG, lls); } static void read_last_remote_state(struct hfi1_devdata *dd, u32 *lrs) { read_8051_config(dd, LAST_REMOTE_STATE_COMPLETE, GENERAL_CONFIG, lrs); } void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality) { u32 frame; int ret; *link_quality = 0; if (dd->pport->host_link_state & HLS_UP) { ret = read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG, &frame); if (ret == 0) *link_quality = (frame >> LINK_QUALITY_SHIFT) & LINK_QUALITY_MASK; } } static void read_planned_down_reason_code(struct 
hfi1_devdata *dd, u8 *pdrrc) { u32 frame; read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG, &frame); *pdrrc = (frame >> DOWN_REMOTE_REASON_SHIFT) & DOWN_REMOTE_REASON_MASK; } static int read_tx_settings(struct hfi1_devdata *dd, u8 *enable_lane_tx, u8 *tx_polarity_inversion, u8 *rx_polarity_inversion, u8 *max_rate) { u32 frame; int ret; ret = read_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, &frame); *enable_lane_tx = (frame >> ENABLE_LANE_TX_SHIFT) & ENABLE_LANE_TX_MASK; *tx_polarity_inversion = (frame >> TX_POLARITY_INVERSION_SHIFT) & TX_POLARITY_INVERSION_MASK; *rx_polarity_inversion = (frame >> RX_POLARITY_INVERSION_SHIFT) & RX_POLARITY_INVERSION_MASK; *max_rate = (frame >> MAX_RATE_SHIFT) & MAX_RATE_MASK; return ret; } static int write_tx_settings(struct hfi1_devdata *dd, u8 enable_lane_tx, u8 tx_polarity_inversion, u8 rx_polarity_inversion, u8 max_rate) { u32 frame; /* no need to mask, all variable sizes match field widths */ frame = enable_lane_tx << ENABLE_LANE_TX_SHIFT | tx_polarity_inversion << TX_POLARITY_INVERSION_SHIFT | rx_polarity_inversion << RX_POLARITY_INVERSION_SHIFT | max_rate << MAX_RATE_SHIFT; return load_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, frame); } static void check_fabric_firmware_versions(struct hfi1_devdata *dd) { u32 frame, version, prod_id; int ret, lane; /* 4 lanes */ for (lane = 0; lane < 4; lane++) { ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame); if (ret) { dd_dev_err( dd, "Unable to read lane %d firmware details\n", lane); continue; } version = (frame >> SPICO_ROM_VERSION_SHIFT) & SPICO_ROM_VERSION_MASK; prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT) & SPICO_ROM_PROD_ID_MASK; dd_dev_info(dd, "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n", lane, version, prod_id); } } /* * Read an idle LCB message. 
* * Returns 0 on success, -EINVAL on error */ static int read_idle_message(struct hfi1_devdata *dd, u64 type, u64 *data_out) { int ret; ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG, type, data_out); if (ret != HCMD_SUCCESS) { dd_dev_err(dd, "read idle message: type %d, err %d\n", (u32)type, ret); return -EINVAL; } dd_dev_info(dd, "%s: read idle message 0x%llx\n", __func__, *data_out); /* return only the payload as we already know the type */ *data_out >>= IDLE_PAYLOAD_SHIFT; return 0; } /* * Read an idle SMA message. To be done in response to a notification from * the 8051. * * Returns 0 on success, -EINVAL on error */ static int read_idle_sma(struct hfi1_devdata *dd, u64 *data) { return read_idle_message(dd, (u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT, data); } /* * Send an idle LCB message. * * Returns 0 on success, -EINVAL on error */ static int send_idle_message(struct hfi1_devdata *dd, u64 data) { int ret; dd_dev_info(dd, "%s: sending idle message 0x%llx\n", __func__, data); ret = do_8051_command(dd, HCMD_SEND_LCB_IDLE_MSG, data, NULL); if (ret != HCMD_SUCCESS) { dd_dev_err(dd, "send idle message: data 0x%llx, err %d\n", data, ret); return -EINVAL; } return 0; } /* * Send an idle SMA message. * * Returns 0 on success, -EINVAL on error */ int send_idle_sma(struct hfi1_devdata *dd, u64 message) { u64 data; data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT) | ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT); return send_idle_message(dd, data); } /* * Initialize the LCB then do a quick link up. This may or may not be * in loopback. 
*
 * return 0 on success, -errno on error
 */
static int do_quick_linkup(struct hfi1_devdata *dd)
{
	u64 reg;
	unsigned long timeout;
	int ret;

	lcb_shutdown(dd, 0);

	if (loopback) {
		/* LCB_CFG_LOOPBACK.VAL = 2 */
		/* LCB_CFG_LANE_WIDTH.VAL = 0 */
		write_csr(dd, DC_LCB_CFG_LOOPBACK,
			IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
		write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
	}

	/* start the LCBs */
	/* LCB_CFG_TX_FIFOS_RESET.VAL = 0 */
	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);

	/* simulator only loopback steps */
	if (loopback && dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
		/* LCB_CFG_RUN.EN = 1 */
		write_csr(dd, DC_LCB_CFG_RUN,
			1ull << DC_LCB_CFG_RUN_EN_SHIFT);

		/* watch LCB_STS_LINK_TRANSFER_ACTIVE */
		/* poll every 2us, giving up after 10ms */
		timeout = jiffies + msecs_to_jiffies(10);
		while (1) {
			reg = read_csr(dd,
				DC_LCB_STS_LINK_TRANSFER_ACTIVE);
			if (reg)
				break;
			if (time_after(jiffies, timeout)) {
				dd_dev_err(dd,
					"timeout waiting for LINK_TRANSFER_ACTIVE\n");
				return -ETIMEDOUT;
			}
			udelay(2);
		}

		write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
			1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
	}

	if (!loopback) {
		/*
		 * When doing quick linkup and not in loopback, both
		 * sides must be done with LCB set-up before either
		 * starts the quick linkup.  Put a delay here so that
		 * both sides can be started and have a chance to be
		 * done with LCB set up before resuming.
		 */
		dd_dev_err(dd,
			"Pausing for peer to be finished with LCB set up\n");
		msleep(5000);
		dd_dev_err(dd,
			"Continuing with quick linkup\n");
	}

	write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
	set_8051_lcb_access(dd);

	/*
	 * State "quick" LinkUp request sets the physical link state to
	 * LinkUp without a verify capability sequence.
	 * This state is in simulator v37 and later.
	 */
	ret = set_physical_link_state(dd, PLS_QUICK_LINKUP);
	if (ret != HCMD_SUCCESS) {
		dd_dev_err(dd,
			"%s: set physical link state to quick LinkUp failed with return %d\n",
			__func__, ret);

		/* undo the two steps done just before the request */
		set_host_lcb_access(dd);
		write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */

		/* a non-negative 8051 return code is mapped to -EINVAL */
		if (ret >= 0)
			ret = -EINVAL;
		return ret;
	}

	return 0; /* success */
}

/*
 * Set the SerDes to internal loopback mode.
 * Returns 0 on success, -errno on error.
 */
static int set_serdes_loopback_mode(struct hfi1_devdata *dd)
{
	int ret;

	ret = set_physical_link_state(dd, PLS_INTERNAL_SERDES_LOOPBACK);
	if (ret == HCMD_SUCCESS)
		return 0;
	dd_dev_err(dd,
		"Set physical link state to SerDes Loopback failed with return %d\n",
		ret);
	/* a non-negative 8051 return code is mapped to -EINVAL */
	if (ret >= 0)
		ret = -EINVAL;
	return ret;
}

/*
 * Do all special steps to set up loopback.
 * Returns 0 on success, -errno on error.  May change the loopback and
 * quick_linkup settings as a side effect.
 */
static int init_loopback(struct hfi1_devdata *dd)
{
	dd_dev_info(dd, "Entering loopback mode\n");

	/* all loopbacks should disable self GUID check */
	write_csr(dd, DC_DC8051_CFG_MODE,
		(read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));

	/*
	 * The simulator has only one loopback option - LCB.  Switch
	 * to that option, which includes quick link up.
	 *
	 * Accept all valid loopback values.
	 */
	if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
		&& (loopback == LOOPBACK_SERDES
			|| loopback == LOOPBACK_LCB
			|| loopback == LOOPBACK_CABLE)) {
		loopback = LOOPBACK_LCB;
		quick_linkup = 1;
		return 0;
	}

	/* handle serdes loopback */
	if (loopback == LOOPBACK_SERDES) {
		/* internal serdes loopback needs quick linkup on RTL */
		if (dd->icode == ICODE_RTL_SILICON)
			quick_linkup = 1;
		return set_serdes_loopback_mode(dd);
	}

	/* LCB loopback - handled at poll time */
	if (loopback == LOOPBACK_LCB) {
		quick_linkup = 1; /* LCB is always quick linkup */

		/* not supported in emulation due to emulation RTL changes */
		if (dd->icode == ICODE_FPGA_EMULATION) {
			dd_dev_err(dd,
				"LCB loopback not supported in emulation\n");
			return -EINVAL;
		}
		return 0;
	}

	/* external cable loopback requires no extra steps */
	if (loopback == LOOPBACK_CABLE)
		return 0;

	dd_dev_err(dd, "Invalid loopback mode %d\n", loopback);
	return -EINVAL;
}

/*
 * Translate from the OPA_LINK_WIDTH handed to us by the FM to bits
 * used in the Verify Capability link width attribute.
 */
static u16 opa_to_vc_link_widths(u16 opa_widths)
{
	int i;
	u16 result = 0;

	/* each OPA width flag maps to bit (width - 1) of the result */
	static const struct link_bits {
		u16 from;
		u16 to;
	} opa_link_xlate[] = {
		{ OPA_LINK_WIDTH_1X, 1 << (1-1)  },
		{ OPA_LINK_WIDTH_2X, 1 << (2-1)  },
		{ OPA_LINK_WIDTH_3X, 1 << (3-1)  },
		{ OPA_LINK_WIDTH_4X, 1 << (4-1)  },
	};

	for (i = 0; i < ARRAY_SIZE(opa_link_xlate); i++) {
		if (opa_widths & opa_link_xlate[i].from)
			result |= opa_link_xlate[i].to;
	}
	return result;
}

/*
 * Set link attributes before moving to polling.
*/
static int set_local_link_attributes(struct hfi1_pportdata *ppd)
{
	struct hfi1_devdata *dd = ppd->dd;
	u8 lanes_tx;
	u8 tx_inversion;
	u8 rx_inversion;
	int rc;

	/* reset our fabric serdes to clear any lingering problems */
	fabric_serdes_reset(dd);

	/* the local tx rate is updated with a read-modify-write */
	rc = read_tx_settings(dd, &lanes_tx, &tx_inversion, &rx_inversion,
			      &ppd->local_tx_rate);
	if (rc)
		goto fail;

	if (dd->dc8051_ver >= dc8051_ver(0, 20)) {
		/* set the tx rate to all enabled */
		ppd->local_tx_rate = 0;
		if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
			ppd->local_tx_rate |= 2;
		if (ppd->link_speed_enabled & OPA_LINK_SPEED_12_5G)
			ppd->local_tx_rate |= 1;
	} else {
		/* set the tx rate to the fastest enabled */
		ppd->local_tx_rate =
			(ppd->link_speed_enabled & OPA_LINK_SPEED_25G) ? 1 : 0;
	}

	lanes_tx = 0xF; /* enable all four lanes */
	rc = write_tx_settings(dd, lanes_tx, tx_inversion, rx_inversion,
			       ppd->local_tx_rate);
	if (rc != HCMD_SUCCESS)
		goto fail;

	/*
	 * DC supports continuous updates.
	 */
	rc = write_vc_local_phy(dd, 0 /* no power management */,
				1 /* continuous updates */);
	if (rc != HCMD_SUCCESS)
		goto fail;

	/* z=1 in the next call: AU of 0 is not supported by the hardware */
	rc = write_vc_local_fabric(dd, dd->vau, 1, dd->vcu, dd->vl15_init,
				   ppd->port_crc_mode_enabled);
	if (rc != HCMD_SUCCESS)
		goto fail;

	rc = write_vc_local_link_width(dd, 0, 0,
			opa_to_vc_link_widths(ppd->link_width_enabled));
	if (rc != HCMD_SUCCESS)
		goto fail;

	/* let peer know who we are */
	rc = write_local_device_id(dd, dd->pcidev->device, dd->minrev);
	if (rc != HCMD_SUCCESS)
		goto fail;

	return 0;

fail:
	dd_dev_err(dd,
		   "Failed to set local link attributes, return 0x%x\n",
		   rc);
	return rc;
}

/*
 * Call this to start the link.
Schedule a retry if the cable is not
 * present or if unable to start polling.  Do not do anything if the
 * link is disabled.  Returns 0 if link is disabled or moved to polling
 */
int start_link(struct hfi1_pportdata *ppd)
{
	if (!ppd->link_enabled) {
		dd_dev_info(ppd->dd,
			"%s: stopping link start because link is disabled\n",
			__func__);
		return 0;
	}
	if (!ppd->driver_link_ready) {
		dd_dev_info(ppd->dd,
			"%s: stopping link start because driver is not ready\n",
			__func__);
		return 0;
	}

	/*
	 * Polling is started when a QSFP module is present, or when one of
	 * the SerDes/LCB loopback modes or the functional simulator is in
	 * use; the result of set_link_state() is returned.
	 */
	if (qsfp_mod_present(ppd) || loopback == LOOPBACK_SERDES ||
			loopback == LOOPBACK_LCB ||
			ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
		return set_link_state(ppd, HLS_DN_POLL);

	dd_dev_info(ppd->dd,
		"%s: stopping link start because no cable is present\n",
		__func__);
	/* none of the above applied: report -EAGAIN */
	return -EAGAIN;
}

/*
 * Pulse the QSFP_HFI0_RESET_N bit: set it in the output-enable CSR, clear
 * it in the output CSR, wait 10us, then set it again.  The QSFP2 or QSFP1
 * CSRs are selected by dd->hfi1_id.
 */
static void reset_qsfp(struct hfi1_pportdata *ppd)
{
	struct hfi1_devdata *dd = ppd->dd;
	u64 mask, qsfp_mask;

	mask = (u64)QSFP_HFI0_RESET_N;
	qsfp_mask = read_csr(dd,
		dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
	qsfp_mask |= mask;
	write_csr(dd,
		dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE,
		qsfp_mask);

	qsfp_mask = read_csr(dd,
		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
	qsfp_mask &= ~mask;
	write_csr(dd,
		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
		qsfp_mask);

	udelay(10);

	qsfp_mask |= mask;
	write_csr(dd,
		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
		qsfp_mask);
}

/*
 * Log every alarm or warning flagged in the QSFP interrupt status bytes.
 * @qsfp_interrupt_status: status bytes; indexes 0, 1 and 3 through 8 are
 * examined here.
 * Only reports; always returns 0.
 */
static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
					u8 *qsfp_interrupt_status)
{
	struct hfi1_devdata *dd = ppd->dd;

	if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
		(qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
		dd_dev_info(dd,
			"%s: QSFP cable on fire\n",
			__func__);

	if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
		(qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
		dd_dev_info(dd,
			"%s: QSFP cable temperature too low\n",
			__func__);

	if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
		(qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
		dd_dev_info(dd,
			"%s: QSFP supply voltage too high\n",
			__func__);

	if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
		(qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
		dd_dev_info(dd,
			"%s: QSFP supply voltage too low\n",
			__func__);

	/* Byte 2 is vendor specific */

	if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
		(qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
		dd_dev_info(dd,
			"%s: Cable RX channel 1/2 power too high\n",
			__func__);

	if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
		(qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
		dd_dev_info(dd,
			"%s: Cable RX channel 1/2 power too low\n",
			__func__);

	if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
		(qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
		dd_dev_info(dd,
			"%s: Cable RX channel 3/4 power too high\n",
			__func__);

	if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
		(qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
		dd_dev_info(dd,
			"%s: Cable RX channel 3/4 power too low\n",
			__func__);

	if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
		(qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
		dd_dev_info(dd,
			"%s: Cable TX channel 1/2 bias too high\n",
			__func__);

	if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
		(qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
		dd_dev_info(dd,
			"%s: Cable TX channel 1/2 bias too low\n",
			__func__);

	if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
		(qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
		dd_dev_info(dd,
			"%s: Cable TX channel 3/4 bias too high\n",
			__func__);

	if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
		(qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
		dd_dev_info(dd,
			"%s: Cable TX channel 3/4 bias too low\n",
			__func__);

	if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
		(qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
		dd_dev_info(dd,
			"%s: Cable TX channel 1/2 power too high\n",
			__func__);

	if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
		(qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
		dd_dev_info(dd,
			"%s: Cable TX channel 1/2 power too low\n",
			__func__);

	if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
		(qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
		dd_dev_info(dd,
			"%s: Cable TX channel 3/4 power too high\n",
			__func__);

	if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
		(qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
		dd_dev_info(dd,
			"%s: Cable TX channel 3/4 power too low\n",
			__func__);

	/* Bytes 9-10 and 11-12 are reserved */
	/* Bytes 13-15 are vendor specific */

	return 0;
}

/* Refresh the port's QSFP cache; always returns 0. */
static int do_pre_lni_host_behaviors(struct hfi1_pportdata *ppd)
{
	refresh_qsfp_cache(ppd, &ppd->qsfp_info);

	return 0;
}

/*
 * Read one status byte from the QSFP module and, unless it reports
 * QSFP_DATA_NOT_READY, run the pre-LNI host behaviors.
 * Returns 0 on success, -EIO if the status byte cannot be read.
 */
static int do_qsfp_intr_fallback(struct hfi1_pportdata *ppd)
{
	struct hfi1_devdata *dd = ppd->dd;
	u8 qsfp_interrupt_status = 0;

	if (qsfp_read(ppd, dd->hfi1_id, 2, &qsfp_interrupt_status, 1)
		!= 1) {
		dd_dev_info(dd,
			"%s: Failed to read status of QSFP module\n",
			__func__);
		return -EIO;
	}

	/* We don't care about alarms & warnings with a non-functional INT_N */
	if (!(qsfp_interrupt_status & QSFP_DATA_NOT_READY))
		do_pre_lni_host_behaviors(ppd);

	return 0;
}

/* This routine will only be scheduled if the QSFP module is present */
static void qsfp_event(struct work_struct *work)
{
	struct qsfp_data *qd;
	struct hfi1_pportdata *ppd;
	struct hfi1_devdata *dd;

	qd = container_of(work, struct qsfp_data,
qsfp_work); ppd = qd->ppd; dd = ppd->dd; /* Sanity check */ if (!qsfp_mod_present(ppd)) return; /* * Turn DC back on after cables has been * re-inserted. Up until now, the DC has been in * reset to save power. */ dc_start(dd); if (qd->cache_refresh_required) { msleep(3000); reset_qsfp(ppd); /* Check for QSFP interrupt after t_init (SFF 8679) * + extra */ msleep(3000); if (!qd->qsfp_interrupt_functional) { if (do_qsfp_intr_fallback(ppd) < 0) dd_dev_info(dd, "%s: QSFP fallback failed\n", __func__); ppd->driver_link_ready = 1; start_link(ppd); } } if (qd->check_interrupt_flags) { u8 qsfp_interrupt_status[16] = {0,}; if (qsfp_read(ppd, dd->hfi1_id, 6, &qsfp_interrupt_status[0], 16) != 16) { dd_dev_info(dd, "%s: Failed to read status of QSFP module\n", __func__); } else { unsigned long flags; u8 data_status; spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags); ppd->qsfp_info.check_interrupt_flags = 0; spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags); if (qsfp_read(ppd, dd->hfi1_id, 2, &data_status, 1) != 1) { dd_dev_info(dd, "%s: Failed to read status of QSFP module\n", __func__); } if (!(data_status & QSFP_DATA_NOT_READY)) { do_pre_lni_host_behaviors(ppd); start_link(ppd); } else handle_qsfp_error_conditions(ppd, qsfp_interrupt_status); } } } void init_qsfp(struct hfi1_pportdata *ppd) { struct hfi1_devdata *dd = ppd->dd; u64 qsfp_mask; if (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB || ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) { ppd->driver_link_ready = 1; return; } ppd->qsfp_info.ppd = ppd; INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event); qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N); /* Clear current status to avoid spurious interrupts */ write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR : ASIC_QSFP1_CLEAR, qsfp_mask); /* Handle active low nature of INT_N and MODPRST_N pins */ if (qsfp_mod_present(ppd)) qsfp_mask &= ~(u64)QSFP_HFI0_MODPRST_N; write_csr(dd, dd->hfi1_id ? 
ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
		  qsfp_mask);

	/* Allow only INT_N and MODPRST_N to trigger QSFP interrupts */
	qsfp_mask |= (u64)QSFP_HFI0_MODPRST_N;
	write_csr(dd,
		  dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
		  qsfp_mask);

	/* same reset/wait/fallback sequence as qsfp_event(), minus start_link() */
	if (qsfp_mod_present(ppd)) {
		msleep(3000);
		reset_qsfp(ppd);

		/* Check for QSFP interrupt after t_init (SFF 8679)
		 * + extra
		 */
		msleep(3000);
		if (!ppd->qsfp_info.qsfp_interrupt_functional) {
			if (do_qsfp_intr_fallback(ppd) < 0)
				dd_dev_info(dd,
					    "%s: QSFP fallback failed\n",
					    __func__);
			ppd->driver_link_ready = 1;
		}
	}
}

/*
 * Prepare the port and start the link.
 *
 * Enables extended PSN receive when the EXTENDED_PSN capability is set,
 * derives ppd->guid from dd->base_guid if it is still zero, marks the link
 * enabled, initializes loopback if requested, and finally calls
 * start_link().
 *
 * Returns a negative value from init_loopback() on failure, otherwise the
 * result of start_link().
 */
int bringup_serdes(struct hfi1_pportdata *ppd)
{
	struct hfi1_devdata *dd = ppd->dd;
	u64 guid;
	int ret;

	if (HFI1_CAP_IS_KSET(EXTENDED_PSN))
		add_rcvctrl(dd, RCV_CTRL_RCV_EXTENDED_PSN_ENABLE_SMASK);

	guid = ppd->guid;
	if (!guid) {
		/* port numbers start at 1, hence the "- 1" */
		if (dd->base_guid)
			guid = dd->base_guid + ppd->port - 1;
		ppd->guid = guid;
	}

	/* the link defaults to enabled */
	ppd->link_enabled = 1;
	/* Set linkinit_reason on power up per OPA spec */
	ppd->linkinit_reason = OPA_LINKINIT_REASON_LINKUP;

	if (loopback) {
		ret = init_loopback(dd);
		if (ret < 0)
			return ret;
	}

	return start_link(ppd);
}

/*
 * Take the link down and keep it down, then disable receive on the port
 * and cancel any pending freeze work.
 */
void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
{
	struct hfi1_devdata *dd = ppd->dd;

	/*
	 * Shut down the link and keep it down.   First turn off that the
	 * driver wants to allow the link to be up (driver_link_ready).
	 * Then make sure the link is not automatically restarted
	 * (link_enabled).  Cancel any pending restart.  And finally
	 * go offline.
	 */
	ppd->driver_link_ready = 0;
	ppd->link_enabled = 0;

	set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
			     OPA_LINKDOWN_REASON_SMA_DISABLED);
	set_link_state(ppd, HLS_DN_OFFLINE);

	/* disable the port */
	clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);

	cancel_work_sync(&ppd->freeze_work);
}

/*
 * Allocate the per-CPU RC counters (rc_acks, rc_qacks, rc_delayed_comp)
 * for every port of the device.
 *
 * The port array is taken to start immediately after the devdata
 * structure (dd + 1).
 *
 * Returns 0 on success, -ENOMEM as soon as any allocation for a port fails.
 * NOTE(review): counters already allocated are not freed here on failure -
 * confirm the caller releases them.
 */
static inline int init_cpu_counters(struct hfi1_devdata *dd)
{
	struct hfi1_pportdata *ppd;
	int i;

	ppd = (struct hfi1_pportdata *)(dd + 1);
	for (i = 0; i < dd->num_pports; i++, ppd++) {
		/* NOTE(review): these two stores are overwritten just below */
		ppd->ibport_data.rc_acks = NULL;
		ppd->ibport_data.rc_qacks = NULL;
		ppd->ibport_data.rc_acks = alloc_percpu(u64);
		ppd->ibport_data.rc_qacks = alloc_percpu(u64);
		ppd->ibport_data.rc_delayed_comp = alloc_percpu(u64);
		if ((ppd->ibport_data.rc_acks == NULL) ||
		    (ppd->ibport_data.rc_delayed_comp == NULL) ||
		    (ppd->ibport_data.rc_qacks == NULL))
			return -ENOMEM;
	}

	return 0;
}

/* printable names of the receive array entry types, indexed by type */
static const char * const pt_names[] = {
	"expected",
	"eager",
	"invalid"
};

/* return the name of a receive array entry type, "unknown" if out of range */
static const char *pt_name(u32 type)
{
	return type >= ARRAY_SIZE(pt_names) ? "unknown" : pt_names[type];
}

/*
 * Write one receive array entry.
 *
 * index is the index into the receive array
 * type is the entry type; PT_INVALID forces the address to 0 and anything
 *	above PT_INVALID is rejected with an error message
 * pa and order are encoded into the address and buffer size fields of the
 *	entry
 *
 * Nothing is written when the device is not flagged HFI1_PRESENT.  The
 * write goes through dd->rcvarray_wc when that is set, otherwise through
 * kregbase + RCV_ARRAY.
 */
void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
		  u32 type, unsigned long pa, u16 order)
{
	u64 reg;
	void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc :
			      (dd->kregbase + RCV_ARRAY));

	if (!(dd->flags & HFI1_PRESENT))
		goto done;

	if (type == PT_INVALID) {
		pa = 0;
	} else if (type > PT_INVALID) {
		dd_dev_err(dd,
			   "unexpected receive array type %u for index %u, not handled\n",
			   type, index);
		goto done;
	}

	hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx",
		  pt_name(type), index, pa, (unsigned long)order);

#define RT_ADDR_SHIFT 12	/* 4KB kernel address boundary */
	reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK
		| (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT
		| ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK)
					<< RCV_ARRAY_RT_ADDR_SHIFT;
	/* each entry is 8 bytes wide */
	writeq(reg, base + (index * 8));

	if (type == PT_EAGER)
		/*
		 * Eager entries are written one-by-one so we have to push them
		 * after we write the entry.
*/
		flush_wc();
done:
	return;
}

/*
 * Invalidate every eager and expected receive array entry owned by a
 * context by rewriting each one as PT_INVALID.
 */
void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
{
	struct hfi1_devdata *dd = rcd->dd;
	u32 i;

	/* this could be optimized */
	for (i = rcd->eager_base; i < rcd->eager_base +
		     rcd->egrbufs.alloced; i++)
		hfi1_put_tid(dd, i, PT_INVALID, 0, 0);

	for (i = rcd->expected_base;
			i < rcd->expected_base + rcd->expected_count; i++)
		hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
}

/*
 * Fill in kinfo->runtime_flags from the misc, user and kernel-to-user
 * capability bits.  rcd is not used.  Always returns 0.
 */
int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd,
			struct hfi1_ctxt_info *kinfo)
{
	kinfo->runtime_flags = (HFI1_MISC_GET() << HFI1_CAP_USER_SHIFT) |
		HFI1_CAP_UGET(MASK) | HFI1_CAP_KGET(K2U);
	return 0;
}

/*
 * Return a pointer to the message header that belongs to the RHF at
 * rhf_addr: step back by dd->rhf_offset and forward by the header queue
 * offset recorded in the RHF.  The arithmetic is in units of __le32.
 */
struct hfi1_message_header *hfi1_get_msgheader(
				struct hfi1_devdata *dd, __le32 *rhf_addr)
{
	u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));

	return (struct hfi1_message_header *)
		(rhf_addr - dd->rhf_offset + offset);
}

/*
 * Printable names for the "which" selector of hfi1_get_ib_cfg() and
 * hfi1_set_ib_cfg(), indexed by selector value.
 * NOTE(review): the order is assumed to match the HFI1_IB_CFG_* numbering -
 * confirm against the definitions.
 */
static const char * const ib_cfg_name_strings[] = {
	"HFI1_IB_CFG_LIDLMC",
	"HFI1_IB_CFG_LWID_DG_ENB",
	"HFI1_IB_CFG_LWID_ENB",
	"HFI1_IB_CFG_LWID",
	"HFI1_IB_CFG_SPD_ENB",
	"HFI1_IB_CFG_SPD",
	"HFI1_IB_CFG_RXPOL_ENB",
	"HFI1_IB_CFG_LREV_ENB",
	"HFI1_IB_CFG_LINKLATENCY",
	"HFI1_IB_CFG_HRTBT",
	"HFI1_IB_CFG_OP_VLS",
	"HFI1_IB_CFG_VL_HIGH_CAP",
	"HFI1_IB_CFG_VL_LOW_CAP",
	"HFI1_IB_CFG_OVERRUN_THRESH",
	"HFI1_IB_CFG_PHYERR_THRESH",
	"HFI1_IB_CFG_LINKDEFAULT",
	"HFI1_IB_CFG_PKEYS",
	"HFI1_IB_CFG_MTU",
	"HFI1_IB_CFG_LSTATE",
	"HFI1_IB_CFG_VL_HIGH_LIMIT",
	"HFI1_IB_CFG_PMA_TICKS",
	"HFI1_IB_CFG_PORT"
};

/* return the name of a config selector, "invalid" if out of range */
static const char *ib_cfg_name(int which)
{
	if (which < 0 || which >= ARRAY_SIZE(ib_cfg_name_strings))
		return "invalid";
	return ib_cfg_name_strings[which];
}

/*
 * Read one port configuration value.
 *
 * @ppd: per port data
 * @which: HFI1_IB_CFG_* selector
 *
 * Returns the requested value.  Selectors that are not implemented return
 * 0 and, when the PRINT_UNIMPL capability is set, log a message.
 */
int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which)
{
	struct hfi1_devdata *dd = ppd->dd;
	int val = 0;

	switch (which) {
	case HFI1_IB_CFG_LWID_ENB: /* allowed Link-width */
		val = ppd->link_width_enabled;
		break;
	case HFI1_IB_CFG_LWID: /* currently active Link-width */
		val = ppd->link_width_active;
		break;
	case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
		val = ppd->link_speed_enabled;
		break;
	case HFI1_IB_CFG_SPD: /* current Link speed */
		val = ppd->link_speed_active;
		break;

	case HFI1_IB_CFG_RXPOL_ENB: /* Auto-RX-polarity enable */
	case HFI1_IB_CFG_LREV_ENB: /* Auto-Lane-reversal enable */
	case HFI1_IB_CFG_LINKLATENCY:
		goto unimplemented;

	case HFI1_IB_CFG_OP_VLS:
		val = ppd->vls_operational;
		break;
	case HFI1_IB_CFG_VL_HIGH_CAP: /* VL arb high priority table size */
		val = VL_ARB_HIGH_PRIO_TABLE_SIZE;
		break;
	case HFI1_IB_CFG_VL_LOW_CAP: /* VL arb low priority table size */
		val = VL_ARB_LOW_PRIO_TABLE_SIZE;
		break;
	case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
		val = ppd->overrun_threshold;
		break;
	case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
		val = ppd->phy_error_threshold;
		break;
	case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
		val = dd->link_default;
		break;

	case HFI1_IB_CFG_HRTBT: /* Heartbeat off/enable/auto */
	case HFI1_IB_CFG_PMA_TICKS:
	default:
unimplemented:
		/* val stays 0 for everything that lands here */
		if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
			dd_dev_info(
				dd,
				"%s: which %s: not implemented\n",
				__func__,
				ib_cfg_name(which));
		break;
	}

	return val;
}

/*
 * The largest MAD packet size.
 */
#define MAX_MAD_PACKET 2048

/*
 * Return the maximum header bytes that can go on the _wire_
 * for this device. This count includes the ICRC which is
 * not part of the packet held in memory but it is appended
 * by the HW.
 * This is dependent on the device's receive header entry size.
 * HFI allows this to be set per-receive context, but the
 * driver presently enforces a global value.
 */
u32 lrh_max_header_bytes(struct hfi1_devdata *dd)
{
	/*
	 * The maximum non-payload (MTU) bytes in LRH.PktLen are
	 * the Receive Header Entry Size minus the PBC (or RHF) size
	 * plus one DW for the ICRC appended by HW.
	 *
	 * dd->rcd[0].rcvhdrqentsize is in DW.
	 * We use rcd[0] as all context will have the same value. Also,
	 * the first kernel context would have been allocated by now so
	 * we are guaranteed a valid value.
*/ return (dd->rcd[0]->rcvhdrqentsize - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2; } /* * Set Send Length * @ppd - per port data * * Set the MTU by limiting how many DWs may be sent. The SendLenCheck* * registers compare against LRH.PktLen, so use the max bytes included * in the LRH. * * This routine changes all VL values except VL15, which it maintains at * the same value. */ static void set_send_length(struct hfi1_pportdata *ppd) { struct hfi1_devdata *dd = ppd->dd; u32 max_hb = lrh_max_header_bytes(dd), maxvlmtu = 0, dcmtu; u64 len1 = 0, len2 = (((dd->vld[15].mtu + max_hb) >> 2) & SEND_LEN_CHECK1_LEN_VL15_MASK) << SEND_LEN_CHECK1_LEN_VL15_SHIFT; int i; for (i = 0; i < ppd->vls_supported; i++) { if (dd->vld[i].mtu > maxvlmtu) maxvlmtu = dd->vld[i].mtu; if (i <= 3) len1 |= (((dd->vld[i].mtu + max_hb) >> 2) & SEND_LEN_CHECK0_LEN_VL0_MASK) << ((i % 4) * SEND_LEN_CHECK0_LEN_VL1_SHIFT); else len2 |= (((dd->vld[i].mtu + max_hb) >> 2) & SEND_LEN_CHECK1_LEN_VL4_MASK) << ((i % 4) * SEND_LEN_CHECK1_LEN_VL5_SHIFT); } write_csr(dd, SEND_LEN_CHECK0, len1); write_csr(dd, SEND_LEN_CHECK1, len2); /* adjust kernel credit return thresholds based on new MTUs */ /* all kernel receive contexts have the same hdrqentsize */ for (i = 0; i < ppd->vls_supported; i++) { sc_set_cr_threshold(dd->vld[i].sc, sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu, dd->rcd[0]->rcvhdrqentsize)); } sc_set_cr_threshold(dd->vld[15].sc, sc_mtu_to_threshold(dd->vld[15].sc, dd->vld[15].mtu, dd->rcd[0]->rcvhdrqentsize)); /* Adjust maximum MTU for the port in DC */ dcmtu = maxvlmtu == 10240 ? 
DCC_CFG_PORT_MTU_CAP_10240 : (ilog2(maxvlmtu >> 8) + 1); len1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG); len1 &= ~DCC_CFG_PORT_CONFIG_MTU_CAP_SMASK; len1 |= ((u64)dcmtu & DCC_CFG_PORT_CONFIG_MTU_CAP_MASK) << DCC_CFG_PORT_CONFIG_MTU_CAP_SHIFT; write_csr(ppd->dd, DCC_CFG_PORT_CONFIG, len1); } static void set_lidlmc(struct hfi1_pportdata *ppd) { int i; u64 sreg = 0; struct hfi1_devdata *dd = ppd->dd; u32 mask = ~((1U << ppd->lmc) - 1); u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1); if (dd->hfi1_snoop.mode_flag) dd_dev_info(dd, "Set lid/lmc while snooping"); c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK | DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK); c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK) << DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT)| ((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK) << DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT); write_csr(ppd->dd, DCC_CFG_PORT_CONFIG1, c1); /* * Iterate over all the send contexts and set their SLID check */ sreg = ((mask & SEND_CTXT_CHECK_SLID_MASK_MASK) << SEND_CTXT_CHECK_SLID_MASK_SHIFT) | (((ppd->lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) << SEND_CTXT_CHECK_SLID_VALUE_SHIFT); for (i = 0; i < dd->chip_send_contexts; i++) { hfi1_cdbg(LINKVERB, "SendContext[%d].SLID_CHECK = 0x%x", i, (u32)sreg); write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, sreg); } /* Now we have to do the same thing for the sdma engines */ sdma_update_lmc(dd, mask, ppd->lid); } static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs) { unsigned long timeout; u32 curr_state; timeout = jiffies + msecs_to_jiffies(msecs); while (1) { curr_state = read_physical_state(dd); if (curr_state == state) break; if (time_after(jiffies, timeout)) { dd_dev_err(dd, "timeout waiting for phy link state 0x%x, current state is 0x%x\n", state, curr_state); return -ETIMEDOUT; } usleep_range(1950, 2050); /* sleep 2ms-ish */ } return 0; } /* * Helper for set_link_state(). Do not call except from that routine. * Expects ppd->hls_mutex to be held. 
*
 * @rem_reason value to be sent to the neighbor
 *
 * LinkDownReasons only set if transition succeeds.
 *
 * Returns 0 on success, -EINVAL if the Offline request is rejected, or
 * the error from wait_phy_linkstate() / wait_fm_ready() on timeout.
 * ppd->host_link_state passes through HLS_GOING_OFFLINE and
 * HLS_LINK_COOLDOWN and ends at HLS_DN_OFFLINE, except on the two early
 * error returns, where it is left at HLS_GOING_OFFLINE.
 */
static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
{
	struct hfi1_devdata *dd = ppd->dd;
	u32 pstate, previous_state;
	u32 last_local_state;
	u32 last_remote_state;
	int ret;
	int do_transition;
	int do_wait;

	/* remember where we came from; it decides the notifications below */
	previous_state = ppd->host_link_state;
	ppd->host_link_state = HLS_GOING_OFFLINE;
	pstate = read_physical_state(dd);
	if (pstate == PLS_OFFLINE) {
		do_transition = 0;	/* in right state */
		do_wait = 0;		/* ...no need to wait */
	} else if ((pstate & 0xff) == PLS_OFFLINE) {
		do_transition = 0;	/* in an offline transient state */
		do_wait = 1;		/* ...wait for it to settle */
	} else {
		do_transition = 1;	/* need to move to offline */
		do_wait = 1;		/* ...will need to wait */
	}

	if (do_transition) {
		/* the reason for the neighbor travels in the second byte */
		ret = set_physical_link_state(dd,
			PLS_OFFLINE | (rem_reason << 8));

		if (ret != HCMD_SUCCESS) {
			dd_dev_err(dd,
				   "Failed to transition to Offline link state, return %d\n",
				   ret);
			return -EINVAL;
		}
		if (ppd->offline_disabled_reason == OPA_LINKDOWN_REASON_NONE)
			ppd->offline_disabled_reason =
			OPA_LINKDOWN_REASON_TRANSIENT;
	}

	if (do_wait) {
		/* it can take a while for the link to go down */
		ret = wait_phy_linkstate(dd, PLS_OFFLINE, 10000);
		if (ret < 0)
			return ret;
	}

	/* make sure the logical state is also down */
	/* (the result of this wait is not checked) */
	wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);

	/*
	 * Now in charge of LCB - must be after the physical state is
	 * offline.quiet and before host_link_state is changed.
	 */
	set_host_lcb_access(dd);
	write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
	ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */

	/*
	 * The LNI has a mandatory wait time after the physical state
	 * moves to Offline.Quiet.  The wait time may be different
	 * depending on how the link went down.  The 8051 firmware
	 * will observe the needed wait time and only move to ready
	 * when that is completed.  The largest of the quiet timeouts
	 * is 2.5s, so wait that long and then a bit more.
	 */
	ret = wait_fm_ready(dd, 3000);
	if (ret) {
		dd_dev_err(dd,
			   "After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
		/* state is really offline, so make it so */
		ppd->host_link_state = HLS_DN_OFFLINE;
		return ret;
	}

	/*
	 * The state is now offline and the 8051 is ready to accept host
	 * requests.
	 *	- change our state
	 *	- notify others if we were previously in a linkup state
	 */
	ppd->host_link_state = HLS_DN_OFFLINE;
	if (previous_state & HLS_UP) {
		/* went down while link was up */
		handle_linkup_change(dd, 0);
	} else if (previous_state
			& (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
		/* went down while attempting link up */
		/* byte 1 of last_*_state is the failure reason */
		read_last_local_state(dd, &last_local_state);
		read_last_remote_state(dd, &last_remote_state);
		dd_dev_err(dd,
			   "LNI failure last states: local 0x%08x, remote 0x%08x\n",
			   last_local_state, last_remote_state);
	}

	/* the active link width (downgrade) is 0 on link down */
	ppd->link_width_active = 0;
	ppd->link_width_downgrade_tx_active = 0;
	ppd->link_width_downgrade_rx_active = 0;
	ppd->current_egress_rate = 0;
	return 0;
}

/* return the link state name */
/*
 * The table is indexed by the bit position of the (single-bit) HLS_* value;
 * positions without an entry, and positions beyond the table, yield
 * "unknown".
 */
static const char *link_state_name(u32 state)
{
	const char *name;
	int n = ilog2(state);
	static const char * const names[] = {
		[__HLS_UP_INIT_BP]	 = "INIT",
		[__HLS_UP_ARMED_BP]	 = "ARMED",
		[__HLS_UP_ACTIVE_BP]	 = "ACTIVE",
		[__HLS_DN_DOWNDEF_BP]	 = "DOWNDEF",
		[__HLS_DN_POLL_BP]	 = "POLL",
		[__HLS_DN_DISABLE_BP]	 = "DISABLE",
		[__HLS_DN_OFFLINE_BP]	 = "OFFLINE",
		[__HLS_VERIFY_CAP_BP]	 = "VERIFY_CAP",
		[__HLS_GOING_UP_BP]	 = "GOING_UP",
		[__HLS_GOING_OFFLINE_BP] = "GOING_OFFLINE",
		[__HLS_LINK_COOLDOWN_BP] = "LINK_COOLDOWN"
	};

	name = n < ARRAY_SIZE(names) ? names[n] : NULL;
	return name ? name : "unknown";
}

/* return the link state reason name */
/*
 * Only HLS_UP_INIT carries a reason (taken from ppd->linkinit_reason);
 * every other state, and an unrecognized reason, gives an empty string.
 */
static const char *link_state_reason_name(struct hfi1_pportdata *ppd,
					  u32 state)
{
	if (state == HLS_UP_INIT) {
		switch (ppd->linkinit_reason) {
		case OPA_LINKINIT_REASON_LINKUP:
			return "(LINKUP)";
		case OPA_LINKINIT_REASON_FLAPPING:
			return "(FLAPPING)";
		case OPA_LINKINIT_OUTSIDE_POLICY:
			return "(OUTSIDE_POLICY)";
		case OPA_LINKINIT_QUARANTINED:
			return "(QUARANTINED)";
		case OPA_LINKINIT_INSUFIC_CAPABILITY:
			return "(INSUFIC_CAPABILITY)";
		default:
			break;
		}
	}
	return "";
}

/*
 * driver_physical_state - convert the driver's notion of a port's
 * state (an HLS_*) into a physical state (a {IB,OPA}_PORTPHYSSTATE_*).
 * Return -1 (converted to a u32) to indicate error.
 *
 * HLS_DN_DOWNDEF is treated like any unknown value: it is logged and
 * reported as an error.
 */
u32 driver_physical_state(struct hfi1_pportdata *ppd)
{
	switch (ppd->host_link_state) {
	case HLS_UP_INIT:
	case HLS_UP_ARMED:
	case HLS_UP_ACTIVE:
		return IB_PORTPHYSSTATE_LINKUP;
	case HLS_DN_POLL:
		return IB_PORTPHYSSTATE_POLLING;
	case HLS_DN_DISABLE:
		return IB_PORTPHYSSTATE_DISABLED;
	case HLS_DN_OFFLINE:
		return OPA_PORTPHYSSTATE_OFFLINE;
	case HLS_VERIFY_CAP:
		return IB_PORTPHYSSTATE_POLLING;
	case HLS_GOING_UP:
		return IB_PORTPHYSSTATE_POLLING;
	case HLS_GOING_OFFLINE:
		return OPA_PORTPHYSSTATE_OFFLINE;
	case HLS_LINK_COOLDOWN:
		return OPA_PORTPHYSSTATE_OFFLINE;
	case HLS_DN_DOWNDEF:
	default:
		dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
			   ppd->host_link_state);
		return  -1;
	}
}

/*
 * driver_logical_state - convert the driver's notion of a port's
 * state (an HLS_*) into a logical state (a IB_PORT_*). Return -1
 * (converted to a u32) to indicate error.
*/
u32 driver_logical_state(struct hfi1_pportdata *ppd)
{
	/* any non-zero state without an HLS_UP bit is a "down" state */
	if (ppd->host_link_state && !(ppd->host_link_state & HLS_UP))
		return IB_PORT_DOWN;

	switch (ppd->host_link_state & HLS_UP) {
	case HLS_UP_INIT:
		return IB_PORT_INIT;
	case HLS_UP_ARMED:
		return IB_PORT_ARMED;
	case HLS_UP_ACTIVE:
		return IB_PORT_ACTIVE;
	default:
		/* reached when host_link_state is 0 */
		dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
			   ppd->host_link_state);
	return -1;
	}
}

/*
 * Record why the link is going down.
 *
 * The three reasons are stored only when neither the local nor the
 * neighbor "latest" reason is already set, so the first reason recorded
 * is the one that is kept.
 */
void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
			  u8 neigh_reason, u8 rem_reason)
{
	if (ppd->local_link_down_reason.latest == 0 &&
	    ppd->neigh_link_down_reason.latest == 0) {
		ppd->local_link_down_reason.latest = lcl_reason;
		ppd->neigh_link_down_reason.latest = neigh_reason;
		ppd->remote_link_down_reason = rem_reason;
	}
}

/*
 * Change the physical and/or logical link state.
 *
 * Do not call this routine while inside an interrupt.  It contains
 * calls to routines that can take multiple seconds to finish.
 *
 * Returns 0 on success, -errno on failure.
 *
 * The whole transition runs under ppd->hls_lock.  HLS_DN_DOWNDEF is
 * replaced by dd->link_default before anything else is done.  A request
 * for the state the port is already in is a no-op, except poll -> poll,
 * which is carried out as a link bounce.
 */
int set_link_state(struct hfi1_pportdata *ppd, u32 state)
{
	struct hfi1_devdata *dd = ppd->dd;
	/* event.device stays NULL unless a port-active event must be sent */
	struct ib_event event = {.device = NULL};
	int ret1, ret = 0;
	int was_up, is_down;
	int orig_new_state, poll_bounce;

	mutex_lock(&ppd->hls_lock);

	orig_new_state = state;
	if (state == HLS_DN_DOWNDEF)
		state = dd->link_default;

	/* interpret poll -> poll as a link bounce */
	poll_bounce = ppd->host_link_state == HLS_DN_POLL
				&& state == HLS_DN_POLL;

	dd_dev_info(dd, "%s: current %s, new %s %s%s\n", __func__,
		    link_state_name(ppd->host_link_state),
		    link_state_name(orig_new_state),
		    poll_bounce ? "(bounce) " : "",
		    link_state_reason_name(ppd, state));

	was_up = !!(ppd->host_link_state & HLS_UP);

	/*
	 * If we're going to a (HLS_*) link state that implies the logical
	 * link state is neither of (IB_PORT_ARMED, IB_PORT_ACTIVE), then
	 * reset is_sm_config_started to 0.
	 */
	if (!(state & (HLS_UP_ARMED | HLS_UP_ACTIVE)))
		ppd->is_sm_config_started = 0;

	/*
	 * Do nothing if the states match.  Let a poll to poll link bounce
	 * go through.
	 */
	if (ppd->host_link_state == state && !poll_bounce)
		goto done;

	switch (state) {
	case HLS_UP_INIT:
		if (ppd->host_link_state == HLS_DN_POLL && (quick_linkup
			    || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
			/*
			 * Quick link up jumps from polling to here.
			 *
			 * Whether in normal or loopback mode, the
			 * simulator jumps from polling to link up.
			 * Accept that here.
			 */
			/* OK */;
		} else if (ppd->host_link_state != HLS_GOING_UP) {
			goto unexpected;
		}

		ppd->host_link_state = HLS_UP_INIT;
		ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
		if (ret) {
			/* logical state didn't change, stay at going_up */
			ppd->host_link_state = HLS_GOING_UP;
			dd_dev_err(dd,
				   "%s: logical state did not change to INIT\n",
				   __func__);
		} else {
			/* clear old transient LINKINIT_REASON code */
			if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
				ppd->linkinit_reason =
					OPA_LINKINIT_REASON_LINKUP;

			/* enable the port */
			add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);

			handle_linkup_change(dd, 1);
		}
		break;
	case HLS_UP_ARMED:
		if (ppd->host_link_state != HLS_UP_INIT)
			goto unexpected;

		ppd->host_link_state = HLS_UP_ARMED;
		set_logical_state(dd, LSTATE_ARMED);
		ret = wait_logical_linkstate(ppd, IB_PORT_ARMED, 1000);
		if (ret) {
			/* logical state didn't change, stay at init */
			ppd->host_link_state = HLS_UP_INIT;
			dd_dev_err(dd,
				   "%s: logical state did not change to ARMED\n",
				   __func__);
		}
		/*
		 * The simulator does not currently implement SMA messages,
		 * so neighbor_normal is not set.  Set it here when we first
		 * move to Armed.
		 */
		if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
			ppd->neighbor_normal = 1;
		break;
	case HLS_UP_ACTIVE:
		if (ppd->host_link_state != HLS_UP_ARMED)
			goto unexpected;

		ppd->host_link_state = HLS_UP_ACTIVE;
		set_logical_state(dd, LSTATE_ACTIVE);
		ret = wait_logical_linkstate(ppd, IB_PORT_ACTIVE, 1000);
		if (ret) {
			/* logical state didn't change, stay at armed */
			ppd->host_link_state = HLS_UP_ARMED;
			dd_dev_err(dd,
				   "%s: logical state did not change to ACTIVE\n",
				   __func__);
		} else {

			/* tell all engines to go running */
			sdma_all_running(dd);

			/* Signal the IB layer that the port has went active */
			/* (only filled in here; dispatched after unlocking) */
			event.device = &dd->verbs_dev.ibdev;
			event.element.port_num = ppd->port;
			event.event = IB_EVENT_PORT_ACTIVE;
		}
		break;
	case HLS_DN_POLL:
		/* the DC may have been shut down by a move to disabled */
		if ((ppd->host_link_state == HLS_DN_DISABLE ||
		     ppd->host_link_state == HLS_DN_OFFLINE) &&
		    dd->dc_shutdown)
			dc_start(dd);
		/* Hand LED control to the DC */
		write_csr(dd, DCC_CFG_LED_CNTRL, 0);

		if (ppd->host_link_state != HLS_DN_OFFLINE) {
			/* saved so link_enabled can be restored on failure */
			u8 tmp = ppd->link_enabled;

			ret = goto_offline(ppd, ppd->remote_link_down_reason);
			if (ret) {
				ppd->link_enabled = tmp;
				break;
			}
			ppd->remote_link_down_reason = 0;

			if (ppd->driver_link_ready)
				ppd->link_enabled = 1;
		}

		/*
		 * NOTE(review): on failure set_local_link_attributes() hands
		 * back the status of the failing call, which may not be a
		 * -errno; it is returned to our caller as is.
		 */
		ret = set_local_link_attributes(ppd);
		if (ret)
			break;

		ppd->port_error_action = 0;
		ppd->host_link_state = HLS_DN_POLL;

		if (quick_linkup) {
			/* quick linkup does not go into polling */
			ret = do_quick_linkup(dd);
		} else {
			ret1 = set_physical_link_state(dd, PLS_POLLING);
			if (ret1 != HCMD_SUCCESS) {
				dd_dev_err(dd,
					   "Failed to transition to Polling link state, return 0x%x\n",
					   ret1);
				ret = -EINVAL;
			}
		}
		ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
		/*
		 * If an error occurred above, go back to offline.  The
		 * caller may reschedule another attempt.
		 */
		if (ret)
			goto_offline(ppd, 0);
		break;
	case HLS_DN_DISABLE:
		/* link is disabled */
		ppd->link_enabled = 0;

		/* allow any state to transition to disabled */

		/* must transition to offline first */
		if (ppd->host_link_state != HLS_DN_OFFLINE) {
			ret = goto_offline(ppd, ppd->remote_link_down_reason);
			if (ret)
				break;
			ppd->remote_link_down_reason = 0;
		}

		ret1 = set_physical_link_state(dd, PLS_DISABLED);
		if (ret1 != HCMD_SUCCESS) {
			dd_dev_err(dd,
				   "Failed to transition to Disabled link state, return 0x%x\n",
				   ret1);
			ret = -EINVAL;
			break;
		}
		ppd->host_link_state = HLS_DN_DISABLE;
		dc_shutdown(dd);
		break;
	case HLS_DN_OFFLINE:
		if (ppd->host_link_state == HLS_DN_DISABLE)
			dc_start(dd);

		/* allow any state to transition to offline */
		ret = goto_offline(ppd, ppd->remote_link_down_reason);
		if (!ret)
			ppd->remote_link_down_reason = 0;
		break;
	case HLS_VERIFY_CAP:
		if (ppd->host_link_state != HLS_DN_POLL)
			goto unexpected;
		ppd->host_link_state = HLS_VERIFY_CAP;
		break;
	case HLS_GOING_UP:
		if (ppd->host_link_state != HLS_VERIFY_CAP)
			goto unexpected;

		ret1 = set_physical_link_state(dd, PLS_LINKUP);
		if (ret1 != HCMD_SUCCESS) {
			dd_dev_err(dd,
				   "Failed to transition to link up state, return 0x%x\n",
				   ret1);
			ret = -EINVAL;
			break;
		}
		ppd->host_link_state = HLS_GOING_UP;
		break;

	case HLS_GOING_OFFLINE:		/* transient within goto_offline() */
	case HLS_LINK_COOLDOWN:		/* transient within goto_offline() */
	default:
		dd_dev_info(dd, "%s: state 0x%x: not supported\n",
			    __func__, state);
		ret = -EINVAL;
		break;
	}

	is_down = !!(ppd->host_link_state & (HLS_DN_POLL |
			HLS_DN_DISABLE | HLS_DN_OFFLINE));

	/*
	 * On an up -> down change, copy the latest link down reasons into
	 * the .sma fields unless those already hold a value.
	 */
	if (was_up && is_down && ppd->local_link_down_reason.sma == 0 &&
	    ppd->neigh_link_down_reason.sma == 0) {
		ppd->local_link_down_reason.sma =
		  ppd->local_link_down_reason.latest;
		ppd->neigh_link_down_reason.sma =
		  ppd->neigh_link_down_reason.latest;
	}

	goto done;

unexpected:
	dd_dev_err(dd, "%s: unexpected state transition from %s to %s\n",
		   __func__, link_state_name(ppd->host_link_state),
		   link_state_name(state));
	ret = -EINVAL;

done:
mutex_unlock(&ppd->hls_lock);

	/* event.device is only set once the port has reached ACTIVE */
	if (event.device)
		ib_dispatch_event(&event);

	return ret;
}

/*
 * Apply one port configuration setting.
 *
 * @ppd: per port data
 * @which: HFI1_IB_CFG_* selector
 * @val: new value; ignored by the selectors that only trigger a
 *	 re-programming from ppd state (LIDLMC, MTU, PKEYS)
 *
 * Returns 0 on success, -EINVAL for an unsupported link default or an
 * OP_VLS change on a port whose port number is 0, or the result of
 * sdma_map_init() for OP_VLS.  Selectors that are not implemented return 0
 * and, when the PRINT_UNIMPL capability is set, log a message.
 */
int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val)
{
	u64 reg;
	int ret = 0;

	switch (which) {
	case HFI1_IB_CFG_LIDLMC:
		set_lidlmc(ppd);
		break;
	case HFI1_IB_CFG_VL_HIGH_LIMIT:
		/*
		 * The VL Arbitrator high limit is sent in units of 4k
		 * bytes, while HFI stores it in units of 64 bytes.
		 */
		val *= 4096/64;
		reg = ((u64)val & SEND_HIGH_PRIORITY_LIMIT_LIMIT_MASK)
			<< SEND_HIGH_PRIORITY_LIMIT_LIMIT_SHIFT;
		write_csr(ppd->dd, SEND_HIGH_PRIORITY_LIMIT, reg);
		break;
	case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
		/* HFI only supports POLL as the default link down state */
		if (val != HLS_DN_POLL)
			ret = -EINVAL;
		break;
	case HFI1_IB_CFG_OP_VLS:
		if (ppd->vls_operational != val) {
			/* the new value is stored before the map is rebuilt */
			ppd->vls_operational = val;
			if (!ppd->port)
				ret = -EINVAL;
			else
				ret = sdma_map_init(
					ppd->dd,
					ppd->port - 1,
					val,
					NULL);
		}
		break;
	/*
	 * For link width, link width downgrade, and speed enable, always AND
	 * the setting with what is actually supported.  This has two benefits.
	 * First, enabled can't have unsupported values, no matter what the
	 * SM or FM might want.  Second, the ALL_SUPPORTED wildcards that mean
	 * "fill in with your supported value" have all the bits in the
	 * field set, so simply ANDing with supported has the desired result.
	 */
	case HFI1_IB_CFG_LWID_ENB: /* set allowed Link-width */
		ppd->link_width_enabled = val & ppd->link_width_supported;
		break;
	case HFI1_IB_CFG_LWID_DG_ENB: /* set allowed link width downgrade */
		ppd->link_width_downgrade_enabled =
				val & ppd->link_width_downgrade_supported;
		break;
	case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
		ppd->link_speed_enabled = val & ppd->link_speed_supported;
		break;
	case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
		/*
		 * HFI does not follow IB specs, save this value
		 * so we can report it, if asked.
		 */
		ppd->overrun_threshold = val;
		break;
	case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
		/*
		 * HFI does not follow IB specs, save this value
		 * so we can report it, if asked.
		 */
		ppd->phy_error_threshold = val;
		break;

	case HFI1_IB_CFG_MTU:
		set_send_length(ppd);
		break;

	case HFI1_IB_CFG_PKEYS:
		if (HFI1_CAP_IS_KSET(PKEY_CHECK))
			set_partition_keys(ppd);
		break;

	default:
		if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
			dd_dev_info(ppd->dd,
				    "%s: which %s, val 0x%x: not implemented\n",
				    __func__, ib_cfg_name(which), val);
		break;
	}
	return ret;
}

/* begin functions related to vl arbitration table caching */
/*
 * Initialize the lock of every VL arbitration cache table of the port.
 * The table contents themselves are not touched here.
 */
static void init_vl_arb_caches(struct hfi1_pportdata *ppd)
{
	int i;

	/* the cache tables are sized for both the low and high tables */
	BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
			VL_ARB_LOW_PRIO_TABLE_SIZE);
	BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
			VL_ARB_HIGH_PRIO_TABLE_SIZE);

	/*
	 * Note that we always return values directly from the
	 * 'vl_arb_cache' (and do no CSR reads) in response to a
	 * 'Get(VLArbTable)'. This is obviously correct after a
	 * 'Set(VLArbTable)', since the cache will then be up to
	 * date. But it's also correct prior to any 'Set(VLArbTable)'
	 * since then both the cache, and the relevant h/w registers
	 * will be zeroed.
	 */

	for (i = 0; i < MAX_PRIO_TABLE; i++)
		spin_lock_init(&ppd->vl_arb_cache[i].lock);
}

/*
 * vl_arb_lock_cache
 *
 * All other vl_arb_* functions should be called only after locking
 * the cache.
*/ static inline struct vl_arb_cache * vl_arb_lock_cache(struct hfi1_pportdata *ppd, int idx) { if (idx != LO_PRIO_TABLE && idx != HI_PRIO_TABLE) return NULL; spin_lock(&ppd->vl_arb_cache[idx].lock); return &ppd->vl_arb_cache[idx]; } static inline void vl_arb_unlock_cache(struct hfi1_pportdata *ppd, int idx) { spin_unlock(&ppd->vl_arb_cache[idx].lock); } static void vl_arb_get_cache(struct vl_arb_cache *cache, struct ib_vl_weight_elem *vl) { memcpy(vl, cache->table, VL_ARB_TABLE_SIZE * sizeof(*vl)); } static void vl_arb_set_cache(struct vl_arb_cache *cache, struct ib_vl_weight_elem *vl) { memcpy(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl)); } static int vl_arb_match_cache(struct vl_arb_cache *cache, struct ib_vl_weight_elem *vl) { return !memcmp(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl)); } /* end functions related to vl arbitration table caching */ static int set_vl_weights(struct hfi1_pportdata *ppd, u32 target, u32 size, struct ib_vl_weight_elem *vl) { struct hfi1_devdata *dd = ppd->dd; u64 reg; unsigned int i, is_up = 0; int drain, ret = 0; mutex_lock(&ppd->hls_lock); if (ppd->host_link_state & HLS_UP) is_up = 1; drain = !is_ax(dd) && is_up; if (drain) /* * Before adjusting VL arbitration weights, empty per-VL * FIFOs, otherwise a packet whose VL weight is being * set to 0 could get stuck in a FIFO with no chance to * egress. */ ret = stop_drain_data_vls(dd); if (ret) { dd_dev_err( dd, "%s: cannot stop/drain VLs - refusing to change VL arbitration weights\n", __func__); goto err; } for (i = 0; i < size; i++, vl++) { /* * NOTE: The low priority shift and mask are used here, but * they are the same for both the low and high registers. 
*/ reg = (((u64)vl->vl & SEND_LOW_PRIORITY_LIST_VL_MASK) << SEND_LOW_PRIORITY_LIST_VL_SHIFT) | (((u64)vl->weight & SEND_LOW_PRIORITY_LIST_WEIGHT_MASK) << SEND_LOW_PRIORITY_LIST_WEIGHT_SHIFT); write_csr(dd, target + (i * 8), reg); } pio_send_control(dd, PSC_GLOBAL_VLARB_ENABLE); if (drain) open_fill_data_vls(dd); /* reopen all VLs */ err: mutex_unlock(&ppd->hls_lock); return ret; } /* * Read one credit merge VL register. */ static void read_one_cm_vl(struct hfi1_devdata *dd, u32 csr, struct vl_limit *vll) { u64 reg = read_csr(dd, csr); vll->dedicated = cpu_to_be16( (reg >> SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT) & SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_MASK); vll->shared = cpu_to_be16( (reg >> SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT) & SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_MASK); } /* * Read the current credit merge limits. */ static int get_buffer_control(struct hfi1_devdata *dd, struct buffer_control *bc, u16 *overall_limit) { u64 reg; int i; /* not all entries are filled in */ memset(bc, 0, sizeof(*bc)); /* OPA and HFI have a 1-1 mapping */ for (i = 0; i < TXE_NUM_DATA_VL; i++) read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8*i), &bc->vl[i]); /* NOTE: assumes that VL* and VL15 CSRs are bit-wise identical */ read_one_cm_vl(dd, SEND_CM_CREDIT_VL15, &bc->vl[15]); reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT); bc->overall_shared_limit = cpu_to_be16( (reg >> SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT) & SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_MASK); if (overall_limit) *overall_limit = (reg >> SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT) & SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_MASK; return sizeof(struct buffer_control); } static int get_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp) { u64 reg; int i; /* each register contains 16 SC->VLnt mappings, 4 bits each */ reg = read_csr(dd, DCC_CFG_SC_VL_TABLE_15_0); for (i = 0; i < sizeof(u64); i++) { u8 byte = *(((u8 *)®) + i); dp->vlnt[2 * i] = byte & 0xf; dp->vlnt[(2 * i) + 1] = (byte & 0xf0) >> 4; } reg = read_csr(dd, 
DCC_CFG_SC_VL_TABLE_31_16); for (i = 0; i < sizeof(u64); i++) { u8 byte = *(((u8 *)®) + i); dp->vlnt[16 + (2 * i)] = byte & 0xf; dp->vlnt[16 + (2 * i) + 1] = (byte & 0xf0) >> 4; } return sizeof(struct sc2vlnt); } static void get_vlarb_preempt(struct hfi1_devdata *dd, u32 nelems, struct ib_vl_weight_elem *vl) { unsigned int i; for (i = 0; i < nelems; i++, vl++) { vl->vl = 0xf; vl->weight = 0; } } static void set_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp) { write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0, DC_SC_VL_VAL(15_0, 0, dp->vlnt[0] & 0xf, 1, dp->vlnt[1] & 0xf, 2, dp->vlnt[2] & 0xf, 3, dp->vlnt[3] & 0xf, 4, dp->vlnt[4] & 0xf, 5, dp->vlnt[5] & 0xf, 6, dp->vlnt[6] & 0xf, 7, dp->vlnt[7] & 0xf, 8, dp->vlnt[8] & 0xf, 9, dp->vlnt[9] & 0xf, 10, dp->vlnt[10] & 0xf, 11, dp->vlnt[11] & 0xf, 12, dp->vlnt[12] & 0xf, 13, dp->vlnt[13] & 0xf, 14, dp->vlnt[14] & 0xf, 15, dp->vlnt[15] & 0xf)); write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16, DC_SC_VL_VAL(31_16, 16, dp->vlnt[16] & 0xf, 17, dp->vlnt[17] & 0xf, 18, dp->vlnt[18] & 0xf, 19, dp->vlnt[19] & 0xf, 20, dp->vlnt[20] & 0xf, 21, dp->vlnt[21] & 0xf, 22, dp->vlnt[22] & 0xf, 23, dp->vlnt[23] & 0xf, 24, dp->vlnt[24] & 0xf, 25, dp->vlnt[25] & 0xf, 26, dp->vlnt[26] & 0xf, 27, dp->vlnt[27] & 0xf, 28, dp->vlnt[28] & 0xf, 29, dp->vlnt[29] & 0xf, 30, dp->vlnt[30] & 0xf, 31, dp->vlnt[31] & 0xf)); } static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what, u16 limit) { if (limit != 0) dd_dev_info(dd, "Invalid %s limit %d on VL %d, ignoring\n", what, (int)limit, idx); } /* change only the shared limit portion of SendCmGLobalCredit */ static void set_global_shared(struct hfi1_devdata *dd, u16 limit) { u64 reg; reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT); reg &= ~SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SMASK; reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT; write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg); } /* change only the total credit limit portion of SendCmGLobalCredit */ static void set_global_limit(struct hfi1_devdata 
*dd, u16 limit) { u64 reg; reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT); reg &= ~SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SMASK; reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT; write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg); } /* set the given per-VL shared limit */ static void set_vl_shared(struct hfi1_devdata *dd, int vl, u16 limit) { u64 reg; u32 addr; if (vl < TXE_NUM_DATA_VL) addr = SEND_CM_CREDIT_VL + (8 * vl); else addr = SEND_CM_CREDIT_VL15; reg = read_csr(dd, addr); reg &= ~SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SMASK; reg |= (u64)limit << SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT; write_csr(dd, addr, reg); } /* set the given per-VL dedicated limit */ static void set_vl_dedicated(struct hfi1_devdata *dd, int vl, u16 limit) { u64 reg; u32 addr; if (vl < TXE_NUM_DATA_VL) addr = SEND_CM_CREDIT_VL + (8 * vl); else addr = SEND_CM_CREDIT_VL15; reg = read_csr(dd, addr); reg &= ~SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SMASK; reg |= (u64)limit << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT; write_csr(dd, addr, reg); } /* spin until the given per-VL status mask bits clear */ static void wait_for_vl_status_clear(struct hfi1_devdata *dd, u64 mask, const char *which) { unsigned long timeout; u64 reg; timeout = jiffies + msecs_to_jiffies(VL_STATUS_CLEAR_TIMEOUT); while (1) { reg = read_csr(dd, SEND_CM_CREDIT_USED_STATUS) & mask; if (reg == 0) return; /* success */ if (time_after(jiffies, timeout)) break; /* timed out */ udelay(1); } dd_dev_err(dd, "%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n", which, VL_STATUS_CLEAR_TIMEOUT, mask, reg); /* * If this occurs, it is likely there was a credit loss on the link. * The only recovery from that is a link bounce. */ dd_dev_err(dd, "Continuing anyway. A credit loss may occur. Suggest a link bounce\n"); } /* * The number of credits on the VLs may be changed while everything * is "live", but the following algorithm must be followed due to * how the hardware is actually implemented. 
In particular,
 * Return_Credit_Status[] is the only correct status check.
 *
 * if (reducing Global_Shared_Credit_Limit or any shared limit changing)
 *     set Global_Shared_Credit_Limit = 0
 *     use_all_vl = 1
 * mask0 = all VLs that are changing either dedicated or shared limits
 * set Shared_Limit[mask0] = 0
 * spin until Return_Credit_Status[use_all_vl ? all VL : mask0] == 0
 * if (changing any dedicated limit)
 *     mask1 = all VLs that are lowering dedicated limits
 *     lower Dedicated_Limit[mask1]
 *     spin until Return_Credit_Status[mask1] == 0
 *     raise Dedicated_Limits
 * raise Shared_Limits
 * raise Global_Shared_Credit_Limit
 *
 * lower = if the new limit is lower, set the limit to the new value
 * raise = if the new limit is higher than the current value (may be changed
 *	earlier in the algorithm), set the new limit to the new value
 *
 * Limits in @new_bc are big-endian 16-bit values.  Entries for VLs that
 * are neither data VLs nor VL15 are reported if non-zero and then zeroed
 * in @new_bc itself.
 *
 * Return: 0 on success, -EINVAL when the requested dedicated limits plus
 * the overall shared limit exceed dd->link_credits (nothing is written
 * to the hardware in that case).
 */
static int set_buffer_control(struct hfi1_devdata *dd,
			      struct buffer_control *new_bc)
{
	u64 changing_mask, ld_mask, stat_mask;
	int change_count;
	int i, use_all_mask;
	int this_shared_changing;
	/*
	 * A0: add the variable any_shared_limit_changing below and in the
	 * algorithm above.  If removing A0 support, it can be removed.
	 */
	int any_shared_limit_changing;
	struct buffer_control cur_bc;
	u8 changing[OPA_MAX_VLS];
	u8 lowering_dedicated[OPA_MAX_VLS];
	u16 cur_total;
	u32 new_total = 0;
	/* status bits of every VL that valid_vl() accepts */
	const u64 all_mask =
	SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK
	 | SEND_CM_CREDIT_USED_STATUS_VL1_RETURN_CREDIT_STATUS_SMASK
	 | SEND_CM_CREDIT_USED_STATUS_VL2_RETURN_CREDIT_STATUS_SMASK
	 | SEND_CM_CREDIT_USED_STATUS_VL3_RETURN_CREDIT_STATUS_SMASK
	 | SEND_CM_CREDIT_USED_STATUS_VL4_RETURN_CREDIT_STATUS_SMASK
	 | SEND_CM_CREDIT_USED_STATUS_VL5_RETURN_CREDIT_STATUS_SMASK
	 | SEND_CM_CREDIT_USED_STATUS_VL6_RETURN_CREDIT_STATUS_SMASK
	 | SEND_CM_CREDIT_USED_STATUS_VL7_RETURN_CREDIT_STATUS_SMASK
	 | SEND_CM_CREDIT_USED_STATUS_VL15_RETURN_CREDIT_STATUS_SMASK;

#define valid_vl(idx) ((idx) < TXE_NUM_DATA_VL || (idx) == 15)
#define NUM_USABLE_VLS 16	/* look at VL15 and less */

	/* find the new total credits, do sanity check on unused VLs */
	for (i = 0; i < OPA_MAX_VLS; i++) {
		if (valid_vl(i)) {
			new_total += be16_to_cpu(new_bc->vl[i].dedicated);
			continue;
		}
		nonzero_msg(dd, i, "dedicated",
			be16_to_cpu(new_bc->vl[i].dedicated));
		nonzero_msg(dd, i, "shared",
			be16_to_cpu(new_bc->vl[i].shared));
		new_bc->vl[i].dedicated = 0;
		new_bc->vl[i].shared = 0;
	}
	new_total += be16_to_cpu(new_bc->overall_shared_limit);
	if (new_total > (u32)dd->link_credits)
		return -EINVAL;
	/* fetch the current values */
	get_buffer_control(dd, &cur_bc, &cur_total);

	/*
	 * Create the masks we will use.
	 */
	memset(changing, 0, sizeof(changing));
	memset(lowering_dedicated, 0, sizeof(lowering_dedicated));
	/* NOTE: Assumes that the individual VL bits are adjacent and in
	   increasing order */
	stat_mask =
		SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK;
	changing_mask = 0;
	ld_mask = 0;
	change_count = 0;
	any_shared_limit_changing = 0;
	for (i = 0; i < NUM_USABLE_VLS; i++, stat_mask <<= 1) {
		if (!valid_vl(i))
			continue;
		/* equality only, so the big-endian values compare directly */
		this_shared_changing = new_bc->vl[i].shared
						!= cur_bc.vl[i].shared;
		if (this_shared_changing)
			any_shared_limit_changing = 1;
		if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated
				|| this_shared_changing) {
			changing[i] = 1;
			changing_mask |= stat_mask;
			change_count++;
		}
		/* ordering comparison: convert to host byte order first */
		if (be16_to_cpu(new_bc->vl[i].dedicated) <
					be16_to_cpu(cur_bc.vl[i].dedicated)) {
			lowering_dedicated[i] = 1;
			ld_mask |= stat_mask;
		}
	}

	/* bracket the credit change with a total adjustment */
	if (new_total > cur_total)
		set_global_limit(dd, new_total);

	/*
	 * Start the credit change algorithm.
	 */
	use_all_mask = 0;
	if ((be16_to_cpu(new_bc->overall_shared_limit) <
				be16_to_cpu(cur_bc.overall_shared_limit))
			|| (is_a0(dd) && any_shared_limit_changing)) {
		set_global_shared(dd, 0);
		/* track the zeroed value so the final "raise" step sees it */
		cur_bc.overall_shared_limit = 0;
		use_all_mask = 1;
	}

	for (i = 0; i < NUM_USABLE_VLS; i++) {
		if (!valid_vl(i))
			continue;

		if (changing[i]) {
			set_vl_shared(dd, i, 0);
			/* track the zeroed value for the later "raise" step */
			cur_bc.vl[i].shared = 0;
		}
	}

	wait_for_vl_status_clear(dd, use_all_mask ? all_mask : changing_mask,
		"shared");

	if (change_count > 0) {
		/* first lower every dedicated limit that is going down */
		for (i = 0; i < NUM_USABLE_VLS; i++) {
			if (!valid_vl(i))
				continue;

			if (lowering_dedicated[i]) {
				set_vl_dedicated(dd, i,
					be16_to_cpu(new_bc->vl[i].dedicated));
				cur_bc.vl[i].dedicated =
						new_bc->vl[i].dedicated;
			}
		}

		wait_for_vl_status_clear(dd, ld_mask, "dedicated");

		/* now raise all dedicated that are going up */
		for (i = 0; i < NUM_USABLE_VLS; i++) {
			if (!valid_vl(i))
				continue;

			if (be16_to_cpu(new_bc->vl[i].dedicated) >
					be16_to_cpu(cur_bc.vl[i].dedicated))
				set_vl_dedicated(dd, i,
					be16_to_cpu(new_bc->vl[i].dedicated));
		}
	}

	/* next raise all shared that are going up */
	for (i = 0; i < NUM_USABLE_VLS; i++) {
		if (!valid_vl(i))
			continue;

		if (be16_to_cpu(new_bc->vl[i].shared) >
				be16_to_cpu(cur_bc.vl[i].shared))
			set_vl_shared(dd, i,
				be16_to_cpu(new_bc->vl[i].shared));
	}

	/* finally raise the global shared */
	if (be16_to_cpu(new_bc->overall_shared_limit) >
			be16_to_cpu(cur_bc.overall_shared_limit))
		set_global_shared(dd,
			be16_to_cpu(new_bc->overall_shared_limit));

	/* bracket the credit change with a total adjustment */
	if (new_total < cur_total)
		set_global_limit(dd, new_total);
	return 0;
}

/*
 * Read the given fabric manager table. Return the size of the
 * table (in bytes) on success, and a negative error code on
 * failure.
 *
 * @which selects the table (FM_TBL_*) and @t is the caller's buffer.
 * An unknown @which returns -EINVAL.
 */
int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t)

{
	int size;
	struct vl_arb_cache *vlc;

	switch (which) {
	case FM_TBL_VL_HIGH_ARB:
		size = 256;
		/*
		 * OPA specifies 128 elements (of 2 bytes each), though
		 * HFI supports only 16 elements in h/w.
		 */
		/*
		 * NOTE(review): vl_arb_get_cache() copies only
		 * VL_ARB_TABLE_SIZE elements; the remainder of the reported
		 * size is not written here - presumably the caller
		 * pre-zeroes @t, confirm.
		 */
		vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
		vl_arb_get_cache(vlc, t);
		vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
		break;
	case FM_TBL_VL_LOW_ARB:
		size = 256;
		/*
		 * OPA specifies 128 elements (of 2 bytes each), though
		 * HFI supports only 16 elements in h/w.
		 */
		vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
		vl_arb_get_cache(vlc, t);
		vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
		break;
	case FM_TBL_BUFFER_CONTROL:
		size = get_buffer_control(ppd->dd, t, NULL);
		break;
	case FM_TBL_SC2VLNT:
		size = get_sc2vlnt(ppd->dd, t);
		break;
	case FM_TBL_VL_PREEMPT_ELEMS:
		size = 256;
		/* OPA specifies 128 elements, of 2 bytes each */
		get_vlarb_preempt(ppd->dd, OPA_MAX_VLS, t);
		break;
	case FM_TBL_VL_PREEMPT_MATRIX:
		size = 256;
		/*
		 * OPA specifies that this is the same size as the VL
		 * arbitration tables (i.e., 256 bytes).
		 */
		/* nothing is written to @t for this table */
		break;
	default:
		return -EINVAL;
	}
	return size;
}

/*
 * Write the given fabric manager table.
 *
 * For the two VL arbitration tables the hardware is only reprogrammed
 * when @t differs from the cached copy; the cache is updated under its
 * lock and the lock is dropped before set_vl_weights() is called.
 *
 * Return: 0 on success (including "table unchanged"), the result of
 * set_vl_weights()/set_buffer_control(), or -EINVAL for an unknown @which.
 */
int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t)
{
	int ret = 0;
	struct vl_arb_cache *vlc;

	switch (which) {
	case FM_TBL_VL_HIGH_ARB:
		vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
		if (vl_arb_match_cache(vlc, t)) {
			vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
			break;
		}
		vl_arb_set_cache(vlc, t);
		vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
		ret = set_vl_weights(ppd, SEND_HIGH_PRIORITY_LIST,
				     VL_ARB_HIGH_PRIO_TABLE_SIZE, t);
		break;
	case FM_TBL_VL_LOW_ARB:
		vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
		if (vl_arb_match_cache(vlc, t)) {
			vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
			break;
		}
		vl_arb_set_cache(vlc, t);
		vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
		ret = set_vl_weights(ppd, SEND_LOW_PRIORITY_LIST,
				     VL_ARB_LOW_PRIO_TABLE_SIZE, t);
		break;
	case FM_TBL_BUFFER_CONTROL:
		ret = set_buffer_control(ppd->dd, t);
		break;
	case FM_TBL_SC2VLNT:
		set_sc2vlnt(ppd->dd, t);
		break;
	default:
		ret = -EINVAL;
	}
	return ret;
}

/*
 * Disable all data VLs.
 *
 * Return 0 if disabled, non-zero if the VLs cannot be disabled.
 */
static int disable_data_vls(struct hfi1_devdata *dd)
{
	/* on A0 the request is refused and nothing is written */
	if (is_a0(dd))
		return 1;

	pio_send_control(dd, PSC_DATA_VL_DISABLE);

	return 0;
}

/*
 * open_fill_data_vls() - the counterpart to stop_drain_data_vls().
 * Just re-enables all data VLs (the "fill" part happens
 * automatically - the name was chosen for symmetry with
 * stop_drain_data_vls()).
* * Return 0 if successful, non-zero if the VLs cannot be enabled. */ int open_fill_data_vls(struct hfi1_devdata *dd) { if (is_a0(dd)) return 1; pio_send_control(dd, PSC_DATA_VL_ENABLE); return 0; } /* * drain_data_vls() - assumes that disable_data_vls() has been called, * wait for occupancy (of per-VL FIFOs) for all contexts, and SDMA * engines to drop to 0. */ static void drain_data_vls(struct hfi1_devdata *dd) { sc_wait(dd); sdma_wait(dd); pause_for_credit_return(dd); } /* * stop_drain_data_vls() - disable, then drain all per-VL fifos. * * Use open_fill_data_vls() to resume using data VLs. This pair is * meant to be used like this: * * stop_drain_data_vls(dd); * // do things with per-VL resources * open_fill_data_vls(dd); */ int stop_drain_data_vls(struct hfi1_devdata *dd) { int ret; ret = disable_data_vls(dd); if (ret == 0) drain_data_vls(dd); return ret; } /* * Convert a nanosecond time to a cclock count. No matter how slow * the cclock, a non-zero ns will always have a non-zero result. */ u32 ns_to_cclock(struct hfi1_devdata *dd, u32 ns) { u32 cclocks; if (dd->icode == ICODE_FPGA_EMULATION) cclocks = (ns * 1000) / FPGA_CCLOCK_PS; else /* simulation pretends to be ASIC */ cclocks = (ns * 1000) / ASIC_CCLOCK_PS; if (ns && !cclocks) /* if ns nonzero, must be at least 1 */ cclocks = 1; return cclocks; } /* * Convert a cclock count to nanoseconds. Not matter how slow * the cclock, a non-zero cclocks will always have a non-zero result. */ u32 cclock_to_ns(struct hfi1_devdata *dd, u32 cclocks) { u32 ns; if (dd->icode == ICODE_FPGA_EMULATION) ns = (cclocks * FPGA_CCLOCK_PS) / 1000; else /* simulation pretends to be ASIC */ ns = (cclocks * ASIC_CCLOCK_PS) / 1000; if (cclocks && !ns) ns = 1; return ns; } /* * Dynamically adjust the receive interrupt timeout for a context based on * incoming packet rate. * * NOTE: Dynamic adjustment does not allow rcv_intr_count to be zero. 
*/
static void adjust_rcv_timeout(struct hfi1_ctxtdata *rcd, u32 npkts)
{
	struct hfi1_devdata *dd = rcd->dd;
	u32 timeout = rcd->rcvavail_timeout;

	/*
	 * This algorithm doubles or halves the timeout depending on whether
	 * the number of packets received in this interrupt was less than, or
	 * greater than or equal to, the interrupt count.
	 *
	 * The calculations below do not allow a steady state to be achieved.
	 * Only at the endpoints it is possible to have an unchanging
	 * timeout.
	 */
	if (npkts < rcv_intr_count) {
		/*
		 * Not enough packets arrived before the timeout, adjust
		 * timeout downward.
		 */
		if (timeout < 2) /* already at minimum? */
			return;
		timeout >>= 1;
	} else {
		/*
		 * More than enough packets arrived before the timeout, adjust
		 * timeout upward.
		 */
		if (timeout >= dd->rcv_intr_timeout_csr) /* already at max? */
			return;
		timeout = min(timeout << 1, dd->rcv_intr_timeout_csr);
	}

	/* only reached when the timeout actually changed */
	rcd->rcvavail_timeout = timeout;
	/* timeout cannot be larger than rcv_intr_timeout_csr which has already
	   been verified to be in range */
	write_kctxt_csr(dd, rcd->ctxt, RCV_AVAIL_TIME_OUT,
		(u64)timeout << RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
}

/*
 * update_usrhead() - publish new receive head values for a context.
 * @rcd:         receive context
 * @hd:          new receive header queue head
 * @updegr:      non-zero to also write the eager index head
 * @egrhd:       new eager index head (used only when @updegr is set)
 * @intr_adjust: non-zero to run the dynamic interrupt timeout adjustment
 * @npkts:       packet count passed to that adjustment
 *
 * RcvHdrHead is always written, together with rcv_intr_count in its
 * counter field.
 */
void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
		    u32 intr_adjust, u32 npkts)
{
	struct hfi1_devdata *dd = rcd->dd;
	u64 reg;
	u32 ctxt = rcd->ctxt;

	/*
	 * Need to write timeout register before updating RcvHdrHead to ensure
	 * that a new value is used when the HW decides to restart counting.
	 */
	if (intr_adjust)
		adjust_rcv_timeout(rcd, npkts);
	if (updegr) {
		reg = (egrhd & RCV_EGR_INDEX_HEAD_HEAD_MASK)
			<< RCV_EGR_INDEX_HEAD_HEAD_SHIFT;
		write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, reg);
	}
	/* barrier between the writes above and the RcvHdrHead update */
	mmiowb();
	reg = ((u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT) |
		(((u64)hd & RCV_HDR_HEAD_HEAD_MASK)
			<< RCV_HDR_HEAD_HEAD_SHIFT);
	write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
	mmiowb();
}

/*
 * hdrqempty() - report whether the receive header queue of @rcd is empty.
 *
 * The head is read from the RcvHdrHead CSR.  The tail comes from
 * get_rcvhdrtail() when rcd->rcvhdrtail_kvaddr is set, otherwise from
 * the RcvHdrTail CSR.
 *
 * Return: non-zero when head equals tail.
 */
u32 hdrqempty(struct hfi1_ctxtdata *rcd)
{
	u32 head, tail;

	head = (read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD)
		& RCV_HDR_HEAD_HEAD_SMASK) >> RCV_HDR_HEAD_HEAD_SHIFT;

	if (rcd->rcvhdrtail_kvaddr)
		tail = get_rcvhdrtail(rcd);
	else
		tail = read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);

	return head == tail;
}

/*
 * Context Control and Receive Array encoding for buffer size:
 *	0x0 invalid
 *	0x1   4 KB
 *	0x2   8 KB
 *	0x3  16 KB
 *	0x4  32 KB
 *	0x5  64 KB
 *	0x6 128 KB
 *	0x7 256 KB
 *	0x8 512 KB (Receive Array only)
 *	0x9   1 MB (Receive Array only)
 *	0xa   2 MB (Receive Array only)
 *
 *	0xB-0xF - reserved (Receive Array only)
 *
 *
 * This routine assumes that the value has already been sanity checked.
*/
static u32 encoded_size(u32 size)
{
	switch (size) {
	case   4*1024: return 0x1;
	case   8*1024: return 0x2;
	case  16*1024: return 0x3;
	case  32*1024: return 0x4;
	case  64*1024: return 0x5;
	case 128*1024: return 0x6;
	case 256*1024: return 0x7;
	case 512*1024: return 0x8;
	case   1*1024*1024: return 0x9;
	case   2*1024*1024: return 0xa;
	}
	return 0x1;	/* if invalid, go with the minimum size */
}

/*
 * hfi1_rcvctrl() - change the receive control settings of one context.
 * @dd:   device data
 * @op:   bitmask of HFI1_RCVCTRL_* operations to apply
 * @ctxt: receive context index; nothing is done if dd->rcd[ctxt] is NULL
 *
 * The current RcvCtxtCtrl value is read, modified according to @op,
 * cached in rcd->rcvctrl and written back.  Enabling a context that is
 * not yet enabled additionally reprograms its header queue, eager and
 * TID registers before the control write, and its interrupt timeout and
 * count after it.
 */
void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
{
	struct hfi1_ctxtdata *rcd;
	u64 rcvctrl, reg;
	int did_enable = 0;

	rcd = dd->rcd[ctxt];
	if (!rcd)
		return;

	hfi1_cdbg(RCVCTRL, "ctxt %d op 0x%x", ctxt, op);

	rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL);
	/* if the context already enabled, don't do the extra steps */
	if ((op & HFI1_RCVCTRL_CTXT_ENB)
			&& !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
		/* reset the tail and hdr addresses, and sequence count */
		write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
				rcd->rcvhdrq_phys);
		if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
			write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
					rcd->rcvhdrqtailaddr_phys);
		rcd->seq_cnt = 1;

		/* reset the cached receive header queue head value */
		rcd->head = 0;

		/*
		 * Zero the receive header queue so we don't get false
		 * positives when checking the sequence number.  The
		 * sequence numbers could land exactly on the same spot.
		 * E.g. a rcd restart before the receive header wrapped.
		 */
		memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size);

		/* starting timeout */
		rcd->rcvavail_timeout = dd->rcv_intr_timeout_csr;

		/* enable the context */
		rcvctrl |= RCV_CTXT_CTRL_ENABLE_SMASK;

		/* clean the egr buffer size first */
		rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
		rcvctrl |= ((u64)encoded_size(rcd->egrbufs.rcvtid_size)
				& RCV_CTXT_CTRL_EGR_BUF_SIZE_MASK)
					<< RCV_CTXT_CTRL_EGR_BUF_SIZE_SHIFT;

		/* zero RcvHdrHead - set RcvHdrHead.Counter after enable */
		write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0);
		did_enable = 1;

		/* zero RcvEgrIndexHead */
		write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, 0);

		/* set eager count and base index */
		reg = (((u64)(rcd->egrbufs.alloced >> RCV_SHIFT)
					& RCV_EGR_CTRL_EGR_CNT_MASK)
		       << RCV_EGR_CTRL_EGR_CNT_SHIFT) |
			(((rcd->eager_base >> RCV_SHIFT)
			  & RCV_EGR_CTRL_EGR_BASE_INDEX_MASK)
			 << RCV_EGR_CTRL_EGR_BASE_INDEX_SHIFT);
		write_kctxt_csr(dd, ctxt, RCV_EGR_CTRL, reg);

		/*
		 * Set TID (expected) count and base index.
		 * rcd->expected_count is set to individual RcvArray entries,
		 * not pairs, and the CSR takes a pair-count in groups of
		 * four, so divide by 8.
		 */
		reg = (((rcd->expected_count >> RCV_SHIFT)
					& RCV_TID_CTRL_TID_PAIR_CNT_MASK)
		       << RCV_TID_CTRL_TID_PAIR_CNT_SHIFT) |
		      (((rcd->expected_base >> RCV_SHIFT)
					& RCV_TID_CTRL_TID_BASE_INDEX_MASK)
		       << RCV_TID_CTRL_TID_BASE_INDEX_SHIFT);
		write_kctxt_csr(dd, ctxt, RCV_TID_CTRL, reg);
		if (ctxt == VL15CTXT)
			write_csr(dd, RCV_VL15, VL15CTXT);
	}
	if (op & HFI1_RCVCTRL_CTXT_DIS) {
		/*
		 * NOTE(review): RCV_VL15 is cleared when any context is
		 * disabled, while the enable path above only sets it for
		 * VL15CTXT - confirm this asymmetry is intended.
		 */
		write_csr(dd, RCV_VL15, 0);
		rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
	}
	if (op & HFI1_RCVCTRL_INTRAVAIL_ENB)
		rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
	if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
		rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
	/* tail update is only enabled when a tail address exists */
	if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
		rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
	if (op & HFI1_RCVCTRL_TAILUPD_DIS)
		rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
	if (op & HFI1_RCVCTRL_TIDFLOW_ENB)
		rcvctrl |= RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
	if (op & HFI1_RCVCTRL_TIDFLOW_DIS)
		rcvctrl &= ~RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
	if (op & HFI1_RCVCTRL_ONE_PKT_EGR_ENB) {
		/* In one-packet-per-eager mode, the size comes from
		   the RcvArray entry. */
		rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
		rcvctrl |= RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
	}
	if (op & HFI1_RCVCTRL_ONE_PKT_EGR_DIS)
		rcvctrl &= ~RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
	if (op & HFI1_RCVCTRL_NO_RHQ_DROP_ENB)
		rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
	if (op & HFI1_RCVCTRL_NO_RHQ_DROP_DIS)
		rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
	if (op & HFI1_RCVCTRL_NO_EGR_DROP_ENB)
		rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
	if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
		rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
	/* cache the final value, then commit it to the hardware */
	rcd->rcvctrl = rcvctrl;
	hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
	write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl);

	/* work around sticky RcvCtxtStatus.BlockedRHQFull */
	if (did_enable
	    && (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
		reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
		if (reg != 0) {
			dd_dev_info(dd, "ctxt %d status %lld (blocked)\n",
				ctxt, reg);
			/* move the head off zero and back, then re-check */
			read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
			write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x10);
			write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x00);
			read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
			reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
			dd_dev_info(dd, "ctxt %d status %lld (%s blocked)\n",
				ctxt, reg, reg == 0 ? "not" : "still");
		}
	}

	if (did_enable) {
		/*
		 * The interrupt timeout and count must be set after
		 * the context is enabled to take effect.
		 */
		/* set interrupt timeout */
		write_kctxt_csr(dd, ctxt, RCV_AVAIL_TIME_OUT,
			(u64)rcd->rcvavail_timeout <<
				RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);

		/* set RcvHdrHead.Counter, zero RcvHdrHead.Head (again) */
		reg = (u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT;
		write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
	}

	if (op & (HFI1_RCVCTRL_TAILUPD_DIS | HFI1_RCVCTRL_CTXT_DIS))
		/*
		 * If the context has been disabled and the Tail Update has
		 * been cleared, clear the RCV_HDR_TAIL_ADDR CSR so
		 * it doesn't contain an address that is invalid.
*/ write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, 0); } u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep, u64 **cntrp) { int ret; u64 val = 0; if (namep) { ret = dd->cntrnameslen; if (pos != 0) { dd_dev_err(dd, "read_cntrs does not support indexing"); return 0; } *namep = dd->cntrnames; } else { const struct cntr_entry *entry; int i, j; ret = (dd->ndevcntrs) * sizeof(u64); if (pos != 0) { dd_dev_err(dd, "read_cntrs does not support indexing"); return 0; } /* Get the start of the block of counters */ *cntrp = dd->cntrs; /* * Now go and fill in each counter in the block. */ for (i = 0; i < DEV_CNTR_LAST; i++) { entry = &dev_cntrs[i]; hfi1_cdbg(CNTR, "reading %s", entry->name); if (entry->flags & CNTR_DISABLED) { /* Nothing */ hfi1_cdbg(CNTR, "\tDisabled\n"); } else { if (entry->flags & CNTR_VL) { hfi1_cdbg(CNTR, "\tPer VL\n"); for (j = 0; j < C_VL_COUNT; j++) { val = entry->rw_cntr(entry, dd, j, CNTR_MODE_R, 0); hfi1_cdbg( CNTR, "\t\tRead 0x%llx for %d\n", val, j); dd->cntrs[entry->offset + j] = val; } } else { val = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0); dd->cntrs[entry->offset] = val; hfi1_cdbg(CNTR, "\tRead 0x%llx", val); } } } } return ret; } /* * Used by sysfs to create files for hfi stats to read */ u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port, char **namep, u64 **cntrp) { int ret; u64 val = 0; if (namep) { ret = dd->portcntrnameslen; if (pos != 0) { dd_dev_err(dd, "index not supported"); return 0; } *namep = dd->portcntrnames; } else { const struct cntr_entry *entry; struct hfi1_pportdata *ppd; int i, j; ret = (dd->nportcntrs) * sizeof(u64); if (pos != 0) { dd_dev_err(dd, "indexing not supported"); return 0; } ppd = (struct hfi1_pportdata *)(dd + 1 + port); *cntrp = ppd->cntrs; for (i = 0; i < PORT_CNTR_LAST; i++) { entry = &port_cntrs[i]; hfi1_cdbg(CNTR, "reading %s", entry->name); if (entry->flags & CNTR_DISABLED) { /* Nothing */ hfi1_cdbg(CNTR, "\tDisabled\n"); continue; } if (entry->flags & 
CNTR_VL) { hfi1_cdbg(CNTR, "\tPer VL"); for (j = 0; j < C_VL_COUNT; j++) { val = entry->rw_cntr(entry, ppd, j, CNTR_MODE_R, 0); hfi1_cdbg( CNTR, "\t\tRead 0x%llx for %d", val, j); ppd->cntrs[entry->offset + j] = val; } } else { val = entry->rw_cntr(entry, ppd, CNTR_INVALID_VL, CNTR_MODE_R, 0); ppd->cntrs[entry->offset] = val; hfi1_cdbg(CNTR, "\tRead 0x%llx", val); } } } return ret; } static void free_cntrs(struct hfi1_devdata *dd) { struct hfi1_pportdata *ppd; int i; if (dd->synth_stats_timer.data) del_timer_sync(&dd->synth_stats_timer); dd->synth_stats_timer.data = 0; ppd = (struct hfi1_pportdata *)(dd + 1); for (i = 0; i < dd->num_pports; i++, ppd++) { kfree(ppd->cntrs); kfree(ppd->scntrs); free_percpu(ppd->ibport_data.rc_acks); free_percpu(ppd->ibport_data.rc_qacks); free_percpu(ppd->ibport_data.rc_delayed_comp); ppd->cntrs = NULL; ppd->scntrs = NULL; ppd->ibport_data.rc_acks = NULL; ppd->ibport_data.rc_qacks = NULL; ppd->ibport_data.rc_delayed_comp = NULL; } kfree(dd->portcntrnames); dd->portcntrnames = NULL; kfree(dd->cntrs); dd->cntrs = NULL; kfree(dd->scntrs); dd->scntrs = NULL; kfree(dd->cntrnames); dd->cntrnames = NULL; } #define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL #define CNTR_32BIT_MAX 0x00000000FFFFFFFF static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry, u64 *psval, void *context, int vl) { u64 val; u64 sval = *psval; if (entry->flags & CNTR_DISABLED) { dd_dev_err(dd, "Counter %s not enabled", entry->name); return 0; } hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval); val = entry->rw_cntr(entry, context, vl, CNTR_MODE_R, 0); /* If its a synthetic counter there is more work we need to do */ if (entry->flags & CNTR_SYNTH) { if (sval == CNTR_MAX) { /* No need to read already saturated */ return CNTR_MAX; } if (entry->flags & CNTR_32BIT) { /* 32bit counters can wrap multiple times */ u64 upper = sval >> 32; u64 lower = (sval << 32) >> 32; if (lower > val) { /* hw wrapped */ if (upper == CNTR_32BIT_MAX) val = 
CNTR_MAX; else upper++; } if (val != CNTR_MAX) val = (upper << 32) | val; } else { /* If we rolled we are saturated */ if ((val < sval) || (val > CNTR_MAX)) val = CNTR_MAX; } } *psval = val; hfi1_cdbg(CNTR, "\tNew val=0x%llx", val); return val; } static u64 write_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry, u64 *psval, void *context, int vl, u64 data) { u64 val; if (entry->flags & CNTR_DISABLED) { dd_dev_err(dd, "Counter %s not enabled", entry->name); return 0; } hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval); if (entry->flags & CNTR_SYNTH) { *psval = data; if (entry->flags & CNTR_32BIT) { val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W, (data << 32) >> 32); val = data; /* return the full 64bit value */ } else { val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W, data); } } else { val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W, data); } *psval = val; hfi1_cdbg(CNTR, "\tNew val=0x%llx", val); return val; } u64 read_dev_cntr(struct hfi1_devdata *dd, int index, int vl) { struct cntr_entry *entry; u64 *sval; entry = &dev_cntrs[index]; sval = dd->scntrs + entry->offset; if (vl != CNTR_INVALID_VL) sval += vl; return read_dev_port_cntr(dd, entry, sval, dd, vl); } u64 write_dev_cntr(struct hfi1_devdata *dd, int index, int vl, u64 data) { struct cntr_entry *entry; u64 *sval; entry = &dev_cntrs[index]; sval = dd->scntrs + entry->offset; if (vl != CNTR_INVALID_VL) sval += vl; return write_dev_port_cntr(dd, entry, sval, dd, vl, data); } u64 read_port_cntr(struct hfi1_pportdata *ppd, int index, int vl) { struct cntr_entry *entry; u64 *sval; entry = &port_cntrs[index]; sval = ppd->scntrs + entry->offset; if (vl != CNTR_INVALID_VL) sval += vl; if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) && (index <= C_RCV_HDR_OVF_LAST)) { /* We do not want to bother for disabled contexts */ return 0; } return read_dev_port_cntr(ppd->dd, entry, sval, ppd, vl); } u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, 
int vl, u64 data)
{
	struct cntr_entry *entry;
	u64 *sval;

	entry = &port_cntrs[index];
	sval = ppd->scntrs + entry->offset;

	/* per-VL counters keep one saved value per VL, after the base offset */
	if (vl != CNTR_INVALID_VL)
		sval += vl;

	if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) &&
	    (index <= C_RCV_HDR_OVF_LAST)) {
		/* We do not want to bother for disabled contexts */
		return 0;
	}

	return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data);
}

/*
 * Timer callback for dd->synth_stats_timer; @opaque is the devdata pointer.
 *
 * Reads the DC transmit/receive flit counters and, when either went
 * backwards or their combined growth since the last full update reaches
 * CNTR_32BIT_MAX, reads every device and port counter so the saved values
 * are refreshed.  Always re-arms the timer before returning.
 */
static void update_synth_timer(unsigned long opaque)
{
	u64 cur_tx;
	u64 cur_rx;
	u64 total_flits;
	u8 update = 0;
	int i, j, vl;
	struct hfi1_pportdata *ppd;
	struct cntr_entry *entry;

	struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;

	/*
	 * Rather than keep beating on the CSRs pick a minimal set that we can
	 * check to watch for potential roll over. We can do this by looking at
	 * the number of flits sent/recv. If the total flits exceeds 32bits then
	 * we have to iterate all the counters and update.
	 */
	entry = &dev_cntrs[C_DC_RCV_FLITS];
	cur_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);

	entry = &dev_cntrs[C_DC_XMIT_FLITS];
	cur_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);

	hfi1_cdbg(
	    CNTR,
	    "[%d] curr tx=0x%llx rx=0x%llx :: last tx=0x%llx rx=0x%llx\n",
	    dd->unit, cur_tx, cur_rx, dd->last_tx, dd->last_rx);

	if ((cur_tx < dd->last_tx) || (cur_rx < dd->last_rx)) {
		/*
		 * May not be strictly necessary to update but it won't hurt and
		 * simplifies the logic here.
		 */
		update = 1;
		hfi1_cdbg(CNTR, "[%d] Tripwire counter rolled, updating",
			  dd->unit);
	} else {
		total_flits = (cur_tx - dd->last_tx) + (cur_rx - dd->last_rx);
		hfi1_cdbg(CNTR,
			  "[%d] total flits 0x%llx limit 0x%llx\n", dd->unit,
			  total_flits, (u64)CNTR_32BIT_MAX);
		if (total_flits >= CNTR_32BIT_MAX) {
			hfi1_cdbg(CNTR, "[%d] 32bit limit hit, updating",
				  dd->unit);
			update = 1;
		}
	}

	if (update) {
		hfi1_cdbg(CNTR, "[%d] Updating dd and ppd counters", dd->unit);
		/* the return values are ignored; the reads refresh saved values */
		for (i = 0; i < DEV_CNTR_LAST; i++) {
			entry = &dev_cntrs[i];
			if (entry->flags & CNTR_VL) {
				for (vl = 0; vl < C_VL_COUNT; vl++)
					read_dev_cntr(dd, i, vl);
			} else {
				read_dev_cntr(dd, i, CNTR_INVALID_VL);
			}
		}
		ppd = (struct hfi1_pportdata *)(dd + 1);
		for (i = 0; i < dd->num_pports; i++, ppd++) {
			for (j = 0; j < PORT_CNTR_LAST; j++) {
				entry = &port_cntrs[j];
				if (entry->flags & CNTR_VL) {
					for (vl = 0; vl < C_VL_COUNT; vl++)
						read_port_cntr(ppd, j, vl);
				} else {
					read_port_cntr(ppd, j, CNTR_INVALID_VL);
				}
			}
		}

		/*
		 * We want the value in the register. The goal is to keep track
		 * of the number of "ticks" not the counter value. In other
		 * words if the register rolls we want to notice it and go ahead
		 * and force an update.
		 */
		entry = &dev_cntrs[C_DC_XMIT_FLITS];
		dd->last_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
						CNTR_MODE_R, 0);

		entry = &dev_cntrs[C_DC_RCV_FLITS];
		dd->last_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
						CNTR_MODE_R, 0);

		hfi1_cdbg(CNTR, "[%d] setting last tx/rx to 0x%llx 0x%llx",
			  dd->unit, dd->last_tx, dd->last_rx);

	} else {
		hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit);
	}

	mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
}

#define C_MAX_NAME 13 /* 12 chars + one for the terminating NUL */

/*
 * Size, allocate and name the device and port counters, then start the
 * synthetic stats timer.
 *
 * Each counter table is walked twice: once to assign offsets and total the
 * length of the newline-separated name list, and once to fill that list in.
 * Returns 0 on success; on any allocation failure everything is released
 * through free_cntrs() and -ENOMEM is returned.
 */
static int init_cntrs(struct hfi1_devdata *dd)
{
	int i, rcv_ctxts, index, j;
	size_t sz;
	char *p;
	char name[C_MAX_NAME];
	struct hfi1_pportdata *ppd;

	/* set up the stats timer; the add_timer is done at the end */
	setup_timer(&dd->synth_stats_timer, update_synth_timer,
		    (unsigned long)dd);

	/***********************/
	/* per device counters */
	/***********************/

	/* size names and determine how many we have*/
	dd->ndevcntrs = 0;
	sz = 0;
	index = 0;

	for (i = 0; i < DEV_CNTR_LAST; i++) {
		hfi1_dbg_early("Init cntr %s\n", dev_cntrs[i].name);
		if (dev_cntrs[i].flags & CNTR_DISABLED) {
			hfi1_dbg_early("\tSkipping %s\n", dev_cntrs[i].name);
			continue;
		}

		if (dev_cntrs[i].flags & CNTR_VL) {
			hfi1_dbg_early("\tProcessing VL cntr\n");
			dev_cntrs[i].offset = index;
			for (j = 0; j < C_VL_COUNT; j++) {
				memset(name, '\0', C_MAX_NAME);
				snprintf(name, C_MAX_NAME, "%s%d",
					dev_cntrs[i].name,
					vl_from_idx(j));
				/* name length plus one for the newline */
				sz += strlen(name);
				sz++;
				hfi1_dbg_early("\t\t%s\n", name);
				dd->ndevcntrs++;
				index++;
			}
		} else {
			/* +1 for newline */
			sz += strlen(dev_cntrs[i].name) + 1;
			dd->ndevcntrs++;
			dev_cntrs[i].offset = index;
			index++;
			hfi1_dbg_early("\tAdding %s\n", dev_cntrs[i].name);
		}
	}

	/* allocate space for the counter values */
	dd->cntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
	if (!dd->cntrs)
		goto bail;

	dd->scntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
	if (!dd->scntrs)
		goto bail;

	/* allocate space for the counter names */
	dd->cntrnameslen = sz;
	dd->cntrnames = kmalloc(sz, GFP_KERNEL);
	if (!dd->cntrnames)
		goto bail;

	/* fill in the names */
	for (p = dd->cntrnames, i = 0, index = 0; i < DEV_CNTR_LAST; i++) {
		if (dev_cntrs[i].flags & CNTR_DISABLED) {
			/* Nothing */
		} else {
			if (dev_cntrs[i].flags & CNTR_VL) {
				for (j = 0; j < C_VL_COUNT; j++) {
					memset(name, '\0', C_MAX_NAME);
					snprintf(name, C_MAX_NAME, "%s%d",
						dev_cntrs[i].name,
						vl_from_idx(j));
					memcpy(p, name, strlen(name));
					p += strlen(name);
					*p++ = '\n';
				}
			} else {
				memcpy(p, dev_cntrs[i].name,
				       strlen(dev_cntrs[i].name));
				p += strlen(dev_cntrs[i].name);
				*p++ = '\n';
			}
			index++;
		}
	}

	/*********************/
	/* per port counters */
	/*********************/

	/*
	 * Go through the counters for the overflows and disable the ones we
	 * don't need. This varies based on platform so we need to do it
	 * dynamically here.
	 */
	rcv_ctxts = dd->num_rcv_contexts;
	for (i = C_RCV_HDR_OVF_FIRST + rcv_ctxts;
	     i <= C_RCV_HDR_OVF_LAST; i++) {
		port_cntrs[i].flags |= CNTR_DISABLED;
	}

	/* size port counter names and determine how many we have*/
	sz = 0;
	dd->nportcntrs = 0;
	for (i = 0; i < PORT_CNTR_LAST; i++) {
		hfi1_dbg_early("Init pcntr %s\n", port_cntrs[i].name);
		if (port_cntrs[i].flags & CNTR_DISABLED) {
			hfi1_dbg_early("\tSkipping %s\n", port_cntrs[i].name);
			continue;
		}

		if (port_cntrs[i].flags & CNTR_VL) {
			hfi1_dbg_early("\tProcessing VL cntr\n");
			port_cntrs[i].offset = dd->nportcntrs;
			for (j = 0; j < C_VL_COUNT; j++) {
				memset(name, '\0', C_MAX_NAME);
				snprintf(name, C_MAX_NAME, "%s%d",
					port_cntrs[i].name,
					vl_from_idx(j));
				/* name length plus one for the newline */
				sz += strlen(name);
				sz++;
				hfi1_dbg_early("\t\t%s\n", name);
				dd->nportcntrs++;
			}
		} else {
			/* +1 for newline */
			sz += strlen(port_cntrs[i].name) + 1;
			port_cntrs[i].offset = dd->nportcntrs;
			dd->nportcntrs++;
			hfi1_dbg_early("\tAdding %s\n", port_cntrs[i].name);
		}
	}

	/* allocate space for the counter names */
	dd->portcntrnameslen = sz;
	dd->portcntrnames = kmalloc(sz, GFP_KERNEL);
	if (!dd->portcntrnames)
		goto bail;

	/* fill in port cntr names */
	for (p = dd->portcntrnames, i = 0; i < PORT_CNTR_LAST; i++) {
		if (port_cntrs[i].flags &
CNTR_DISABLED) continue; if (port_cntrs[i].flags & CNTR_VL) { for (j = 0; j < C_VL_COUNT; j++) { memset(name, '\0', C_MAX_NAME); snprintf(name, C_MAX_NAME, "%s%d", port_cntrs[i].name, vl_from_idx(j)); memcpy(p, name, strlen(name)); p += strlen(name); *p++ = '\n'; } } else { memcpy(p, port_cntrs[i].name, strlen(port_cntrs[i].name)); p += strlen(port_cntrs[i].name); *p++ = '\n'; } } /* allocate per port storage for counter values */ ppd = (struct hfi1_pportdata *)(dd + 1); for (i = 0; i < dd->num_pports; i++, ppd++) { ppd->cntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL); if (!ppd->cntrs) goto bail; ppd->scntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL); if (!ppd->scntrs) goto bail; } /* CPU counters need to be allocated and zeroed */ if (init_cpu_counters(dd)) goto bail; mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); return 0; bail: free_cntrs(dd); return -ENOMEM; } static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate) { switch (chip_lstate) { default: dd_dev_err(dd, "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n", chip_lstate); /* fall through */ case LSTATE_DOWN: return IB_PORT_DOWN; case LSTATE_INIT: return IB_PORT_INIT; case LSTATE_ARMED: return IB_PORT_ARMED; case LSTATE_ACTIVE: return IB_PORT_ACTIVE; } } u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate) { /* look at the HFI meta-states only */ switch (chip_pstate & 0xf0) { default: dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n", chip_pstate); /* fall through */ case PLS_DISABLED: return IB_PORTPHYSSTATE_DISABLED; case PLS_OFFLINE: return OPA_PORTPHYSSTATE_OFFLINE; case PLS_POLLING: return IB_PORTPHYSSTATE_POLLING; case PLS_CONFIGPHY: return IB_PORTPHYSSTATE_TRAINING; case PLS_LINKUP: return IB_PORTPHYSSTATE_LINKUP; case PLS_PHYTEST: return IB_PORTPHYSSTATE_PHY_TEST; } } /* return the OPA port logical state name */ const char *opa_lstate_name(u32 lstate) { static const char * const port_logical_names[] = { "PORT_NOP", 
"PORT_DOWN", "PORT_INIT", "PORT_ARMED", "PORT_ACTIVE", "PORT_ACTIVE_DEFER", }; if (lstate < ARRAY_SIZE(port_logical_names)) return port_logical_names[lstate]; return "unknown"; } /* return the OPA port physical state name */ const char *opa_pstate_name(u32 pstate) { static const char * const port_physical_names[] = { "PHYS_NOP", "reserved1", "PHYS_POLL", "PHYS_DISABLED", "PHYS_TRAINING", "PHYS_LINKUP", "PHYS_LINK_ERR_RECOVER", "PHYS_PHY_TEST", "reserved8", "PHYS_OFFLINE", "PHYS_GANGED", "PHYS_TEST", }; if (pstate < ARRAY_SIZE(port_physical_names)) return port_physical_names[pstate]; return "unknown"; } /* * Read the hardware link state and set the driver's cached value of it. * Return the (new) current value. */ u32 get_logical_state(struct hfi1_pportdata *ppd) { u32 new_state; new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd)); if (new_state != ppd->lstate) { dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n", opa_lstate_name(new_state), new_state); ppd->lstate = new_state; } /* * Set port status flags in the page mapped into userspace * memory. Do it here to ensure a reliable state - this is * the only function called by all state handling code. * Always set the flags due to the fact that the cache value * might have been changed explicitly outside of this * function. */ if (ppd->statusp) { switch (ppd->lstate) { case IB_PORT_DOWN: case IB_PORT_INIT: *ppd->statusp &= ~(HFI1_STATUS_IB_CONF | HFI1_STATUS_IB_READY); break; case IB_PORT_ARMED: *ppd->statusp |= HFI1_STATUS_IB_CONF; break; case IB_PORT_ACTIVE: *ppd->statusp |= HFI1_STATUS_IB_READY; break; } } return ppd->lstate; } /** * wait_logical_linkstate - wait for an IB link state change to occur * @ppd: port device * @state: the state to wait for * @msecs: the number of milliseconds to wait * * Wait up to msecs milliseconds for IB link state change to occur. * For now, take the easy polling route. * Returns 0 if state reached, otherwise -ETIMEDOUT. 
 */
static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
				  int msecs)
{
	unsigned long timeout;

	timeout = jiffies + msecs_to_jiffies(msecs);
	while (1) {
		if (get_logical_state(ppd) == state)
			return 0;
		if (time_after(jiffies, timeout))
			break;
		/* poll interval */
		msleep(20);
	}
	dd_dev_err(ppd->dd, "timeout waiting for link state 0x%x\n", state);

	return -ETIMEDOUT;
}

/*
 * Read the chip physical state and return it converted by
 * chip_to_opa_pstate().  A message is logged when the converted value
 * differs from the one seen on the previous call.
 *
 * The remembered value is a single function-level static, so it is shared
 * by every port that calls this function.
 */
u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
{
	static u32 remembered_state = 0xff;
	u32 pstate;
	u32 ib_pstate;

	pstate = read_physical_state(ppd->dd);
	ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
	if (remembered_state != ib_pstate) {
		dd_dev_info(ppd->dd,
			"%s: physical state changed to %s (0x%x), phy 0x%x\n",
			__func__, opa_pstate_name(ib_pstate), ib_pstate,
			pstate);
		remembered_state = ib_pstate;
	}
	return ib_pstate;
}

/*
 * Read/modify/write ASIC_QSFP register bits as selected by mask
 * data: 0 or 1 in the positions depending on what needs to be written
 * dir: 0 for read, 1 for write
 * mask: select by setting
 *      I2CCLK  (bit 0)
 *      I2CDATA (bit 1)
 *
 * target selects the QSFP2 registers when non-zero, QSFP1 otherwise.
 * Returns the contents of the selected QSFP input register.
 *
 * NOTE(review): data is masked below but not used afterwards; only the
 * output-enable (direction) register is written here.
 */
u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir,
		  u32 mask)
{
	u64 qsfp_oe, target_oe;

	target_oe = target ? ASIC_QSFP2_OE : ASIC_QSFP1_OE;
	if (mask) {
		/*
		 * We are writing register bits, so lock access.
		 * NOTE(review): no lock is taken in this function; presumably
		 * the caller serializes access - confirm.
		 */
		dir &= mask;
		data &= mask;
		qsfp_oe = read_csr(dd, target_oe);
		qsfp_oe = (qsfp_oe & ~(u64)mask) | (u64)dir;
		write_csr(dd, target_oe, qsfp_oe);
	}
	/* We are exclusively reading bits here, but it is unlikely
	 * we'll get valid data when we set the direction of the pin
	 * in the same call, so read should call this function again
	 * to get valid data
	 */
	return read_csr(dd, target ? ASIC_QSFP2_IN : ASIC_QSFP1_IN);
}

/* Clear/set the "disallow PBC static rate control" check bit in r. */
#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
(r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)

#define SET_STATIC_RATE_CONTROL_SMASK(r) \
(r |= SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)

/*
 * Program the static rate control check of a send context.
 *
 * The disallow bit in SEND_CTXT_CHECK_ENABLE is cleared when the
 * STATIC_RATE_CTRL capability is set for the context's type (user or
 * kernel) and set otherwise.  A NULL sc is ignored.  Always returns 0.
 */
int hfi1_init_ctxt(struct send_context *sc)
{
	if (sc != NULL) {
		struct hfi1_devdata *dd = sc->dd;
		u64 reg;
		u8 set = (sc->type == SC_USER ?
			  HFI1_CAP_IS_USET(STATIC_RATE_CTRL) :
			  HFI1_CAP_IS_KSET(STATIC_RATE_CTRL));
		reg = read_kctxt_csr(dd, sc->hw_context,
				     SEND_CTXT_CHECK_ENABLE);
		if (set)
			CLEAR_STATIC_RATE_CONTROL_SMASK(reg);
		else
			SET_STATIC_RATE_CONTROL_SMASK(reg);
		write_kctxt_csr(dd, sc->hw_context,
				SEND_CTXT_CHECK_ENABLE, reg);
	}
	return 0;
}

/*
 * Decode ASIC_STS_THERM into @temp: current temperature, the low, high and
 * critical limits, and the trigger bits.
 *
 * Returns 0 on success, or -EINVAL when dd->icode is not
 * ICODE_RTL_SILICON (in which case @temp is left untouched).
 */
int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
{
	int ret = 0;
	u64 reg;

	if (dd->icode != ICODE_RTL_SILICON) {
		if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
			dd_dev_info(dd, "%s: tempsense not supported by HW\n",
				    __func__);
		return -EINVAL;
	}
	reg = read_csr(dd, ASIC_STS_THERM);
	temp->curr = ((reg >> ASIC_STS_THERM_CURR_TEMP_SHIFT) &
		      ASIC_STS_THERM_CURR_TEMP_MASK);
	temp->lo_lim = ((reg >> ASIC_STS_THERM_LO_TEMP_SHIFT) &
			ASIC_STS_THERM_LO_TEMP_MASK);
	temp->hi_lim = ((reg >> ASIC_STS_THERM_HI_TEMP_SHIFT) &
			ASIC_STS_THERM_HI_TEMP_MASK);
	temp->crit_lim = ((reg >> ASIC_STS_THERM_CRIT_TEMP_SHIFT) &
			  ASIC_STS_THERM_CRIT_TEMP_MASK);
	/* triggers is a 3-bit value - 1 bit per trigger. */
	temp->triggers = (u8)((reg >> ASIC_STS_THERM_LOW_SHIFT) & 0x7);

	return ret;
}

/* ========================================================================= */

/*
 * Enable/disable chip from delivering interrupts.
 *
 * A non-zero @enable unmasks every interrupt source except one QSFP
 * interrupt chosen by dd->hfi1_id; zero masks every source.
 */
void set_intr_state(struct hfi1_devdata *dd, u32 enable)
{
	int i;

	/*
	 * In HFI, the mask needs to be 1 to allow interrupts.
	 */
	if (enable) {
		u64 cce_int_mask;
		const int qsfp1_int_smask = QSFP1_INT % 64;
		const int qsfp2_int_smask = QSFP2_INT % 64;

		/* enable all interrupts */
		for (i = 0; i < CCE_NUM_INT_CSRS; i++)
			write_csr(dd, CCE_INT_MASK + (8*i), ~(u64)0);

		/*
		 * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
		 * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
		 * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
		 * the index of the appropriate CSR in the CCEIntMask CSR array
		 */
		cce_int_mask = read_csr(dd, CCE_INT_MASK +
						(8*(QSFP1_INT/64)));
		if (dd->hfi1_id) {
			cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
			write_csr(dd, CCE_INT_MASK + (8*(QSFP1_INT/64)),
					cce_int_mask);
		} else {
			cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
			write_csr(dd, CCE_INT_MASK + (8*(QSFP2_INT/64)),
					cce_int_mask);
		}
	} else {
		for (i = 0; i < CCE_NUM_INT_CSRS; i++)
			write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
	}
}

/*
 * Clear all interrupt sources on the chip.
 *
 * Writes all ones to every interrupt clear CSR and to each block's error
 * clear CSR, including the per send context and per SDMA engine ones.
 */
static void clear_all_interrupts(struct hfi1_devdata *dd)
{
	int i;

	for (i = 0; i < CCE_NUM_INT_CSRS; i++)
		write_csr(dd, CCE_INT_CLEAR + (8*i), ~(u64)0);

	write_csr(dd, CCE_ERR_CLEAR, ~(u64)0);
	write_csr(dd, MISC_ERR_CLEAR, ~(u64)0);
	write_csr(dd, RCV_ERR_CLEAR, ~(u64)0);
	write_csr(dd, SEND_ERR_CLEAR, ~(u64)0);
	write_csr(dd, SEND_PIO_ERR_CLEAR, ~(u64)0);
	write_csr(dd, SEND_DMA_ERR_CLEAR, ~(u64)0);
	write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~(u64)0);
	for (i = 0; i < dd->chip_send_contexts; i++)
		write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~(u64)0);
	for (i = 0; i < dd->chip_sdma_engines; i++)
		write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~(u64)0);

	write_csr(dd, DCC_ERR_FLG_CLR, ~(u64)0);
	write_csr(dd, DC_LCB_ERR_CLR, ~(u64)0);
	write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
}

/* Move to pcie.c?
*/ static void disable_intx(struct pci_dev *pdev) { pci_intx(pdev, 0); } static void clean_up_interrupts(struct hfi1_devdata *dd) { int i; /* remove irqs - must happen before disabling/turning off */ if (dd->num_msix_entries) { /* MSI-X */ struct hfi1_msix_entry *me = dd->msix_entries; for (i = 0; i < dd->num_msix_entries; i++, me++) { if (me->arg == NULL) /* => no irq, no affinity */ break; irq_set_affinity_hint(dd->msix_entries[i].msix.vector, NULL); free_irq(me->msix.vector, me->arg); } } else { /* INTx */ if (dd->requested_intx_irq) { free_irq(dd->pcidev->irq, dd); dd->requested_intx_irq = 0; } } /* turn off interrupts */ if (dd->num_msix_entries) { /* MSI-X */ hfi1_nomsix(dd); } else { /* INTx */ disable_intx(dd->pcidev); } /* clean structures */ for (i = 0; i < dd->num_msix_entries; i++) free_cpumask_var(dd->msix_entries[i].mask); kfree(dd->msix_entries); dd->msix_entries = NULL; dd->num_msix_entries = 0; } /* * Remap the interrupt source from the general handler to the given MSI-X * interrupt. */ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr) { u64 reg; int m, n; /* clear from the handled mask of the general interrupt */ m = isrc / 64; n = isrc % 64; dd->gi_mask[m] &= ~((u64)1 << n); /* direct the chip source to the given MSI-X interrupt */ m = isrc / 8; n = isrc % 8; reg = read_csr(dd, CCE_INT_MAP + (8*m)); reg &= ~((u64)0xff << (8*n)); reg |= ((u64)msix_intr & 0xff) << (8*n); write_csr(dd, CCE_INT_MAP + (8*m), reg); } static void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr) { /* * SDMA engine interrupt sources grouped by type, rather than * engine. 
Per-engine interrupts are as follows: * SDMA * SDMAProgress * SDMAIdle */ remap_intr(dd, IS_SDMA_START + 0*TXE_NUM_SDMA_ENGINES + engine, msix_intr); remap_intr(dd, IS_SDMA_START + 1*TXE_NUM_SDMA_ENGINES + engine, msix_intr); remap_intr(dd, IS_SDMA_START + 2*TXE_NUM_SDMA_ENGINES + engine, msix_intr); } static void remap_receive_available_interrupt(struct hfi1_devdata *dd, int rx, int msix_intr) { remap_intr(dd, IS_RCVAVAIL_START + rx, msix_intr); } static int request_intx_irq(struct hfi1_devdata *dd) { int ret; snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME"_%d", dd->unit); ret = request_irq(dd->pcidev->irq, general_interrupt, IRQF_SHARED, dd->intx_name, dd); if (ret) dd_dev_err(dd, "unable to request INTx interrupt, err %d\n", ret); else dd->requested_intx_irq = 1; return ret; } static int request_msix_irqs(struct hfi1_devdata *dd) { const struct cpumask *local_mask; cpumask_var_t def, rcv; bool def_ret, rcv_ret; int first_general, last_general; int first_sdma, last_sdma; int first_rx, last_rx; int first_cpu, restart_cpu, curr_cpu; int rcv_cpu, sdma_cpu; int i, ret = 0, possible; int ht; /* calculate the ranges we are going to use */ first_general = 0; first_sdma = last_general = first_general + 1; first_rx = last_sdma = first_sdma + dd->num_sdma; last_rx = first_rx + dd->n_krcv_queues; /* * Interrupt affinity. * * non-rcv avail gets a default mask that * starts as possible cpus with threads reset * and each rcv avail reset. * * rcv avail gets node relative 1 wrapping back * to the node relative 1 as necessary. 
* */ local_mask = cpumask_of_pcibus(dd->pcidev->bus); /* if first cpu is invalid, use NUMA 0 */ if (cpumask_first(local_mask) >= nr_cpu_ids) local_mask = topology_core_cpumask(0); def_ret = zalloc_cpumask_var(&def, GFP_KERNEL); rcv_ret = zalloc_cpumask_var(&rcv, GFP_KERNEL); if (!def_ret || !rcv_ret) goto bail; /* use local mask as default */ cpumask_copy(def, local_mask); possible = cpumask_weight(def); /* disarm threads from default */ ht = cpumask_weight( topology_sibling_cpumask(cpumask_first(local_mask))); for (i = possible/ht; i < possible; i++) cpumask_clear_cpu(i, def); /* reset possible */ possible = cpumask_weight(def); /* def now has full cores on chosen node*/ first_cpu = cpumask_first(def); if (nr_cpu_ids >= first_cpu) first_cpu++; restart_cpu = first_cpu; curr_cpu = restart_cpu; for (i = first_cpu; i < dd->n_krcv_queues + first_cpu; i++) { cpumask_clear_cpu(curr_cpu, def); cpumask_set_cpu(curr_cpu, rcv); if (curr_cpu >= possible) curr_cpu = restart_cpu; else curr_cpu++; } /* def mask has non-rcv, rcv has recv mask */ rcv_cpu = cpumask_first(rcv); sdma_cpu = cpumask_first(def); /* * Sanity check - the code expects all SDMA chip source * interrupts to be in the same CSR, starting at bit 0. Verify * that this is true by checking the bit location of the start. 
*/ BUILD_BUG_ON(IS_SDMA_START % 64); for (i = 0; i < dd->num_msix_entries; i++) { struct hfi1_msix_entry *me = &dd->msix_entries[i]; const char *err_info; irq_handler_t handler; irq_handler_t thread = NULL; void *arg; int idx; struct hfi1_ctxtdata *rcd = NULL; struct sdma_engine *sde = NULL; /* obtain the arguments to request_irq */ if (first_general <= i && i < last_general) { idx = i - first_general; handler = general_interrupt; arg = dd; snprintf(me->name, sizeof(me->name), DRIVER_NAME"_%d", dd->unit); err_info = "general"; } else if (first_sdma <= i && i < last_sdma) { idx = i - first_sdma; sde = &dd->per_sdma[idx]; handler = sdma_interrupt; arg = sde; snprintf(me->name, sizeof(me->name), DRIVER_NAME"_%d sdma%d", dd->unit, idx); err_info = "sdma"; remap_sdma_interrupts(dd, idx, i); } else if (first_rx <= i && i < last_rx) { idx = i - first_rx; rcd = dd->rcd[idx]; /* no interrupt if no rcd */ if (!rcd) continue; /* * Set the interrupt register and mask for this * context's interrupt. 
*/ rcd->ireg = (IS_RCVAVAIL_START+idx) / 64; rcd->imask = ((u64)1) << ((IS_RCVAVAIL_START+idx) % 64); handler = receive_context_interrupt; thread = receive_context_thread; arg = rcd; snprintf(me->name, sizeof(me->name), DRIVER_NAME"_%d kctxt%d", dd->unit, idx); err_info = "receive context"; remap_receive_available_interrupt(dd, idx, i); } else { /* not in our expected range - complain, then ignore it */ dd_dev_err(dd, "Unexpected extra MSI-X interrupt %d\n", i); continue; } /* no argument, no interrupt */ if (arg == NULL) continue; /* make sure the name is terminated */ me->name[sizeof(me->name)-1] = 0; ret = request_threaded_irq(me->msix.vector, handler, thread, 0, me->name, arg); if (ret) { dd_dev_err(dd, "unable to allocate %s interrupt, vector %d, index %d, err %d\n", err_info, me->msix.vector, idx, ret); return ret; } /* * assign arg after request_irq call, so it will be * cleaned up */ me->arg = arg; if (!zalloc_cpumask_var( &dd->msix_entries[i].mask, GFP_KERNEL)) goto bail; if (handler == sdma_interrupt) { dd_dev_info(dd, "sdma engine %d cpu %d\n", sde->this_idx, sdma_cpu); cpumask_set_cpu(sdma_cpu, dd->msix_entries[i].mask); sdma_cpu = cpumask_next(sdma_cpu, def); if (sdma_cpu >= nr_cpu_ids) sdma_cpu = cpumask_first(def); } else if (handler == receive_context_interrupt) { dd_dev_info(dd, "rcv ctxt %d cpu %d\n", rcd->ctxt, rcv_cpu); cpumask_set_cpu(rcv_cpu, dd->msix_entries[i].mask); rcv_cpu = cpumask_next(rcv_cpu, rcv); if (rcv_cpu >= nr_cpu_ids) rcv_cpu = cpumask_first(rcv); } else { /* otherwise first def */ dd_dev_info(dd, "%s cpu %d\n", err_info, cpumask_first(def)); cpumask_set_cpu( cpumask_first(def), dd->msix_entries[i].mask); } irq_set_affinity_hint( dd->msix_entries[i].msix.vector, dd->msix_entries[i].mask); } out: free_cpumask_var(def); free_cpumask_var(rcv); return ret; bail: ret = -ENOMEM; goto out; } /* * Set the general handler to accept all interrupts, remap all * chip interrupts back to MSI-X 0. 
 */
static void reset_interrupts(struct hfi1_devdata *dd)
{
	int i;

	/* all interrupts handled by the general handler */
	for (i = 0; i < CCE_NUM_INT_CSRS; i++)
		dd->gi_mask[i] = ~(u64)0;

	/* all chip interrupts map to MSI-X 0 */
	for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
		write_csr(dd, CCE_INT_MAP + (8*i), 0);
}

/*
 * Allocate and request the device interrupts.
 *
 * Asks for one MSI-X vector per interrupt consumer and falls back to a
 * single INTx interrupt when request_msix() hands back none.  Getting some
 * but not all of the vectors is treated as an error.
 *
 * Returns 0 on success.  On failure clean_up_interrupts() is run and a
 * negative errno is returned.
 */
static int set_up_interrupts(struct hfi1_devdata *dd)
{
	struct hfi1_msix_entry *entries;
	u32 total, request;
	int i, ret;
	int single_interrupt = 0; /* we expect to have all the interrupts */

	/*
	 * Interrupt count:
	 *	1 general, "slow path" interrupt (includes the SDMA engines
	 *		slow source, SDMACleanupDone)
	 *	N interrupts - one per used SDMA engine
	 *	M interrupt - one per kernel receive context
	 */
	total = 1 + dd->num_sdma + dd->n_krcv_queues;

	entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
	if (!entries) {
		ret = -ENOMEM;
		goto fail;
	}
	/* 1-1 MSI-X entry assignment */
	for (i = 0; i < total; i++)
		entries[i].msix.entry = i;

	/* ask for MSI-X interrupts */
	request = total;
	/* request is updated in place; it is checked against total below */
	request_msix(dd, &request, entries);

	if (request == 0) {
		/* using INTx */
		/* dd->num_msix_entries already zero */
		kfree(entries);
		single_interrupt = 1;
		dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n");
	} else {
		/* using MSI-X */
		/* ownership of entries moves to dd; the fail path frees it */
		dd->num_msix_entries = request;
		dd->msix_entries = entries;

		if (request != total) {
			/* using MSI-X, with reduced interrupts */
			dd_dev_err(
				dd,
				"cannot handle reduced interrupt case, want %u, got %u\n",
				total, request);
			ret = -EINVAL;
			goto fail;
		}
		dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
	}

	/* mask all interrupts */
	set_intr_state(dd, 0);
	/* clear all pending interrupts */
	clear_all_interrupts(dd);

	/* reset general handler mask, chip MSI-X mappings */
	reset_interrupts(dd);

	if (single_interrupt)
		ret = request_intx_irq(dd);
	else
		ret = request_msix_irqs(dd);
	if (ret)
		goto fail;

	return 0;

fail:
	clean_up_interrupts(dd);
	return ret;
}

/*
 * Set up context values in dd.
Sets:
 *
 *	num_rcv_contexts - number of contexts being used
 *	n_krcv_queues - number of kernel contexts
 *	first_user_ctxt - first non-kernel context in array of contexts
 *	freectxts  - number of free user contexts
 *	num_send_contexts - number of PIO send contexts being used
 *
 * Also fills in dd->rcv_entries (RcvArray group size, groups per context
 * and left over groups).  Returns 0 on success or the negative value
 * returned by init_sc_pools_and_sizes().
 */
static int set_up_context_variables(struct hfi1_devdata *dd)
{
	int num_kernel_contexts;
	int num_user_contexts;
	int total_contexts;
	int ret;
	unsigned ngroups;

	/*
	 * Kernel contexts: (to be fixed later):
	 * - min or 2 or 1 context/numa
	 * - Context 0 - default/errors
	 * - Context 1 - VL15
	 */
	if (n_krcvqs)
		num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS;
	else
		num_kernel_contexts = num_online_nodes();
	num_kernel_contexts =
		max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts);
	/*
	 * Every kernel receive context needs an ACK send context.
	 * one send context is allocated for each VL{0-7} and VL15
	 */
	if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
		dd_dev_err(dd,
			   "Reducing # kernel rcv contexts to: %d, from %d\n",
			   (int)(dd->chip_send_contexts - num_vls - 1),
			   (int)num_kernel_contexts);
		num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
	}
	/*
	 * User contexts: (to be fixed later)
	 *	- set to num_rcv_contexts if non-zero
	 *	- default to 1 user context per CPU
	 */
	if (num_rcv_contexts)
		num_user_contexts = num_rcv_contexts;
	else
		num_user_contexts = num_online_cpus();

	total_contexts = num_kernel_contexts + num_user_contexts;

	/*
	 * Adjust the counts given a global max.
	 */
	if (total_contexts > dd->chip_rcv_contexts) {
		dd_dev_err(dd,
			   "Reducing # user receive contexts to: %d, from %d\n",
			   (int)(dd->chip_rcv_contexts - num_kernel_contexts),
			   (int)num_user_contexts);
		num_user_contexts = dd->chip_rcv_contexts - num_kernel_contexts;
		/* recalculate */
		total_contexts = num_kernel_contexts + num_user_contexts;
	}

	/* the first N are kernel contexts, the rest are user contexts */
	dd->num_rcv_contexts = total_contexts;
	dd->n_krcv_queues = num_kernel_contexts;
	dd->first_user_ctxt = num_kernel_contexts;
	dd->freectxts = num_user_contexts;
	dd_dev_info(dd,
		"rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
		(int)dd->chip_rcv_contexts,
		(int)dd->num_rcv_contexts,
		(int)dd->n_krcv_queues,
		(int)dd->num_rcv_contexts - dd->n_krcv_queues);

	/*
	 * Receive array allocation:
	 *   All RcvArray entries are divided into groups of 8. This
	 *   is required by the hardware and will speed up writes to
	 *   consecutive entries by using write-combining of the entire
	 *   cacheline.
	 *
	 *   The number of groups are evenly divided among all contexts.
	 *   any left over groups will be given to the first N user
	 *   contexts.
	 */
	dd->rcv_entries.group_size = RCV_INCREMENT;
	ngroups = dd->chip_rcv_array_count / dd->rcv_entries.group_size;
	dd->rcv_entries.ngroups = ngroups / dd->num_rcv_contexts;
	dd->rcv_entries.nctxt_extra = ngroups -
		(dd->num_rcv_contexts * dd->rcv_entries.ngroups);
	dd_dev_info(dd, "RcvArray groups %u, ctxts extra %u\n",
		    dd->rcv_entries.ngroups,
		    dd->rcv_entries.nctxt_extra);
	/* cap the entries per context at MAX_EAGER_ENTRIES * 2 */
	if (dd->rcv_entries.ngroups * dd->rcv_entries.group_size >
	    MAX_EAGER_ENTRIES * 2) {
		dd->rcv_entries.ngroups = (MAX_EAGER_ENTRIES * 2) /
			dd->rcv_entries.group_size;
		dd_dev_info(dd,
		   "RcvArray group count too high, change to %u\n",
		   dd->rcv_entries.ngroups);
		dd->rcv_entries.nctxt_extra = 0;
	}
	/*
	 * PIO send contexts
	 */
	ret = init_sc_pools_and_sizes(dd);
	if (ret >= 0) {	/* success */
		/* a non-negative result is the number of send contexts in use */
		dd->num_send_contexts = ret;
		dd_dev_info(
			dd,
			"send contexts: chip %d, used %d (kernel %d, ack %d, user %d)\n",
			dd->chip_send_contexts,
			dd->num_send_contexts,
			dd->sc_sizes[SC_KERNEL].count,
			dd->sc_sizes[SC_ACK].count,
			dd->sc_sizes[SC_USER].count);
		ret = 0;	/* success */
	}

	return ret;
}

/*
 * Set the device/port partition key table. The MAD code
 * will ensure that, at least, the partial management
 * partition key is present in the table.
 *
 * Keys are packed four to a register and a register is written once its
 * fourth key has been added.  The hardware partition key check is then
 * enabled through add_rcvctrl().
 */
static void set_partition_keys(struct hfi1_pportdata *ppd)
{
	struct hfi1_devdata *dd = ppd->dd;
	u64 reg = 0;
	int i;

	dd_dev_info(dd, "Setting partition keys\n");
	for (i = 0; i < hfi1_get_npkeys(dd); i++) {
		reg |= (ppd->pkeys[i] &
			RCV_PARTITION_KEY_PARTITION_KEY_A_MASK) <<
			((i % 4) *
			 RCV_PARTITION_KEY_PARTITION_KEY_B_SHIFT);
		/* Each register holds 4 PKey values. */
		if ((i % 4) == 3) {
			/* (i - 3) * 2 is 8 bytes per group of four keys */
			write_csr(dd, RCV_PARTITION_KEY +
				  ((i - 3) * 2), reg);
			reg = 0;
		}
	}

	/* Always enable HW pkeys check when pkeys table is set */
	add_rcvctrl(dd, RCV_CTRL_RCV_PARTITION_KEY_ENABLE_SMASK);
}

/*
 * These CSRs and memories are uninitialized on reset and must be
 * written before reading to set the ECC/parity bits.
 *
 * NOTE: All user context CSRs that are not mmaped write-only
 * (e.g.
the TID flows) must be initialized even if the driver never * reads them. */ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) { int i, j; /* CceIntMap */ for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++) write_csr(dd, CCE_INT_MAP+(8*i), 0); /* SendCtxtCreditReturnAddr */ for (i = 0; i < dd->chip_send_contexts; i++) write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0); /* PIO Send buffers */ /* SDMA Send buffers */ /* These are not normally read, and (presently) have no method to be read, so are not pre-initialized */ /* RcvHdrAddr */ /* RcvHdrTailAddr */ /* RcvTidFlowTable */ for (i = 0; i < dd->chip_rcv_contexts; i++) { write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0); write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0); for (j = 0; j < RXE_NUM_TID_FLOWS; j++) write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE+(8*j), 0); } /* RcvArray */ for (i = 0; i < dd->chip_rcv_array_count; i++) write_csr(dd, RCV_ARRAY + (8*i), RCV_ARRAY_RT_WRITE_ENABLE_SMASK); /* RcvQPMapTable */ for (i = 0; i < 32; i++) write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0); } /* * Use the ctrl_bits in CceCtrl to clear the status_bits in CceStatus. */ static void clear_cce_status(struct hfi1_devdata *dd, u64 status_bits, u64 ctrl_bits) { unsigned long timeout; u64 reg; /* is the condition present? 
*/ reg = read_csr(dd, CCE_STATUS); if ((reg & status_bits) == 0) return; /* clear the condition */ write_csr(dd, CCE_CTRL, ctrl_bits); /* wait for the condition to clear */ timeout = jiffies + msecs_to_jiffies(CCE_STATUS_TIMEOUT); while (1) { reg = read_csr(dd, CCE_STATUS); if ((reg & status_bits) == 0) return; if (time_after(jiffies, timeout)) { dd_dev_err(dd, "Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n", status_bits, reg & status_bits); return; } udelay(1); } } /* set CCE CSRs to chip reset defaults */ static void reset_cce_csrs(struct hfi1_devdata *dd) { int i; /* CCE_REVISION read-only */ /* CCE_REVISION2 read-only */ /* CCE_CTRL - bits clear automatically */ /* CCE_STATUS read-only, use CceCtrl to clear */ clear_cce_status(dd, ALL_FROZE, CCE_CTRL_SPC_UNFREEZE_SMASK); clear_cce_status(dd, ALL_TXE_PAUSE, CCE_CTRL_TXE_RESUME_SMASK); clear_cce_status(dd, ALL_RXE_PAUSE, CCE_CTRL_RXE_RESUME_SMASK); for (i = 0; i < CCE_NUM_SCRATCH; i++) write_csr(dd, CCE_SCRATCH + (8 * i), 0); /* CCE_ERR_STATUS read-only */ write_csr(dd, CCE_ERR_MASK, 0); write_csr(dd, CCE_ERR_CLEAR, ~0ull); /* CCE_ERR_FORCE leave alone */ for (i = 0; i < CCE_NUM_32_BIT_COUNTERS; i++) write_csr(dd, CCE_COUNTER_ARRAY32 + (8 * i), 0); write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_RESETCSR); /* CCE_PCIE_CTRL leave alone */ for (i = 0; i < CCE_NUM_MSIX_VECTORS; i++) { write_csr(dd, CCE_MSIX_TABLE_LOWER + (8 * i), 0); write_csr(dd, CCE_MSIX_TABLE_UPPER + (8 * i), CCE_MSIX_TABLE_UPPER_RESETCSR); } for (i = 0; i < CCE_NUM_MSIX_PBAS; i++) { /* CCE_MSIX_PBA read-only */ write_csr(dd, CCE_MSIX_INT_GRANTED, ~0ull); write_csr(dd, CCE_MSIX_VEC_CLR_WITHOUT_INT, ~0ull); } for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++) write_csr(dd, CCE_INT_MAP, 0); for (i = 0; i < CCE_NUM_INT_CSRS; i++) { /* CCE_INT_STATUS read-only */ write_csr(dd, CCE_INT_MASK + (8 * i), 0); write_csr(dd, CCE_INT_CLEAR + (8 * i), ~0ull); /* CCE_INT_FORCE leave alone */ /* CCE_INT_BLOCKED read-only */ } for (i = 0; i < 
CCE_NUM_32_BIT_INT_COUNTERS; i++) write_csr(dd, CCE_INT_COUNTER_ARRAY32 + (8 * i), 0); } /* set ASIC CSRs to chip reset defaults */ static void reset_asic_csrs(struct hfi1_devdata *dd) { int i; /* * If the HFIs are shared between separate nodes or VMs, * then more will need to be done here. One idea is a module * parameter that returns early, letting the first power-on or * a known first load do the reset and blocking all others. */ if (!(dd->flags & HFI1_DO_INIT_ASIC)) return; if (dd->icode != ICODE_FPGA_EMULATION) { /* emulation does not have an SBus - leave these alone */ /* * All writes to ASIC_CFG_SBUS_REQUEST do something. * Notes: * o The reset is not zero if aimed at the core. See the * SBus documentation for details. * o If the SBus firmware has been updated (e.g. by the BIOS), * will the reset revert that? */ /* ASIC_CFG_SBUS_REQUEST leave alone */ write_csr(dd, ASIC_CFG_SBUS_EXECUTE, 0); } /* ASIC_SBUS_RESULT read-only */ write_csr(dd, ASIC_STS_SBUS_COUNTERS, 0); for (i = 0; i < ASIC_NUM_SCRATCH; i++) write_csr(dd, ASIC_CFG_SCRATCH + (8 * i), 0); write_csr(dd, ASIC_CFG_MUTEX, 0); /* this will clear it */ /* We might want to retain this state across FLR if we ever use it */ write_csr(dd, ASIC_CFG_DRV_STR, 0); write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0); /* ASIC_STS_THERM read-only */ /* ASIC_CFG_RESET leave alone */ write_csr(dd, ASIC_PCIE_SD_HOST_CMD, 0); /* ASIC_PCIE_SD_HOST_STATUS read-only */ write_csr(dd, ASIC_PCIE_SD_INTRPT_DATA_CODE, 0); write_csr(dd, ASIC_PCIE_SD_INTRPT_ENABLE, 0); /* ASIC_PCIE_SD_INTRPT_PROGRESS read-only */ write_csr(dd, ASIC_PCIE_SD_INTRPT_STATUS, ~0ull); /* clear */ /* ASIC_HFI0_PCIE_SD_INTRPT_RSPD_DATA read-only */ /* ASIC_HFI1_PCIE_SD_INTRPT_RSPD_DATA read-only */ for (i = 0; i < 16; i++) write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (8 * i), 0); /* ASIC_GPIO_IN read-only */ write_csr(dd, ASIC_GPIO_OE, 0); write_csr(dd, ASIC_GPIO_INVERT, 0); write_csr(dd, ASIC_GPIO_OUT, 0); write_csr(dd, ASIC_GPIO_MASK, 0); /* ASIC_GPIO_STATUS 
read-only */
	write_csr(dd, ASIC_GPIO_CLEAR, ~0ull);
	/* ASIC_GPIO_FORCE leave alone */

	/* ASIC_QSFP1_IN read-only */
	write_csr(dd, ASIC_QSFP1_OE, 0);
	write_csr(dd, ASIC_QSFP1_INVERT, 0);
	write_csr(dd, ASIC_QSFP1_OUT, 0);
	write_csr(dd, ASIC_QSFP1_MASK, 0);
	/* ASIC_QSFP1_STATUS read-only */
	write_csr(dd, ASIC_QSFP1_CLEAR, ~0ull);
	/* ASIC_QSFP1_FORCE leave alone */

	/* ASIC_QSFP2_IN read-only */
	write_csr(dd, ASIC_QSFP2_OE, 0);
	write_csr(dd, ASIC_QSFP2_INVERT, 0);
	write_csr(dd, ASIC_QSFP2_OUT, 0);
	write_csr(dd, ASIC_QSFP2_MASK, 0);
	/* ASIC_QSFP2_STATUS read-only */
	write_csr(dd, ASIC_QSFP2_CLEAR, ~0ull);
	/* ASIC_QSFP2_FORCE leave alone */

	write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_RESETCSR);
	/* this also writes a NOP command, clearing paging mode */
	write_csr(dd, ASIC_EEP_ADDR_CMD, 0);
	write_csr(dd, ASIC_EEP_DATA, 0);
}

/*
 * Set MISC CSRs to chip reset defaults.
 *
 * Zeros the 32 RSA R2/signature/modulus CSRs, writes 1 to the RSA
 * command CSR, zeros RSA MU and firmware control, then masks and
 * clears all MISC errors.
 */
static void reset_misc_csrs(struct hfi1_devdata *dd)
{
	int i;

	for (i = 0; i < 32; i++) {
		write_csr(dd, MISC_CFG_RSA_R2 + (8 * i), 0);
		write_csr(dd, MISC_CFG_RSA_SIGNATURE + (8 * i), 0);
		write_csr(dd, MISC_CFG_RSA_MODULUS + (8 * i), 0);
	}
	/*
	 * MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
	 * only be written in 128-byte chunks
	 */
	/* init RSA engine to clear lingering errors */
	write_csr(dd, MISC_CFG_RSA_CMD, 1);
	write_csr(dd, MISC_CFG_RSA_MU, 0);
	write_csr(dd, MISC_CFG_FW_CTRL, 0);
	/* MISC_STS_8051_DIGEST read-only */
	/* MISC_STS_SBM_DIGEST read-only */
	/* MISC_STS_PCIE_DIGEST read-only */
	/* MISC_STS_FAB_DIGEST read-only */
	/* MISC_ERR_STATUS read-only */
	write_csr(dd, MISC_ERR_MASK, 0);
	write_csr(dd, MISC_ERR_CLEAR, ~0ull);
	/* MISC_ERR_FORCE leave alone */
}

/*
 * Set TXE CSRs to chip reset defaults.
 *
 * Three passes: the global (kernel) send CSRs, then the per-send-context
 * CSRs for every chip send context, then the per-SDMA-engine CSRs for
 * every chip SDMA engine.  Error masks are zeroed and error-clear CSRs
 * are written with all ones.
 */
static void reset_txe_csrs(struct hfi1_devdata *dd)
{
	int i;

	/*
	 * TXE Kernel CSRs
	 */
	write_csr(dd, SEND_CTRL, 0);
	__cm_reset(dd, 0);	/* reset CM internal state */
	/* SEND_CONTEXTS read-only */
	/* SEND_DMA_ENGINES read-only */
	/* SEND_PIO_MEM_SIZE read-only */
	/* SEND_DMA_MEM_SIZE read-only */
	write_csr(dd, SEND_HIGH_PRIORITY_LIMIT, 0);
	pio_reset_all(dd);	/* SEND_PIO_INIT_CTXT */
	/* SEND_PIO_ERR_STATUS read-only */
	write_csr(dd, SEND_PIO_ERR_MASK, 0);
	write_csr(dd, SEND_PIO_ERR_CLEAR, ~0ull);
	/* SEND_PIO_ERR_FORCE leave alone */
	/* SEND_DMA_ERR_STATUS read-only */
	write_csr(dd, SEND_DMA_ERR_MASK, 0);
	write_csr(dd, SEND_DMA_ERR_CLEAR, ~0ull);
	/* SEND_DMA_ERR_FORCE leave alone */
	/* SEND_EGRESS_ERR_STATUS read-only */
	write_csr(dd, SEND_EGRESS_ERR_MASK, 0);
	write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~0ull);
	/* SEND_EGRESS_ERR_FORCE leave alone */
	write_csr(dd, SEND_BTH_QP, 0);
	write_csr(dd, SEND_STATIC_RATE_CONTROL, 0);
	write_csr(dd, SEND_SC2VLT0, 0);
	write_csr(dd, SEND_SC2VLT1, 0);
	write_csr(dd, SEND_SC2VLT2, 0);
	write_csr(dd, SEND_SC2VLT3, 0);
	write_csr(dd, SEND_LEN_CHECK0, 0);
	write_csr(dd, SEND_LEN_CHECK1, 0);
	/* SEND_ERR_STATUS read-only */
	write_csr(dd, SEND_ERR_MASK, 0);
	write_csr(dd, SEND_ERR_CLEAR, ~0ull);
	/* SEND_ERR_FORCE read-only */
	for (i = 0; i < VL_ARB_LOW_PRIO_TABLE_SIZE; i++)
		write_csr(dd, SEND_LOW_PRIORITY_LIST + (8*i), 0);
	for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++)
		write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8*i), 0);
	/* one CSR per set of NUM_CONTEXTS_PER_SET send contexts */
	for (i = 0; i < dd->chip_send_contexts/NUM_CONTEXTS_PER_SET; i++)
		write_csr(dd, SEND_CONTEXT_SET_CTRL + (8*i), 0);
	for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++)
		write_csr(dd, SEND_COUNTER_ARRAY32 + (8*i), 0);
	for (i = 0; i < TXE_NUM_64_BIT_COUNTER; i++)
		write_csr(dd, SEND_COUNTER_ARRAY64 + (8*i), 0);
	write_csr(dd, SEND_CM_CTRL, SEND_CM_CTRL_RESETCSR);
	write_csr(dd, SEND_CM_GLOBAL_CREDIT,
		  SEND_CM_GLOBAL_CREDIT_RESETCSR);
	/* SEND_CM_CREDIT_USED_STATUS read-only */
	write_csr(dd, SEND_CM_TIMER_CTRL, 0);
	write_csr(dd, SEND_CM_LOCAL_AU_TABLE0_TO3, 0);
	write_csr(dd, SEND_CM_LOCAL_AU_TABLE4_TO7, 0);
	write_csr(dd, SEND_CM_REMOTE_AU_TABLE0_TO3, 0);
	write_csr(dd, SEND_CM_REMOTE_AU_TABLE4_TO7, 0);
	for (i = 0; i < TXE_NUM_DATA_VL; i++)
		write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
	write_csr(dd, SEND_CM_CREDIT_VL15, 0);
	/* SEND_CM_CREDIT_USED_VL read-only */
	/* SEND_CM_CREDIT_USED_VL15 read-only */
	/* SEND_EGRESS_CTXT_STATUS read-only */
	/* SEND_EGRESS_SEND_DMA_STATUS read-only */
	/*
	 * NOTE(review): the write below and the "read-only" remark that
	 * follows it disagree - confirm which one is correct.
	 */
	write_csr(dd, SEND_EGRESS_ERR_INFO, ~0ull);
	/* SEND_EGRESS_ERR_INFO read-only */
	/* SEND_EGRESS_ERR_SOURCE read-only */

	/*
	 * TXE Per-Context CSRs
	 */
	for (i = 0; i < dd->chip_send_contexts; i++) {
		write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
		write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_CTRL, 0);
		write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
		write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_FORCE, 0);
		write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, 0);
		write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~0ull);
		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_ENABLE, 0);
		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_VL, 0);
		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_JOB_KEY, 0);
		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_PARTITION_KEY, 0);
		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, 0);
		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_OPCODE, 0);
	}

	/*
	 * TXE Per-SDMA CSRs
	 */
	for (i = 0; i < dd->chip_sdma_engines; i++) {
		write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
		/* SEND_DMA_STATUS read-only */
		write_kctxt_csr(dd, i, SEND_DMA_BASE_ADDR, 0);
		write_kctxt_csr(dd, i, SEND_DMA_LEN_GEN, 0);
		write_kctxt_csr(dd, i, SEND_DMA_TAIL, 0);
		/* SEND_DMA_HEAD read-only */
		write_kctxt_csr(dd, i, SEND_DMA_HEAD_ADDR, 0);
		write_kctxt_csr(dd, i, SEND_DMA_PRIORITY_THLD, 0);
		/* SEND_DMA_IDLE_CNT read-only */
		write_kctxt_csr(dd, i, SEND_DMA_RELOAD_CNT, 0);
		write_kctxt_csr(dd, i, SEND_DMA_DESC_CNT, 0);
		/* SEND_DMA_DESC_FETCHED_CNT read-only */
		/* SEND_DMA_ENG_ERR_STATUS read-only */
		write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, 0);
		write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~0ull);
		/* SEND_DMA_ENG_ERR_FORCE leave alone */
		write_kctxt_csr(dd, i, SEND_DMA_CHECK_ENABLE, 0);
		write_kctxt_csr(dd, i, SEND_DMA_CHECK_VL, 0);
		write_kctxt_csr(dd, i, SEND_DMA_CHECK_JOB_KEY, 0);
		write_kctxt_csr(dd, i, SEND_DMA_CHECK_PARTITION_KEY, 0);
		write_kctxt_csr(dd, i, SEND_DMA_CHECK_SLID, 0);
		write_kctxt_csr(dd, i, SEND_DMA_CHECK_OPCODE, 0);
		write_kctxt_csr(dd, i, SEND_DMA_MEMORY, 0);
	}
}

/*
 * Initialize the receive buffers (RBufs): wait for in-flight receive
 * DMA to drain, request the RBuf init through RcvCtrl, then poll
 * RcvStatus until the init is reported done.  Both waits are bounded;
 * on timeout an error is logged and the routine carries on.
 *
 * Expect on entry:
 * o Packet ingress is disabled, i.e. RcvCtrl.RcvPortEnable == 0
 */
static void init_rbufs(struct hfi1_devdata *dd)
{
	u64 reg;
	int count;

	/*
	 * Wait for DMA to stop: RxRbufPktPending and RxPktInProgress are
	 * clear.
	 */
	count = 0;
	while (1) {
		reg = read_csr(dd, RCV_STATUS);
		if ((reg & (RCV_STATUS_RX_RBUF_PKT_PENDING_SMASK
			    | RCV_STATUS_RX_PKT_IN_PROGRESS_SMASK)) == 0)
			break;
		/*
		 * Give up after 1ms - maximum wait time.
		 *
		 * RBuf size is 148KiB.  Slowest possible is PCIe Gen1 x1 at
		 * 250MB/s bandwidth.  Lower rate to 66% for overhead to get:
		 *	148 KB / (66% * 250MB/s) = 920us
		 */
		if (count++ > 500) {
			dd_dev_err(dd,
				"%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
				__func__, reg);
			break;
		}
		udelay(2); /* do not busy-wait the CSR */
	}

	/* start the init - expect RcvCtrl to be 0 */
	write_csr(dd, RCV_CTRL, RCV_CTRL_RX_RBUF_INIT_SMASK);

	/*
	 * Read to force the write of RcvCtrl.RxRbufInit.  There is a brief
	 * period after the write before RcvStatus.RxRbufInitDone is valid.
	 * The delay in the first run through the loop below is sufficient and
	 * required before the first read of RcvStatus.RxRbufInitDone.
*/
	read_csr(dd, RCV_CTRL);

	/* wait for the init to finish */
	count = 0;
	while (1) {
		/* delay is required first time through - see above */
		udelay(2); /* do not busy-wait the CSR */
		reg = read_csr(dd, RCV_STATUS);
		if (reg & (RCV_STATUS_RX_RBUF_INIT_DONE_SMASK))
			break;

		/* give up after 100us - slowest possible at 33MHz is 73us */
		if (count++ > 50) {
			dd_dev_err(dd,
				"%s: RcvStatus.RxRbufInit not set, continuing\n",
				__func__);
			break;
		}
	}
}

/*
 * Set RXE CSRs to chip reset defaults.
 *
 * Zeros RcvCtrl and re-initializes the receive buffers first, then
 * walks the global receive CSRs, and finally the kernel and user
 * per-context CSRs of every chip receive context.
 */
static void reset_rxe_csrs(struct hfi1_devdata *dd)
{
	int i, j;

	/*
	 * RXE Kernel CSRs
	 */
	write_csr(dd, RCV_CTRL, 0);
	/* RcvCtrl is now 0, which init_rbufs() expects on entry */
	init_rbufs(dd);
	/* RCV_STATUS read-only */
	/* RCV_CONTEXTS read-only */
	/* RCV_ARRAY_CNT read-only */
	/* RCV_BUF_SIZE read-only */
	write_csr(dd, RCV_BTH_QP, 0);
	write_csr(dd, RCV_MULTICAST, 0);
	write_csr(dd, RCV_BYPASS, 0);
	write_csr(dd, RCV_VL15, 0);
	/* this is a clear-down */
	write_csr(dd, RCV_ERR_INFO,
		  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
	/* RCV_ERR_STATUS read-only */
	write_csr(dd, RCV_ERR_MASK, 0);
	write_csr(dd, RCV_ERR_CLEAR, ~0ull);
	/* RCV_ERR_FORCE leave alone */
	for (i = 0; i < 32; i++)
		write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0);
	for (i = 0; i < 4; i++)
		write_csr(dd, RCV_PARTITION_KEY + (8 * i), 0);
	for (i = 0; i < RXE_NUM_32_BIT_COUNTERS; i++)
		write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0);
	for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++)
		write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0);
	for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) {
		write_csr(dd, RCV_RSM_CFG + (8 * i), 0);
		write_csr(dd, RCV_RSM_SELECT + (8 * i), 0);
		write_csr(dd, RCV_RSM_MATCH + (8 * i), 0);
	}
	for (i = 0; i < 32; i++)
		write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 0);

	/*
	 * RXE Kernel and User Per-Context CSRs
	 */
	for (i = 0; i < dd->chip_rcv_contexts; i++) {
		/* kernel */
		write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0);
		/* RCV_CTXT_STATUS read-only */
		write_kctxt_csr(dd, i, RCV_EGR_CTRL, 0);
		write_kctxt_csr(dd, i, RCV_TID_CTRL, 0);
		write_kctxt_csr(dd, i, RCV_KEY_CTRL, 0);
		write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
		write_kctxt_csr(dd, i, RCV_HDR_CNT, 0);
		write_kctxt_csr(dd, i, RCV_HDR_ENT_SIZE, 0);
		write_kctxt_csr(dd, i, RCV_HDR_SIZE, 0);
		write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
		write_kctxt_csr(dd, i, RCV_AVAIL_TIME_OUT, 0);
		write_kctxt_csr(dd, i, RCV_HDR_OVFL_CNT, 0);

		/* user */
		/* RCV_HDR_TAIL read-only */
		write_uctxt_csr(dd, i, RCV_HDR_HEAD, 0);
		/* RCV_EGR_INDEX_TAIL read-only */
		write_uctxt_csr(dd, i, RCV_EGR_INDEX_HEAD, 0);
		/* RCV_EGR_OFFSET_TAIL read-only */
		for (j = 0; j < RXE_NUM_TID_FLOWS; j++) {
			write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j),
				0);
		}
	}
}

/*
 * Set sc2vl tables.
 *
 * They power on to zeros, so to avoid send context errors
 * they need to be set:
 *
 * SC 0-7 -> VL 0-7 (respectively)
 * SC 15  -> VL 15
 * otherwise
 *        -> VL 0
 *
 * The same mapping is written to the send-side SC2VLT CSRs, to the DC
 * receive-side tables, and to the cached copy in dd->sc2vl.
 */
static void init_sc2vl_tables(struct hfi1_devdata *dd)
{
	int i;
	/* init per architecture spec, constrained by hardware capability */

	/* HFI maps sent packets */
	/* after the table number, arguments are (SC, VL) pairs */
	write_csr(dd, SEND_SC2VLT0, SC2VL_VAL(
		0,
		0, 0, 1, 1,
		2, 2, 3, 3,
		4, 4, 5, 5,
		6, 6, 7, 7));
	write_csr(dd, SEND_SC2VLT1, SC2VL_VAL(
		1,
		8, 0, 9, 0,
		10, 0, 11, 0,
		12, 0, 13, 0,
		14, 0, 15, 15));
	write_csr(dd, SEND_SC2VLT2, SC2VL_VAL(
		2,
		16, 0, 17, 0,
		18, 0, 19, 0,
		20, 0, 21, 0,
		22, 0, 23, 0));
	write_csr(dd, SEND_SC2VLT3, SC2VL_VAL(
		3,
		24, 0, 25, 0,
		26, 0, 27, 0,
		28, 0, 29, 0,
		30, 0, 31, 0));

	/* DC maps received packets */
	write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0, DC_SC_VL_VAL(
		15_0,
		0, 0, 1, 1,  2, 2,  3, 3,  4, 4,  5, 5,  6, 6,  7,  7,
		8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15, 15));
	write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16, DC_SC_VL_VAL(
		31_16,
		16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23, 0,
		24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31, 0));

	/* initialize the cached sc2vl values consistently with h/w */
	for (i = 0; i < 32; i++) {
		if (i < 8 || i == 15)
			*((u8 *)(dd->sc2vl) + i) = (u8)i;
		else
			*((u8 *)(dd->sc2vl) + i) = 0;
	}
}

/*
 * Read chip sizes and then reset parts to sane, disabled, values.
We cannot * depend on the chip going through a power-on reset - a driver may be loaded * and unloaded many times. * * Do not write any CSR values to the chip in this routine - there may be * a reset following the (possible) FLR in this routine. * */ static void init_chip(struct hfi1_devdata *dd) { int i; /* * Put the HFI CSRs in a known state. * Combine this with a DC reset. * * Stop the device from doing anything while we do a * reset. We know there are no other active users of * the device since we are now in charge. Turn off * off all outbound and inbound traffic and make sure * the device does not generate any interrupts. */ /* disable send contexts and SDMA engines */ write_csr(dd, SEND_CTRL, 0); for (i = 0; i < dd->chip_send_contexts; i++) write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0); for (i = 0; i < dd->chip_sdma_engines; i++) write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0); /* disable port (turn off RXE inbound traffic) and contexts */ write_csr(dd, RCV_CTRL, 0); for (i = 0; i < dd->chip_rcv_contexts; i++) write_csr(dd, RCV_CTXT_CTRL, 0); /* mask all interrupt sources */ for (i = 0; i < CCE_NUM_INT_CSRS; i++) write_csr(dd, CCE_INT_MASK + (8*i), 0ull); /* * DC Reset: do a full DC reset before the register clear. * A recommended length of time to hold is one CSR read, * so reread the CceDcCtrl. Then, hold the DC in reset * across the clear. */ write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK); (void) read_csr(dd, CCE_DC_CTRL); if (use_flr) { /* * A FLR will reset the SPC core and part of the PCIe. * The parts that need to be restored have already been * saved. 
*/ dd_dev_info(dd, "Resetting CSRs with FLR\n"); /* do the FLR, the DC reset will remain */ hfi1_pcie_flr(dd); /* restore command and BARs */ restore_pci_variables(dd); if (is_a0(dd)) { dd_dev_info(dd, "Resetting CSRs with FLR\n"); hfi1_pcie_flr(dd); restore_pci_variables(dd); } reset_asic_csrs(dd); } else { dd_dev_info(dd, "Resetting CSRs with writes\n"); reset_cce_csrs(dd); reset_txe_csrs(dd); reset_rxe_csrs(dd); reset_asic_csrs(dd); reset_misc_csrs(dd); } /* clear the DC reset */ write_csr(dd, CCE_DC_CTRL, 0); /* Set the LED off */ if (is_a0(dd)) setextled(dd, 0); /* * Clear the QSFP reset. * A0 leaves the out lines floating on power on, then on an FLR * enforces a 0 on all out pins. The driver does not touch * ASIC_QSFPn_OUT otherwise. This leaves RESET_N low and * anything plugged constantly in reset, if it pays attention * to RESET_N. * A prime example of this is SiPh. For now, set all pins high. * I2CCLK and I2CDAT will change per direction, and INT_N and * MODPRS_N are input only and their value is ignored. 
*/ if (is_a0(dd)) { write_csr(dd, ASIC_QSFP1_OUT, 0x1f); write_csr(dd, ASIC_QSFP2_OUT, 0x1f); } } static void init_early_variables(struct hfi1_devdata *dd) { int i; /* assign link credit variables */ dd->vau = CM_VAU; dd->link_credits = CM_GLOBAL_CREDITS; if (is_a0(dd)) dd->link_credits--; dd->vcu = cu_to_vcu(hfi1_cu); /* enough room for 8 MAD packets plus header - 17K */ dd->vl15_init = (8 * (2048 + 128)) / vau_to_au(dd->vau); if (dd->vl15_init > dd->link_credits) dd->vl15_init = dd->link_credits; write_uninitialized_csrs_and_memories(dd); if (HFI1_CAP_IS_KSET(PKEY_CHECK)) for (i = 0; i < dd->num_pports; i++) { struct hfi1_pportdata *ppd = &dd->pport[i]; set_partition_keys(ppd); } init_sc2vl_tables(dd); } static void init_kdeth_qp(struct hfi1_devdata *dd) { /* user changed the KDETH_QP */ if (kdeth_qp != 0 && kdeth_qp >= 0xff) { /* out of range or illegal value */ dd_dev_err(dd, "Invalid KDETH queue pair prefix, ignoring"); kdeth_qp = 0; } if (kdeth_qp == 0) /* not set, or failed range check */ kdeth_qp = DEFAULT_KDETH_QP; write_csr(dd, SEND_BTH_QP, (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK) << SEND_BTH_QP_KDETH_QP_SHIFT); write_csr(dd, RCV_BTH_QP, (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK) << RCV_BTH_QP_KDETH_QP_SHIFT); } /** * init_qpmap_table * @dd - device data * @first_ctxt - first context * @last_ctxt - first context * * This return sets the qpn mapping table that * is indexed by qpn[8:1]. * * The routine will round robin the 256 settings * from first_ctxt to last_ctxt. * * The first/last looks ahead to having specialized * receive contexts for mgmt and bypass. Normal * verbs traffic will assumed to be on a range * of receive contexts. 
*/ static void init_qpmap_table(struct hfi1_devdata *dd, u32 first_ctxt, u32 last_ctxt) { u64 reg = 0; u64 regno = RCV_QP_MAP_TABLE; int i; u64 ctxt = first_ctxt; for (i = 0; i < 256;) { if (ctxt == VL15CTXT) { ctxt++; if (ctxt > last_ctxt) ctxt = first_ctxt; continue; } reg |= ctxt << (8 * (i % 8)); i++; ctxt++; if (ctxt > last_ctxt) ctxt = first_ctxt; if (i % 8 == 0) { write_csr(dd, regno, reg); reg = 0; regno += 8; } } if (i % 8) write_csr(dd, regno, reg); add_rcvctrl(dd, RCV_CTRL_RCV_QP_MAP_ENABLE_SMASK | RCV_CTRL_RCV_BYPASS_ENABLE_SMASK); } /** * init_qos - init RX qos * @dd - device data * @first_context * * This routine initializes Rule 0 and the * RSM map table to implement qos. * * If all of the limit tests succeed, * qos is applied based on the array * interpretation of krcvqs where * entry 0 is VL0. * * The number of vl bits (n) and the number of qpn * bits (m) are computed to feed both the RSM map table * and the single rule. * */ static void init_qos(struct hfi1_devdata *dd, u32 first_ctxt) { u8 max_by_vl = 0; unsigned qpns_per_vl, ctxt, i, qpn, n = 1, m; u64 *rsmmap; u64 reg; u8 rxcontext = is_a0(dd) ? 0 : 0xff; /* 0 is default if a0 ver. 
*/ /* validate */ if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS || num_vls == 1 || krcvqsset <= 1) goto bail; for (i = 0; i < min_t(unsigned, num_vls, krcvqsset); i++) if (krcvqs[i] > max_by_vl) max_by_vl = krcvqs[i]; if (max_by_vl > 32) goto bail; qpns_per_vl = __roundup_pow_of_two(max_by_vl); /* determine bits vl */ n = ilog2(num_vls); /* determine bits for qpn */ m = ilog2(qpns_per_vl); if ((m + n) > 7) goto bail; if (num_vls * qpns_per_vl > dd->chip_rcv_contexts) goto bail; rsmmap = kmalloc_array(NUM_MAP_REGS, sizeof(u64), GFP_KERNEL); memset(rsmmap, rxcontext, NUM_MAP_REGS * sizeof(u64)); /* init the local copy of the table */ for (i = 0, ctxt = first_ctxt; i < num_vls; i++) { unsigned tctxt; for (qpn = 0, tctxt = ctxt; krcvqs[i] && qpn < qpns_per_vl; qpn++) { unsigned idx, regoff, regidx; /* generate index <= 128 */ idx = (qpn << n) ^ i; regoff = (idx % 8) * 8; regidx = idx / 8; reg = rsmmap[regidx]; /* replace 0xff with context number */ reg &= ~(RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK << regoff); reg |= (u64)(tctxt++) << regoff; rsmmap[regidx] = reg; if (tctxt == ctxt + krcvqs[i]) tctxt = ctxt; } ctxt += krcvqs[i]; } /* flush cached copies to chip */ for (i = 0; i < NUM_MAP_REGS; i++) write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rsmmap[i]); /* add rule0 */ write_csr(dd, RCV_RSM_CFG /* + (8 * 0) */, RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK << RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT | 2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT); write_csr(dd, RCV_RSM_SELECT /* + (8 * 0) */, LRH_BTH_MATCH_OFFSET << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT | LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT | LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT | ((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT | QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT | ((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT); write_csr(dd, RCV_RSM_MATCH /* + (8 * 0) */, LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT | LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT | LRH_SC_MASK << 
RCV_RSM_MATCH_MASK2_SHIFT | LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT); /* Enable RSM */ add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK); kfree(rsmmap); /* map everything else (non-VL15) to context 0 */ init_qpmap_table( dd, 0, 0); dd->qos_shift = n + 1; return; bail: dd->qos_shift = 1; init_qpmap_table( dd, dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0, dd->n_krcv_queues - 1); } static void init_rxe(struct hfi1_devdata *dd) { /* enable all receive errors */ write_csr(dd, RCV_ERR_MASK, ~0ull); /* setup QPN map table - start where VL15 context leaves off */ init_qos( dd, dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0); /* * make sure RcvCtrl.RcvWcb <= PCIe Device Control * Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config * space, PciCfgCap2.MaxPayloadSize in HFI). There is only one * invalid configuration: RcvCtrl.RcvWcb set to its max of 256 and * Max_PayLoad_Size set to its minimum of 128. * * Presently, RcvCtrl.RcvWcb is not modified from its default of 0 * (64 bytes). Max_Payload_Size is possibly modified upward in * tune_pcie_caps() which is called after this routine. */ } static void init_other(struct hfi1_devdata *dd) { /* enable all CCE errors */ write_csr(dd, CCE_ERR_MASK, ~0ull); /* enable *some* Misc errors */ write_csr(dd, MISC_ERR_MASK, DRIVER_MISC_MASK); /* enable all DC errors, except LCB */ write_csr(dd, DCC_ERR_FLG_EN, ~0ull); write_csr(dd, DC_DC8051_ERR_EN, ~0ull); } /* * Fill out the given AU table using the given CU. A CU is defined in terms * AUs. The table is a an encoding: given the index, how many AUs does that * represent? * * NOTE: Assumes that the register layout is the same for the * local and remote tables. 
*/ static void assign_cm_au_table(struct hfi1_devdata *dd, u32 cu, u32 csr0to3, u32 csr4to7) { write_csr(dd, csr0to3, 0ull << SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT | 1ull << SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT | 2ull * cu << SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT | 4ull * cu << SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT); write_csr(dd, csr4to7, 8ull * cu << SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT | 16ull * cu << SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT | 32ull * cu << SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT | 64ull * cu << SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT); } static void assign_local_cm_au_table(struct hfi1_devdata *dd, u8 vcu) { assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_LOCAL_AU_TABLE0_TO3, SEND_CM_LOCAL_AU_TABLE4_TO7); } void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu) { assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_REMOTE_AU_TABLE0_TO3, SEND_CM_REMOTE_AU_TABLE4_TO7); } static void init_txe(struct hfi1_devdata *dd) { int i; /* enable all PIO, SDMA, general, and Egress errors */ write_csr(dd, SEND_PIO_ERR_MASK, ~0ull); write_csr(dd, SEND_DMA_ERR_MASK, ~0ull); write_csr(dd, SEND_ERR_MASK, ~0ull); write_csr(dd, SEND_EGRESS_ERR_MASK, ~0ull); /* enable all per-context and per-SDMA engine errors */ for (i = 0; i < dd->chip_send_contexts; i++) write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, ~0ull); for (i = 0; i < dd->chip_sdma_engines; i++) write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, ~0ull); /* set the local CU to AU mapping */ assign_local_cm_au_table(dd, dd->vcu); /* * Set reasonable default for Credit Return Timer * Don't set on Simulator - causes it to choke. 
*/ if (dd->icode != ICODE_FUNCTIONAL_SIMULATOR) write_csr(dd, SEND_CM_TIMER_CTRL, HFI1_CREDIT_RETURN_RATE); } int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey) { struct hfi1_ctxtdata *rcd = dd->rcd[ctxt]; unsigned sctxt; int ret = 0; u64 reg; if (!rcd || !rcd->sc) { ret = -EINVAL; goto done; } sctxt = rcd->sc->hw_context; reg = SEND_CTXT_CHECK_JOB_KEY_MASK_SMASK | /* mask is always 1's */ ((jkey & SEND_CTXT_CHECK_JOB_KEY_VALUE_MASK) << SEND_CTXT_CHECK_JOB_KEY_VALUE_SHIFT); /* JOB_KEY_ALLOW_PERMISSIVE is not allowed by default */ if (HFI1_CAP_KGET_MASK(rcd->flags, ALLOW_PERM_JKEY)) reg |= SEND_CTXT_CHECK_JOB_KEY_ALLOW_PERMISSIVE_SMASK; write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, reg); /* * Enable send-side J_KEY integrity check, unless this is A0 h/w * (due to A0 erratum). */ if (!is_a0(dd)) { reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE); reg |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg); } /* Enable J_KEY check on receive context. */ reg = RCV_KEY_CTRL_JOB_KEY_ENABLE_SMASK | ((jkey & RCV_KEY_CTRL_JOB_KEY_VALUE_MASK) << RCV_KEY_CTRL_JOB_KEY_VALUE_SHIFT); write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, reg); done: return ret; } int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt) { struct hfi1_ctxtdata *rcd = dd->rcd[ctxt]; unsigned sctxt; int ret = 0; u64 reg; if (!rcd || !rcd->sc) { ret = -EINVAL; goto done; } sctxt = rcd->sc->hw_context; write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, 0); /* * Disable send-side J_KEY integrity check, unless this is A0 h/w. * This check would not have been enabled for A0 h/w, see * set_ctxt_jkey(). 
*/ if (!is_a0(dd)) { reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE); reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg); } /* Turn off the J_KEY on the receive side */ write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, 0); done: return ret; } int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey) { struct hfi1_ctxtdata *rcd; unsigned sctxt; int ret = 0; u64 reg; if (ctxt < dd->num_rcv_contexts) rcd = dd->rcd[ctxt]; else { ret = -EINVAL; goto done; } if (!rcd || !rcd->sc) { ret = -EINVAL; goto done; } sctxt = rcd->sc->hw_context; reg = ((u64)pkey & SEND_CTXT_CHECK_PARTITION_KEY_VALUE_MASK) << SEND_CTXT_CHECK_PARTITION_KEY_VALUE_SHIFT; write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg); reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE); reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK; write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg); done: return ret; } int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt) { struct hfi1_ctxtdata *rcd; unsigned sctxt; int ret = 0; u64 reg; if (ctxt < dd->num_rcv_contexts) rcd = dd->rcd[ctxt]; else { ret = -EINVAL; goto done; } if (!rcd || !rcd->sc) { ret = -EINVAL; goto done; } sctxt = rcd->sc->hw_context; reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE); reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK; write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg); write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0); done: return ret; } /* * Start doing the clean up the the chip. Our clean up happens in multiple * stages and this is just the first. */ void hfi1_start_cleanup(struct hfi1_devdata *dd) { free_cntrs(dd); free_rcverr(dd); clean_up_interrupts(dd); } #define HFI_BASE_GUID(dev) \ ((dev)->base_guid & ~(1ULL << GUID_HFI_INDEX_SHIFT)) /* * Certain chip functions need to be initialized only once per asic * instead of per-device. 
This function finds the peer device and * checks whether that chip initialization needs to be done by this * device. */ static void asic_should_init(struct hfi1_devdata *dd) { unsigned long flags; struct hfi1_devdata *tmp, *peer = NULL; spin_lock_irqsave(&hfi1_devs_lock, flags); /* Find our peer device */ list_for_each_entry(tmp, &hfi1_dev_list, list) { if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(tmp)) && dd->unit != tmp->unit) { peer = tmp; break; } } /* * "Claim" the ASIC for initialization if it hasn't been " "claimed" yet. */ if (!peer || !(peer->flags & HFI1_DO_INIT_ASIC)) dd->flags |= HFI1_DO_INIT_ASIC; spin_unlock_irqrestore(&hfi1_devs_lock, flags); } /** * Allocate and initialize the device structure for the hfi. * @dev: the pci_dev for hfi1_ib device * @ent: pci_device_id struct for this dev * * Also allocates, initializes, and returns the devdata struct for this * device instance * * This is global, and is called directly at init to set up the * chip-specific function pointers for later use. 
*/ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, const struct pci_device_id *ent) { struct hfi1_devdata *dd; struct hfi1_pportdata *ppd; u64 reg; int i, ret; static const char * const inames[] = { /* implementation names */ "RTL silicon", "RTL VCS simulation", "RTL FPGA emulation", "Functional simulator" }; dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS * sizeof(struct hfi1_pportdata)); if (IS_ERR(dd)) goto bail; ppd = dd->pport; for (i = 0; i < dd->num_pports; i++, ppd++) { int vl; /* init common fields */ hfi1_init_pportdata(pdev, ppd, dd, 0, 1); /* DC supports 4 link widths */ ppd->link_width_supported = OPA_LINK_WIDTH_1X | OPA_LINK_WIDTH_2X | OPA_LINK_WIDTH_3X | OPA_LINK_WIDTH_4X; ppd->link_width_downgrade_supported = ppd->link_width_supported; /* start out enabling only 4X */ ppd->link_width_enabled = OPA_LINK_WIDTH_4X; ppd->link_width_downgrade_enabled = ppd->link_width_downgrade_supported; /* link width active is 0 when link is down */ /* link width downgrade active is 0 when link is down */ if (num_vls < HFI1_MIN_VLS_SUPPORTED || num_vls > HFI1_MAX_VLS_SUPPORTED) { hfi1_early_err(&pdev->dev, "Invalid num_vls %u, using %u VLs\n", num_vls, HFI1_MAX_VLS_SUPPORTED); num_vls = HFI1_MAX_VLS_SUPPORTED; } ppd->vls_supported = num_vls; ppd->vls_operational = ppd->vls_supported; /* Set the default MTU. */ for (vl = 0; vl < num_vls; vl++) dd->vld[vl].mtu = hfi1_max_mtu; dd->vld[15].mtu = MAX_MAD_PACKET; /* * Set the initial values to reasonable default, will be set * for real when link is up. 
*/ ppd->lstate = IB_PORT_DOWN; ppd->overrun_threshold = 0x4; ppd->phy_error_threshold = 0xf; ppd->port_crc_mode_enabled = link_crc_mask; /* initialize supported LTP CRC mode */ ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8; /* initialize enabled LTP CRC mode */ ppd->port_ltp_crc_mode |= cap_to_port_ltp(link_crc_mask) << 4; /* start in offline */ ppd->host_link_state = HLS_DN_OFFLINE; init_vl_arb_caches(ppd); } dd->link_default = HLS_DN_POLL; /* * Do remaining PCIe setup and save PCIe values in dd. * Any error printing is already done by the init code. * On return, we have the chip mapped. */ ret = hfi1_pcie_ddinit(dd, pdev, ent); if (ret < 0) goto bail_free; /* verify that reads actually work, save revision for reset check */ dd->revision = read_csr(dd, CCE_REVISION); if (dd->revision == ~(u64)0) { dd_dev_err(dd, "cannot read chip CSRs\n"); ret = -EINVAL; goto bail_cleanup; } dd->majrev = (dd->revision >> CCE_REVISION_CHIP_REV_MAJOR_SHIFT) & CCE_REVISION_CHIP_REV_MAJOR_MASK; dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT) & CCE_REVISION_CHIP_REV_MINOR_MASK; /* obtain the hardware ID - NOT related to unit, which is a software enumeration */ reg = read_csr(dd, CCE_REVISION2); dd->hfi1_id = (reg >> CCE_REVISION2_HFI_ID_SHIFT) & CCE_REVISION2_HFI_ID_MASK; /* the variable size will remove unwanted bits */ dd->icode = reg >> CCE_REVISION2_IMPL_CODE_SHIFT; dd->irev = reg >> CCE_REVISION2_IMPL_REVISION_SHIFT; dd_dev_info(dd, "Implementation: %s, revision 0x%x\n", dd->icode < ARRAY_SIZE(inames) ? 
inames[dd->icode] : "unknown", (int)dd->irev); /* speeds the hardware can support */ dd->pport->link_speed_supported = OPA_LINK_SPEED_25G; /* speeds allowed to run at */ dd->pport->link_speed_enabled = dd->pport->link_speed_supported; /* give a reasonable active value, will be set on link up */ dd->pport->link_speed_active = OPA_LINK_SPEED_25G; dd->chip_rcv_contexts = read_csr(dd, RCV_CONTEXTS); dd->chip_send_contexts = read_csr(dd, SEND_CONTEXTS); dd->chip_sdma_engines = read_csr(dd, SEND_DMA_ENGINES); dd->chip_pio_mem_size = read_csr(dd, SEND_PIO_MEM_SIZE); dd->chip_sdma_mem_size = read_csr(dd, SEND_DMA_MEM_SIZE); /* fix up link widths for emulation _p */ ppd = dd->pport; if (dd->icode == ICODE_FPGA_EMULATION && is_emulator_p(dd)) { ppd->link_width_supported = ppd->link_width_enabled = ppd->link_width_downgrade_supported = ppd->link_width_downgrade_enabled = OPA_LINK_WIDTH_1X; } /* insure num_vls isn't larger than number of sdma engines */ if (HFI1_CAP_IS_KSET(SDMA) && num_vls > dd->chip_sdma_engines) { dd_dev_err(dd, "num_vls %u too large, using %u VLs\n", num_vls, HFI1_MAX_VLS_SUPPORTED); ppd->vls_supported = num_vls = HFI1_MAX_VLS_SUPPORTED; ppd->vls_operational = ppd->vls_supported; } /* * Convert the ns parameter to the 64 * cclocks used in the CSR. * Limit the max if larger than the field holds. If timeout is * non-zero, then the calculated field will be at least 1. * * Must be after icode is set up - the cclock rate depends * on knowing the hardware being used. */ dd->rcv_intr_timeout_csr = ns_to_cclock(dd, rcv_intr_timeout) / 64; if (dd->rcv_intr_timeout_csr > RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK) dd->rcv_intr_timeout_csr = RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK; else if (dd->rcv_intr_timeout_csr == 0 && rcv_intr_timeout) dd->rcv_intr_timeout_csr = 1; /* needs to be done before we look for the peer device */ read_guid(dd); /* should this device init the ASIC block? 
*/ asic_should_init(dd); /* obtain chip sizes, reset chip CSRs */ init_chip(dd); /* read in the PCIe link speed information */ ret = pcie_speeds(dd); if (ret) goto bail_cleanup; /* read in firmware */ ret = hfi1_firmware_init(dd); if (ret) goto bail_cleanup; /* * In general, the PCIe Gen3 transition must occur after the * chip has been idled (so it won't initiate any PCIe transactions * e.g. an interrupt) and before the driver changes any registers * (the transition will reset the registers). * * In particular, place this call after: * - init_chip() - the chip will not initiate any PCIe transactions * - pcie_speeds() - reads the current link speed * - hfi1_firmware_init() - the needed firmware is ready to be * downloaded */ ret = do_pcie_gen3_transition(dd); if (ret) goto bail_cleanup; /* start setting dd values and adjusting CSRs */ init_early_variables(dd); parse_platform_config(dd); /* add board names as they are defined */ dd->boardname = kmalloc(64, GFP_KERNEL); if (!dd->boardname) goto bail_cleanup; snprintf(dd->boardname, 64, "Board ID 0x%llx", dd->revision >> CCE_REVISION_BOARD_ID_LOWER_NIBBLE_SHIFT & CCE_REVISION_BOARD_ID_LOWER_NIBBLE_MASK); snprintf(dd->boardversion, BOARD_VERS_MAX, "ChipABI %u.%u, %s, ChipRev %u.%u, SW Compat %llu\n", HFI1_CHIP_VERS_MAJ, HFI1_CHIP_VERS_MIN, dd->boardname, (u32)dd->majrev, (u32)dd->minrev, (dd->revision >> CCE_REVISION_SW_SHIFT) & CCE_REVISION_SW_MASK); ret = set_up_context_variables(dd); if (ret) goto bail_cleanup; /* set initial RXE CSRs */ init_rxe(dd); /* set initial TXE CSRs */ init_txe(dd); /* set initial non-RXE, non-TXE CSRs */ init_other(dd); /* set up KDETH QP prefix in both RX and TX CSRs */ init_kdeth_qp(dd); /* send contexts must be set up before receive contexts */ ret = init_send_contexts(dd); if (ret) goto bail_cleanup; ret = hfi1_create_ctxts(dd); if (ret) goto bail_cleanup; dd->rcvhdrsize = DEFAULT_RCVHDRSIZE; /* * rcd[0] is guaranteed to be valid by this point. 
Also, all
	 * context are using the same value, as per the module parameter.
	 */
	dd->rhf_offset = dd->rcd[0]->rcvhdrqentsize - sizeof(u64) / sizeof(u32);

	ret = init_pervl_scs(dd);
	if (ret)
		goto bail_cleanup;

	/* sdma init */
	for (i = 0; i < dd->num_pports; ++i) {
		ret = sdma_init(dd, i);
		if (ret)
			goto bail_cleanup;
	}

	/* use contexts created by hfi1_create_ctxts */
	ret = set_up_interrupts(dd);
	if (ret)
		goto bail_cleanup;

	/* set up LCB access - must be after set_up_interrupts() */
	init_lcb_access(dd);

	snprintf(dd->serial, SERIAL_MAX, "0x%08llx\n",
		dd->base_guid & 0xFFFFFF);

	dd->oui1 = dd->base_guid >> 56 & 0xFF;
	dd->oui2 = dd->base_guid >> 48 & 0xFF;
	dd->oui3 = dd->base_guid >> 40 & 0xFF;

	ret = load_firmware(dd); /* asymmetric with dispose_firmware() */
	if (ret)
		goto bail_clear_intr;
	check_fabric_firmware_versions(dd);

	thermal_init(dd);

	ret = init_cntrs(dd);
	if (ret)
		goto bail_clear_intr;

	ret = init_rcverr(dd);
	if (ret)
		goto bail_free_cntrs;

	ret = eprom_init(dd);
	if (ret)
		goto bail_free_rcverr;

	goto bail;

	/*
	 * Error unwind: each label falls through to the ones below it, and
	 * the function then returns ERR_PTR(ret) instead of dd.
	 */
bail_free_rcverr:
	free_rcverr(dd);
bail_free_cntrs:
	free_cntrs(dd);
bail_clear_intr:
	clean_up_interrupts(dd);
bail_cleanup:
	hfi1_pcie_ddcleanup(dd);
bail_free:
	hfi1_free_devdata(dd);
	dd = ERR_PTR(ret);
bail:
	return dd;
}

/*
 * delay_cycles - extra cycles needed to slow a packet to a desired rate
 * @ppd: port whose current_egress_rate is used as the reference rate
 * @desired_egress_rate: requested rate, compared against current_egress_rate
 * @dw_len: packet length in dwords; multiplied by 4 before being handed
 *          to egress_cycles() (presumably a byte count - confirm against
 *          egress_cycles())
 *
 * Returns egress_cycles() at the desired rate minus egress_cycles() at the
 * port's current rate, truncated to 16 bits.  Returns 0 when the desired
 * rate is -1 (all ones) or is not lower than the current rate.
 */
static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
			u32 dw_len)
{
	u32 delta_cycles;
	u32 current_egress_rate = ppd->current_egress_rate;
	/* rates here are in units of 10^6 bits/sec */

	/* the parameter is u32, so this matches the all-ones value */
	if (desired_egress_rate == -1)
		return 0; /* shouldn't happen */

	if (desired_egress_rate >= current_egress_rate)
		return 0; /* we can't help go faster, only slower */

	delta_cycles = egress_cycles(dw_len * 4, desired_egress_rate) -
			egress_cycles(dw_len * 4, current_egress_rate);

	/* only the low 16 bits of the difference are returned */
	return (u16)delta_cycles;
}

/**
 * create_pbc - build a pbc for transmission
 * @ppd: port data, handed to delay_cycles() when a static rate is given
 * @flags: special case flags or-ed in built pbc
 * @srate_mbs: static rate (0 leaves the static rate delay field at 0)
 * @vl: vl
 * @dw_len: dword length (header words + data words + pbc words)
 *
 * Create a PBC with the given flags, rate, VL, and length.
* * NOTE: The PBC created will not insert any HCRC - all callers but one are * for verbs, which does not use this PSM feature. The lone other caller * is for the diagnostic interface which calls this if the user does not * supply their own PBC. */ u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl, u32 dw_len) { u64 pbc, delay = 0; if (unlikely(srate_mbs)) delay = delay_cycles(ppd, srate_mbs, dw_len); pbc = flags | (delay << PBC_STATIC_RATE_CONTROL_COUNT_SHIFT) | ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT) | (vl & PBC_VL_MASK) << PBC_VL_SHIFT | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT; return pbc; } #define SBUS_THERMAL 0x4f #define SBUS_THERM_MONITOR_MODE 0x1 #define THERM_FAILURE(dev, ret, reason) \ dd_dev_err((dd), \ "Thermal sensor initialization failed: %s (%d)\n", \ (reason), (ret)) /* * Initialize the Avago Thermal sensor. * * After initialization, enable polling of thermal sensor through * SBus interface. In order for this to work, the SBus Master * firmware has to be loaded due to the fact that the HW polling * logic uses SBus interrupts, which are not supported with * default firmware. Otherwise, no data will be returned through * the ASIC_STS_THERM CSR. 
*/ static int thermal_init(struct hfi1_devdata *dd) { int ret = 0; if (dd->icode != ICODE_RTL_SILICON || !(dd->flags & HFI1_DO_INIT_ASIC)) return ret; acquire_hw_mutex(dd); dd_dev_info(dd, "Initializing thermal sensor\n"); /* Thermal Sensor Initialization */ /* Step 1: Reset the Thermal SBus Receiver */ ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0, RESET_SBUS_RECEIVER, 0); if (ret) { THERM_FAILURE(dd, ret, "Bus Reset"); goto done; } /* Step 2: Set Reset bit in Thermal block */ ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0, WRITE_SBUS_RECEIVER, 0x1); if (ret) { THERM_FAILURE(dd, ret, "Therm Block Reset"); goto done; } /* Step 3: Write clock divider value (100MHz -> 2MHz) */ ret = sbus_request_slow(dd, SBUS_THERMAL, 0x1, WRITE_SBUS_RECEIVER, 0x32); if (ret) { THERM_FAILURE(dd, ret, "Write Clock Div"); goto done; } /* Step 4: Select temperature mode */ ret = sbus_request_slow(dd, SBUS_THERMAL, 0x3, WRITE_SBUS_RECEIVER, SBUS_THERM_MONITOR_MODE); if (ret) { THERM_FAILURE(dd, ret, "Write Mode Sel"); goto done; } /* Step 5: De-assert block reset and start conversion */ ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0, WRITE_SBUS_RECEIVER, 0x2); if (ret) { THERM_FAILURE(dd, ret, "Write Reset Deassert"); goto done; } /* Step 5.1: Wait for first conversion (21.5ms per spec) */ msleep(22); /* Enable polling of thermal readings */ write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x1); done: release_hw_mutex(dd); return ret; } static void handle_temp_err(struct hfi1_devdata *dd) { struct hfi1_pportdata *ppd = &dd->pport[0]; /* * Thermal Critical Interrupt * Put the device into forced freeze mode, take link down to * offline, and put DC into reset. */ dd_dev_emerg(dd, "Critical temperature reached! Forcing device into freeze mode!\n"); dd->flags |= HFI1_FORCED_FREEZE; start_freeze_handling(ppd, FREEZE_SELF|FREEZE_ABORT); /* * Shut DC down as much and as quickly as possible. * * Step 1: Take the link down to OFFLINE. This will cause the * 8051 to put the Serdes in reset. 
However, we don't want to * go through the entire link state machine since we want to * shutdown ASAP. Furthermore, this is not a graceful shutdown * but rather an attempt to save the chip. * Code below is almost the same as quiet_serdes() but avoids * all the extra work and the sleeps. */ ppd->driver_link_ready = 0; ppd->link_enabled = 0; set_physical_link_state(dd, PLS_OFFLINE | (OPA_LINKDOWN_REASON_SMA_DISABLED << 8)); /* * Step 2: Shutdown LCB and 8051 * After shutdown, do not restore DC_CFG_RESET value. */ dc_shutdown(dd); }