/*
 * puma7_pp_init.c
 * Description:
 * Puma-7 Packet Processor initialization.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(C) 2020 - 2022 MaxLinear, Inc.
 * Copyright(c) 2016 - 2018 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 * The full GNU General Public License is included in this distribution
 * in the file called LICENSE.GPL.
 */

/*
 * Core kernel headers used by this module. The platform-specific PAL/CPPI 4.1,
 * Avalanche INTC/INTD, netss and netip memory-map headers are also required.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

#ifdef CONFIG_ARM_AVALANCHE_SOC
/* Interrupts: NPCPU */
#define DWC_REQUEST_IRQ(irq, handler, flags, name, dev) \
        request_irq(irq, handler, flags, name, dev)
#define DWC_FREE_IRQ(irq, dev)          free_irq(irq, dev)
#define DWC_ENABLE_IRQ(irq)             enable_irq(irq)
#define DWC_DISABLE_IRQ(irq)            disable_irq(irq)
#define DWC_DISABLE_IRQ_NOSYC(irq)      disable_irq_nosync(irq)
#define DWC_ACK_IRQ(irq)                ack_irq(irq)
#else
/* Interrupts: APPCPU */
#define DWC_REQUEST_IRQ(irq, handler, flags, name, dev) \
        netss_request_npcpu_irq(irq, name, handler, dev)
#define DWC_FREE_IRQ(irq, dev)          do {} while (0) /* TODO: need to implement free_irq for netss module */
#define DWC_ENABLE_IRQ(irq)             avalanche_intc_enable_irq(irq)
#define DWC_DISABLE_IRQ(irq)            avalanche_intc_disable_irq(irq)
#define DWC_DISABLE_IRQ_NOSYC(irq)      avalanche_intc_disable_irq(irq)
#define DWC_ACK_IRQ(irq)                avalanche_intc_clear_status(irq)
#endif

#define PAL_CPPI41_ACC_MAX_PAGE_ENTRIES     32
#define PAL_CPPI41_ACC_LIST_NULL_TERM       0
#define PAL_CPPI41_ACC_PACE_MODE_LASTINTR   1
#define PAL_CPPI41_ACC_PACE_TICK_CNT        40
#define PAL_CPPI41_ACC_MAX_PAGE_COUNT       2

#define NETDEV_TX_SERVICE_MAX   ((PAL_CPPI41_ACC_MAX_PAGE_ENTRIES - 1) * 2)

//#define TX_COMPLETE_NETDEV_USE_TASKLET
#define TX_COMPLETE_NETDEV_USE_NAPI

#if defined(TX_COMPLETE_NETDEV_USE_NAPI)
struct napi_struct gTxCompleteNapi;
struct net_device  dummyDev;
static int netdev_tx_poll(struct napi_struct *napi, int budget);
#elif defined(TX_COMPLETE_NETDEV_USE_TASKLET)
struct tasklet_struct gTxCompleteTasklet;   /* Tx completion processing tasklet */
#endif

static int __init_acc_channel(PAL_Handle pal_hnd, int chan_num, Cppi4Queue queue, PAL_Cppi4AccChHnd *acc_hnd);
irqreturn_t tx_complete_interrupt(int irq, void *dev);
Int32 __setup_txcomplete(PAL_Handle palHnd);
static int replace_npcpu_memory_for_queue(PAL_Handle palHnd, int qnum);
static int replace_npcpu_memory(PAL_Handle palHnd);
static int __init tx_comp_init(void);
static void __exit tx_comp_cleanup(void);

PAL_Cppi4AccChHnd    gTxCompleteAccChHnd[PAL_CPPI_PP_HOST2PP_TX_COMPLETE_ACC_CH_COUNT];
Ptr                  gTxCompleteAccListBase[PAL_CPPI_PP_HOST2PP_TX_COMPLETE_ACC_CH_COUNT];
Cppi4HostDescLinux **gTxCompleteAccList[PAL_CPPI_PP_HOST2PP_TX_COMPLETE_ACC_CH_COUNT];
PAL_Cppi4QueueHnd    gHost2ppFreeHostDescQueueHnd[PAL_CPPI_PP_HOST2PP_TX_COMPLETE_ACC_CH_COUNT];

/* Host free-descriptor queues whose NPCPU-owned buffers are replaced at init time */
static unsigned int q_info[] = {
    PAL_CPPI_PP_QMGR_G2_SHARED_LOW_INFRA_HOST_FD_Q_NUM,
    PAL_CPPI_PP_QMGR_G2_ATOM_HI_INFRA_HOST_FD_Q_NUM,
    PAL_CPPI_PP_QMGR_G2_WIFI_INFRA_HOST_RX_FD_Q_NUM,
    PAL_CPPI_PP_QMGR_G2_RGMII0_HI_INFRA_HOST_FD_Q_NUM,
    PAL_CPPI_PP_QMGR_G2_RGMII1_HI_INFRA_HOST_FD_Q_NUM,
    PAL_CPPI_PP_QMGR_G2_SGMII0_HI_INFRA_HOST_FD_Q_NUM,
    PAL_CPPI_PP_QMGR_G2_SGMII1_HI_INFRA_HOST_FD_Q_NUM,
    PAL_CPPI_PP_QMGR_G2_HOST2PP_LOW_HOST_FD_Q_NUM,
    PAL_CPPI_PP_QMGR_G2_HOST2PP_HI_HOST_FD_Q_NUM,
    PAL_CPPI_PP_QMGR_G2_DS_FW_MPEG_TS_FD_INT_MODE_Q_NUM,
};
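/*
 * get_list_entry_count() - APPCPU-only helper for the accumulator channel
 * configured in entry-count mode: the first word of the current list page
 * holds the number of valid descriptor entries. Poll (with a long timeout)
 * until the accumulator writes a non-zero count, clear the word for the next
 * cycle and advance the list pointer past it before returning the count.
 */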
#ifndef CONFIG_ARM_AVALANCHE_SOC
static inline int get_list_entry_count(int priority)
{
    int list_entry_count;
    unsigned long long timeout = 1ULL << 31;    /* long timeout */

    do {
        PAL_CPPI4_CACHE_INVALIDATE(gTxCompleteAccList[priority], sizeof(int *));
        list_entry_count = be32_to_cpu((unsigned long)*gTxCompleteAccList[priority]);
        pr_debug("%s:%d: list_entry_count %x\n", __func__, __LINE__, list_entry_count);
    } while (!list_entry_count && --timeout);
    BUG_ON(!timeout);

    /* Clear the count word for the next cycle, then skip over it */
    *gTxCompleteAccList[priority] = NULL;
    PAL_CPPI4_CACHE_INVALIDATE(gTxCompleteAccList[priority], sizeof(int *));
    gTxCompleteAccList[priority]++;

    return list_entry_count;
}
#endif
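/*
 * __do_tx_complete() - drain the Tx-completion accumulator channels, highest
 * priority first. For every ready page reported by INTD, free the skb
 * attached to each returned host descriptor, push the descriptor back to its
 * free host-descriptor queue, then move to the accumulator's next list page
 * and decrement the INTD page count. Runs as the NAPI poll body (bounded by
 * the budget) or as a tasklet, depending on the build-time selection above.
 */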
#if defined(TX_COMPLETE_NETDEV_USE_NAPI)
static int __do_tx_complete(struct net_device *dev, int budget)
#elif defined(TX_COMPLETE_NETDEV_USE_TASKLET)
static void __do_tx_complete(unsigned long data)
#else
#error "Please choose packet processing framework"
#endif // TX_COMPLETE_NETDEV_USE_NAPI
{
    Cppi4HostDescLinux *hostDesc;
    Uint32 packets_processed = 0;
    Int32 priority;

    /* Start with the high priority channel */
    for (priority = PAL_CPPI_PP_HOST2PP_TX_COMPLETE_ACC_CH_COUNT - 1; priority >= 0; priority--)
    {
        /* While there are ready pages... */
        while (avalanche_intd_get_interrupt_count(0, PAL_CPPI_PP_HOST2PP_TX_COMPLETE_ACC_CH_NUM(priority)) &&
               (packets_processed <= NETDEV_TX_SERVICE_MAX))
        {
#ifndef CONFIG_ARM_AVALANCHE_SOC
            int list_entry_count = get_list_entry_count(priority);

            BUG_ON(list_entry_count > PAL_CPPI41_ACC_MAX_PAGE_ENTRIES);

            while (list_entry_count--)
            {
                do {
                    PAL_CPPI4_CACHE_INVALIDATE(gTxCompleteAccList[priority], sizeof(int *));
                    hostDesc = (Cppi4HostDescLinux *)(be32_to_cpu((unsigned long)*gTxCompleteAccList[priority]) &
                                                      QMGR_QUEUE_N_REG_D_DESC_ADDR_MASK);
                    pr_debug("%s:%d: hostDesc 0x%lx\n", __func__, __LINE__, (unsigned long)hostDesc);
                } while (!hostDesc);
                *gTxCompleteAccList[priority] = NULL;
                PAL_CPPI4_CACHE_INVALIDATE(gTxCompleteAccList[priority], sizeof(int *));
#else
            /* While there are descriptors in the page... */
            while ((hostDesc = (Cppi4HostDescLinux *)((unsigned long)*gTxCompleteAccList[priority] &
                                                      QMGR_QUEUE_N_REG_D_DESC_ADDR_MASK)))
            {
#endif
                hostDesc = PAL_CPPI4_PHYS_2_VIRT((uintptr_t)hostDesc);
                PAL_CPPI4_CACHE_INVALIDATE(hostDesc, PAL_CPPI_PP_QMGR_GLOBAL_DEFAULT_DESC_SIZE);
                dev_kfree_skb_any((Ptr)be32_to_cpu(hostDesc->skb));
                hostDesc->skb = NULL;

                /* Queue the hostDesc back to the free pool */
                PAL_cppi4QueuePush(gHost2ppFreeHostDescQueueHnd[priority],
                                   (Ptr)PAL_CPPI4_VIRT_2_PHYS(hostDesc),
                                   PAL_CPPI4_DESCSIZE_2_QMGRSIZE(PAL_CPPI_PP_QMGR_GLOBAL_DEFAULT_DESC_SIZE), 0);
                packets_processed++;
                gTxCompleteAccList[priority]++;
            }

            /* Update the list entry for next time */
            gTxCompleteAccList[priority] = PAL_cppi4AccChGetNextList(gTxCompleteAccChHnd[priority]);

            /* Decrement the number of ready pages by 1 */
            avalanche_intd_set_interrupt_count(0, PAL_CPPI_PP_HOST2PP_TX_COMPLETE_ACC_CH_NUM(priority), 1);

#if defined(TX_COMPLETE_NETDEV_USE_NAPI)
            /* That's it, we did enough. Jump out now! */
            if (packets_processed >= budget)
            {
                return packets_processed;
            }
#endif // TX_COMPLETE_NETDEV_USE_NAPI
        }
    }

#if defined(TX_COMPLETE_NETDEV_USE_TASKLET)
    /* First clear the IRQ in order not to get a false interrupt, since INTD is level triggered */
    DWC_ACK_IRQ(MAP_INTD_TO_INTC(PAL_CPPI_PP_HOST2PP_TX_COMPLETE_INTD0_ACC_INTV_NUM));

    /* Send INTD EOI */
    avalanche_intd_write_eoi(PAL_CPPI_PP_HOST2PP_TX_COMPLETE_INTD0_ACC_INTV_NUM);

    /* It could be that between the INTD count decrement and the EOI the accumulator
       issued another interrupt. The logic of INTD is such that the level remains
       active high even after EOI is set, so INTC will lose the interrupt after
       ack_irq is done (it now expects an INTD polarity change). Therefore we must
       check the INTD count and, if it is not 0, reschedule the tasklet. */
    for (priority = PAL_CPPI_PP_HOST2PP_TX_COMPLETE_ACC_CH_COUNT - 1; priority >= 0; priority--)
    {
        if (avalanche_intd_get_interrupt_count(0, PAL_CPPI_PP_HOST2PP_TX_COMPLETE_ACC_CH_NUM(priority)))
        {
            tasklet_schedule(&gTxCompleteTasklet);
            return;
        }
    }
    DWC_ENABLE_IRQ(MAP_INTD_TO_INTC(PAL_CPPI_PP_HOST2PP_TX_COMPLETE_INTD0_ACC_INTV_NUM));
#endif

#if defined(TX_COMPLETE_NETDEV_USE_NAPI)
    return packets_processed;
#endif
}
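/*
 * __init_acc_channel() - allocate the accumulator list memory (two pages of
 * up to 32 entries each) and open one CPPI 4.1 accumulator channel that
 * monitors the given Tx-completion queue.
 */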
static int __init_acc_channel(PAL_Handle pal_hnd, int chan_num, Cppi4Queue queue, PAL_Cppi4AccChHnd *acc_hnd)
{
    Cppi4AccumulatorCfg cfg;
    unsigned int accListSize;

    *acc_hnd = NULL;

    cfg.accChanNum          = chan_num;
    cfg.list.maxPageEntry   = PAL_CPPI41_ACC_MAX_PAGE_ENTRIES;   /* This is entries per page (and we have 2 pages) */
    cfg.list.listEntrySize  = PAL_CPPI41_ACC_ENTRY_TYPE_D;       /* Only interested in register 'D' which has the desc pointer */
#ifndef CONFIG_ARM_AVALANCHE_SOC
    cfg.list.listCountMode  = PAL_CPPI41_ACC_PACE_MODE_LASTINTR; /* One indicates Entry Count Mode */
#else
    cfg.list.listCountMode  = PAL_CPPI41_ACC_LIST_NULL_TERM;     /* Zero indicates null terminated list */
#endif
    cfg.list.pacingMode     = PAL_CPPI41_ACC_PACE_MODE_LASTINTR; /* Wait for time since last interrupt */
    cfg.pacingTickCnt       = PAL_CPPI41_ACC_PACE_TICK_CNT;      /* Wait for 1000uS == 1ms */
    cfg.list.maxPageCnt     = PAL_CPPI41_ACC_MAX_PAGE_COUNT;     /* Use two pages */
    cfg.list.stallAvoidance = 1;                                 /* Use the stall avoidance feature */
    cfg.queue               = queue;
    cfg.mode                = 0;

    accListSize = (cfg.list.maxPageEntry * (cfg.list.listEntrySize + 1)) * cfg.list.maxPageCnt * sizeof(Uint32);
    if (!(cfg.list.listBase = kzalloc(accListSize, GFP_KERNEL)))
    {
        pr_err("Unable to allocate list page of size %d\n", accListSize);
        return -1;
    }
    PAL_CPPI4_CACHE_WRITEBACK((Ptr)cfg.list.listBase, accListSize);

    if (!(*acc_hnd = PAL_cppi4AccChOpen(pal_hnd, &cfg)))
    {
        pr_err("Unable to open accumulator channel #%d\n", chan_num);
        kfree(cfg.list.listBase);
        return -1;
    }

    return 0;
}
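/*
 * tx_complete_interrupt() - Tx-completion ISR. Masks the interrupt line and
 * defers the descriptor processing to the NAPI poller (or the tasklet).
 */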
irqreturn_t tx_complete_interrupt(int irq, void *dev)
{
    DWC_DISABLE_IRQ_NOSYC(MAP_INTD_TO_INTC(PAL_CPPI_PP_HOST2PP_TX_COMPLETE_INTD0_ACC_INTV_NUM));

#if defined(TX_COMPLETE_NETDEV_USE_NAPI)
    napi_schedule(&gTxCompleteNapi);
#elif defined(TX_COMPLETE_NETDEV_USE_TASKLET)
    tasklet_schedule(&gTxCompleteTasklet);
#endif

    return IRQ_RETVAL(1);
}

/*
 * __setup_txcomplete() - per priority: reset the Tx complete queue, open the
 * matching free host-descriptor queue and accumulator channel, and (once)
 * register the shared Tx-complete interrupt with its NAPI/tasklet context.
 */
Int32 __setup_txcomplete(PAL_Handle palHnd)
{
    Cppi4Queue txCmplQ;
    Cppi4Queue fdHostQ;
    Uint8 priority;

    for (priority = 0; priority < PAL_CPPI_PP_HOST2PP_TX_COMPLETE_ACC_CH_COUNT; priority++)
    {
        /************************************************/
        /* Reset the Tx complete queue                  */
        /************************************************/
        txCmplQ.qMgr = PAL_CPPI_PP_HOST2PP_TX_COMPLETE_Q_MGR;
        txCmplQ.qNum = PAL_CPPI_PP_HOST2PP_TX_COMPLETE_Q_NUM(priority);
        PAL_cppi4QueueClose(palHnd, PAL_cppi4QueueOpen(palHnd, txCmplQ));

        fdHostQ.qMgr = PAL_CPPI_PP_HOST2PP_HOST_FD_Q_MGR;
        fdHostQ.qNum = PAL_CPPI_PP_HOST2PP_HOST_FD_Q_NUM(priority);
        if (!(gHost2ppFreeHostDescQueueHnd[priority] = PAL_cppi4QueueOpen(palHnd, fdHostQ)))
        {
            pr_err("unable to open FD Host Queue #%d for TX Complete task\n", fdHostQ.qNum);
            return -1;
        }

        /************************************************/
        /* Init the Tx complete accumulator channel     */
        /************************************************/
        if (__init_acc_channel(palHnd, PAL_CPPI_PP_HOST2PP_TX_COMPLETE_ACC_CH_NUM(priority), txCmplQ, &gTxCompleteAccChHnd[priority]))
        {
            pr_err("unable to open accumulator channel #%d for TX Complete task\n",
                   PAL_CPPI_PP_HOST2PP_TX_COMPLETE_ACC_CH_NUM(priority));
            return -1;
        }

        gTxCompleteAccListBase[priority] = gTxCompleteAccList[priority] = PAL_cppi4AccChGetNextList(gTxCompleteAccChHnd[priority]);

        /* Request the Tx Complete IRQ - one IRQ shared by all TX complete priorities */
        if (priority == 0)
        {
#if defined(TX_COMPLETE_NETDEV_USE_NAPI)
            init_dummy_netdev(&dummyDev);
            netif_napi_add(&dummyDev, &gTxCompleteNapi, netdev_tx_poll, NETDEV_TX_SERVICE_MAX);
            napi_enable(&gTxCompleteNapi);
#elif defined(TX_COMPLETE_NETDEV_USE_TASKLET)
            tasklet_init(&gTxCompleteTasklet, __do_tx_complete, 0);
#endif
            if (DWC_REQUEST_IRQ(MAP_INTD_TO_INTC(PAL_CPPI_PP_HOST2PP_TX_COMPLETE_INTD0_ACC_INTV_NUM),
                                tx_complete_interrupt, IRQF_DISABLED, "TX Complete", NULL))
            {
                pr_err("unable to get IRQ #%d for TX Complete task\n",
                       MAP_INTD_TO_INTC(PAL_CPPI_PP_HOST2PP_TX_COMPLETE_INTD0_ACC_INTV_NUM));
                return -1;
            }
        }
    }

    return 0;
}
EXPORT_SYMBOL(__setup_txcomplete);

#if defined(TX_COMPLETE_NETDEV_USE_NAPI)
/**************************************************************************/
/*! \fn netdev_tx_poll
 **************************************************************************
 *  \brief NAPI poll handler for Tx completion.
 *  \param[in] napi   NAPI context
 *  \param[in] budget Processed packets budget
 *  \return Number of processed packets
 **************************************************************************/
static int netdev_tx_poll(struct napi_struct *napi, int budget)
{
    int work_done, priority;

    work_done = __do_tx_complete(NULL, budget);

    if (likely(work_done >= budget))
        return budget;

    /* Order is important here. If we do EOI before calling napi_complete, an interrupt
     * can occur just before we take ourselves out of the poll list; we will not
     * schedule the NAPI thread on that interrupt, no further Tx interrupts will arrive
     * and Tx will stall forever. Scary...
     */
    napi_complete(napi);

    /* The accumulator looks at the INTD counter in order to know if it can issue another
       interrupt. Since we decrement the counter in the Tx-complete handler, it is possible
       that the accumulator already issued another interrupt. Because the interrupt is level
       triggered and we do not want to get a false interrupt, we clear the INTC at the end of
       the Tx-complete handler; next time INTC will wait for INTD to become active. But since
       INTD is level triggered, there is a possibility that INTD remains active. This can
       happen if the accumulator issues an interrupt before the host sends EOI (done in the
       next line of code). In that case the INTD status has not changed - it is still active -
       while INTC now waits for it to become active, and we would never get the interrupt
       again. This is why we must check whether the counter is > 0 and, if so, re-schedule
       NAPI. We mask the interrupt before doing EOI and up until the NAPI reschedule in order
       not to get a double interrupt in case an interrupt really is issued between EOI and
       checking the INTD count - we are going to reschedule NAPI anyway... */
    DWC_ACK_IRQ(MAP_INTD_TO_INTC(PAL_CPPI_PP_HOST2PP_TX_COMPLETE_INTD0_ACC_INTV_NUM));
    avalanche_intd_write_eoi(PAL_CPPI_PP_HOST2PP_TX_COMPLETE_INTD0_ACC_INTV_NUM);

    /* It could be that between the INTD count decrement and the EOI the accumulator issued
       another interrupt. The logic of INTD is such that the level remains active high even
       after EOI is set, so INTC will lose the interrupt after ack_irq is done (it now
       expects an INTD polarity change). Therefore we must check the INTD count and, if it
       is not 0, reschedule NAPI. */
    for (priority = PAL_CPPI_PP_HOST2PP_TX_COMPLETE_ACC_CH_COUNT - 1; priority >= 0; priority--)
    {
        if (avalanche_intd_get_interrupt_count(0, PAL_CPPI_PP_HOST2PP_TX_COMPLETE_ACC_CH_NUM(priority)))
        {
            napi_schedule(napi);
            return work_done;
        }
    }
    DWC_ENABLE_IRQ(MAP_INTD_TO_INTC(PAL_CPPI_PP_HOST2PP_TX_COMPLETE_INTD0_ACC_INTV_NUM));

    return work_done;
}
#endif
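/*
 * replace_npcpu_memory_for_queue() - walk every free descriptor currently on
 * the given host FD queue and attach an APPCPU-owned data buffer to it:
 * a kmalloc() buffer for the MPEG TS queue, a freshly allocated skb for all
 * other queues. Descriptors are popped, patched and pushed back in place.
 */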
static int replace_npcpu_memory_for_queue(PAL_Handle palHnd, int qnum)
{
    Cppi4HostDescLinux *currDesc;
    unsigned int i, descCount;
    Cppi4Queue tmpQ;
    PAL_Cppi4QueueHnd tmpQHnd;

    tmpQ.qMgr = PAL_CPPI_PP_QMGR_G2;
    tmpQ.qNum = qnum;
    tmpQHnd = PAL_cppi4QueueOpen(NULL, tmpQ);

    PAL_cppi4QueueGetEntryCount(palHnd, tmpQ, &descCount);
    if (0 == descCount)
    {
        pr_info("queue num %d is empty, packet data descriptors init failed\n", qnum);
        return 0;
    }
    pr_info("replace_npcpu_memory_for_queue: queue %d has %d descriptors\n", qnum, descCount);

    for (i = 0; i < descCount; i++)
    {
        /* Get a free RX descriptor */
        if (!(currDesc = (Cppi4HostDescLinux *)PAL_cppi4QueuePop(tmpQHnd)))
            return -1;

        currDesc = (Cppi4HostDescLinux *)PAL_CPPI4_PHYS_2_VIRT((uintptr_t)currDesc);

        if (qnum == PAL_CPPI_PP_QMGR_G2_DS_FW_MPEG_TS_FD_INT_MODE_Q_NUM)
        {
            void *data;
            Uint32 buffsize = PAL_CPPI_PP_QMGR_GLOBAL_MPEG_BUFF_SIZE;

            data = kmalloc(buffsize, GFP_ATOMIC);
            if (!data)
            {
                pr_err("%s:%d: allocation FAILED\n", __func__, __LINE__);
                return -1;
            }
            currDesc->hw.orgBuffLen = cpu_to_be32(buffsize);
            currDesc->hw.orgBufPtr  = cpu_to_be32(PAL_CPPI4_VIRT_2_PHYS(data));
            currDesc->skb = NULL;
        }
        else
        {
            struct sk_buff *skb;
            Uint32 buffsize = PAL_CPPI_PP_QMGR_GLOBAL_DEFAULT_BUFF_SIZE;

            skb = dev_alloc_skb(buffsize);
            if (!skb)
            {
                pr_err("%s:%d: SKB allocation FAILED\n", __func__, __LINE__);
                return -1;
            }
            skb_reserve(skb, NET_IP_ALIGN);    /* 16 bit align the IP fields */

            currDesc->hw.orgBuffLen = cpu_to_be32(buffsize - NET_IP_ALIGN);
            currDesc->hw.orgBufPtr  = cpu_to_be32(PAL_CPPI4_VIRT_2_PHYS(skb->data));
            currDesc->skb = (Ptr)cpu_to_be32(skb);
        }

        PAL_CPPI4_CACHE_WRITEBACK(currDesc, sizeof(*currDesc));
        PAL_cppi4QueuePush(tmpQHnd, (Uint32 *)PAL_CPPI4_VIRT_2_PHYS(currDesc),
                           PAL_CPPI4_DESCSIZE_2_QMGRSIZE(PAL_CPPI_PP_QMGR_GLOBAL_DEFAULT_DESC_SIZE), 0);
    }

    return 0;
}
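/*
 * replace_npcpu_memory() - apply the per-queue buffer replacement above to
 * every free-descriptor queue listed in q_info[].
 */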
static int replace_npcpu_memory(PAL_Handle palHnd)
{
    unsigned int qcount;

    for (qcount = 0; qcount < ARRAY_SIZE(q_info); qcount++)
    {
        if (replace_npcpu_memory_for_queue(palHnd, q_info[qcount]) < 0)
        {
            pr_err("replace memory for queue %d FAILED!\n", q_info[qcount]);
            return -1;
        }
    }

    return 0;
}

static int __init tx_comp_init(void)
{
    int ret = -ENODEV;
    PAL_Handle palHnd;

    printk("Starting tx driver init\n");

    if (netip_memmap_init())
    {
        pr_err("%s:%d ERROR; netip memmap failed!\n", __func__, __LINE__);
        ret = -1;
        goto tx_comp_exit;
    }

    palHnd = PAL_cppi4Init(NULL, CPPI41_DOMAIN_PP);

    if (replace_npcpu_memory(palHnd))
    {
        pr_err("%s(%d): Error - replace_npcpu_memory failed!\n", __FUNCTION__, __LINE__);
        goto tx_comp_exit;
    }

    if (__setup_txcomplete(palHnd))
    {
        pr_err("%s(%d): Error - setup_txcomplete failed!\n", __FUNCTION__, __LINE__);
        goto tx_comp_exit;
    }

    if (cppi41_hw_mbox_init())
    {
        pr_err("%s(%d): Error - cppi41_hw_mbox_init failed!\n", __FUNCTION__, __LINE__);
        goto tx_comp_exit;
    }

#ifdef CONFIG_MRPC_HANDSHAKE
    boot_status_report(HANDSHAKE_ID_PP_INIT);
#endif

    return 0;

tx_comp_exit:
    BUG();
    return ret;
}

static void __exit tx_comp_cleanup(void)
{
    pr_info("pp init driver cleanup done\n");
}

module_init(tx_comp_init);
module_exit(tx_comp_cleanup);

MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("pp_init");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.0");