--- zzzz-none-000/linux-4.4.60/drivers/net/wireless/ath/wil6210/txrx.c	2017-04-08 07:53:53.000000000 +0000
+++ wasp-540e-714/linux-4.4.60/drivers/net/wireless/ath/wil6210/txrx.c	2019-07-03 09:21:34.000000000 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2015 Qualcomm Atheros, Inc.
+ * Copyright (c) 2012-2017 Qualcomm Atheros, Inc.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -27,16 +27,27 @@
 #include "wmi.h"
 #include "txrx.h"
 #include "trace.h"
+#if defined(CONFIG_WIL6210_NSS_SUPPORT)
+#include <nss_api_if.h>
+#endif
 
 static bool rtap_include_phy_info;
-module_param(rtap_include_phy_info, bool, S_IRUGO);
+module_param(rtap_include_phy_info, bool, 0444);
 MODULE_PARM_DESC(rtap_include_phy_info,
 		 " Include PHY info in the radiotap header, default - no");
 
+#if defined(CONFIG_WIL6210_NSS_SUPPORT)
+bool rx_align_2 = true;
+#else
 bool rx_align_2;
-module_param(rx_align_2, bool, S_IRUGO);
+#endif
+module_param(rx_align_2, bool, 0444);
 MODULE_PARM_DESC(rx_align_2, " align Rx buffers on 4*n+2, default - no");
 
+bool rx_large_buf;
+module_param(rx_large_buf, bool, 0444);
+MODULE_PARM_DESC(rx_large_buf, " allocate 8KB RX buffers, default - no");
+
 static inline uint wil_rx_snaplen(void)
 {
 	return rx_align_2 ? 6 : 0;
@@ -88,6 +99,63 @@
 	return vring->size/4;
 }
 
+/* returns true if num avail descriptors is lower than wmark_low */
+static inline int wil_vring_avail_low(struct vring *vring)
+{
+	return wil_vring_avail_tx(vring) < wil_vring_wmark_low(vring);
+}
+
+/* returns true if num avail descriptors is higher than wmark_high */
+static inline int wil_vring_avail_high(struct vring *vring)
+{
+	return wil_vring_avail_tx(vring) > wil_vring_wmark_high(vring);
+}
+
+/* returns true when all tx vrings are empty */
+bool wil_is_tx_idle(struct wil6210_priv *wil)
+{
+	int i;
+	unsigned long data_comp_to;
+
+	for (i = 0; i < WIL6210_MAX_TX_RINGS; i++) {
+		struct vring *vring = &wil->vring_tx[i];
+		int vring_index = vring - wil->vring_tx;
+		struct vring_tx_data *txdata = &wil->vring_tx_data[vring_index];
+
+		spin_lock(&txdata->lock);
+
+		if (!vring->va || !txdata->enabled) {
+			spin_unlock(&txdata->lock);
+			continue;
+		}
+
+		data_comp_to = jiffies + msecs_to_jiffies(
+					WIL_DATA_COMPLETION_TO_MS);
+		if (test_bit(wil_status_napi_en, wil->status)) {
+			while (!wil_vring_is_empty(vring)) {
+				if (time_after(jiffies, data_comp_to)) {
+					wil_dbg_pm(wil,
+						   "TO waiting for idle tx\n");
+					spin_unlock(&txdata->lock);
+					return false;
+				}
+				wil_dbg_ratelimited(wil,
						    "tx vring is not empty -> NAPI\n");
+				spin_unlock(&txdata->lock);
+				napi_synchronize(&wil->napi_tx);
+				msleep(20);
+				spin_lock(&txdata->lock);
+				if (!vring->va || !txdata->enabled)
+					break;
+			}
+		}
+
+		spin_unlock(&txdata->lock);
+	}
+
+	return true;
+}
+
 /* wil_val_in_range - check if value in [min,max) */
 static inline bool wil_val_in_range(int val, int min, int max)
 {
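The wil_is_tx_idle() hunk above drains every Tx vring under a deadline, dropping the lock and kicking NAPI between polls. A minimal sketch of the jiffies deadline idiom it is built on, with a hypothetical is_drained() predicate standing in for wil_vring_is_empty():

	/* poll until is_drained() holds or timeout_ms elapses;
	 * sleep between polls so the completion path can make progress
	 */
	static bool poll_until_drained(unsigned int timeout_ms)
	{
		unsigned long deadline = jiffies + msecs_to_jiffies(timeout_ms);

		while (!is_drained()) {
			if (time_after(jiffies, deadline))
				return false;	/* timed out, not idle */
			msleep(20);
		}
		return true;
	}

The real function additionally releases txdata->lock and calls napi_synchronize() before each sleep, so the Tx completion handler is free to empty the ring in the meantime.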
@@ -100,7 +168,7 @@
 	size_t sz = vring->size * sizeof(vring->va[0]);
 	uint i;
 
-	wil_dbg_misc(wil, "%s()\n", __func__);
+	wil_dbg_misc(wil, "vring_alloc:\n");
 
 	BUILD_BUG_ON(sizeof(vring->va[0]) != 32);
 
@@ -111,15 +179,33 @@
 		vring->va = NULL;
 		return -ENOMEM;
 	}
+
 	/* vring->va should be aligned on its size rounded up to power of 2
-	 * This is granted by the dma_alloc_coherent
+	 * This is granted by the dma_alloc_coherent.
+	 *
+	 * HW has limitation that all vrings addresses must share the same
+	 * upper 16 msb bits part of 48 bits address. To workaround that,
+	 * if we are using more than 32 bit addresses switch to 32 bit
+	 * allocation before allocating vring memory.
+	 *
+	 * There's no check for the return value of dma_set_mask_and_coherent,
+	 * since we assume if we were able to set the mask during
+	 * initialization in this system it will not fail if we set it again
 	 */
+	if (wil->dma_addr_size > 32)
+		dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+
 	vring->va = dma_alloc_coherent(dev, sz, &vring->pa, GFP_KERNEL);
 	if (!vring->va) {
 		kfree(vring->ctx);
 		vring->ctx = NULL;
 		return -ENOMEM;
 	}
+
+	if (wil->dma_addr_size > 32)
+		dma_set_mask_and_coherent(dev,
+					  DMA_BIT_MASK(wil->dma_addr_size));
+
 	/* initially, all descriptors are SW owned
 	 * For Tx and Rx, ownership bit is at the same location, thus
 	 * we can use any
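The hunk above works around the hardware restriction spelled out in the comment: all vring addresses must share the same upper 16 bits, so the DMA mask is temporarily narrowed to 32 bits for the ring allocation and widened back afterwards. A condensed sketch of the pattern, assuming a device brought up with a 48-bit mask; as in the driver, the return values are ignored on the assumption that a mask accepted at init time will be accepted again:

	dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));	/* force a low address */
	va = dma_alloc_coherent(dev, sz, &pa, GFP_KERNEL);	/* pa now fits in 32 bits */
	dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));	/* restore the wide mask */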
@@ -160,6 +246,7 @@
 	struct device *dev = wil_to_dev(wil);
 	size_t sz = vring->size * sizeof(vring->va[0]);
 
+	lockdep_assert_held(&wil->mutex);
 	if (tx) {
 		int vring_index = vring - wil->vring_tx;
 
@@ -183,6 +270,13 @@
 				&vring->va[vring->swtail].tx;
 
 			ctx = &vring->ctx[vring->swtail];
+			if (!ctx) {
+				wil_dbg_txrx(wil,
+					     "ctx(%d) was already completed\n",
+					     vring->swtail);
+				vring->swtail = wil_vring_next_tail(vring);
+				continue;
+			}
 			*d = *_d;
 			wil_txdesc_unmap(dev, d, ctx);
 			if (ctx->skb)
@@ -218,7 +312,7 @@
 			       u32 i, int headroom)
 {
 	struct device *dev = wil_to_dev(wil);
-	unsigned int sz = mtu_max + ETH_HLEN + wil_rx_snaplen();
+	unsigned int sz = wil->rx_buf_len + ETH_HLEN + wil_rx_snaplen();
 	struct vring_rx_desc dd, *d = &dd;
 	volatile struct vring_rx_desc *_d = &vring->va[i].rx;
 	dma_addr_t pa;
@@ -261,7 +355,6 @@
 static void wil_rx_add_radiotap_header(struct wil6210_priv *wil,
 				       struct sk_buff *skb)
 {
-	struct wireless_dev *wdev = wil->wdev;
 	struct wil6210_rtap {
 		struct ieee80211_radiotap_header rthdr;
 		/* fields should be in the order of bits in rthdr.it_present */
@@ -288,7 +381,7 @@
 	int rtap_len = sizeof(struct wil6210_rtap);
 	int phy_length = 0; /* phy info header size, bytes */
 	static char phy_data[128];
-	struct ieee80211_channel *ch = wdev->preset_chandef.chan;
+	struct ieee80211_channel *ch = wil->monitor_chandef.chan;
 
 	if (rtap_include_phy_info) {
 		rtap_len = sizeof(*rtap_vendor) + sizeof(*d);
@@ -365,6 +458,18 @@
 		(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_BACK_REQ);
 }
 
+bool wil_is_rx_idle(struct wil6210_priv *wil)
+{
+	struct vring_rx_desc *_d;
+	struct vring *vring = &wil->vring_rx;
+
+	_d = (struct vring_rx_desc *)&vring->va[vring->swhead].rx;
+	if (_d->dma.status & RX_DMA_STATUS_DU)
+		return false;
+
+	return true;
+}
+
 /**
  * reap 1 frame from @swhead
  *
@@ -382,7 +487,7 @@
 	struct sk_buff *skb;
 	dma_addr_t pa;
 	unsigned int snaplen = wil_rx_snaplen();
-	unsigned int sz = mtu_max + ETH_HLEN + snaplen;
+	unsigned int sz = wil->rx_buf_len + ETH_HLEN + snaplen;
 	u16 dmalen;
 	u8 ftype;
 	int cid;
@@ -543,11 +648,71 @@
 			break;
 		}
 	}
+
+	/* make sure all writes to descriptors (shared memory) are done before
+	 * committing them to HW
+	 */
+	wmb();
+
 	wil_w(wil, v->hwtail, v->swtail);
 
 	return rc;
 }
 
+/**
+ * reverse_memcmp - Compare two areas of memory, in reverse order
+ * @cs: One area of memory
+ * @ct: Another area of memory
+ * @count: The size of the area.
+ *
+ * Cut'n'paste from original memcmp (see lib/string.c)
+ * with minimal modifications
+ */
+static int reverse_memcmp(const void *cs, const void *ct, size_t count)
+{
+	const unsigned char *su1, *su2;
+	int res = 0;
+
+	for (su1 = cs + count - 1, su2 = ct + count - 1; count > 0;
+	     --su1, --su2, count--) {
+		res = *su1 - *su2;
+		if (res)
+			break;
+	}
+	return res;
+}
+
+static int wil_rx_crypto_check(struct wil6210_priv *wil, struct sk_buff *skb)
+{
+	struct vring_rx_desc *d = wil_skb_rxdesc(skb);
+	int cid = wil_rxdesc_cid(d);
+	int tid = wil_rxdesc_tid(d);
+	int key_id = wil_rxdesc_key_id(d);
+	int mc = wil_rxdesc_mcast(d);
+	struct wil_sta_info *s = &wil->sta[cid];
+	struct wil_tid_crypto_rx *c = mc ? &s->group_crypto_rx :
+				      &s->tid_crypto_rx[tid];
+	struct wil_tid_crypto_rx_single *cc = &c->key_id[key_id];
+	const u8 *pn = (u8 *)&d->mac.pn_15_0;
+
+	if (!cc->key_set) {
+		wil_err_ratelimited(wil,
+				    "Key missing. CID %d TID %d MCast %d KEY_ID %d\n",
+				    cid, tid, mc, key_id);
+		return -EINVAL;
+	}
+
+	if (reverse_memcmp(pn, cc->pn, IEEE80211_GCMP_PN_LEN) <= 0) {
+		wil_err_ratelimited(wil,
+				    "Replay attack. CID %d TID %d MCast %d KEY_ID %d PN %6phN last %6phN\n",
+				    cid, tid, mc, key_id, pn, cc->pn);
+		return -EINVAL;
+	}
+	memcpy(cc->pn, pn, IEEE80211_GCMP_PN_LEN);
+
+	return 0;
+}
+
 /*
  * Pass Rx packet to the netif. Update statistics.
  * Called in softirq context (NAPI poll).
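reverse_memcmp() compares the areas most-significant byte first because the 6-byte GCMP packet number (PN) sits least-significant byte first in the Rx descriptor, while the replay check needs numeric ordering. A standalone userspace illustration with hypothetical PN values (the comparison logic mirrors the hunk above):

	#include <stdio.h>
	#include <string.h>

	/* compare two byte areas starting from their last byte */
	static int reverse_memcmp(const void *cs, const void *ct, size_t count)
	{
		const unsigned char *su1 = (const unsigned char *)cs + count - 1;
		const unsigned char *su2 = (const unsigned char *)ct + count - 1;
		int res = 0;

		for (; count > 0; --su1, --su2, count--) {
			res = *su1 - *su2;
			if (res)
				break;
		}
		return res;
	}

	int main(void)
	{
		/* PNs in descriptor (little-endian) byte order */
		unsigned char last[6] = { 0xff, 0x00, 0x00, 0x00, 0x00, 0x00 }; /* PN 0xff  */
		unsigned char next[6] = { 0x00, 0x01, 0x00, 0x00, 0x00, 0x00 }; /* PN 0x100 */

		/* memcmp() sees 0x00 < 0xff in byte 0 and wrongly calls
		 * 'next' smaller; comparing from the most significant byte
		 * orders the PNs numerically, so 'next' > 'last' and the
		 * frame would be accepted
		 */
		printf("memcmp=%d reverse_memcmp=%d\n",
		       memcmp(next, last, sizeof(next)),
		       reverse_memcmp(next, last, sizeof(next)));
		return 0;
	}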
@@ -560,6 +725,7 @@
 	unsigned int len = skb->len;
 	struct vring_rx_desc *d = wil_skb_rxdesc(skb);
 	int cid = wil_rxdesc_cid(d); /* always 0..7, no need to check */
+	int security = wil_rxdesc_security(d);
 	struct ethhdr *eth = (void *)skb->data;
 	/* here looking for DA, not A1, thus Rxdesc's 'mcast' indication
 	 * is not suitable, need to look at data
@@ -585,6 +751,13 @@
 
 	skb_orphan(skb);
 
+	if (security && (wil_rx_crypto_check(wil, skb) != 0)) {
+		rc = GRO_DROP;
+		dev_kfree_skb(skb);
+		stats->rx_replay++;
+		goto stats;
+	}
+
 	if (wdev->iftype == NL80211_IFTYPE_AP && !wil->ap_isolate) {
 		if (mcast) {
 			/* send multicast frames both to higher layers in
@@ -620,12 +793,28 @@
 	}
 
 	if (skb) { /* deliver to local stack */
-
-		skb->protocol = eth_type_trans(skb, ndev);
-		rc = napi_gro_receive(&wil->napi_rx, skb);
+		bool deliver_skb = true;
+#if defined(CONFIG_WIL6210_NSS_SUPPORT)
+		if (rx_align_2) {
+			int rc1 = nss_virt_if_tx_buf(wil->nss_handle, skb);
+
+			if (rc1) {
+				wil_err_ratelimited(wil, "NSS Rx error: %d\n",
+						    rc1);
+			} else {
+				rc = GRO_NORMAL;
+				deliver_skb = false;
+			}
+		}
+#endif /* #if defined(CONFIG_WIL6210_NSS_SUPPORT) */
+		if (deliver_skb) {
+			skb->protocol = eth_type_trans(skb, ndev);
+			rc = napi_gro_receive(&wil->napi_rx, skb);
+		}
 		wil_dbg_txrx(wil, "Rx complete %d bytes => %s\n",
 			     len, gro_res_str[rc]);
 	}
+stats:
 	/* statistics. rc set to GRO_NORMAL for AP bridging */
 	if (unlikely(rc == GRO_DROP)) {
 		ndev->stats.rx_dropped++;
@@ -656,7 +845,7 @@
 		wil_err(wil, "Rx IRQ while Rx not yet initialized\n");
 		return;
 	}
-	wil_dbg_txrx(wil, "%s()\n", __func__);
+	wil_dbg_txrx(wil, "rx_handle\n");
 
 	while ((*quota > 0) && (NULL != (skb = wil_vring_reap_rx(wil, v)))) {
 		(*quota)--;
@@ -674,18 +863,34 @@
 	wil_rx_refill(wil, v->size);
 }
 
+static void wil_rx_buf_len_init(struct wil6210_priv *wil)
+{
+	wil->rx_buf_len = rx_large_buf ?
+		WIL_MAX_ETH_MTU : TXRX_BUF_LEN_DEFAULT - WIL_MAX_MPDU_OVERHEAD;
+	if (mtu_max > wil->rx_buf_len) {
+		/* do not allow RX buffers to be smaller than mtu_max, for
+		 * backward compatibility (mtu_max parameter was also used
+		 * to support receiving large packets)
+		 */
+		wil_info(wil, "Override RX buffer to mtu_max(%d)\n", mtu_max);
+		wil->rx_buf_len = mtu_max;
+	}
+}
+
 int wil_rx_init(struct wil6210_priv *wil, u16 size)
 {
 	struct vring *vring = &wil->vring_rx;
 	int rc;
 
-	wil_dbg_misc(wil, "%s()\n", __func__);
+	wil_dbg_misc(wil, "rx_init\n");
 
 	if (vring->va) {
 		wil_err(wil, "Rx ring already allocated\n");
 		return -EINVAL;
 	}
 
+	wil_rx_buf_len_init(wil);
+
 	vring->size = size;
 	rc = wil_vring_alloc(wil, vring);
 	if (rc)
@@ -710,12 +915,27 @@
 {
 	struct vring *vring = &wil->vring_rx;
 
-	wil_dbg_misc(wil, "%s()\n", __func__);
+	wil_dbg_misc(wil, "rx_fini\n");
 
 	if (vring->va)
 		wil_vring_free(wil, vring, 0);
 }
 
+static inline void wil_tx_data_init(struct vring_tx_data *txdata)
+{
+	spin_lock_bh(&txdata->lock);
+	txdata->dot1x_open = 0;
+	txdata->enabled = 0;
+	txdata->idle = 0;
+	txdata->last_idle = 0;
+	txdata->begin = 0;
+	txdata->agg_wsize = 0;
+	txdata->agg_timeout = 0;
+	txdata->agg_amsdu = 0;
+	txdata->addba_in_progress = false;
+	spin_unlock_bh(&txdata->lock);
+}
+
 int wil_vring_init_tx(struct wil6210_priv *wil, int id, int size,
 		      int cid, int tid)
 {
@@ -741,14 +961,15 @@
 		},
 	};
 	struct {
-		struct wil6210_mbox_hdr_wmi wmi;
+		struct wmi_cmd_hdr wmi;
 		struct wmi_vring_cfg_done_event cmd;
 	} __packed reply;
 	struct vring *vring = &wil->vring_tx[id];
 	struct vring_tx_data *txdata = &wil->vring_tx_data[id];
 
-	wil_dbg_misc(wil, "%s() max_mpdu_size %d\n", __func__,
+	wil_dbg_misc(wil, "vring_init_tx: max_mpdu_size %d\n",
 		     cmd.vring_cfg.tx_sw_ring.max_mpdu_size);
+	lockdep_assert_held(&wil->mutex);
 
 	if (vring->va) {
 		wil_err(wil, "Tx ring [%d] already allocated\n", id);
@@ -756,8 +977,7 @@
 		goto out;
 	}
 
-	memset(txdata, 0, sizeof(*txdata));
-	spin_lock_init(&txdata->lock);
+	wil_tx_data_init(txdata);
 	vring->size = size;
 	rc = wil_vring_alloc(wil, vring);
 	if (rc)
@@ -781,17 +1001,25 @@
 		rc = -EINVAL;
 		goto out_free;
 	}
-	vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr);
 
+	spin_lock_bh(&txdata->lock);
+	vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr);
 	txdata->enabled = 1;
+	spin_unlock_bh(&txdata->lock);
+
 	if (txdata->dot1x_open && (agg_wsize >= 0))
 		wil_addba_tx_request(wil, id, agg_wsize);
 
 	return 0;
  out_free:
+	spin_lock_bh(&txdata->lock);
 	txdata->dot1x_open = false;
 	txdata->enabled = 0;
+	spin_unlock_bh(&txdata->lock);
 	wil_vring_free(wil, vring, 1);
+	wil->vring2cid_tid[id][0] = WIL6210_MAX_CID;
+	wil->vring2cid_tid[id][1] = 0;
+
  out:
 
 	return rc;
@@ -813,14 +1041,15 @@
 		},
 	};
 	struct {
-		struct wil6210_mbox_hdr_wmi wmi;
+		struct wmi_cmd_hdr wmi;
 		struct wmi_vring_cfg_done_event cmd;
 	} __packed reply;
 	struct vring *vring = &wil->vring_tx[id];
 	struct vring_tx_data *txdata = &wil->vring_tx_data[id];
 
-	wil_dbg_misc(wil, "%s() max_mpdu_size %d\n", __func__,
+	wil_dbg_misc(wil, "vring_init_bcast: max_mpdu_size %d\n",
 		     cmd.vring_cfg.tx_sw_ring.max_mpdu_size);
+	lockdep_assert_held(&wil->mutex);
 
 	if (vring->va) {
 		wil_err(wil, "Tx ring [%d] already allocated\n", id);
@@ -828,8 +1057,7 @@
 		goto out;
 	}
 
-	memset(txdata, 0, sizeof(*txdata));
-	spin_lock_init(&txdata->lock);
+	wil_tx_data_init(txdata);
 	vring->size = size;
 	rc = wil_vring_alloc(wil, vring);
 	if (rc)
@@ -853,14 +1081,18 @@
 		rc = -EINVAL;
 		goto out_free;
 	}
-	vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr);
 
+	spin_lock_bh(&txdata->lock);
+	vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr);
 	txdata->enabled = 1;
+	spin_unlock_bh(&txdata->lock);
 
 	return 0;
  out_free:
+	spin_lock_bh(&txdata->lock);
 	txdata->enabled = 0;
 	txdata->dot1x_open = false;
+	spin_unlock_bh(&txdata->lock);
 	wil_vring_free(wil, vring, 1);
  out:
 
@@ -872,23 +1104,29 @@
 	struct vring *vring = &wil->vring_tx[id];
 	struct vring_tx_data *txdata = &wil->vring_tx_data[id];
 
-	WARN_ON(!mutex_is_locked(&wil->mutex));
+	lockdep_assert_held(&wil->mutex);
 
 	if (!vring->va)
 		return;
 
-	wil_dbg_misc(wil, "%s() id=%d\n", __func__, id);
+	wil_dbg_misc(wil, "vring_fini_tx: id=%d\n", id);
 
 	spin_lock_bh(&txdata->lock);
 	txdata->dot1x_open = false;
 	txdata->enabled = 0; /* no Tx can be in progress or start anew */
 	spin_unlock_bh(&txdata->lock);
+	/* napi_synchronize waits for completion of the current NAPI but will
+	 * not prevent the next NAPI run.
+	 * Add a memory barrier to guarantee that txdata->enabled is zeroed
+	 * before napi_synchronize so that the next scheduled NAPI will not
+	 * handle this vring
+	 */
+	wmb();
 	/* make sure NAPI won't touch this vring */
 	if (test_bit(wil_status_napi_en, wil->status))
 		napi_synchronize(&wil->napi_tx);
 
 	wil_vring_free(wil, vring, 1);
-	memset(txdata, 0, sizeof(*txdata));
 }
 
 static struct vring *wil_find_tx_ucast(struct wil6210_priv *wil,
@@ -908,13 +1146,16 @@
 			continue;
 		if (wil->vring2cid_tid[i][0] == cid) {
 			struct vring *v = &wil->vring_tx[i];
+			struct vring_tx_data *txdata = &wil->vring_tx_data[i];
 
-			wil_dbg_txrx(wil, "%s(%pM) -> [%d]\n",
-				     __func__, eth->h_dest, i);
-			if (v->va) {
+			wil_dbg_txrx(wil, "find_tx_ucast: (%pM) -> [%d]\n",
+				     eth->h_dest, i);
+			if (v->va && txdata->enabled) {
 				return v;
 			} else {
-				wil_dbg_txrx(wil, "vring[%d] not valid\n", i);
+				wil_dbg_txrx(wil,
+					     "find_tx_ucast: vring[%d] not valid\n",
+					     i);
 				return NULL;
 			}
 		}
@@ -932,6 +1173,7 @@
 	struct vring *v;
 	int i;
 	u8 cid;
+	struct vring_tx_data *txdata;
 
 	/* In the STA mode, it is expected to have only 1 VRING
 	 * for the AP we connected to.
@@ -939,7 +1181,8 @@
 	 */
 	for (i = 0; i < WIL6210_MAX_TX_RINGS; i++) {
 		v = &wil->vring_tx[i];
-		if (!v->va)
+		txdata = &wil->vring_tx_data[i];
+		if (!v->va || !txdata->enabled)
 			continue;
 
 		cid = wil->vring2cid_tid[i][0];
@@ -975,12 +1218,14 @@
 			struct sk_buff *skb)
 {
 	struct vring *v;
+	struct vring_tx_data *txdata;
 	int i = wil->bcast_vring;
 
 	if (i < 0)
 		return NULL;
 	v = &wil->vring_tx[i];
-	if (!v->va)
+	txdata = &wil->vring_tx_data[i];
+	if (!v->va || !txdata->enabled)
 		return NULL;
 	if (!wil->vring_tx_data[i].dot1x_open &&
 	    (skb->protocol != cpu_to_be16(ETH_P_PAE)))
@@ -1007,11 +1252,13 @@
 	u8 cid;
 	struct ethhdr *eth = (void *)skb->data;
 	char *src = eth->h_source;
+	struct vring_tx_data *txdata;
 
 	/* find 1-st vring eligible for data */
 	for (i = 0; i < WIL6210_MAX_TX_RINGS; i++) {
 		v = &wil->vring_tx[i];
-		if (!v->va)
+		txdata = &wil->vring_tx_data[i];
+		if (!v->va || !txdata->enabled)
 			continue;
 
 		cid = wil->vring2cid_tid[i][0];
@@ -1064,17 +1311,6 @@
 	return v;
 }
 
-static struct vring *wil_find_tx_bcast(struct wil6210_priv *wil,
-				       struct sk_buff *skb)
-{
-	struct wireless_dev *wdev = wil->wdev;
-
-	if (wdev->iftype != NL80211_IFTYPE_AP)
-		return wil_find_tx_bcast_2(wil, skb);
-
-	return wil_find_tx_bcast_1(wil, skb);
-}
-
 static int wil_tx_desc_map(struct vring_tx_desc *d, dma_addr_t pa, u32 len,
 			   int vring_index)
 {
@@ -1244,8 +1480,8 @@
 	int gso_type;
 	int rc = -EINVAL;
 
-	wil_dbg_txrx(wil, "%s() %d bytes to vring %d\n",
-		     __func__, skb->len, vring_index);
+	wil_dbg_txrx(wil, "tx_vring_tso: %d bytes to vring %d\n", skb->len,
+		     vring_index);
 
 	if (unlikely(!txdata->enabled))
 		return -EINVAL;
@@ -1452,13 +1688,20 @@
 
 	/* performance monitoring */
 	used = wil_vring_used_tx(vring);
-	if (wil_val_in_range(vring_idle_trsh,
+	if (wil_val_in_range(wil->vring_idle_trsh,
 			     used, used + descs_used)) {
 		txdata->idle += get_cycles() - txdata->last_idle;
 		wil_dbg_txrx(wil, "Ring[%2d] not idle %d -> %d\n",
 			     vring_index, used, used + descs_used);
 	}
 
+	/* Make sure to advance the head only after descriptor update is done.
+	 * This will prevent a race condition where the completion thread
+	 * will see the DU bit set from previous run and will handle the
+	 * skb before it was completed.
+	 */
+	wmb();
+
 	/* advance swhead */
 	wil_vring_advance_head(vring, descs_used);
 	wil_dbg_txrx(wil, "TSO: Tx swhead %d -> %d\n", swhead, vring->swhead);
@@ -1475,7 +1718,7 @@
 	while (descs_used > 0) {
 		struct wil_ctx *ctx;
 
-		i = (swhead + descs_used) % vring->size;
+		i = (swhead + descs_used - 1) % vring->size;
 		d = (struct vring_tx_desc *)&vring->va[i].tx;
 		_desc = &vring->va[i].tx;
 		*d = *_desc;
@@ -1507,8 +1750,8 @@
 	bool mcast = (vring_index == wil->bcast_vring);
 	uint len = skb_headlen(skb);
 
-	wil_dbg_txrx(wil, "%s() %d bytes to vring %d\n",
-		     __func__, skb->len, vring_index);
+	wil_dbg_txrx(wil, "tx_vring: %d bytes to vring %d\n", skb->len,
+		     vring_index);
 
 	if (unlikely(!txdata->enabled))
 		return -EINVAL;
@@ -1592,13 +1835,20 @@
 
 	/* performance monitoring */
 	used = wil_vring_used_tx(vring);
-	if (wil_val_in_range(vring_idle_trsh,
+	if (wil_val_in_range(wil->vring_idle_trsh,
 			     used, used + nr_frags + 1)) {
 		txdata->idle += get_cycles() - txdata->last_idle;
 		wil_dbg_txrx(wil, "Ring[%2d] not idle %d -> %d\n",
 			     vring_index, used, used + nr_frags + 1);
 	}
 
+	/* Make sure to advance the head only after descriptor update is done.
+	 * This will prevent a race condition where the completion thread
+	 * will see the DU bit set from previous run and will handle the
+	 * skb before it was completed.
+	 */
+	wmb();
+
 	/* advance swhead */
 	wil_vring_advance_head(vring, nr_frags + 1);
 	wil_dbg_txrx(wil, "Tx[%2d] swhead %d -> %d\n", vring_index, swhead,
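Both Tx submit paths above now place wmb() between filling the descriptors and advancing swhead, and wil_tx_complete() below mirrors it between zeroing the ctx and moving the tail. A minimal sketch of the producer-side publish ordering being enforced, with hypothetical demo_desc/demo_ring types standing in for the driver's vring structures:

	struct demo_desc {
		u64 addr;
		u32 len;
	};

	struct demo_ring {
		struct demo_desc *desc;
		u32 swhead;
		u32 size;
	};

	static void demo_ring_publish(struct demo_ring *r, u64 pa, u32 len)
	{
		struct demo_desc *d = &r->desc[r->swhead];

		/* fill the descriptor first */
		d->addr = pa;
		d->len = len;

		wmb();	/* descriptor writes must land ... */

		/* ... before the consumer can observe the new head */
		r->swhead = (r->swhead + 1) % r->size;
	}

Without the barrier, a completion thread polling the head index (or the DU status bit) could observe the index update before the descriptor contents and handle a half-written entry.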
@@ -1641,6 +1891,15 @@
 
 	spin_lock(&txdata->lock);
 
+	if (test_bit(wil_status_suspending, wil->status) ||
+	    test_bit(wil_status_suspended, wil->status) ||
+	    test_bit(wil_status_resuming, wil->status)) {
+		wil_dbg_txrx(wil,
+			     "suspend/resume in progress. drop packet\n");
+		spin_unlock(&txdata->lock);
+		return -EINVAL;
+	}
+
 	rc = (skb_is_gso(skb) ? __wil_tx_vring_tso : __wil_tx_vring)
 	     (wil, vring, skb);
 
@@ -1649,6 +1908,94 @@
 
 	return rc;
 }
 
+/**
+ * Check status of tx vrings and stop/wake net queues if needed
+ *
+ * This function does one of two checks:
+ * In case check_stop is true, will check if net queues need to be stopped. If
+ * the conditions for stopping are met, netif_tx_stop_all_queues() is called.
+ * In case check_stop is false, will check if net queues need to be waked. If
+ * the conditions for waking are met, netif_tx_wake_all_queues() is called.
+ * vring is the vring which is currently being modified by either adding
+ * descriptors (tx) into it or removing descriptors (tx complete) from it. Can
+ * be null when irrelevant (e.g. connect/disconnect events).
+ *
+ * The implementation is to stop net queues if modified vring has low
+ * descriptor availability. Wake if all vrings are not in low descriptor
+ * availability and modified vring has high descriptor availability.
+ */
+static inline void __wil_update_net_queues(struct wil6210_priv *wil,
+					   struct vring *vring,
+					   bool check_stop)
+{
+	int i;
+
+	if (vring)
+		wil_dbg_txrx(wil, "vring %d, check_stop=%d, stopped=%d",
+			     (int)(vring - wil->vring_tx), check_stop,
+			     wil->net_queue_stopped);
+	else
+		wil_dbg_txrx(wil, "check_stop=%d, stopped=%d",
+			     check_stop, wil->net_queue_stopped);
+
+	if (check_stop == wil->net_queue_stopped)
+		/* net queues already in desired state */
+		return;
+
+	if (check_stop) {
+		if (!vring || unlikely(wil_vring_avail_low(vring))) {
+			/* not enough room in the vring */
+			netif_tx_stop_all_queues(wil_to_ndev(wil));
+			wil->net_queue_stopped = true;
+			wil_dbg_txrx(wil, "netif_tx_stop called\n");
+		}
+		return;
+	}
+
+	/* Do not wake the queues in suspend flow */
+	if (test_bit(wil_status_suspending, wil->status) ||
+	    test_bit(wil_status_suspended, wil->status))
+		return;
+
+	/* check wake */
+	for (i = 0; i < WIL6210_MAX_TX_RINGS; i++) {
+		struct vring *cur_vring = &wil->vring_tx[i];
+		struct vring_tx_data *txdata = &wil->vring_tx_data[i];
+
+		if (!cur_vring->va || !txdata->enabled || cur_vring == vring)
+			continue;
+
+		if (wil_vring_avail_low(cur_vring)) {
+			wil_dbg_txrx(wil, "vring %d full, can't wake\n",
+				     (int)(cur_vring - wil->vring_tx));
+			return;
+		}
+	}
+
+	if (!vring || wil_vring_avail_high(vring)) {
+		/* enough room in the vring */
+		wil_dbg_txrx(wil, "calling netif_tx_wake\n");
+		netif_tx_wake_all_queues(wil_to_ndev(wil));
+		wil->net_queue_stopped = false;
+	}
+}
+
+void wil_update_net_queues(struct wil6210_priv *wil, struct vring *vring,
+			   bool check_stop)
+{
+	spin_lock(&wil->net_queue_lock);
+	__wil_update_net_queues(wil, vring, check_stop);
+	spin_unlock(&wil->net_queue_lock);
+}
+
+void wil_update_net_queues_bh(struct wil6210_priv *wil, struct vring *vring,
+			      bool check_stop)
+{
+	spin_lock_bh(&wil->net_queue_lock);
+	__wil_update_net_queues(wil, vring, check_stop);
+	spin_unlock_bh(&wil->net_queue_lock);
+}
+
 netdev_tx_t wil_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
 	struct wil6210_priv *wil = ndev_to_wil(ndev);
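The queue management added above is a hysteresis scheme built on the wil_vring_avail_low()/wil_vring_avail_high() helpers from the first hunk: stop the net queues when the modified vring drops below the low watermark, wake them only when every enabled vring is clear of it and the modified one has passed the high watermark. A reduced sketch of the two-threshold state machine; size/4 for the high watermark matches the context visible at the top of the patch, and size/8 for the low one is assumed here purely for illustration:

	/* returns the new 'stopped' state for the net queues */
	static bool demo_update_queues(u32 avail, u32 size, bool stopped)
	{
		if (!stopped && avail < size / 8)	/* low watermark (assumed) */
			return true;			/* stop: back-pressure the stack */
		if (stopped && avail > size / 4)	/* high watermark */
			return false;			/* safe to wake again */
		return stopped;				/* inside the gap: no change */
	}

The gap between the two thresholds keeps the queues from thrashing on and off when availability hovers around a single cut-off; taking net_queue_lock in the wil_update_net_queues() wrappers keeps the stop/wake decision atomic between the xmit and completion paths.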
@@ -1658,7 +2005,7 @@
 	static bool pr_once_fw;
 	int rc;
 
-	wil_dbg_txrx(wil, "%s()\n", __func__);
+	wil_dbg_txrx(wil, "start_xmit\n");
 	if (unlikely(!test_bit(wil_status_fwready, wil->status))) {
 		if (!pr_once_fw) {
 			wil_err(wil, "FW not ready\n");
@@ -1667,7 +2014,7 @@
 		goto drop;
 	}
 	if (unlikely(!test_bit(wil_status_fwconnected, wil->status))) {
-		wil_err_ratelimited(wil, "FW not connected\n");
+		wil_dbg_ratelimited(wil, "FW not connected, packet dropped\n");
 		goto drop;
 	}
 	if (unlikely(wil->wdev->iftype == NL80211_IFTYPE_MONITOR)) {
@@ -1677,12 +2024,26 @@
 	pr_once_fw = false;
 
 	/* find vring */
-	if (wil->wdev->iftype == NL80211_IFTYPE_STATION) {
-		/* in STA mode (ESS), all to same VRING */
+	if (wil->wdev->iftype == NL80211_IFTYPE_STATION && !wil->pbss) {
+		/* in STA mode (ESS), all to same VRING (to AP) */
 		vring = wil_find_tx_vring_sta(wil, skb);
-	} else { /* direct communication, find matching VRING */
-		vring = bcast ? wil_find_tx_bcast(wil, skb) :
-			wil_find_tx_ucast(wil, skb);
+	} else if (bcast) {
+		if (wil->pbss)
+			/* in pbss, no bcast VRING - duplicate skb in
+			 * all stations VRINGs
+			 */
+			vring = wil_find_tx_bcast_2(wil, skb);
+		else if (wil->wdev->iftype == NL80211_IFTYPE_AP)
+			/* AP has a dedicated bcast VRING */
+			vring = wil_find_tx_bcast_1(wil, skb);
+		else
+			/* unexpected combination, fallback to duplicating
+			 * the skb in all stations VRINGs
+			 */
+			vring = wil_find_tx_bcast_2(wil, skb);
+	} else {
+		/* unicast, find specific VRING by dest. address */
+		vring = wil_find_tx_ucast(wil, skb);
 	}
 	if (unlikely(!vring)) {
 		wil_dbg_txrx(wil, "No Tx VRING found for %pM\n", eth->h_dest);
@@ -1691,14 +2052,10 @@
 	/* set up vring entry */
 	rc = wil_tx_vring(wil, vring, skb);
 
-	/* do we still have enough room in the vring? */
-	if (unlikely(wil_vring_avail_tx(vring) < wil_vring_wmark_low(vring))) {
-		netif_tx_stop_all_queues(wil_to_ndev(wil));
-		wil_dbg_txrx(wil, "netif_tx_stop : ring full\n");
-	}
-
 	switch (rc) {
 	case 0:
+		/* shall we stop net queues? */
+		wil_update_net_queues_bh(wil, vring, true);
 		/* statistics will be updated on the tx_complete */
 		dev_kfree_skb_any(skb);
 		return NETDEV_TX_OK;
@@ -1760,7 +2117,7 @@
 		return 0;
 	}
 
-	wil_dbg_txrx(wil, "%s(%d)\n", __func__, ringid);
+	wil_dbg_txrx(wil, "tx_complete: (%d)\n", ringid);
 
 	used_before_complete = wil_vring_used_tx(vring);
 
@@ -1822,6 +2179,12 @@
 			wil_consume_skb(skb, d->dma.error == 0);
 		}
 		memset(ctx, 0, sizeof(*ctx));
+		/* Make sure the ctx is zeroed before updating the tail
+		 * to prevent a case where wil_tx_vring will see
+		 * this descriptor as used and handle it before ctx zero
+		 * is completed.
+		 */
+		wmb();
 		/* There is no need to touch HW descriptor:
 		 * - ststus bit TX_DMA_STATUS_DU is set by design,
 		 *   so hardware will not try to process this desc.,
@@ -1834,17 +2197,16 @@
 
 	/* performance monitoring */
 	used_new = wil_vring_used_tx(vring);
-	if (wil_val_in_range(vring_idle_trsh,
+	if (wil_val_in_range(wil->vring_idle_trsh,
 			     used_new, used_before_complete)) {
 		wil_dbg_txrx(wil, "Ring[%2d] idle %d -> %d\n",
 			     ringid, used_before_complete, used_new);
 		txdata->last_idle = get_cycles();
 	}
 
-	if (wil_vring_avail_tx(vring) > wil_vring_wmark_high(vring)) {
-		wil_dbg_txrx(wil, "netif_tx_wake : ring not full\n");
-		netif_tx_wake_all_queues(wil_to_ndev(wil));
-	}
+	/* shall we wake net queues? */
+	if (done)
+		wil_update_net_queues(wil, vring, false);
 
 	return done;
 }