--- zzzz-none-000/linux-5.4.213/net/core/dev.c 2022-09-15 10:04:56.000000000 +0000 +++ miami-7690-761/linux-5.4.213/net/core/dev.c 2024-05-29 11:20:02.000000000 +0000 @@ -97,6 +97,7 @@ #include #include #include +#include #include #include #include @@ -145,6 +146,7 @@ #include #include "net-sysfs.h" +#include "skbuff_debug.h" #define MAX_GRO_SKBS 8 @@ -156,6 +158,7 @@ struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; struct list_head ptype_all __read_mostly; /* Taps */ static struct list_head offload_base __read_mostly; +static struct workqueue_struct *napi_workq __read_mostly; static int netif_rx_internal(struct sk_buff *skb); static int call_netdevice_notifiers_info(unsigned long val, @@ -283,7 +286,6 @@ * *******************************************************************************/ - /* * Add a protocol ID to the list. Now that the input handler is * smarter we can dispense with all the messy stuff that used to be @@ -385,7 +387,6 @@ } EXPORT_SYMBOL(dev_remove_pack); - /** * dev_add_offload - register offload handlers * @po: protocol offload declaration @@ -528,7 +529,6 @@ } EXPORT_SYMBOL(netdev_boot_setup_check); - /** * netdev_boot_base - get address from boot time settings * @prefix: prefix for network device @@ -762,7 +762,6 @@ } EXPORT_SYMBOL(dev_get_by_index_rcu); - /** * dev_get_by_index - find a device by its ifindex * @net: the applicable net namespace @@ -1457,7 +1456,6 @@ } EXPORT_SYMBOL(dev_close); - /** * dev_disable_lro - disable Large Receive Offload on a device * @dev: device @@ -1514,7 +1512,7 @@ N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN) N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO) N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO) - N(PRE_CHANGEADDR) + N(PRE_CHANGEADDR) N(BR_JOIN) N(BR_LEAVE) } #undef N return "UNKNOWN_NETDEV_EVENT"; @@ -2740,7 +2738,6 @@ } EXPORT_SYMBOL(__dev_kfree_skb_any); - /** * netif_device_detach - mark device as removed * @dev: network device @@ -2970,7 +2967,6 @@ } EXPORT_SYMBOL(skb_mac_gso_segment); - /* openvswitch calls this on rx path, so we need a different check. */ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) @@ -3190,19 +3186,293 @@ } EXPORT_SYMBOL(netif_skb_features); +/** + * netdev_sawf_deinit - free sawf statistics. + * @dev: Device to free sawf statistics. + * + * Returns true on success, false on failure. + */ +bool netdev_sawf_deinit(struct net_device *dev) +{ + struct pcpu_sawf_stats __percpu *stats_to_delete; + + if ((!dev->sawf_stats)) { + return false; + } + + stats_to_delete = dev->sawf_stats; + dev->sawf_stats = NULL; + + free_percpu(stats_to_delete); + + return true; +} +EXPORT_SYMBOL(netdev_sawf_deinit); + +/** + * netdev_sawf_init - Allocate netdev SAWF statistics. + * @dev: Device to allocate statistics on. + * @mode: Initial flags to be set. + */ +bool netdev_sawf_init(struct net_device *dev, uint16_t mode) +{ + int cpu; + + if (dev->sawf_stats) { + return false; + } + + dev->sawf_stats = netdev_alloc_pcpu_stats(struct pcpu_sawf_stats); + if (!dev->sawf_stats) { + return false; + } + + for_each_possible_cpu(cpu) { + struct pcpu_sawf_stats *stats = per_cpu_ptr(dev->sawf_stats, cpu); + memset(stats, 0, sizeof(*stats)); + } + + dev->sawf_flags = mode; + + return true; +} +EXPORT_SYMBOL(netdev_sawf_init); + +/** + * netdev_sawf_flags_update - Set SAWF flags. 
+ * @dev: Device to update + * @flags: New value of flags + */ +bool netdev_sawf_flags_update(struct net_device *dev, uint16_t flags) +{ + if (!dev->sawf_stats) { + return false; + } + + dev->sawf_flags = flags; + + return true; +} +EXPORT_SYMBOL(netdev_sawf_flags_update); + +/** + * netdev_sawf_enable - Re-enable SAWF statistics. + * @dev: Device to enable. + */ +bool netdev_sawf_enable(struct net_device *dev) +{ + int cpu; + if (!dev->sawf_stats) { + return false; + } + + for_each_possible_cpu(cpu) { + struct pcpu_sawf_stats *stats = per_cpu_ptr(dev->sawf_stats, cpu); + memset(stats, 0, sizeof(*stats)); + } + + dev->sawf_flags |= NETDEV_SAWF_FLAG_ENABLED; + + return true; +} +EXPORT_SYMBOL(netdev_sawf_enable); + +/** + * netdev_sawf_disable - Disable SAWF statistics collection. + * @dev: device to disable statistics. + */ +bool netdev_sawf_disable(struct net_device *dev) +{ + if (!dev->sawf_stats) { + return false; + } + + dev->sawf_flags &= ~NETDEV_SAWF_FLAG_ENABLED; + + return true; +} +EXPORT_SYMBOL(netdev_sawf_disable); + +/** + * netdev_sawf_debug_set - Sets the debug service class. + * @dev: Device to configure + * @sid: Service class ID to keep debug information. + */ +bool netdev_sawf_debug_set(struct net_device *dev, uint8_t sid) +{ + int cpu; + + if (!dev->sawf_stats) { + return false; + } + + for_each_possible_cpu(cpu) { + struct pcpu_sawf_stats *stats = per_cpu_ptr(dev->sawf_stats, cpu); + stats->debug_lat_max = 0; + stats->debug_lat_min = 0; + stats->debug_lat_ewma = 0; + stats->debug_lat_last = 0; + } + + dev->sawf_flags = (dev->sawf_flags & ~(NETDEV_SAWF_FLAG_DEBUG_MASK)) | (sid << NETDEV_SAWF_FLAG_DEBUG_SHIFT) | (NETDEV_SAWF_FLAG_DEBUG); + + return true; +} +EXPORT_SYMBOL(netdev_sawf_debug_set); + +/** + * netdev_sawf_debug_set - Clears the debug service class. + * @dev: Device to configure + */ +bool netdev_sawf_debug_unset(struct net_device *dev) +{ + if (!dev->sawf_stats) { + return false; + } + + dev->sawf_flags &= ~NETDEV_SAWF_FLAG_DEBUG; + + return true; +} +EXPORT_SYMBOL(netdev_sawf_debug_unset); + +/** + * netdev_sawf_debug_get - Gets the debug SAWF information. + * @dev: Device to read debug information + * @sid: Pointer where service class id is written + * @max: Pointer where max latency is written + * @min: Pointer where min latency is written + * @avg: Pointer where average (exponential moving average) is written + * @last: Pointer where last latency value is written. + */ +bool netdev_sawf_debug_get(struct net_device *dev, uint8_t *sid, uint32_t *max, uint32_t *min, uint32_t *avg, uint32_t *last) +{ + uint32_t cpu, avg_sum = 0, avg_count = 0; + + if (!dev->sawf_stats || !(dev->sawf_flags & NETDEV_SAWF_FLAG_DEBUG)) { + return false; + } + + /* + * Initialize minimum to max value of uint32 so any valid value is less than it. + * Initialize maximum to 0 so any valid value is greater than it. 
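+ * A per-CPU debug_lat_min of zero means that CPU has not recorded a
+ * sample yet; such entries are skipped when folding the per-CPU minima
+ * below, so an idle CPU does not force the reported minimum to 0.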
+ */ + *min = 0xFFFFFFFF; + *max = 0; + + *sid = dev->sawf_flags >> NETDEV_SAWF_FLAG_DEBUG_SHIFT; + for_each_possible_cpu(cpu) { + struct pcpu_sawf_stats *sawf_stats = per_cpu_ptr(dev->sawf_stats, cpu); + + if (*min > sawf_stats->debug_lat_min && sawf_stats->debug_lat_min != 0) { + *min = sawf_stats->debug_lat_min; + } + + if (*max < sawf_stats->debug_lat_max) { + *max = sawf_stats->debug_lat_max; + } + + if (sawf_stats->debug_lat_last) { + *last = sawf_stats->debug_lat_last; + } + + if (sawf_stats->debug_lat_ewma) { + avg_sum += sawf_stats->debug_lat_ewma; + avg_count++; + } + } + + if (avg_count) { + *avg = avg_sum / avg_count; + } + + /* + * If minimum hasn't been updated, set it to 0. + */ + if (*min == 0xFFFFFFFF) { + *min = 0; + } + + return true; +} +EXPORT_SYMBOL(netdev_sawf_debug_get); + +/** + * netdev_sawf_debug_get - Gets latency statistics for a service class. + * @dev: Device to read latency statistics + * @sid: Service class ID to get + * @hist: Pointer to array where histogram data is written. + * @avg: Pointer where mean latency is written. + */ +bool netdev_sawf_lat_get(struct net_device *dev, uint8_t sid, uint64_t *hist, uint64_t *avg) +{ + uint32_t bucket = 0, cpu = 0; + uint64_t total_lat = 0, total_packets = 0; + + if (!dev->sawf_stats) { + return false; + } + + if (!(dev->sawf_flags & NETDEV_SAWF_FLAG_ENABLED)) { + return false; + } + + for (bucket = 0; bucket < NETDEV_SAWF_DELAY_BUCKETS; bucket++) { + hist[bucket] = 0; + } + + for_each_possible_cpu(cpu) { + unsigned int start; + struct pcpu_sawf_stats *sawf_stats = per_cpu_ptr(dev->sawf_stats, cpu); + do { + start = u64_stats_fetch_begin(&sawf_stats->syncp); + for (bucket = 0; bucket < NETDEV_SAWF_DELAY_BUCKETS; bucket++) { + hist[bucket] += sawf_stats->delay[sid][bucket]; + } + + total_packets += sawf_stats->tx_packets[sid]; + total_lat += sawf_stats->total_delay[sid]; + } while (u64_stats_fetch_retry(&sawf_stats->syncp, start)); + } + + *avg = div64_u64(total_lat, total_packets); + return true; +} +EXPORT_SYMBOL(netdev_sawf_lat_get); + static int xmit_one(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, bool more) { unsigned int len; int rc; - if (dev_nit_active(dev)) - dev_queue_xmit_nit(skb, dev); + /* At this point all offload features are handled and the skb is + * optimized for the driver. + */ + avm_pa_dev_snoop_transmit(AVM_PA_DEVINFO(dev), skb); - len = skb->len; - trace_net_dev_start_xmit(skb, dev); - rc = netdev_start_xmit(skb, dev, txq, more); - trace_net_dev_xmit(skb, rc, dev, len); + /* If this skb has been fast forwarded then we don't want it to + * go to any taps (by definition we're trying to bypass them). 
+ */ + if (unlikely(!skb->fast_forwarded)) { + if (dev_nit_active(dev)) + dev_queue_xmit_nit(skb, dev); + } + +#ifdef CONFIG_ETHERNET_PACKET_MANGLE + if (!dev->eth_mangle_tx || + (skb = dev->eth_mangle_tx(dev, skb)) != NULL) +#else + if (1) +#endif + { + len = skb->len; + trace_net_dev_start_xmit(skb, dev); + rc = netdev_start_xmit(skb, dev, txq, more); + trace_net_dev_xmit(skb, rc, dev, len); + } else { + rc = NETDEV_TX_OK; + } return rc; } @@ -3215,7 +3485,6 @@ while (skb) { struct sk_buff *next = skb->next; - skb_mark_not_on_list(skb); rc = xmit_one(skb, dev, txq, next != NULL); if (unlikely(!dev_xmit_complete(rc))) { @@ -3379,6 +3648,60 @@ } } +static inline int __dev_xmit_skb_qdisc(struct sk_buff *skb, struct Qdisc *q, + struct net_device *top_qdisc_dev, + struct netdev_queue *top_txq) +{ + spinlock_t *root_lock = qdisc_lock(q); + struct sk_buff *to_free = NULL; + bool contended; + int rc; + + qdisc_calculate_pkt_len(skb, q); + + if (q->flags & TCQ_F_NOLOCK) { + rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + if (likely(!netif_xmit_frozen_or_stopped(top_txq))) + qdisc_run(q); + + if (unlikely(to_free)) + kfree_skb_list(to_free); + return rc; + } + + /* + * Heuristic to force contended enqueues to serialize on a + * separate lock before trying to get qdisc main lock. + * This permits qdisc->running owner to get the lock more + * often and dequeue packets faster. + */ + contended = qdisc_is_running(q); + if (unlikely(contended)) + spin_lock(&q->busylock); + + spin_lock(root_lock); + if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { + __qdisc_drop(skb, &to_free); + rc = NET_XMIT_DROP; + } else { + rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + if (qdisc_run_begin(q)) { + if (unlikely(contended)) { + spin_unlock(&q->busylock); + contended = false; + } + __qdisc_run(q); + qdisc_run_end(q); + } + } + spin_unlock(root_lock); + if (unlikely(to_free)) + kfree_skb_list(to_free); + if (unlikely(contended)) + spin_unlock(&q->busylock); + return rc; +} + static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq) @@ -3435,6 +3758,7 @@ qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { + avm_pa_mark_shaped(skb); rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; if (qdisc_run_begin(q)) { if (unlikely(contended)) { @@ -3678,6 +4002,211 @@ } /** + * dev_fast_xmit_vp - fast xmit the skb to a PPE virtual port + * @skb:buffer to transmit + * @dev: the device to be transmited to + * sucessful return true + * failed return false + */ +bool dev_fast_xmit_vp(struct sk_buff *skb, + struct net_device *dev) +{ + struct netdev_queue *txq; + int cpu; + netdev_tx_t rc; + + if (unlikely(!(dev->flags & IFF_UP))) { + return false; + } + + if (unlikely(skb_is_nonlinear(skb))) { + return false; + } + + rcu_read_lock_bh(); + cpu = smp_processor_id(); + + /* + * TODO: Skip this altogether and eventually move this call to ppe_vp + * this would avoid multiple function calls when giving packet to wifi VAP. 
+ */ + txq = netdev_core_pick_tx(dev, skb, NULL); + + if (likely(txq->xmit_lock_owner != cpu)) { +#define FAST_VP_HARD_TX_LOCK(txq, cpu) { \ + __netif_tx_lock(txq, cpu); \ +} + +#define FAST_VP_HARD_TX_UNLOCK(txq) { \ + __netif_tx_unlock(txq); \ +} + skb->fast_xmit = 1; + FAST_VP_HARD_TX_LOCK(txq, cpu); + if (likely(!netif_xmit_stopped(txq))) { + rc = netdev_start_xmit(skb, dev, txq, 0); + if (unlikely(!dev_xmit_complete(rc))) { + FAST_VP_HARD_TX_UNLOCK(txq); + goto q_xmit; + } + FAST_VP_HARD_TX_UNLOCK(txq); + rcu_read_unlock_bh(); + return true; + } + FAST_VP_HARD_TX_UNLOCK(txq); + } +q_xmit: + skb->fast_xmit = 0; + rcu_read_unlock_bh(); + return false; +} +EXPORT_SYMBOL(dev_fast_xmit_vp); + +/** + * dev_fast_xmit_qdisc - fast xmit the skb along with qdisc processing + * @skb:buffer to transmit + * @top_qdisc_dev: the top device on which qdisc is enabled. + * @bottom_dev: the device on which transmission should happen after qdisc processing. + * sucessful return true + * failed return false + */ +bool dev_fast_xmit_qdisc(struct sk_buff *skb, struct net_device *top_qdisc_dev, struct net_device *bottom_dev) +{ + struct netdev_queue *txq; + struct Qdisc *q; + int rc = -ENOMEM; + + if (unlikely(!(top_qdisc_dev->flags & IFF_UP))) { + return false; + } + + skb_reset_mac_header(skb); + + /* Disable soft irqs for various locks below. Also + * stops preemption for RCU. + */ + rcu_read_lock_bh(); + + txq = netdev_core_pick_tx(top_qdisc_dev, skb, NULL); + q = rcu_dereference_bh(txq->qdisc); + if (unlikely(!q->enqueue)) { + rcu_read_unlock_bh(); + return false; + } + + skb_update_prio(skb); + + qdisc_pkt_len_init(skb); +#ifdef CONFIG_NET_CLS_ACT + skb->tc_at_ingress = 0; +# ifdef CONFIG_NET_EGRESS + if (static_branch_unlikely(&egress_needed_key)) { + skb = sch_handle_egress(skb, &rc, top_qdisc_dev); + if (!skb) + goto out; + } +# endif +#endif + /* If device/qdisc don't need skb->dst, release it right now while + * its hot in this cpu cache. + * TODO: do we need this ? + */ + if (top_qdisc_dev->priv_flags & IFF_XMIT_DST_RELEASE) + skb_dst_drop(skb); + else + skb_dst_force(skb); + + trace_net_dev_queue(skb); + + /* Update the dev so that we can transmit to bottom device after qdisc */ + skb->dev = bottom_dev; + skb->fast_qdisc = 1; + rc = __dev_xmit_skb_qdisc(skb, q, top_qdisc_dev, txq); + +out: + rcu_read_unlock_bh(); + return true; +} +EXPORT_SYMBOL(dev_fast_xmit_qdisc); + +/** + * dev_fast_xmit - fast xmit the skb + * @skb:buffer to transmit + * @dev: the device to be transmited to + * @features: the skb features could bed used + * sucessful return true + * failed return false + */ +bool dev_fast_xmit(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + struct netdev_queue *txq; + int cpu; + netdev_tx_t rc; + + /* the fast_xmit flag will avoid multiple checks in wifi xmit path */ + if (likely(!skb_is_nonlinear(skb))) + skb->fast_xmit = 1; + + if (unlikely(!(dev->flags & IFF_UP))) { + return false; + } + + if (unlikely(skb_needs_linearize(skb, features))) { + return false; + } + + rcu_read_lock_bh(); + cpu = smp_processor_id(); + + /* If device don't need the dst, release it now, otherwise make sure + * the refcount increased. 
+ */ + if (likely(dev->priv_flags & IFF_XMIT_DST_RELEASE)) { + skb_dst_drop(skb); + } else { + skb_dst_force(skb); + } + + txq = netdev_core_pick_tx(dev, skb, NULL); + + if (likely(txq->xmit_lock_owner != cpu)) { +#define FAST_HARD_TX_LOCK(features, txq, cpu) { \ + if ((features & NETIF_F_LLTX) == 0) { \ + __netif_tx_lock(txq, cpu); \ + } else { \ + __netif_tx_acquire(txq); \ + } \ +} + +#define FAST_HARD_TX_UNLOCK(features, txq) { \ + if ((features & NETIF_F_LLTX) == 0) { \ + __netif_tx_unlock(txq); \ + } else { \ + __netif_tx_release(txq); \ + } \ +} + netdev_features_t dev_features = dev->features; + FAST_HARD_TX_LOCK(dev_features, txq, cpu); + if (likely(!netif_xmit_stopped(txq))) { + rc = netdev_start_xmit(skb, dev, txq, 0); + if (unlikely(!dev_xmit_complete(rc))) { + FAST_HARD_TX_UNLOCK(dev_features, txq); + goto fail; + } + FAST_HARD_TX_UNLOCK(dev_features, txq); + rcu_read_unlock_bh(); + return true; + } + FAST_HARD_TX_UNLOCK(dev_features, txq); + } +fail: + rcu_read_unlock_bh(); + return false; +} +EXPORT_SYMBOL(dev_fast_xmit); + +/** * __dev_queue_xmit - transmit a buffer * @skb: buffer to transmit * @sb_dev: suboordinate device used for L2 forwarding offload @@ -3714,6 +4243,12 @@ skb_reset_mac_header(skb); skb_assert_len(skb); + /* + * if the skb landed in dev_queue_xmit then its not fast transmitted + * reset this flag for further processing. + */ + skb->fast_xmit = 0; + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)) __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED); @@ -4408,11 +4943,23 @@ } EXPORT_SYMBOL_GPL(do_xdp_generic); +static inline void netif_sawf_timestamp(struct sk_buff *skb, struct net_device *dev) +{ + if (!(dev->sawf_flags & NETDEV_SAWF_FLAG_RX_LAT)) { + __net_timestamp(skb); + } +} + static int netif_rx_internal(struct sk_buff *skb) { int ret; + struct net_device *dev = skb->dev; - net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); + if (dev->sawf_flags & NETDEV_SAWF_FLAG_ENABLED) { + netif_sawf_timestamp(skb, dev); + } else { + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); + } trace_netif_rx(skb); @@ -4704,6 +5251,30 @@ } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); +int (*athrs_fast_nat_recv)(struct sk_buff *skb) __rcu __read_mostly; +EXPORT_SYMBOL_GPL(athrs_fast_nat_recv); + +#ifdef CONFIG_AVM_RECV_HOOKS +static int (*avm_recvhook)(struct sk_buff *skb) __read_mostly; +static int (*avm_early_recvhook)(struct sk_buff *skb) __read_mostly; +#endif + +void set_avm_recvhook(int (*recvhook)(struct sk_buff *skb)) +{ +#ifdef CONFIG_AVM_RECV_HOOKS + avm_recvhook = recvhook; +#endif +} +EXPORT_SYMBOL(set_avm_recvhook); + +void set_avm_early_recvhook(int (*recvhook)(struct sk_buff *skb)) +{ +#ifdef CONFIG_AVM_RECV_HOOKS + avm_early_recvhook = recvhook; +#endif +} +EXPORT_SYMBOL(set_avm_early_recvhook); + /* * Limit the use of PFMEMALLOC reserves to those protocols that implement * the special handling of PFMEMALLOC skbs. 
@@ -4753,6 +5324,7 @@ bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; + int (*fast_recv)(struct sk_buff *skb); net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb); @@ -4786,6 +5358,14 @@ skb_reset_mac_len(skb); } + fast_recv = rcu_dereference(athrs_fast_nat_recv); + if (fast_recv) { + if (fast_recv(skb)) { + ret = NET_RX_SUCCESS; + goto out; + } + } + if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || skb->protocol == cpu_to_be16(ETH_P_8021AD)) { skb = skb_vlan_untag(skb); @@ -4827,6 +5407,25 @@ if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; +#ifdef CONFIG_AVM_NET_DEBUG_SKBUFF_LEAK + skb_track_funccall(skb, avm_pa_dev_receive); +#endif + + if (avm_pa_dev_receive(AVM_PA_DEVINFO(skb->dev), skb) == 0) { + ret = NET_RX_SUCCESS; + goto out; + } + +#ifdef CONFIG_AVM_RECV_HOOKS + if (avm_early_recvhook && (*avm_early_recvhook)(skb)) { + /* + * paket consumed by hook + */ + ret = NET_RX_SUCCESS; + goto out; + } +#endif + if (skb_vlan_tag_present(skb)) { if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); @@ -4859,6 +5458,16 @@ } } +#ifdef CONFIG_AVM_RECV_HOOKS + if (avm_recvhook && (*avm_recvhook)(skb)) { + /* + * paket consumed by hook + */ + ret = NET_RX_SUCCESS; + goto out; + } +#endif + if (unlikely(skb_vlan_tag_present(skb))) { check_vlan_id: if (skb_vlan_tag_get_id(skb)) { @@ -5136,7 +5745,12 @@ { int ret; - net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); + struct net_device *dev = skb->dev; + if (dev->sawf_flags & NETDEV_SAWF_FLAG_ENABLED) { + netif_sawf_timestamp(skb, dev); + } else { + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); + } if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; @@ -5166,7 +5780,13 @@ INIT_LIST_HEAD(&sublist); list_for_each_entry_safe(skb, next, head, list) { - net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); + struct net_device *dev = skb->dev; + if (dev->sawf_flags & NETDEV_SAWF_FLAG_ENABLED) { + netif_sawf_timestamp(skb, dev); + } else { + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); + } + skb_list_del_init(skb); if (!skb_defer_rx_timestamp(skb)) list_add_tail(&skb->list, &sublist); @@ -5436,8 +6056,7 @@ NAPI_GRO_CB(skb)->frag0 = NULL; NAPI_GRO_CB(skb)->frag0_len = 0; - if (skb_mac_header(skb) == skb_tail_pointer(skb) && - pinfo->nr_frags && + if (!skb_headlen(skb) && pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0)) && (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); @@ -5503,7 +6122,10 @@ int same_flow; int grow; - if (netif_elide_gro(skb->dev)) + if (skb->gro_skip) + goto normal; + + if (netif_elide_gro(skb->dev) || avm_pa_dev_elide_gro(AVM_PA_DEVINFO(skb->dev), skb)) goto normal; gro_head = gro_list_prepare(napi, skb); @@ -5650,10 +6272,13 @@ break; case GRO_MERGED_FREE: - if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) + if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD){ napi_skb_free_stolen_head(skb); - else + skbuff_debugobj_deactivate(skb); + } + else{ __kfree_skb(skb); + } break; case GRO_HELD: @@ -5942,6 +6567,11 @@ { unsigned long flags; + if (test_bit(NAPI_STATE_THREADED, &n->state)) { + queue_work(napi_workq, &n->work); + return; + } + local_irq_save(flags); ____napi_schedule(this_cpu_ptr(&softnet_data), n); local_irq_restore(flags); @@ -6257,6 +6887,84 @@ napi->gro_bitmask = 0; } +static int __napi_poll(struct napi_struct *n, bool *repoll) +{ + int work, weight; + + weight = n->weight; + + /* This NAPI_STATE_SCHED test is for avoiding a race + * with netpoll's 
poll_napi(). Only the entity which + * obtains the lock and sees NAPI_STATE_SCHED set will + * actually make the ->poll() call. Therefore we avoid + * accidentally calling ->poll() when NAPI is not scheduled. + */ + work = 0; + if (test_bit(NAPI_STATE_SCHED, &n->state)) { + work = n->poll(n, weight); + trace_napi_poll(n, work, weight); + } + + WARN_ON_ONCE(work > weight); + + if (likely(work < weight)) + return work; + + /* Drivers must not modify the NAPI state if they + * consume the entire weight. In such cases this code + * still "owns" the NAPI instance and therefore can + * move the instance around on the list at-will. + */ + if (unlikely(napi_disable_pending(n))) { + napi_complete(n); + return work; + } + + if (n->gro_bitmask) { + /* flush too old packets + * If HZ < 1000, flush all packets. + */ + napi_gro_flush(n, HZ >= 1000); + } + + gro_normal_list(n); + + *repoll = true; + + return work; +} + +static void napi_workfn(struct work_struct *work) +{ + struct napi_struct *n = container_of(work, struct napi_struct, work); + void *have; + + for (;;) { + bool repoll = false; + + local_bh_disable(); + + have = netpoll_poll_lock(n); + __napi_poll(n, &repoll); + netpoll_poll_unlock(have); + + local_bh_enable(); + + if (!repoll) + return; + + if (!need_resched()) + continue; + + /* + * have to pay for the latency of task switch even if + * napi is scheduled + */ + queue_work(napi_workq, work); + return; + } +} + void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { @@ -6276,6 +6984,7 @@ #ifdef CONFIG_NETPOLL napi->poll_owner = -1; #endif + INIT_WORK(&napi->work, napi_workfn); set_bit(NAPI_STATE_SCHED, &napi->state); set_bit(NAPI_STATE_NPSVC, &napi->state); list_add_rcu(&napi->dev_list, &dev->napi_list); @@ -6316,6 +7025,7 @@ void netif_napi_del(struct napi_struct *napi) { might_sleep(); + cancel_work_sync(&napi->work); if (napi_hash_del(napi)) synchronize_net(); list_del_init(&napi->dev_list); @@ -6328,51 +7038,19 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) { + bool do_repoll = false; void *have; - int work, weight; + int work; list_del_init(&n->poll_list); have = netpoll_poll_lock(n); - weight = n->weight; - - /* This NAPI_STATE_SCHED test is for avoiding a race - * with netpoll's poll_napi(). Only the entity which - * obtains the lock and sees NAPI_STATE_SCHED set will - * actually make the ->poll() call. Therefore we avoid - * accidentally calling ->poll() when NAPI is not scheduled. - */ - work = 0; - if (test_bit(NAPI_STATE_SCHED, &n->state)) { - work = n->poll(n, weight); - trace_napi_poll(n, work, weight); - } - - WARN_ON_ONCE(work > weight); + work = __napi_poll(n, &do_repoll); - if (likely(work < weight)) + if (!do_repoll) goto out_unlock; - /* Drivers must not modify the NAPI state if they - * consume the entire weight. In such cases this code - * still "owns" the NAPI instance and therefore can - * move the instance around on the list at-will. - */ - if (unlikely(napi_disable_pending(n))) { - napi_complete(n); - goto out_unlock; - } - - if (n->gro_bitmask) { - /* flush too old packets - * If HZ < 1000, flush all packets. - */ - napi_gro_flush(n, HZ >= 1000); - } - - gro_normal_list(n); - /* Some drivers may have called napi_schedule * prior to exhausting their budget. 
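 * In that case the instance must stay scheduled: it is either left on
 * the list the driver queued it to or re-added to the repoll list, and
 * will be polled again on a later pass.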
*/ @@ -7305,6 +7983,48 @@ &upper_dev->adj_list.lower); } +static void __netdev_addr_mask(unsigned char *mask, const unsigned char *addr, + struct net_device *dev) +{ + int i; + + for (i = 0; i < dev->addr_len; i++) + mask[i] |= addr[i] ^ dev->dev_addr[i]; +} + +static void __netdev_upper_mask(unsigned char *mask, struct net_device *dev, + struct net_device *lower) +{ + struct net_device *cur; + struct list_head *iter; + + netdev_for_each_upper_dev_rcu(dev, cur, iter) { + __netdev_addr_mask(mask, cur->dev_addr, lower); + __netdev_upper_mask(mask, cur, lower); + } +} + +static void __netdev_update_addr_mask(struct net_device *dev) +{ + unsigned char mask[MAX_ADDR_LEN]; + struct net_device *cur; + struct list_head *iter; + + memset(mask, 0, sizeof(mask)); + __netdev_upper_mask(mask, dev, dev); + memcpy(dev->local_addr_mask, mask, dev->addr_len); + + netdev_for_each_lower_dev(dev, cur, iter) + __netdev_update_addr_mask(cur); +} + +static void netdev_update_addr_mask(struct net_device *dev) +{ + rcu_read_lock(); + __netdev_update_addr_mask(dev); + rcu_read_unlock(); +} + static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, void *upper_priv, void *upper_info, @@ -7355,6 +8075,7 @@ if (ret) return ret; + netdev_update_addr_mask(dev); ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); ret = notifier_to_errno(ret); @@ -7448,6 +8169,7 @@ __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); + netdev_update_addr_mask(dev); call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); @@ -7652,7 +8374,6 @@ } EXPORT_SYMBOL(netdev_lower_dev_get_private); - /** * netdev_lower_change - Dispatch event about lower device state change * @lower_dev: device @@ -8178,6 +8899,7 @@ if (err) return err; dev->addr_assign_type = NET_ADDR_SET; + netdev_update_addr_mask(dev); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); add_device_randomness(dev->dev_addr, dev->addr_len); return 0; @@ -9277,7 +9999,6 @@ } EXPORT_SYMBOL_GPL(init_dummy_netdev); - /** * register_netdev - register a network device * @dev: device to register @@ -9407,7 +10128,6 @@ __rtnl_unlock(); - /* Wait for rcu callbacks to finish before next phase */ if (!list_empty(&list)) rcu_barrier(); @@ -9427,6 +10147,7 @@ dev->reg_state = NETREG_UNREGISTERED; netdev_wait_allrefs(dev); + avm_pa_dev_unregister_sync(AVM_PA_DEVINFO(dev)); /* paranoia */ BUG_ON(netdev_refcnt_read(dev)); @@ -9628,6 +10349,7 @@ hash_init(dev->qdisc_hash); #endif dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; + avm_pa_dev_init(AVM_PA_DEVINFO(dev)); setup(dev); if (!dev->tx_queue_len) { @@ -10306,6 +11028,10 @@ sd->backlog.weight = weight_p; } + napi_workq = alloc_workqueue("napi_workq", WQ_UNBOUND | WQ_HIGHPRI, + WQ_UNBOUND_MAX_ACTIVE | WQ_SYSFS); + BUG_ON(!napi_workq); + dev_boot_phase = 0; /* The loopback device is special if any other network devices