--- zzzz-none-000/linux-5.4.213/net/core/dev.c 2022-09-15 10:04:56.000000000 +0000 +++ miami-7690-761/linux-5.4.213/net/core/dev.c 2024-05-29 11:20:02.000000000 +0000 @@ -97,6 +97,7 @@ #include #include #include +#include #include #include #include @@ -145,6 +146,7 @@ #include #include "net-sysfs.h" +#include "skbuff_debug.h" #define MAX_GRO_SKBS 8 @@ -156,6 +158,7 @@ struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; struct list_head ptype_all __read_mostly; /* Taps */ static struct list_head offload_base __read_mostly; +static struct workqueue_struct *napi_workq __read_mostly; static int netif_rx_internal(struct sk_buff *skb); static int call_netdevice_notifiers_info(unsigned long val, @@ -283,7 +286,6 @@ * *******************************************************************************/ - /* * Add a protocol ID to the list. Now that the input handler is * smarter we can dispense with all the messy stuff that used to be @@ -385,7 +387,6 @@ } EXPORT_SYMBOL(dev_remove_pack); - /** * dev_add_offload - register offload handlers * @po: protocol offload declaration @@ -528,7 +529,6 @@ } EXPORT_SYMBOL(netdev_boot_setup_check); - /** * netdev_boot_base - get address from boot time settings * @prefix: prefix for network device @@ -762,7 +762,6 @@ } EXPORT_SYMBOL(dev_get_by_index_rcu); - /** * dev_get_by_index - find a device by its ifindex * @net: the applicable net namespace @@ -1457,7 +1456,6 @@ } EXPORT_SYMBOL(dev_close); - /** * dev_disable_lro - disable Large Receive Offload on a device * @dev: device @@ -1514,7 +1512,7 @@ N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN) N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO) N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO) - N(PRE_CHANGEADDR) + N(PRE_CHANGEADDR) N(BR_JOIN) N(BR_LEAVE) } #undef N return "UNKNOWN_NETDEV_EVENT"; @@ -2740,7 +2738,6 @@ } EXPORT_SYMBOL(__dev_kfree_skb_any); - /** * netif_device_detach - mark device as removed * @dev: network device @@ -2970,7 +2967,6 @@ } EXPORT_SYMBOL(skb_mac_gso_segment); - /* openvswitch calls this on rx path, so we need a different check. */ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) @@ -3190,19 +3186,293 @@ } EXPORT_SYMBOL(netif_skb_features); +/** + * netdev_sawf_deinit - free sawf statistics. + * @dev: Device to free sawf statistics. + * + * Returns true on success, false on failure. + */ +bool netdev_sawf_deinit(struct net_device *dev) +{ + struct pcpu_sawf_stats __percpu *stats_to_delete; + + if ((!dev->sawf_stats)) { + return false; + } + + stats_to_delete = dev->sawf_stats; + dev->sawf_stats = NULL; + + free_percpu(stats_to_delete); + + return true; +} +EXPORT_SYMBOL(netdev_sawf_deinit); + +/** + * netdev_sawf_init - Allocate netdev SAWF statistics. + * @dev: Device to allocate statistics on. + * @mode: Initial flags to be set. + */ +bool netdev_sawf_init(struct net_device *dev, uint16_t mode) +{ + int cpu; + + if (dev->sawf_stats) { + return false; + } + + dev->sawf_stats = netdev_alloc_pcpu_stats(struct pcpu_sawf_stats); + if (!dev->sawf_stats) { + return false; + } + + for_each_possible_cpu(cpu) { + struct pcpu_sawf_stats *stats = per_cpu_ptr(dev->sawf_stats, cpu); + memset(stats, 0, sizeof(*stats)); + } + + dev->sawf_flags = mode; + + return true; +} +EXPORT_SYMBOL(netdev_sawf_init); + +/** + * netdev_sawf_flags_update - Set SAWF flags. 
+ * @dev: Device to update + * @flags: New value of flags + */ +bool netdev_sawf_flags_update(struct net_device *dev, uint16_t flags) +{ + if (!dev->sawf_stats) { + return false; + } + + dev->sawf_flags = flags; + + return true; +} +EXPORT_SYMBOL(netdev_sawf_flags_update); + +/** + * netdev_sawf_enable - Re-enable SAWF statistics. + * @dev: Device to enable. + */ +bool netdev_sawf_enable(struct net_device *dev) +{ + int cpu; + if (!dev->sawf_stats) { + return false; + } + + for_each_possible_cpu(cpu) { + struct pcpu_sawf_stats *stats = per_cpu_ptr(dev->sawf_stats, cpu); + memset(stats, 0, sizeof(*stats)); + } + + dev->sawf_flags |= NETDEV_SAWF_FLAG_ENABLED; + + return true; +} +EXPORT_SYMBOL(netdev_sawf_enable); + +/** + * netdev_sawf_disable - Disable SAWF statistics collection. + * @dev: device to disable statistics. + */ +bool netdev_sawf_disable(struct net_device *dev) +{ + if (!dev->sawf_stats) { + return false; + } + + dev->sawf_flags &= ~NETDEV_SAWF_FLAG_ENABLED; + + return true; +} +EXPORT_SYMBOL(netdev_sawf_disable); + +/** + * netdev_sawf_debug_set - Sets the debug service class. + * @dev: Device to configure + * @sid: Service class ID to keep debug information. + */ +bool netdev_sawf_debug_set(struct net_device *dev, uint8_t sid) +{ + int cpu; + + if (!dev->sawf_stats) { + return false; + } + + for_each_possible_cpu(cpu) { + struct pcpu_sawf_stats *stats = per_cpu_ptr(dev->sawf_stats, cpu); + stats->debug_lat_max = 0; + stats->debug_lat_min = 0; + stats->debug_lat_ewma = 0; + stats->debug_lat_last = 0; + } + + dev->sawf_flags = (dev->sawf_flags & ~(NETDEV_SAWF_FLAG_DEBUG_MASK)) | (sid << NETDEV_SAWF_FLAG_DEBUG_SHIFT) | (NETDEV_SAWF_FLAG_DEBUG); + + return true; +} +EXPORT_SYMBOL(netdev_sawf_debug_set); + +/** + * netdev_sawf_debug_set - Clears the debug service class. + * @dev: Device to configure + */ +bool netdev_sawf_debug_unset(struct net_device *dev) +{ + if (!dev->sawf_stats) { + return false; + } + + dev->sawf_flags &= ~NETDEV_SAWF_FLAG_DEBUG; + + return true; +} +EXPORT_SYMBOL(netdev_sawf_debug_unset); + +/** + * netdev_sawf_debug_get - Gets the debug SAWF information. + * @dev: Device to read debug information + * @sid: Pointer where service class id is written + * @max: Pointer where max latency is written + * @min: Pointer where min latency is written + * @avg: Pointer where average (exponential moving average) is written + * @last: Pointer where last latency value is written. + */ +bool netdev_sawf_debug_get(struct net_device *dev, uint8_t *sid, uint32_t *max, uint32_t *min, uint32_t *avg, uint32_t *last) +{ + uint32_t cpu, avg_sum = 0, avg_count = 0; + + if (!dev->sawf_stats || !(dev->sawf_flags & NETDEV_SAWF_FLAG_DEBUG)) { + return false; + } + + /* + * Initialize minimum to max value of uint32 so any valid value is less than it. + * Initialize maximum to 0 so any valid value is greater than it. 
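+ * A per-CPU debug_lat_min of zero means that CPU has not recorded a
+ * sample yet; such entries are skipped when folding the per-CPU minima
+ * below, so an idle CPU does not force the reported minimum to 0.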
+ */ + *min = 0xFFFFFFFF; + *max = 0; + + *sid = dev->sawf_flags >> NETDEV_SAWF_FLAG_DEBUG_SHIFT; + for_each_possible_cpu(cpu) { + struct pcpu_sawf_stats *sawf_stats = per_cpu_ptr(dev->sawf_stats, cpu); + + if (*min > sawf_stats->debug_lat_min && sawf_stats->debug_lat_min != 0) { + *min = sawf_stats->debug_lat_min; + } + + if (*max < sawf_stats->debug_lat_max) { + *max = sawf_stats->debug_lat_max; + } + + if (sawf_stats->debug_lat_last) { + *last = sawf_stats->debug_lat_last; + } + + if (sawf_stats->debug_lat_ewma) { + avg_sum += sawf_stats->debug_lat_ewma; + avg_count++; + } + } + + if (avg_count) { + *avg = avg_sum / avg_count; + } + + /* + * If minimum hasn't been updated, set it to 0. + */ + if (*min == 0xFFFFFFFF) { + *min = 0; + } + + return true; +} +EXPORT_SYMBOL(netdev_sawf_debug_get); + +/** + * netdev_sawf_debug_get - Gets latency statistics for a service class. + * @dev: Device to read latency statistics + * @sid: Service class ID to get + * @hist: Pointer to array where histogram data is written. + * @avg: Pointer where mean latency is written. + */ +bool netdev_sawf_lat_get(struct net_device *dev, uint8_t sid, uint64_t *hist, uint64_t *avg) +{ + uint32_t bucket = 0, cpu = 0; + uint64_t total_lat = 0, total_packets = 0; + + if (!dev->sawf_stats) { + return false; + } + + if (!(dev->sawf_flags & NETDEV_SAWF_FLAG_ENABLED)) { + return false; + } + + for (bucket = 0; bucket < NETDEV_SAWF_DELAY_BUCKETS; bucket++) { + hist[bucket] = 0; + } + + for_each_possible_cpu(cpu) { + unsigned int start; + struct pcpu_sawf_stats *sawf_stats = per_cpu_ptr(dev->sawf_stats, cpu); + do { + start = u64_stats_fetch_begin(&sawf_stats->syncp); + for (bucket = 0; bucket < NETDEV_SAWF_DELAY_BUCKETS; bucket++) { + hist[bucket] += sawf_stats->delay[sid][bucket]; + } + + total_packets += sawf_stats->tx_packets[sid]; + total_lat += sawf_stats->total_delay[sid]; + } while (u64_stats_fetch_retry(&sawf_stats->syncp, start)); + } + + *avg = div64_u64(total_lat, total_packets); + return true; +} +EXPORT_SYMBOL(netdev_sawf_lat_get); + static int xmit_one(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, bool more) { unsigned int len; int rc; - if (dev_nit_active(dev)) - dev_queue_xmit_nit(skb, dev); + /* At this point all offload features are handled and the skb is + * optimized for the driver. + */ + avm_pa_dev_snoop_transmit(AVM_PA_DEVINFO(dev), skb); - len = skb->len; - trace_net_dev_start_xmit(skb, dev); - rc = netdev_start_xmit(skb, dev, txq, more); - trace_net_dev_xmit(skb, rc, dev, len); + /* If this skb has been fast forwarded then we don't want it to + * go to any taps (by definition we're trying to bypass them). 
+ */ + if (unlikely(!skb->fast_forwarded)) { + if (dev_nit_active(dev)) + dev_queue_xmit_nit(skb, dev); + } + +#ifdef CONFIG_ETHERNET_PACKET_MANGLE + if (!dev->eth_mangle_tx || + (skb = dev->eth_mangle_tx(dev, skb)) != NULL) +#else + if (1) +#endif + { + len = skb->len; + trace_net_dev_start_xmit(skb, dev); + rc = netdev_start_xmit(skb, dev, txq, more); + trace_net_dev_xmit(skb, rc, dev, len); + } else { + rc = NETDEV_TX_OK; + } return rc; } @@ -3215,7 +3485,6 @@ while (skb) { struct sk_buff *next = skb->next; - skb_mark_not_on_list(skb); rc = xmit_one(skb, dev, txq, next != NULL); if (unlikely(!dev_xmit_complete(rc))) { @@ -3379,6 +3648,60 @@ } } +static inline int __dev_xmit_skb_qdisc(struct sk_buff *skb, struct Qdisc *q, + struct net_device *top_qdisc_dev, + struct netdev_queue *top_txq) +{ + spinlock_t *root_lock = qdisc_lock(q); + struct sk_buff *to_free = NULL; + bool contended; + int rc; + + qdisc_calculate_pkt_len(skb, q); + + if (q->flags & TCQ_F_NOLOCK) { + rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + if (likely(!netif_xmit_frozen_or_stopped(top_txq))) + qdisc_run(q); + + if (unlikely(to_free)) + kfree_skb_list(to_free); + return rc; + } + + /* + * Heuristic to force contended enqueues to serialize on a + * separate lock before trying to get qdisc main lock. + * This permits qdisc->running owner to get the lock more + * often and dequeue packets faster. + */ + contended = qdisc_is_running(q); + if (unlikely(contended)) + spin_lock(&q->busylock); + + spin_lock(root_lock); + if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { + __qdisc_drop(skb, &to_free); + rc = NET_XMIT_DROP; + } else { + rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + if (qdisc_run_begin(q)) { + if (unlikely(contended)) { + spin_unlock(&q->busylock); + contended = false; + } + __qdisc_run(q); + qdisc_run_end(q); + } + } + spin_unlock(root_lock); + if (unlikely(to_free)) + kfree_skb_list(to_free); + if (unlikely(contended)) + spin_unlock(&q->busylock); + return rc; +} + static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq) @@ -3435,6 +3758,7 @@ qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { + avm_pa_mark_shaped(skb); rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; if (qdisc_run_begin(q)) { if (unlikely(contended)) { @@ -3678,6 +4002,211 @@ } /** + * dev_fast_xmit_vp - fast xmit the skb to a PPE virtual port + * @skb:buffer to transmit + * @dev: the device to be transmited to + * sucessful return true + * failed return false + */ +bool dev_fast_xmit_vp(struct sk_buff *skb, + struct net_device *dev) +{ + struct netdev_queue *txq; + int cpu; + netdev_tx_t rc; + + if (unlikely(!(dev->flags & IFF_UP))) { + return false; + } + + if (unlikely(skb_is_nonlinear(skb))) { + return false; + } + + rcu_read_lock_bh(); + cpu = smp_processor_id(); + + /* + * TODO: Skip this altogether and eventually move this call to ppe_vp + * this would avoid multiple function calls when giving packet to wifi VAP. 
+ */ + txq = netdev_core_pick_tx(dev, skb, NULL); + + if (likely(txq->xmit_lock_owner != cpu)) { +#define FAST_VP_HARD_TX_LOCK(txq, cpu) { \ + __netif_tx_lock(txq, cpu); \ +} + +#define FAST_VP_HARD_TX_UNLOCK(txq) { \ + __netif_tx_unlock(txq); \ +} + skb->fast_xmit = 1; + FAST_VP_HARD_TX_LOCK(txq, cpu); + if (likely(!netif_xmit_stopped(txq))) { + rc = netdev_start_xmit(skb, dev, txq, 0); + if (unlikely(!dev_xmit_complete(rc))) { + FAST_VP_HARD_TX_UNLOCK(txq); + goto q_xmit; + } + FAST_VP_HARD_TX_UNLOCK(txq); + rcu_read_unlock_bh(); + return true; + } + FAST_VP_HARD_TX_UNLOCK(txq); + } +q_xmit: + skb->fast_xmit = 0; + rcu_read_unlock_bh(); + return false; +} +EXPORT_SYMBOL(dev_fast_xmit_vp); + +/** + * dev_fast_xmit_qdisc - fast xmit the skb along with qdisc processing + * @skb:buffer to transmit + * @top_qdisc_dev: the top device on which qdisc is enabled. + * @bottom_dev: the device on which transmission should happen after qdisc processing. + * sucessful return true + * failed return false + */ +bool dev_fast_xmit_qdisc(struct sk_buff *skb, struct net_device *top_qdisc_dev, struct net_device *bottom_dev) +{ + struct netdev_queue *txq; + struct Qdisc *q; + int rc = -ENOMEM; + + if (unlikely(!(top_qdisc_dev->flags & IFF_UP))) { + return false; + } + + skb_reset_mac_header(skb); + + /* Disable soft irqs for various locks below. Also + * stops preemption for RCU. + */ + rcu_read_lock_bh(); + + txq = netdev_core_pick_tx(top_qdisc_dev, skb, NULL); + q = rcu_dereference_bh(txq->qdisc); + if (unlikely(!q->enqueue)) { + rcu_read_unlock_bh(); + return false; + } + + skb_update_prio(skb); + + qdisc_pkt_len_init(skb); +#ifdef CONFIG_NET_CLS_ACT + skb->tc_at_ingress = 0; +# ifdef CONFIG_NET_EGRESS + if (static_branch_unlikely(&egress_needed_key)) { + skb = sch_handle_egress(skb, &rc, top_qdisc_dev); + if (!skb) + goto out; + } +# endif +#endif + /* If device/qdisc don't need skb->dst, release it right now while + * its hot in this cpu cache. + * TODO: do we need this ? + */ + if (top_qdisc_dev->priv_flags & IFF_XMIT_DST_RELEASE) + skb_dst_drop(skb); + else + skb_dst_force(skb); + + trace_net_dev_queue(skb); + + /* Update the dev so that we can transmit to bottom device after qdisc */ + skb->dev = bottom_dev; + skb->fast_qdisc = 1; + rc = __dev_xmit_skb_qdisc(skb, q, top_qdisc_dev, txq); + +out: + rcu_read_unlock_bh(); + return true; +} +EXPORT_SYMBOL(dev_fast_xmit_qdisc); + +/** + * dev_fast_xmit - fast xmit the skb + * @skb:buffer to transmit + * @dev: the device to be transmited to + * @features: the skb features could bed used + * sucessful return true + * failed return false + */ +bool dev_fast_xmit(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + struct netdev_queue *txq; + int cpu; + netdev_tx_t rc; + + /* the fast_xmit flag will avoid multiple checks in wifi xmit path */ + if (likely(!skb_is_nonlinear(skb))) + skb->fast_xmit = 1; + + if (unlikely(!(dev->flags & IFF_UP))) { + return false; + } + + if (unlikely(skb_needs_linearize(skb, features))) { + return false; + } + + rcu_read_lock_bh(); + cpu = smp_processor_id(); + + /* If device don't need the dst, release it now, otherwise make sure + * the refcount increased. 
+ */ + if (likely(dev->priv_flags & IFF_XMIT_DST_RELEASE)) { + skb_dst_drop(skb); + } else { + skb_dst_force(skb); + } + + txq = netdev_core_pick_tx(dev, skb, NULL); + + if (likely(txq->xmit_lock_owner != cpu)) { +#define FAST_HARD_TX_LOCK(features, txq, cpu) { \ + if ((features & NETIF_F_LLTX) == 0) { \ + __netif_tx_lock(txq, cpu); \ + } else { \ + __netif_tx_acquire(txq); \ + } \ +} + +#define FAST_HARD_TX_UNLOCK(features, txq) { \ + if ((features & NETIF_F_LLTX) == 0) { \ + __netif_tx_unlock(txq); \ + } else { \ + __netif_tx_release(txq); \ + } \ +} + netdev_features_t dev_features = dev->features; + FAST_HARD_TX_LOCK(dev_features, txq, cpu); + if (likely(!netif_xmit_stopped(txq))) { + rc = netdev_start_xmit(skb, dev, txq, 0); + if (unlikely(!dev_xmit_complete(rc))) { + FAST_HARD_TX_UNLOCK(dev_features, txq); + goto fail; + } + FAST_HARD_TX_UNLOCK(dev_features, txq); + rcu_read_unlock_bh(); + return true; + } + FAST_HARD_TX_UNLOCK(dev_features, txq); + } +fail: + rcu_read_unlock_bh(); + return false; +} +EXPORT_SYMBOL(dev_fast_xmit); + +/** * __dev_queue_xmit - transmit a buffer * @skb: buffer to transmit * @sb_dev: suboordinate device used for L2 forwarding offload @@ -3714,6 +4243,12 @@ skb_reset_mac_header(skb); skb_assert_len(skb); + /* + * if the skb landed in dev_queue_xmit then its not fast transmitted + * reset this flag for further processing. + */ + skb->fast_xmit = 0; + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)) __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED); @@ -4408,11 +4943,23 @@ } EXPORT_SYMBOL_GPL(do_xdp_generic); +static inline void netif_sawf_timestamp(struct sk_buff *skb, struct net_device *dev) +{ + if (!(dev->sawf_flags & NETDEV_SAWF_FLAG_RX_LAT)) { + __net_timestamp(skb); + } +} + static int netif_rx_internal(struct sk_buff *skb) { int ret; + struct net_device *dev = skb->dev; - net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); + if (dev->sawf_flags & NETDEV_SAWF_FLAG_ENABLED) { + netif_sawf_timestamp(skb, dev); + } else { + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); + } trace_netif_rx(skb); @@ -4704,6 +5251,30 @@ } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); +int (*athrs_fast_nat_recv)(struct sk_buff *skb) __rcu __read_mostly; +EXPORT_SYMBOL_GPL(athrs_fast_nat_recv); + +#ifdef CONFIG_AVM_RECV_HOOKS +static int (*avm_recvhook)(struct sk_buff *skb) __read_mostly; +static int (*avm_early_recvhook)(struct sk_buff *skb) __read_mostly; +#endif + +void set_avm_recvhook(int (*recvhook)(struct sk_buff *skb)) +{ +#ifdef CONFIG_AVM_RECV_HOOKS + avm_recvhook = recvhook; +#endif +} +EXPORT_SYMBOL(set_avm_recvhook); + +void set_avm_early_recvhook(int (*recvhook)(struct sk_buff *skb)) +{ +#ifdef CONFIG_AVM_RECV_HOOKS + avm_early_recvhook = recvhook; +#endif +} +EXPORT_SYMBOL(set_avm_early_recvhook); + /* * Limit the use of PFMEMALLOC reserves to those protocols that implement * the special handling of PFMEMALLOC skbs. 
@@ -4753,6 +5324,7 @@ bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; + int (*fast_recv)(struct sk_buff *skb); net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb); @@ -4786,6 +5358,14 @@ skb_reset_mac_len(skb); } + fast_recv = rcu_dereference(athrs_fast_nat_recv); + if (fast_recv) { + if (fast_recv(skb)) { + ret = NET_RX_SUCCESS; + goto out; + } + } + if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || skb->protocol == cpu_to_be16(ETH_P_8021AD)) { skb = skb_vlan_untag(skb); @@ -4827,6 +5407,25 @@ if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; +#ifdef CONFIG_AVM_NET_DEBUG_SKBUFF_LEAK + skb_track_funccall(skb, avm_pa_dev_receive); +#endif + + if (avm_pa_dev_receive(AVM_PA_DEVINFO(skb->dev), skb) == 0) { + ret = NET_RX_SUCCESS; + goto out; + } + +#ifdef CONFIG_AVM_RECV_HOOKS + if (avm_early_recvhook && (*avm_early_recvhook)(skb)) { + /* + * paket consumed by hook + */ + ret = NET_RX_SUCCESS; + goto out; + } +#endif + if (skb_vlan_tag_present(skb)) { if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); @@ -4859,6 +5458,16 @@ } } +#ifdef CONFIG_AVM_RECV_HOOKS + if (avm_recvhook && (*avm_recvhook)(skb)) { + /* + * paket consumed by hook + */ + ret = NET_RX_SUCCESS; + goto out; + } +#endif + if (unlikely(skb_vlan_tag_present(skb))) { check_vlan_id: if (skb_vlan_tag_get_id(skb)) { @@ -5136,7 +5745,12 @@ { int ret; - net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); + struct net_device *dev = skb->dev; + if (dev->sawf_flags & NETDEV_SAWF_FLAG_ENABLED) { + netif_sawf_timestamp(skb, dev); + } else { + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); + } if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; @@ -5166,7 +5780,13 @@ INIT_LIST_HEAD(&sublist); list_for_each_entry_safe(skb, next, head, list) { - net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); + struct net_device *dev = skb->dev; + if (dev->sawf_flags & NETDEV_SAWF_FLAG_ENABLED) { + netif_sawf_timestamp(skb, dev); + } else { + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); + } + skb_list_del_init(skb); if (!skb_defer_rx_timestamp(skb)) list_add_tail(&skb->list, &sublist); @@ -5436,8 +6056,7 @@ NAPI_GRO_CB(skb)->frag0 = NULL; NAPI_GRO_CB(skb)->frag0_len = 0; - if (skb_mac_header(skb) == skb_tail_pointer(skb) && - pinfo->nr_frags && + if (!skb_headlen(skb) && pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0)) && (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); @@ -5503,7 +6122,10 @@ int same_flow; int grow; - if (netif_elide_gro(skb->dev)) + if (skb->gro_skip) + goto normal; + + if (netif_elide_gro(skb->dev) || avm_pa_dev_elide_gro(AVM_PA_DEVINFO(skb->dev), skb)) goto normal; gro_head = gro_list_prepare(napi, skb); @@ -5650,10 +6272,13 @@ break; case GRO_MERGED_FREE: - if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) + if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD){ napi_skb_free_stolen_head(skb); - else + skbuff_debugobj_deactivate(skb); + } + else{ __kfree_skb(skb); + } break; case GRO_HELD: @@ -5942,6 +6567,11 @@ { unsigned long flags; + if (test_bit(NAPI_STATE_THREADED, &n->state)) { + queue_work(napi_workq, &n->work); + return; + } + local_irq_save(flags); ____napi_schedule(this_cpu_ptr(&softnet_data), n); local_irq_restore(flags); @@ -6257,6 +6887,84 @@ napi->gro_bitmask = 0; } +static int __napi_poll(struct napi_struct *n, bool *repoll) +{ + int work, weight; + + weight = n->weight; + + /* This NAPI_STATE_SCHED test is for avoiding a race + * with netpoll's 
poll_napi(). Only the entity which + * obtains the lock and sees NAPI_STATE_SCHED set will + * actually make the ->poll() call. Therefore we avoid + * accidentally calling ->poll() when NAPI is not scheduled. + */ + work = 0; + if (test_bit(NAPI_STATE_SCHED, &n->state)) { + work = n->poll(n, weight); + trace_napi_poll(n, work, weight); + } + + WARN_ON_ONCE(work > weight); + + if (likely(work < weight)) + return work; + + /* Drivers must not modify the NAPI state if they + * consume the entire weight. In such cases this code + * still "owns" the NAPI instance and therefore can + * move the instance around on the list at-will. + */ + if (unlikely(napi_disable_pending(n))) { + napi_complete(n); + return work; + } + + if (n->gro_bitmask) { + /* flush too old packets + * If HZ < 1000, flush all packets. + */ + napi_gro_flush(n, HZ >= 1000); + } + + gro_normal_list(n); + + *repoll = true; + + return work; +} + +static void napi_workfn(struct work_struct *work) +{ + struct napi_struct *n = container_of(work, struct napi_struct, work); + void *have; + + for (;;) { + bool repoll = false; + + local_bh_disable(); + + have = netpoll_poll_lock(n); + __napi_poll(n, &repoll); + netpoll_poll_unlock(have); + + local_bh_enable(); + + if (!repoll) + return; + + if (!need_resched()) + continue; + + /* + * have to pay for the latency of task switch even if + * napi is scheduled + */ + queue_work(napi_workq, work); + return; + } +} + void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { @@ -6276,6 +6984,7 @@ #ifdef CONFIG_NETPOLL napi->poll_owner = -1; #endif + INIT_WORK(&napi->work, napi_workfn); set_bit(NAPI_STATE_SCHED, &napi->state); set_bit(NAPI_STATE_NPSVC, &napi->state); list_add_rcu(&napi->dev_list, &dev->napi_list); @@ -6316,6 +7025,7 @@ void netif_napi_del(struct napi_struct *napi) { might_sleep(); + cancel_work_sync(&napi->work); if (napi_hash_del(napi)) synchronize_net(); list_del_init(&napi->dev_list); @@ -6328,51 +7038,19 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) { + bool do_repoll = false; void *have; - int work, weight; + int work; list_del_init(&n->poll_list); have = netpoll_poll_lock(n); - weight = n->weight; - - /* This NAPI_STATE_SCHED test is for avoiding a race - * with netpoll's poll_napi(). Only the entity which - * obtains the lock and sees NAPI_STATE_SCHED set will - * actually make the ->poll() call. Therefore we avoid - * accidentally calling ->poll() when NAPI is not scheduled. - */ - work = 0; - if (test_bit(NAPI_STATE_SCHED, &n->state)) { - work = n->poll(n, weight); - trace_napi_poll(n, work, weight); - } - - WARN_ON_ONCE(work > weight); + work = __napi_poll(n, &do_repoll); - if (likely(work < weight)) + if (!do_repoll) goto out_unlock; - /* Drivers must not modify the NAPI state if they - * consume the entire weight. In such cases this code - * still "owns" the NAPI instance and therefore can - * move the instance around on the list at-will. - */ - if (unlikely(napi_disable_pending(n))) { - napi_complete(n); - goto out_unlock; - } - - if (n->gro_bitmask) { - /* flush too old packets - * If HZ < 1000, flush all packets. - */ - napi_gro_flush(n, HZ >= 1000); - } - - gro_normal_list(n); - /* Some drivers may have called napi_schedule * prior to exhausting their budget. 
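 * In that case the instance must stay scheduled: it is either left on
 * the list the driver queued it to or re-added to the repoll list, and
 * will be polled again on a later pass.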
*/ @@ -7305,6 +7983,48 @@ &upper_dev->adj_list.lower); } +static void __netdev_addr_mask(unsigned char *mask, const unsigned char *addr, + struct net_device *dev) +{ + int i; + + for (i = 0; i < dev->addr_len; i++) + mask[i] |= addr[i] ^ dev->dev_addr[i]; +} + +static void __netdev_upper_mask(unsigned char *mask, struct net_device *dev, + struct net_device *lower) +{ + struct net_device *cur; + struct list_head *iter; + + netdev_for_each_upper_dev_rcu(dev, cur, iter) { + __netdev_addr_mask(mask, cur->dev_addr, lower); + __netdev_upper_mask(mask, cur, lower); + } +} + +static void __netdev_update_addr_mask(struct net_device *dev) +{ + unsigned char mask[MAX_ADDR_LEN]; + struct net_device *cur; + struct list_head *iter; + + memset(mask, 0, sizeof(mask)); + __netdev_upper_mask(mask, dev, dev); + memcpy(dev->local_addr_mask, mask, dev->addr_len); + + netdev_for_each_lower_dev(dev, cur, iter) + __netdev_update_addr_mask(cur); +} + +static void netdev_update_addr_mask(struct net_device *dev) +{ + rcu_read_lock(); + __netdev_update_addr_mask(dev); + rcu_read_unlock(); +} + static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, void *upper_priv, void *upper_info, @@ -7355,6 +8075,7 @@ if (ret) return ret; + netdev_update_addr_mask(dev); ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); ret = notifier_to_errno(ret); @@ -7448,6 +8169,7 @@ __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); + netdev_update_addr_mask(dev); call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); @@ -7652,7 +8374,6 @@ } EXPORT_SYMBOL(netdev_lower_dev_get_private); - /** * netdev_lower_change - Dispatch event about lower device state change * @lower_dev: device @@ -8178,6 +8899,7 @@ if (err) return err; dev->addr_assign_type = NET_ADDR_SET; + netdev_update_addr_mask(dev); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); add_device_randomness(dev->dev_addr, dev->addr_len); return 0; @@ -9277,7 +9999,6 @@ } EXPORT_SYMBOL_GPL(init_dummy_netdev); - /** * register_netdev - register a network device * @dev: device to register @@ -9407,7 +10128,6 @@ __rtnl_unlock(); - /* Wait for rcu callbacks to finish before next phase */ if (!list_empty(&list)) rcu_barrier(); @@ -9427,6 +10147,7 @@ dev->reg_state = NETREG_UNREGISTERED; netdev_wait_allrefs(dev); + avm_pa_dev_unregister_sync(AVM_PA_DEVINFO(dev)); /* paranoia */ BUG_ON(netdev_refcnt_read(dev)); @@ -9628,6 +10349,7 @@ hash_init(dev->qdisc_hash); #endif dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; + avm_pa_dev_init(AVM_PA_DEVINFO(dev)); setup(dev); if (!dev->tx_queue_len) { @@ -10306,6 +11028,10 @@ sd->backlog.weight = weight_p; } + napi_workq = alloc_workqueue("napi_workq", WQ_UNBOUND | WQ_HIGHPRI, + WQ_UNBOUND_MAX_ACTIVE | WQ_SYSFS); + BUG_ON(!napi_workq); + dev_boot_phase = 0; /* The loopback device is special if any other network devices