/*
 * Packet Accelerator Interface
 *
 * vim:set expandtab shiftwidth=3 softtabstop=3:
 *
 * Copyright (c) 2011-2020 AVM GmbH <info@avm.de>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed and/or modified under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * PID  - peripheral ID
 *        Identifies a low level device, may be a network driver or
 *        for ATM, every VCC has its own PID
 * VPID - virtual peripheral ID
 *        Is assigned to a network device or a virtual network device
 *
 *   Sessions can have five states:
 *   - FREE    : session on sess_list[AVM_PA_LIST_FREE]
 *   - CREATE  : session is on no list
 *   - ACTIVE  : session on sess_list[AVM_PA_LIST_ACTIVE], in hashtable and not flushed
 *   - FLUSHED : session on sess_list[AVM_PA_LIST_ACTIVE], in hashtable and flushed
 *   - DEAD    : session on sess_list[AVM_PA_LIST_DEAD]
 *
 *   FREE    -> pa_session_alloc()    -> CREATE
 *   CREATE  -> pa_session_activate() -> ACTIVE
 *   ACTIVE  -> pa_session_flush()    -> FLUSHED
 *   FLUSHED -> pa_session_tick()     -> DEAD
 *   DEAD    -> pa_session_tick()     -> FREE
 *
 *   pa_session_kill() can transition from any state to DEAD. Use it only if you
 *   know that an immediate GC trigger (that moves from DEAD to FREE) won't be
 *   a problem, otherwise use pa_session_flush() which is safe. In general,
 *   this is only the case when a session wasn't ACTIVE yet (before
 *   pa_session_activate() completes). pa_session_flush() guarantees that at least one
 *   complete GC period happens before a session transitions to FREE.
 */

#define AVM_PA_FORCE_PRINTK_ENABLED 0

#if AVM_PA_FORCE_PRINTK_ENABLED
#   ifdef CONFIG_NO_PRINTK
#   define printk __printk
#   endif
#   define DEBUG /* want pr_debug to be compiled in */
#endif


#include <linux/version.h>
#include <linux/utsname.h>
#include <linux/printk.h>
#include <linux/ctype.h>
#include <linux/types.h>
#include <linux/jhash.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <net/xfrm.h>
#include <net/ipv6.h>
#include <net/dsfield.h>
#include <asm/unaligned.h>
#include <net/checksum.h>
#include <net/protocol.h>
#include <net/pkt_sched.h>
/* Necessary for MIPS Platforms without arch-support for ipv6 chksums */
#include <net/ip6_checksum.h>
#include <linux/pkt_sched.h>
#include <linux/kthread.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>
#ifdef CONFIG_AVM_POWERMETER
#include <avm/power/power.h>
#endif
#ifdef CONFIG_AVM_SIMPLE_PROFILING
#include <avm/profile/profile.h>
#else
#define avm_simple_profiling_skb(a,b) do { } while(0)
#endif
#include <linux/module.h> // MODULE_NAME_LEN needed by kallsyms.h (who fails to include himself)
#include <linux/kallsyms.h> // sprint_symbol()
#include <linux/miscdevice.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/workqueue.h>
#include <linux/cpumask.h>
#include <linux/list.h>
#include <linux/if_arp.h> /* ARPHRD_NONE */
#include <linux/if_vlan.h>
#include <linux/proc_fs.h>
#include <linux/spinlock.h>

#include <linux/kernel_stat.h>

#include "avm_pa.h"
#include "avm_pa_hw.h"
#include "avm_pa_intern.h"

#ifdef CONFIG_AVM_PA_GENERIC_CT
#include "generic_ct/generic_ct_ops.h"
#endif

#ifdef CONFIG_BLOG
#include <linux/blog.h>
#include <linux/bcm_skb_defines.h>
#define BROADCOM_MAX_PRIOS 8
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
#else
#error missing NF_CONNTRACK for Broadcom
#endif
#endif

#ifdef CONFIG_AVM_GENERIC_CONNTRACK
#warning Please do not use deprecated AVM_GENERIC_CONNTRACK
#define SKB_GENERIC_CT(skb)      ((skb)->generic_ct)
#define SKB_GENERIC_CT_DIR(skb)  ((skb)->nfctinfo)
#endif

#ifdef CONFIG_L2TP
#include <linux/l2tp.h>
#include "../l2tp/l2tp_core.h"
#endif

#include <linux/smp.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 11, 0)
#define INIT_CSD(_csd, _func, _info) {*(_csd) = (call_single_data_t){ .func = (_func), .info = (_info), };}
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
typedef struct call_single_data call_single_data_t;
#endif
#endif

/* ------------------------------------------------------------------------ */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0)
/* Until 5.0, call_rcu() and call_rcu_bh() were semantically different.
 * Then, 5.0 made call_rcu_bh() a thin wrapper because a refactoring
 * made call_rcu() and call_rcu_bh() semantically equivalent. Finally,
 * 5.1 removed the call_rcu_bh() wrapper altogether.
 *
 * => For older kernels we still need to call the _bh variant, so the
 *    wrapper is re-created here for 5.1 and newer.
 */
static inline void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
{
   /* Compat shim for kernels >= 5.1: simply forwards to call_rcu(). */
   call_rcu(head, func);
}
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
#define skb_vlan_tag_get_id  vlan_tx_tag_get_id
#define skb_vlan_tag_get     vlan_tx_tag_get
#define skb_vlan_tag_present vlan_tx_tag_present
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
/*
 * Compat helper for kernels < 4.0: store a hardware-accelerated VLAN tag
 * in the skb. The TCI is always marked with VLAN_TAG_PRESENT; the protocol
 * is only stored when the skb layout provides a vlan_proto member.
 */
static inline void skb_vlan_tag_put(struct sk_buff *skb,
                                    __be16 vlan_proto, u16 vlan_tci)
{
   skb->vlan_tci = vlan_tci | VLAN_TAG_PRESENT;
#ifdef AVM_PA_SKBUFF_HAS_VLAN_PROTO
   skb->vlan_proto = vlan_proto;
#endif
}
#else /* >= 4.0 */
#define skb_vlan_tag_put __vlan_hwaccel_put_tag
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 0, 0)
static inline void skb_vlan_tag_clear(struct sk_buff *skb)
{
   /* Compat helper for kernels < 5.0: a zero TCI also drops the "tag present" bit. */
   skb->vlan_tci = 0;
}
#else /* >= 5.0 */
#define skb_vlan_tag_clear __vlan_hwaccel_clear_tag
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0)
/* For non-broken smp_call_function_single_async() the following commits are needed:
 * commit 5224b961 smp: Fix error case handling in smp_call_function_*()
 * commit 8053871d smp: Fix smp_call_function_single_async() locking
 *
 * The commits landed in Linux 4.1. Any older kernel lacking those have a broken
 * smp_call_function_single_async() and we cannot use RPS (we saw panics every now and then).
 */
#ifdef CONFIG_AVM_PA_RPS
#error Broken smp_call_function_single_async(). Upgrade the kernel, backport 8053871d and 5224b961 or disable CONFIG_AVM_PA_RPS.
#endif
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 218)
/* Really since 4.11.0. Backported to 4.9.y.
 * 4.10.y doesn't have it but it's EOL anyway.
 *
 * commit 2c935bc572 locking/atomic, kref: Add kref_read()
 */
#define kref_read(r) atomic_read(&(r)->refcount)
#endif


#if LINUX_VERSION_CODE <= KERNEL_VERSION(3, 17, 0)
/* See mainline commits:
 * commit 1d023284 list: fix order of arguments for hlist_add_after(_rcu)
 *
 * Note that the macro was renamed and arguments order swapped.
 */
#define hlist_add_behind_rcu(new, prev) hlist_add_after_rcu(prev, new)
#endif

/* ------------------------------------------------------------------------ */

#if LINUX_VERSION_CODE <= KERNEL_VERSION(3, 0, 0)
static inline void skb_reset_mac_len(struct sk_buff *skb)
{
	/* Compat helper for old kernels: MAC header length is the gap between MAC and network header. */
	skb->mac_len = skb->network_header - skb->mac_header;
}
#endif


/* ------------------------------------------------------------------------ */

#define AVM_PA_TRACE              1  /* 0: off */
#define AVM_PA_UNALIGNED_CHECK    0
#define AVM_PA_REF_DEBUG          0  /* 0: off */

#ifndef ETH_P_8021AD
#define ETH_P_8021AD	0x88A8
#endif

/* ------------------------------------------------------------------------ */

static inline void set_udp_checksum(struct iphdr *iph, struct udphdr *udph)
{
   unsigned short len = ntohs(udph->len);
   __wsum sum;

   udph->check = 0;
   sum = csum_partial((unsigned char *)udph, len, 0);
   udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
                                   len, IPPROTO_UDP, sum);
   if (udph->check == 0)
      udph->check = CSUM_MANGLED_0;
}

static inline void set_udpv6_checksum(struct ipv6hdr *ipv6h,
                                      struct udphdr *udph)
{
   unsigned short len = ntohs(udph->len);
   __wsum sum;

   udph->check = 0;
   sum = csum_partial((unsigned char *)udph, len, 0);
   udph->check = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
                                 len, IPPROTO_UDP, sum);
   if (udph->check == 0)
      udph->check = CSUM_MANGLED_0;
}

/* Return a unique id for a given skb. Currently this is simply the skb's
 * address with the low bits shifted away; the shift is L1_CACHE_SHIFT,
 * but never less than 2.
 */
static inline unsigned long pkt_uniq_id(PKT *pkt)
{
   const unsigned long low_bits = max(L1_CACHE_SHIFT, 2);

   return (unsigned long)pkt >> low_bits;
}

/* ------------------------------------------------------------------------ */

/* Return one int's worth of bytes obtained from get_random_bytes(). */
static inline int rand(void)
{
   int value;

   get_random_bytes(&value, sizeof(value));

   return value;
}

#define PKT_DATA(pkt)   (pkt)->data
/* PKT_LEN has the data in the head skb. For frag_list skbs, this is just L2/3/4 headers
 * without any payload. For normal skbs it includes the payload after the headers. */
#define PKT_LEN(pkt)   (skb_headlen(pkt))
#define PKT_PULL(pkt, len) skb_pull(pkt, len)
#define PKT_PUSH(pkt, len) skb_push(pkt, len)
#define PKT_FREE(pkt)  dev_kfree_skb_any(pkt)
#define PKT_COPY(pkt)  skb_copy(pkt, GFP_ATOMIC)
#ifdef AVM_PA_SKBUFF_HAS_VLAN_PROTO
#define PA_VLAN_PROTO(pkt) (pkt)->vlan_proto
#else
#define PA_VLAN_PROTO(pkt) (constant_htons(ETH_P_8021Q))
#endif

static int pa_printk(void *type, const char *format, ...)
#ifdef __GNUC__
        __attribute__ ((__format__(__printf__, 2, 3)))
#endif
;

static int pa_printk(void *type, const char *format, ...)
{
   va_list args;
   int rc;

   va_start(args, format);
   if (type) printk("%s", (char *)type);
   rc = vprintk(format, args);
   va_end(args);
   return rc;
}

/* ------------------------------------------------------------------------ */

#define constant_htons(x)   __constant_htons(x)

#undef IPPROTO_IPENCAP
#define IPPROTO_IPENCAP 4
#ifndef IPPROTO_L2TP
#define IPPROTO_L2TP    115
#endif

/*
 * Accelerating of L2TPv3 only works with
 * pseudowire ethernet or ethernet vlan
 * and default l2-specific header.
 */

/* ------------------------------------------------------------------------ */

#define AVM_PA_TICK_RATE                (500*HZ/1000) /* 0.5 secs */
#define AVM_PA_LC_TIMEOUT                           2 /* secs */
#define AVM_PA_TRAFFIC_IDLE_TBFDISABLE             10 /* secs */

/* ------------------------------------------------------------------------ */

#define AVM_PA_EST_DEFAULT_IDX                0 /* 0 - 5 => 0.25sec - 8sec */
#define AVM_PA_EST_DEFAULT_EWMA_LOG           3 /* 1 - 31 */

#define AVM_PA_PRIOACK_THRESH_PKTS   40   /* wait for X packets to do the TCP-ACK check */
#define AVM_PA_PRIOACK_RATIO         70   /* % of packets have to be TCP-ACKs for positive check */

#define AVM_PA_COUNT_PRIO_MAPS  2 /* tack and tget */

#define AVM_PA_BE_QUEUE 6 /* best-effort queue */

#define AVM_PA_INGRESS_PRIO_NET_MASK   0xFFFF0000U
#define AVM_PA_INGRESS_PRIO_HOST_MASK  0x0000FFFFU

#define AVM_PA_INGRESS_PRIO_NET(prio)  (((prio) & AVM_PA_INGRESS_PRIO_NET_MASK) >> 16)
#define AVM_PA_INGRESS_PRIO_HOST(prio) ( (prio) & AVM_PA_INGRESS_PRIO_HOST_MASK)

/* ------------------------------------------------------------------------ */

static DEFINE_SPINLOCK(avm_pa_lock);

/*
 * State of one packet rate estimator (struct avm_pa_global holds one each
 * for rx, fw and overlimit).
 * NOTE(review): field meanings below are inferred from names and from the
 * AVM_PA_EST_DEFAULT_* comments; confirm against the estimator code.
 */
struct avm_pa_est {
   unsigned                  idx;          /* presumably interval index (0 - 5 => 0.25sec - 8sec) */
   unsigned                  ewma_log;     /* presumably EWMA log2 weight (1 - 31) */
   u32                       last_packets; /* presumably packet counter at the previous sample */
   u32                       avpps;        /* presumably averaged packets per second */
};

/*
 * Receive queue state; one instance exists per CPU (see DEFINE_PER_CPU below).
 * NOTE(review): the rx_* members look like statistics counters; their exact
 * update points are not visible in this part of the file.
 */
struct avm_pa_rxq {
   struct napi_struct     napi;
   struct sk_buff        *rq; /* reverse queue, lockless enqueue/dequeue */
   struct sk_buff_head    lq; /* local queue (in order), limited number of packets */
   struct tasklet_struct  sched_task;
#ifdef CONFIG_AVM_PA_RPS
   call_single_data_t     csd; /* only present when RPS support is compiled in */
#endif
   unsigned long          rx_enqueued;
   unsigned long          rx_napi_sched;
   unsigned long          rx_process;
   unsigned long          rx_rps_isr;
   unsigned long          rx_dropped;
   unsigned               cpu;
};

DEFINE_PER_CPU(struct avm_pa_rxq, pa_rxq);

/*
 * Global state of the packet accelerator; the single instance is pa_glob.
 *
 * The zero-sized est_start/est_end and tok_start/tok_end members only mark
 * the boundaries of the members between them, so the member order in those
 * regions must not be changed.
 *
 * filter_enabled, accel_filter and show_filter exist only with
 * CONFIG_PROC_FS, therefore the corresponding initializer below is guarded
 * by the same option.
 */
struct avm_pa_global {
   int                       disabled;
   int                       fw_disabled;
   atomic_t                  misc_is_open; /* means fw_disabled */
   int                       dbgcapture;
   int                       dbgsession;
   int                       dbgnosession;
   int                       dbgtrace;
   int                       dbgmatch;
   int                       dbgprioack;
   int                       dbgprioacktrace;
   int                       dbgstats;
   bool                      bsession_allowed;
   unsigned long             tcp_timeout_secs;
   unsigned long             udp_timeout_secs;
   unsigned long             echo_timeout_secs;
   unsigned long             bridge_timeout_secs;
   struct avm_pa_pid         pid_array[CONFIG_AVM_PA_MAX_PID];
   struct avm_pa_vpid        vpid_array[CONFIG_AVM_PA_MAX_VPID];
   struct avm_pa_session_list sess_list[AVM_PA_LIST_MAX];
   struct avm_pa_bsession    bsess_array[CONFIG_AVM_PA_MAX_SESSION];
   struct avm_pa_macaddr     macaddr_array[AVM_PA_MAX_MACADDR];
   struct hlist_head         macaddr_hashtab[AVM_PA_MAX_MACADDR];
   struct avm_pa_stats       stats, stats_copy;
   struct hlist_head         egress_freelist;
   atomic_t                  session_uniq_id;
   atomic_t                  ingress_uniq_id;

   struct timer_list         tick_timer;

   /* packet rate estimator */
   char                      est_start[0];
   int                       est_idx;
   int                       ewma_log;
   struct timer_list         est_timer;
   struct avm_pa_est         rx_est;
   struct avm_pa_est         fw_est;
   struct avm_pa_est         overlimit_est;
   char                      est_end[0];
   bool                      rxq_enabled;
   bool                      rps_enabled;
   /* ... */
   char                      tok_start[0];
   struct task_struct       *tok_task;
   int                       tok_pos;
#define TOK_SAMLES  64
   int                       tok_state[TOK_SAMLES];
   unsigned                  tok_overtime[TOK_SAMLES];
   unsigned                  tok_rate[TOK_SAMLES];
   unsigned                  tok_pps[TOK_SAMLES];
   unsigned long             tok_overlimit[TOK_SAMLES];
   char                      tok_end[0];
   unsigned                  prioack_thresh_packets;
   unsigned                  prioack_ratio;
   struct avm_hardware_pa    hardware_pa;
   int                       hw_ppa_disabled;
   struct completion        *hw_pa_flush_completion;
   struct kref               hw_pa_ref;

#ifdef CONFIG_PROC_FS
   int filter_enabled;
   struct list_head          accel_filter; /* empty to accelerate all sessions (if filter_enabled == 1) */
   struct list_head          show_filter;  /* empty to show all sessions (default) */
#endif
} pa_glob = {
   /* acceleration and forwarding start out disabled */
   .disabled = 1,
   .fw_disabled = 1,
   .dbgcapture = 0,
   .dbgsession = 0,
   .dbgnosession = 0,
   .dbgtrace = 0,
   .dbgmatch = 0,
   .dbgprioack = 0,
   .dbgprioacktrace = 0,
   .dbgstats = 0,
   .bsession_allowed = 1,
   .tcp_timeout_secs = 10,
   .udp_timeout_secs = 10,
   .echo_timeout_secs = 3,
   .bridge_timeout_secs = 30,
   .est_idx = AVM_PA_EST_DEFAULT_IDX,
   .ewma_log = AVM_PA_EST_DEFAULT_EWMA_LOG,
   .prioack_thresh_packets = AVM_PA_PRIOACK_THRESH_PKTS,
   .prioack_ratio = AVM_PA_PRIOACK_RATIO,
   .rxq_enabled = 1,
#ifdef CONFIG_AVM_PA_RPS
   .rps_enabled = 1,
#endif
#ifdef CONFIG_PROC_FS
   /* member only exists with procfs support */
   .filter_enabled = 1,
#endif
};

struct avm_pa_data pa_data;

#define PA_PID(ctx, handle)      (&ctx->pid_array[(handle)%CONFIG_AVM_PA_MAX_PID])
#define PA_VPID(ctx, handle)     (&ctx->vpid_array[(handle)%CONFIG_AVM_PA_MAX_VPID])
#define PA_SESSION(pd, handle)   (&(pd)->sessions[(handle)%CONFIG_AVM_PA_MAX_SESSION])
#define PA_BSESSION(ctx, handle) (&ctx->bsess_array[(handle)%CONFIG_AVM_PA_MAX_SESSION])

/* ------------------------------------------------------------------------ */

static void pa_session_kill_nolock(struct avm_pa_session *session, const char *why);
static void pa_session_kill(struct avm_pa_session *session, const char *why);
static void pa_session_flush(struct avm_pa_session *session, const char *why);
static int pa_session_handle_stats(struct avm_pa_session *session);
static void pa_show_session(struct avm_pa_session *session,
                            pa_fprintf fprintffunc, void *arg);
static int avm_pa_pid_receive(avm_pid_handle pid_handle, PKT *pkt);
static void avm_pa_flush_sessions_with_destmac(struct avm_pa_macaddr *destmac);
static void avm_pa_flush_hw_sessions(void);

/* Return the 'enabled' setting of the pid's TACK priority map. */
static inline int avm_pa_pid_tack_enabled(struct avm_pa_pid *pid)
{
   return pid->prio_maps[AVM_PA_PRIO_MAP_TACK].enabled;
}

/* Return the 'enabled' setting of the pid's TGET priority map. */
static inline int avm_pa_pid_tget_enabled(struct avm_pa_pid *pid)
{
   return pid->prio_maps[AVM_PA_PRIO_MAP_TGET].enabled;
}

/*
 * Helper functions to retrieve a valid tack or tget priority from a pid's priority map.
 * Remember: prio_maps must include the correct TC_H_MAJ part.
 */
/*
 * Look up the TACK priority mapped to the minor part of @prio.
 * Returns 0 when the minor part is outside the priority map.
 */
static inline unsigned int avm_pa_pid_tack_prio(struct avm_pa_pid *pid, unsigned int prio)
{
   const unsigned int minor = prio & TC_H_MIN_MASK;

   if (unlikely(minor >= AVM_PA_MAX_PRIOS))
      return 0;

   return pid->prio_maps[AVM_PA_PRIO_MAP_TACK].prios[minor];
}

/*
 * Look up the TGET priority mapped to the minor part of @prio.
 * Returns @prio unchanged when the minor part is outside the priority map.
 */
static inline unsigned int avm_pa_pid_tget_prio(struct avm_pa_pid *pid, unsigned int prio)
{
   const unsigned int minor = prio & TC_H_MIN_MASK;

   if (unlikely(minor >= AVM_PA_MAX_PRIOS))
      return prio;

   return pid->prio_maps[AVM_PA_PRIO_MAP_TGET].prios[minor];
}

/* ------------------------------------------------------------------------ */

/*
 * Returns 0 whenever pa_glob.dbgcapture is set; otherwise returns the
 * current value of pa_glob.misc_is_open.
 */
static inline int avm_pa_capture_running(void)
{
   return pa_glob.dbgcapture ? 0 : atomic_read(&pa_glob.misc_is_open);
}

/* ------------------------------------------------------------------------ */
/* -------- utilities ----------------------------------------------------- */
/* ------------------------------------------------------------------------ */

/* Map an egress type to a short printable name; "???" for unknown values. */
static const char *egresstype2str(enum avm_pa_egresstype etype)
{
   const char *name = "???";

   switch (etype) {
      case avm_pa_egresstype_output:
         name = "output";
         break;
      case avm_pa_egresstype_local:
         name = "local";
         break;
      case avm_pa_egresstype_rtp:
         name = "rtp";
         break;
      case avm_pa_egresstype_xfrm:
         name = "xfrm";
         break;
      case avm_pa_egresstype_null:
         name = "null";
         break;
   }

   return name;
}

/* Map an AVM_PA_RX_* result code to a printable text; "???" for unknown codes. */
static const char *rc2str(int rc)
{
   const char *text = "???";

   switch (rc) {
      case AVM_PA_RX_BROADCAST:
         text = "is broadcast";
         break;
      case AVM_PA_RX_TTL:
         text = "ttl/hoplimit <= 1";
         break;
      case AVM_PA_RX_FRAGMENT:
         text = "is fragment";
         break;
      case AVM_PA_RX_BYPASS:
         text = "bypass";
         break;
      case AVM_PA_RX_OK:
         text = "ok";
         break;
      case AVM_PA_RX_ACCELERATED:
         text = "accelerated";
         break;
      case AVM_PA_RX_ERROR_STATE:
         text = "state machine problem ?";
         break;
      case AVM_PA_RX_ERROR_LEN:
         text = "packet too short";
         break;
      case AVM_PA_RX_ERROR_IPVERSION:
         text = "illegal ip version";
         break;
      case AVM_PA_RX_ERROR_MATCH:
         text = "too much header";
         break;
      case AVM_PA_RX_ERROR_HDR:
         text = "too much ip header";
         break;
   }

   return text;
}

/* Map a framing type to a short printable name; "undef" for unknown values. */
static const char *framing2str(enum avm_pa_framing framing)
{
   const char *name = "undef";

   switch (framing) {
      case avm_pa_framing_ether:
         name = "ether";
         break;
      case avm_pa_framing_ppp:
         name = "ppp";
         break;
      case avm_pa_framing_ip:
         name = "ip";
         break;
      case avm_pa_framing_ipdev:
         name = "ipdev";
         break;
      case avm_pa_framing_dev:
         name = "dev";
         break;
      case avm_pa_framing_ptype:
         name = "local";
         break;
      case avm_pa_framing_llcsnap:
         name = "llcsnap";
         break;
   }

   return name;
}

/*
 * Format the IPv6 address at @cp as eight colon-separated hex groups
 * (no zero compression) into @buf. Returns the snprintf() result.
 */
static int in6_addr2str(const void *cp, char *buf, size_t size)
{
    const struct in6_addr *addr = cp;
    unsigned int group[8];
    int i;

    /* convert all 16-bit groups to host byte order first */
    for (i = 0; i < 8; i++)
        group[i] = ntohs(addr->s6_addr16[i]);

    return snprintf(buf, size, "%x:%x:%x:%x:%x:%x:%x:%x",
                    group[0], group[1], group[2], group[3],
                    group[4], group[5], group[6], group[7]);
}

/*
 * Format the four bytes at @cp as a dotted-decimal IPv4 address into @buf.
 * Returns the snprintf() result.
 */
static int in_addr2str(const void *cp, char *buf, size_t size)
{
    const unsigned char *octet = cp;

    return snprintf(buf, size, "%d.%d.%d.%d",
                    octet[0], octet[1], octet[2], octet[3]);
}

/*
 * Format the six bytes at @cp as an upper-case, colon-separated MAC
 * address into @buf. Returns the snprintf() result.
 */
static int mac2str(const void *cp, char *buf, size_t size)
{
    const unsigned char *octet = cp;

    return snprintf(buf, size, "%02X:%02X:%02X:%02X:%02X:%02X",
                    octet[0], octet[1], octet[2],
                    octet[3], octet[4], octet[5]);
}

/*
 * Render a packet type bitfield as text, e.g. "IPv6+IPv4+UDP".
 *
 * @pkttype: AVM_PA_PKTTYPE_* value
 * @buf:     output buffer
 * @size:    size of @buf in bytes
 *
 * The pieces are appended in this order: outer IP encapsulation, LISP,
 * L2TPv3, GRE, inner IP version, IP protocol. Output is truncated to fit
 * @size. Returns @buf.
 *
 * The buffer is terminated up front, so the result is a valid (possibly
 * empty) string even if no known field matches. With @size == 0 the buffer
 * is not touched at all.
 */
static const char *pkttype2str(u16 pkttype, char *buf, size_t size)
{
   char *p = buf;
   char *end;

   if (size == 0)
      return buf;

   end = p + size;
   *p = '\0';

   if (pkttype == AVM_PA_PKTTYPE_NONE) {
      snprintf(p, end-p, "none");
      return buf;
   }

   switch (pkttype & AVM_PA_PKTTYPE_IPENCAP_MASK) {
      case AVM_PA_PKTTYPE_IPV6ENCAP:
         snprintf(p, end-p, "IPv6+");
         p += strlen(p);
         break;
      case AVM_PA_PKTTYPE_IPV4ENCAP:
         snprintf(p, end-p, "IPv4+");
         p += strlen(p);
         break;
   }
   if (pkttype & AVM_PA_PKTTYPE_LISP) {
      snprintf(p, end-p, "LISP+");
      p += strlen(p);
   }
   if (pkttype & AVM_PA_PKTTYPE_L2TP) {
      snprintf(p, end-p, "L2TPv3+");
      p += strlen(p);
   }
   if (pkttype & AVM_PA_PKTTYPE_GRE) {
      snprintf(p, end-p, "GRE+");
      p += strlen(p);
   }
   switch (pkttype & AVM_PA_PKTTYPE_IP_MASK) {
      case AVM_PA_PKTTYPE_IPV6:
         snprintf(p, end-p, "IPv6");
         p += strlen(p);
         break;
      case AVM_PA_PKTTYPE_IPV4:
         snprintf(p, end-p, "IPv4");
         p += strlen(p);
         break;
   }
   /* the IP protocol is the last piece, so p need not advance any more */
   if (AVM_PA_PKTTYPE_IPPROTO(pkttype)) {
      switch (AVM_PA_PKTTYPE_IPPROTO(pkttype)) {
         case IPPROTO_UDP:
            snprintf(p, end-p, "+UDP");
            break;
         case IPPROTO_TCP:
            snprintf(p, end-p, "+TCP");
            break;
         case IPPROTO_ICMP:
            snprintf(p, end-p, "+ICMP");
            break;
         case IPPROTO_ICMPV6:
            snprintf(p, end-p, "+ICMPV6");
            break;
         case IPPROTO_L2TP:
            snprintf(p, end-p, "+L2TPv3");
            break;
         case IPPROTO_ESP:
            snprintf(p, end-p, "+ESP");
            break;
         default:
            snprintf(p, end-p, "+P%u", AVM_PA_PKTTYPE_IPPROTO(pkttype));
            break;
      }
   }
   return buf;
}

/*
 * Render @datalen bytes at @data as "<datalen>: <HEX...>" into @buf.
 *
 * @data:    bytes to dump
 * @datalen: number of bytes at @data
 * @buf:     output buffer
 * @bufsiz:  size of @buf in bytes
 *
 * The hex part uses upper-case digits without separators and stops early
 * when the buffer is (nearly) full; the result is always NUL-terminated.
 * A non-positive @bufsiz leaves @buf untouched. Returns @buf.
 */
static char *data2hex(void *data, int datalen,
                      char *buf, int bufsiz)
{
   static const char hexchars[] = "0123456789ABCDEF";
   const unsigned char *databuf = (const unsigned char *)data;
   char *s = buf;
   char *end;
   int i;

   /* a non-positive size would turn into a huge size_t for snprintf() below */
   if (bufsiz <= 0)
      return buf;

   end = buf + bufsiz;

   snprintf(s, end-s, "%d: ", datalen);
   s += strlen(s);

   for (i=0; i < datalen && s + 3 < end; i ++) {
      *s++ = hexchars[(databuf[i] >> 4) & 0xf];
      *s++ = hexchars[databuf[i] & 0xf];
   }
   *s = 0;
   return buf;
}

/*
 * Render the AVM_PA_PID_FLAG_* bits set in @flags as a comma-separated
 * list into @buf; writes "none" when no known flag is set. Returns @buf.
 */
static char *pidflags2str(unsigned long flags, char *buf, int bufsiz)
{
   char *const limit = buf + bufsiz;
   char *cur = buf;

   *buf = 0;

   /* a comma is prepended as soon as something has already been written */
   if (flags & AVM_PA_PID_FLAG_NO_PID_CHANGED_CHECK) {
      snprintf(cur, limit - cur, "%sno_pid_changed_check", cur == buf ? "" : ",");
      cur += strlen(cur);
   }

   if (flags & AVM_PA_PID_FLAG_HSTART_ON_INGRESS) {
      snprintf(cur, limit - cur, "%shstart_on_ingress", cur == buf ? "" : ",");
      cur += strlen(cur);
   }

   if (flags & AVM_PA_PID_FLAG_HSTART_ON_EGRESS) {
      snprintf(cur, limit - cur, "%shstart_on_egress", cur == buf ? "" : ",");
      cur += strlen(cur);
   }

   if (cur == buf) {
      snprintf(cur, limit - cur, "none");
   }

   return buf;
}

/* ------------------------------------------------------------------------ */
/* -------- l2tp session cache -------------------------------------------- */
/* ------------------------------------------------------------------------ */

/*
 * Linear scan of the l2tp cache for an entry whose session_id equals
 * @session_id. Returns the entry, or NULL if none matches or L2TP support
 * is not compiled in.
 */
static struct avm_pa_l2tp *
pa_l2tp_session_search(__be32 session_id)
{
#ifdef CONFIG_L2TP
   struct avm_pa_l2tp *entry = pa_data.l2tp_cache;
   struct avm_pa_l2tp *const stop = entry + ARRAY_SIZE(pa_data.l2tp_cache);

   for (; entry != stop; entry++) {
      if (entry->session_id == session_id)
         return entry;
   }
#endif
   return NULL;
}

/*
 * Linear scan of the l2tp cache for an entry whose peer_session_id equals
 * @peer_session_id. Returns the entry, or NULL if none matches or L2TP
 * support is not compiled in.
 */
static struct avm_pa_l2tp *
pa_l2tp_session_search_by_peer(__be32 peer_session_id)
{
#ifdef CONFIG_L2TP
   struct avm_pa_l2tp *entry = pa_data.l2tp_cache;
   struct avm_pa_l2tp *const stop = entry + ARRAY_SIZE(pa_data.l2tp_cache);

   for (; entry != stop; entry++) {
      if (entry->peer_session_id == peer_session_id)
         return entry;
   }
#endif
   return NULL;
}


#ifdef CONFIG_L2TP
/*
 * Look up the kernel's l2tp session for @session_id (network byte order)
 * in init_net, using whichever lookup API the running kernel version
 * provides.
 *
 * Returns NULL when called in hard-IRQ context (in_irq()); otherwise
 * returns whatever the kernel lookup returns.
 *
 * On kernels where l2tp_session_get() is used, the result should be
 * released with pa_l2tp_session_put_local().
 */
static struct l2tp_session *
pa_l2tp_session_get_local(__be32 session_id)
{
   /* no lookup from hard-IRQ context */
   if (in_irq())
      return NULL;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0)
   return l2tp_session_get(&init_net, ntohl(session_id));
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0) || defined(AVM_L2TP_BACKPORT_4_15)
   return l2tp_session_get(&init_net, NULL, ntohl(session_id));
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 225)
   /* Instead of error prone ifdefs, we simply do not support kernels 4.5-4.8,
    * l2tp_session_get() is normally available since 4.9 onwards. */
   return l2tp_session_get(&init_net, NULL, ntohl(session_id), true);
#else
   /* oldest kernels: lookup without reference counting */
   return l2tp_session_find(&init_net, NULL, ntohl(session_id));
#endif
}

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 225)
#define pa_l2tp_session_put_local(s) l2tp_session_dec_refcount(s)
#else
/* no-op since this kernel uses l2tp_session_find() w/o refcounting */
#define pa_l2tp_session_put_local(s)
#endif
#endif

/*
 * Create an l2tp cache entry for @session_id.
 *
 * The kernel's local l2tp session is looked up first; if it exists, the
 * first cache slot with session_id == 0 is filled in under avm_pa_lock
 * with the session id, the peer session id (stored in network byte order)
 * and the header length.
 *
 * Returns the new cache entry, or NULL if there is no local session, no
 * free slot, or L2TP support is not compiled in.
 */
static struct avm_pa_l2tp *
pa_l2tp_session_alloc(__be32 session_id)
{
   struct avm_pa_l2tp *slot = NULL;

#ifdef CONFIG_L2TP
   struct avm_pa_data *pd = &pa_data;
   struct l2tp_session *local;
   int idx;

   local = pa_l2tp_session_get_local(session_id);
   if (!local)
      return NULL;

   spin_lock(&avm_pa_lock);
   for (idx = 0; idx < ARRAY_SIZE(pd->l2tp_cache); idx++) {
      if (pd->l2tp_cache[idx].session_id != 0)
         continue; /* slot in use */

      slot = &pd->l2tp_cache[idx];
      slot->session_id = session_id;
      /* kept so that ingress packets can be looked up by peer_session_id */
      slot->peer_session_id = htonl(local->peer_session_id);
      slot->hdr_len = local->hdr_len;
      break;
   }
   spin_unlock(&avm_pa_lock);

   pa_l2tp_session_put_local(local);
#endif

   return slot;
}

/* ------------------------------------------------------------------------ */
/* -------- parsing of packets -------------------------------------------- */
/* ------------------------------------------------------------------------ */

#define LISPDATAHDR(info) (HDRCOPY(info)+(info)->lisp_offset)

/*
 * Put @info back into its initial state: no match entries, unicast,
 * no packet type, all offsets marked as not set, all lengths and the
 * VLAN information cleared.
 */
static inline void pa_reset_match(struct avm_pa_pkt_match *info)
{
   /* classification */
   info->pkttype = AVM_PA_PKTTYPE_NONE;
   info->casttype = AVM_PA_IS_UNICAST;
   info->nmatch = 0;

   /* header offsets */
   info->l4_offset = AVM_PA_OFFSET_NOT_SET;
   info->ip_offset = AVM_PA_OFFSET_NOT_SET;
   info->lisp_offset = AVM_PA_OFFSET_NOT_SET;
   info->encap_offset = AVM_PA_OFFSET_NOT_SET;
   info->pppoe_offset = AVM_PA_OFFSET_NOT_SET;

   /* header copy bookkeeping */
   info->full_hdrlen = 0;
   info->hdrlen = 0;
   info->hdroff = 0;

   /* VLAN */
   info->vlan_proto = 0;
   info->vlan_tci = 0;
}


/*
 * Return the first entry of type AVM_PA_ETH among the nmatch valid match
 * entries of @match, or NULL if there is none.
 */
static inline struct avm_pa_match_info *
pa_find_eth_match(struct avm_pa_pkt_match *match)
{
   struct avm_pa_match_info *cur = match->match;
   struct avm_pa_match_info *const stop = cur + match->nmatch;

   while (cur != stop) {
      if (cur->type == AVM_PA_ETH)
         return cur;
      cur++;
   }

   /* no ETH found */
   return NULL;
}


/*
 * Reduce @match to a bridge match: everything after the first Ethernet
 * entry is dropped, except for a VLAN entry that directly follows it.
 * Nothing is changed when the match contains no Ethernet entry.
 *
 * The VLAN check only looks at entries below nmatch, so a stale entry
 * behind the valid ones can never be taken for a VLAN entry.
 */
static inline void pa_change_to_bridge_match(struct avm_pa_pkt_match *match)
{
   struct avm_pa_match_info *end = &match->match[match->nmatch];
   struct avm_pa_match_info *p;

   p = pa_find_eth_match(match);
   if (!p)
      return;

   /* keep a directly following VLAN entry, but only if it is a valid one */
   if (p + 1 < end && (p + 1)->type == AVM_PA_VLAN)
      ++p;

   match->nmatch = p - match->match + 1;
}

static inline int pa_add_match(struct avm_pa_pkt_match *info,
                               unsigned char offset, unsigned char type)
{
   if (info->nmatch < AVM_PA_MAX_MATCH) {
      info->match[info->nmatch].offset = offset;
      info->match[info->nmatch].type = type;
      info->nmatch++;
      return 0;
   }
   return -1;
}

/*
 * Parse the headers of a packet and fill in a packet match.
 *
 * Starting from a state selected by 'framing', the headers are walked one
 * after another. Every recognized header adds a match item (type + offset
 * relative to the start of parsing) to 'info'; pkttype, casttype and the
 * various *_offset fields are updated along the way.
 *
 * framing   - selects the initial parser state. For avm_pa_framing_dev
 *             parsing starts at eth_hdr(pkt), for avm_pa_framing_ipdev and
 *             avm_pa_framing_ptype at skb_network_header(pkt); for all
 *             other framings at PKT_DATA(pkt) + hstart.
 * hstart    - byte offset into the packet data where parsing starts
 * pkt       - the packet to parse
 * info      - match to fill; expected to be reset by the caller
 *             (pa_set_pkt_match() calls pa_reset_match() first)
 * on_egress - non-zero: a packet with the more-fragments flag set but a
 *             fragment offset of zero is accepted, and L2TP sessions are
 *             searched by peer id without allocating new ones
 *
 * Returns AVM_PA_RX_OK if the packet was classified, otherwise
 * AVM_PA_RX_BYPASS, AVM_PA_RX_FRAGMENT, AVM_PA_RX_TTL (classified, but
 * ttl/hop limit is 0) or one of the AVM_PA_RX_ERROR_* codes.
 * AVM_PA_RX_ERROR_LEN is also the result when the parse window is used up
 * before a terminating header was found.
 *
 * When the result is AVM_PA_RX_OK (or pa_glob.dbgmatch is set) the parsed
 * header bytes are copied to HDRCOPY(info) and hdrlen/protocol are set.
 * The two early returns in the framing switch bypass this step.
 */
static int set_pkt_match(enum avm_pa_framing framing,
                         unsigned int hstart,
                         PKT *pkt,
                         struct avm_pa_pkt_match *info,
                         int on_egress)
{
/* Leave the parser with the given result, going through the common exit. */
#define RETURN(retval) do { ret = retval; goto out; } while (0)
   int ret = AVM_PA_RX_ERROR_LEN;
   int state = 0;
   u8 *data, *p, *end;
   u32 daddr;
   u16 ethproto = 0;
   u16 ipproto = 0;
   int ttl = 0;
   int full_hdrlen = 0;

   data = PKT_DATA(pkt);
   end = data + PKT_LEN(pkt);
   data += hstart;

   /* Select the initial parser state (and start of data) from the framing. */
   switch (framing) {
      case avm_pa_framing_ip:
         if ((data[0] & 0xf0) == 0x40 && (data[0] & 0x0f) >= 5) {
            state = AVM_PA_IPV4;
            break;
         }
         if ((data[0] & 0xf0) == 0x60) {
            state = AVM_PA_IPV6;
            break;
         }
         return AVM_PA_RX_ERROR_IPVERSION;
      case avm_pa_framing_ppp:
         state = AVM_PA_PPP;
         break;
      case avm_pa_framing_ether:
         state = AVM_PA_ETH;
         break;
      case avm_pa_framing_dev:
         data = (u8 *)eth_hdr(pkt);
         state = AVM_PA_ETH;
         break;
      case avm_pa_framing_ipdev:
      case avm_pa_framing_ptype:
         data = (u8 *)skb_network_header(pkt);
         if (pkt->protocol == constant_htons(ETH_P_IP)) {
            state = AVM_PA_IPV4;
         } else if (pkt->protocol == constant_htons(ETH_P_IPV6)) {
            state = AVM_PA_IPV6;
         } else {
            return AVM_PA_RX_BYPASS;
         }
         break;
      case avm_pa_framing_llcsnap:
         state = AVM_PA_LLC_SNAP;
         break;

   }
   /* Limit the parse window to AVM_PA_MAX_HEADER - AVM_PA_MAX_HDROFF bytes. */
   if (end - data > AVM_PA_MAX_HEADER - AVM_PA_MAX_HDROFF)
      end = data + AVM_PA_MAX_HEADER - AVM_PA_MAX_HDROFF;
   p = data;

   /* One header per iteration: 'hdr'/'offset' refer to the header that
    * starts at the current position, 'p' is advanced past it. */
   while (p < end) {
      hdrunion_t *hdr = (hdrunion_t *)p;
      int offset = p-data;

      switch (state) {
         case AVM_PA_ETH:
            if (pa_add_match(info, offset, AVM_PA_ETH) < 0)
               RETURN(AVM_PA_RX_ERROR_MATCH);
            p += sizeof(struct ethhdr);
            /* group bit set in the destination MAC: broadcast or multicast */
            if (hdr->ethh.h_dest[0] & 1) {
               if (hdr->ethh.h_dest[0] == 0xff) {
                  info->casttype = AVM_PA_IS_BROADCAST;
                  RETURN(AVM_PA_RX_BYPASS);
               } else {
                  info->casttype = AVM_PA_IS_MULTICAST;
               }
            }
            /* A vlan tag carried in the skb (not in the packet data) gets a
             * match item without offset. */
            if (skb_vlan_tag_present(pkt)) {
               info->vlan_tci = skb_vlan_tag_get(pkt);
               info->vlan_proto = PA_VLAN_PROTO(pkt);
               if (pa_add_match(info, AVM_PA_OFFSET_NOT_SET, AVM_PA_VLAN) < 0)
                  RETURN(AVM_PA_RX_ERROR_MATCH);
            }
            state = AVM_PA_ETH_PROTO;
            ethproto = hdr->ethh.h_proto;
            continue;

         case AVM_PA_VLAN: /* This handles only in-band vlan */
            if (pa_add_match(info, offset, AVM_PA_VLAN) < 0)
               RETURN(AVM_PA_RX_ERROR_MATCH);
            p += sizeof(struct vlanhdr);
            state = AVM_PA_ETH_PROTO;
            ethproto = hdr->vlanh.vlan_proto;
            continue;

         /* Dispatch on the ethertype taken from the previous header;
          * consumes no data itself. */
         case AVM_PA_ETH_PROTO:
            switch (ethproto) {
               case constant_htons(ETH_P_PPP_SESS):
                  state = AVM_PA_PPPOE;
                  continue;
               case constant_htons(ETH_P_IP):
                  state = AVM_PA_IPV4;
                  continue;
               case constant_htons(ETH_P_IPV6):
                  state = AVM_PA_IPV6;
                  continue;
               case constant_htons(ETH_P_8021Q):
               case constant_htons(ETH_P_8021AD):
                  state = AVM_PA_VLAN;
                  continue;
            }
            RETURN(AVM_PA_RX_BYPASS);

         case AVM_PA_PPPOE:
            if (pa_add_match(info, offset, AVM_PA_PPPOE) < 0)
               RETURN(AVM_PA_RX_ERROR_MATCH);
            p += sizeof(struct pppoehdr);
            info->pppoe_offset = offset;
            state = AVM_PA_PPP;
            continue;

         case AVM_PA_PPP:
            /* Skip a leading zero byte so that both the one byte and the
             * two byte form of the protocol field are accepted. */
            if (p[0] == 0) {
               p++;
               offset++;
            }
            /* 0x21 continues with IPv4, 0x57 with IPv6, all else is bypassed */
            if (p[0] == 0x21) {
               if (pa_add_match(info, offset, AVM_PA_PPP) < 0)
                  RETURN(AVM_PA_RX_ERROR_MATCH);
               p++;
               state = AVM_PA_IPV4;
               continue;
            }
            if (p[0] == 0x57) {
               if (pa_add_match(info, offset, AVM_PA_PPP) < 0)
                  RETURN(AVM_PA_RX_ERROR_MATCH);
               p++;
               state = AVM_PA_IPV6;
               continue;
            }
            RETURN(AVM_PA_RX_BYPASS);

         case AVM_PA_IPV4:
            if (hdr->iph.version != 4)
               RETURN(AVM_PA_RX_ERROR_IPVERSION);
            if (pa_add_match(info, offset, AVM_PA_IPV4) < 0)
               RETURN(AVM_PA_RX_ERROR_MATCH);
            ttl = hdr->iph.ttl;
            p += PA_IPHLEN(&hdr->iph);
            /* non-zero fragment offset: not the first fragment */
            if (hdr->iph.frag_off & constant_htons(IP_OFFSET))
               RETURN(AVM_PA_RX_FRAGMENT);
            /* We don't support forwarding fragments, we may only create them for
             * tunnels, so check if we're on egress.
             */
            if ((hdr->iph.frag_off & constant_htons(IP_MF)) && !on_egress)
               RETURN(AVM_PA_RX_FRAGMENT);
            daddr = get_unaligned(&hdr->iph.daddr);
            if (ipv4_is_lbcast(daddr)) {
               info->casttype = AVM_PA_IS_BROADCAST;
               RETURN(AVM_PA_RX_BYPASS);
            } else if (ipv4_is_multicast(daddr))  {
               info->casttype = AVM_PA_IS_MULTICAST;
            }
            /* IPv6-in-IPv4 and IPv4-in-IPv4: this header becomes the
             * encapsulation header. Only accepted while no packet type has
             * been set yet, i.e. a single level of encapsulation. */
            if (hdr->iph.protocol == IPPROTO_IPV6) {
               if (info->pkttype != AVM_PA_PKTTYPE_NONE)
                  RETURN(AVM_PA_RX_ERROR_HDR);
               info->pkttype |= AVM_PA_PKTTYPE_IPV4ENCAP;
               info->encap_offset = offset;
               state = AVM_PA_IPV6;
               continue;
            }
            if (hdr->iph.protocol == IPPROTO_IPENCAP) {
               if (info->pkttype != AVM_PA_PKTTYPE_NONE)
                  RETURN(AVM_PA_RX_ERROR_HDR);
               info->pkttype |= AVM_PA_PKTTYPE_IPV4ENCAP;
               info->encap_offset = offset;
               state = AVM_PA_IPV4;
               continue;
            }
            info->pkttype |= AVM_PA_PKTTYPE_IPV4;
            info->ip_offset = offset;
            state = AVM_PA_IP_PROTO;
            ipproto = hdr->iph.protocol;
            /* Remember (once) how far the ip header is away from the next
             * multiple of 4. NOTE(review): presumably used to align the
             * header copy - confirm against HDRCOPY(). */
            if ((offset & 0x3) && info->hdroff == 0)
               info->hdroff = 4 - (offset & 0x3);
            continue;

         case AVM_PA_IPV6:
            if (hdr->ipv6h.version != 6)
               RETURN(AVM_PA_RX_ERROR_IPVERSION);
            if (pa_add_match(info, offset, AVM_PA_IPV6) < 0)
               RETURN(AVM_PA_RX_ERROR_MATCH);
            ttl = hdr->ipv6h.hop_limit;
            p += sizeof(struct ipv6hdr);
            if (hdr->ipv6h.daddr.s6_addr[0] == 0xff)
               info->casttype = AVM_PA_IS_MULTICAST;
            /* IPv6-in-IPv6 and IPv4-in-IPv6, same rules as in the IPv4 case */
            if (hdr->ipv6h.nexthdr == IPPROTO_IPV6) {
               if (info->pkttype != AVM_PA_PKTTYPE_NONE)
                  RETURN(AVM_PA_RX_ERROR_HDR);
               info->pkttype |= AVM_PA_PKTTYPE_IPV6ENCAP;
               info->encap_offset = offset;
               state = AVM_PA_IPV6;
               continue;
            }
            if (hdr->ipv6h.nexthdr == IPPROTO_IPENCAP) {
               if (info->pkttype != AVM_PA_PKTTYPE_NONE)
                  RETURN(AVM_PA_RX_ERROR_HDR);
               info->pkttype |= AVM_PA_PKTTYPE_IPV6ENCAP;
               info->encap_offset = offset;
               state = AVM_PA_IPV4;
               continue;
            }
            /* A fragment header directly after the ipv6 header is the only
             * extension header that is looked into. */
            if (hdr->ipv6h.nexthdr == IPPROTO_FRAGMENT) {
               struct ipv6fraghdr *fragh = (struct ipv6fraghdr *)p;
               info->pkttype |= AVM_PA_PKTTYPE_IPV6;
               info->ip_offset = offset;
               if (fragh->frag_off & constant_htons(IP6_OFFSET))
                  RETURN(AVM_PA_RX_FRAGMENT);
               /* We don't support forwarding fragments, we may only create them for
                * tunnels, so check if we're on egress.
                */
               if ((fragh->frag_off & constant_htons(IP6_MF)) && !on_egress)
                  RETURN(AVM_PA_RX_FRAGMENT);
               p += sizeof(struct ipv6fraghdr);
               state = AVM_PA_IP_PROTO;
               ipproto = fragh->nexthdr;
               continue;
            }
            info->pkttype |= AVM_PA_PKTTYPE_IPV6;
            info->ip_offset = offset;
            state = AVM_PA_IP_PROTO;
            ipproto = hdr->ipv6h.nexthdr;
            if ((offset & 0x3) && info->hdroff == 0)
               info->hdroff = 4 - (offset & 0x3);
            continue;

         /* Dispatch on the L4 protocol taken from the ip header. Most cases
          * terminate the parse; L2TP, GRE and LISP (via UDP) may continue
          * with an inner header. */
         case AVM_PA_IP_PROTO:
            switch (ipproto) {
               case IPPROTO_TCP:
                  if (p + sizeof(struct tcphdr) > end)
                     RETURN(AVM_PA_RX_ERROR_LEN);
                  if (pa_add_match(info, offset, AVM_PA_PORTS) < 0)
                     RETURN(AVM_PA_RX_ERROR_MATCH);
                  /* Only ports are stored */
                  full_hdrlen = (p - data) + sizeof(struct tcphdr);
                  p += 2 * sizeof(__be16);
                  info->pkttype |= ipproto;
                  info->l4_offset = offset;
                  RETURN(AVM_PA_RX_OK);
               case IPPROTO_UDP:
                  if (p + sizeof(struct udphdr) > end)
                     RETURN(AVM_PA_RX_ERROR_LEN);
                  if (pa_add_match(info, offset, AVM_PA_PORTS) < 0)
                     RETURN(AVM_PA_RX_ERROR_MATCH);
                  /* destination port 4341 is tried as LISP data */
                  if (hdr->udph.dest == constant_htons(4341)) {
                     p += sizeof(struct udphdr);
                     state = AVM_PA_LISP;
                     continue;
                  } else if (hdr->udph.dest == constant_htons(67)) {
                     if (AVM_PA_PKTTYPE_IP_VERSION(info->pkttype) == 4) {
                        /* We don't accelerate DHCPv4 as it turned out to be
                         * problematic. Local DHCP daemons must listen
                         * on a raw socket in addition to datagram sockets
                         * because clients usually have the source address
                         * of 0.0.0.0. We cannot serve raw sockets once a local
                         * session exists so the local daemons miss packets.
                         *
                         * DHCP is not worthwhile to accelerate anyway
                         * - low traffic
                         * - often broadcast
                         *
                         * Since no other protocol requires two sockets it's
                         * not worth it to implement raw socket support, therefore
                         * just don't create sessions.
                         *
                         * Fixes JZ-25001 and JZ-94510.
                         */
                        p += sizeof(struct udphdr);
                        RETURN(AVM_PA_RX_BYPASS);
                     }
                  }
                  /* Only ports are stored */
                  full_hdrlen = (p - data) + sizeof(struct udphdr);
                  p += 2 * sizeof(__be16);
                  info->pkttype |= ipproto;
                  info->l4_offset = offset;
                  RETURN(AVM_PA_RX_OK);
               case IPPROTO_ICMP:
                  if (p + sizeof(struct icmphdr) > end)
                     RETURN(AVM_PA_RX_ERROR_LEN);
                  /* only echo request/reply are classified */
                  if (   hdr->icmph.type != ICMP_ECHO
                      && hdr->icmph.type != ICMP_ECHOREPLY)
                     RETURN(AVM_PA_RX_BYPASS);
                  if (pa_add_match(info, offset, AVM_PA_ICMPV4) < 0)
                     RETURN(AVM_PA_RX_ERROR_MATCH);
                  p += sizeof(struct icmphdr);
                  info->pkttype |= ipproto;
                  info->l4_offset = offset;
                  RETURN(AVM_PA_RX_OK);
               case IPPROTO_ICMPV6:
                  if (p + sizeof(struct icmp6hdr) > end)
                     RETURN(AVM_PA_RX_ERROR_LEN);
                  /* only echo request/reply are classified */
                  if (   hdr->icmpv6h.icmp6_type != ICMPV6_ECHO_REQUEST
                      && hdr->icmpv6h.icmp6_type != ICMPV6_ECHO_REPLY)
                     RETURN(AVM_PA_RX_BYPASS);
                  if (pa_add_match(info, offset, AVM_PA_ICMPV6) < 0)
                     RETURN(AVM_PA_RX_ERROR_MATCH);
                  p += sizeof(struct icmp6hdr);
                  info->pkttype |= ipproto;
                  info->l4_offset = offset;
                  RETURN(AVM_PA_RX_OK);
               case IPPROTO_L2TP:
                  /* already inside an encapsulation: stop parsing here */
                  if (AVM_PA_PKTTYPE_IPENCAP_VERSION(info->pkttype))
                     RETURN(AVM_PA_RX_OK);
                  if (pa_add_match(info, offset, AVM_PA_L2TP) < 0)
                     RETURN(AVM_PA_RX_ERROR_MATCH);
                  {
                     struct avm_pa_l2tp *l2tp = NULL;
                     __be32 be_session_id = hdr->l2tp.session_id;
                     /* check the system has configured sessions...
                      * yes: we check and use the sessions offset (start of eth header)
                      * no: we terminate classification, probably l2tp pass through.
                      */
                     if (be_session_id != 0) {
                        if (on_egress) {
                           l2tp = pa_l2tp_session_search_by_peer(be_session_id);
                        }
                        else {
                           l2tp = pa_l2tp_session_search(be_session_id);
                           if (!l2tp)
                              l2tp = pa_l2tp_session_alloc(be_session_id); /* fails inside irq */
                        }
                     }
                     if (l2tp) {
                        /* Known session: the ip header seen so far becomes the
                         * encapsulation header, parsing continues with the
                         * inner ethernet frame. */
                        if (p + l2tp->hdr_len > end)
                           RETURN(AVM_PA_RX_ERROR_LEN);
                        p += l2tp->hdr_len;
                        info->encap_offset = info->ip_offset;
                        info->pkttype = AVM_PA_PKTTYPE_IP2IPENCAP_VERSION(info->pkttype);
                        info->pkttype |= AVM_PA_PKTTYPE_L2TP;
                        state = AVM_PA_ETH;
                     } else {
                        /* We are in irq context or cache is filled,
                         * or there is no local l2tp session, i.e. pass through.
                         *
                         * We cannot know for sure as long as we might be in
                         * irq context, but we assume pass through and figure
                         * out later whether to add a session.
                         *
                         * Control connections are treated as pass through here
                         * but effectively they won't be accelerated because precheck
                         * on egress always fails (if they terminate locally).
                         */
                        AVM_PKT_INFO(pkt)->l2tp_session_id = be_session_id;
                        if (p + sizeof(__be32) > end)
                           RETURN(AVM_PA_RX_ERROR_LEN);
                        p += sizeof(__be32);
                        info->pkttype |= ipproto;
                        info->l4_offset = offset;
                        RETURN(AVM_PA_RX_OK);
                     }
                  }
                  continue;
               case IPPROTO_GRE:
                  /* already inside an encapsulation: stop parsing here */
                  if (AVM_PA_PKTTYPE_IPENCAP_VERSION(info->pkttype))
                     RETURN(AVM_PA_RX_OK);
                  if (p + sizeof(struct tlb_grehdr) > end)
                     RETURN(AVM_PA_RX_ERROR_LEN);
                  p += sizeof(struct tlb_grehdr);
                  if (pa_add_match(info, offset, AVM_PA_GRE) < 0)
                     RETURN(AVM_PA_RX_ERROR_MATCH);
                  info->encap_offset = info->ip_offset;
                  info->pkttype = AVM_PA_PKTTYPE_IP2IPENCAP_VERSION(info->pkttype);
                  info->pkttype |= AVM_PA_PKTTYPE_GRE;
                  /* Only IPv4 and transparent ethernet payloads are followed;
                   * any other GRE protocol ends up in the bypass below. */
                  switch (hdr->greh.protocol) {
                     case constant_htons(ETH_P_IP):
                        state = AVM_PA_IPV4;
                        continue;
                     case constant_htons(ETH_P_TEB):
                        state = AVM_PA_ETH;
                        continue;
                  }
                  break;
               case IPPROTO_ESP:
                  if (p + sizeof(struct ip_esp_hdr) > end)
                     RETURN(AVM_PA_RX_ERROR_LEN);
                  if (pa_add_match(info, offset, AVM_PA_ESP) < 0)
                     RETURN(AVM_PA_RX_ERROR_MATCH);
                  p += sizeof(struct ip_esp_hdr);
                  /* Only supporting pass-through... */
                  info->pkttype |= ipproto;
                  info->l4_offset = offset;
                  /* Encrypted payload follows, terminate parsing. */
                  RETURN(AVM_PA_RX_OK);
            }
            RETURN(AVM_PA_RX_BYPASS);

         case AVM_PA_LLC_SNAP:
            if (   hdr->llcsnap.dsap  != 0xAA
                || hdr->llcsnap.ssap  != 0xAA
                || hdr->llcsnap.ui    != 0x03)
               /* not checking:
                * RFC1042_SNAP 0x00,0x00,0x00
                * BTEP_SNAP    0x00,0x00,0xf8
                */
               RETURN(AVM_PA_RX_BYPASS);

            if (pa_add_match(info, offset, AVM_PA_LLC_SNAP) < 0)
               RETURN(AVM_PA_RX_ERROR_MATCH);
            p += sizeof(struct llc_snap_hdr);
            state = AVM_PA_ETH_PROTO;
            ethproto = get_unaligned(&hdr->llcsnap.type);
            continue;

         /* Entered after a UDP header with destination port 4341; 'offset'
          * is the position of the LISP data header. */
         case AVM_PA_LISP:
           if (AVM_PA_PKTTYPE_IPENCAP_VERSION(info->pkttype))
              RETURN(AVM_PA_RX_OK);
            if (p + LISP_DATAHDR_SIZE > end)
               RETURN(AVM_PA_RX_ERROR_LEN);
            p += LISP_DATAHDR_SIZE;
            /* peek at the version nibble of the presumed inner ip header */
            hdr = (hdrunion_t *)p;
            if (hdr->iph.version == 4)
               state = AVM_PA_IPV4;
            else if (hdr->iph.version == 6)
               state = AVM_PA_IPV6;
            else
               RETURN(AVM_PA_RX_OK); /* not a lisp packet */
            if (pa_add_match(info, offset, AVM_PA_LISP) < 0)
               RETURN(AVM_PA_RX_ERROR_MATCH);
            info->lisp_offset = offset;
            info->encap_offset = info->ip_offset;
            info->pkttype = AVM_PA_PKTTYPE_IP2IPENCAP_VERSION(info->pkttype);
            info->pkttype |= AVM_PA_PKTTYPE_LISP;
            continue;

         default:
            RETURN(AVM_PA_RX_ERROR_STATE);
      }
   }
out:
   /* 'ttl' holds the ttl/hop limit of the last ip header that was parsed */
   if (ret == AVM_PA_RX_OK && ttl == 0)
      ret = AVM_PA_RX_TTL;
   if (ret == AVM_PA_RX_OK && (p - data) > AVM_PA_MAX_HEADER)
      ret = AVM_PA_RX_ERROR_LEN;
   /* Keep a copy of everything that was parsed (up to 'p'). */
   if (ret == AVM_PA_RX_OK || pa_glob.dbgmatch) {
      info->protocol = pkt->protocol;
      info->hdrlen = p - data;
      memcpy(HDRCOPY(info), data, info->hdrlen);
   }
   /* For TCP/UDP the copy ends after the ports while full_hdrlen covers the
    * complete L4 header; otherwise both lengths are the same. */
   if (ret == AVM_PA_RX_OK) {
      info->full_hdrlen = full_hdrlen ? full_hdrlen : info->hdrlen;
   }
   return ret;
#undef RETURN
}

/*
 * Check whether a TCP packet carries no payload.
 *
 * match - packet match providing ip_offset, l4_offset and pkttype
 * head  - start of the headers the offsets of 'match' refer to
 *
 * IPv4: total length equals ip header length plus tcp data offset.
 * IPv6: payload length equals the tcp data offset.
 * Any other ip version yields false.
 */
static inline bool
pa_match_is_tcp_nodata(struct avm_pa_pkt_match *match, char *head)
{
   struct iphdr *iph = (struct iphdr *) (head + match->ip_offset);
   struct tcphdr *tcph = (struct tcphdr *) (head + match->l4_offset);

   switch (AVM_PA_PKTTYPE_IP_VERSION(match->pkttype)) {
      case 4:
         return ntohs(PA_IPTOTLEN(iph)) == (PA_IPHLEN(iph)+PA_TCP_DOFF(tcph));
      case 6:
         return ntohs(PA_IP6_PAYLOADLEN(iph)) == PA_TCP_DOFF(tcph);
      default:
         return 0;
   }
}

static inline void
pa_match_postprocess(struct avm_pa_pkt_match *info)
{
   int i;
   info->hash = 0;

   for (i = 0 ; i < info->nmatch; i++) {
      struct avm_pa_match_info *p = &info->match[i];
      hdrunion_t *hdr = (hdrunion_t *)(HDRCOPY(info)+p->offset);
      switch (p->type) {
         case AVM_PA_IPV4:
#if AVM_PA_UNALIGNED_CHECK
            if (((unsigned long)&hdr->iph.saddr) & 0x3)
               if (net_ratelimit())
                  pr_info("avm_pa: unaligned access %p (ipv4)\n",
                          &hdr->iph.saddr);
#endif
            info->hash ^= hdr->iph.saddr;
            info->hash ^= hdr->iph.daddr;
            info->hash ^= hdr->iph.protocol;
            info->hash ^= hdr->iph.tos;
            /*
             * JZ-36233: Gastzugang auf dem Repeater
             *
             * A session may be created by a packet with IP_MF set. If this
             * header is going to be pushed as-is on egress (e.g. in case of L2TP
             * encap) all packets would have IP_MF set, so we need to reset frag_off.
             *
             * TODO: What about IPv6?
             */
            hdr->iph.frag_off = 0;
            break;
         case AVM_PA_IPV6:
#if AVM_PA_UNALIGNED_CHECK
            if (((unsigned long)&hdr->ipv6h.saddr.s6_addr32[2]) & 0x3)
               if (net_ratelimit())
                  pr_info("avm_pa: unaligned access %p (ipv6)\n",
                          &hdr->ipv6h.saddr.s6_addr32[2]);
#endif
            //info->hash ^= hdr->ipv6h.saddr.s6_addr32[0];
            //info->hash ^= hdr->ipv6h.saddr.s6_addr32[1];
            info->hash ^= hdr->ipv6h.saddr.s6_addr32[2];
            info->hash ^= hdr->ipv6h.saddr.s6_addr32[3];
            //info->hash ^= hdr->ipv6h.daddr.s6_addr32[0];
            //info->hash ^= hdr->ipv6h.daddr.s6_addr32[1];
            info->hash ^= hdr->ipv6h.daddr.s6_addr32[2];
            info->hash ^= hdr->ipv6h.daddr.s6_addr32[3];
            info->hash ^= hdr->ipv6h.nexthdr;
            /* hash prio and flow label (plus constant version 6) */
            info->hash ^= hdr->ipv6_vpfl;
            break;
         case AVM_PA_PORTS:
            /* At least Linux seems to prefer even ports when selecting source ports,
             * for RPS we want the lowest bits of the hash to be most significant */
            info->hash ^= ror16(hdr->ports[0], 1);
            info->hash ^= ror16(hdr->ports[1], 1);
            break;
         case AVM_PA_ICMPV4:
         case AVM_PA_ICMPV6:
            info->hash ^= hdr->ports[0]; /* type + code */
            info->hash ^= hdr->ports[2]; /* id */
            break;
         case AVM_PA_ESP:
            info->hash ^= hdr->esph.spi;
            break;
      }
   }
   info->hash = (info->hash >> 16) ^ (info->hash & 0xffff);
   info->hash = (info->hash >> 8) ^ (info->hash & 0xff);
   info->hash %= CONFIG_AVM_PA_MAX_SESSION;
}

/*
 * Reset 'match', parse the packet into it and, if the parse succeeded,
 * compute the hash of the match.
 *
 * The arguments are passed on to set_pkt_match() unchanged.
 * Returns the result of set_pkt_match().
 */
static int pa_set_pkt_match(enum avm_pa_framing framing,
                            unsigned int hstart,
                            PKT *pkt,
                            struct avm_pa_pkt_match *match,
                            int on_egress)
{
   int status;

   pa_reset_match(match);

   status = set_pkt_match(framing, hstart, pkt, match, on_egress);
   if (status != AVM_PA_RX_OK)
      return status;

   /* the hash is only meaningful for a successfully classified packet */
   pa_match_postprocess(match);
   return status;
}

/* Compare two packet matches. A slice can be selected by skipping
 * the first few match info items, for example to only compare the
 * L3 part of the packet match. */
static inline int
pa_match_cmp(struct avm_pa_pkt_match *a1, int a1_skip,
             struct avm_pa_pkt_match *a2, int a2_skip)
{
   struct avm_pa_match_info *p;
   hdrunion_t *h1, *h2;
   int rc;
   int i;
   int a1_nmatch = a1->nmatch - a1_skip;
   int a2_nmatch = a2->nmatch - a2_skip;

   /* The match item count must be equal. */
   if ((rc = a1_nmatch - a2_nmatch))
      goto out;

   /* The match slice itself must be equal. */
   if ((rc = memcmp(&a1->match[a1_skip], &a2->match[a2_skip],
                    a1_nmatch*sizeof(struct avm_pa_match_info))))
      goto out;

   /* From here now we determined that the slice is the same, therefore we only
    * use match items from a1 going forward, to test how the relevant fields in
    * the hdrcopy compare. */
   for (i = a1->nmatch-1; i >= a1_skip; i--) {
      p = &a1->match[i];
      /* h1 and h2 must NOT be used if p->offset is AVM_PA_OFFSET_NOT_SET.
       * At this time, AVM_PA_OFFSET_NOT_SET is only possible for type == AVM_PA_VLAN */
      h1 = (hdrunion_t *)(HDRCOPY(a1)+p->offset);
      h2 = (hdrunion_t *)(HDRCOPY(a2)+p->offset);
      switch (p->type) {
         case AVM_PA_ETH:
            rc = memcmp(&h1->ethh, &h2->ethh, sizeof(struct ethhdr));
            if (rc) goto out;
            break;
         case AVM_PA_VLAN:
            if (p->offset == AVM_PA_OFFSET_NOT_SET)
               rc = a1->vlan_tci ^ a2->vlan_tci;
            else
               rc = h1->vlanh.vlan_tci ^ h2->vlanh.vlan_tci;
            if (rc) goto out;
            break;
         case AVM_PA_PPPOE:
            rc = (int)h1->pppoeh.sid - (int)h2->pppoeh.sid;
            if (rc) goto out;
            break;
         case AVM_PA_PPP:
            rc = (int)h1->ppph[0] - (int)h2->ppph[0];
            if (rc) goto out;
            break;
         case AVM_PA_IPV4:
            rc = (int)h1->iph.protocol - (int)h2->iph.protocol;
            if (rc) goto out;
            rc = (int)h1->iph.tos - (int)h2->iph.tos;
            if (rc) goto out;
            /* JZ-47728: Windows ICMP has always the same id so ttl
             * is the only difference between ping and tracert packets
             * Also, ttl == 1 must not match existing sessions with higher ttl.
             */
            rc = (int)h1->iph.ttl - (int)h2->iph.ttl;
            if (rc) goto out;
            rc = (int)h1->iph.daddr - (int)h2->iph.daddr;
            if (rc) goto out;
            rc = (int)h1->iph.saddr - (int)h2->iph.saddr;
            if (rc) goto out;
            break;
         case AVM_PA_IPV6:
            /* compares priority and flow label in one op (version is always 6) */
            rc = h1->ipv6_vpfl - h2->ipv6_vpfl;
            if (rc) goto out;
            rc = (int)h1->ipv6h.nexthdr - (int)h2->ipv6h.nexthdr;
            if (rc) goto out;
            rc = (int)h1->ipv6h.hop_limit - (int)h2->ipv6h.hop_limit;
            if (rc) goto out;
            /* compare both src and dst in a single call */
            rc = memcmp(&h1->ipv6h.saddr, &h2->ipv6h.saddr,
                        sizeof(struct in6_addr) * 2);
            if (rc) goto out;
            break;
         case AVM_PA_PORTS:
            rc = (int)h1->ports[0] - (int)h2->ports[0]; /* source */
            if (rc) goto out;
            rc = (int)h1->ports[1] - (int)h2->ports[1]; /* dest */
            if (rc) goto out;
            break;
         case AVM_PA_ICMPV4:
         case AVM_PA_ICMPV6:
            rc = (int)h1->ports[0] - (int)h2->ports[0]; /* type + code */
            if (rc) goto out;
            rc = (int)h1->ports[2] - (int)h2->ports[2]; /* id */
            if (rc) goto out;
            break;
         case AVM_PA_LLC_SNAP:
            rc = (int)h1->llcsnap.type - (int)h2->llcsnap.type;
            if (rc) goto out;
            break;
         case AVM_PA_L2TP:
            rc = (int)h1->l2tp.session_id - (int)h2->l2tp.session_id;
            if (rc) goto out;
            break;
         case AVM_PA_GRE:
            rc = (int)h1->greh.protocol - (int)h2->greh.protocol;
            if (rc) goto out;
            break;
         case AVM_PA_ESP:
            rc = (int)h1->esph.spi - (int)h2->esph.spi;
            if (rc) goto out;
            break;
      }
   }
out:
   return rc;
}

/* Returns 1 if both packet matches are equal in all items, 0 otherwise. */
static inline int pa_match_eq(struct avm_pa_pkt_match *a1,
                              struct avm_pa_pkt_match *a2)
{
   return !pa_match_cmp(a1, 0, a2, 0);
}


/* Returns 1 if two matches are compatible for bridging, 0 otherwise.
 *
 * This is basically the same as pa_match_eq, except vlan is not considered, since
 * a bsession can cross VLANs (provided that no modifications need to be done
 * to the packet data and that the system's bridge setup allows that).
 *
 * Both matches need an ETH item; everything in front of the first ETH item
 * is ignored. */
static inline int
pa_match_bridged(struct avm_pa_pkt_match *a1, struct avm_pa_pkt_match *a2)
{
   struct avm_pa_match_info *p1, *p2;
   struct avm_pa_match_info *end1 = &a1->match[a1->nmatch];
   struct avm_pa_match_info *end2 = &a2->match[a2->nmatch];
   hdrunion_t *h1, *h2;

   if (!(p1 = pa_find_eth_match(a1)))
      return 0;
   if (!(p2 = pa_find_eth_match(a2)))
      return 0;

   h1 = (hdrunion_t *)(HDRCOPY(a1)+p1->offset);
   h2 = (hdrunion_t *)(HDRCOPY(a2)+p2->offset);

   /* MAC addresses must be equal. */
   if (memcmp(&h1->ethh, &h2->ethh, ETH_ALEN * 2))
      return 0;

   /* Different VLANs is OK, even the VID may differ. So just skip the VLAN match */
   /* JZ-63724: ...but only if there's no in-band VLAN header stored in skb->data */
   /* The item behind ETH is only inspected if it is a valid one: entries at
    * or beyond nmatch are stale and may lie outside of the match array. */
   ++p1;
   if (p1 < end1 && p1->type == AVM_PA_VLAN
       && p1->offset == AVM_PA_OFFSET_NOT_SET)
      ++p1;
   ++p2;
   if (p2 < end2 && p2->type == AVM_PA_VLAN
       && p2->offset == AVM_PA_OFFSET_NOT_SET)
      ++p2;

   /* Compare the remainder for equality which ensures that modifications
    * to the packet data are not permitted. */
   return pa_match_cmp(a1, p1 - a1->match, a2, p2 - a2->match) == 0;
}


/* ------------------------------------------------------------------------ */
/* -------- mod rec ------------------------------------------------------- */
/* ------------------------------------------------------------------------ */

/*
 * From RFC 1624 Incremental Internet Checksum
 *
 * HC  - old checksum in header
 * HC' - new checksum in header
 * m   - old value of a 16-bit field
 * m'  - new value of a 16-bit field
 * HC' = ~(~HC + ~m + m') --    [Eqn. 3]
 * HC' = HC - ~m - m'     --    [Eqn. 4]
 *
 *
 * csum_unfold(): be16 -> u32
 *
 * M   = ~m + m';
 *
 * we use Eqn.3, because we precalculate M.
 * csum_fold(): add the carries
 *
 * HC' = ~csum_fold((~csum_unfold(HC) + ~m + m'));
 *
 * HC' = ~csum_fold(csum_add(~csum_unfold(HC), M));
 *
 */

/* 32 bit addition with end-around carry: a carry out of bit 31 is added
 * back in at bit 0. */
static inline u32 hcsum_add(u32 sum, u32 addend)
{
   u32 total = sum + addend;

   /* the unsigned addition wrapped around iff the result is smaller than
    * one of its operands */
   return total + (total < addend);
}

/* Start an incremental update: returns the 16 bit complement of the
 * checksum found in the header, widened to 32 bit. */
static inline u32 hcsum_prepare(u16 sum)
{
   return (u32)sum ^ 0xffff;
}

/* Account for a 32 bit field changing from 'from' to 'to':
 * adds ~from and to (with end-around carry) to 'sum'. */
static inline u32 hcsum_u32(u32 sum, u32 from, u32 to)
{
   return hcsum_add(hcsum_add(sum, ~from), to);
}

/* Account for a 16 bit field changing from 'from' to 'to'. Both values are
 * widened to 32 bit and handed to hcsum_u32(). */
static inline u32 hcsum_u16(u32 sum, u16 from, u16 to)
{
   return hcsum_u32(sum, from, to);
}

/* Fold a 32 bit sum to 16 bit by adding the upper half to the lower half
 * until nothing is left in the upper half. */
static inline u16 hcsum_fold(u32 sum)
{
   while (sum > 0xffff)
      sum = (sum >> 16) + (sum & 0xffff);
   return sum;
}

/* Finish an incremental update: fold the sum to 16 bit and complement it,
 * which gives the value to be stored in the header. */
static inline u16 hcsum_finish(u32 sum)
{
   u16 folded = hcsum_fold(sum);

   return ~folded;
}

/*
 * Build the IPv4 modification record from the ingress and egress headers.
 *
 * mod        - record to fill: new addresses, tos, ports or icmp id, the
 *              precomputed checksum deltas and the ip header length
 * update_ttl - non-zero if the ttl is to be decremented
 * in         - ip header (followed by the L4 header) as received
 * out        - ip header (followed by the L4 header) as to be sent
 *
 * Returns the AVM_PA_MOD_* flags describing what has to be changed.
 */
static int pa_set_v4_mod_rec(struct avm_pa_v4_mod_rec *mod,
                             int update_ttl, u8 *in, u8 *out)
{
   struct iphdr *ip_in = (struct iphdr *)in;
   struct iphdr *ip_out = (struct iphdr *)out;
   u16 flags = 0;
   u32 ip_delta = 0;
   u32 l4_delta;
   int icmp = 0;

   /* addresses: part of the ip header checksum and of the L4 pseudo header */
   mod->saddr = ip_out->saddr;
   if (ip_in->saddr != ip_out->saddr) {
      flags |= AVM_PA_MOD_SADDR|AVM_PA_MOD_IP4_CSUM;
      ip_delta = hcsum_u32(ip_delta, ip_in->saddr, ip_out->saddr);
   }

   mod->daddr = ip_out->daddr;
   if (ip_in->daddr != ip_out->daddr) {
      flags |= AVM_PA_MOD_DADDR|AVM_PA_MOD_IP4_CSUM;
      ip_delta = hcsum_u32(ip_delta, ip_in->daddr, ip_out->daddr);
   }

   /* the L4 delta starts out with the address changes only;
    * tos and ttl do not go into it */
   l4_delta = ip_delta;

   mod->tos = ip_out->tos;
   if (ip_in->tos != ip_out->tos) {
      flags |= AVM_PA_MOD_TOS|AVM_PA_MOD_IP4_CSUM;
      ip_delta = hcsum_u16(ip_delta, htons(ip_in->tos), htons(ip_out->tos));
   }

   if (update_ttl) {
      /* ttl is decremented by one: 0x0100 -> 0x0000 in the ttl/protocol word */
      flags |= AVM_PA_MOD_TTL|AVM_PA_MOD_IP4_CSUM;
      ip_delta = hcsum_u16(ip_delta, constant_htons(0x0100), 0x0000);
   }

   mod->l3crc_update = hcsum_fold(ip_delta);

   /* where is the checksum inside the L4 header (0: no L4 handling) */
   if (ip_in->protocol == IPPROTO_TCP) {
      mod->l4crc_offset = offsetof(struct tcphdr, check);
   } else if (ip_in->protocol == IPPROTO_UDP) {
      mod->l4crc_offset = offsetof(struct udphdr, check);
   } else if (ip_in->protocol == IPPROTO_ICMP) {
#ifdef _LINUX_ICMP_H
      mod->l4crc_offset = offsetof(struct icmphdr, checksum);
#else
      mod->l4crc_offset = offsetof(struct icmphdr, check);
#endif
      icmp = 1;
   } else {
      mod->l4crc_offset = 0;
   }

   mod->l4crc_update = 0;
   mod->l4crc_update_part = 0;
   if (mod->l4crc_offset) {
      u16 *l4_in = (u16 *)(in + PA_IPHLEN(ip_in));
      u16 *l4_out = (u16 *)(out + PA_IPHLEN(ip_out));

      if (!icmp) {
         if (flags & AVM_PA_MOD_ADDRS)
            flags |= AVM_PA_MOD_L4_CSUM;
         mod->sport = l4_out[0];
         /* PARTIAL_CSUM case: tcph->check has been prepared with the
          * (pre-NAT) ip addresses and the checksum over tcp header and
          * payload is computed later on, so the ports must not be part
          * of this update value.
          */
         mod->l4crc_update_part = hcsum_fold(l4_delta);
         if (l4_in[0] != l4_out[0]) {
            flags |= AVM_PA_MOD_SPORT|AVM_PA_MOD_L4_CSUM;
            l4_delta = hcsum_u16(l4_delta, l4_in[0], l4_out[0]);
         }
         mod->dport = l4_out[1];
         if (l4_in[1] != l4_out[1]) {
            flags |= AVM_PA_MOD_DPORT|AVM_PA_MOD_L4_CSUM;
            l4_delta = hcsum_u16(l4_delta, l4_in[1], l4_out[1]);
         }
      } else {
         /* for icmp the address changes are dropped from the delta,
          * only a changed id counts */
         l4_delta = 0;
         mod->id = l4_out[2];
         if (l4_in[2] != l4_out[2]) {
            flags |= AVM_PA_MOD_ICMPID|AVM_PA_MOD_L4_CSUM;
            l4_delta = hcsum_u16(l4_delta, l4_in[2], l4_out[2]);
         }
      }
      mod->l4crc_update = hcsum_fold(l4_delta);
   }

   mod->iphlen = PA_IPHLEN(ip_out);
   return flags;
}

/*
 * Apply a precomputed IPv4 modification record to the packet at @data.
 *
 * mod          - record holding the new addresses/ports/id/tos and the
 *                incremental checksum updates
 * modflags     - AVM_PA_MOD_* bits selecting which fields are rewritten
 * partial_csum - true if the L4 checksum field is only partially prepared;
 *                then l4crc_update_part is applied instead of l4crc_update
 * data         - start of the IPv4 header; the L4 header is located at
 *                data + mod->iphlen
 */
static void pa_do_v4_mod_rec(struct avm_pa_v4_mod_rec *mod, u16 modflags,
                             bool partial_csum, u8 *data)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct iphdr *iph = (struct iphdr *)data;
   u16 *l4words = (u16 *)(data + mod->iphlen);
   u16 *l4_check;
   u16 old_check;
   u32 acc;

   ctx->stats.rx_mod++;

   /* --- L3 header fields --- */
   if (modflags & AVM_PA_MOD_ADDRS) {
      if ((unsigned long)iph & 0x3) {
         /* header is not 32 bit aligned: copy saddr and daddr bytewise */
         memcpy(&iph->saddr, &mod->saddr, 2*sizeof(u32));
      } else {
         iph->saddr = mod->saddr;
         iph->daddr = mod->daddr;
      }
   }
   if (modflags & AVM_PA_MOD_TOS)
      iph->tos = mod->tos;
   if (modflags & AVM_PA_MOD_TTL)
      iph->ttl--;
   if (modflags & AVM_PA_MOD_IP4_CSUM) {
      acc = hcsum_prepare(iph->check);
      iph->check = hcsum_finish(hcsum_add(acc, mod->l3crc_update));
   }

   /* --- L4 header fields: ports take precedence over the ICMP id --- */
   if (modflags & AVM_PA_MOD_PORTS) {
      l4words[0] = mod->sport;
      l4words[1] = mod->dport;
   } else if (modflags & AVM_PA_MOD_ICMPID) {
      l4words[2] = mod->id;
   }

   if (!(modflags & AVM_PA_MOD_L4_CSUM))
      return;

   /* --- L4 checksum, read only after the fields above were rewritten --- */
   l4_check = &l4words[mod->l4crc_offset>>1];
   old_check = *l4_check;

   /* A zero checksum field in a UDP packet is left untouched */
   if (!old_check && iph->protocol == IPPROTO_UDP)
      return;

   if (partial_csum) {
      *l4_check = hcsum_fold(hcsum_add(old_check, mod->l4crc_update_part));
   } else {
      acc = hcsum_prepare(old_check);
      *l4_check = hcsum_finish(hcsum_add(acc, mod->l4crc_update));
   }
}

/*
 * Print the parts of an IPv4 modification record that are selected by
 * @modflags, one line per field, through @fprintffunc / @arg.
 */
static void pa_show_v4_mod_rec(struct avm_pa_v4_mod_rec *mod, u16 modflags,
                               pa_fprintf fprintffunc, void *arg)
{
   char addr[64];

   /* addresses */
   if (modflags & AVM_PA_MOD_SADDR) {
      in_addr2str(&mod->saddr, addr, sizeof(addr));
      fprintffunc(arg, "*IPv4 Src      : %s\n", addr);
   }
   if (modflags & AVM_PA_MOD_DADDR) {
      in_addr2str(&mod->daddr, addr, sizeof(addr));
      fprintffunc(arg, "*IPv4 Dst      : %s\n", addr);
   }

   /* remaining L3 fields */
   if (modflags & AVM_PA_MOD_TOS) {
      fprintffunc(arg, "*IPv4 Tos      : 0x%02x\n", mod->tos);
   }
   if (modflags & AVM_PA_MOD_IP4_CSUM) {
      fprintffunc(arg, "*L3 Sum        : update 0x%02x\n", mod->l3crc_update);
   }

   /* L4 fields */
   if (modflags & AVM_PA_MOD_SPORT) {
      fprintffunc(arg, "*Src Port      : %d\n", ntohs(mod->sport));
   }
   if (modflags & AVM_PA_MOD_DPORT) {
      fprintffunc(arg, "*Dst Port      : %d\n", ntohs(mod->dport));
   }
   if (modflags & AVM_PA_MOD_ICMPID) {
      fprintffunc(arg, "*ICMP Id       : %d\n", ntohs(mod->id));
   }
   if (modflags & AVM_PA_MOD_L4_CSUM) {
      fprintffunc(arg, "*L4 Sum        : update 0x%02x\n", mod->l4crc_update);
   }
}

/* ------------------------------------------------------------------------ */

/*
 * Print a session modification record: IP version, pulled and pushed
 * header lengths, the pushed encapsulation as hex dump, the IPv4 field
 * modifications and whether the TTL is decreased.
 */
static void pa_show_mod_rec(struct avm_pa_mod_rec *mod,
                            pa_fprintf fprintffunc, void *arg)
{
   fprintffunc(arg, "IP version     : %u\n", mod->ipversion);

   /* what is removed on ingress */
   if (mod->pull_l2_len) {
      fprintffunc(arg, "L2 pull        : %d\n", mod->pull_l2_len);
   }
   if (mod->pull_encap_len) {
      fprintffunc(arg, "Encap pull     : %d\n", mod->pull_encap_len);
   }

   /* what is added on egress */
   if (mod->push_encap_len) {
      fprintffunc(arg, "Push IPv       : %u\n", mod->outer_ipversion);
   }
   if (mod->push_udpoffset) {
      fprintffunc(arg, "Push UDP       : %u\n", mod->push_udpoffset);
   }
   if (mod->push_encap_len) {
      char hex[256];

      /* the encapsulation template follows the L2 header in the header copy */
      data2hex(HDRCOPY(mod)+mod->push_l2_len, mod->push_encap_len, hex, sizeof(hex));
      fprintffunc(arg, "Encap push     : %s\n", hex);
   }

   pa_show_v4_mod_rec(&mod->v4_mod, mod->modflags, fprintffunc, arg);

   if (mod->modflags & AVM_PA_MOD_TTL) {
      fprintffunc(arg, "*IPv%d TTL      : decrease\n", mod->ipversion);
   }
}

/*
 * Build the egress match for @pkt and verify that it is compatible with the
 * ingress match.
 *
 * Returns the result of pa_set_pkt_match() if that is not AVM_PA_RX_OK,
 * AVM_PA_RX_BYPASS if the base packet types of ingress and egress differ,
 * and AVM_PA_RX_OK otherwise.
 */
static int pa_egress_precheck(struct avm_pa_pid *pid,
                              PKT *pkt,
                              struct avm_pa_pkt_match *ingress,
                              struct avm_pa_pkt_match *egress)
{
   unsigned int hstart = 0;
   int rc;

   /* The header start from the packet info is only used if the pid asks for it */
   if (pid->ecfg.flags & AVM_PA_PID_FLAG_HSTART_ON_EGRESS)
      hstart = AVM_PKT_INFO(pkt)->hstart;

   rc = pa_set_pkt_match(pid->egress_framing, hstart, pkt, egress, 1);
   if (rc != AVM_PA_RX_OK)
      return rc;

   if (!AVM_PA_PKTTYPE_BASE_EQ(egress->pkttype, ingress->pkttype))
      return AVM_PA_RX_BYPASS;

   return AVM_PA_RX_OK;
}

/*
 * Derive the modification record of a session from its ingress and egress
 * packet match.
 *
 * The egress header copy becomes the header template of the record. From the
 * ip/encap offsets of both matches the function computes how many bytes of
 * L2 and tunnel header are pulled from a received packet (pull_l2_len,
 * pull_encap_len) and how many bytes of tunnel and L2 header are pushed on
 * transmit (push_encap_len, push_l2_len). For IPv4 the field modifications
 * are computed by pa_set_v4_mod_rec(); for IPv6 only the TTL flag is set, and
 * only for routed sessions.
 *
 * Precondition: AVM_PA_PKTTYPE_BASE_EQ(egress->pkttype, ingress->pkttype)
 */
static void pa_calc_modify(struct avm_pa_session *session,
                           struct avm_pa_pkt_match *ingress,
                           struct avm_pa_pkt_match *egress)
{
   struct avm_pa_mod_rec *mod = &session->mod;

   mod->hdroff = egress->hdroff;
   memcpy(HDRCOPY(mod), HDRCOPY(egress), egress->hdrlen);
   mod->pkttype = egress->pkttype;
   if (AVM_PA_PKTTYPE_EQ(ingress->pkttype, egress->pkttype)) {

      if (ingress->encap_offset == AVM_PA_OFFSET_NOT_SET) {
         /* no tunnel, egress->encap_offset also not set */
         mod->pull_l2_len = ingress->ip_offset;
         mod->pull_encap_len = 0;
         mod->ipversion = AVM_PA_PKTTYPE_IP_VERSION(egress->pkttype);
         mod->outer_ipversion = mod->ipversion;
         mod->push_encap_len = 0;
         mod->push_l2_len = egress->ip_offset;
      } else {
         /* untouched tunnel, egress->encap_offset also set */
         mod->pull_l2_len = ingress->encap_offset;
         mod->pull_encap_len = 0;
         mod->ipversion = AVM_PA_PKTTYPE_IPENCAP_VERSION(egress->pkttype);
         mod->outer_ipversion = mod->ipversion;
         mod->push_encap_len = 0;
         mod->push_l2_len = egress->encap_offset;
      }
   } else { /* AVM_PA_PKTTYPE_BASE_EQ because of precheck */
      /* Different packet types with equal base type require a tunnel on at
       * least one side. An unset offset is AVM_PA_OFFSET_NOT_SET; 0 is a
       * valid offset and must not be mistaken for "no tunnel". */
      BUG_ON(ingress->encap_offset == AVM_PA_OFFSET_NOT_SET &&
             egress->encap_offset == AVM_PA_OFFSET_NOT_SET);

      if (ingress->encap_offset == AVM_PA_OFFSET_NOT_SET) {
         /* tunnel header only on egress */
         mod->pull_l2_len = ingress->ip_offset;
         mod->pull_encap_len = 0;
         mod->ipversion = AVM_PA_PKTTYPE_IP_VERSION(egress->pkttype);
         mod->outer_ipversion = AVM_PA_PKTTYPE_IPENCAP_VERSION(egress->pkttype);
         mod->push_encap_len = egress->ip_offset - egress->encap_offset;
         mod->push_l2_len = egress->encap_offset;
      } else if (egress->encap_offset == AVM_PA_OFFSET_NOT_SET) {
         /* tunnel header only on ingress */
         mod->pull_l2_len = ingress->encap_offset;
         mod->pull_encap_len = ingress->ip_offset - ingress->encap_offset;
         mod->ipversion = AVM_PA_PKTTYPE_IP_VERSION(egress->pkttype);
         mod->outer_ipversion = mod->ipversion;
         mod->push_encap_len = 0;
         mod->push_l2_len = egress->ip_offset;
      } else {
         /* different tunnel header on both ingress and egress (!AVM_PA_PKTTYPE_EQ) */
         mod->pull_l2_len = ingress->encap_offset;
         mod->pull_encap_len = ingress->ip_offset - ingress->encap_offset;
         mod->ipversion = AVM_PA_PKTTYPE_IP_VERSION(egress->pkttype);
         mod->outer_ipversion = AVM_PA_PKTTYPE_IPENCAP_VERSION(egress->pkttype);
         mod->push_encap_len = egress->ip_offset - egress->encap_offset;
         mod->push_l2_len = egress->encap_offset;
      }
   }

   /* Offset of the UDP header inside the pushed encapsulation. Always start
    * from 0 so that no stale value survives when an encapsulation without
    * LISP/UDP header is pushed. */
   mod->push_udpoffset = 0;
   if (mod->push_encap_len && egress->lisp_offset != AVM_PA_OFFSET_NOT_SET) {
      mod->push_udpoffset = egress->lisp_offset - egress->encap_offset;
      mod->push_udpoffset -= sizeof(struct udphdr);
   }

   if (mod->ipversion == 4) {
      /* offsets of the (inner) IPv4 header in the ingress and egress header copies */
      int ingress_offset = mod->pull_l2_len + mod->pull_encap_len;
      int egress_offset = mod->push_l2_len + mod->push_encap_len;
      mod->modflags = pa_set_v4_mod_rec(&mod->v4_mod,
                                        test_bit(PA_S_ROUTED, &session->flags),
                                        HDRCOPY(ingress)+ingress_offset,
                                        HDRCOPY(mod)+egress_offset);
   } else if (mod->ipversion == 6) {
      if (test_bit(PA_S_ROUTED, &session->flags)) {
         mod->modflags = AVM_PA_MOD_TTL;
      }
   } else {
      BUG();
   }
}

/*
 * Lookup table translating the casttype of a packet match (used as index)
 * into the pkt_type value stored in the skb. Read-only.
 */
static const u8 casttype2pkt_type[] = {
   PACKET_HOST,
   PACKET_MULTICAST,
   PACKET_BROADCAST
};


/* ------------------------------------------------------------------------ */
/* -------- session retrieval and verification ---------------------------- */
/* ------------------------------------------------------------------------ */

/*
 * Look up a session by handle.
 *
 * Returns the session, or NULL if avm_pa_session_valid() rejects the entry
 * the handle refers to.
 */
static struct avm_pa_session *
pa_session_get(avm_session_handle session_handle)
{
   struct avm_pa_data *pd = &pa_data;
   struct avm_pa_session *candidate = PA_SESSION(pd, session_handle);

   if (avm_pa_session_valid(candidate))
      return candidate;

   return NULL;
}

/* ------------------------------------------------------------------------ */
/* -------- packet forwarding --------------------------------------------- */
/* ------------------------------------------------------------------------ */

#ifdef CONFIG_AVM_PA_TX_NAPI

/*
 * NAPI poll handler for deferred transmission.
 *
 * Takes up to @budget packets from the pid's tx_napi_pkts queue and hands
 * them to the pid's tx_func. Completes NAPI if the queue ran empty before
 * the budget was used up.
 *
 * Returns the number of packets transmitted.
 */
static int pa_dev_tx_napi_poll(struct napi_struct *napi, int budget)
{
   struct avm_pa_pid *pid = container_of(napi, struct avm_pa_pid, tx_napi);
   int done;

   for (done = 0; done < budget; done++) {
      /* _pa_transmit() appends with skb_queue_tail(), so packets must be
       * taken from the head to keep them in order (FIFO). Dequeuing from
       * the tail would transmit a burst in reverse order. */
      PKT *pkt = skb_dequeue(&pid->tx_napi_pkts);

      if (!pkt)
         break;
      pid->cfg.tx_func(pid->cfg.tx_arg, pkt);
   }

   if (done < budget)
      napi_complete(napi);

   return done;
}

#ifdef CONFIG_SMP
/*
 * Schedule the tx NAPI instance on another online CPU, falling back to the
 * current CPU if there is no other one.
 *
 * This runs in a tasklet because we want to run the "core transition" per
 * packet burst, and not per packet. Both napi_schedule_prep() and IPIs (via
 * smp_call_function_single()) on a per packet basis would be too expensive in
 * this smp scenario. (napi_schedule_prep() does atomic accesses which requires
 * snooping the other cores caches, and the napi_poll runs on one of the other
 * cores).
 *
 * Furthermore, guarding the IPI with napi_schedule_prep() has been found to
 * perform a bit better than doing the IPI straight in this tasklet.
 */
static void __do_schedule_napi(struct napi_struct *napi)
{
   int this_cpu = smp_processor_id();
   int target_cpu = cpumask_any_but(cpu_online_mask, this_cpu);

   /* no other online cpu found */
   if (target_cpu >= nr_cpumask_bits)
      target_cpu = this_cpu;

   if (!napi_schedule_prep(napi))
      return;

   smp_call_function_single(target_cpu, (void*)__napi_schedule, napi, 0);
}

/*
 * SMP variant: request transmission of the queued packets of @pid by
 * scheduling the pid's tx_napi_tsk tasklet.
 * NOTE(review): the tasklet handler is presumably __do_schedule_napi();
 * its registration is not visible here.
 */
static void do_schedule_napi(struct avm_pa_pid *pid)
{
   tasklet_schedule(&pid->tx_napi_tsk);
}
#else
/*
 * Non-SMP variant: request transmission of the queued packets of @pid by
 * scheduling the pid's tx NAPI instance directly.
 */
static void do_schedule_napi(struct avm_pa_pid *pid)
{
   /* On UP the atomic access is a no-op */
   napi_schedule(&pid->tx_napi);
}
#endif

#endif

/*
 * Prepend the egress L2 header template to @pkt, if the egress has one.
 * If the template contains a PPPoE header, its length field is updated to
 * the packet length minus egress->pppoe_hdrlen.
 */
static inline void pa_do_push_l2(struct avm_pa_egress *egress, PKT *pkt)
{
   struct pppoehdr *pppoe;

   if (!egress->push_l2_len)
      return;

   memcpy(PKT_PUSH(pkt, egress->push_l2_len), HDRCOPY(&egress->match), egress->push_l2_len);

   if (egress->pppoe_offset == AVM_PA_OFFSET_NOT_SET)
      return;

   /* fix up the PPPoE payload length for this particular packet */
   pppoe = (struct pppoehdr *)(PKT_DATA(pkt) + egress->pppoe_offset);
   pppoe->length = htons(pkt->len - egress->pppoe_hdrlen);
}

/*
 * Hand a single, already modified packet to one egress of a session.
 *
 * Updates the transmit statistics, copies the egress control block into the
 * skb and then finishes the packet depending on the egress type (output
 * device, local delivery, rtp socket, xfrm, null).
 *
 * The packet is always consumed: it is either passed on or freed.
 *
 * Returns NET_XMIT_DROP if the packet was dropped because the tx NAPI backlog
 * of the pid is full, NET_XMIT_SUCCESS in all other cases (including packets
 * that are freed because of a missing rtp socket or an unsupported/unknown
 * egress type).
 */
static int _pa_transmit(struct avm_pa_egress *egress, PKT *pkt)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *pid = PA_PID(ctx, egress->pid_handle);
   struct avm_pa_pkt_info *info = AVM_PKT_INFO(pkt);
   struct avm_pa_session *session __maybe_unused;

#ifdef CONFIG_AVM_PA_TX_NAPI
   /* A non-NULL dev indicates avm_pa_dev_pid_register_tx_napi() was used */
   if (pid->tx_napi.dev && (skb_queue_len(&pid->tx_napi_pkts) >= CONFIG_AVM_PA_NAPI_MAX_BACKLOG)) {
      PKT_FREE(pkt); /* drop packet, wouldn't fit anyway */
      return NET_XMIT_DROP;
   }
#endif

   /*
    * info->already_modified is set when the packet comes
    * from avm_pa_tx_channel_accelerated_packet() and
    * statistics are counted in HW.
    *
    * 2016-03-02, calle
    */
   if (info->already_modified == 0) {
      egress->sw_stats.tx_pkts++;
      egress->sw_stats.tx_bytes += pkt->len + egress->push_l2_len;
   }
   info->egress_pid_handle = egress->pid_handle;
   egress->tx_pkts++;
   pid->tx_pkts++;

   if (pid->ecfg.cb_len)
      memcpy(&pkt->cb[pid->ecfg.cb_start], egress->cb, pid->ecfg.cb_len);

   pkt->protocol = egress->match.protocol;

   switch (egress->type) {
      case avm_pa_egresstype_output:
         pa_do_push_l2(egress, pkt);
         pkt->tc_index = egress->output.tc_index;
         pkt->skb_iif = egress->output.skb_iif;
         pkt->mac_len = egress->output.mac_len;
         if (egress->match.vlan_proto)
            skb_vlan_tag_put(pkt, egress->match.vlan_proto, egress->match.vlan_tci);

         /* skb_flow_dissect() expects network_header to point
          * at the header indicated by skb->protocol */
         if (pid->egress_framing == avm_pa_framing_ether)
            skb_set_network_header(pkt, ETH_HLEN);
         else
            skb_reset_network_header(pkt);

         pkt->pkt_type = PACKET_OUTGOING;
         /* Checksum handling:
          *
          * tl;dr: CHECKSUM_NONE unless checksum offload is requested by
          *    CHECKSUM_PARTIAL.
          *
          * 1) Don't touch if already set to CHECKSUM_PARTIAL.
          *    Either the packet is locally generated and checksum offloading
          *    is requested (especially in case of gso), or the packet is
          *    "received" on some virtual device (e.g. ifb0) and the checksum offload
          *    request is sticky, or the receive side (device or Linux)
          *    performed GRO and segmentation must be done by the PID, including
          *    checksum calculation. In all these events, keep CHECKSUM_PARTIAL.
          * 2) For normally received packets, ip_summed is assumed to be initialized
          *    by the driver. If it verified the packet checksums or not doesn't
          *    really matter, we blindly forward the packet. That means we must change
          *    CHECKSUM_UNNECESSARY and CHECKSUM_COMPLETE to CHECKSUM_NONE.
          */
         if (pkt->ip_summed != CHECKSUM_PARTIAL)
            pkt->ip_summed = CHECKSUM_NONE;
         skb_reset_mac_header(pkt);
         skb_set_queue_mapping(pkt, egress->output.txq_id);
         if (info->tcp_nodata) {
            pid->prioack_accl_acks++;
            egress->tcpack_pkts++;
         }
         pkt->priority = egress->output.priority;
         if (egress->output.dst)
            skb_dst_set(pkt, dst_clone(egress->output.dst));
         secpath_reset(pkt);

#ifdef CONFIG_BLOG
         if (avm_pa_pid_tack_enabled(pid)) {
            pkt->mark = SKBMARK_SET_Q(pkt->mark, (BROADCOM_MAX_PRIOS - (pkt->priority & TC_H_MIN_MASK)));
            pkt->mark = SKBMARK_SET_FLOW_ID(pkt->mark, 0x1);
         }

         /*
          * We have to let the flow cache know about the struct nf_conn *
          * entry of the connection, so flow cache registers an accelerated session
          * there.
          */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
         if (!ctx->hw_ppa_disabled && (session = pa_session_get(info->session_handle)) && session->ct) {
               /* In case the skb already has a conntrack entry:
                * decrement refcount and overwrite it. */
               if (skb_nfct(pkt)) {
                  nf_conntrack_put(skb_nfct(pkt));
                  pkt->_nfct = 0;
                  if (net_ratelimit())
                     pr_err("avm_pa: accelerated packet with exisitng nf_conn session_handle = %u\n", info->session_handle);
               }
               /* The nf_conn entry saved in the session contains the required generic_ct */
               nf_conntrack_get(&session->ct->ct_general);
               nf_ct_set(pkt, session->ct, 0);
               /*
                * here we save the nf_conn entry in the blog extension of the skb
                * nothing is done if the blog_ptr(pkt) == NULL, this happens if we call blog_skip()
                */
               blog_link(FLOWTRACK, blog_ptr(pkt),
                         (void*) session->ct, session->generic_ct_dir ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL, 0 );
         }
#endif
#endif

#ifdef CONFIG_TI_PACKET_PROCESSOR
         /*
          * Relevant PP fields must be copied into the egress to ensure the PP handles
          * the packet correctly as if it had taken the entire slow path (via ARM).
          *
          * In JZ-68647 (Puma 7: Cert-Fail SF-02 proc-1.1, root cause), it was found
          * that we copied too little and added skb->ti_meta_info* to the list. In
          * JZ-69391 it was found that we copied too much and overwrote important
          * per-packet PP information and went back to a white list of individual fields.
          *
          * Reasoning: We don't need to store session information as the PP
          * session is already set up (or no session at all). We need to store QoS / SF
          * relevant fields that are used in the xmit routines of the interface drivers,
          * even if there is no PP session at all. Except ti_epi_header which contains
          * per-packet data set by the PP.
          */
         SKB_GET_PP_INFO_P(pkt)->egress_queue = egress->output.puma_pktinfo.egress_queue;
#ifdef CONFIG_TI_META_DATA
         pkt->ti_meta_info = egress->output.ti_meta_info;
         pkt->ti_meta_info2 = egress->output.ti_meta_info2;
#endif
#endif
#if AVM_PA_TRACE
         if (ctx->dbgtrace)
            pa_printk(KERN_DEBUG, "avm_pa: %lu - _pa_transmit(%s), prio=0x%X, info->match.ack_only=%d\n",
                                  pkt_uniq_id(pkt), pid->cfg.name, pkt->priority, info->tcp_nodata);
#endif
#ifdef CONFIG_AVM_PA_TX_NAPI
         if (pid->tx_napi.dev) {
            /* deferred transmit: queue the packet and let the NAPI poll send it */
            skb_queue_tail(&pid->tx_napi_pkts, pkt);
            do_schedule_napi(pid);
         }
         else
#endif
         {
            (*pid->cfg.tx_func)(pid->cfg.tx_arg, pkt);
            ctx->stats.fw_output++;
         }
         return NET_XMIT_SUCCESS;

      case avm_pa_egresstype_local:
         {
            pkt->pkt_type = casttype2pkt_type[egress->match.casttype];
            skb_dst_set(pkt, dst_clone(egress->local.dst));
            secpath_reset(pkt);
            pkt->dev = egress->local.dev;
            pkt->skb_iif = egress->local.skb_iif;
            ctx->stats.fw_local++;
            (*pid->cfg.tx_func)(pid->cfg.tx_arg, pkt);
         }
         return NET_XMIT_SUCCESS;

      case avm_pa_egresstype_rtp:
         if (egress->rtp.sk) {
            size_t hsize;
            skb_set_network_header(pkt, 0);
            if (pkt->protocol == constant_htons(ETH_P_IP)) {
               struct iphdr *iph = (struct iphdr *)pkt->data;
               hsize = iph->ihl*4;
            } else {
               hsize = sizeof(struct ipv6hdr);
            }
            skb_pull(pkt, hsize); /* skb->data points to udphdr */
            skb_set_transport_header(pkt, 0);
            pkt->pkt_type = casttype2pkt_type[egress->match.casttype];
            pkt->skb_iif = egress->rtp.skb_iif;
            ctx->stats.fw_rtp++;
            (*egress->rtp.transmit)(egress->rtp.sk, pkt);
            return NET_XMIT_SUCCESS;
         } else {
            ctx->stats.fw_rtp_drop++;
            kfree_skb(pkt);
            return NET_XMIT_SUCCESS;
         }
      case avm_pa_egresstype_xfrm:
         /* Without xfrm support nobody takes ownership of the packet. Leave
          * the switch so that it is accounted as illegal egress and freed
          * below instead of being leaked. */
         if (!IS_ENABLED(CONFIG_XFRM))
            break;
         pkt->dev = egress->xfrm.dev;
         skb_dst_set(pkt, dst_clone(egress->xfrm.dst));
         secpath_reset(pkt);
         pkt->tc_index = egress->xfrm.tc_index;
         (*pid->cfg.tx_func)(egress->xfrm.x, pkt);
         return NET_XMIT_SUCCESS;
      case avm_pa_egresstype_null:
         /* perhaps we should shortcut and drop even earlier */
         consume_skb(pkt);
         return NET_XMIT_SUCCESS;
   }

   /* unknown or unsupported egress type */
   ctx->stats.fw_ill++;
   kfree_skb(pkt);
   return NET_XMIT_SUCCESS;
}

/*
 * Transmit @skb to @egress, fragmenting it first unless the session is
 * bridged, and account the result in the global forward statistics.
 *
 * Bridged packets arrive with their ethernet header and need neither
 * fragmentation nor a size check. Running them through the fragmentation
 * code caused packets to be cut when the third position of the mac address
 * was 0x00, because the ethernet header was interpreted as IP/IPv6 header
 * and the packet got trimmed and perhaps fragmented.
 * (bugfix 2014-07-08 calle)
 */
static void pa_transmit(struct avm_pa_egress *egress, struct sk_buff *skb, int bridged)
{
   struct avm_pa_global *ctx = &pa_glob;

   avm_simple_profiling_skb(0, skb);

   if (!bridged) {
      skb = avm_pa_fragment(egress, skb);
      if (skb == NULL) {
         ctx->stats.fw_frag_fail++;
         return;
      }
      /* more than one skb in the list means the packet was fragmented */
      if (skb->next != NULL)
         ctx->stats.fw_frags++;
   }

   /* send each skb of the list on its own */
   do {
      struct sk_buff *following = skb->next;
      int rc;

      skb->next = NULL;
      rc = _pa_transmit(egress, skb);
      if (rc == NET_XMIT_DROP) {
         ctx->stats.fw_drop++;
      } else {
         ctx->stats.fw_pkts++;
      }
      skb = following;
   } while (skb);
}


/*
 * Apply the L3/L4 modifications of @mod to the IP header at the start of
 * the packet data: the full IPv4 modification record for IPv4, otherwise
 * only the hop limit decrement if AVM_PA_MOD_TTL is set.
 */
static void pa_do_modify_l3(struct avm_pa_mod_rec *mod, PKT *pkt)
{
   struct ipv6hdr *ipv6h;

   if (mod->ipversion == 4) {
      bool partial = pkt->ip_summed == CHECKSUM_PARTIAL;

      pa_do_v4_mod_rec(&mod->v4_mod, mod->modflags, partial, PKT_DATA(pkt));
      return;
   }

   if (!(mod->modflags & AVM_PA_MOD_TTL))
      return;

   ipv6h = (struct ipv6hdr *)PKT_DATA(pkt);
   ipv6h->hop_limit--;
}


/*
 * Perform all packet modifications of a session except pushing the egress
 * L2 header (that is done per egress in pa_do_push_l2()).
 *
 * For bridged sessions only the skb header offsets are initialized. For all
 * other sessions the ingress L2 and encapsulation headers are pulled, the
 * packet is trimmed to the length given by its IP header, the L3/L4
 * modifications are applied and, if the session pushes a tunnel header, the
 * encapsulation template is prepended and its length/checksum fields are
 * filled in.
 *
 * mod     - modification record of the session
 * pkt     - packet to modify in place
 * bridged - non-zero for bridged sessions
 */
static void pa_do_modify_non_l2(struct avm_pa_mod_rec *mod, PKT *pkt, int bridged)
{
   /* The actual vlan_tci will be inserted on egress. */
   skb_vlan_tag_clear(pkt);

   if (bridged) {
      /* We have to initialize skb->network_header for Linux' transmit paths.
       * For bridged we can safely assume ethernet (might be vlan tagged,
       * but that's OK as long as pkt->protocol agrees). */
      skb_set_network_header(pkt, ETH_HLEN);
      skb_reset_mac_len(pkt);
      return;
   }

   /* Strip ingress L2 header and, if present, the ingress tunnel header */
   if (mod->pull_l2_len)
      PKT_PULL(pkt, mod->pull_l2_len);
   if (mod->pull_encap_len)
      PKT_PULL(pkt, mod->pull_encap_len);

   /* We're now at the innermost l3 header, set offsets in the skb appropriately.
    * This is required for Linux' transmit paths and some drivers (but remember that
    * this is not done for bridged sessions).
    * Also remove any padding that might still be left from ingress L2. */
   skb_reset_network_header(pkt);
   if (mod->ipversion == 4) {
      pskb_trim(pkt, ntohs(PA_IPTOTLEN(pkt->data)));
      skb_set_transport_header(pkt, mod->v4_mod.iphlen);
   } else if (mod->ipversion == 6) {
      pskb_trim(pkt, ntohs(PA_IP6_PAYLOADLEN(pkt->data)) + sizeof(struct ipv6hdr));
      skb_set_transport_header(pkt, sizeof(struct ipv6hdr));
   }

   if (mod->modflags)
      pa_do_modify_l3(mod, pkt);

   if (mod->push_encap_len) {
      unsigned tot_len;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
      /* Checksum offloading might get it wrong if we don't indicate encapsulation */
      pkt->encapsulation = 1;
      skb_reset_inner_network_header(pkt);
      skb_set_inner_transport_header(pkt, skb_transport_offset(pkt));
#endif

      /* Prepend the tunnel header template; it is stored behind the L2
       * header in the header copy of the modification record. */
      memcpy(PKT_PUSH(pkt, mod->push_encap_len),
             HDRCOPY(mod)+mod->push_l2_len, mod->push_encap_len);
      skb_reset_network_header(pkt);
      /* total length including the tunnel header that was just pushed */
      tot_len = pkt->len;
      if (mod->outer_ipversion == 4) {
         /* outer IPv4: new id, total length and header checksum */
         struct iphdr *iph = (struct iphdr *)PKT_DATA(pkt);
         iph->id = rand() & 0xffff;
         iph->tot_len = htons(tot_len);
         skb_set_transport_header(pkt, PA_IPHLEN(iph));
         ip_send_check(iph);
      } else {
         /* outer IPv6: the payload length excludes the fixed IPv6 header */
         struct ipv6hdr *ipv6h = (struct ipv6hdr *)PKT_DATA(pkt);
         ipv6h->payload_len = htons(tot_len - sizeof(struct ipv6hdr));
         skb_set_transport_header(pkt, sizeof(*ipv6h));
      }
      if (mod->push_udpoffset) {
         /* tunnel header contains a UDP header: fix its length and checksum */
         struct udphdr *udph = (struct udphdr *)(PKT_DATA(pkt)+mod->push_udpoffset);
         udph->len = htons(tot_len - mod->push_udpoffset);
         if (mod->outer_ipversion == 4)
            set_udp_checksum((struct iphdr *)PKT_DATA(pkt), udph);
         else
            set_udpv6_checksum((struct ipv6hdr *)PKT_DATA(pkt), udph);
      }
   }
}

/*
 * Send @skb to every egress of @session.
 *
 * The egresses may be served in any order as long as the skbs per egress
 * stay in order. All egresses after the first get a copy of the packet, the
 * first egress gets the original last. That way the common case of a single
 * egress needs no copy at all.
 */
static void
_pa_do_send_egress(struct avm_pa_session *session, struct sk_buff *skb, int bridged)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_egress *first;
   struct avm_pa_egress *egress;

   AVM_PKT_INFO(skb)->is_accelerated = 1;

   first = avm_pa_first_egress(session);
   egress = first;

   /* continue behind the first egress */
   hlist_for_each_entry_continue_rcu(egress, egress_list) {
      struct sk_buff *copy = PKT_COPY(skb);

      if (copy == 0) {
         ctx->stats.fw_fail++;
         continue;
      }
      pa_transmit(egress, copy, bridged);
   }

   pa_transmit(first, skb, bridged);
}

/*
 * Modify a packet according to its session and send it to all egresses.
 *
 * Pass NULL for @session to have it looked up from the packet info. Do this
 * whenever it is uncertain that the session is still valid, i.e. if the
 * packet was queued and the RCU read side critical section was left.
 *
 * If @session is given, we are still inside the RCU lock of
 * avm_pa_pid_receive(). Otherwise the caller must enter a new RCU read side.
 */
static void
pa_do_modify_and_send(struct avm_pa_session *session, struct sk_buff *skb)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct sk_buff *frag;
   int bridged;
   int nfrags;

   avm_simple_profiling_skb(0, skb);

   if (session == NULL) {
      /* Protect against possible race with GC timer deleting sessions */
      session = pa_session_get(AVM_PKT_INFO(skb)->session_handle);
      if (unlikely(session == NULL)) {
         ctx->stats.fw_drop_gone++;
         PKT_FREE(skb);
         return;
      }
   }

   BUG_ON(AVM_PKT_INFO(skb)->session_uniq_id != session->uniq_id);

   bridged = session->bsession != NULL;

   /* Already modified packets are only forwarded, nothing is accounted here */
   if (AVM_PKT_INFO(skb)->already_modified) {
      _pa_do_send_egress(session, skb, bridged);
      return;
   }

   /* TODO: Paged skbs but do they really correlate to individual packets? */
   nfrags = 1;
   skb_walk_frags(skb, frag)
      nfrags++;

   session->ingress_sw_stats.tx_bytes += PKT_LEN(skb) + skb->data_len;
   session->ingress_sw_stats.tx_pkts  += nfrags;

   pa_do_modify_non_l2(&session->mod, skb, bridged);
   _pa_do_send_egress(session, skb, bridged);

   /* sessions with a zero timeout are flushed after their first packet */
   if (session->timeout == 0)
      pa_session_flush(session, "fast timeout");
}

/*
 * Check whether @skb may be forwarded with respect to the egress MTUs.
 *
 * If the (inner) IPv4 header allows fragmentation (DF not set) the packet
 * always passes. Otherwise, if no tunnel header is pushed on egress, the
 * packet length after removing the ingress headers must not exceed the MTU
 * of any egress of the session. Slow path will probably drop such a packet
 * and generate an ICMP error.
 *
 * hdr_off - number of bytes skb->data is already advanced past the start of
 *           the ingress headers
 *
 * Returns 0 if the packet passes, -1 if it is too big for an egress.
 */
static int pa_egress_size_check(struct avm_pa_session *session, struct sk_buff *skb, int hdr_off)
{
   struct avm_pa_pkt_match *info = &session->ingress;
   struct avm_pa_mod_rec *mod = &session->mod;
   struct avm_pa_egress *egress;
   unsigned len;

   if (AVM_PA_PKTTYPE_IP_VERSION(info->pkttype) == 4) {
      struct iphdr *iph = (struct iphdr *) (skb->data - hdr_off + info->ip_offset);

      if (!(iph->frag_off & constant_htons(IP_DF)))
         return 0;
   }

   /* tunnel on output: nothing to check here */
   if (mod->push_encap_len != 0)
      return 0;

   len = skb->len + hdr_off - mod->pull_l2_len - mod->pull_encap_len;

   avm_pa_for_each_egress(egress, session) {
      if (len > egress->mtu)
         return -1;
   }

   return 0;
}

/* ------------------------------------------------------------------------ */
/* -------- macaddr management -------------------------------------------- */
/* ------------------------------------------------------------------------ */

/* There are two forms of macaddrs, pvid and non-pvid.
 *
 * pvid macaddrs are the common ones. They are used for sessions where the
 * VLAN changes between ingress and egress, or where no vlan is involved at all.
 *
 * non-pvid macaddrs are used only if the vlan between ingress and egress
 * does not change.
 *
 * The purpose of non-pvid macaddrs is to avoid flushing sessions in
 * the ingress pid change logic when a known ethernet address is observed
 * in a different vlan. As long as the vlan is known by the means of
 * a non-pvid macaddr (which means: there is a related session with
 * the same vlan on egress) then the packet is accepted and maybe accelerated.
 *
 * Otherwise, when an ethernet address is seen with an unknown vlan, then
 * we assume that the host has been moved to a different PID, i.e. one
 * that is based on a virtual vlan interface. Then we flush
 * all sessions that hold the corresponding pvid session. This part is
 * crucial for bridging sessions.
 *
 * macaddrs are allocated for each egress per session. But multiple egress
 * may share macaddrs and therefore macaddrs are reference counted.
 * pvid and non-pvid macaddrs do not share refcounts, i.e. both can exist
 * without the other. But there is one catch: non-pvid macaddrs are only
 * fully deleted when there is no corresponding pvid macaddr, so that
 * we don't forget about "proper vlans" as long as there are pvid macaddrs
 * present (otherwise we would flush too early when a vlan packet
 * is observed again). These zero-reference non-pvid macaddrs are deleted
 * when there is no pvid macaddrs left. In the meantime they can
 * be looked up by the pid change logic (and also new egress for new sessions).
 *
 * Internally, pvid and non-pvid share the same hash bucket, because
 * only the address is hashed. However, pvid macaddrs are head-inserted
 * while non-pvid macaddrs are tail-inserted. This allows for quick
 * decision whether pvid macaddrs exist at all for a given address.
 */

/*
 * The refcount of a macaddr carries the non-pvid marker as an extra bit:
 * PA_MACADDR_NON_PVID_OFFSET is set for non-pvid macaddrs, the remaining
 * bits hold the actual reference count.
 *
 * The macro argument is parenthesized so that any pointer expression
 * (e.g. &entry) can be passed.
 */
#define PA_MACADDR_NON_PVID_OFFSET 0x1000000
#define PA_MACADDR_IS_PVID(macaddr) (!((macaddr)->refcount & PA_MACADDR_NON_PVID_OFFSET))
#define PA_MACADDR_REFCOUNT(macaddr) ((macaddr)->refcount & ~PA_MACADDR_NON_PVID_OFFSET)

/*
 * Locate the ethernet header of @skb for the given framing.
 *
 * Returns skb->data for ether framing, vlan_eth_hdr(skb) for dev framing
 * and NULL for every other framing.
 */
static struct vlan_ethhdr *
pa_get_ethhdr(enum avm_pa_framing framing, struct sk_buff *skb)
{
   switch (framing) {
      case avm_pa_framing_ether:
         return (struct vlan_ethhdr *) skb->data;
      case avm_pa_framing_dev:
         return vlan_eth_hdr(skb);
      default:
         return NULL;
   }
}

/*
 * Determine the VLAN id of a packet.
 *
 * A VLAN tag stored in the skb meta data takes precedence. Otherwise the id
 * is taken from an 802.1Q/802.1ad header following the ethernet addresses.
 *
 * Returns the VLAN id, or 0 if the packet carries no VLAN header or has no
 * ethernet header for the given framing.
 */
static u16
pa_get_vlan_id(enum avm_pa_framing framing, struct sk_buff *skb)
{
   struct vlan_ethhdr *ethh;

   if (skb_vlan_tag_present(skb))
      return skb_vlan_tag_get_id(skb);

   /* pa_get_ethhdr() yields NULL for framings without ethernet header */
   ethh = pa_get_ethhdr(framing, skb);
   if (!ethh)
      return 0;

   if (   ethh->h_vlan_proto == htons(ETH_P_8021Q)
       || ethh->h_vlan_proto == htons(ETH_P_8021AD))
      return ntohs(ethh->h_vlan_TCI) & VLAN_VID_MASK;

   /* The null VID is equivalent to no VID, the header contains only priority information.
    * Therefore we can indicate the null VID if there is no vlan header.
    * See IEEE 802.1q */
   return 0;
}

/*
 * Get the VLAN TCI recorded in a packet match.
 *
 * Returns 0 if the match has no ethernet entry or the entry following it is
 * not a VLAN entry. If the VLAN entry has no offset into the header copy,
 * match->vlan_tci is returned, otherwise the TCI read from the header copy
 * in host byte order.
 */
static u16
pa_get_vlan_match(struct avm_pa_pkt_match *match)
{
   struct avm_pa_match_info *eth = pa_find_eth_match(match);
   struct avm_pa_match_info *vlan;
   struct vlanhdr *hdr;

   if (eth == NULL)
      return 0;

   /* vlan follows ethernet */
   vlan = eth + 1;
   if (vlan->type != AVM_PA_VLAN)
      return 0;

   if (vlan->offset == AVM_PA_OFFSET_NOT_SET)
      return match->vlan_tci;

   hdr = (struct vlanhdr *) (HDRCOPY(match) + vlan->offset);
   return ntohs(hdr->vlan_tci);
}

/*
 * Format a macaddr entry as text: mac address, optional vlan id, pvid
 * marker, reference count and the pid it belongs to.
 *
 * macaddr - entry to format
 * buf/sz  - destination buffer and its size
 *
 * Returns the result of snprintf(), i.e. the length the complete string
 * would have; this can be >= sz if the output was truncated.
 */
static size_t pa_macaddr2str(struct avm_pa_macaddr *macaddr,
                             char *buf, size_t sz)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *pid = PA_PID(ctx, macaddr->pid_handle);
   char vlan_buf[16] = "";

   /* bounded write, the vlan text must never overrun the small stack buffer */
   if (macaddr->vlan_id)
      snprintf(vlan_buf, sizeof(vlan_buf), " vlan %u", macaddr->vlan_id);

   return snprintf(buf, sz, "%pM%s%s ref %3lu pid %2d (%s)",
                   macaddr->mac, vlan_buf,
                   PA_MACADDR_IS_PVID(macaddr) ? " pvid" : "",
                   PA_MACADDR_REFCOUNT(macaddr),
                   pid->pid_handle, pid->cfg.name);
}

/*
 * Print a one line description of @macaddr through @fprintffunc / @arg.
 */
static void pa_show_macaddr(struct avm_pa_macaddr *macaddr,
                            pa_fprintf fprintffunc, void *arg)
{
   char text[128];

   pa_macaddr2str(macaddr, text, sizeof(text));
   fprintffunc(arg, "Macaddr        : %s\n", text);
}

/*
 * Hash an ethernet address to a 32 bit value.
 *
 * Every address byte is added to the accumulator and mixed with a
 * shift-add / shift-xor step; three more mixing steps follow after the last
 * byte. The caller reduces the result to the hash table size.
 */
static inline u32 macaddr_hash(const unsigned char mac[ETH_ALEN])
{
   const unsigned char *cur = mac;
   const unsigned char *end = mac + ETH_ALEN;
   u32 acc = 0;

   while (cur != end) {
      acc += *cur++;
      acc += acc << 10;
      acc ^= acc >> 6;
   }

   /* final avalanche */
   acc += acc << 3;
   acc ^= acc >> 11;
   acc += acc << 15;

   return acc;
}

/*
 * Look up or create the macaddr entry for (mac, vlan) and take a reference.
 *
 * @mac:        ethernet address
 * @pid_handle: pid the address is currently seen on; stored in the entry
 * @is_pvid:    true to link the pvid entry for this address (its vlan id is
 *              not part of the lookup), false to link the non-pvid entry
 *              with exactly this vlan_id
 * @vlan_id:    vlan id, stored in the entry
 *
 * Takes avm_pa_lock for the whole operation.
 *
 * Returns the referenced entry, or NULL if mac is a group address or if
 * no free slot is left in macaddr_array.
 */
static struct avm_pa_macaddr *
pa_macaddr_link(unsigned char mac[ETH_ALEN], avm_pid_handle pid_handle, bool is_pvid, u16 vlan_id)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_macaddr *p;
   u32 hash;
   int i;

   /* No macaddr for broadcast or multicast as we don't expect those on
    * ingress and therefore don't need them in the pid change logic.
    */
   if (mac[0] & 1)
      return NULL;

   /* The hash covers only the ethernet addresses so that
    * avm_pa_macaddrs that differ only in vlan share the same bucket.
    */
   hash = macaddr_hash(mac) % AVM_PA_MAX_MACADDR;

   spin_lock(&avm_pa_lock);

   /* First, try to locate existing entries. For pvid entries, the actual
    * vlan id doesn't matter. For non-pvid entries, the vlan id must match
    */
   hlist_for_each_entry_rcu(p, &ctx->macaddr_hashtab[hash], macaddr_list) {
      if (ether_addr_equal(p->mac, mac)) {
         if (is_pvid && PA_MACADDR_IS_PVID(p))
            goto out;
         else if (!is_pvid && !PA_MACADDR_IS_PVID(p) && vlan_id == p->vlan_id)
            goto out;
      }
   }

   /* Nothing found, claim the first unused slot (refcount == 0). */
   for (i=0; i < ARRAY_SIZE(ctx->macaddr_array); i++) {
      p = &ctx->macaddr_array[i];
      if (p->refcount == 0) {
         memcpy(p->mac, mac, ETH_ALEN);
         /* pvid macaddrs are always added to the head so that they come before
          * non-pvid macaddrs for the same address. Allows to cancel lookups
          * for pvid macaddrs early.
          */
         if (is_pvid) {
            hlist_add_head_rcu(&p->macaddr_list, &ctx->macaddr_hashtab[hash]);
         } else {
            p->refcount = PA_MACADDR_NON_PVID_OFFSET;
            hlist_add_tail_rcu(&p->macaddr_list, &ctx->macaddr_hashtab[hash]);
         }
         if (ctx->dbgsession) {
            pa_printk(KERN_DEBUG, "\navm_pa: new macaddr:\n");
            pa_show_macaddr(p, pa_printk, KERN_DEBUG);
         }
         goto out;
      }
   }

   /* Table is full. Do not fall through to "out": p still points at the
    * last slot, which is in use by another address, and must not be
    * overwritten or get an extra reference.
    */
   p = NULL;
   goto unlock;

out:
   p->pid_handle = pid_handle;
   p->vlan_id = vlan_id;
   p->refcount++;

unlock:
   spin_unlock(&avm_pa_lock);
   return p;
}

/*
 * Find the pvid macaddr entry for mac.
 *
 * Only the first entry with a matching address in the hash bucket is
 * inspected: pvid entries are always inserted at the head, so if that
 * entry is not a pvid entry there is none.
 *
 * Returns the pvid entry or NULL.
 */
static struct avm_pa_macaddr *
pa_macaddr_find_pvid(const char mac[ETH_ALEN])
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_macaddr *entry;
   u32 bucket = macaddr_hash(mac) % AVM_PA_MAX_MACADDR;

   hlist_for_each_entry_rcu(entry, &ctx->macaddr_hashtab[bucket], macaddr_list) {
      if (!ether_addr_equal(entry->mac, mac))
         continue;

      /* the first entry for this address decides */
      return PA_MACADDR_IS_PVID(entry) ? entry : NULL;
   }

   return NULL;
}


/*
 * Drop one reference on destmac and remove it from the hash table once it
 * is no longer needed.
 *
 * Runs under avm_pa_lock. An entry whose PA_MACADDR_REFCOUNT() is still
 * above zero after the decrement is left untouched. Otherwise:
 *  - a non-pvid entry stays in the table ("floating") as long as a pvid
 *    entry for the same address exists, else it is released and unhashed;
 *  - a pvid entry is unhashed, together with all unreferenced non-pvid
 *    entries for the same address that follow it in the bucket.
 */
static void pa_macaddr_unlink(struct avm_pa_macaddr *destmac)
{
   struct avm_pa_global *ctx = &pa_glob;

   spin_lock(&avm_pa_lock);

   destmac->refcount--;
   if (PA_MACADDR_REFCOUNT(destmac) > 0)
      goto unlock;

   if (!PA_MACADDR_IS_PVID(destmac)) {
      /* This is a non-pvid macaddr that isn't referenced by sessions anymore.
       *
       * As long as there is a pvid macaddr we keep this non-pvid macaddr
       * in a floating state where it can be used to a) prevent
       * extraneous "pid change" events and b) can be looked
       * up by pa_macaddr_link() in case of new sessions.
       */
      if (pa_macaddr_find_pvid(destmac->mac) != NULL)
         goto unlock;

      /* Clear non-refcount bits and allow pa_macaddr_link() re-use this macaddr.
       * No memset()! destmac might be currently used in an RCU read side.
       */
      destmac->refcount = 0;
   } else {
      /* This is a pvid macaddr that isn't referenced by sessions anymore.
       *
       * The above code puts non-pvid macaddrs in a floating state in the
       * presence of pvid macaddrs. So when we unlink the pvid macaddr we
       * must garbage-collect those floating non-pvid macaddrs now.
       *
       * The refcount must be checked! Floating macaddrs could be
       * referenced by new sessions in the meantime (i.e. not floating anymore).
       */
      struct avm_pa_macaddr *p = destmac;
      /* continue behind destmac: pvid entries sit at the head of the bucket */
      hlist_for_each_entry_continue_rcu(p, macaddr_list) {
         if (ether_addr_equal(p->mac, destmac->mac)) {
            if (PA_MACADDR_IS_PVID(p)) {
               /* cannot happen and indicates a problem in the code */
               pr_warn_ratelimited("duplicated pvid macaddr\n");
               continue;
            }
            if (PA_MACADDR_REFCOUNT(p) == 0) {
               p->refcount = 0;
               hlist_del_rcu(&p->macaddr_list);
               /* Because of the _rcu semantics of the traversal we can delete
                * and still continue traversal because next pointer remains intact.
                */
            }
         }
      }
   }
   hlist_del_rcu(&destmac->macaddr_list);

unlock:
   /* NOTE(review): this debug output is also reached via the "goto unlock"
    * paths above, i.e. it says "delete macaddr" even when the entry is still
    * referenced or kept floating - confirm whether that is intended.
    */
   if (ctx->dbgsession) {
      pa_printk(KERN_DEBUG, "\navm_pa: delete macaddr:\n");
      pa_show_macaddr(destmac, pa_printk, KERN_DEBUG);
   }

   spin_unlock(&avm_pa_lock);
}

/*
 * Detect that a source MAC address has moved to another pid or to another
 * vlan and flush the affected sessions.
 *
 * @mac:        source address of the received packet
 * @pid_handle: pid the packet was received on
 * @vlan_id:    vlan id of the received packet
 *
 * The difference of stats.sess_flushed before and after the flush calls is
 * added to stats.sess_pidchanged.
 *
 * must be called inside rcu read side
 */
static void pa_check_and_handle_ingress_pid_change(unsigned char mac[ETH_ALEN],
                                                   avm_pid_handle pid_handle,
                                                   u16 vlan_id)
{
   struct avm_pa_macaddr *p, *p_pvid;
   struct avm_pa_global *ctx = &pa_glob;
   u32 hash;
   int pid_group = PA_PID(ctx, pid_handle)->ecfg.pid_group;
   bool pid_changed = false;

   hash = macaddr_hash(mac) % AVM_PA_MAX_MACADDR;

   /* Look first if the low-level pid has changed. The pid is the same
    * for related pvid and non-pvid macaddrs, so the first mismatch is
    * sufficient to trigger pid change.
    * If the pid has not changed, then we check vlan to detect
    * changes between vlan interfaces that use the same low-level pid.
    * We can stop looking if we find a macaddrs with the same vlan whether
    * this is a pvid macaddr or non-pvid. If we don't find a matching vlan
    * (and also no pid mismatch), then the vlan has changed and we must
    * flush all sessions that belong to the pvid macaddrs. non-pvid
    * macaddrs are not considered for flushing in that case because we assume
    * that only the "primary vlan" has changed and all other vlans are intact.
    */
   p_pvid = NULL;
   hlist_for_each_entry_rcu(p, &ctx->macaddr_hashtab[hash], macaddr_list) {
      if (ether_addr_equal(p->mac, mac)) {
         if (p->pid_handle != pid_handle) {
            /* A different pid is tolerated if it names pid_handle as its
             * ingress pid or if both pids share a non-zero pid group.
             */
            struct avm_pa_pid *pid = PA_PID(ctx, p->pid_handle);
            if (pid->ingress_pid_handle != pid_handle) {
               if (pid_group == 0 || pid_group != pid->ecfg.pid_group) {
                  pid_changed = true;
                  /* p keeps pointing at the mismatching entry for the report below */
                  break;
               }
            }
         } else if (!PA_PID(ctx, pid_handle)->bridging_ok) {
            /* If the pid doesn't allow for bridge sessions then vlan checks can be
             * skipped. All sessions that egress to a !bridging_ok pid are fully classified
             * and there is no uncertainty about which vlan tag to add.
             */
            return;
         } else if (p->vlan_id == vlan_id) {
            /* If vlan_id matches the vlan of the macaddr then it's alright. */
            return;
         } else if (PA_MACADDR_IS_PVID(p)) {
            p_pvid = p;
         }
      }
   }

   {
      /* snapshot to account how many sessions get flushed below */
      int old = ctx->stats.sess_flushed;

      if (pid_changed) {
         net_info_ratelimited("avm_pa: pid change (pid) for %pM (%s(%d) -> %s(%d))\n",
                 p->mac,
                 PA_PID(ctx, p->pid_handle)->cfg.name, p->pid_handle,
                 PA_PID(ctx, pid_handle)->cfg.name, pid_handle);
         avm_pa_flush_sessions_for_mac(p->mac);
      } else if (p_pvid) {
         /* same pid, but no entry with this vlan: the pvid has changed */
         char vlan1[16] = "none";
         char vlan2[16] = "none";
         if (p_pvid->vlan_id)
            snprintf(vlan1, sizeof(vlan1), "%d", p_pvid->vlan_id);
         if (vlan_id)
            snprintf(vlan2, sizeof(vlan2), "%d", vlan_id);
         net_info_ratelimited("avm_pa: pid change (pvid) for %pM (%s -> %s)\n",
                 p_pvid->mac, vlan1, vlan2);
         avm_pa_flush_sessions_with_destmac(p_pvid);
      }

      ctx->stats.sess_pidchanged += ctx->stats.sess_flushed - old;
   }
}

/* ------------------------------------------------------------------------ */
/* -------- pid life cycle management ------------------------------------- */
/* ------------------------------------------------------------------------ */

/*
 * kref release callback for ctx->hw_pa_ref (see pa_hw_pa_put()).
 * Clears the hardware_pa flags and, if a flush completion is pending,
 * completes it and forgets it.
 */
static void
_pa_hw_pa_release(struct kref *ref)
{
   struct avm_pa_global *ctx = &pa_glob;

   ctx->hardware_pa.flags = 0;

   if (ctx->hw_pa_flush_completion == NULL)
      return;

   complete(ctx->hw_pa_flush_completion);
   ctx->hw_pa_flush_completion = NULL;
}

/*
 * Try to take a reference on the hardware pa.
 * Returns the result of kref_get_unless_zero(), i.e. 0 if the ref count
 * already was zero and no reference was taken.
 */
static int
pa_hw_pa_get(void)
{
   struct avm_pa_global *ctx = &pa_glob;
   int got;

   got = kref_get_unless_zero(&ctx->hw_pa_ref);
   return got;
}

/*
 * Drop a reference on the hardware pa; _pa_hw_pa_release() runs when the
 * last reference goes away. Returns the result of kref_put().
 */
static int
pa_hw_pa_put(void)
{
   struct avm_pa_global *ctx = &pa_glob;
   int released;

   released = kref_put(&ctx->hw_pa_ref, _pa_hw_pa_release);
   return released;
}

/*
 * Check the callback set of a hardware pa.
 *
 * Valid means: remove_session is set and exactly one of add_session and
 * add_session_skb is set. probe_session is not checked here (it will
 * become mandatory as well after some transition period).
 *
 * Returns 1 if valid, 0 otherwise.
 */
static int
pa_hw_pa_valid(struct avm_hardware_pa *hwpa)
{
   int nadd = 0;

   if (!hwpa->remove_session)
      return 0;

   if (hwpa->add_session)
      nadd++;
   if (hwpa->add_session_skb)
      nadd++;

   return nadd == 1;
}

/*
 * Initialise the avm_pa_pid slot for pid_handle from cfg.
 *
 * The slot must currently be unreferenced (ref count zero); it is zeroed
 * and gets a fresh reference via kref_init(). Calling this for handle 0 or
 * for a slot that still has references is treated as a bug (BUG()).
 * The slot is set up under avm_pa_lock with bottom halves disabled.
 */
static void inline
pa_pid_init(avm_pid_handle pid_handle, struct avm_pa_pid_cfg *cfg)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *pid = PA_PID(ctx, pid_handle);

   BUG_ON(pid_handle == 0);

#if AVM_PA_REF_DEBUG
   pr_err("avm_pa: pid %d ref %d (%s) called from %pf (before)\n",
          pid_handle,
          kref_read(&pid->ref),
          "pa_pid_init", (void *)_RET_IP_);
#endif

   spin_lock_bh(&avm_pa_lock);
   /* Do not call pa_pid_get() on purpose. That would check "pid->pid_handle == 0"
    * in addition to the actual refcount, and return no new reference in that case.
    * But we want to detect if we're being called while no new reference are allowed
    */
   if (kref_get_unless_zero(&pid->ref) == 0) {
      /* slot is unused: start from a clean state with one reference */
      memset(pid, 0, sizeof(struct avm_pa_pid));
      kref_init(&pid->ref);
#if AVM_PA_REF_DEBUG
      pr_err("avm_pa: pid %d ref %d (%s) called from %pf (after)\n",
             pid_handle,
             kref_read(&pid->ref),
             "pa_pid_init(new)", (void *)_RET_IP_);
#endif
   } else {
      pr_err("avm_pa: pid %d (%s) ref %d already registered\n",
             pid_handle, cfg->name,
             kref_read(&pid->ref));
      spin_unlock_bh(&avm_pa_lock);
      BUG();
   }
   pid->pid_handle = pid_handle;
   pid->cfg = *cfg;

   /* a default_mtu of 0 in cfg selects 1500 */
   if (pid->cfg.default_mtu == 0)
      pid->cfg.default_mtu = 1500;

   pid->bridging_ok = 1;
   pid->ingress_framing = cfg->framing;
   /* Derive egress framing from the configured framing. All framings except
    * ptype clear cfg.ptype; ptype instead clears the tx callback and its
    * argument and activates hardware acceleration for the pid.
    */
   switch (cfg->framing) {
      case avm_pa_framing_llcsnap:
      case avm_pa_framing_ether:
      case avm_pa_framing_ppp:
      case avm_pa_framing_ip:
      case avm_pa_framing_ipdev:
         pid->egress_framing = cfg->framing;
         pid->cfg.ptype = 0;
         break;
      case avm_pa_framing_dev:
         pid->egress_framing = avm_pa_framing_ether;
         pid->cfg.ptype = 0;
         break;
      case avm_pa_framing_ptype:
         pid->egress_framing = cfg->framing;
         pid->cfg.tx_func = 0;
         pid->cfg.tx_arg = 0;
         avm_pa_pid_activate_hw_accelaration(pid_handle);
         break;
   }

   spin_unlock_bh(&avm_pa_lock);
}

/*
 * kref release callback for avm_pa_pid.ref.
 *
 * Detaches and frees the hw info of the pid and completes a pending
 * release completion, if any. pid->cfg is left as is (kept for reuse by
 * name).
 */
static void
_pa_pid_delete(struct kref *ref)
{
   struct avm_pa_pid        *pid    = container_of(ref, struct avm_pa_pid, ref);
   struct completion        *waiter = pid->release_completion;
   struct avm_pa_pid_hwinfo *hwinfo = pid->hw;

   /* pid_handle is only cleared by avm_pa_dev_unregister(). Dropping the
    * last reference without having gone through that function is a bug.
    */
   BUG_ON(pid->pid_handle != 0);

#if AVM_PA_REF_DEBUG
   pr_err("avm_pa: pid %d ref %d (%s) called from %pf\n",
          pid->pid_handle,
          kref_read(&pid->ref),
          "_pa_pid_delete", (void *)_RET_IP_);
#endif
   /* forget everything hanging off the pid before releasing it */
   pid->release_completion = NULL;
   pid->hw = NULL;
   pid->ingress_pid_handle = 0;

   kfree(hwinfo);
   if (waiter != NULL)
      complete(waiter);
}

/*
 * Drop one reference on the avm_pa_pid that belongs to pid_handle.
 * The pid is released through _pa_pid_delete() when this was the last
 * reference.
 *
 * pid_handle must not be 0.
 *
 * Returns the result of kref_put(): 1 if the pid was released, else 0.
 */
static int
pa_pid_put(avm_pid_handle pid_handle)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid    *pid = PA_PID(ctx, pid_handle);
   int released;

   BUG_ON(pid_handle == 0);
#if AVM_PA_REF_DEBUG
   pr_err("avm_pa: pid %d ref %d (%s) called from %pf (before)\n",
          pid_handle,
          kref_read(&pid->ref),
          "pa_pid_put", (void *)_RET_IP_);
#endif

   released = kref_put(&pid->ref, _pa_pid_delete);

#if AVM_PA_REF_DEBUG
   pr_err("avm_pa: pid %d ref %d (%s) called from %pf (after)\n",
          pid_handle,
          kref_read(&pid->ref),
          "pa_pid_put", (void *)_RET_IP_);
#endif
   return released;
}

/*
 * Take a reference on the avm_pa_pid that belongs to pid_handle.
 *
 * Each session holds a ref on all pids involved. With a valid session
 * (as per pa_session_valid()) use PA_PID() instead, especially in the fast
 * path, as refcounting is unnecessarily expensive.
 *
 * pid_handle must not be 0.
 *
 * Returns the pid handle on success. Returns 0 without holding a reference
 * if the ref count already was zero or if the slot does not carry
 * pid_handle (anymore).
 */
static avm_pid_handle
pa_pid_get(avm_pid_handle pid_handle)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid    *pid = PA_PID(ctx, pid_handle);

   BUG_ON(pid_handle == 0);

#if AVM_PA_REF_DEBUG
   pr_err("avm_pa: pid %d ref %d (%s) called from %pf (before)\n",
          pid_handle,
          kref_read(&pid->ref),
          "pa_pid_get", (void *)_RET_IP_);
#endif

   if (!kref_get_unless_zero(&pid->ref))
      return 0;

#if AVM_PA_REF_DEBUG
   pr_err("avm_pa: pid %d ref %d (%s) called from %pf (after)\n",
          pid_handle,
          kref_read(&pid->ref),
          "pa_pid_get", (void *)_RET_IP_);
#endif

   if (pid->pid_handle == pid_handle)
      return pid->pid_handle;

   /* avm_pa_dev_unregister() clears pid->pid_handle to prevent new
    * references, so give back the one just taken.
    */
   kref_put(&pid->ref, _pa_pid_delete);
   return 0;
}

/*
 * Like pa_pid_get(), but returns the avm_pa_pid itself and accepts a
 * pid_handle of 0.
 *
 * Returns NULL, without holding a reference, if pid_handle is 0 or if
 * pa_pid_get() fails.
 */
static struct avm_pa_pid *
pa_pid_get_pid(avm_pid_handle pid_handle)
{
   struct avm_pa_global *ctx = &pa_glob;
   avm_pid_handle got;

   if (pid_handle == 0)
      return NULL;

   got = pa_pid_get(pid_handle);
   if (got == 0)
      return NULL;

   return PA_PID(ctx, got);
}


/* Uninlined versions for other modules, hot code paths should use pa_pid_get().
 *
 * Takes a reference on the pid for pid_handle and returns it, or NULL if
 * pid_handle is 0 or no reference could be taken (see pa_pid_get_pid()).
 */
struct avm_pa_pid *
avm_pa_pid_get_pid(avm_pid_handle pid_handle)
{
   return pa_pid_get_pid(pid_handle);
}

/* Non-static wrapper around pa_pid_put(): drops one reference on the pid
 * for pid_handle and returns what pa_pid_put() returns.
 */
int
avm_pa_pid_put(avm_pid_handle pid_handle)
{
   return pa_pid_put(pid_handle);
}

/* Same for vpid, but don't tell: there is no reference counting yet.
 *
 * Returns the avm_pa_vpid for vpid_handle if its vpid_handle field is
 * non-zero, otherwise NULL.
 */
struct avm_pa_vpid *
avm_pa_vpid_get_vpid(avm_vpid_handle vpid_handle)
{
   struct avm_pa_global *ctx  = &pa_glob;
   struct avm_pa_vpid   *vpid = PA_VPID(ctx, vpid_handle);

   return vpid->vpid_handle ? vpid : NULL;
}

/* Counterpart of avm_pa_vpid_get_vpid(). vpid_handle is not used and the
 * function always returns 1.
 */
int
avm_pa_vpid_put(avm_vpid_handle vpid_handle)
{
   return 1;
}


/* ------------------------------------------------------------------------ */
/* -------- bsession management ------------------------------------------- */
/* ------------------------------------------------------------------------ */

/*
 * Return the protocol that follows the ethernet header and an optional
 * single 802.1Q/802.1ad tag, in network byte order.
 */
static inline __be16
pa_vlanh_l3proto(struct vlan_ethhdr *ethh)
{
   const __be16 outer = ethh->h_vlan_proto;

   /* untagged: the type field right behind the addresses is the protocol */
   if (   outer != __constant_htons(ETH_P_8021Q)
       && outer != __constant_htons(ETH_P_8021AD))
      return outer;

   return ethh->h_vlan_encapsulated_proto;
}

/*
 * Return the vlan id (host byte order) from an 802.1Q/802.1ad tagged
 * header, or 0 if the header carries no such tag.
 */
static inline u16
pa_vlanh_vid(struct vlan_ethhdr *ethh)
{
   switch (ethh->h_vlan_proto) {
      case __constant_htons(ETH_P_8021Q):
      case __constant_htons(ETH_P_8021AD):
         return ntohs(ethh->h_vlan_TCI) & VLAN_VID_MASK;
      default:
         return 0;
   }
}

/*
 * Build the secondary key of a bridge session: the l3 protocol as found
 * in the header in the low 16 bits and the vlan id shifted left by 16.
 *
 * A non-zero vlan_tci takes precedence over a vlan tag in the header.
 */
static inline u32
pa_bkey(struct vlan_ethhdr *ethh, u16 vlan_tci)
{
   u16 vid = vlan_tci ? (vlan_tci & VLAN_VID_MASK) : pa_vlanh_vid(ethh);

   return pa_vlanh_l3proto(ethh) | vid << 16;
}


/*
 * Hash for bridge sessions: jhash over the last four bytes of the source
 * address, the last four bytes of the destination address and pa_bkey().
 */
static inline u32
pa_bhash(struct vlan_ethhdr *ethh, u16 vlan_tci)
{
   u32 src_tail = get_unaligned((u32 *)(&ethh->h_source[2]));
   u32 dst_tail = get_unaligned((u32 *)(&ethh->h_dest[2]));
   u32 key      = pa_bkey(ethh, vlan_tci);

   return jhash_3words(src_tail, dst_tail, key, 0);
}


/*
 * Search the bridge session hash of pid for a session that is not flushed
 * and matches the MAC address pair in ethh and the given key.
 *
 * Returns the session, or NULL if there is none.
 */
static inline struct avm_pa_session *
pa_bsession_hash_search(struct avm_pa_pid *pid, u32 hash, struct vlan_ethhdr *ethh, u32 key)
{
   struct avm_pa_data     *pd = &pa_data;
   struct avm_pa_bsession *bs = NULL;
   u32 bucket                 = hash%AVM_PA_MAX_HASH;

   rcu_read_lock();
   /* The primary identifier for bsessions is the MAC address pair. The ingress vlan id
    * and l3 protocol are additional keys that must match. MAC addresses and l3 protocol
    * are fixed for a given bsession and cannot change. Vlan id may change between
    * ingress and egress iff the packet data is not modified, i.e. vlan is
    * signalled out-of-band via skb->vlan_tci.
    */
   hlist_for_each_entry_rcu(bs, &pid->hash_bsess[bucket], hash_list) {
      if (memcmp(ethh, bs->hdr, ETH_ALEN*2) != 0)
         continue;
      if (key != bs->key)
         continue;
      /* Don't consider flushed sessions */
      if (test_bit(PA_S_FLUSHED, &PA_SESSION(pd, bs->session_handle)->flags))
         continue;
      break;
   }
   rcu_read_unlock();

   if (bs == NULL)
      return NULL;
   return PA_SESSION(pd, bs->session_handle);
}

/*
 * Search the bridge sessions of pid for the packet header ethh; hash and
 * key are derived from the header and vlan_tci.
 */
static inline struct avm_pa_session *
pa_bsession_search(struct avm_pa_pid *pid, struct vlan_ethhdr *ethh, u16 vlan_tci)
{
   u32 hash = pa_bhash(ethh, vlan_tci);
   u32 key  = pa_bkey(ethh, vlan_tci);

   return pa_bsession_hash_search(pid, hash, ethh, key);
}


static void
pa_change_to_bridge_session(struct avm_pa_session *session)
{
   struct avm_pa_global     *ctx  = &pa_glob;
   struct avm_pa_bsession   *p    = &ctx->bsess_array[session->session_handle];
   struct avm_pa_pkt_match  *match = &session->ingress;
   struct avm_pa_match_info *info = pa_find_eth_match(match);

   BUG_ON(!info);

   INIT_HLIST_NODE(&p->hash_list);
   p->hdr = (struct vlan_ethhdr *) (HDRCOPY(match) + info->offset);
   p->hash = pa_bhash(p->hdr, match->vlan_tci);
   p->key = pa_bkey(p->hdr, match->vlan_tci);
   p->session_handle = session->session_handle;
   ctx->stats.nbsessions++;

   pa_change_to_bridge_match(match);
   pa_change_to_bridge_match(&avm_pa_first_egress(session)->match);
   session->timeout = ctx->bridge_timeout_secs*HZ;
   session->bsession = p;
}

/* ------------------------------------------------------------------------ */
/* -------- session management -------------------------------------------- */
/* ------------------------------------------------------------------------ */

/* Search for ACTIVE sessions */
#define pa_session_search(pid, match) pa_session_hash_search(pid, match)

/*
 * Search the session hash of pid for a session that is not flushed and
 * whose ingress match equals ingress (as per pa_match_eq()).
 *
 * Returns the session, or NULL if there is none.
 */
static struct avm_pa_session *
pa_session_hash_search(struct avm_pa_pid *pid, struct avm_pa_pkt_match *ingress)
{
   struct avm_pa_session *cand;
   u32 bucket = ingress->hash%AVM_PA_MAX_HASH;

   rcu_read_lock();
   hlist_for_each_entry_rcu(cand, &pid->hash_sess[bucket], hash_list) {
      if (!pa_match_eq(ingress, &cand->ingress))
         continue;
      /* Don't consider flushed sessions */
      if (test_bit(PA_S_FLUSHED, &cand->flags))
         continue;
      break;
   }
   rcu_read_unlock();

   /* cand is NULL if the loop ran to the end of the bucket */
   return cand;
}

static void pa_session_hash_insert(struct avm_pa_pid *pid,
                                   struct avm_pa_session *session)
{
   struct avm_pa_bsession *bsession = session->bsession;
   u32 h = session->ingress.hash%AVM_PA_MAX_HASH;

   hlist_add_head_rcu(&session->hash_list, &pid->hash_sess[h]);
   if (bsession) {
      h = bsession->hash%AVM_PA_MAX_HASH;
      hlist_add_head_rcu(&bsession->hash_list, &pid->hash_bsess[h]);
   }
}

static void pa_session_hash_delete(struct avm_pa_pid *pid,
                                   struct avm_pa_session *session)
{
   struct avm_pa_bsession *bsession = session->bsession;

   hlist_del_init_rcu(&session->hash_list);
   if (bsession)
      hlist_del_init_rcu(&bsession->hash_list);
}

/*
 * Take session off the session list it is on and adjust that list's
 * session count. Does nothing if the session is on no list
 * (on_list is not below AVM_PA_LIST_MAX).
 */
static void
pa_session_list_delete(struct avm_pa_session *session)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_session_list *list;

   if (session->on_list >= AVM_PA_LIST_MAX)
      return;

   list = &ctx->sess_list[session->on_list];

   /* the list accounting must agree that there is something to remove */
   BUG_ON(list->nsessions == 0 || list_empty(&list->sessions));

   session->on_list = AVM_PA_LIST_MAX;
   list_del_rcu(&session->session_list);
   list->nsessions--;
}


/*
 * Move session to the session list selected by which, removing it from
 * its current list first, and keep the list's session count and high
 * water mark up to date.
 */
static void
pa_session_list_update(struct avm_pa_session *session, int which)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_session_list *target = &ctx->sess_list[which];

   pa_session_list_delete(session);

   target->nsessions++;
   if (target->maxsessions < target->nsessions)
      target->maxsessions = target->nsessions;
   list_add_rcu(&session->session_list, &target->sessions);
   session->on_list = which;

   /* Ensure the GC timer runs if sessions are on any list (except FREE).
    * mod_timer() only if necessary, to maintain the ~0.5s interval even if
    * sessions are constantly added or removed */
   if (which == AVM_PA_LIST_FREE)
      return;
   if (!timer_pending(&ctx->tick_timer))
      mod_timer(&ctx->tick_timer, jiffies + AVM_PA_TICK_RATE);
}

/* Restart the timeout of session: endtime becomes now plus session->timeout. */
static void
pa_session_update(struct avm_pa_session *session)
{
   /* Update endtime regardless of the session state, the endtime is only relevant
    * in state ACTIVE (a previous BUG_ON() was regularly triggered, see JZ-43644).  */
   session->endtime = jiffies + session->timeout;
}

/*
 * Try to move a newly created session to state ACTIVE.
 *
 * Under avm_pa_lock the session is inserted into the hash of its ingress
 * pid and put on the ACTIVE list, provided that both the ingress pid and
 * the pid of the static egress are still registered and no equal session
 * is in the hash yet. Otherwise the session is killed.
 *
 * Returns AVM_PA_TX_SESSION_EXISTS if an equal session was found,
 * otherwise AVM_PA_TX_SESSION_ADDED.
 *
 * NOTE(review): AVM_PA_TX_SESSION_ADDED is also returned when the session
 * was killed because one of the pids is gone - confirm that callers can
 * cope with that.
 */
static int
pa_session_activate(struct avm_pa_session *session)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *ipid, *epid;
   struct avm_pa_session *s = NULL;
   struct avm_pa_bsession *bs = NULL;

   ipid = PA_PID(ctx, session->ingress_pid_handle);
   epid = PA_PID(ctx, session->static_egress.pid_handle);

   spin_lock(&avm_pa_lock);
   /* Move to ACTIVE only if no "same session" exists and PIDs are ready to use.
    *
    * Session creation can happen concurrently, but after this call only one
    * session of a kind may exist (to avoid confusing hardware acceleration), so the
    * hash lookup finds if anyone else won the race.
    * PID deregistration can also happen concurrently. Therefore we need
    * check if pid->pid_handle is still valid (inside the lock). We don't
    * need a full reference because they are hold by the session.
    */
   if ((bs = session->bsession))
      s = pa_bsession_hash_search(ipid, bs->hash, bs->hdr, bs->key);
   else
      s = pa_session_hash_search(ipid, &session->ingress);

   if (ipid->pid_handle && epid->pid_handle && s == 0) {
      pa_session_hash_insert(ipid, session);
      pa_session_list_update(session, AVM_PA_LIST_ACTIVE);
      pa_session_update(session);
#if (defined(CONFIG_AVM_GENERIC_CONNTRACK) || defined(CONFIG_AVM_PA_GENERIC_CT))
      /* session->generic_ct is shared between sessions and access must be locked.
       * See comment at pa_session_kill_nolock(). */
      if (session->generic_ct) {
         /* the session handle is stored as the (pointer sized) session id */
         u32 session_handle = (u32)session->session_handle;
         generic_ct_sessionid_set(session->generic_ct, session->generic_ct_dir, (void *)session_handle);
      }
#endif
      /* The sessions is now permanent, so are the sessions references to the pids. */
   } else {
      /* Session wasn't on state ACTIVE yet, so it's safe to kill without flush.
       * This will release the session's references as well
       */
      pa_session_kill_nolock(session, s ? "lost creation race" : "pid gone");
   }

   spin_unlock(&avm_pa_lock);

   return s ? AVM_PA_TX_SESSION_EXISTS : AVM_PA_TX_SESSION_ADDED;
}


/*
 * Populate the FREE session list and the egress free list at init time.
 *
 * Sessions CONFIG_AVM_PA_MAX_SESSION-1 down to 1 are added at the head of
 * the FREE list; session 0 is never put on it. All entries of the egress
 * pool are added at the head of the egress free list, last entry first.
 */
static void __init avm_pa_init_freelist(void)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_data   *pd  = &pa_data;
   struct avm_pa_session_list *free_list = &ctx->sess_list[AVM_PA_LIST_FREE];
   struct avm_pa_session *session;
   struct avm_pa_egress *egress;
   int idx;

   idx = CONFIG_AVM_PA_MAX_SESSION;
   while (--idx > 0) {
      session = PA_SESSION(pd, idx);
      list_add(&session->session_list, &free_list->sessions);
      session->on_list = AVM_PA_LIST_FREE;
   }
   free_list->nsessions = CONFIG_AVM_PA_MAX_SESSION - 1;
   free_list->maxsessions = free_list->nsessions;

   idx = ARRAY_SIZE(pd->egress_pool);
   while (idx-- > 0) {
      egress = &pd->egress_pool[idx];
      hlist_add_head(&egress->egress_list, &ctx->egress_freelist);
   }
}

static struct avm_pa_session *pa_session_alloc(struct avm_pa_pkt_match *match)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_data   *pd  = &pa_data;
   struct avm_pa_session *session;
   struct avm_pa_session_list *free_list = &ctx->sess_list[AVM_PA_LIST_FREE];

   session = NULL;
   spin_lock(&avm_pa_lock);
   if (!list_empty(&free_list->sessions)) {
      session = list_first_entry(&free_list->sessions, struct avm_pa_session, session_list);
      pa_session_list_delete(session);
      memset(session, 0, sizeof(struct avm_pa_session));
      INIT_HLIST_NODE(&session->hash_list);
      INIT_LIST_HEAD(&session->session_list);
      INIT_HLIST_HEAD(&session->egress_head);
      INIT_HLIST_HEAD(&session->groups);
      hlist_add_head_rcu(&session->static_egress.egress_list, &session->egress_head);
      session->negress = 1;
      session->session_handle = session - pd->sessions;
      session->on_list = AVM_PA_LIST_MAX;
      session->uniq_id = atomic_inc_return(&ctx->session_uniq_id);
      session->ingress = *match;
      session->starttime = jiffies;
      session->endtime = jiffies;
      switch (AVM_PA_PKTTYPE_IPPROTO(match->pkttype)) {
         case IPPROTO_TCP:
            session->timeout = ctx->tcp_timeout_secs*HZ;
            break;
         case IPPROTO_UDP:
         case IPPROTO_ESP:
         case IPPROTO_L2TP:
            session->timeout = ctx->udp_timeout_secs*HZ;
            break;
         case IPPROTO_ICMPV6:
         case IPPROTO_ICMP:
            session->timeout = ctx->echo_timeout_secs*HZ;
            break;
         default:
            session->timeout = ctx->bridge_timeout_secs * HZ;
            break;
      }
   }
   spin_unlock(&avm_pa_lock);

   return session;
}

/*
 * Take an egress from the egress free list.
 * Returns a zeroed egress with an initialised list node, or NULL if the
 * free list is empty.
 */
static struct avm_pa_egress *
pa_egress_alloc(void)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_egress *egress = NULL;

   spin_lock(&avm_pa_lock);

   if (hlist_empty(&ctx->egress_freelist))
      goto unlock;

   egress = hlist_entry(hlist_first_rcu(&ctx->egress_freelist), struct avm_pa_egress, egress_list);
   hlist_del_rcu(&egress->egress_list);
   memset(egress, 0, sizeof(*egress));
   INIT_HLIST_NODE(&egress->egress_list);

unlock:
   spin_unlock(&avm_pa_lock);

   return egress;
}

/*
 * Give egress back to the egress free list. If it is still on a list it
 * is removed from that list first.
 */
void
pa_egress_free(struct avm_pa_egress *egress)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct hlist_node *node = &egress->egress_list;

   spin_lock(&avm_pa_lock);

   if (!hlist_unhashed(node))
      hlist_del_rcu(node);
   hlist_add_head_rcu(node, &ctx->egress_freelist);

   spin_unlock(&avm_pa_lock);
}

/*
 * Print the vlan id of a match through the output callback.
 *
 * info is the VLAN match entry; if it is NULL the entry right behind the
 * ethernet entry of match is used, and nothing is printed if that is not
 * a VLAN entry. The id is read from the copied header, or from
 * match->vlan_tci (printed as "Vlan*") if the entry has no header offset.
 */
static void
pa_show_vlan_match(struct avm_pa_pkt_match *match,
                   struct avm_pa_match_info *info,
                   pa_fprintf fprintffunc, void *arg)
{
   hdrunion_t *hdr;

   if (info == NULL) {
      info = pa_find_eth_match(match);
      if (info == NULL)
         return;
      info++;
      if (info->type != AVM_PA_VLAN)
         return;
   }

   /* At this time, AVM_PA_OFFSET_NOT_SET is only possible for type == AVM_PA_VLAN.
    * There is no header to look at in this case! */
   if (info->offset == AVM_PA_OFFSET_NOT_SET) {
      fprintffunc(arg, "Vlan* ID       : %d\n", match->vlan_tci&VLAN_VID_MASK);
      return;
   }

   hdr = (hdrunion_t *) (HDRCOPY(match) + info->offset);
   fprintffunc(arg, "Vlan ID        : %d\n", VLAN_ID(&hdr->vlanh));
}


/*
 * Print the parts of a match that matter for bridge sessions: packet
 * type and, if the match has an ethernet entry, the MAC address pair and
 * the bridge key.
 */
static void
pa_show_pkt_bridge_match(struct avm_pa_pkt_match *match,
                         pa_fprintf fprintffunc, void *arg)
{
   struct avm_pa_match_info *eth;
   struct vlan_ethhdr *ethh;
   char buf[128];

   pkttype2str(match->pkttype & AVM_PA_PKTTYPE_IP_MASK, buf, sizeof(buf));
   fprintffunc(arg, "%-15s: %s\n", "PktType", buf);

   eth = pa_find_eth_match(match);
   if (eth == NULL)
      return;

   ethh = (struct vlan_ethhdr *) (HDRCOPY(match) + eth->offset);
   fprintffunc(arg, "%-15s: %pM %pM\n", "Eth Addr", ethh->h_dest, ethh->h_source);
   fprintffunc(arg, "%-15s: %08x\n", "Key", pa_bkey(ethh, match->vlan_tci));
}


/*
 * Print a full description of a match through the output callback.
 *
 * @match:          the match to print
 * @egress_pkttype: if non-zero and different from match->pkttype, the
 *                  packet type line shows "ingress -> egress"
 * @fprintffunc:    output callback
 * @arg:            passed through to fprintffunc
 *
 * After packet type, protocol and header length one line is printed per
 * match entry; entry types without a case below print nothing.
 */
static void
pa_show_pkt_full_match(struct avm_pa_pkt_match *match, u16 egress_pkttype,
                       pa_fprintf fprintffunc, void *arg)
{
   char buf[128];
   const char *prompt = "PktType";
   unsigned n;
   int s;

   if (egress_pkttype && egress_pkttype != match->pkttype) {
      /* both packet types share buf, one half each */
      size_t half = sizeof(buf)/2;
      pkttype2str(match->pkttype, buf, half);
      pkttype2str(egress_pkttype, buf+half, half);
      (*fprintffunc)(arg, "%-15s: %s -> %s\n", prompt, buf, buf+half);
   } else {
      pkttype2str(match->pkttype, buf, sizeof(buf));
      (*fprintffunc)(arg, "%-15s: %s\n", prompt, buf);
   }

   (*fprintffunc)(arg, "Protocol       : %04X\n", ntohs(match->protocol));
   (*fprintffunc)(arg, "%-15s: %d (stored %d)\n", "Header len", match->full_hdrlen, match->hdrlen);

   for (n=0; n < match->nmatch; n++) {
      struct avm_pa_match_info *p = match->match+n;
      /* view of the copied header at this entry's offset */
      hdrunion_t *hdr = (hdrunion_t *) (HDRCOPY(match) + p->offset);
      switch (p->type) {
         case AVM_PA_ETH:
            /* "<dest> <source>" */
            s = mac2str(&hdr->ethh.h_dest, buf, sizeof(buf));
            buf[s++] = ' ';
            mac2str(&hdr->ethh.h_source, &buf[s], sizeof(buf) - s);
            (*fprintffunc)(arg, "Eth Hdr        : %s proto %04X\n", buf,
                           ntohs(hdr->ethh.h_proto));
            break;
         case AVM_PA_VLAN:
            /* VLAN match can come from the payload or skb->vlan_tci */
            pa_show_vlan_match(match, p, fprintffunc, arg);
            break;
         case AVM_PA_PPPOE:
            (*fprintffunc)(arg, "PPPoE Sid      : %04X [hdroff %d]\n", ntohs(hdr->pppoeh.sid), match->pppoe_offset);
            break;
         case AVM_PA_PPP:
            (*fprintffunc)(arg, "PPP Proto      : %02X\n", hdr->ppph[0]);
            break;
         case AVM_PA_IPV4:
            /* "<source> <dest>" */
            s = in_addr2str(&hdr->iph.saddr, buf, sizeof(buf));
            buf[s++] = ' ';
            in_addr2str(&hdr->iph.daddr, &buf[s], sizeof(buf) - s);
            (*fprintffunc)(arg, "IPv4 Hdr       : %s proto %d tos %02X [hdroff %d]\n",
                           buf, hdr->iph.protocol, ipv4_get_dsfield(&hdr->iph), match->ip_offset);
            break;
         case AVM_PA_IPV6:
            /* "<source> <dest>" */
            s = in6_addr2str(&hdr->ipv6h.saddr, buf, sizeof(buf));
            buf[s++] = ' ';
            in6_addr2str(&hdr->ipv6h.daddr, &buf[s], sizeof(buf) - s);
            (*fprintffunc)(arg, "IPv6 Hdr       : %s proto %d tos %02X flow %05X [hdroff %d]\n",
                           buf, hdr->ipv6h.nexthdr, ipv6_get_dsfield(&hdr->ipv6h),
                           ntohl(hdr->ipv6_vpfl) & 0xfffff, match->ip_offset);
            break;
         case AVM_PA_PORTS:
            (*fprintffunc)(arg, "Ports          : %d -> %d [hdroff %d]\n",
                     ntohs(hdr->ports[0]), ntohs(hdr->ports[1]), match->l4_offset);
            break;
         case AVM_PA_ICMPV4:
            /* only echo request/reply are decoded */
            prompt = "ICMPv4";
            switch (hdr->icmph.type) {
               case ICMP_ECHOREPLY:
                  (*fprintffunc)(arg, "%-15s: echo reply id=%hu [hdroff %d]\n",  prompt,
                           hdr->icmph.un.echo.id, match->l4_offset);
                  break;
               case ICMP_ECHO:
                  (*fprintffunc)(arg, "%-15s: echo request id=%hu [hdroff %d]\n",  prompt,
                           hdr->icmph.un.echo.id, match->l4_offset);
                  break;
               default:
                  (*fprintffunc)(arg, "??????\n");
                  break;
            }
            break;
         case AVM_PA_ICMPV6:
            /* only echo request/reply are decoded */
            prompt = "ICMPv6";
            switch (hdr->icmpv6h.icmp6_type) {
               case ICMPV6_ECHO_REQUEST:
                  (*fprintffunc)(arg, "%-15s: echo request id=%hu [hdroff %d]\n", prompt,
                           hdr->icmpv6h.icmp6_identifier, match->l4_offset);
                  break;
               case ICMPV6_ECHO_REPLY:
                  (*fprintffunc)(arg, "%-15s: echo reply id=%hu [hdroff %d]\n", prompt,
                           hdr->icmpv6h.icmp6_identifier, match->l4_offset);
                  break;
               default:
                  (*fprintffunc)(arg, "??????\n");
                  break;
            }
            break;
         case AVM_PA_LLC_SNAP:
            (*fprintffunc)(arg, "LLC SNAP       : %04X\n", ntohs(hdr->llcsnap.type));
            break;
         case AVM_PA_LISP:
            (*fprintffunc)(arg, "LISP           : data header [hdroff %d]\n", match->lisp_offset);
            break;
         case AVM_PA_L2TP:
            (*fprintffunc)(arg, "L2TP Sess      : %lu\n", (unsigned long)ntohl(hdr->l2tp.session_id));
            break;
         case AVM_PA_GRE:
            (*fprintffunc)(arg, "GRE Proto      : %04X\n", ntohs(hdr->greh.protocol));
            break;
         case AVM_PA_ESP:
            (*fprintffunc)(arg, "ESP SPI        : 0x%08X [hdroff %d]\n", ntohl(hdr->esph.spi), match->l4_offset);
            break;
      }
   }
}


/*
 * Print a packet match record through fprintffunc(arg, ...).
 * When 'bridged' is set the bridge-level printer is used, otherwise the
 * full printer (which additionally receives egress_pkttype).
 */
static void
pa_show_pkt_match(struct avm_pa_pkt_match *match,
                  bool bridged, u16 egress_pkttype,
                  pa_fprintf fprintffunc, void *arg)
{
   if (!bridged) {
      pa_show_pkt_full_match(match, egress_pkttype, fprintffunc, arg);
      return;
   }

   pa_show_pkt_bridge_match(match, fprintffunc, arg);
}


/*
 * Print a packet info record: ingress PID, the ingress/egress VPIDs (only
 * when their handle is non-zero), the "routed" and "shaped" markers (only
 * when set) and finally the full (non-bridged) match record.
 */
static void
pa_show_pkt_info(struct avm_pa_pkt_info *info,
                 pa_fprintf fprintffunc, void *arg)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *in_pid = PA_PID(ctx, info->ingress_pid_handle);

   fprintffunc(arg, "In Pid         : %d (%s)\n",
               info->ingress_pid_handle, in_pid->cfg.name);

   if (info->ingress_vpid_handle) {
      fprintffunc(arg, "In VPid        : %d (%s)\n",
                  info->ingress_vpid_handle,
                  PA_VPID(ctx, info->ingress_vpid_handle)->cfg.name);
   }

   if (info->egress_vpid_handle) {
      fprintffunc(arg, "Out VPid       : %d (%s)\n",
                  info->egress_vpid_handle,
                  PA_VPID(ctx, info->egress_vpid_handle)->cfg.name);
   }

   if (info->routed) {
      fprintffunc(arg, "Routed         : yes\n");
   }

   if (info->shaped) {
      fprintffunc(arg, "Shaped         : yes\n");
   }

   pa_show_pkt_match(&info->match, false, 0, fprintffunc, arg);
}


/*
 * Print a bridged session: session handle, ingress PID, hash, the
 * bridge-level ingress match and, for every egress, the output PID/VPID
 * (when set), the destination MAC (when linked) and the VLAN match.
 */
static void pa_show_bsession(struct avm_pa_bsession *bsession,
                             pa_fprintf fprintffunc, void *arg)
{
   struct avm_pa_session *session = PA_SESSION(&pa_data, bsession->session_handle);
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_egress *egress;
   unsigned egress_no = 0;

   fprintffunc(arg, "Session        : %d\n", bsession->session_handle);
   fprintffunc(arg, "In Pid         : %d (%s)\n",
               session->ingress_pid_handle,
               PA_PID(ctx, session->ingress_pid_handle)->cfg.name);
   fprintffunc(arg, "Hash           : %lu\n", (unsigned long)bsession->hash);

   pa_show_pkt_bridge_match(&session->ingress, fprintffunc, arg);

   /* In practice there is only a single egress, since multicast uses
    * normal sessions. */
   avm_pa_for_each_egress(egress, session) {
      egress_no++;
      fprintffunc(arg, "Egress         : %d of %d\n", egress_no, session->negress);

      if (egress->pid_handle) {
         fprintffunc(arg, "Out Pid        : %d (%s)\n",
                     egress->pid_handle,
                     PA_PID(ctx, egress->pid_handle)->cfg.name);
      }

      if (egress->vpid_handle) {
         fprintffunc(arg, "Out VPid       : %d (%s)\n",
                     egress->vpid_handle,
                     PA_VPID(ctx, egress->vpid_handle)->cfg.name);
      }

      if (egress->destmac) {
         pa_show_macaddr(egress->destmac, fprintffunc, arg);
      }

      pa_show_vlan_match(&egress->match, NULL, fprintffunc, arg);
   }
}


/*
 * Print a human readable dump of a session through fprintffunc(arg, ...):
 * identity, age, list state, ingress PID/VPID, hardware state, conntrack
 * information, ingress match, modification record, statistics and one
 * section per egress (type specific details plus per-egress counters).
 *
 * @session:     session to dump
 * @fprintffunc: printf-style output callback
 * @arg:         opaque first argument handed to @fprintffunc
 */
static void pa_show_session(struct avm_pa_session *session,
                            pa_fprintf fprintffunc, void *arg)

{
   struct avm_pa_global *ctx = &pa_glob;
   /* Scratch buffer, shared by the "dead (...)" state text and the hex dump
    * of the pushed L2 header. */
   char buf[max_t(size_t, KSYM_SYMBOL_LEN, 64ul)];
   struct avm_pa_macaddr *destmac;
   struct net_device *dev;
   unsigned negress;
   struct avm_pa_egress *egress;
   u16 pkttype;
   void *handler;

   (*fprintffunc)(arg, "Session        : %d (%d)\n", session->uniq_id, session->session_handle);

   {
      /* Age since session->starttime, split into minutes/seconds/millis */
      unsigned long ms, s, min;
      ms = jiffies_to_msecs(jiffies - session->starttime);
      s = ms/1000;
      min = s/60;
      if (min)
         (*fprintffunc)(arg, "Age            : %lumin %lu.%.3lus\n", min, s%60, ms%1000);
      else
         (*fprintffunc)(arg, "Age            : %lu.%.3lus\n", s, ms%1000);
   }

   {
      /* A session that is on none of the lists is reported as "create" */
      const char *state;
      if (session->on_list < AVM_PA_LIST_MAX) {
         const char *why = session->why_killed ? session->why_killed : "???";
         bool flushed = test_bit(PA_S_FLUSHED, &session->flags);
         switch (session->on_list) {
            case AVM_PA_LIST_ACTIVE: state = flushed ? "flushed" : "active"; break;
            case AVM_PA_LIST_DEAD: snprintf(buf, sizeof(buf), "dead (%s)", why); state = buf; break;
            case AVM_PA_LIST_FREE: state = "free"; break;
            default: state = "BAD STATE"; break;
         }
      } else {
         state = "create";
      }
      (*fprintffunc)(arg, "State          : %s\n", state);
   }

   (*fprintffunc)(arg, "In Pid         : %d (%s)\n",
                  session->ingress_pid_handle,
                  PA_PID(ctx, session->ingress_pid_handle)->cfg.name);

   if (session->ingress_vpid_handle) {
      (*fprintffunc)(arg, "In VPid        : %d (%s)\n",
                     session->ingress_vpid_handle,
                     PA_VPID(ctx, session->ingress_vpid_handle)->cfg.name);
   }

   if (pa_hw_pa_valid(&ctx->hardware_pa)) {
      bool in_hw = test_bit(PA_S_IN_HW, &session->flags);
      /* Prefer the hardware's own state string when it offers one */
      if ((in_hw || avm_pa_get_hw_session(session)) && ctx->hardware_pa.session_state)
         (*fprintffunc)(arg, "In HW          : %s\n", (*ctx->hardware_pa.session_state)(session));
      else
         (*fprintffunc)(arg, "In HW          : %s\n", in_hw ? "yes" : "no");
   }

#if (defined(CONFIG_AVM_GENERIC_CONNTRACK) || defined(CONFIG_AVM_PA_GENERIC_CT))
   if (session->generic_ct) {
      if (session->generic_ct_dir == GENERIC_CT_DIR_ORIGINAL)
         (*fprintffunc)(arg, "CT dir         : original\n");
      else
         (*fprintffunc)(arg, "CT dir         : reply\n");
   } else {
      (*fprintffunc)(arg, "CT             : no entry\n");
   }
#if (defined(CONFIG_BLOG) && IS_ENABLED(CONFIG_NF_CONNTRACK))
   if (session->ct) {
      (*fprintffunc)(arg, "nf_conntrack   : %lu\n", (unsigned long) session->ct);
   } else {
      (*fprintffunc)(arg, "nf_conntrack   : no entry\n");
   }
#endif
#endif

   (*fprintffunc)(arg, "Realtime       : %s\n", test_bit(PA_S_REALTIME, &session->flags) ? "yes" : "no");
#ifdef CONFIG_AVM_PA_RPS
   /* rps_cpu is stored off by one; 0 means "not set" */
   if (session->rps_cpu)
      (*fprintffunc)(arg, "RPS cpu        : %d\n", session->rps_cpu - 1);
#endif

   pa_show_pkt_match(&session->ingress,
                     session->bsession != 0, session->mod.pkttype,
                     fprintffunc, arg);

   pa_show_mod_rec(&session->mod, fprintffunc, arg);

   (*fprintffunc)(arg, "Hroom          : %u\n", (unsigned) session->needed_headroom);

   (*fprintffunc)(arg, "Timeout        : %hu\n", session->timeout/HZ);

   (*fprintffunc)(arg, "SW stats       : %lu pkts, %llu bytes\n",
                  (unsigned long)session->ingress_sw_stats.tx_pkts,
                  (unsigned long long)session->ingress_sw_stats.tx_bytes);

   (*fprintffunc)(arg, "HW stats       : %lu pkts, %llu bytes (validflags 0x%x)\n",
                  (unsigned long)session->ingress_hw_stats.tx_pkts,
                  (unsigned long long)session->ingress_hw_stats.tx_bytes,
                  session->ingress_hw_stats.validflags);

   negress = 0;
   avm_pa_for_each_egress(egress, session) {
      (*fprintffunc)(arg, "Egress         : %d of %d\n", ++negress, session->negress);
      (*fprintffunc)(arg, "Type           : %s\n", egresstype2str(egress->type));
      if (egress->pid_handle) {
         (*fprintffunc)(arg, "Out Pid        : %d (%s)\n", egress->pid_handle,
                        PA_PID(ctx, egress->pid_handle)->cfg.name);
      }
      else {
         /* Without an output PID there is nothing more to show */
         (*fprintffunc)(arg, "Egress under construction\n");
         continue;
      }
      if (egress->vpid_handle) {
         (*fprintffunc)(arg, "Out VPid       : %d (%s)\n", egress->vpid_handle,
                        PA_VPID(ctx, egress->vpid_handle)->cfg.name);
      }
      (*fprintffunc)(arg, "Mtu            : %u\n", (unsigned)egress->mtu);
      if (egress->push_l2_len) {
         data2hex(HDRCOPY(&egress->match), egress->push_l2_len,
                  buf, sizeof(buf));
         (*fprintffunc)(arg, "L2 push        : %s\n", buf);
         if (egress->pppoe_offset != AVM_PA_OFFSET_NOT_SET) {
            (*fprintffunc)(arg, "PPPoE off      : %u\n", (unsigned)egress->pppoe_offset);
            (*fprintffunc)(arg, "PPPoE hlen     : %u\n", (unsigned)egress->pppoe_hdrlen);
         }
      }
      if ((destmac = egress->destmac) != 0)
         pa_show_macaddr(destmac, fprintffunc, arg);
      pa_show_pkt_match(&egress->match,
                        session->bsession != 0, session->mod.pkttype,
                        fprintffunc, arg);

      switch (egress->type) {
         case avm_pa_egresstype_output:
            {
               struct avm_pa_pid *pid = PA_PID(ctx, egress->pid_handle);
               (*fprintffunc)(arg, "Prio           : %hx:%hx\n",
                              TC_H_MAJ(egress->output.priority)>>16,
                              TC_H_MIN(egress->output.priority));
               (*fprintffunc)(arg, "TX queue       : %hu\n", egress->output.txq_id);
               (*fprintffunc)(arg, "TC index       : %hu\n", egress->output.tc_index);
               if (avm_pa_pid_tack_enabled(pid)) {
                  (*fprintffunc)(arg, "tack pkts  : %u (accl acks %u)\n",
                                 pid->prioack_acks,
                                 pid->prioack_accl_acks);
               }
            }
            break;

         case avm_pa_egresstype_local:
            pkttype = session->ingress.pkttype;
            handler = NULL;
            /* The protocol tables are RCU protected and a slot is NULL when
             * no handler is registered for that protocol. Look the entry up
             * under rcu_read_lock() and check it before dereferencing, so
             * that a debug dump cannot oops on an unregistered protocol. */
            rcu_read_lock();
            if (AVM_PA_PKTTYPE_IP_VERSION(pkttype) == 4) {
               const struct net_protocol *ipprot =
                  rcu_dereference(inet_protos[AVM_PA_PKTTYPE_IPPROTO(pkttype)]);
               if (ipprot)
                  handler = ipprot->handler;
            } else if (AVM_PA_PKTTYPE_IP_VERSION(pkttype) == 6) {
               const struct inet6_protocol *ip6prot =
                  rcu_dereference(inet6_protos[AVM_PA_PKTTYPE_IPPROTO(pkttype)]);
               if (ip6prot)
                  handler = ip6prot->handler;
            }
            rcu_read_unlock();

            (*fprintffunc)(arg, "Proto handler  : %pf\n", handler);
            if ((dev = egress->local.dev) != 0) {
               (*fprintffunc)(arg, "Input Dev      : %s\n", dev->name);
            } else {
               (*fprintffunc)(arg, "Input Dev      : <NOT SET>\n");
            }
            break;
         case avm_pa_egresstype_rtp:
            (*fprintffunc)(arg, "transmitfunc   : %pF\n", egress->rtp.transmit);
            break;
         case avm_pa_egresstype_xfrm:
            if (IS_ENABLED(CONFIG_XFRM)) {
               struct dst_entry *dst = egress->xfrm.dst;
               struct xfrm_state *x = egress->xfrm.x;

               (*fprintffunc)(arg, "TC index       : %hu\n", egress->xfrm.tc_index);
               (*fprintffunc)(arg, "XFRM dst       : %pf\n", dst->input);
               (*fprintffunc)(arg, "XFRM output    : %pf\n", x->type->output);
               if (x->props.family == AF_INET) {
                  (*fprintffunc)(arg, "XFRM saddr     : %pI4\n", &x->props.saddr.a4);
                  (*fprintffunc)(arg, "XFRM daddr     : %pI4\n", &x->id.daddr.a4);
               } else if (x->props.family == AF_INET6) {
                  (*fprintffunc)(arg, "XFRM saddr     : %pI6\n", &x->props.saddr.a6);
                  (*fprintffunc)(arg, "XFRM daddr     : %pI6\n", &x->id.daddr.a6);
               } else {
                  (*fprintffunc)(arg, "XFRM saddr     : ??? (family %d)\n", x->props.family);
                  (*fprintffunc)(arg, "XFRM daddr     : ??? (proto %d)\n", x->id.proto);
               }
               (*fprintffunc)(arg, "XFRM spi       : 0x%08x\n", ntohl(x->id.spi));
            }
            break;
         case avm_pa_egresstype_null:
            break;
      }
      (*fprintffunc)(arg, "SW stats       : %lu pkts, %llu bytes\n",
                     (unsigned long)egress->sw_stats.tx_pkts,
                     (unsigned long long)egress->sw_stats.tx_bytes);
      (*fprintffunc)(arg, "HW stats       : %lu pkts, %llu bytes\n",
                     (unsigned long)egress->hw_stats.tx_pkts,
                     (unsigned long long)egress->hw_stats.tx_bytes);
      (*fprintffunc)(arg, "Pkts           : TX %lu (acks %lu)\n",
                     (unsigned long)egress->tx_pkts,
                     (unsigned long)egress->tcpack_pkts);
   }

   avm_pa_sg_show_session(session, fprintffunc, arg);
}


/*
 * RCU callback queued by pa_session_delete(). For every egress of the
 * session it drops the destination MAC link and the type specific
 * references, puts the egress PID and frees the egress unless it is the
 * session's embedded static_egress. Afterwards the ingress PID is put and
 * the session is moved to the FREE list under avm_pa_lock.
 */
static void
pa_session_delete_rcu(struct rcu_head *head)
{
   struct avm_pa_session *session = container_of(head, struct avm_pa_session, kill_rcu);
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_egress *egress;
   struct hlist_node *next;

   /* Inside the rcu callback a plain (non-_rcu) traversal is sufficient.
    * The _safe variant is required because egress entries are removed
    * while walking the list. */
   hlist_for_each_entry_safe(egress, next, &session->egress_head, egress_list) {
      if (egress->destmac) {
         pa_macaddr_unlink(egress->destmac);
         egress->destmac = NULL;
      }

      switch (egress->type) {
         case avm_pa_egresstype_output:
            if (egress->output.dst) {
               dst_release(egress->output.dst);
               egress->output.dst = NULL;
            }
            break;

         case avm_pa_egresstype_local:
            if (egress->local.dst) {
               dst_release(egress->local.dst);
               egress->local.dst = NULL;
            }
            break;

         case avm_pa_egresstype_rtp:
            if (egress->rtp.sk) {
               sock_put(egress->rtp.sk);
               egress->rtp.sk = NULL;
            }
            break;

         case avm_pa_egresstype_xfrm:
            if (IS_ENABLED(CONFIG_XFRM)) {
               dst_release(egress->xfrm.dst);
               xfrm_state_put(egress->xfrm.x);
               dev_put(egress->xfrm.dev);
            }
            break;

         case avm_pa_egresstype_null:
            /* Only accounted, a null egress holds nothing to release here */
            ctx->stats.drop_sess_del++;
            break;
      }

      pa_pid_put(egress->pid_handle);

      /* static_egress is embedded in the session and must not be freed */
      if (egress != &session->static_egress) {
         pa_egress_free(egress);
      }
   }

   pa_pid_put(session->ingress_pid_handle);

   spin_lock(&avm_pa_lock);
   pa_session_list_update(session, AVM_PA_LIST_FREE);
   spin_unlock(&avm_pa_lock);
}

/*
 * Delete a session that sits on the DEAD list: optionally dump it, take it
 * off its list, assert that pa_session_kill() has fully detached it and
 * queue pa_session_delete_rcu() to release the remaining resources.
 */
static void pa_session_delete(struct avm_pa_session *session)
{
   struct avm_pa_global *ctx = &pa_glob;
   const char *why = session->why_killed;

   if (!why) {
      why = "???";
   }

   if (ctx->dbgsession) {
      pa_printk(KERN_DEBUG, "\navm_pa: delete session: %s\n", why);
      pa_show_session(session, pa_printk, KERN_DEBUG);
   }

   /* Only dead sessions may be deleted */
   BUG_ON(session->on_list != AVM_PA_LIST_DEAD);
   pa_session_list_delete(session);

#if AVM_PA_TRACE
   if (ctx->dbgtrace) {
      struct avm_pa_pid *ingress_pid = PA_PID(ctx, session->ingress_pid_handle);

      pa_printk(KERN_DEBUG, "avm_pa: delete session %d (%s) %s\n",
                            session->session_handle, ingress_pid->cfg.name, why);
   }
#endif

   /*
    * By now pa_session_kill() must have removed the session from
    * - the hash
    * - the hardware pa
    * - generic connection tracking
    */
   BUG_ON(!hlist_unhashed(&session->hash_list));
   BUG_ON(session->bsession && !hlist_unhashed(&session->bsession->hash_list));
   BUG_ON(test_bit(PA_S_IN_HW, &session->flags));
#if (defined(CONFIG_AVM_GENERIC_CONNTRACK) || defined(CONFIG_AVM_PA_GENERIC_CT))
   BUG_ON(session->generic_ct);
#endif

   /* Packets may still be in flight. Everything that would prevent their
    * transmission is deferred to the rcu callback. */
   call_rcu_bh(&session->kill_rcu, pa_session_delete_rcu);
}


/*
 * RCU callback queued by pa_session_kill_nolock(). Removes the session from
 * the hardware accelerator (if it was offloaded), drops the conntrack
 * references, unlinks the session via avm_pa_sg_session_unlink() and
 * finally moves it to the DEAD list under avm_pa_lock.
 */
static void
pa_session_kill_rcu(struct rcu_head *head)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_session *session = container_of(head, struct avm_pa_session, kill_rcu);

   /* Atomic test-and-clear ensures the hardware removal runs only once */
   if (test_and_clear_bit(PA_S_IN_HW, &session->flags)) {
      (*ctx->hardware_pa.remove_session)(session);
      pa_hw_pa_put();
   }

#if (defined(CONFIG_AVM_GENERIC_CONNTRACK) || defined(CONFIG_AVM_PA_GENERIC_CT))
   if (session->generic_ct) {
      u32 session_handle = (u32)(session->session_handle);
      struct generic_ct *ct = session->generic_ct;
      session->generic_ct = 0;
      /* The session id is an integer carried in a pointer-typed argument:
       * bit 31 set plus the session handle. Widen through unsigned long so
       * the integer-to-pointer cast is size-matched on 64-bit builds too. */
      generic_ct_sessionid_set(ct, session->generic_ct_dir,
                               (void *)(unsigned long)(1U << 31 | session_handle));
      generic_ct_put(ct);
   }
#if (defined(CONFIG_BLOG) && IS_ENABLED(CONFIG_NF_CONNTRACK))
   if (session->ct) {
      struct nf_conn *nfct;
      nfct = session->ct;
      session->ct = NULL;
      nf_conntrack_put(&nfct->ct_general);
   }
#endif
#endif

   /*
    * all packets that were in-flight in pa_session_kill()
    * should be counted here.
    */
   avm_pa_sg_session_unlink(session);

   spin_lock(&avm_pa_lock);
   pa_session_list_update(session, AVM_PA_LIST_DEAD);
   spin_unlock(&avm_pa_lock);

}

/*
 * Kill a session without taking avm_pa_lock (pa_session_kill() is the
 * locking wrapper): optionally trace/dump it, remove it from its list and
 * from the hash, record the reason and queue pa_session_kill_rcu() for the
 * remaining teardown.
 */
static void
pa_session_kill_nolock(struct avm_pa_session *session, const char *why)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *ingress_pid = PA_PID(ctx, session->ingress_pid_handle);

#if AVM_PA_TRACE
   if (ctx->dbgtrace) {
      pa_printk(KERN_DEBUG, "avm_pa: kill session %d (%s) %s\n",
                            session->session_handle, ingress_pid->cfg.name, why);
   }
#endif

   if (ctx->dbgsession) {
      pa_printk(KERN_DEBUG, "\navm_pa: kill session: %s\n", why);
      if (!session->bsession) {
         pa_show_session(session, pa_printk, KERN_DEBUG);
      } else {
         pa_show_bsession(session->bsession, pa_printk, KERN_DEBUG);
      }
   }

   pa_session_list_delete(session);
   pa_session_hash_delete(ingress_pid, session);

   session->why_killed = why;

   /* Packets may still be in flight. Everything that would prevent their
    * transmission is deferred to the rcu callback. */
   call_rcu_bh(&session->kill_rcu, pa_session_kill_rcu);
}

/*
 * Locking wrapper: runs pa_session_kill_nolock() with avm_pa_lock held.
 */
static void
pa_session_kill(struct avm_pa_session *session, const char *why)
{
   spin_lock(&avm_pa_lock);

   pa_session_kill_nolock(session, why);

   spin_unlock(&avm_pa_lock);
}

/*
 * Mark a session as flushed and remember the reason. The session itself is
 * left in place here; the garbage collector kills sessions that carry
 * PA_S_FLUSHED on its next run.
 *
 * @session: session to flush
 * @why:     reason string, stored in session->why_killed
 */
static void
pa_session_flush(struct avm_pa_session *session, const char *why)
{
   struct avm_pa_global *ctx = &pa_glob;

#if AVM_PA_TRACE
   if (ctx->dbgtrace) {
      /* Declared here because the pid is only needed for tracing; a
       * function-scope declaration is unused when AVM_PA_TRACE is off. */
      struct avm_pa_pid *pid = PA_PID(ctx, session->ingress_pid_handle);
      pa_printk(KERN_DEBUG, "avm_pa: flush session %d (%s) %s\n",
                            session->session_handle, pid->cfg.name, why);
   }
#endif
   if (ctx->dbgsession) {
      pa_printk(KERN_DEBUG, "\navm_pa: flush session: %s\n", why);
      pa_show_session(session, pa_printk, KERN_DEBUG);
   }

   set_bit(PA_S_FLUSHED, &session->flags); /* will be killed on next gc */
   session->why_killed = why;
}


/* ------------------------------------------------------------------------ */
/* -------- wall clock ---------------------------------------------------- */
/* ------------------------------------------------------------------------ */

/*
 * Finish the priority-ack evaluation of a session once its first egress has
 * transmitted more than ctx->prioack_thresh_packets packets: compute the
 * share of TCP ACKs, set PA_S_PRIOACK_ACK when it exceeds
 * ctx->prioack_ratio, then set PA_S_PRIOACK_DONE and clear
 * PA_S_PRIOACK_CHECK.
 */
static void pa_session_prioack_check(struct avm_pa_session *session)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_egress *egress = avm_pa_first_egress(session);
   unsigned long percent_ack;

   if (!test_bit(PA_S_PRIOACK_CHECK, &session->flags))
      return;
   if (!(egress->tx_pkts > ctx->prioack_thresh_packets))
      return;

   /*
    * Stop using TGET priority.
    * Decide between TACK priority and restoring the original priority.
    * (TACK priority equals the original priority if TACK is not enabled)
    *   2016-10-14 calle
    */
   percent_ack = (egress->tcpack_pkts * 100) / egress->tx_pkts;

   if (ctx->dbgprioack) {
      pa_printk(KERN_DEBUG, "avm_pa: session %d: %lu%% TCP-ACKs (%u pkts %u ACKs) \n",
                            session->session_handle,
                            percent_ack, egress->tx_pkts, egress->tcpack_pkts);
   }

   if (percent_ack > ctx->prioack_ratio) {
      set_bit(PA_S_PRIOACK_ACK, &session->flags);
   }

   if (ctx->dbgprioack) {
      pa_printk(KERN_DEBUG, "avm_pa: session %d: priority %x:%x -> %s\n",
                            session->session_handle,
                            TC_H_MAJ(egress->output.priority)>>16,
                            TC_H_MIN(egress->output.priority),
                            test_bit(PA_S_PRIOACK_ACK, &session->flags) ? "TACK" : "NORMAL");
   }

   /* The next packet re-creates the session based on the new priority */
   set_bit(PA_S_PRIOACK_DONE, &session->flags);
   /* No atomic test_and_clear needed, we are only called from the tick */
   clear_bit(PA_S_PRIOACK_CHECK, &session->flags);
}

/*
 * Report how many packets and bytes were added between the snapshot 'last'
 * and the current counters 'now', and advance the snapshot to 'now'.
 * Only tx_pkts and tx_bytes are touched.
 */
static void pa_session_stats_get_diff(u32 *pkts, u64 *bytes,
                                      struct avm_pa_session_stats *last,
                                      struct avm_pa_session_stats *now)
{
   /* packets */
   *pkts = now->tx_pkts - last->tx_pkts;
   last->tx_pkts = now->tx_pkts;

   /* bytes */
   *bytes = now->tx_bytes - last->tx_bytes;
   last->tx_bytes = now->tx_bytes;
}

/*
 * Reduce a priority value to its TC_H_MIN_MASK bits and clamp the result
 * to the range [0, AVM_PA_MAX_PRIOS-1].
 */
static inline unsigned int pa_get_priority(unsigned int prio)
{
   const unsigned int minor = prio & TC_H_MIN_MASK;

   if (minor < AVM_PA_MAX_PRIOS)
      return minor;

   return AVM_PA_MAX_PRIOS-1;
}

/*
 * Clamped priority (see pa_get_priority()) of an egress' output.priority.
 */
static inline unsigned int
pa_get_egress_priority(struct avm_pa_egress *egress)
{
   const unsigned int raw_prio = egress->output.priority;

   return pa_get_priority(raw_prio);
}

/*
 * Clamped ingress priority of a session.
 *
 * Going through pa_get_priority() guarantees a result within
 * [0, AVM_PA_MAX_PRIOS-1]; otherwise Klocwork complains when the value is
 * used as index into the VPID ingress priority statistics array.
 */
static inline unsigned int
pa_get_ingress_priority(struct avm_pa_session *session)
{
   const unsigned int raw_prio = session->ingress_priority;

   return pa_get_priority(raw_prio);
}

/*
 * Derive the ingress priority from a packet mark.
 *
 * For now only networks are considered, which are encoded as the two most
 * significant bytes. The result is clamped to AVM_PA_MAX_PRIOS-1.
 */
static inline unsigned int
pa_get_ingress_priority_from_pkt_mark(u32 pkt_mark)
{
   const unsigned int net_prio = AVM_PA_INGRESS_PRIO_NET(pkt_mark);

   if (net_prio < AVM_PA_MAX_PRIOS)
      return net_prio;

   return AVM_PA_MAX_PRIOS-1;
}

/* ------------------------------------------------------------------------ */

/*
 * Ask the hardware accelerator, if it provides check_session(), whether an
 * offloaded session is still valid and flush the session when it reports
 * AVM_HW_CHK_FLUSH.
 */
static void pa_session_check_pa(struct avm_pa_session *session)
{
   struct avm_pa_global *ctx = &pa_glob;
   unsigned verdict;

   if (!test_bit(PA_S_IN_HW, &session->flags))
      return;
   if (!ctx->hardware_pa.check_session)
      return;

   verdict = ctx->hardware_pa.check_session(session);

   /* Unknown return codes indicate a too old avm_pa tag, warn about them */
   WARN_ON_ONCE(verdict & ~AVM_HW_CHK_FLUSH);

   if (verdict == AVM_HW_CHK_FLUSH) {
      pa_session_flush(session, "void by hw");
   }
}

/*
 * Collect the statistics of one session for the current tick: take the
 * software counter delta, query the hardware accelerator for offloaded
 * sessions, and add the results to the session's ingress hardware counters,
 * to every egress' hardware counters and to the egress PIDs' tx_pkts.
 *
 * Returns non-zero when the hardware delivered any valid statistics.
 */
static int pa_session_handle_stats(struct avm_pa_session *session)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_session_stats stats;
   struct avm_pa_egress *egress;
   unsigned validflags = 0;
   u64 bytes, hw_bytes = 0;
   u32 pkts, hw_pkts = 0;

   stats.validflags = 0;
   pa_session_stats_get_diff(&pkts, &bytes, &session->ingress_last_sw_stats,
                             &session->ingress_sw_stats);

   /* Hardware statistics only exist for offloaded sessions, and only if
    * the accelerator has a session_stats() hook that succeeds. */
   if (   test_bit(PA_S_IN_HW, &session->flags)
       && ctx->hardware_pa.session_stats
       && (*ctx->hardware_pa.session_stats)(session, &stats) == 0) {
      validflags = stats.validflags;
   }

   /* Only take over the fields the hardware flagged as valid */
   if (validflags & AVM_PA_SESSION_STATS_VALID_PKTS)
      hw_pkts = stats.tx_pkts;
   if (validflags & AVM_PA_SESSION_STATS_VALID_BYTES)
      hw_bytes = stats.tx_bytes;

   if (ctx->dbgstats && validflags)
      pr_debug("session %d valid 0x%x, %lu/%lu pkts, %llu/%llu bytes\n",
               session->session_handle,
               validflags,
               (unsigned long)pkts, (unsigned long)hw_pkts,
               bytes, hw_bytes);

   session->ingress_hw_stats.tx_pkts += hw_pkts;
   session->ingress_hw_stats.tx_bytes += hw_bytes;
   session->ingress_hw_stats.validflags |= validflags;

   avm_pa_for_each_egress(egress, session) {
      egress->hw_stats.tx_pkts += hw_pkts;
      egress->hw_stats.tx_bytes += hw_bytes;

      if (!egress->pid_handle)
         continue;

      PA_PID(ctx, egress->pid_handle)->tx_pkts += pkts + hw_pkts;
   }

   return validflags != 0;
}

/*
 * Per-tick pass over all ACTIVE sessions: stamp the statistics time, let
 * the hardware validate the session, collect statistics (updating the
 * session when the hardware reported any) and run the priority-ack check.
 */
static void
pa_tick_sessions(void)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_session_list *active = &ctx->sess_list[AVM_PA_LIST_ACTIVE];
   struct avm_pa_session *session;
   ktime_t stamp = ktime_get_boottime();

   /* The walk takes no lock, it only relies on the rcu read side. */
   rcu_read_lock();
   list_for_each_entry_rcu(session, &active->sessions, session_list) {
      session->stats_timestamp = stamp;

      pa_session_check_pa(session);

      if (pa_session_handle_stats(session)) {
         pa_session_update(session);
      }

      pa_session_prioack_check(session);
   }
   rcu_read_unlock();
}


/*
 * Session garbage collection. Does not take avm_pa_lock itself; the callers
 * pa_session_tick() and pa_session_gc_once() hold it around the call.
 *
 * - with 'force' set, all ACTIVE sessions are killed first
 * - DEAD sessions without a hardware session are deleted
 * - ACTIVE sessions marked PA_S_FLUSHED are killed, expired ones are marked
 *   flushed (and thus killed on the next run)
 * - with CONFIG_L2TP, cache entries whose local session is gone are cleared
 */
static void
pa_tick_session_gc_nolock(int force)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_data *pd __maybe_unused = &pa_data;
   struct avm_pa_session_list *active = &ctx->sess_list[AVM_PA_LIST_ACTIVE];
   struct avm_pa_session_list *dead = &ctx->sess_list[AVM_PA_LIST_DEAD];
   struct avm_pa_session *session, *tmp;
   struct avm_pa_l2tp *l2tp __maybe_unused;
   int i __maybe_unused;

   if (force) {
      list_for_each_entry_safe(session, tmp, &active->sessions, session_list) {
         pa_session_kill_nolock(session, "disable");
         ctx->stats.sess_flushed++;
      }
   }

   list_for_each_entry_safe(session, tmp, &dead->sessions, session_list) {
      if (avm_pa_get_hw_session(session) != NULL)
         continue;

      pa_session_delete(session);
   }

   list_for_each_entry_safe(session, tmp, &active->sessions, session_list) {
      if (test_bit(PA_S_FLUSHED, &session->flags)) {
         pa_session_kill_nolock(session, session->why_killed);
         continue;
      }

      if (time_is_before_eq_jiffies(session->endtime)) {
         /* Only flush here: a packet may be received right now on another
          * CPU, so killing immediately would be racy. */
         pa_session_flush(session, session->timeout ? "timeout" : "fin");
         ctx->stats.sess_timedout++;
      }
   }

#ifdef CONFIG_L2TP
   for (i = 0; i < ARRAY_SIZE(pd->l2tp_cache); i++) {
      struct l2tp_session *local_sess;

      l2tp = &pd->l2tp_cache[i];
      /* Only the cache entry is cleared for now.
       * TODO: Maybe clear out corresponding sessions to truly stop forwarding */
      local_sess = pa_l2tp_session_get_local(l2tp->session_id);
      if (local_sess != NULL) {
         pa_l2tp_session_put_local(local_sess);
      } else {
         l2tp->session_id = 0;
      }
   }
#endif
}


/* jiffies value recorded at the start of the most recent pa_session_tick() */
static unsigned long last_tick;

#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)
static void
pa_session_tick(unsigned long force)
#else
static void
pa_session_tick(struct timer_list *timer)
#endif
{
   struct avm_pa_global *ctx = &pa_glob;
   unsigned long next_tick;

   /* Minimize timer temporal drift */
   next_tick = jiffies + AVM_PA_TICK_RATE;
   last_tick = jiffies;

   pa_tick_sessions();

   spin_lock(&avm_pa_lock);
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)
   pa_tick_session_gc_nolock(force);
#else
   pa_tick_session_gc_nolock(0);
#endif

   /* The tick_timer is only necessary as long as there are any sessions */
   if (   ctx->sess_list[AVM_PA_LIST_ACTIVE].nsessions
       || ctx->sess_list[AVM_PA_LIST_DEAD].nsessions)
      mod_timer(&ctx->tick_timer, next_tick);

   spin_unlock(&avm_pa_lock);
}


/*
 * Run a single forced garbage collection pass with avm_pa_lock held and
 * bottom halves disabled.
 */
static void
pa_session_gc_once(void)
{
   const int force = 1;

   spin_lock_bh(&avm_pa_lock);
   pa_tick_session_gc_nolock(force);
   spin_unlock_bh(&avm_pa_lock);
}

/*------------------------------------------------------------------------ */

/*
 * Tag the packet with the session and hand it directly to
 * pa_do_modify_and_send().
 *
 * Must be called inside the rcu read side.
 */
static inline void pa_tbf_forward(struct avm_pa_session *session, PKT *pkt)
{
   /* Record handle and unique id to be sure, although they are not always
    * used during transmit. */
   AVM_PKT_INFO(pkt)->session_handle  = session->session_handle;
   AVM_PKT_INFO(pkt)->session_uniq_id = session->uniq_id;

   pa_do_modify_and_send(session, pkt);
}

/* ------------------------------------------------------------------------ */

/*
 * Callback taking an rx queue as opaque 'data': counts the invocation in
 * rx_rps_isr and schedules the queue's NAPI instance.
 */
static void pa_napi_schedule(void *data)
{
   struct avm_pa_rxq *queue = data;

   queue->rx_rps_isr++;

   __napi_schedule(&queue->napi);
}

/*
 * Move everything from the lock-free input queue (rxq->rq) to the local
 * in-order queue (rxq->lq), then transmit up to 'budget' packets from the
 * local queue.
 *
 * Returns the number of packets processed.
 */
static int
pa_rxq_process(struct avm_pa_rxq *rxq, int budget)
{
   struct sk_buff *skb, *following, *old_tail;
   int done = 0;

   rxq->rx_process++;

   /* Grab the whole input queue atomically, so that producers can keep
    * enqueueing on the emptied queue concurrently (llist-like scheme).
    * The grabbed chain is newest-first because enqueue has "add_head"
    * semantics, hence it is reversed below.
    */
   skb = xchg(&rxq->rq, NULL);

   /* __skb_queue_after() needs *something* to insert after. For an empty
    * list the list itself serves as anchor, which makes
    * __skb_queue_after() behave like skb_queue_head().
    */
   old_tail = rxq->lq.prev;
   while (skb) {
      following = skb->next;
      /* lq is in-order. Always inserting right behind the old tail appends
       * the chain and reverses it at the same time.
       */
      __skb_queue_after(&rxq->lq, old_tail, skb);
      skb = following;
   }

   /* lq now holds the packets in the order they were received */
   rcu_read_lock();
   while (done < budget) {
      skb = __skb_dequeue(&rxq->lq);
      if (!skb)
         break;

      pa_do_modify_and_send(NULL, skb);
      done++;
   }
   rcu_read_unlock();

   return done;
}

/*
 * NAPI poll callback of an rx queue: processes up to 'budget' packets and
 * returns how many were handled.
 */
static int
pa_napi_process(struct napi_struct *napi, int budget)
{
   struct avm_pa_rxq *rxq = container_of(napi, struct avm_pa_rxq, napi);
   int done = pa_rxq_process(rxq, budget);

   /* Stop only once nothing was processed at all, i.e. the input queue
    * (rxq->rq) is depleted.
    */
   if (done == 0) {
      clear_bit(NAPI_STATE_SCHED, &napi->state);
      return done;
   }

   /* When the entire budget was consumed the core schedules us again,
    * otherwise we have to do that ourselves.
    */
   if (done != budget) {
      __napi_schedule(napi);
   }

   return done;
}

/*
 * Tasklet function of an rx queue ('data' is the queue pointer): marks the
 * queue's NAPI instance as scheduled and kicks it, via an async cross-CPU
 * call with CONFIG_AVM_PA_RPS or directly via __napi_schedule() without.
 */
static void
pa_napi_sched_task(unsigned long data)
{
   struct avm_pa_rxq *queue = (struct avm_pa_rxq *) data;

   queue->rx_napi_sched++;

   /* NAPI scheduling has two parts. The first one, setting the SCHED bit,
    * can be done right here which evades an IPI if it is already set.
    * Raising the softirq has to happen on the target cpu and is left to
    * __napi_schedule().
    */
   if (test_and_set_bit(NAPI_STATE_SCHED, &queue->napi.state)) {
      return;
   }

#ifdef CONFIG_AVM_PA_RPS
   /* Eventually ends up in pa_napi_process() through NAPI on another CPU.
    *
    * On older kernels an ipi must not be issued while one is still
    * in-flight: the async call would block, risking a dead lock.
    * smp_call_function_single_async() sets csd.flags to CSD_FLAG_LOCK
    * internally to detect repeated calls, so that flag is re-used here
    * instead of maintaining a separate guard.
    *
    * Starting with 5.7 the kernel checks this itself and signals in-flight
    * ipi requests by returning -EBUSY.
    */
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 7, 0)
   if (queue->csd.flags) {
      return;
   }
#endif

   smp_call_function_single_async(queue->cpu, &queue->csd);
#else
   __napi_schedule(&queue->napi);
#endif
}

/*
 * Queue a packet for deferred transmission: tag it with the session, pick
 * the target rx queue (another CPU's queue when RPS applies, otherwise the
 * queue of the current CPU), drop the packet if that queue's backlog is
 * full, else push it onto the queue's lock-free input list and schedule the
 * queue's tasklet.
 */
static inline void
pa_rxq_forward(struct avm_pa_session *session, struct sk_buff *skb)
{
   struct avm_pa_global __maybe_unused *ctx = &pa_glob;
   struct avm_pa_rxq    *rxq;
   struct sk_buff       *qhead;

   /* Set the session_handle to be sure, although it's not always used during transmit. */
   AVM_PKT_INFO(skb)->session_handle  = session->session_handle;
   AVM_PKT_INFO(skb)->session_uniq_id = session->uniq_id;

#ifdef CONFIG_AVM_PA_RPS
   /* rps is done only in certain conditions. It's not done when
    * 1) rps globally disabled
    * 2) the session is a bridged session:
    *    - no true flow hash recorded in the session
    *    - questionable effect since bsessions are so cheap
    * 3) rps already done once
    *    - in dual session data paths rps may be done in the first
    *      session already
    */
   if (ctx->rps_enabled && !session->bsession && !AVM_PKT_INFO(skb)->rps_done) {
      u32 tcpu;
      /* Don't do rxq twice, e.g. if there are two sessions for a packet. */
      AVM_PKT_INFO(skb)->rps_done = 1;
      /* Select CPU via session hash, giving good enough distribution (hopefully).
       * A non-zero session->rps_cpu overrides the hash; it is stored off by one. */
      if (!session->rps_cpu)
         tcpu = session->ingress.hash & (CONFIG_AVM_PA_RPS_QUEUES-1);
      else
         tcpu = session->rps_cpu - 1;
      /* Ensure new CPU is online and usable: take the first online CPU at
       * or after tcpu, falling back to the first online CPU overall if
       * that is out of range. */
      tcpu = cpumask_next(tcpu-1, cpu_online_mask);
      if (unlikely(tcpu >= min_t(u32, CONFIG_AVM_PA_RPS_QUEUES, nr_cpu_ids)))
         tcpu = cpumask_first(cpu_online_mask);

      BUG_ON(tcpu >= CONFIG_AVM_PA_RPS_QUEUES);
      rxq = per_cpu_ptr(&pa_rxq, tcpu);
   } else
#endif
      /* No RPS, just the queue of the current CPU.
       * With CONFIG_AVM_PA_RPS this statement is the else branch above. */
      rxq = this_cpu_ptr(&pa_rxq);

   /* Backlog limit reached: count and drop the packet */
   if (unlikely(skb_queue_len_lockless(&rxq->lq) >= CONFIG_AVM_PA_NAPI_MAX_BACKLOG)) {
      rxq->rx_dropped++;
      kfree_skb(skb);
      return;
   }

   /* Based on llist_add_batch, inline for performance. As this has
    * add_head semantics the consumer must reverse the list to avoid
    * packet reordering.
    */
   do {
      skb->next = qhead = READ_ONCE(rxq->rq);
   } while (cmpxchg(&rxq->rq, qhead, skb) != qhead);

   rxq->rx_enqueued++;
   /* We're typically (though not always) in NAPI context here, thus the
    * tasklet deferral allows the current batch to complete first.
    *
    * Especially in the RPS case, which involves an ipi (a hardirq on the other CPU,
    * followed immediately by softirqs including NAPI), a __napi_schedule() here
    * would result in the NAPI callback more often than necessary. By deferring that
    * through a tasklet we allow the current batch of packets to complete before
    * triggering an expensive ipi.
    */
   tasklet_schedule(&rxq->sched_task);
}

/* must be called inside rcu read side */
static inline void
pa_forward(struct avm_pa_session *session, struct sk_buff *skb)
{
   struct avm_pa_global *ctx = &pa_glob;

   if (!ctx->rxq_enabled || (test_bit(PA_S_REALTIME, &session->flags) && !in_irq()))
      pa_tbf_forward(session, skb);
   else
      pa_rxq_forward(session, skb);
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Flag the rx channel of the given PID as stopped. */
void avm_pa_rx_channel_suspend(avm_pid_handle pid_handle)
{
   struct avm_pa_global *ctx = &pa_glob;

   PA_PID(ctx, pid_handle)->rx_channel_stopped = 1;
}
EXPORT_SYMBOL(avm_pa_rx_channel_suspend);

/* Clear the stopped flag of the given PID's rx channel. */
void avm_pa_rx_channel_resume(avm_pid_handle pid_handle)
{
   struct avm_pa_global *ctx = &pa_glob;

   PA_PID(ctx, pid_handle)->rx_channel_stopped = 0;
}
EXPORT_SYMBOL(avm_pa_rx_channel_resume);

/* Handle a packet from the rx channel of a PID that was not accelerated.
 *
 * The packet is first offered to avm_pa_pid_receive(). If that does not
 * accelerate it, it is passed to the PID's rx_slow callback. Without such
 * a callback the packet is freed and counted in rx_channel_no_rx_slow.
 */
void avm_pa_rx_channel_packet_not_accelerated(avm_pid_handle pid_handle,
                                           struct sk_buff *skb)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *pid = PA_PID(ctx, pid_handle);

   if (avm_pa_pid_receive(pid_handle, skb) == AVM_PA_RX_ACCELERATED)
      return;

   if (unlikely(!pid || !pid->ecfg.rx_slow)) {
      /* Nobody to take the packet on the slow path. */
      PKT_FREE(skb);
      ctx->stats.rx_channel_no_rx_slow++;
      return;
   }

   (*pid->ecfg.rx_slow)(pid->ecfg.rx_slow_arg, skb);
}
EXPORT_SYMBOL(avm_pa_rx_channel_packet_not_accelerated);

/* Forward a packet that arrives on the tx channel for a known session.
 *
 * The session is looked up by handle and must be on the ACTIVE list. For
 * xfrm egress the xfrm state must still be valid, otherwise the session is
 * flushed. Packets that cannot be forwarded are freed and counted in
 * tx_channel_dropped. pid_handle is not evaluated here.
 */
void avm_pa_tx_channel_accelerated_packet(avm_pid_handle pid_handle,
                                          avm_session_handle session_handle,
                                          struct sk_buff *skb)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_session *session;
   struct avm_pa_egress *first;

   rcu_read_lock_bh();

   session = pa_session_get(session_handle);
   if (!session || session->on_list != AVM_PA_LIST_ACTIVE)
      goto drop;

   first = avm_pa_first_egress(session);
   if (first->type == avm_pa_egresstype_xfrm &&
       first->xfrm.x->km.state != XFRM_STATE_VALID) {
      pa_session_flush(session, "xfrm state invalid");
      goto drop;
   }

   /* Set some important skb fields, as pa_do_modify_non_l2() would have done */
   skb_vlan_tag_clear(skb);
   skb_reset_mac_header(skb);
   if (session->bsession == 0) {
      /* Non-bridged: strip the ethernet header and cut the packet to the
       * length announced by the outer IP header.
       */
      skb_pull(skb, ETH_HLEN);
      skb_reset_network_header(skb);
      switch (session->mod.outer_ipversion) {
      case 4:
         pskb_trim(skb, ntohs(PA_IPTOTLEN(skb->data)));
         skb_set_transport_header(skb, session->mod.v4_mod.iphlen);
         break;
      case 6:
         pskb_trim(skb, ntohs(PA_IP6_PAYLOADLEN(skb->data)) + sizeof(struct ipv6hdr));
         skb_set_transport_header(skb, sizeof(struct ipv6hdr));
         break;
      default:
         break;
      }
   }

   AVM_PKT_INFO(skb)->seen = 1;
   AVM_PKT_INFO(skb)->already_modified = 1;
   pa_forward(session, skb);
   goto out;

drop:
   ctx->stats.tx_channel_dropped++;
   PKT_FREE(skb);

out:
   rcu_read_unlock_bh();
}
EXPORT_SYMBOL(avm_pa_tx_channel_accelerated_packet);


/* Validate packet and hand it over to the transmit path.
 *
 * @pid:      ingress PID the packet was received on
 * @session:  session that was matched for the packet
 * @bsession: true for bridged sessions; these skip the header based
 *            validation and go straight to the forwarding step
 * @skb:      the packet; skb->data is left untouched unless the packet is
 *            accepted for acceleration
 *
 * Return code:
 * - AVM_PA_RX_ACCELERATED if the packet was handed over to pa_forward().
 * - AVM_PA_RX_OK if the packet could be accelerated but session became
 *   invalid/stale. It would make sense to evaluate the packet for a new session.
 *   Also returned when forwarding is globally disabled.
 * - AVM_PA_RX_BYPASS if the packet cannot be accelerated no matter what.
 */
static int pa_try_accelerate(struct avm_pa_pid *pid, struct avm_pa_session *session,
                             bool bsession, struct sk_buff *skb)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pkt_info *info = AVM_PKT_INFO(skb);
   struct avm_pa_pkt_match *match = &session->ingress;
   int header_offset;
   int headlen;
   char *head;

   /* As long as we don't fully grab the packet we must not move skb->data. But
    * session information is based on the actual header and packet size validation
    * can only use session information. Therefore we must account for the offset.
    */
   if (pid->ingress_framing == avm_pa_framing_dev)
      header_offset = skb->data - skb_mac_header(skb);
   else if (pid->ingress_framing == avm_pa_framing_ipdev)
      header_offset = skb->data - skb_network_header(skb);
   else
      header_offset = 0;

   if (bsession)
      goto bridged;

   head = skb->data - header_offset;
   headlen = skb_headlen(skb) + header_offset;

   if (   avm_pa_first_egress(session)->type == avm_pa_egresstype_xfrm
       && avm_pa_first_egress(session)->xfrm.x->km.state != XFRM_STATE_VALID) {
      pa_session_flush(session, "xfrm state invalid");
      return AVM_PA_RX_OK;
   }

   if (test_and_clear_bit(PA_S_PRIOACK_DONE, &session->flags)) {
      /* This is the first packet after completed prioack_check. Force slow
       * path so that a new session (based on the new priority) can be
       * created. Furthermore, that session may use a different egress if
       * that can be offloaded to hw. Flag the packet such that prioack won't
       * be attempted again.
       */
      if (test_bit(PA_S_PRIOACK_ACK, &session->flags))
         info->prioack_result = PRIOACK_ACK;
      else
         info->prioack_result = PRIOACK_NORMAL;
      pa_session_flush(session, "prioack done");
      return AVM_PA_RX_OK;
   }

   /* Too small packets or too little headroom are systematically wrong.
    * They must be fixed at the PID side. Therefore the errors are always
    * printed.
    *
    * The size check must come before anything below reads header bytes
    * through 'head' (LISP header compare, TCP flags): callers such as
    * avm_pa_pid_session_receive() pass packets that were not parsed
    * beforehand, so nothing else guarantees that the headers are present.
    * NOTE(review): assumes full_hdrlen covers the LISP data header - confirm.
    */
   if (headlen < match->full_hdrlen) {
      ctx->stats.rx_too_small++;
      if (net_ratelimit())
         pr_err("avm_pa: pid %u (%s): too small packet: %d (need %d)\n",
                pid->pid_handle, pid->cfg.name,
                headlen, match->full_hdrlen);
      return AVM_PA_RX_BYPASS;
   }

   if (match->pkttype & AVM_PA_PKTTYPE_LISP) {
      void *slhdr = LISPDATAHDR(match);
      void *ilhdr = head + match->lisp_offset;
      if (memcmp(slhdr, ilhdr, LISP_DATAHDR_SIZE) != 0) {
         pa_session_flush(session, "lisp data header changed");
         ctx->stats.rx_lispchanged++;
         return AVM_PA_RX_OK;
      }
   }

   if ((skb_headroom(skb) - header_offset) < session->needed_headroom) {
      ctx->stats.rx_headroom_too_small++;
      if (net_ratelimit())
         pr_err("avm_pa: pid %u (%s): too little headroom: %d (need %d)\n",
                pid->pid_handle, pid->cfg.name,
                skb_headroom(skb) - header_offset, session->needed_headroom);
      return AVM_PA_RX_BYPASS;
   }

   if (pa_egress_size_check(session, skb, header_offset) < 0) {
      ctx->stats.rx_df++;
#if AVM_PA_TRACE
      if (ctx->dbgtrace)
         pr_debug("avm_pa: %lu - avm_pa_pid_receive(%s) - %s\n",
                  pkt_uniq_id(skb), pid->cfg.name, "size problem");
#endif
      return AVM_PA_RX_BYPASS;
   }

   if (AVM_PA_PKTTYPE_IPPROTO(match->pkttype) == IPPROTO_TCP) {
      /* fast check for tcp control flags */
      struct tcphdr *tcph = (struct tcphdr *) (head + match->l4_offset);
      /* set tcp_nodata for pa_transmit()  */
      info->tcp_nodata = pa_match_is_tcp_nodata(match, head);
      if (PA_TCP_FIN_OR_RST(tcph) || session->timeout == 0) {
         /* Fin terminates sessions, all further packets (including acks for
          * fin) take the slow path. Only set the timeout to prevent
          * session creation by the very last ack. However, when the tcp
          * socket is reused (indicated by a new syn) quickly, we must create
          * a new session for it immediately. */
         if (PA_TCP_SYN(tcph))
            pa_session_flush(session, "new flow");
         else
            info->do_not_accelerate = 1;
         session->timeout = 0;
         pa_session_update(session);
         return AVM_PA_RX_BYPASS;
      }
   }

bridged:
   pa_session_update(session);

   if (ctx->fw_disabled) {
      /* Forwarding is switched off: never accelerate, only flush sessions
       * whose timeout has already been set to zero.
       */
      if (session->timeout == 0)
         pa_session_flush(session, "fast timeout");
#if AVM_PA_TRACE
      if (ctx->dbgtrace)
         pr_debug("avm_pa: %lu - avm_pa_pid_receive(%s) - %s\n",
                  pkt_uniq_id(skb), pid->cfg.name, "forward disabled");
#endif
      return AVM_PA_RX_OK;
   }

#if AVM_PA_TRACE
   if (ctx->dbgtrace)
      pr_debug("avm_pa: %lu - avm_pa_pid_receive(%s) - %s\n",
               pkt_uniq_id(skb), pid->cfg.name, "accelerated");
#endif

   if (skb_has_frag_list(skb)) {
      ctx->stats.rx_frag_list += 1;
   }

   /* The packet is finally ours and we are free to move skb->data */
   __skb_push(skb, header_offset);

   pa_forward(session, skb);

   return AVM_PA_RX_ACCELERATED;
}

/* Receive a packet for which the caller already knows the session.
 *
 * The session must exist, be on the ACTIVE list and have pid_handle as its
 * ingress PID. Only then is pa_try_accelerate() attempted. In all other
 * cases AVM_PA_RX_OK is returned.
 *
 * Every call counts in rxfw_pkts, and every result other than
 * AVM_PA_RX_ACCELERATED additionally in rxfw_bypass.
 */
int avm_pa_pid_session_receive(avm_pid_handle pid_handle,
                               avm_session_handle session_handle,
                               struct sk_buff *skb)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_session *session;
   int ret = AVM_PA_RX_OK;

   rcu_read_lock_bh();

   ctx->stats.rxfw_pkts++;
   session = pa_session_get(session_handle);

   if (likely(session
              && session->on_list == AVM_PA_LIST_ACTIVE
              && session->ingress_pid_handle == pid_handle))
      ret = pa_try_accelerate(PA_PID(ctx, pid_handle), session, !!session->bsession, skb);

   rcu_read_unlock_bh();

   if (unlikely(ret != AVM_PA_RX_ACCELERATED))
      ctx->stats.rxfw_bypass++;

   return ret;
}
EXPORT_SYMBOL(avm_pa_pid_session_receive);

/* ------------------------------------------------------------------------ */
/* -------- exported functions -------------------------------------------- */
/* ------------------------------------------------------------------------ */

/* Returns non-zero unless the packet accelerator is globally disabled. */
int avm_pa_is_enabled(void)
{
   return !pa_glob.disabled;
}
EXPORT_SYMBOL(avm_pa_is_enabled);

/* Copy the current global statistics into the caller provided buffer. */
void avm_pa_get_stats(struct avm_pa_stats *stats)
{
   memcpy(stats, &pa_glob.stats, sizeof(*stats));
}
EXPORT_SYMBOL(avm_pa_get_stats);

/* Zero all global statistics counters. */
void avm_pa_reset_stats(void)
{
   memset(&pa_glob.stats, 0, sizeof(struct avm_pa_stats));
}
EXPORT_SYMBOL(avm_pa_reset_stats);

/* Initialize a device info structure by clearing it completely. */
void avm_pa_dev_init(struct avm_pa_dev_info *devinfo)
{
   memset(devinfo, 0, sizeof(*devinfo));
}
EXPORT_SYMBOL(avm_pa_dev_init);

/* Make a packet eligible for another pass through avm_pa_pid_receive().
 *
 * Only the seen flag is cleared here. Its previous value is kept in the
 * reset flag, which is needed later, e.g. to disable RPS for the second
 * part of dual-session data flows. All other packet info fields are reset
 * by the next avm_pa_pid_receive() (if any).
 */
void avm_pa_reset_skb(struct sk_buff *skb)
{
   struct avm_pa_pkt_info *pktinfo = AVM_PKT_INFO(skb);

   pktinfo->reset = pktinfo->seen;
   pktinfo->seen = 0;
}
EXPORT_SYMBOL(avm_pa_reset_skb);

/* Software receive path for a packet arriving on the given PID.
 *
 * Looks for a session matching the packet, first among the bridge sessions
 * (by ethernet header and vlan tag), then by a full packet match. If a
 * session is found the packet is passed to pa_try_accelerate() and that
 * function's result is returned.
 *
 * Returns AVM_PA_RX_OK without further processing if the accelerator is
 * disabled or the packet was already seen. Also returns AVM_PA_RX_OK if the
 * packet could be classified but no session exists yet. If
 * pa_set_pkt_match() rejects the packet, its return code is passed on after
 * updating the corresponding statistics.
 */
static int avm_pa_pid_receive(avm_pid_handle pid_handle, PKT *pkt)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *pid = PA_PID(ctx, pid_handle);
   struct avm_pa_pkt_info *info;
   struct avm_pa_session *session;
   struct vlan_ethhdr *ethh;
   int rc;

   if (ctx->disabled)
      return AVM_PA_RX_OK;

   avm_simple_profiling_skb(0, pkt);

   info = AVM_PKT_INFO(pkt);

   /* Each packet is inspected only once, unless avm_pa_reset_skb() cleared
    * the flag in between.
    */
   if (info->seen)
      return AVM_PA_RX_OK;

   ctx->stats.rx_pkts++;

   rcu_read_lock_bh();

   info->seen = 1;
   info->ingress_pid_handle = pid_handle;
   if (info->reset) {
      /* The packet was seen before and then reset via avm_pa_reset_skb().
       * Clear what the previous pass left behind in the packet info.
       */
      info->ingress_vpid_handle = 0;
      info->egress_pid_handle = 0;
      info->egress_vpid_handle = 0;
      info->is_accelerated = 0;
      info->prioack_result = PRIOACK_UNKOWN;
      info->routed = info->shaped = 0;
      info->session_handle = 0;
      info->do_not_accelerate = 0;
      info->already_modified = 0;
#ifdef CONFIG_AVM_PA_RPS
      /* No RPS for the second pass, see avm_pa_reset_skb(). */
      info->rps_done = 1;
#endif /* CONFIG_AVM_PA_RPS */
      avm_pa_skb_sg_reset(pkt);
   }

   /* Bridge sessions are looked up first; this only requires the ethernet
    * header and the vlan tag.
    */
   if ((ethh = pa_get_ethhdr(pid->ingress_framing, pkt)) != 0) {
      if ((session = pa_bsession_search(pid, ethh, skb_vlan_tag_get(pkt))) != 0) {
         ctx->stats.rx_match++;
         rc = pa_try_accelerate(pid, session, true, pkt);
         goto out_unlock;
      }
      /* No bridge session: unless the PID opted out, check whether the
       * source MAC address is now seen on a different ingress PID.
       */
      if ((pid->ecfg.flags & AVM_PA_PID_FLAG_NO_PID_CHANGED_CHECK) == 0) {
         u16 vlan_id = pa_get_vlan_id(pid->ingress_framing, pkt);
         pa_check_and_handle_ingress_pid_change(ethh->h_source, pid_handle, vlan_id);
      }
   }

   /* Classify the packet; the result is stored in info->match. */
   rc = pa_set_pkt_match(pid->ingress_framing,
                         pid->ecfg.flags & AVM_PA_PID_FLAG_HSTART_ON_INGRESS ? info->hstart : 0,
                         pkt, &info->match, 0);

   if (rc == AVM_PA_RX_OK) {

      ctx->stats.rx_search++;
      if ((session = pa_session_search(pid, &info->match)) == 0) {
#if AVM_PA_TRACE
         if (ctx->dbgtrace) {
            pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_pid_receive(%s) - %s\n",
                                  pkt_uniq_id(pkt), pid->cfg.name,
                                  "no session");
            if (ctx->dbgnosession) {
               char buf[64];
               data2hex(PKT_DATA(pkt), PKT_LEN(pkt), buf, sizeof(buf));
               pa_printk(KERN_DEBUG, "Data           : %s\n", buf);
               pa_show_pkt_info(info, pa_printk, KERN_DEBUG);
            }
         }
#endif
         /* With forwarding disabled or a capture running, flag the packet
          * so that it is not accelerated.
          */
         if (ctx->fw_disabled || avm_pa_capture_running()) {
#if AVM_PA_TRACE
            if (ctx->dbgtrace)
               pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_pid_receive(%s) - %s\n",
                     pkt_uniq_id(pkt), pid->cfg.name, "forward disabled");
#endif
            info->do_not_accelerate = 1;
         }
         /* Tag the packet with a fresh ingress id. */
         info->uniq_id = atomic_inc_return(&ctx->ingress_uniq_id);
         rc = AVM_PA_RX_OK;
         goto out_unlock;
      }

#ifdef CONFIG_BLOG
      if (test_bit(PA_S_PRIOACK_CHECK, &session->flags))
         blog_skip(pkt, blog_skip_reason_unknown);
#endif
      ctx->stats.rx_match++;
      rc = pa_try_accelerate(pid, session, false, pkt);
      goto out_unlock;
   }

   /* The packet could not be classified (rc != AVM_PA_RX_OK). */
   if (ctx->dbgmatch) {
      char buf[64];
      pa_printk(KERN_DEBUG, "---------->\n");
      pa_printk(KERN_DEBUG, "RC             : %d %s\n", rc, rc2str(rc));
      data2hex(PKT_DATA(pkt), PKT_LEN(pkt), buf, sizeof(buf));
      pa_printk(KERN_DEBUG, "Data           : %s\n", buf);
      pa_show_pkt_info(info, pa_printk, KERN_DEBUG);
      pa_printk(KERN_DEBUG, "<----------\n");
   }
   /* Discard the partial match and account for the reason. */
   pa_reset_match(&info->match);
   switch (rc) {
      case AVM_PA_RX_TTL:
         ctx->stats.rx_ttl++;
         break;
      case AVM_PA_RX_BROADCAST:
         ctx->stats.rx_broadcast++;
         break;
      default:
         ctx->stats.rx_bypass++;
         break;
   }
#if AVM_PA_TRACE
   if (ctx->dbgtrace)
      pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_pid_receive(%s) - %s (rc %d)\n",
                            pkt_uniq_id(pkt), pid->cfg.name,
                            "bypass", rc);
#endif

out_unlock:
   rcu_read_unlock_bh();
   return rc;
}

/* Record the VPID a packet was received on in its packet info. */
static inline void avm_pa_vpid_snoop_receive(avm_vpid_handle handle, PKT *pkt)
{
#if AVM_PA_TRACE
   if (pa_glob.dbgtrace) {
      struct avm_pa_global *ctx = &pa_glob;
      struct avm_pa_vpid *vpid = PA_VPID(ctx, handle);

      pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_vpid_snoop_receive(%s)\n",
                            pkt_uniq_id(pkt), vpid->cfg.name);
   }
#endif
   AVM_PKT_INFO(pkt)->ingress_vpid_handle = handle;
}


/* Receive hook for locally generated packets leaving through a device.
 *
 * If the device has a PID, the packet is marked realtime and offered to
 * avm_pa_pid_receive(). Unless it was accelerated, the VPID (if any) is
 * recorded as ingress VPID afterwards.
 *
 * Returns the result of avm_pa_pid_receive(), or AVM_PA_RX_OK if the
 * device has no PID.
 */
int avm_pa_dev_local_out(struct avm_pa_dev_info *devinfo, PKT *pkt)
{
   int rc = AVM_PA_RX_OK;

   if (!devinfo->pid_handle)
      goto snoop;

   /* avoid rps and other queues */
   AVM_PKT_INFO(pkt)->realtime = 1;

   rc = avm_pa_pid_receive(devinfo->pid_handle, pkt);
   if (rc == AVM_PA_RX_ACCELERATED)
      return rc;

   /* Do not create local out sessions for multicast, it may prevent
    * local delivery.
    * See JZ-99559: FRITZ!Fon does not find the FRITZ!Media Server
    *
    * Unicast local-to-local is not permitted either, see snoop_transmit.
    */
   if (AVM_PKT_INFO(pkt)->match.casttype != AVM_PA_IS_UNICAST)
      avm_pa_do_not_accelerate(pkt);

snoop:
   if (devinfo->vpid_handle)
      avm_pa_vpid_snoop_receive(devinfo->vpid_handle, pkt);

   return rc;
}

/* Receive hook for a device that may have a PID and/or a VPID.
 *
 * The packet is offered to avm_pa_pid_receive() if the device has a PID.
 * When that accelerates the packet, nothing else is done. Otherwise the
 * device's VPID (if any) is recorded as ingress VPID.
 *
 * Returns the result of avm_pa_pid_receive(), or AVM_PA_RX_OK if the
 * device has no PID.
 */
int avm_pa_dev_receive(struct avm_pa_dev_info *devinfo, PKT *pkt)
{
   int rc;

   if (!devinfo->pid_handle) {
      rc = AVM_PA_RX_OK;
   } else {
      rc = avm_pa_pid_receive(devinfo->pid_handle, pkt);
      if (rc == AVM_PA_RX_ACCELERATED)
         return rc;
   }

   if (devinfo->vpid_handle)
      avm_pa_vpid_snoop_receive(devinfo->vpid_handle, pkt);

   return rc;
}
EXPORT_SYMBOL(avm_pa_dev_receive);

/* PID-only receive hook that gives the hardware accelerator the first try.
 *
 * If a hardware try_to_accelerate() callback is registered and usable, and
 * the PID's rx channel is activated and not stopped, the packet is offered
 * to it first. Depending on its result:
 * - AVM_PA_RX_OK: continue with the software lookup in avm_pa_pid_receive()
 * - AVM_PA_RX_BYPASS or above: mark the packet as seen and return the code
 * - negative: the packet was dropped, report AVM_PA_RX_STOLEN
 *
 * Without a PID on the device AVM_PA_RX_OK is returned.
 */
int avm_pa_dev_pid_receive(struct avm_pa_dev_info *devinfo, PKT *pkt)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_hardware_pa *hwpa = &ctx->hardware_pa;
   int rc = AVM_PA_RX_OK;

   avm_simple_profiling_skb(0, pkt);

   if (!devinfo->pid_handle)
      return rc;

   /* We must be careful here since try_to_accelerate might be module code
    * that could be unloaded behind our back. Therefore we must get an explicit
    * ref on the hardware_pa since we aren't tied to a session yet.
    */
   if (hwpa->try_to_accelerate && !ctx->hw_ppa_disabled && pa_hw_pa_get()) {
      struct avm_pa_pid *pid = PA_PID(ctx, devinfo->pid_handle);

      if (pid->rx_channel_activated) {
         if (pid->rx_channel_stopped)
            ctx->stats.rx_channel_stopped++;
         else
            rc = hwpa->try_to_accelerate(devinfo->pid_handle, pkt);
      }
      pa_hw_pa_put();
   }

   /* Try to lookup session unless try_to_accelerate() determines invalid packet. */
   if (rc == AVM_PA_RX_OK)
      return avm_pa_pid_receive(devinfo->pid_handle, pkt);

   if (rc >= AVM_PA_RX_BYPASS) {
      /* Other avm_pa_pid_receive() calls down the road shall not inspect the packet. */
      AVM_PKT_INFO(pkt)->seen = 1;
      return rc;
   }

   /* packet dropped due to an error */
   if (rc < 0)
      return AVM_PA_RX_STOLEN;

   return rc;
}
EXPORT_SYMBOL(avm_pa_dev_pid_receive);

/* Record the device's VPID as ingress VPID of the packet; does nothing for
 * devices without a VPID.
 */
void avm_pa_dev_vpid_snoop_receive(struct avm_pa_dev_info *devinfo, PKT *pkt)
{
   if (!devinfo->vpid_handle)
      return;

   avm_pa_vpid_snoop_receive(devinfo->vpid_handle, pkt);
}
EXPORT_SYMBOL(avm_pa_dev_vpid_snoop_receive);

/* Flag the packet as routed in its packet info. */
void avm_pa_mark_routed(PKT *pkt)
{
   struct avm_pa_pkt_info *info = AVM_PKT_INFO(pkt);

   info->routed = 1;
#if AVM_PA_TRACE
   if (pa_glob.dbgtrace)
      pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_mark_routed (ingress %d)\n",
                            pkt_uniq_id(pkt),
                            info->ingress_pid_handle);
#endif
}
EXPORT_SYMBOL(avm_pa_mark_routed);

/* Flag the packet as shaped in its packet info. */
void avm_pa_mark_shaped(PKT *pkt)
{
   struct avm_pa_pkt_info *info = AVM_PKT_INFO(pkt);

   info->shaped = 1;
#if AVM_PA_TRACE
   if (pa_glob.dbgtrace)
      pa_printk(KERN_DEBUG, "avm_pa: %lu - %s (ingress %d)\n",
                            pkt_uniq_id(pkt),
                            __func__,
                            info->ingress_pid_handle);
#endif
}
EXPORT_SYMBOL(avm_pa_mark_shaped);

/* Attach an explicit RPS configuration to a packet.
 *
 * Sets the rps_override flag and stores copies of the allowed and the
 * fallback CPU mask in the packet info. Does nothing when
 * CONFIG_AVM_PA_RPS is not set.
 */
void avm_pa_skb_set_rps(struct sk_buff *skb,
                        const struct cpumask *allow,
                        const struct cpumask *fallback)
{
#ifdef CONFIG_AVM_PA_RPS
   struct avm_pa_pkt_info *info = AVM_PKT_INFO(skb);

   info->rps_override = 1;
   info->rps_allowed_mask = *allow;
   info->rps_fallback_mask = *fallback;
#if AVM_PA_TRACE
   if (pa_glob.dbgtrace)
      pa_printk(KERN_DEBUG, "avm_pa: %lu - %s (ingress %d)\n",
                            pkt_uniq_id(skb),
                            __func__,
                            info->ingress_pid_handle);
#endif
#endif
}
EXPORT_SYMBOL(avm_pa_skb_set_rps);

/* Set the use_protocol_specific flag in the packet info. */
void avm_pa_use_protocol_specific_session(PKT *pkt)
{
   struct avm_pa_pkt_info *info = AVM_PKT_INFO(pkt);

   info->use_protocol_specific = 1;
#if AVM_PA_TRACE
   if (pa_glob.dbgtrace)
      pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_use_protocol_specific_session (ingress %d)\n",
                            pkt_uniq_id(pkt),
                            info->ingress_pid_handle);
#endif
}
EXPORT_SYMBOL(avm_pa_use_protocol_specific_session);

/* Set the do_not_accelerate flag in the packet info. */
void avm_pa_do_not_accelerate(PKT *pkt)
{
   struct avm_pa_pkt_info *info = AVM_PKT_INFO(pkt);

   info->do_not_accelerate = 1;
#if AVM_PA_TRACE
   if (pa_glob.dbgtrace)
      pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_do_not_accelerate\n",
                            pkt_uniq_id(pkt));
#endif
}
EXPORT_SYMBOL(avm_pa_do_not_accelerate);

/* Store the given hstart value in the packet info. */
void avm_pa_set_hstart(PKT *pkt, unsigned int hstart)
{
   struct avm_pa_pkt_info *info = AVM_PKT_INFO(pkt);

   info->hstart = hstart;
}
EXPORT_SYMBOL(avm_pa_set_hstart);

/* Record the egress VPID of a packet. An egress VPID that is already set
 * is kept; only the first one is stored.
 */
static inline void avm_pa_vpid_snoop_transmit(avm_vpid_handle handle, PKT *pkt)
{
   struct avm_pa_pkt_info *pktinfo = AVM_PKT_INFO(pkt);

   if (!pktinfo->egress_vpid_handle)
      pktinfo->egress_vpid_handle = handle;
#if AVM_PA_TRACE
   if (pa_glob.dbgtrace) {
      struct avm_pa_global *ctx = &pa_glob;
      struct avm_pa_vpid *vpid = PA_VPID(ctx, handle);

      pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_vpid_snoop_transmit(%s)\n",
                            pkt_uniq_id(pkt), vpid->cfg.name);
   }
#endif
}

/* Determine the priority to use for a packet with respect to the egress
 * PID's TACK setting.
 *
 * For TCP packets, if TACK is enabled on the egress PID and the TACK
 * priority for orig_priority is non-zero and numerically lower than
 * orig_priority, that TACK priority is returned. In every other case
 * orig_priority is returned unchanged.
 */
static inline unsigned int pa_calc_tack_priority(struct avm_pa_pkt_info *info,
                                                 struct avm_pa_pid *epid,
                                                 unsigned int orig_priority)
{
   unsigned int tack_prio;

   if (AVM_PA_PKTTYPE_IPPROTO(info->match.pkttype) != IPPROTO_TCP)
      return orig_priority;

   if (!avm_pa_pid_tack_enabled(epid))
      return orig_priority;

   tack_prio = avm_pa_pid_tack_prio(epid, orig_priority);
   if (tack_prio == 0 || tack_prio >= orig_priority)
      return orig_priority;

   return tack_prio;
}

/* Calculate the priority a session starts with when it is created.
 *
 * At this point we assume it's a TGET or TACK session; the final decision
 * is made later in pa_session_prioack_check().
 *   2016-10-14 calle
 *
 * For TCP packets the result is the numerically lowest of orig_priority,
 * the TGET priority and the TACK priority, where the latter two only count
 * if the respective feature is enabled on the egress PID and the value is
 * non-zero. For all other packets orig_priority is returned.
 */
static inline unsigned int pa_calc_start_priority(struct avm_pa_pkt_info *info,
                                                  struct avm_pa_pid *epid,
                                                  unsigned int orig_priority)
{
   unsigned int best = orig_priority;
   unsigned int candidate;

   if (AVM_PA_PKTTYPE_IPPROTO(info->match.pkttype) != IPPROTO_TCP)
      return best;

   if (avm_pa_pid_tget_enabled(epid)) {
      candidate = avm_pa_pid_tget_prio(epid, orig_priority);
      if (candidate != 0 && candidate < best)
         best = candidate;
   }

   if (avm_pa_pid_tack_enabled(epid)) {
      candidate = avm_pa_pid_tack_prio(epid, orig_priority);
      if (candidate != 0 && candidate < best)
         best = candidate;
   }

   return best;
}

static inline int avm_pa_pid_snoop_transmit(avm_pid_handle pid_handle,
                                            PKT *pkt,
                                            enum avm_pa_egresstype etype, void *edata)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_data   *pd  = &pa_data;
   struct avm_pa_pkt_info *info = AVM_PKT_INFO(pkt);
   struct avm_pa_session *session, *take_over_session;
   struct avm_pa_egress *egress;
   struct avm_pa_pkt_match match;
   struct avm_pa_pid *ipid, *epid;
   struct avm_pa_vpid *evpid;
   struct vlan_ethhdr *ethh;
   int headroom;
   char buf[64];
   int ret;
   struct sock *sk = NULL;
   struct xfrm_state *x = NULL;
   bool tcp_syn, tcp_ack, tcp_fin, tcp_nodata;
   u16 vlan_id, ingress_vlan_id, is_pvid;

#if AVM_PA_TRACE
   if (ctx->dbgtrace) {
      epid = PA_PID(ctx, pid_handle);
      pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_pid_snoop_transmit(%s)\n",
                            pkt_uniq_id(pkt), epid->cfg.name);
   }
#endif

   if (ctx->disabled)
      return AVM_PA_TX_OK;

   tcp_syn = tcp_fin = tcp_ack = tcp_nodata = false;
   rcu_read_lock_bh();

   epid = PA_PID(ctx, pid_handle);

   if (info->do_not_accelerate) {
      ctx->stats.tx_bypass++;
      if (ctx->dbgnosession) {
         pa_printk(KERN_DEBUG, "Bypass         : do not accelerate\n");
         data2hex(PKT_DATA(pkt), PKT_LEN(pkt), buf, sizeof(buf));
         pa_printk(KERN_DEBUG, "Data           : %s\n", buf);
         pa_printk(KERN_DEBUG, "---------------\n");
      }
      goto tx_bypass;
   }

   if (info->ingress_pid_handle == 0) {
      ctx->stats.tx_local++;
      goto tx_bypass;
   }

   if (etype == avm_pa_egresstype_local) {
      sk = edata;
      if (info->ingress_pid_handle == pid_handle) {
         /* Traffic through "lo" interface triggers sessions
          * but we don't want to waste sessions on local IPC traffic.
          */
         ctx->stats.tx_loopback++;
         goto tx_bypass;
      }
   } else if (etype == avm_pa_egresstype_xfrm) {
      x = edata;
      /* skip if no xfrm_state is given or there's an additional udp encap */
      if (!x || x->encap || !IS_ENABLED(CONFIG_XFRM)) {
         ctx->stats.tx_bypass++;
         goto tx_bypass;
      }
   }

   ipid = PA_PID(ctx, info->ingress_pid_handle);
   ethh = pa_get_ethhdr(epid->egress_framing, pkt);

   take_over_session = NULL;
   if (info->session_handle != 0) {
      BUG_ON(info->egress_pid_handle == 0);
      if (info->egress_pid_handle != pid_handle) {
         take_over_session = PA_SESSION(pd, info->session_handle);
      } else {
         ctx->stats.tx_already++;
         goto tx_bypass;
      }
   }

   ret = pa_egress_precheck(epid, pkt, &info->match, &match);
   if (ret != AVM_PA_RX_OK) {
      ctx->stats.tx_bypass++;
      if (ctx->dbgnosession) {
         pa_printk(KERN_DEBUG, "Bypass         : precheck failed (%d)\n", ret);
         data2hex(PKT_DATA(pkt), PKT_LEN(pkt), buf, sizeof(buf));
         pa_printk(KERN_DEBUG, "Data           : %s\n", buf);
         pa_printk(KERN_DEBUG, "<- pkt_info  ->\n");
         pa_show_pkt_info(info, pa_printk, KERN_DEBUG);
         pa_printk(KERN_DEBUG, "<- pkt_match ->\n");
         pa_show_pkt_full_match(&match, 0, pa_printk, KERN_DEBUG);
         pa_printk(KERN_DEBUG, "---------------\n");
      }
      goto tx_bypass;
   }

   /* Bypass SYN packets but allow session creation by SYN+ACK (no payload),
    * such that the first data segment can be accelerated. After TCP handshake
    * any packet except FIN (or RST) can create sessions.
    */
   if (AVM_PA_PKTTYPE_IPPROTO(match.pkttype) == IPPROTO_TCP) {
      struct tcphdr *tcph;
      u8 *data;

      switch (epid->egress_framing) {
      case avm_pa_framing_ipdev:
      case avm_pa_framing_ptype: data = skb_network_header(pkt); break;
      case avm_pa_framing_dev:   data = skb_mac_header(pkt); break;
      default:                   data = pkt->data; break;
      }

      tcph = (struct tcphdr *) (data + match.l4_offset);
      tcp_syn = PA_TCP_SYN(tcph);
      tcp_fin = PA_TCP_FIN_OR_RST(tcph);
      tcp_ack = PA_TCP_ACK(tcph);
      tcp_nodata = pa_match_is_tcp_nodata(&match, data);
      if ((tcp_syn && !tcp_ack) || tcp_fin) {
         ctx->stats.tx_bypass++;
         if (ctx->dbgnosession) {
            pa_printk(KERN_DEBUG, "Bypass         : %s\n", tcp_fin ? "Fin" : "Syn");
            data2hex(PKT_DATA(pkt), PKT_LEN(pkt), buf, sizeof(buf));
            pa_printk(KERN_DEBUG, "Data           : %s\n", buf);
            pa_printk(KERN_DEBUG, "<- pkt_info  ->\n");
            pa_show_pkt_info(info, pa_printk, KERN_DEBUG);
            pa_printk(KERN_DEBUG, "---------------\n");
         }
         goto tx_bypass;
      }
   }
   
   vlan_id = pa_get_vlan_match(&match) & VLAN_VID_MASK;
   ingress_vlan_id = pa_get_vlan_match(&info->match) & VLAN_VID_MASK;
   /* If vlan is the same ingress and egress then we consider this "non-pvid" */
   is_pvid = vlan_id != ingress_vlan_id;

   /* This won't find bridge sessions which will create duplicate sessions.
    * Well, temporarly as they don't get past pa_session_activate().
    *
    * In case of pid take over, this would find the existing session, thus not
    * proceed with session creation. But we do need to try that to decide
    * whether to take over or not.
    */
   if (take_over_session || !(session = pa_session_search(ipid, &info->match))) {
      int (*probe_session)(struct avm_pa_session *avm_session);
      int (*add_session)(struct avm_pa_session *avm_session);
      int (*add_session_skb)(struct avm_pa_session *avm_session, struct sk_buff *skb);
      int hw_ok;
      bool __maybe_unused rps_ok;
      /* Grab temporary references for use during CREATE state.
       * If the session fails to reach ACTIVE state, then pa_session_kill() will take
       * care of these. Otherwise pa_session_activate() will render them permanent.
       *
       * In any case we don't have to release them ourselves once we have both.
       */
      avm_pid_handle ingress_pid_handle = pa_pid_get(info->ingress_pid_handle);
      avm_pid_handle egress_pid_handle = pa_pid_get(pid_handle);

      if (likely(ingress_pid_handle && egress_pid_handle) || take_over_session)
         session = pa_session_alloc(&info->match);

      if (!session) {
         /* Maybe we couldn't ref a PID, release the other one */
         if (ingress_pid_handle)
            pa_pid_put(ingress_pid_handle);
         if (egress_pid_handle)
            pa_pid_put(egress_pid_handle);

         if (sk) ctx->stats.local_sess_error++;
         else ctx->stats.tx_sess_error++;
         ret = AVM_PA_TX_ERROR_SESSION;
         goto out;
      }

      /* Session State: CREATE */
      session->ingress_uniq_id = info->uniq_id;
      session->ingress_pid_handle = ingress_pid_handle;
      session->ingress_vpid_handle = info->ingress_vpid_handle;
      session->ingress_priority = pa_get_ingress_priority_from_pkt_mark(pkt->mark);
      if (info->routed)
         set_bit(PA_S_ROUTED, &session->flags);
      if (info->no_hw)
         set_bit(PA_S_NO_HW, &session->flags);
      if (info->realtime)
         set_bit(PA_S_REALTIME, &session->flags);
      egress = avm_pa_first_egress(session);
      egress->pid_handle = egress_pid_handle;
      egress->vpid_handle = info->egress_vpid_handle;
      egress->match = match;
      egress->type = etype;
      switch (etype) {
         case avm_pa_egresstype_local:
            egress->local.dev = pkt->dev;
            egress->local.dst = dst_clone(skb_dst(pkt));
            egress->local.skb_iif = pkt->skb_iif;
            rps_ok = false;
            break;
         case avm_pa_egresstype_xfrm:
            if (IS_ENABLED(CONFIG_XFRM)) {
               dev_hold(pkt->dev);
               xfrm_state_hold(x);
               egress->xfrm.dev = pkt->dev;
               egress->xfrm.x = x;
               egress->xfrm.dst = dst_clone(skb_dst(pkt));
               /* Ensure tx_arg == NULL since we always pass the xfrm_state */
               BUG_ON(PA_PID(ctx, egress->pid_handle)->cfg.tx_arg != NULL);
               egress->xfrm.tc_index = pkt->tc_index;
               rps_ok = true;
            }
            break;
         case avm_pa_egresstype_null:
            rps_ok = false;
            break;
         default:
            egress->output.dst = skb_dst(pkt) ? dst_clone(skb_dst(pkt)) : NULL;
            egress->output.priority = pkt->priority;
            egress->output.txq_id = skb_get_queue_mapping(pkt);
            egress->output.tc_index = pkt->tc_index;
            egress->output.skb_iif = pkt->skb_iif;
            egress->output.mac_len = pkt->mac_len;
#ifdef CONFIG_TI_PACKET_PROCESSOR
            egress->output.puma_pktinfo = *SKB_GET_PP_INFO_P(pkt);
#ifdef CONFIG_TI_META_DATA
            egress->output.ti_meta_info = pkt->ti_meta_info;
            egress->output.ti_meta_info2 = pkt->ti_meta_info2;
#endif
#endif
            rps_ok = true;
            break;
      }
#ifdef CONFIG_AVM_PA_RPS
      /* For local sessions we try to keep it on the same CPU as the receiving
       * process. For now we assume the kernel has already selected the best cpu
       * and follow its decision. If RPS was configured explicitly via
       * avm_pa_skb_set_rps() then we commit to that CPU at session creation.
       * Likewise, for drop sessions, we want to drop on the receiving CPU as
       * there is no further packet processing.
       *
       * Otherwise, CPU selection (via hash based on the flow) is deferred
       * to the fast path because there may be multiple flows within a single
       * bridge session.
       */
      if (info->rps_override || !rps_ok) {
         int cpu = info->match.hash & (CONFIG_AVM_PA_RPS_QUEUES-1);
         if (!rps_ok)
            cpu = smp_processor_id();

         if (info->rps_override) {
            if (!cpumask_test_cpu(cpu, &info->rps_allowed_mask)) {
               cpu = cpumask_any_but(&info->rps_fallback_mask, cpu);
               if (cpu >= nr_cpu_ids) {
                  cpu = cpumask_first(&info->rps_fallback_mask);
                  if (cpu >= nr_cpu_ids)
                     cpu = smp_processor_id(); /* RPS disabled */
               }
            }
         }
         session->rps_cpu = cpu + 1;
      }
#endif

      /* Bridged sessions are more efficient, but are subject to a few restrictions:
       * - ethernet header must match, and nothing else
       * - packets must be bridged, not routed (obviously)
       * - must be unicast as broadcast/multicast means multiple egress, which might require
       *   different framings or even local input, which make plain bridging impossible
       * - avm_pa_use_protocol_specific_session() wasn't used to enforce normal sessions
       * - ingress and egress pids permit bridged sessions
       * - hardware_pa permits bridged sessions
       * - bridged sessions aren't disallowed through procfs interface
       * If all conditions are met, bridged sessions can use a few shortcuts such
       * as skipping data modification entirely.
       */
      if (   ethh
          && ctx->bsession_allowed
          && info->routed == 0
          && info->match.casttype == AVM_PA_IS_UNICAST
          && info->use_protocol_specific == 0
          && (ipid->bridging_ok && epid->bridging_ok)
          && (ctx->hw_ppa_disabled || !(ctx->hardware_pa.flags & AVM_HW_F_NO_BSESSION))
          && pa_match_bridged(&info->match, &egress->match)) {
         pa_change_to_bridge_session(session);
         egress->pppoe_offset = AVM_PA_OFFSET_NOT_SET;
         egress->push_l2_len = 0;
         egress->mtu = 0xffff;
      } else {
         /* Carefully avoid doing prioack repeatedly for the same flow. */
         if (egress->type == avm_pa_egresstype_output) {
             u32 priority;
             if (info->prioack_result == PRIOACK_UNKOWN) {
                priority = pa_calc_start_priority(info, epid, pkt->priority);
                if (pkt->priority != priority) {
                   set_bit(PA_S_PRIOACK_CHECK, &session->flags); /* pa_session_prioack_check() will check priority */
#ifdef CONFIG_BLOG
                   blog_skip(pkt, blog_skip_reason_unknown);
#endif
                   if (ctx->dbgprioack) {
                      pa_printk(KERN_DEBUG, "avm_pa: session %d: priority %x:%x TGET (orignal %x:%x)\n",
                                            session->session_handle,
                                            TC_H_MAJ(priority)>>16,
                                            TC_H_MIN(priority),
                                            TC_H_MAJ(pkt->priority)>>16,
                                            TC_H_MIN(pkt->priority));
                   }
                }
             } else if (info->prioack_result == PRIOACK_ACK) {
                /* An earlier session has determined this is an ACK flow,
                 * therefore this new session is using tack priority.
                 */
                priority = pa_calc_tack_priority(info, epid, pkt->priority);
             } else {
                priority = pkt->priority;
             }

             pkt->priority = egress->output.priority = priority;
#ifdef CONFIG_BLOG
             pkt->mark = SKBMARK_SET_Q(pkt->mark, (BROADCOM_MAX_PRIOS - (pkt->priority & TC_H_MIN_MASK)));
             pkt->mark = SKBMARK_SET_FLOW_ID(pkt->mark, 0x1);
#endif
         }

         pa_calc_modify(session, &info->match, &match);
         if (match.encap_offset == AVM_PA_OFFSET_NOT_SET)
            egress->push_l2_len = match.ip_offset;
         else egress->push_l2_len = match.encap_offset;
         headroom =   (session->mod.push_encap_len + egress->push_l2_len)
                    - (session->mod.pull_l2_len + session->mod.pull_encap_len);
         if (headroom > 0 && headroom > session->needed_headroom)
            session->needed_headroom = headroom;
         egress->pppoe_offset = match.pppoe_offset;
         if (egress->pppoe_offset != AVM_PA_OFFSET_NOT_SET)
            egress->pppoe_hdrlen = egress->pppoe_offset + sizeof(struct pppoehdr);
         egress->mtu = epid->cfg.default_mtu;
         if (egress->vpid_handle) {
            evpid = PA_VPID(ctx, egress->vpid_handle);
            if (session->mod.outer_ipversion == 4) {
               if (evpid->cfg.v4_mtu < egress->mtu)
                  egress->mtu = evpid->cfg.v4_mtu;
            } else if (session->mod.outer_ipversion == 6) {
               if (evpid->cfg.v6_mtu < egress->mtu)
                  egress->mtu = evpid->cfg.v6_mtu;
            }
         }
      }
      if (ethh)
         egress->destmac = pa_macaddr_link(ethh->h_dest, egress_pid_handle, is_pvid, vlan_id);
      if (epid->ecfg.cb_len)
         memcpy(egress->cb, &pkt->cb[epid->ecfg.cb_start], epid->ecfg.cb_len);

#if (defined(CONFIG_AVM_GENERIC_CONNTRACK) || defined(CONFIG_AVM_PA_GENERIC_CT))
      if (SKB_GENERIC_CT(pkt)) {
         session->generic_ct = generic_ct_get(SKB_GENERIC_CT(pkt));
         session->generic_ct_dir = SKB_GENERIC_CT_DIR(pkt);
         /* don't do generic_ct_sessionid_set() yet because the session is not
          * activated yet, so don't use the session_handle yet */

#if (defined(CONFIG_BLOG) && IS_ENABLED(CONFIG_NF_CONNTRACK) && defined(CONFIG_AVM_PA_GENERIC_CT_PP_SUPPORT))
         /*
          * If the hw_pa in this case flow cache is turned on:
          * - register the struct generic_ct * of the datapipe flow
          *   at the struct nf_conn * of the netfilter connection
          * - save the struct nf_conn * in the avm_pa session
          *
          * Note: struct nf_conn* allocated in nf_conntrack_in()
          */
         if (!ctx->hw_ppa_disabled && skb_nfct(pkt) && (AVM_PA_PKTTYPE_IPPROTO(info->match.pkttype) == IPPROTO_TCP
            ||   AVM_PA_PKTTYPE_IPPROTO(info->match.pkttype) == IPPROTO_UDP)) {
            struct nf_conn *ctrack;
            nf_conntrack_get(skb_nfct(pkt));
            ctrack = (struct nf_conn *) skb_nfct(pkt);
            ctrack->generic_ct = generic_ct_get(SKB_GENERIC_CT(pkt));
            session->ct = (struct nf_conn *) skb_nfct(pkt);
         }
#endif
      }
#endif

      /* Hardware session offloading
       *
       * If all pre-conditions are met we first probe if the session is acceleratable at
       * all. Actually adding the session to hardware is done once the session
       * is transitioned to ACTIVE state. Before that transition there can be multiple
       * packets racing to create the same session. probe_session() is expected to deal
       * with that, on the assumption that it's mostly stateless and doesn't talk to
       * the hardware yet.
       *
       * In contrast, add_session() does the actual work and we do not want to
       * confuse that part with multiple, equal sessions.
       *
       * Pre-conditions for offloading:
       * a) prioack_check is NOT set. Not all HW-PA provide packet and byte counters so
       *    we must use counters from software acceleration.
       * b) HW-PA is available
       * c) HW-PA is not disabled
       * d) no_hw flag was not set in AVM_PKT_INFO (eg. by sch_tack)
       * e) On vr9, not a local session (JZ-26496)
       *
       * In the future probe_session will become mandatory. For now it's optional
       * and if it's not provided we assume the session can be offloaded.
       */
      probe_session = rcu_dereference(ctx->hardware_pa.probe_session);
      add_session = rcu_dereference(ctx->hardware_pa.add_session);
      add_session_skb = rcu_dereference(ctx->hardware_pa.add_session_skb);
      hw_ok =  !test_bit(PA_S_PRIOACK_CHECK, &session->flags)  // a
            && (add_session || add_session_skb)                // b
            && !ctx->hw_ppa_disabled                           // c
            && !test_bit(PA_S_NO_HW, &session->flags);         // d
#ifdef CONFIG_VR9
      if (etype == avm_pa_egresstype_local) hw_ok = 0;         // e
#endif

      if (hw_ok && probe_session)
         hw_ok = probe_session(session) != AVM_PA_TX_ERROR_SESSION;
      /* A lower-level pid is taking over. This creates a new session,
       * for many reasons:
       * - updating the egress would be subject to race conditions
       *   since the session is already in state ACTIVE
       * - the egress match info and mod record must be parsed again
       *   (for example, vlan may change)
       * - as a result, the classification as bridged session may change
       * - is super easy to implement (really just need to flush here)
       *
       * Keep in mind that this check is only done for the first packet
       * (is_accelerated == 0).
       *
       * If the current pid performs traffic shaping, this take over is
       * usually prevented since QoS would be bypassed. Except if the new
       * session would qualify for HW offloading. Then we prefer offloading
       * because we typically perform adequate HW-assisted QoS.
       *
       * If the current pid performs prioack check and the new pid doesn't,
       * then this take over is also prevented. Once prioack decision is
       * completed then the new session may take over, for this reason
       * take over is possible in the accelerated path.
       *
       */
      if (  take_over_session
        && (hw_ok || info->shaped == 0)
        && !test_bit(PA_S_PRIOACK_CHECK, &take_over_session->flags)) {
         ctx->stats.tx_pid_change++;
         pa_session_flush(take_over_session, "pid take over");
      }

      /*
       * The selector is asked at last, because the session is not fully setup until now.
       * The session framework needs complete session info to make an informed decision.
       */
      if (ctx->filter_enabled && !avm_pa_session_is_selected(&ctx->accel_filter, session)) {
         ctx->stats.tx_bypass++;
         if (ctx->dbgnosession) {
            pa_printk(KERN_DEBUG, "Acceleration filtered\n");
            data2hex(PKT_DATA(pkt), PKT_LEN(pkt), buf, sizeof(buf));
            pa_printk(KERN_DEBUG, "Data          : %s\n", buf);
            pa_show_pkt_info(info, pa_printk, KERN_DEBUG);
         }
         pa_session_kill(session, "filtered");
         goto tx_bypass;
      }

      /* activate guarantees that only one session of a kind exists but it also
       * hands over the session to the lookup so that newer packets (perhaps
       * on another CPU) can already use this session before we return */
      ret = pa_session_activate(session);
      if (ret != AVM_PA_TX_SESSION_ADDED)
         goto out;

      /* Session State: ACTIVE */
      avm_pa_sg_session_link(session, pkt);

#if AVM_PA_TRACE
      if (ctx->dbgtrace) {
         pa_printk(KERN_DEBUG, "avm_pa: add session %d (%s)\n",
                                session->session_handle, ipid->cfg.name);
      }
#endif
      if (ctx->dbgsession) {
         if (session->bsession) {
            pa_printk(KERN_DEBUG, "\navm_pa: new bsession:\n");
            pa_show_bsession(session->bsession, pa_printk, KERN_DEBUG);
         } else {
            pa_printk(KERN_DEBUG, "\navm_pa: new session:\n");
            pa_show_session(session, pa_printk, KERN_DEBUG);
         }
      }

      /* Do the actual offload. If this succeeds the session will add a reference
       * to the hardware pa.
       */
      if (hw_ok && pa_hw_pa_get()) {
         int added = add_session_skb ? add_session_skb(session, pkt) : add_session(session);
         if (added == AVM_PA_TX_SESSION_ADDED) {
            set_bit(PA_S_IN_HW, &session->flags);
         } else {
            pa_hw_pa_put();
         }
      }
      if (etype == avm_pa_egresstype_local)
         ctx->stats.local_sess_ok++;
      else if (etype == avm_pa_egresstype_null)
         ctx->stats.drop_sess_ok++;
      else if (IS_ENABLED(CONFIG_XFRM) && etype == avm_pa_egresstype_xfrm)
         ctx->stats.xfrm_sess_ok++;
      else
         ctx->stats.tx_sess_ok++;
      info->session_handle = session->session_handle;
      info->egress_pid_handle = pid_handle;
      ret = AVM_PA_TX_SESSION_ADDED;
      goto out;
   }

   /* It's a slow packet with existing session, this happens in case of
    * active packet tracing or batched rx processing (i.e. GRX) or concurrent
    * packet processing (e.g. hawkeye).
    *
    * We only add egress to the session if the packet was cloned from the original one.
    */
   if (info->uniq_id != session->ingress_uniq_id) {
      /* ignore concurrent packet (not cloned from original skb in ingress) */
      if (etype == avm_pa_egresstype_local) {
         ctx->stats.local_sess_exists++;
      } else {
         ctx->stats.tx_sess_exists++;
      }
      pa_session_update(session);
      /* use priority we decide to use for this egress */
      avm_pa_for_each_egress(egress, session) {
         if (egress->pid_handle != pid_handle)
            continue;
         if (egress->type == avm_pa_egresstype_output) {
            pkt->priority = egress->output.priority;
#ifdef CONFIG_BLOG
            if (avm_pa_pid_tack_enabled(PA_PID(ctx, egress->pid_handle))) {
               pkt->mark = SKBMARK_SET_Q(pkt->mark, (BROADCOM_MAX_PRIOS - (pkt->priority & TC_H_MIN_MASK)));
               pkt->mark = SKBMARK_SET_FLOW_ID(pkt->mark, 0x1);
            }
#endif
            break;
         }
      }
      ret = AVM_PA_TX_SESSION_EXISTS;
      goto out;
   }

   /* Add egress for cloned packet.
    *
    * We don't check if the payload truly changed compared to existing egress,
    * if the slow path demands duplicated packets we follow suit.
    */
   if ((egress = pa_egress_alloc()) != NULL) {
      u16 mtu;
      egress->pid_handle = pa_pid_get(pid_handle);
      if (unlikely(!egress->pid_handle)) {
         pa_egress_free(egress);
         goto no_egress;
      }
      egress->vpid_handle = info->egress_vpid_handle;
      egress->match = match;
      egress->type = etype;
      if (etype == avm_pa_egresstype_local) {
         egress->local.dev = pkt->dev;
         egress->local.dst = dst_clone(skb_dst(pkt));
         egress->local.skb_iif = pkt->skb_iif;
      } else if (etype == avm_pa_egresstype_output) {
         egress->output.priority = pkt->priority;
         egress->output.txq_id = skb_get_queue_mapping(pkt);
         egress->output.tc_index = pkt->tc_index;
         egress->output.skb_iif = pkt->skb_iif;
         egress->output.mac_len = pkt->mac_len;
#ifdef CONFIG_TI_PACKET_PROCESSOR
         egress->output.puma_pktinfo = *SKB_GET_PP_INFO_P(pkt);
#endif
      }
      /* multi-egress sessions (multicast) are always full sessions,
       * i.e. vlan info is regular part of the session.
       */
      if (ethh)
         egress->destmac = pa_macaddr_link(ethh->h_dest, egress->pid_handle, is_pvid, vlan_id);
      if (epid->ecfg.cb_len)
         memcpy(egress->cb, &pkt->cb[epid->ecfg.cb_start], epid->ecfg.cb_len);
      mtu = epid->cfg.default_mtu;
      if (egress->vpid_handle) {
         evpid = PA_VPID(ctx, egress->vpid_handle);
         if (session->mod.outer_ipversion == 4) {
            if (evpid->cfg.v4_mtu < mtu)
               mtu = evpid->cfg.v4_mtu;
         } else if (session->mod.outer_ipversion == 6) {
            if (evpid->cfg.v6_mtu < mtu)
               mtu = evpid->cfg.v6_mtu;
         }
      }
      if (session->bsession) {
         egress->pppoe_offset = AVM_PA_OFFSET_NOT_SET;
         egress->push_l2_len = 0;
         mtu = 0xffff;
      } else {
         /*
          * currently we do TACK/TGET handling only on egress[0].
          * So we keep SKBs original priority.
          *   2016-10-14 calle
          */
         if (match.encap_offset == AVM_PA_OFFSET_NOT_SET)
            egress->push_l2_len = match.ip_offset;
         else egress->push_l2_len = match.encap_offset;
         headroom =   (session->mod.push_encap_len + egress->push_l2_len)
                    - (session->mod.pull_l2_len + session->mod.pull_encap_len);
         if (headroom > 0 && headroom > session->needed_headroom)
            session->needed_headroom = headroom;
         egress->pppoe_offset = match.pppoe_offset;
         if (egress->pppoe_offset != AVM_PA_OFFSET_NOT_SET)
            egress->pppoe_hdrlen = egress->pppoe_offset + sizeof(struct pppoehdr);
      }
      egress->mtu = mtu;

      /* Atomically add the egress, after initialization. Fixes JZ-26868. */
      spin_lock(&avm_pa_lock);
      hlist_add_behind_rcu(&egress->egress_list, &avm_pa_first_egress(session)->egress_list);
      ++session->negress;
      spin_unlock(&avm_pa_lock);

      if (test_bit(PA_S_IN_HW, &session->flags) && pa_hw_pa_get()) {
         pa_session_handle_stats(session);
         /* We must be careful that two (or more) egress don't race here
          * for calling pa_hw_pa_put(), as the session holds at most one reference.
          * When change_session() is not available, also carefully avoid
          * calling remove_session() concurrently.
          */
         if (ctx->hardware_pa.change_session) {
            if ((*ctx->hardware_pa.change_session)(session) != AVM_PA_TX_EGRESS_ADDED) {
               if (test_and_clear_bit(PA_S_IN_HW, &session->flags)) {
                  pa_hw_pa_put();
               }
            }
         } else if (test_and_clear_bit(PA_S_IN_HW, &session->flags)) {
            /* The bit is cleared before calling remove_session() since we couldn't
             * prevent other hw_pa calls during long-running remove_session() calls.
             */
            (*ctx->hardware_pa.remove_session)(session);
            pa_hw_pa_put();
         }
         pa_hw_pa_put();
      }
      ctx->stats.tx_egress_ok++;
      if (ctx->dbgsession) {
         pa_printk(KERN_DEBUG, "\navm_pa: new egress:\n");
         pa_show_session(session, pa_printk, KERN_DEBUG);
      }

      info->session_handle = session->session_handle;
      info->egress_pid_handle = pid_handle;
      ret = AVM_PA_TX_EGRESS_ADDED;
      goto out;
   }

no_egress:
   /*
    * JZ-56718: flush the entire session and try to allocate
    * all egress ports with the next set of slow path packets
    */
   pa_session_flush(session, "no egress left");
   ctx->stats.tx_egress_error++;
   ret = AVM_PA_TX_ERROR_EGRESS;
   goto out;

tx_bypass:
   /* 
    * set TACK priority for TCP control and ack only packets
    *  2016-10-14 calle
    */
   if (avm_pa_pid_tack_enabled(epid)) {
      if (tcp_syn || tcp_fin || (tcp_ack && tcp_nodata)) {
         pkt->priority = pa_calc_tack_priority(info, epid, pkt->priority);
         epid->prioack_acks++;
      }
   }
   ret = AVM_PA_TX_BYPASS;
out:
   rcu_read_unlock_bh();
   return ret;
}

int _avm_pa_dev_snoop_transmit(struct avm_pa_dev_info *devinfo, struct sk_buff *skb)
{
   if (devinfo->vpid_handle)
      avm_pa_vpid_snoop_transmit(devinfo->vpid_handle, skb);
   if (devinfo->pid_handle)
      return avm_pa_pid_snoop_transmit(devinfo->pid_handle, skb, avm_pa_egresstype_output, 0);
   return AVM_PA_TX_OK;
}
EXPORT_SYMBOL(_avm_pa_dev_snoop_transmit);

/*
 * Snoop an outgoing packet for the vpid attached to devinfo only;
 * does nothing when devinfo carries no vpid handle.
 */
void avm_pa_dev_vpid_snoop_transmit(struct avm_pa_dev_info *devinfo, PKT *pkt)
{
   if (!devinfo->vpid_handle)
      return;

   avm_pa_vpid_snoop_transmit(devinfo->vpid_handle, pkt);
}
EXPORT_SYMBOL(avm_pa_dev_vpid_snoop_transmit);

/*
 * Snoop skb on devinfo's pid as a "local" egress, handing sk through as
 * the egress argument. The snoop result is deliberately discarded.
 */
void _avm_pa_add_local_session(struct avm_pa_dev_info *devinfo, struct sk_buff *skb, struct sock *sk)
{
   (void)avm_pa_pid_snoop_transmit(devinfo->pid_handle,
                                   skb,
                                   avm_pa_egresstype_local,
                                   sk);
}
EXPORT_SYMBOL(_avm_pa_add_local_session);

/*
 * Snoop skb on devinfo's pid as a "null" egress (drop session).
 *
 * Nothing happens if the packet is already marked as accelerated or if
 * devinfo has no pid handle. The snoop result is discarded.
 */
void avm_pa_add_drop_session(struct avm_pa_dev_info *devinfo, struct sk_buff *skb)
{
   struct avm_pa_pkt_info *pktinfo = AVM_PKT_INFO(skb);

   if (pktinfo->is_accelerated != 0)
      return;
   if (!devinfo->pid_handle)
      return;

   (void)avm_pa_pid_snoop_transmit(devinfo->pid_handle, skb, avm_pa_egresstype_null, NULL);
}
EXPORT_SYMBOL(avm_pa_add_drop_session);

/*
 * Snoop skb on devinfo's pid as an "xfrm" egress, passing the xfrm state x
 * as the egress argument.
 *
 * Only done for packets not yet marked as accelerated, and only when the
 * kernel is built with CONFIG_XFRM.
 */
void avm_pa_add_xfrm_session(struct avm_pa_dev_info *devinfo, struct sk_buff *skb, struct xfrm_state *x)
{
   if (likely(AVM_PKT_INFO(skb)->is_accelerated))
      return;
   if (!IS_ENABLED(CONFIG_XFRM))
      return;

   avm_pa_pid_snoop_transmit(devinfo->pid_handle, skb, avm_pa_egresstype_xfrm, x);
}

/*
 * Turn the local session referenced by the packet into an RTP session.
 *
 * pkt      - packet whose pkt info carries the session handle
 * sk       - socket stored in the egress; a reference is taken on it
 * transmit - callback stored in the egress
 *
 * The conversion is done under avm_pa_lock and only if the session exists,
 * has at most one egress and that egress is currently of type "local".
 * An egress that already is of type "rtp" is counted in rtp_sess_exists,
 * any other type in rtp_sess_error; in both cases nothing is changed.
 */
void avm_pa_add_rtp_session(PKT *pkt,
                            struct sock *sk,
                            void (*transmit)(struct sock *sk, PKT *pkt))
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pkt_info *pktinfo = AVM_PKT_INFO(pkt);
   struct avm_pa_session *sess;
   struct avm_pa_egress *first;

   spin_lock_bh(&avm_pa_lock);

   sess = pa_session_get(pktinfo->session_handle);
   if (sess != 0 && sess->negress <= 1) {
      first = avm_pa_first_egress(sess);

      if (first->type == avm_pa_egresstype_rtp) {
         ctx->stats.rtp_sess_exists++;
      } else if (first->type != avm_pa_egresstype_local) {
         ctx->stats.rtp_sess_error++;
      } else {
         /* local -> rtp: mark realtime, then fill in the rtp egress data */
         set_bit(PA_S_REALTIME, &sess->flags);
         first->type = avm_pa_egresstype_rtp;
         first->rtp.skb_iif = pkt->skb_iif;
         sock_hold(sk);
         first->rtp.sk = sk;
         first->rtp.transmit = transmit;
         ctx->stats.rtp_sess_ok++;
      }
   }

   spin_unlock_bh(&avm_pa_lock);
}
EXPORT_SYMBOL(avm_pa_add_rtp_session);

/*
 * Placeholder: drop/filter sessions are not implemented.
 * Calling this only emits a one-time warning; pkt is not touched.
 */
void avm_pa_filter_packet(PKT *pkt)
{
   (void)pkt;

   WARN_ONCE(1, "%s() does nothing. Remove the call!\n", __func__);
}
EXPORT_SYMBOL(avm_pa_filter_packet);

/*
 * Register a pid for devinfo, optionally tied to a separate ingress pid.
 *
 * devinfo            - receives the chosen pid handle
 * pid_handle         - slot to use, or 0 to have a slot selected
 * cfg                - pid configuration handed to pa_pid_init()
 * ingress_pid_handle - pid to record as ingress pid; must already be
 *                      registered. 0 makes the new pid its own ingress pid.
 *
 * Slot selection when pid_handle is 0:
 *   1. the first slot whose configured name equals cfg->name
 *      (compared over at most AVM_PA_MAX_NAME characters),
 *   2. otherwise the first slot that has neither a pid handle nor a
 *      non-zero reference count.
 *
 * Returns 0 on success, -EBUSY if devinfo already has a pid handle
 * (BUGs if a different explicit pid_handle was requested), -EINVAL if
 * ingress_pid_handle is not registered, -ENOMEM if no slot was found.
 */
int avm_pa_dev_pidhandle_register_with_ingress(struct avm_pa_dev_info *devinfo,
                                               avm_pid_handle pid_handle,
                                               struct avm_pa_pid_cfg *cfg,
                                               avm_pid_handle ingress_pid_handle)
{
   struct avm_pa_global *ctx = &pa_glob;
   avm_pid_handle slot = pid_handle;
   avm_pid_handle n;

   /* Already registered */
   if (devinfo->pid_handle) {
      BUG_ON(pid_handle && devinfo->pid_handle != pid_handle);
      return -EBUSY;
   }

   /* ingress pid must be registered beforehand */
   if (ingress_pid_handle
       && PA_PID(ctx, ingress_pid_handle)->pid_handle != ingress_pid_handle)
      return -EINVAL;

   /* No explicit slot requested: prefer a slot carrying the same name */
   if (!slot) {
      for (n = 1; n < CONFIG_AVM_PA_MAX_PID; n++) {
         if (strncmp(cfg->name, PA_PID(ctx, n)->cfg.name, AVM_PA_MAX_NAME) == 0) {
            slot = n;
            break;
         }
      }
   }

   /* Still nothing: take the first slot that is neither in use nor referenced */
   if (!slot) {
      for (n = 1; n < CONFIG_AVM_PA_MAX_PID; n++) {
         if (!PA_PID(ctx, n)->pid_handle && !kref_read(&PA_PID(ctx, n)->ref)) {
            slot = n;
            break;
         }
      }
   }

   if (!slot)
      return -ENOMEM;

   pa_pid_init(slot, cfg);
   PA_PID(ctx, slot)->ingress_pid_handle = ingress_pid_handle ? ingress_pid_handle : slot;
   devinfo->pid_handle = slot;

   return 0;
}
EXPORT_SYMBOL(avm_pa_dev_pidhandle_register_with_ingress);

/*
 * Register a pid for devinfo in the given slot (0 = select one) without a
 * separate ingress pid. Thin wrapper around
 * avm_pa_dev_pidhandle_register_with_ingress(); returns its result.
 */
int avm_pa_dev_pidhandle_register(struct avm_pa_dev_info *devinfo,
                                  avm_pid_handle pid_handle,
                                  struct avm_pa_pid_cfg *cfg)
{
   const avm_pid_handle no_ingress = 0;

   return avm_pa_dev_pidhandle_register_with_ingress(devinfo, pid_handle,
                                                     cfg, no_ingress);
}
EXPORT_SYMBOL(avm_pa_dev_pidhandle_register);

/*
 * Register a pid for devinfo in a selected slot and record
 * ingress_pid_handle as its ingress pid. Thin wrapper around
 * avm_pa_dev_pidhandle_register_with_ingress(); returns its result.
 */
int avm_pa_dev_pid_register_with_ingress(struct avm_pa_dev_info *devinfo,
                                         struct avm_pa_pid_cfg *cfg,
                                         avm_pid_handle ingress_pid_handle)
{
   const avm_pid_handle any_slot = 0;

   return avm_pa_dev_pidhandle_register_with_ingress(devinfo, any_slot,
                                                     cfg, ingress_pid_handle);
}
EXPORT_SYMBOL(avm_pa_dev_pid_register_with_ingress);

int avm_pa_dev_pid_register(struct avm_pa_dev_info *devinfo,
                            struct avm_pa_pid_cfg *cfg)
{
   return avm_pa_dev_pidhandle_register_with_ingress(devinfo, 0, cfg, 0);
}
EXPORT_SYMBOL(avm_pa_dev_pid_register);

static void pa_dev_queue_xmit(void *arg, struct sk_buff *skb)
{
   int rc;
   skb->dev = (struct net_device *)arg;
   rc = dev_queue_xmit(skb);
   if (dev_xmit_complete(rc) == false && net_ratelimit()) {
      pr_err("%s(%s): xmit failure: %d\n", __func__, skb->dev->name, rc);
   }
}

int avm_pa_dev_register(struct net_device *dev)
{
   struct avm_pa_pid_cfg cfg = {0};

   BUG_ON(!dev);

   if (dev->type == ARPHRD_NONE)
      cfg.framing = avm_pa_framing_ipdev;
   else
      cfg.framing = avm_pa_framing_dev;

   snprintf(cfg.name, sizeof(cfg.name), "%s", dev->name);
   cfg.tx_func = pa_dev_queue_xmit;
   cfg.tx_arg = dev;
   return avm_pa_dev_pidhandle_register_with_ingress(AVM_PA_DEVINFO(dev), 0, &cfg, 0);
}
EXPORT_SYMBOL(avm_pa_dev_register);

/*
 * tx_func installed by avm_pa_dev_register_ll(): hand skb to the driver
 * through dev_hard_start_xmit() while holding HARD_TX_LOCK, instead of
 * going through dev_queue_xmit().
 *
 * arg - the struct net_device to transmit on (cfg.tx_arg)
 * skb - packet to send
 *
 * If the selected tx queue is stopped, or the transmit is reported as not
 * complete by dev_xmit_complete(), the failure is logged (rate limited)
 * and the remaining skb (list) is freed with kfree_skb_list().
 */
static void pa_dev_start_xmit(void *arg, struct sk_buff *skb)
{
   int rc;
   struct net_device *dev = arg;
   /* tx queue is chosen from the queue mapping already stored in the skb */
   struct netdev_queue *txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
   /* only consumed by the >= 4.16 validate_xmit_skb_list() signature */
   bool __maybe_unused again = false;

   skb->dev = dev;

   /* validate_xmit_skb_list() is only called on kernels >= 3.18; its
    * signature gained a third argument with 4.16. Both variants share the
    * failure handling below.
    */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0)
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0)
   if (!(skb = validate_xmit_skb_list(skb, dev, &again))) {
#else
   if (!(skb = validate_xmit_skb_list(skb, dev))) {
#endif
      /* NOTE(review): nothing is freed here; presumably the skb was
       * already consumed by validate_xmit_skb_list() - confirm.
       */
      if (net_ratelimit())
         pr_err("%s(%s): validate_xmit_skb_list() failed\n", __func__, dev->name);
      return;
   }
#endif

   HARD_TX_LOCK(dev, txq, smp_processor_id());
   if (!netif_tx_queue_stopped(txq)) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0)
      /* newer API: returns the skb and reports the status via rc */
      skb = dev_hard_start_xmit(skb, dev, txq, &rc);
#else
      /* older API: status is the return value, skb is left as is */
      rc = dev_hard_start_xmit(skb, dev, txq);
#endif
   } else {
      /* queue stopped: do not call the driver, treat as busy */
      rc = NETDEV_TX_BUSY;
   }
   HARD_TX_UNLOCK(dev, txq);

   if (!dev_xmit_complete(rc)) {
      if (net_ratelimit())
         pr_err("%s(%s): xmit failure: %d\n", __func__, dev->name, rc);
      kfree_skb_list(skb);
   }
}

int avm_pa_dev_register_ll(struct net_device *dev)
{
   struct avm_pa_pid_cfg cfg = {0};

   BUG_ON(!dev);
   /* With NETIF_F_LLTX HARD_TX_LOCK() becomes a no-op instead of an expensive spinlock */
   if (!(dev->features & NETIF_F_LLTX)) {
      pr_warn("%s: Consider NETIF_F_LLTX for device %s for better performance\n", __func__, dev->name);
   }

   cfg.framing = avm_pa_framing_ether;
   snprintf(cfg.name, sizeof(cfg.name), "%s", dev->name);
   cfg.tx_func = pa_dev_start_xmit;
   cfg.tx_arg = dev;
   return avm_pa_dev_pidhandle_register_with_ingress(AVM_PA_DEVINFO(dev), 0, &cfg, 0);
}
EXPORT_SYMBOL(avm_pa_dev_register_ll);

#ifdef CONFIG_AVM_PA_TX_NAPI
/*
 * Register a pid for devinfo (slot selected automatically, no separate
 * ingress pid) and set up its TX NAPI context on dev.
 *
 * On successful registration the pid's tx_napi is added to dev with
 * pa_dev_tx_napi_poll as poll function and enabled, the tx_napi_pkts
 * queue is initialised and, on SMP builds, the tx_napi_tsk tasklet is
 * initialised.
 *
 * Returns the result of avm_pa_dev_pidhandle_register_with_ingress();
 * nothing is set up if that is non-zero.
 */
int avm_pa_dev_pid_register_tx_napi(struct avm_pa_dev_info *devinfo,
                                   struct avm_pa_pid_cfg *cfg,
                                   struct net_device *dev)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *pid;
   int ret;

   ret = avm_pa_dev_pidhandle_register_with_ingress(devinfo, 0, cfg, 0);
   if (ret)
      return ret;

   pid = PA_PID(ctx, devinfo->pid_handle);

   netif_napi_add(dev, &pid->tx_napi, pa_dev_tx_napi_poll, CONFIG_AVM_PA_NAPI_WEIGHT);
   napi_enable(&pid->tx_napi);
   skb_queue_head_init(&pid->tx_napi_pkts);
#ifdef CONFIG_SMP
   tasklet_init(&pid->tx_napi_tsk, (void *) __do_schedule_napi, (unsigned long) &pid->tx_napi);
#endif

   return ret;
}
EXPORT_SYMBOL(avm_pa_dev_pid_register_tx_napi);
#endif

/*
 * Install the extended configuration of a registered pid.
 *
 * pid_handle - pid to configure
 * ecfg       - new extended configuration. ecfg->version selects which
 *              fields are taken over (each version includes those of the
 *              lower ones):
 *                 0: flags
 *                 1: cb_start, cb_len
 *                 2: rx_slow, rx_slow_arg
 *                 3: pid_group
 *              Fields not covered by the version are set to zero. For any
 *              other version no fields are taken over.
 *
 * The new configuration is assembled and validated in a local copy and
 * only then written to the pid. A rejected request therefore leaves the
 * previous configuration in place, and a cb range that does not fit into
 * skb->cb is never installed.
 *
 * Returns 0 on success, -1 if pid_handle is not registered and -2 if
 * cb_start/cb_len do not fit into skb->cb.
 */
int avm_pa_pid_set_ecfg(avm_pid_handle pid_handle,
                        struct avm_pa_pid_ecfg *ecfg)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *pid = PA_PID(ctx, pid_handle);
   unsigned int cbsize = sizeof(((struct sk_buff *)0)->cb);
   struct avm_pa_pid_ecfg new_ecfg;

   if (pid->pid_handle != pid_handle)
      return -1;

   memset(&new_ecfg, 0, sizeof(new_ecfg));
   switch (ecfg->version) {
      case 3:
         new_ecfg.pid_group = ecfg->pid_group;
         /* fall through */
      case 2:
         new_ecfg.rx_slow = ecfg->rx_slow;
         new_ecfg.rx_slow_arg = ecfg->rx_slow_arg;
         /* fall through */
      case 1:
         new_ecfg.cb_start = ecfg->cb_start;
         new_ecfg.cb_len = ecfg->cb_len;
         /* fall through */
      case 0:
         new_ecfg.flags = ecfg->flags;
         break;
      default:
         /* unknown version: nothing is taken over */
         break;
   }

   /* Range check written so that cb_start + cb_len cannot wrap around */
   if (new_ecfg.cb_start > cbsize || new_ecfg.cb_len > cbsize - new_ecfg.cb_start)
      return -2;

   new_ecfg.version = ecfg->version;
   pid->ecfg = new_ecfg;
   return 0;
}
EXPORT_SYMBOL(avm_pa_pid_set_ecfg);

/*
 * Change the ingress and egress framing of a registered pid.
 *
 * pid_handle      - pid to modify
 * ingress_framing - new ingress framing. "ptype" is only accepted if the
 *                   pid already uses it on ingress; in that case tx_func
 *                   and tx_arg are cleared. Every other known framing is
 *                   stored and cfg.ptype is reset.
 * egress_framing  - new egress framing. "dev" is stored as "ether",
 *                   "ptype" is rejected. Every other known framing is
 *                   stored as is; cfg.ptype is reset.
 *
 * The ingress part is applied before the egress part is looked at, so it
 * stays applied when the egress framing is rejected.
 *
 * Returns 0 on success, -1 if pid_handle is not registered, -2 for a
 * rejected ingress framing and -3 for a rejected egress framing.
 */
int avm_pa_pid_set_framing(avm_pid_handle pid_handle,
                           enum avm_pa_framing ingress_framing,
                           enum avm_pa_framing egress_framing)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *pid = PA_PID(ctx, pid_handle);

   if (pid->pid_handle != pid_handle)
      return -1;

   /* ingress side */
   switch (ingress_framing) {
      case avm_pa_framing_ptype:
         if (pid->ingress_framing != avm_pa_framing_ptype)
            return -2;
         pid->cfg.tx_func = NULL;
         pid->cfg.tx_arg = NULL;
         break;
      case avm_pa_framing_dev:
      case avm_pa_framing_ipdev:
      case avm_pa_framing_ip:
      case avm_pa_framing_ppp:
      case avm_pa_framing_ether:
      case avm_pa_framing_llcsnap:
         pid->cfg.ptype = 0;
         pid->ingress_framing = ingress_framing;
         break;
      default:
         break;
   }

   /* egress side */
   switch (egress_framing) {
      case avm_pa_framing_ptype:
         return -3;
      case avm_pa_framing_dev:
         pid->cfg.ptype = 0;
         pid->egress_framing = avm_pa_framing_ether;
         break;
      case avm_pa_framing_ipdev:
      case avm_pa_framing_ip:
      case avm_pa_framing_ppp:
      case avm_pa_framing_ether:
      case avm_pa_framing_llcsnap:
         pid->cfg.ptype = 0;
         pid->egress_framing = egress_framing;
         break;
      default:
         break;
   }

   return 0;
}
EXPORT_SYMBOL(avm_pa_pid_set_framing);

/*
 * Print one summary line (plus optional detail lines) for every pid slot
 * whose pid_handle is non-zero.
 *
 * fprintffunc: printf-like output callback
 * arg:         opaque first argument handed to fprintffunc
 *
 * Detail lines are only emitted for non-default settings: activated
 * rx/tx channels, disabled bridging, pid_group, rx_slow, cb_start/cb_len
 * and non-zero TACK/TGET priorities.
 */
static void pa_show_pids(pa_fprintf fprintffunc, void *arg)

{
   struct avm_pa_global *ctx = &pa_glob;
   char buf[128];
   avm_pid_handle n;
   unsigned int i;

#define INDENT "    "
   for (n=1; n < CONFIG_AVM_PA_MAX_PID; n++) {
      struct avm_pa_pid *pid = PA_PID(ctx, n);
      struct avm_pa_pid_ecfg *ecfg = &pid->ecfg;
      int refcount;

      if (pid->pid_handle == 0)
         continue;

      refcount = kref_read(&pid->ref);
      if (pid->ingress_pid_handle == pid->pid_handle) {
         (*fprintffunc)(arg, "PID%-3d: (%5d) %-5s %-5s %10lu %s %s (ref %d)\n",
                             pid->pid_handle,
                             pid->cfg.default_mtu,
                             framing2str(pid->ingress_framing),
                             framing2str(pid->egress_framing),
                             (unsigned long)pid->tx_pkts,
                             pid->cfg.name,
                             pidflags2str(ecfg->flags, buf, sizeof(buf)),
                             refcount);
      } else {
         /* pid uses a different pid for ingress: show that one as well */
         (*fprintffunc)(arg, "PID%-3d: (%5d) %-5s %-5s %10lu %s (ingress %d %s) %s (ref %d)\n",
                             pid->pid_handle,
                             pid->cfg.default_mtu,
                             framing2str(pid->ingress_framing),
                             framing2str(pid->egress_framing),
                             (unsigned long)pid->tx_pkts,
                             pid->cfg.name,
                             pid->ingress_pid_handle,
                             PA_PID(ctx, pid->ingress_pid_handle)->cfg.name,
                             pidflags2str(ecfg->flags, buf, sizeof(buf)),
                             refcount);
      }
      if (pid->rx_channel_activated || pid->tx_channel_activated) {
         (*fprintffunc)(arg, INDENT "rx_channel %d tx_channel %d\n",
                             pid->rx_channel_activated ? 1 : 0,
                             pid->tx_channel_activated ? 1 : 0);
      }

      if (!pid->bridging_ok)
         (*fprintffunc)(arg, INDENT "bridging disabled\n");
      if (ecfg->pid_group)
         (*fprintffunc)(arg, INDENT "pid_group %d\n", ecfg->pid_group);
      if (ecfg->rx_slow)
         (*fprintffunc)(arg, INDENT "rx_slow %pf\n", ecfg->rx_slow);
      if (ecfg->cb_start || ecfg->cb_len)
         (*fprintffunc)(arg, INDENT "cb_start %d cb_len %d\n", ecfg->cb_start, ecfg->cb_len);

      if (avm_pa_pid_tack_enabled(pid) || avm_pa_pid_tget_enabled(pid)) {
         for (i = 0; i < AVM_PA_MAX_PRIOS; ++i) {
            unsigned int tackprio = 0;
            unsigned int tgetprio = 0;
            if (avm_pa_pid_tack_enabled(pid))
               tackprio = avm_pa_pid_tack_prio(pid, i);
            if (avm_pa_pid_tget_enabled(pid))
               tgetprio = avm_pa_pid_tget_prio(pid, i);
            if (tackprio || tgetprio) {
               (*fprintffunc)(arg, INDENT "prio[%u]:", i);
               if (tackprio)
                  (*fprintffunc)(arg, " tack_prio = 0x%x", tackprio);
               /* was mislabelled "tack_prio", which made TGET and TACK
                * priorities indistinguishable in the output */
               if (tgetprio)
                  (*fprintffunc)(arg, " tget_prio = 0x%x", tgetprio);
               (*fprintffunc)(arg, "\n");
            }
         }
      }
   }
#undef INDENT
}

/*
 * Register (or re-register) a vpid for a device.
 *
 * Slot selection, in order of precedence:
 *   1. the handle already stored in devinfo (a differing non-zero
 *      vpid_handle request is silently ignored and 0 is returned),
 *   2. the explicitly requested vpid_handle,
 *   3. the first slot whose stored name equals cfg->name,
 *   4. the first slot with vpid_handle == 0.
 *
 * The chosen slot is wiped and filled from *cfg. MTUs of 0 in *cfg are
 * replaced by 1500 (this modifies the caller's cfg).
 *
 * Returns 0 on success, -1 if no slot is available.
 */
int avm_pa_dev_vpidhandle_register(struct avm_pa_dev_info *devinfo,
                                   avm_vpid_handle vpid_handle,
                                   struct avm_pa_vpid_cfg *cfg)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_vpid *vpid;
   avm_vpid_handle slot = 0; /* 0 == nothing selected yet, valid slots start at 1 */

   if (devinfo->vpid_handle) {
      if (vpid_handle && devinfo->vpid_handle != vpid_handle)
         return 0;
      slot = devinfo->vpid_handle;
   } else if (vpid_handle) {
      slot = vpid_handle;
   } else {
      avm_vpid_handle n;

      /* prefer a slot that carries the same name */
      for (n = 1; n < CONFIG_AVM_PA_MAX_VPID; n++) {
         if (strncmp(cfg->name, PA_VPID(ctx, n)->cfg.name, AVM_PA_MAX_NAME) == 0) {
            slot = n;
            break;
         }
      }

      /* otherwise take the first unused slot */
      if (slot == 0) {
         for (n = 1; n < CONFIG_AVM_PA_MAX_VPID; n++) {
            if (PA_VPID(ctx, n)->vpid_handle == 0) {
               slot = n;
               break;
            }
         }
      }

      if (slot == 0)
         return -1;
   }

   if (cfg->v4_mtu == 0)
      cfg->v4_mtu = 1500;
   if (cfg->v6_mtu == 0)
      cfg->v6_mtu = 1500;

   vpid = PA_VPID(ctx, slot);
   memset(vpid, 0, sizeof(*vpid));
   vpid->cfg = *cfg;
   vpid->vpid_handle = slot;
   devinfo->vpid_handle = slot;
   return 0;
}
EXPORT_SYMBOL(avm_pa_dev_vpidhandle_register);

/*
 * Register a vpid for a device without requesting a specific handle.
 *
 * Thin wrapper around avm_pa_dev_vpidhandle_register() with vpid_handle 0,
 * i.e. the slot is taken from devinfo, found by name, or freshly allocated.
 * Returns whatever avm_pa_dev_vpidhandle_register() returns.
 */
int avm_pa_dev_vpid_register(struct avm_pa_dev_info *devinfo,
                             struct avm_pa_vpid_cfg *cfg)
{
   return avm_pa_dev_vpidhandle_register(devinfo, 0, cfg);
}
EXPORT_SYMBOL(avm_pa_dev_vpid_register);

/*
 * Unregister the vpid and pid of a device (asynchronous variant).
 *
 * devinfo: device info; its vpid_handle and pid_handle are reset to 0
 * done:    stored in the pid as release_completion; not waited on here
 *          (see avm_pa_dev_unregister_sync() for the blocking variant)
 *
 * Returns 0 if a registered pid was found and its teardown was started,
 * -ENODEV otherwise. Note that -ENODEV is also returned when only a vpid
 * (but no pid) was unregistered.
 */
int avm_pa_dev_unregister(struct avm_pa_dev_info *devinfo, struct completion *done)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_hardware_pa *hwpa;

   if (devinfo->vpid_handle) {
      struct avm_pa_vpid *my_vpid = PA_VPID(ctx, devinfo->vpid_handle);
      avm_vpid_handle my_vpid_handle = my_vpid->vpid_handle;

      devinfo->vpid_handle = 0;
      /* slot may already be unused; only clean up a live vpid */
      if (my_vpid_handle != 0) {
         my_vpid->vpid_handle = 0;
         avm_pa_selector_clear_for_vpid(&ctx->show_filter, my_vpid_handle);
         avm_pa_flush_sessions_for_vpid(my_vpid_handle);
      }
   }

   if (devinfo->pid_handle) {
      /*
       * Unregister so that no new sessions can be created for the pid.
       *
       * Deleting the pid may be deferred if there are still sessions alive,
       * this is handled in the GC (through ref counts).
       * A reregister is possible until all sessions are gone.
       *
       * The ref added at avm_pa_dev_pid_register() is still valid therefore
       * we don't add another ref here but use PA_PID().
       */
      struct avm_pa_pid *my_pid = PA_PID(ctx, devinfo->pid_handle);
      avm_pid_handle my_pid_handle = my_pid->pid_handle;

      devinfo->pid_handle = 0;
      if (my_pid_handle != 0) {
         avm_pid_handle n;
         /* check if pid is used as ingress pid; if so, point the user back at itself */
         for (n = 1; n < CONFIG_AVM_PA_MAX_PID; n++) {
            struct avm_pa_pid *pid = PA_PID(ctx, n);
            if (pid->ingress_pid_handle == my_pid_handle)
               pid->ingress_pid_handle = pid->pid_handle;
         }
         avm_pa_selector_clear_for_pid(&ctx->show_filter, my_pid_handle);

         /* free virtual channels */
         hwpa = &ctx->hardware_pa;
         my_pid->rx_channel_stopped = 1;
         if (my_pid->tx_channel_activated) {
            my_pid->tx_channel_activated = 0;
            if (hwpa->free_tx_channel)
               hwpa->free_tx_channel(my_pid_handle);
         }
         if (my_pid->rx_channel_activated) {
            my_pid->rx_channel_activated = 0;
            if (hwpa->free_rx_channel)
               hwpa->free_rx_channel(my_pid_handle);
         }

         /* At session creation, referencing the PID and moving the session
          * to ACTIVE state is not fully atomic outside the lock (cannot flush sessions
          * that are in CREATE state). But inside the lock, any sessions in CREATE state
          * cannot become ACTIVE, because clearing pid->pid_handle prevents new refs.
          */
         spin_lock_bh(&avm_pa_lock);
         my_pid->pid_handle = 0;
         my_pid->release_completion = done;
         /* NOTE(review): a zero return from pa_pid_put() presumably means that
          * references remain (sessions still alive), hence the flush -- confirm
          * against pa_pid_put().
          */
         if (!pa_pid_put(my_pid_handle)) {
            avm_pa_flush_sessions_for_pid(my_pid_handle);
         }
         spin_unlock_bh(&avm_pa_lock);
         return 0;
      }
   }

   return -ENODEV;
}
EXPORT_SYMBOL(avm_pa_dev_unregister);

/*
 * Unregister a device and wait until its pid has been released.
 *
 * Calls avm_pa_dev_unregister() with an on-stack completion and then waits
 * (killable) for up to 10 seconds (HZ * 10 jiffies).
 *
 * Returns
 *    0         when the completion was signalled,
 *    non-zero  value from avm_pa_dev_unregister() if that failed,
 *    < 0       if the wait was interrupted; the pid's release_completion
 *              is reset to NULL in that case.
 * A timeout is treated as fatal: diagnostics are printed and BUG() is hit.
 *
 * May sleep (might_sleep()).
 */
int avm_pa_dev_unregister_sync(struct avm_pa_dev_info *devinfo)
{
   int ret;
   /* Look the pid up before unregistering: avm_pa_dev_unregister() clears
    * devinfo->pid_handle, but the pid is still needed for diagnostics below.
    */
   int my_pid_handle = devinfo->pid_handle;
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *pid = PA_PID(ctx, my_pid_handle);
   DECLARE_COMPLETION_ONSTACK(done);

   might_sleep();
   ret = avm_pa_dev_unregister(devinfo, &done);
   if (ret != 0)
      return ret;

   /* Normally there is no way to block indefinitely, but mark killable
    * in case of a bug somewhere.
    */
   ret = wait_for_completion_killable_timeout(&done, HZ * 10);
   if (ret == 0) {
      /* Timeout. This is fatal. Maybe some session hangs? */
      int i;
      pr_crit("FATAL in %s!\n  PID %s is not removed. Refcount: %d.\n  Done: %p vs %p",
              __func__,
              pid->cfg.name,
              kref_read(&pid->ref),
              &done, pid->release_completion /* should be NULL */);
      for (i = 1; i < CONFIG_AVM_PA_MAX_SESSION; i++) {
         struct avm_pa_session *s = PA_SESSION(&pa_data, i);
         void *hw = avm_pa_get_hw_session(s);
         /* hw != NULL is the most probable reason for session removal to hang */
         if (s->on_list != AVM_PA_LIST_FREE && hw) {
            pa_show_session(s, pa_printk, KERN_CRIT);
            pr_crit("hw_session     : %p\n\n", hw);
            break; /* print at most one session to not overflow printk buffer */
         }
      }
      /* tick timing relative to now, to judge whether the tick timer still runs */
      pr_crit("last tick     : %ld\n", (long)(jiffies - last_tick));
      pr_crit("next tick     : %ld\n", (long)(pa_glob.tick_timer.expires - jiffies));
      BUG();
   } else if (ret < 0) {
      /* Apparently we can get here during reboot. We continue without completing.
       * The PID is not fully unregistered yet but we have an additional
       * safe guard at registration to prevent double registration.
       */
      pr_err("avm_pa: %s: interrupted prematurely\n", __func__);
      /* 'done' lives on this stack frame and must not be referenced after return */
      pid->release_completion = NULL;
      return ret;
   } else {
      return 0; /* completed! */
   }
}
EXPORT_SYMBOL(avm_pa_dev_unregister_sync);

/* ------------------------------------------------------------------------ */
/* -------- pid extra functions ------------------------------------------- */
/* ------------------------------------------------------------------------ */

/*
 * Allow or forbid bridging for a pid and for its ingress pid.
 *
 * Returns the previous bridging_ok value of the pid, or false if either
 * the pid or its ingress pid is not registered (nothing is changed then).
 */
bool
avm_pa_pid_set_bridging(avm_pid_handle pid_handle, bool bridging_ok)
{
   struct avm_pa_pid *self;
   struct avm_pa_pid *ingress;
   bool previous;

   self = pa_pid_get_pid(pid_handle);
   if (self == NULL) {
      pr_err("%s: pid %u not registered\n", __func__, pid_handle);
      return false;
   }

   /* In the common case the ingress pid is the pid itself
    * (self->ingress_pid_handle == self->pid_handle).
    */
   ingress = pa_pid_get_pid(self->ingress_pid_handle);
   if (ingress == NULL) {
      pr_err("%s: ingress pid %u not registered\n", __func__, self->ingress_pid_handle);
      pa_pid_put(pid_handle);
      return false;
   }

   /* The ingress pid must follow the same policy: the ingress_pid mechanism
    * is only an optimization (larger bypass, HW offloading) and must not
    * produce session types that are disallowed on the pid itself.
    */
   previous = self->bridging_ok;
   self->bridging_ok = bridging_ok;
   ingress->bridging_ok = bridging_ok;

   /* drop both references taken above, in reverse order */
   pa_pid_put(ingress->pid_handle);
   pa_pid_put(pid_handle);

   return previous;
}
EXPORT_SYMBOL(avm_pa_pid_set_bridging);

/*
 * Attach a copy of *hw to a pid.
 *
 * Convenience wrapper around avm_pa_pid_set_hwinfo2() that passes
 * sizeof(*hw) as the size and GFP_ATOMIC as the allocation mode.
 * Returns whatever avm_pa_pid_set_hwinfo2() returns.
 */
int
avm_pa_pid_set_hwinfo(avm_pid_handle pid_handle,
                      struct avm_pa_pid_hwinfo *hw)
{
   return avm_pa_pid_set_hwinfo2(pid_handle, hw, sizeof(*hw), GFP_ATOMIC);
}
EXPORT_SYMBOL(avm_pa_pid_set_hwinfo);

/*
 * Replace the hardware info attached to a pid.
 *
 * pid_handle:    pid to modify
 * hw:            new hardware info (copied), or NULL to just drop the old one
 * sizeof_hwinfo: number of bytes to copy from hw; must be 0 when hw is NULL
 *                and at least sizeof(struct avm_pa_pid_hwinfo) otherwise
 *                (violations hit BUG_ON)
 * gfp:           allocation flags for the copy
 *
 * Returns 0 on success, -ENODEV if the pid is not registered, -ENOMEM if
 * the copy could not be allocated (the old info is already freed then).
 */
int
avm_pa_pid_set_hwinfo2(avm_pid_handle pid_handle,
                       struct avm_pa_pid_hwinfo *hw,
                       size_t sizeof_hwinfo,
                       gfp_t gfp)
{
   struct avm_pa_pid *pid = pa_pid_get_pid(pid_handle);
   int rc = 0;

   /* A size mismatch most likely means the calling module is stale and
    * needs to be recompiled.
    */
   BUG_ON(hw && sizeof_hwinfo < sizeof(struct avm_pa_pid_hwinfo));
   BUG_ON(hw == NULL && sizeof_hwinfo != 0);

   if (!pid) {
      pr_err("avm_pa_pid_set_hwinfo: pid %u not registered\n",
             pid_handle);
      return -ENODEV;
   }

   /* drop the previous info first */
   kfree(pid->hw);
   pid->hw = NULL;

   if (hw) {
      pid->hw = kmemdup(hw, sizeof_hwinfo, gfp);
      if (!pid->hw) {
         pr_err("avm_pa_pid_set_hwinfo: kmalloc failed\n");
         rc = -ENOMEM;
      }
   }

   /* single release point for the reference taken by pa_pid_get_pid() */
   pa_pid_put(pid_handle);
   return rc;
}
EXPORT_SYMBOL(avm_pa_pid_set_hwinfo2);

/*
 * Return the hardware info attached to a pid.
 *
 * Returns pid->hw if the slot addressed by pid_handle carries exactly that
 * handle, otherwise NULL (with a rate-limited error message).
 */
struct avm_pa_pid_hwinfo *
avm_pa_pid_get_hwinfo(avm_pid_handle pid_handle)
{
   struct avm_pa_global *ctx = &pa_glob;
   /* plain lookup without taking a reference: this is a read-only access */
   struct avm_pa_pid *entry = PA_PID(ctx, pid_handle);

   /* Calling with pid_handle == 0 is allowed, for convenience. */
   if (entry->pid_handle == pid_handle)
      return entry->hw;

   if (net_ratelimit())
      pr_err("avm_pa_pid_get_hwinfo: pid %u not registered\n",
             pid_handle);
   return NULL;
}
EXPORT_SYMBOL(avm_pa_pid_get_hwinfo);

/*
 * Try to allocate hardware rx/tx channels for a pid.
 *
 * An rx channel is requested when none is active yet, the ingress framing
 * is ether and the hardware pa offers alloc_rx_channel. A tx channel is
 * requested when none is active yet, the egress framing is ether or ptype
 * and the hardware pa offers alloc_tx_channel.
 *
 * Returns -1 if the pid is not registered, 0 otherwise. Channel allocation
 * failures are only logged and do not change the return value.
 */
int avm_pa_pid_activate_hw_accelaration(avm_pid_handle pid_handle)
{
   struct avm_pa_global   *ctx = &pa_glob;
   struct avm_hardware_pa *hwpa = &ctx->hardware_pa;
   struct avm_pa_pid      *pid;

   pid = pa_pid_get_pid(pid_handle);
   if (!pid) {
      pr_err("avm_pa_pid_activate_hw_accelaration: pid %u not registered\n",
             pid_handle);
      return -1;
   }

   pr_info("avm_pa: try to activate hw accelaration for pid %u (%s) called from %pf\n",
           pid_handle, pid->cfg.name, (void *)_RET_IP_);

   /* receive side */
   if (   pid->rx_channel_activated == 0
       && pid->ingress_framing == avm_pa_framing_ether
       && hwpa->alloc_rx_channel) {
      if ((*hwpa->alloc_rx_channel)(pid_handle) >= 0) {
         pid->rx_channel_stopped = 0;
         pid->rx_channel_activated = 1;
         pr_info("avm_pa: rx channel activated, pid %u (%s)\n",
                 pid_handle, pid->cfg.name);
      } else {
         pr_err("avm_pa: can't activate rx channel, pid %u (%s)\n",
                pid_handle, pid->cfg.name);
      }
   }

   /* transmit side */
   if (   pid->tx_channel_activated == 0
       && (   pid->egress_framing == avm_pa_framing_ether
           || pid->egress_framing == avm_pa_framing_ptype)
       && hwpa->alloc_tx_channel) {
      if ((*hwpa->alloc_tx_channel)(pid_handle) >= 0) {
         pid->tx_channel_activated = 1;
         pr_info("avm_pa: tx channel activated, pid %u (%s)\n",
                 pid_handle, pid->cfg.name);
      } else {
         pr_err("avm_pa: can't activate tx channel, pid %u (%s)\n",
                pid_handle, pid->cfg.name);
      }
   }

   pa_pid_put(pid_handle);
   return 0;
}
EXPORT_SYMBOL(avm_pa_pid_activate_hw_accelaration);


/*
 * Switch one priority map of a pid on or off.
 *
 * Returns 0 on success, -1 if the pid is not registered, -2 if prio_map is
 * not a valid map index. Any non-zero 'enable' is stored as 1.
 */
int avm_pa_pid_prio_map_enable(avm_pid_handle pid_handle, unsigned short prio_map,
   int enable)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *entry = PA_PID(ctx, pid_handle);

   if (entry->pid_handle != pid_handle) {
      pr_err("%s: pid handle %u not registered\n", __func__, pid_handle);
      return -1;
   }

   if (prio_map >= AVM_PA_COUNT_PRIO_MAPS) {
      pr_err("%s: prio map %hu does not exist\n", __func__, prio_map);
      return -2;
   }

   if (enable)
      entry->prio_maps[prio_map].enabled = 1;
   else
      entry->prio_maps[prio_map].enabled = 0;
   return 0;
}
EXPORT_SYMBOL(avm_pa_pid_prio_map_enable);

/*
 * Clear all per-queue priorities of one priority map of a pid.
 *
 * Returns 0 on success, -1 if the pid is not registered, -2 if prio_map is
 * not a valid map index. The map's 'enabled' state is left untouched.
 */
int avm_pa_pid_prio_map_reset(avm_pid_handle pid_handle, unsigned short prio_map)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *entry = PA_PID(ctx, pid_handle);

   if (entry->pid_handle != pid_handle) {
      pr_err("%s: pid handle %u not registered\n", __func__, pid_handle);
      return -1;
   }

   if (prio_map >= AVM_PA_COUNT_PRIO_MAPS) {
      pr_err("%s: prio map %hu does not exist\n", __func__, prio_map);
      return -2;
   }

   memset(entry->prio_maps[prio_map].prios, 0,
          sizeof(entry->prio_maps[prio_map].prios));
   return 0;
}
EXPORT_SYMBOL(avm_pa_pid_prio_map_reset);

/*
 * Store the priority for one queue in one priority map of a pid.
 *
 * Returns 0 on success, -1 if the pid is not registered, -2 if prio_map is
 * not a valid map index, -3 if queue is out of range.
 */
int avm_pa_pid_prio_map_set_prio_per_queue(avm_pid_handle pid_handle, unsigned short
   prio_map, unsigned int queue, unsigned int prio)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *entry = PA_PID(ctx, pid_handle);

   if (entry->pid_handle != pid_handle) {
      pr_err("%s: pid handle %u not registered\n", __func__, pid_handle);
      return -1;
   }

   if (prio_map >= AVM_PA_COUNT_PRIO_MAPS) {
      pr_err("%s: prio map %hu does not exist\n", __func__, prio_map);
      return -2;
   }

   if (queue >= AVM_PA_MAX_PRIOS) {
      pr_err("%s: prio map %hu queue %u out of bounds\n", __func__,
         prio_map, queue);
      return -3;
   }

   /* prio == 0 marks the priority as unspecified; an unspecified priority
    * is never used to set an skb priority.
    */
   entry->prio_maps[prio_map].prios[queue] = prio;
   return 0;
}
EXPORT_SYMBOL(avm_pa_pid_prio_map_set_prio_per_queue);

/*
 * Enable or disable TCP-ACK prioritisation for a pid.
 *
 * Kept for backwards compatibility with the old prioack procfs interface:
 * toggles the TACK priority map and sets the priority of the best-effort
 * queue to 'prio' (or to 0 when disabling).
 *
 * Returns -1 if the map could not be toggled, otherwise the result of
 * avm_pa_pid_prio_map_set_prio_per_queue().
 */
int avm_pa_pid_activate_tcpackprio(avm_pid_handle pid_handle, int enable, unsigned int prio)
{
   unsigned int be_prio = enable ? prio : 0;

   if (avm_pa_pid_prio_map_enable(pid_handle, AVM_PA_PRIO_MAP_TACK, enable) != 0)
      return -1;

   return avm_pa_pid_prio_map_set_prio_per_queue(pid_handle, AVM_PA_PRIO_MAP_TACK,
                                                 AVM_PA_BE_QUEUE, be_prio);
}
EXPORT_SYMBOL(avm_pa_pid_activate_tcpackprio);

/*
 * Enable or disable TGET prioritisation for a pid.
 *
 * Kept for backwards compatibility with the old prioack procfs interface:
 * toggles the TGET priority map and sets the priority of the best-effort
 * queue to 'prio' (or to 0 when disabling).
 *
 * Returns -1 if the map could not be toggled, otherwise the result of
 * avm_pa_pid_prio_map_set_prio_per_queue().
 */
int avm_pa_pid_activate_tgetprio(avm_pid_handle pid_handle, int enable, unsigned int prio)
{
   unsigned int be_prio = enable ? prio : 0;

   if (avm_pa_pid_prio_map_enable(pid_handle, AVM_PA_PRIO_MAP_TGET, enable) != 0)
      return -1;

   return avm_pa_pid_prio_map_set_prio_per_queue(pid_handle, AVM_PA_PRIO_MAP_TGET,
                                                 AVM_PA_BE_QUEUE, be_prio);
}
EXPORT_SYMBOL(avm_pa_pid_activate_tgetprio);

/*
 * Register the (single) hardware packet accelerator backend.
 *
 * Returns
 *    0            on success,
 *   -EINVAL       if pa_functions is NULL, carries unknown flags or does
 *                 not pass pa_hw_pa_valid(),
 *   -EADDRINUSE   if a hardware pa is already registered,
 *   -EAGAIN       if a previous unregistration has not completed yet.
 *
 * On success, hw acceleration is activated for every registered pid with
 * ptype egress framing, provided the backend offers channel allocation.
 */
int avm_pa_register_hardware_pa(struct avm_hardware_pa *pa_functions)
{
   struct avm_pa_global *ctx = &pa_glob;
   avm_pid_handle n;

   if (   !pa_functions
       || (pa_functions->flags & ~AVM_HW_F_ALL)
       || !pa_hw_pa_valid(pa_functions))
      return -EINVAL;

   if (pa_hw_pa_valid(&ctx->hardware_pa)) {
      pr_err("avm_pa: hardware_pa already registered\n");
      return -EADDRINUSE;
   }

   if (pa_hw_pa_get()) {
      /* A reference can only be obtained here while an earlier
       * avm_pa_unregister_hardware_pa() call is still in progress.
       */
      pr_err("avm_pa: deregistration pending\n");
      pa_hw_pa_put();
      return -EAGAIN;
   }

   kref_init(&ctx->hw_pa_ref);
   ctx->hardware_pa = *pa_functions;

   /* without channel allocation there is nothing to activate */
   if (!pa_functions->alloc_tx_channel && !pa_functions->alloc_rx_channel)
      return 0;

   for (n = 1; n < CONFIG_AVM_PA_MAX_PID; n++) {
      struct avm_pa_pid *pid = PA_PID(ctx, n);

      if (pid->pid_handle != n)
         continue; /* slot not registered */
      if (pid->egress_framing != avm_pa_framing_ptype)
         continue;
      avm_pa_pid_activate_hw_accelaration(n);
   }
   return 0;
}
EXPORT_SYMBOL(avm_pa_register_hardware_pa);

/*
 * Start unregistering the hardware packet accelerator (asynchronous).
 *
 * pa_functions: the backend being removed; its add_session and
 *               add_session_skb callbacks must match the registered ones
 *               (BUG_ON otherwise)
 * done:         stored in ctx->hw_pa_flush_completion; not waited on here
 *               (see avm_pa_unregister_hardware_pa_sync())
 *
 * Returns -ENODEV if pa_functions is NULL, 0 otherwise.
 */
int avm_pa_unregister_hardware_pa(struct avm_hardware_pa *pa_functions, struct completion *done)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_hardware_pa *hwpa = &ctx->hardware_pa;
   int n;

   if (!pa_functions)
      return -ENODEV;

   BUG_ON(hwpa->add_session != pa_functions->add_session);
   BUG_ON(hwpa->add_session_skb != pa_functions->add_session_skb);

   /* Stop adding hw sessions. the read side might still have a cached pointer
    * and add sessions but this is OK since they hold a ref on the hw_pa
    * and we're not doing the complete(done) here.
    */
   rcu_assign_pointer(hwpa->add_session, NULL);
   rcu_assign_pointer(hwpa->add_session_skb, NULL);

   /* Release the virtual rx/tx channels of every registered pid. */
   for (n=1; n < CONFIG_AVM_PA_MAX_PID; n++) {
      struct avm_pa_pid *pid = pa_pid_get_pid(n);
      if (pid) {
         pid->rx_channel_stopped = 1;
         if (pid->tx_channel_activated) {
            pid->tx_channel_activated = 0;
            if (hwpa->free_tx_channel)
               hwpa->free_tx_channel(n);
         }
         if (pid->rx_channel_activated) {
            pid->rx_channel_activated = 0;
            if (hwpa->free_rx_channel)
               hwpa->free_rx_channel(n);
         }
         /* NOTE(review): other callers in this file pair pa_pid_get_pid()
          * with pa_pid_put(); confirm avm_pa_pid_put() is equivalent here.
          */
         avm_pa_pid_put(n);
      }
   }

   ctx->hw_pa_flush_completion = done;
   if (!pa_hw_pa_put()) {
      /* kill all sessions in hw pa if necessary. pa_hw_pa_put returns 0
       * if any session is in_hw (has a reference).
       */
      avm_pa_flush_hw_sessions();
   }
   return 0;
}
EXPORT_SYMBOL(avm_pa_unregister_hardware_pa);

/*
 * Unregister the hardware packet accelerator and wait for completion.
 *
 * Returns the result of avm_pa_unregister_hardware_pa(). If the (killable)
 * wait is interrupted, the pending completion pointer is cleared and 0 is
 * still returned. May sleep.
 */
int avm_pa_unregister_hardware_pa_sync(struct avm_hardware_pa *pa_functions)
{
   struct avm_pa_global *ctx = &pa_glob;
   DECLARE_COMPLETION_ONSTACK(done);
   int ret;

   might_sleep();

   ret = avm_pa_unregister_hardware_pa(pa_functions, &done);
   if (ret != 0)
      return ret;

   /* Blocking forever should not be possible, but stay killable in case
    * there is a bug somewhere.
    */
   if (wait_for_completion_killable(&done)) {
      pr_err("avm_pa: %s: interrupted prematurely\n", __func__);
      /* We have to carry on without the completion having fired. The
       * hardware_pa is not fully unregistered yet, but registration has
       * additional safe guards against double registration.
       */
      ctx->hw_pa_flush_completion = NULL;
   }

   return ret;
}
EXPORT_SYMBOL(avm_pa_unregister_hardware_pa_sync);


/*
 * Tell whether hardware acceleration is currently usable.
 *
 * Returns 1 if a valid hardware pa is registered and it has not been
 * disabled via hw_ppa_disabled, 0 otherwise.
 */
int avm_pa_is_hardware_pa_active(void)
{
   struct avm_pa_global *ctx = &pa_glob;

   if (!pa_hw_pa_valid(&ctx->hardware_pa))
      return 0;

   return !ctx->hw_ppa_disabled;
}
EXPORT_SYMBOL(avm_pa_is_hardware_pa_active);

/* ------------------------------------------------------------------------ */

/*
 * Print the common status header: overall state, hardware state (if a
 * hardware pa is registered, or flow cache state with CONFIG_BLOG),
 * whether bsessions are allowed and whether the accel filter is active.
 */
static void pa_show_brief_status_header(pa_fprintf fprintffunc, void *arg)
{
   struct avm_pa_global *ctx = &pa_glob;
   const char *text;

   /* overall state; capture is only queried when nothing is disabled */
   text = ctx->disabled            ? "disabled"
        : ctx->fw_disabled         ? "testmode"
        : avm_pa_capture_running() ? "capture"
        :                            "enabled";
   (*fprintffunc)(arg, "State          : %s\n", text);

   /* hardware accelerator state */
   if (pa_hw_pa_valid(&ctx->hardware_pa)) {
      if (ctx->hw_ppa_disabled)
         text = "disabled";
      else
         text = "enable";
      (*fprintffunc)(arg, "HW State       : %s\n", text);
   }
#ifdef CONFIG_BLOG
   else {
      if (blog_fc_enabled())
         text = "enable (flow cache)";
      else
         text = "disabled (flow cache)";
      (*fprintffunc)(arg, "HW State       : %s\n", text);
   }
#endif

   /* bsessions: the hardware pa may veto them regardless of the setting */
   if (pa_hw_pa_valid(&ctx->hardware_pa) && (ctx->hardware_pa.flags & AVM_HW_F_NO_BSESSION))
      text = "no (by hw)";
   else
      text = ctx->bsession_allowed ? "yes" : "no";
   (*fprintffunc)(arg, "BSession allow : %s\n", text);

   /* accel filter */
   if (!ctx->filter_enabled)
      text = "no";
   else if (list_empty(&ctx->accel_filter))
      text = "empty";
   else
      text = "yes";
   (*fprintffunc)(arg, "Filter active  : %s\n", text);
}


/*
 * Print the session counters.
 *
 * fprintffunc: printf-like output callback
 * arg:         opaque first argument handed to fprintffunc
 * right_align: minimum field width used for every number ('*' width in the
 *              format strings); 0 prints the numbers unpadded
 */
static void pa_show_num_sessions(pa_fprintf fprintffunc, void *arg, int right_align)
{
   struct avm_pa_global *ctx = &pa_glob;

   (*fprintffunc)(arg, "BSessions      : %*u\n", right_align,
                       (unsigned)ctx->stats.nbsessions);
   (*fprintffunc)(arg, "Sessions       : %*hu\n", right_align,
                       ctx->sess_list[AVM_PA_LIST_ACTIVE].nsessions);
   /* drop sessions currently alive: created minus deleted */
   (*fprintffunc)(arg, "Drop Sessions  : %*hu\n", right_align,
                       ctx->stats.drop_sess_ok - ctx->stats.drop_sess_del);
   /* There is a ref for every in_hw session plus one that's taken on registration */
   (*fprintffunc)(arg, "HW Sessions    : %*d\n", right_align,
                       kref_read(&ctx->hw_pa_ref) - (ctx->hardware_pa.add_session ? 1 : 0));
   (*fprintffunc)(arg, "Max Sessions   : %*hu\n", right_align,
                       ctx->sess_list[AVM_PA_LIST_ACTIVE].maxsessions);
   (*fprintffunc)(arg, "Sessions (dead): %*hu\n", right_align,
                       ctx->sess_list[AVM_PA_LIST_DEAD].nsessions);
   (*fprintffunc)(arg, "Sessions (free): %*hu\n", right_align,
                       ctx->sess_list[AVM_PA_LIST_FREE].nsessions);
}


/*
 * Print the kernel banner (system name, release and version) using the
 * kernel's own linux_proc_banner format string.
 */
static void pa_show_linux_banner(pa_fprintf fprintffunc, void *arg)
{
   struct new_utsname *info = utsname();

   BUG_ON(info == NULL);

   /* same arguments as fs/proc/version.c (v2.6.27..v4.16+) */
   (*fprintffunc)(arg, linux_proc_banner,
                  info->sysname, info->release, info->version);
}


/*
 * Print the brief overview: version and kernel banner, status header,
 * session counters, egress pool usage and the most important packet
 * counters from ctx->stats.
 */
static void pa_show_brief(pa_fprintf fprintffunc, void *arg)

{
   struct avm_pa_global *ctx = &pa_glob;
   unsigned free_egress = 0;
   struct hlist_node *node;

   /* no newline here: the banner printed next continues this line */
   (*fprintffunc)(arg, "Version        :  " AVM_PA_VERSION " on ");
   pa_show_linux_banner(fprintffunc, arg);

   pa_show_brief_status_header(fprintffunc, arg);
   pa_show_num_sessions(fprintffunc, arg, 0);

   /* count the entries on the egress free list */
   hlist_for_each(node, &ctx->egress_freelist)
      ++free_egress;

   /* free entries / total pool size */
   (*fprintffunc)(arg, "Egress pool    : %u/%zd\n",
                       free_egress, ARRAY_SIZE(pa_data.egress_pool));
   (*fprintffunc)(arg, "Rx pkts/secs   : %lu\n",
                       (unsigned long)ctx->stats.rx_pps);
   (*fprintffunc)(arg, "Fw pkts/sec    : %lu\n",
                       (unsigned long)ctx->stats.fw_pps);
   (*fprintffunc)(arg, "Ov pkts/sec    : %lu\n",
                       (unsigned long)ctx->stats.overlimit_pps);
   (*fprintffunc)(arg, "Rx pakets      : %lu\n",
                       (unsigned long)ctx->stats.rx_pkts);
   (*fprintffunc)(arg, "Rx bypass      : %lu\n",
                       (unsigned long)ctx->stats.rx_bypass);
   (*fprintffunc)(arg, "Rx ttl <= 1    : %lu\n",
                       (unsigned long)ctx->stats.rx_ttl);
   (*fprintffunc)(arg, "Rx broadcast   : %lu\n",
                       (unsigned long)ctx->stats.rx_broadcast);
   (*fprintffunc)(arg, "Rx search      : %lu\n",
                       (unsigned long)ctx->stats.rx_search);
   (*fprintffunc)(arg, "Rx match       : %lu\n",
                       (unsigned long)ctx->stats.rx_match);
   (*fprintffunc)(arg, "Rx modified    : %lu\n",
                       (unsigned long)ctx->stats.rx_mod);
   (*fprintffunc)(arg, "Fw pakets      : %lu\n",
                       (unsigned long)ctx->stats.fw_pkts);
   (*fprintffunc)(arg, "Fw local       : %lu\n",
                       (unsigned long)ctx->stats.fw_local);
}

/*
 * Print the status header followed by the static memory footprint of the
 * main data structures (single element and whole array sizes).
 */
static void
pa_show_memory(pa_fprintf fprintffunc, void *arg)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_data   *pd  = &pa_data;

   pa_show_brief_status_header(fprintffunc, arg);

/* FMT/ARG print a byte count as "<thousands>.<two digits> KB";
 * note the divisor is 1000, not 1024. ARG expands to TWO arguments.
 */
#define FMT "%4zd.%02zd KB"
#define ARG(x) (x)/1000, ((x)%1000)/10

   (*fprintffunc)(arg, "avm_pa_global  : " FMT "\n", ARG(sizeof(struct avm_pa_global)));
   (*fprintffunc)(arg, "avm_pa_data    : " FMT "\n", ARG(sizeof(struct avm_pa_data)));
   (*fprintffunc)(arg, "global + data  : " FMT "\n", ARG(sizeof(struct avm_pa_global) + sizeof(struct avm_pa_data)));
   (*fprintffunc)(arg, "One session    : " FMT "\n", ARG(sizeof(struct avm_pa_session)));
   (*fprintffunc)(arg, "All sessions   : " FMT "\n", ARG(sizeof(pd->sessions)));
   (*fprintffunc)(arg, "One bsession   : " FMT "\n", ARG(sizeof(struct avm_pa_bsession)));
   (*fprintffunc)(arg, "All bsessions  : " FMT "\n", ARG(sizeof(ctx->bsess_array)));
   (*fprintffunc)(arg, "One ingress    : " FMT "\n", ARG(sizeof(struct avm_pa_pkt_match)));
   (*fprintffunc)(arg, "One egress     : " FMT "\n", ARG(sizeof(struct avm_pa_egress)));
   (*fprintffunc)(arg, "Egress pool    : " FMT "\n", ARG(sizeof(pd->egress_pool)));
   (*fprintffunc)(arg, "One macaddr    : " FMT "\n", ARG(sizeof(struct avm_pa_macaddr)));
   (*fprintffunc)(arg, "All macaddrs   : " FMT "\n", ARG(sizeof(ctx->macaddr_array)));
   (*fprintffunc)(arg, "One pid        : " FMT "\n", ARG(sizeof(struct avm_pa_pid)));
   (*fprintffunc)(arg, "All pids       : " FMT "\n", ARG(sizeof(ctx->pid_array)));
   (*fprintffunc)(arg, "One vpid       : " FMT "\n", ARG(sizeof(struct avm_pa_vpid)));
   (*fprintffunc)(arg, "All vpids      : " FMT "\n", ARG(sizeof(ctx->vpid_array)));
   (*fprintffunc)(arg, "Stats          : " FMT "\n", ARG(sizeof(struct avm_pa_stats)));
   /* these two sizes are computed from start/end markers stored in ctx */
   (*fprintffunc)(arg, "TOK Stats      : " FMT "\n", ARG(ctx->tok_end - ctx->tok_start));
   (*fprintffunc)(arg, "Estimator data : " FMT "\n", ARG(ctx->est_end - ctx->est_start));
}


/*
 * Print the session counters and the full statistics table.
 *
 * Every PRINT_STAT line shows the current counter value and, in
 * parentheses, the difference to the value seen at the previous call.
 * The snapshot used for that difference (ctx->stats_copy) is refreshed at
 * the end, so calling this function has a side effect on the next output.
 *
 * The per-CPU receive queue counters are printed for every possible CPU.
 */
static void
pa_show_stats(pa_fprintf fprintffunc, void *arg)
{
   struct avm_pa_global *ctx = &pa_glob;
   pa_show_num_sessions(fprintffunc, arg, 9);

#define PRINT_STAT(t, member) do {                                                       \
      (*fprintffunc)(arg, "%-15s: %9lu (%+7ld)\n", t,                                    \
         (unsigned long)ctx->stats. member,                                              \
         (long)(ctx->stats. member - ctx->stats_copy. member)) ;                         \
   } while(0)

   PRINT_STAT("Rx packets/sec",  rx_pps);
   PRINT_STAT("Fw packets/sec",  fw_pps);
   PRINT_STAT("Ov packets/sec",  overlimit_pps);
   PRINT_STAT("Rxfw packets",    rxfw_pkts);
   PRINT_STAT("Rxfw bypass",     rxfw_bypass);
   PRINT_STAT("Rx pakets",       rx_pkts);
   PRINT_STAT("Rx bypass",       rx_bypass);
   PRINT_STAT("Rx frag list",    rx_frag_list);
   PRINT_STAT("Rx ttl <= 1",     rx_ttl);
   PRINT_STAT("Rx broadcast",    rx_broadcast);
   PRINT_STAT("Rx search",       rx_search);
   PRINT_STAT("Rx match",        rx_match);
   PRINT_STAT("Rx lisp changed", rx_lispchanged);
   PRINT_STAT("Rx df",           rx_df);
   PRINT_STAT("Rx modified",     rx_mod);
   PRINT_STAT("Rx overlimit",    rx_overlimit);
   PRINT_STAT("Rx dropped",      rx_dropped);
   PRINT_STAT("Rx size",         rx_too_small);
   PRINT_STAT("Rx hroom",        rx_headroom_too_small);
   PRINT_STAT("Fw pakets",       fw_pkts);
   PRINT_STAT("Fw output",       fw_output);
   PRINT_STAT("Fw output drop",  fw_output_drop);
   PRINT_STAT("Fw local",        fw_local);
   PRINT_STAT("Fw rtp",          fw_rtp);
   PRINT_STAT("Fw rtp drop",     fw_rtp_drop);
   PRINT_STAT("Fw illegal",      fw_ill);
   PRINT_STAT("Fw frags",        fw_frags);
   PRINT_STAT("Fw drop",         fw_drop);
   PRINT_STAT("Fw drop gone",    fw_drop_gone);
   PRINT_STAT("Fw fail",         fw_fail);
   PRINT_STAT("Fw frag fail",    fw_frag_fail);
   PRINT_STAT("Tx local",        tx_local);
   PRINT_STAT("Tx loopback",     tx_loopback);
   PRINT_STAT("Tx already",      tx_already);
   PRINT_STAT("Tx bypass",       tx_bypass);
   PRINT_STAT("Tx sess error",   tx_sess_error);
   PRINT_STAT("Tx sess ok",      tx_sess_ok);
   PRINT_STAT("Tx sess exists",  tx_sess_exists);
   PRINT_STAT("Tx egress error", tx_egress_error);
   PRINT_STAT("Tx egress ok",    tx_egress_ok);
   PRINT_STAT("Tx pid change",   tx_pid_change);
   PRINT_STAT("Loc sess error",  local_sess_error);
   PRINT_STAT("Loc sess ok",     local_sess_ok);
   PRINT_STAT("Loc sess exists", local_sess_exists);
   PRINT_STAT("DROP sess ok",    drop_sess_ok);
   PRINT_STAT("XFRM sess ok",    xfrm_sess_ok);
   PRINT_STAT("RTP sess error",  rtp_sess_error);
   PRINT_STAT("RTP sess ok",     rtp_sess_ok);
   PRINT_STAT("RTP sess exists", rtp_sess_exists);
   PRINT_STAT("TBF schedule",    tbf_schedule);
   PRINT_STAT("TBF reschedule",  tbf_reschedule);
   {
      int i;
      /* Only walk CPUs that can actually exist: per-CPU storage is not
       * guaranteed to be set up for ids below nr_cpu_ids that are missing
       * from the possible mask.
       */
      for_each_possible_cpu(i) {
         struct avm_pa_rxq *rxq = per_cpu_ptr(&pa_rxq, i);

         (*fprintffunc)(arg, "RXQ enqueue %2d : %9lu\n", i, rxq->rx_enqueued);
         (*fprintffunc)(arg, "RXQ sched   %2d : %9lu\n", i, rxq->rx_napi_sched);
#ifdef CONFIG_AVM_PA_RPS
         (*fprintffunc)(arg, "RXQ rps isr %2d : %9lu\n", i, rxq->rx_rps_isr);
#endif
         (*fprintffunc)(arg, "RXQ process %2d : %9lu\n", i, rxq->rx_process);
         (*fprintffunc)(arg, "RXQ dropped %2d : %9lu\n", i, rxq->rx_dropped);
      }
   }
   PRINT_STAT("sess flushed",    sess_flushed);
   PRINT_STAT("sess timedout",   sess_timedout);
   PRINT_STAT("sess pid change", sess_pidchanged);
   PRINT_STAT("rxch no rx slow", rx_channel_no_rx_slow);
   PRINT_STAT("rxch stopped",    rx_channel_stopped);
   PRINT_STAT("txch dropped",    tx_channel_dropped);

   /* remember the current values as the baseline for the next call */
   ctx->stats_copy = ctx->stats;
}


/*
 * Print the status header followed by the current forwarding rate
 * (ctx->stats.fw_pps).
 */
static void pa_show_status(pa_fprintf fprintffunc, void *arg)
{
   struct avm_pa_global *ctx = &pa_glob;

   pa_show_brief_status_header(fprintffunc, arg);
   (*fprintffunc)(arg, "Current Rate   : %lu\n",
                       (unsigned long)ctx->stats.fw_pps);
}

/*
 * Print one line (handle, IPv4 MTU / IPv6 MTU, name) for every vpid slot
 * that is in use, i.e. whose vpid_handle is non-zero.
 */
static void pa_show_vpids(pa_fprintf fprintffunc, void *arg)
{
   struct avm_pa_global *ctx = &pa_glob;
   avm_vpid_handle n;

   for (n = 1; n < CONFIG_AVM_PA_MAX_VPID; n++) {
      struct avm_pa_vpid *entry = PA_VPID(ctx, n);

      if (entry->vpid_handle != 0) {
         (*fprintffunc)(arg, "VPID%-2d: %4d/%4d  %s\n",
                             entry->vpid_handle,
                             entry->cfg.v4_mtu,
                             entry->cfg.v6_mtu,
                             entry->cfg.name);
      }
   }
}

/*
 * Store a new IPv4 MTU in the VPID configuration that belongs to devinfo.
 * Does nothing when devinfo has no VPID assigned (vpid_handle == 0).
 */
void avm_pa_dev_set_ipv4_mtu(struct avm_pa_dev_info *devinfo, u16 mtu)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_vpid *vpid;

   if (!devinfo->vpid_handle)
      return;

   vpid = PA_VPID(ctx, devinfo->vpid_handle);
   vpid->cfg.v4_mtu = mtu;
}
EXPORT_SYMBOL(avm_pa_dev_set_ipv4_mtu);

/*
 * Store a new IPv6 MTU in the VPID configuration that belongs to devinfo.
 * Does nothing when devinfo has no VPID assigned (vpid_handle == 0).
 */
void avm_pa_dev_set_ipv6_mtu(struct avm_pa_dev_info *devinfo, u16 mtu)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_vpid *vpid;

   if (!devinfo->vpid_handle)
      return;

   vpid = PA_VPID(ctx, devinfo->vpid_handle);
   vpid->cfg.v6_mtu = mtu;
}
EXPORT_SYMBOL(avm_pa_dev_set_ipv6_mtu);

/*
 * Flush every session on the active list for which match_session() returns
 * true, and add the number of flushed sessions to stats.sess_flushed.
 *
 * match_session: predicate called once per active session; it receives the
 *                variadic arguments of this call as a va_list
 * reason:        text handed to pa_session_flush()
 * ...:           extra arguments forwarded to match_session
 */
static void
pa_flush_sessions_selective(bool (*match_session)(struct avm_pa_session *sess, va_list args),
                            const char *reason,
                            ...)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_session *session;
   struct avm_pa_session_list *list = &ctx->sess_list[AVM_PA_LIST_ACTIVE];
   unsigned count = 0;
   va_list ap;

   /* There is a linker error on mips if the nested functions access stack variables
    * of the outer functions. So we pass them via variadic args. Change to static
    * functions if this also breaks down */
   va_start(ap, reason);

   /* We are potentially called from process context. Make sure this is called
    * rarely in softirq, try to use pa_session_flush() directly. */
   rcu_read_lock_bh();
   list_for_each_entry_rcu(session, &list->sessions, session_list) {
      va_list match_args;
      bool matched;

      /* The predicate consumes arguments with va_arg(). Once a va_list has
       * been passed to a function that does so, its value is indeterminate,
       * so every invocation gets its own fresh copy of the argument list. */
      va_copy(match_args, ap);
      matched = match_session(session, match_args);
      va_end(match_args);

      if (matched) {
         pa_session_flush(session, reason);
         count += 1;
      }
   }
   rcu_read_unlock_bh();

   va_end(ap);
   ctx->stats.sess_flushed += count;
}


/* Flush all active sessions unconditionally (flush reason "flush"). */
void avm_pa_flush_sessions(void)
{
   /* Predicate that accepts every session. */
   bool match_any(struct avm_pa_session *session, va_list args)
   {
      return true;
   }

   pa_flush_sessions_selective(match_any, "flush");
}
EXPORT_SYMBOL(avm_pa_flush_sessions);

/*
 * Flush the active sessions that match a textual selector.
 *
 * selector: selector expression, parsed with avm_pa_parse_selector()
 * gfp:      allocation flags handed to the parser
 *
 * If parsing fails a warning is raised and nothing is flushed.
 */
void avm_pa_flush_sessions_select(const char *selector, gfp_t gfp)
{
   LIST_HEAD(selector_list);

   /* Predicate: the parsed selector list arrives as the only variadic arg. */
   bool is_selected(struct avm_pa_session *session, va_list args)
   {
      struct list_head *parsed = va_arg(args, struct list_head *);

      return avm_pa_session_is_selected(parsed, session);
   }

   if (WARN_ON(avm_pa_parse_selector(&selector_list, selector, gfp) != 0))
      return;

   pa_flush_sessions_selective(is_selected, "select", &selector_list);

   avm_pa_selector_free(&selector_list);
}
EXPORT_SYMBOL(avm_pa_flush_sessions_select);

/* Flush every active session that has a bsession attached. */
static void avm_pa_flush_bsessions(void)
{
   bool has_bsession(struct avm_pa_session *session, va_list args)
   {
      if (session->bsession == NULL)
         return false;
      return true;
   }

   pa_flush_sessions_selective(has_bsession, "bsession flush");
}

/* Flush every active session that has the PA_S_IN_HW flag set. */
static void avm_pa_flush_hw_sessions(void)
{
   bool is_in_hw(struct avm_pa_session *session, va_list args)
   {
      bool in_hw = test_bit(PA_S_IN_HW, &session->flags);

      return in_hw;
   }

   pa_flush_sessions_selective(is_in_hw, "hw flush");
}


/* Flush every active session whose modification pkttype has the LISP bit set. */
void avm_pa_flush_lispencap_sessions(void)
{
   bool is_lisp(struct avm_pa_session *session, va_list args)
   {
      return (session->mod.pkttype & AVM_PA_PKTTYPE_LISP) != 0;
   }

   pa_flush_sessions_selective(is_lisp, "lispencap flush");
}
EXPORT_SYMBOL(avm_pa_flush_lispencap_sessions);


/*
 * Flush the active sessions whose first egress is of RTP type and is bound
 * to the socket sk.
 */
void avm_pa_flush_rtp_session(struct sock *sk)
{
   bool is_rtp_for_sock(struct avm_pa_session *session, va_list args)
   {
      struct avm_pa_egress *first = avm_pa_first_egress(session);

      if (first->type != avm_pa_egresstype_rtp)
         return false;

      /* the socket is the only variadic argument */
      return first->rtp.sk == va_arg(args, struct sock *);
   }

   pa_flush_sessions_selective(is_rtp_for_sock, "rtp flush", sk);
}
EXPORT_SYMBOL(avm_pa_flush_rtp_session);


/* Flush every active session whose ingress cast type is multicast. */
void avm_pa_flush_multicast_sessions(void)
{
   bool is_multicast(struct avm_pa_session *session, va_list args)
   {
      if (session->ingress.casttype != AVM_PA_IS_MULTICAST)
         return false;
      return true;
   }

   pa_flush_sessions_selective(is_multicast, "multicast flush");
}
EXPORT_SYMBOL(avm_pa_flush_multicast_sessions);


/*
 * Flush the multicast sessions that carry a given IPv4 destination address.
 *
 * group: IPv4 group address, compared directly against iph.daddr of every
 *        IPv4 header recorded in the session's ingress match
 */
void avm_pa_flush_multicast_sessions_for_group(u32 group)
{
   bool fn(struct avm_pa_session *session, va_list args)
   {
      /* Exactly one variadic argument is passed, so it must be fetched
       * exactly once per call - not once per IPv4 match entry. */
      u32 wanted = va_arg(args, u32);
      int i;

      if (session->ingress.casttype != AVM_PA_IS_MULTICAST)
         return false;

      for (i = 0; i < session->ingress.nmatch; i++) {
         struct avm_pa_match_info *p = &session->ingress.match[i];

         if (p->type == AVM_PA_IPV4) {
            hdrunion_t *hdr = (hdrunion_t *)&session->ingress.hdrcopy[p->offset + session->ingress.hdroff];

            if (wanted == hdr->iph.daddr)
               return true;
         }
      }
      return false;
   }
   pa_flush_sessions_selective(fn, "multicast flush", group);
}
EXPORT_SYMBOL(avm_pa_flush_multicast_sessions_for_group);


/*
 * Flush every active session that uses the given VPID, either as its
 * ingress VPID or as the VPID of any of its egresses.
 */
void avm_pa_flush_sessions_for_vpid(avm_vpid_handle vpid_handle)
{
   bool uses_vpid(struct avm_pa_session *session, va_list args)
   {
      /* handle travels through the varargs as int */
      avm_vpid_handle wanted = (avm_vpid_handle) va_arg(args, int);
      struct avm_pa_egress *egress;

      if (session->ingress_vpid_handle == wanted)
         return true;

      avm_pa_for_each_egress(egress, session) {
         if (egress->vpid_handle == wanted)
            return true;
      }
      return false;
   }

   pa_flush_sessions_selective(uses_vpid, "vpid flush", (int) vpid_handle);
}
EXPORT_SYMBOL(avm_pa_flush_sessions_for_vpid);


/*
 * Flush every active session that uses the given PID, either as its
 * ingress PID or as the PID of any of its egresses.
 */
void avm_pa_flush_sessions_for_pid(avm_pid_handle pid_handle)
{
   bool fn(struct avm_pa_session *session, va_list args)
   {
      /* The handle travels through the varargs as int. It is a PID handle,
       * so it is held in an avm_pid_handle (not an avm_vpid_handle). */
      avm_pid_handle wanted = (avm_pid_handle) va_arg(args, int);
      struct avm_pa_egress *egress;

      if (session->ingress_pid_handle == wanted)
         return true;

      avm_pa_for_each_egress(egress, session) {
         if (egress->pid_handle == wanted)
            return true;
      }
      return false;
   }
   pa_flush_sessions_selective(fn, "pid flush", (int) pid_handle);
}
EXPORT_SYMBOL(avm_pa_flush_sessions_for_pid);

/*
 * Flush every active session for which avm_pa_session_belongs_to_sg()
 * reports membership in the group groupid.
 */
void avm_pa_flush_sessions_for_sg(unsigned short groupid)
{
   bool in_group(struct avm_pa_session *session, va_list args)
   {
      /* group id travels through the varargs as int */
      unsigned short wanted = (unsigned short) va_arg(args, int);

      if (avm_pa_session_belongs_to_sg(session, wanted))
         return true;
      return false;
   }

   pa_flush_sessions_selective(in_group, "group flush", (int) groupid);
}
EXPORT_SYMBOL(avm_pa_flush_sessions_for_sg);

/*
 * Flush every active session that has at least one egress whose destmac
 * pointer is identical to destmac (pointer comparison, not address bytes).
 */
static void
avm_pa_flush_sessions_with_destmac(struct avm_pa_macaddr *destmac)
{
   bool fn(struct avm_pa_session *session, va_list args)
   {
      /* Only one variadic argument is passed; fetch it once instead of
       * once per egress, which would read past the argument list. */
      struct avm_pa_macaddr *wanted = va_arg(args, struct avm_pa_macaddr *);
      struct avm_pa_egress *egress;

      avm_pa_for_each_egress(egress, session) {
         if (egress->destmac == wanted)
            return true;
      }
      return false;
   }
   pa_flush_sessions_selective(fn, "destmac", destmac);
}

/*
 * Flush every active session that involves the given MAC address.
 *
 * A session matches when the address equals the source or destination of
 * the Ethernet header recorded for its ingress, or - for sessions without a
 * bsession - of the Ethernet header recorded for any egress that has a
 * destmac set.
 *
 * mac: ETH_ALEN bytes; copied into an aligned buffer before use
 */
void
avm_pa_flush_sessions_for_mac(const unsigned char mac[ETH_ALEN])
{
   /* enforce alignment required by ether_addr_equal() */
   u8 mac_tmp[ETH_ALEN] __aligned(2);

   bool fn(struct avm_pa_session *session, va_list args)
   {
      struct avm_pa_egress *egress;
      struct ethhdr *ethh;
      u8 *addr = va_arg(args, u8 *);
      struct avm_pa_pkt_match  *match = &session->ingress;
      struct avm_pa_match_info *info = &match->match[0];

      if (info->type == AVM_PA_ETH) {
         /* We want to flush all sessions that belong to the mac address.
          * That means we check both source and dest of the ingress.
          *
          * We also have to check each egress:
          * 1) In typical NAT scenarios both source and dest change
          * 2) In typical MAT scenarios the source changes
          */
         ethh = (struct ethhdr *) (HDRCOPY(match) + info->offset);
         if (ether_addr_equal(addr, ethh->h_source))
            return true;
         if (ether_addr_equal(addr, ethh->h_dest))
            return true;
      }

      if (session->bsession) /* egress doesn't change for bsessions */
         return false;

      avm_pa_for_each_egress(egress, session) {
         match = &egress->match;
         info = &match->match[0];

         if (egress->destmac && info->type == AVM_PA_ETH) {
            ethh = (struct ethhdr *) (HDRCOPY(match) + info->offset);
            if (ether_addr_equal(addr, ethh->h_source))
               return true;
            if (ether_addr_equal(addr, ethh->h_dest))
               return true;
         }
      }
      return false;
   }

   memcpy(mac_tmp, mac, ETH_ALEN);

   /* Pass the array itself (decays to u8 *) so that the argument type
    * matches the va_arg(args, u8 *) in the predicate. */
   pa_flush_sessions_selective(fn, "mac flush", mac_tmp);
}
EXPORT_SYMBOL(avm_pa_flush_sessions_for_mac);

/*
 * Exported stub: logs a fixed info message. The state argument is not
 * used by this implementation.
 */
void avm_pa_telefon_state(int state)
{
   pr_info("avm_pa: avm_pa_telefon_state\n");
}
EXPORT_SYMBOL(avm_pa_telefon_state);

/* ------------------------------------------------------------------------ */
/* ------- packet rate estimator ------------------------------------------ */
/* ------------------------------------------------------------------------ */

#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)
static void avm_pa_est_timer(unsigned long data)
#else
static void avm_pa_est_timer(struct timer_list *timer)
#endif
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_est *e;
   u32 npackets;
   u32 rate;

   /* fw pkts/s */
   e = &ctx->fw_est;
   npackets = ctx->stats.fw_pkts;
   if (npackets >= e->last_packets) {
      rate = (npackets - e->last_packets)<<(12 - ctx->est_idx);
      e->last_packets = npackets;
      e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
      ctx->stats.fw_pps = (e->avpps+0x1FF)>>10;
   } else {
      e->last_packets = npackets;
   }

   /* rx pkts/s */
   e = &ctx->rx_est;
   npackets = ctx->stats.rx_pkts;
   if (npackets >= e->last_packets) {
      rate = (npackets - e->last_packets)<<(12 - ctx->est_idx);
      e->last_packets = npackets;
      e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
      ctx->stats.rx_pps = (e->avpps+0x1FF)>>10;
   } else {
      e->last_packets = npackets;
   }

   /* queued pkts/s */
   e = &ctx->overlimit_est;
   npackets = ctx->stats.rx_overlimit;
   if (npackets >= e->last_packets) {
      rate = (npackets - e->last_packets)<<(12 - ctx->est_idx);
      e->last_packets = npackets;
      e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
      ctx->stats.overlimit_pps = (e->avpps+0x1FF)>>10;
   } else {
      e->last_packets = npackets;
   }

   mod_timer(&ctx->est_timer, jiffies + ((HZ/4) << ctx->est_idx));
}

/*
 * (Re)start the rate estimators: cancel a pending timer, load every
 * estimator with the global ewma_log and the current value of its packet
 * counter, then arm the timer for one period ((HZ/4) << est_idx jiffies).
 */
static void avm_pa_setup_est(void)
{
   struct avm_pa_global *ctx = &pa_glob;

   del_timer(&ctx->est_timer);

   ctx->fw_est.ewma_log = ctx->ewma_log;
   ctx->fw_est.last_packets = ctx->stats.fw_pkts;

   ctx->rx_est.ewma_log = ctx->ewma_log;
   ctx->rx_est.last_packets = ctx->stats.rx_pkts;

   ctx->overlimit_est.ewma_log = ctx->ewma_log;
   ctx->overlimit_est.last_packets = ctx->stats.rx_overlimit;

   mod_timer(&ctx->est_timer, jiffies + ((HZ/4) << ctx->est_idx));
}

/*
 * Stop the rate estimator by deleting its timer.
 *
 * NOTE(review): avm_pa_est_timer() re-arms itself with mod_timer(), and
 * del_timer() does not wait for a handler that is currently running.
 * Confirm whether del_timer_sync() is needed here (depends on the contexts
 * this is called from).
 */
static void avm_pa_unsetup_est(void)
{
   struct avm_pa_global *ctx = &pa_glob;
   del_timer(&ctx->est_timer);
}

/* ------------------------------------------------------------------------ */
/* -------- value log ----------------------------------------------------- */
/* ------------------------------------------------------------------------ */


/* ------------------------------------------------------------------------ */


/* Enable hook: currently only (re)starts the rate estimators. */
static void avm_pa_enable(void)
{
   avm_pa_setup_est();
}

/* Disable hook: currently only stops the rate estimator timer. */
static void avm_pa_disable(void)
{
   avm_pa_unsetup_est();
}

#ifdef CONFIG_PROC_FS
/* ------------------------------------------------------------------------ */
/* -------- procfs functions ---------------------------------------------- */
/* ------------------------------------------------------------------------ */

/*
 * seq_file show callback: emits the pa_show_brief() output into m, using
 * seq_printf as the print function. Always returns 0.
 */
static int brief_show(struct seq_file *m, void *v)
{
   pa_show_brief((pa_fprintf *)seq_printf, m);
   return 0;
}

/*
 * Open handler: sets up a single-record seq_file with brief_show() as the
 * show callback and the proc entry's data as private pointer.
 */
static int brief_show_open(struct inode *inode, struct file *file)
{
   return single_open(file, brief_show, PDE_DATA(inode));
}

/* proc_ops pairing brief_show_open() with the generic seq_file helpers. */
static const struct proc_ops brief_show_ops = {
   .proc_open    = brief_show_open,
   .proc_read    = seq_read,
   .proc_lseek   = seq_lseek,
   .proc_release = single_release, /* brief_show_open() uses single_open() */
};

/* ------------------------------------------------------------------------ */

/*
 * seq_file show callback: emits the pa_show_memory() output into m, using
 * seq_printf as the print function. Always returns 0.
 */
static int memory_show(struct seq_file *m, void *v)
{
   pa_show_memory((pa_fprintf *)seq_printf, m);
   return 0;
}

/*
 * Open handler: sets up a single-record seq_file with memory_show() as the
 * show callback and the proc entry's data as private pointer.
 */
static int memory_show_open(struct inode *inode, struct file *file)
{
   return single_open(file, memory_show, PDE_DATA(inode));
}

/* proc_ops pairing memory_show_open() with the generic seq_file helpers. */
static const struct proc_ops memory_show_ops = {
   .proc_open    = memory_show_open,
   .proc_read    = seq_read,
   .proc_lseek   = seq_lseek,
   .proc_release = single_release, /* memory_show_open() uses single_open() */
};

/* ------------------------------------------------------------------------ */

/*
 * seq_file show callback: emits the pa_show_status() output into m, using
 * seq_printf as the print function. Always returns 0.
 */
static int status_show(struct seq_file *m, void *v)
{
   pa_show_status((pa_fprintf *)seq_printf, m);
   return 0;
}

/*
 * Open handler: sets up a single-record seq_file with status_show() as the
 * show callback and the proc entry's data as private pointer.
 */
static int status_show_open(struct inode *inode, struct file *file)
{
   return single_open(file, status_show, PDE_DATA(inode));
}

/* proc_ops pairing status_show_open() with the generic seq_file helpers. */
static const struct proc_ops status_show_ops = {
   .proc_open    = status_show_open,
   .proc_read    = seq_read,
   .proc_lseek   = seq_lseek,
   .proc_release = single_release, /* status_show_open() uses single_open() */
};

/* ------------------------------------------------------------------------ */

/*
 * seq_file show callback: emits the pa_show_stats() output into m, using
 * seq_printf as the print function. Always returns 0.
 */
static int stats_show(struct seq_file *m, void *v)
{
   pa_show_stats((pa_fprintf *)seq_printf, m);
   return 0;
}

/*
 * Open handler: sets up a single-record seq_file with stats_show() as the
 * show callback and the proc entry's data as private pointer.
 */
static int stats_show_open(struct inode *inode, struct file *file)
{
   return single_open(file, stats_show, PDE_DATA(inode));
}

/* proc_ops pairing stats_show_open() with the generic seq_file helpers. */
static const struct proc_ops stats_show_ops = {
   .proc_open    = stats_show_open,
   .proc_read    = seq_read,
   .proc_lseek   = seq_lseek,
   .proc_release = single_release, /* stats_show_open() uses single_open() */
};

/* ------------------------------------------------------------------------ */

/*
 * seq_file show callback: emits the pa_show_pids() output into m, using
 * seq_printf as the print function. Always returns 0.
 */
static int pids_show(struct seq_file *m, void *v)
{
   pa_show_pids((pa_fprintf *)seq_printf, m);
   return 0;
}

/*
 * Open handler: sets up a single-record seq_file with pids_show() as the
 * show callback and the proc entry's data as private pointer.
 */
static int pids_show_open(struct inode *inode, struct file *file)
{
   return single_open(file, pids_show, PDE_DATA(inode));
}

/* proc_ops pairing pids_show_open() with the generic seq_file helpers. */
static const struct proc_ops pids_show_ops = {
   .proc_open    = pids_show_open,
   .proc_read    = seq_read,
   .proc_lseek   = seq_lseek,
   .proc_release = single_release, /* pids_show_open() uses single_open() */
};

/* ------------------------------------------------------------------------ */

/*
 * seq_file show callback: emits the pa_show_vpids() output into m, using
 * seq_printf as the print function. Always returns 0.
 */
static int vpids_show(struct seq_file *m, void *v)
{
   pa_show_vpids((pa_fprintf *)seq_printf, m);
   return 0;
}

/*
 * Open handler: sets up a single-record seq_file with vpids_show() as the
 * show callback and the proc entry's data as private pointer.
 */
static int vpids_show_open(struct inode *inode, struct file *file)
{
   return single_open(file, vpids_show, PDE_DATA(inode));
}

/* proc_ops pairing vpids_show_open() with the generic seq_file helpers. */
static const struct proc_ops vpids_show_ops = {
   .proc_open    = vpids_show_open,
   .proc_read    = seq_read,
   .proc_lseek   = seq_lseek,
   .proc_release = single_release, /* vpids_show_open() uses single_open() */
};

/* ------------------------------------------------------------------------ */

/*
 * Per-open iterator state of the seq_file listings below (allocated via
 * seq_open_private()). The iterators use a handle value of 0 as their
 * "no more entries" marker.
 */
struct handle_iter {
   unsigned short handle; /* handle/index of the entry currently shown */
};


/*
 * Find the next session handle above 'handle' that pa_session_get()
 * resolves to a session and that passes the ctx->show_filter selector.
 * Returns that handle, or 0 when the handle range is exhausted.
 */
static inline unsigned short
next_session(struct avm_pa_global *ctx, unsigned short handle)
{
   for (++handle; handle < CONFIG_AVM_PA_MAX_SESSION; ++handle) {
      struct avm_pa_session *candidate = pa_session_get(handle);

      if (!candidate)
         continue;
      if (avm_pa_session_is_selected(&ctx->show_filter, candidate))
         return handle;
   }
   return 0;
}

/*
 * seq_file start: position the iterator on the (*pos)-th listed session,
 * counting from the first one, by walking forward from handle 0.
 * Returns the session, or NULL when there are not that many sessions.
 */
static void *sess_show_seq_start(struct seq_file *seq, loff_t *pos)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_data   *pd  = &pa_data;
   struct handle_iter   *it  = seq->private;
   loff_t skipped = 0;

   it->handle = next_session(ctx, 0);
   while (it->handle != 0 && skipped < *pos) {
      it->handle = next_session(ctx, it->handle);
      skipped++;
   }

   if (it->handle == 0)
      return NULL;
   return PA_SESSION(pd, it->handle);
}

/*
 * seq_file next: advance *pos and move the iterator to the following
 * listed session. Returns that session, or NULL at the end.
 */
static void *sess_show_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_data   *pd  = &pa_data;
   struct handle_iter   *it  = seq->private;

   ++*pos;

   it->handle = next_session(ctx, it->handle);
   if (it->handle != 0)
      return PA_SESSION(pd, it->handle);
   return NULL;
}

/* seq_file stop: intentionally empty. */
static void sess_show_seq_stop(struct seq_file *seq, void *v)
{
}

/*
 * seq_file show: print a blank separator line followed by the session the
 * iterator currently points at (looked up again from it->handle; the 'v'
 * cookie is not used). Always returns 0.
 */
static int sess_show_seq_show(struct seq_file *seq, void *v)
{
   struct avm_pa_data   *pd  = &pa_data;
   const struct handle_iter *it = seq->private;
   seq_printf(seq, "\n");
   pa_show_session(PA_SESSION(pd, it->handle),
                   (pa_fprintf *)seq_printf, seq);
   return 0;
}

/* seq_file iterator callbacks for the session listing. */
static struct seq_operations sess_show_seq_ops = {
   .start = sess_show_seq_start,
   .next  = sess_show_seq_next,
   .stop  = sess_show_seq_stop,
   .show  = sess_show_seq_show,
};

/*
 * Open handler: opens the seq_file with sess_show_seq_ops and a private
 * struct handle_iter as iterator state.
 */
static int sess_show_open(struct inode *inode, struct file *file)
{
   return seq_open_private(file, &sess_show_seq_ops, sizeof(struct handle_iter));
}

/* proc_ops pairing sess_show_open() with the generic seq_file helpers. */
static const struct proc_ops sess_show_ops = {
    .proc_open    = sess_show_open,
    .proc_read    = seq_read,
    .proc_lseek   = seq_lseek,
    .proc_release = seq_release_private, /* sess_show_open() uses seq_open_private() */
};

/* ------------------------------------------------------------------------ */

/*
 * Find the next session handle above 'handle' whose session exists and has
 * a bsession attached. Returns that handle, or 0 when the handle range is
 * exhausted.
 */
static inline unsigned short
next_bsession(struct avm_pa_global *ctx, unsigned short handle)
{
   for (++handle; handle < CONFIG_AVM_PA_MAX_SESSION; ++handle) {
      struct avm_pa_session *candidate = pa_session_get(handle);

      if (candidate && candidate->bsession)
         return handle;
   }
   return 0;
}

/*
 * seq_file start: position the iterator on the (*pos)-th session that has
 * a bsession, walking forward from handle 0.
 * Returns the bsession entry, or NULL when there are not that many.
 */
static void *bsess_show_seq_start(struct seq_file *seq, loff_t *pos)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct handle_iter *it = seq->private;
   loff_t skipped = 0;

   it->handle = next_bsession(ctx, 0);
   while (it->handle != 0 && skipped < *pos) {
      it->handle = next_bsession(ctx, it->handle);
      skipped++;
   }

   if (it->handle == 0)
      return NULL;
   return PA_BSESSION(ctx, it->handle);
}

/*
 * seq_file next: advance *pos and move the iterator to the following
 * session that has a bsession. Returns its bsession entry, or NULL at the
 * end.
 */
static void *bsess_show_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct handle_iter *it = seq->private;

   ++*pos;

   it->handle = next_bsession(ctx, it->handle);
   if (it->handle != 0)
      return PA_BSESSION(ctx, it->handle);
   return NULL;
}

/* seq_file stop: intentionally empty. */
static void bsess_show_seq_stop(struct seq_file *seq, void *v)
{
}

/*
 * seq_file show: print a blank separator line followed by the bsession the
 * iterator currently points at (looked up again from it->handle; the 'v'
 * cookie is not used). Always returns 0.
 */
static int bsess_show_seq_show(struct seq_file *seq, void *v)
{
   struct avm_pa_global *ctx = &pa_glob;
   const struct handle_iter *it = seq->private;
   seq_printf(seq, "\n");
   pa_show_bsession(PA_BSESSION(ctx, it->handle),
                   (pa_fprintf *)seq_printf, seq);
   return 0;
}

/* seq_file iterator callbacks for the bsession listing. */
static struct seq_operations bsess_show_seq_ops = {
   .start = bsess_show_seq_start,
   .next  = bsess_show_seq_next,
   .stop  = bsess_show_seq_stop,
   .show  = bsess_show_seq_show,
};

/*
 * Open handler: opens the seq_file with bsess_show_seq_ops and a private
 * struct handle_iter as iterator state.
 */
static int bsess_show_open(struct inode *inode, struct file *file)
{
   return seq_open_private(file, &bsess_show_seq_ops, sizeof(struct handle_iter));
}


/* proc_ops pairing bsess_show_open() with the generic seq_file helpers. */
static const struct proc_ops bsess_show_ops = {
    .proc_open    = bsess_show_open,
    .proc_read    = seq_read,
    .proc_lseek   = seq_lseek,
    .proc_release = seq_release_private, /* bsess_show_open() uses seq_open_private() */
};

/* ------------------------------------------------------------------------ */

/*
 * Return the index of the next non-empty MAC address hash bucket after
 * idx, or -1 when there is none. Pass -1 to start the search at bucket 0.
 *
 * Bucket 0 is a valid result, so 0 cannot double as the end marker.
 */
static inline int
next_macaddrhash(struct avm_pa_global *ctx, int idx)
{
   while (++idx < (int)ARRAY_SIZE(ctx->macaddr_hashtab)) {
      if (!hlist_empty(&ctx->macaddr_hashtab[idx]))
         return idx;
   }
   return -1;
}

/*
 * seq_file start: position the iterator on the (*pos)-th non-empty hash
 * bucket. Returns the bucket head, or NULL when there are not that many.
 */
static void *macaddr_show_seq_start(struct seq_file *seq, loff_t *pos)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct handle_iter *it = seq->private;
   int idx = next_macaddrhash(ctx, -1);
   loff_t i;

   for (i = 0; idx >= 0 && i < *pos; i++)
      idx = next_macaddrhash(ctx, idx);
   if (idx < 0)
      return NULL;

   it->handle = idx;
   return &ctx->macaddr_hashtab[idx];
}

/*
 * seq_file next: advance *pos and move the iterator to the following
 * non-empty hash bucket. Returns the bucket head, or NULL at the end.
 */
static void *macaddr_show_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct handle_iter *it = seq->private;
   int idx;

   ++*pos;
   idx = next_macaddrhash(ctx, it->handle);
   if (idx < 0)
      return NULL;

   it->handle = idx;
   return &ctx->macaddr_hashtab[idx];
}

/* seq_file stop: intentionally empty. */
static void macaddr_show_seq_stop(struct seq_file *seq, void *v)
{
}

/*
 * seq_file show: print the current hash bucket index (4 hex digits and a
 * colon) followed by one tab-indented line per MAC address entry in that
 * bucket. The bucket is walked under rcu_read_lock_bh(). Always returns 0.
 */
static int macaddr_show_seq_show(struct seq_file *seq, void *v)
{
   struct avm_pa_global *ctx = &pa_glob;
   const struct handle_iter *it = seq->private;
   struct avm_pa_macaddr *entry;
   char text[128];

   rcu_read_lock_bh();

   seq_printf(seq, "%04x:", it->handle);
   hlist_for_each_entry_rcu_bh(entry, &ctx->macaddr_hashtab[it->handle], macaddr_list) {
      (void) pa_macaddr2str(entry, text, sizeof(text));
      seq_printf(seq, "\t%s\n", text);
   }

   rcu_read_unlock_bh();

   return 0;
}

/* seq_file iterator callbacks for the MAC address hash table listing. */
static struct seq_operations macaddr_show_seq_ops = {
   .start = macaddr_show_seq_start,
   .next  = macaddr_show_seq_next,
   .stop  = macaddr_show_seq_stop,
   .show  = macaddr_show_seq_show,
};

/*
 * Open handler: opens the seq_file with macaddr_show_seq_ops and a private
 * struct handle_iter as iterator state.
 */
static int macaddr_show_open(struct inode *inode, struct file *file)
{
   return seq_open_private(file, &macaddr_show_seq_ops, sizeof(struct handle_iter));
}

/* proc_ops pairing macaddr_show_open() with the generic seq_file helpers. */
static const struct proc_ops macaddr_show_ops = {
   .proc_open    = macaddr_show_open,
   .proc_read    = seq_read,
   .proc_lseek   = seq_lseek,
   .proc_release = seq_release_private, /* macaddr_show_open() uses seq_open_private() */
};

/* ------------------------------------------------------------------------ */

/*
 * Find the next PID slot above 'handle' that is in use (non-zero
 * pid_handle). Returns its index, or 0 when the PID range is exhausted.
 */
static inline unsigned short
next_pid(struct avm_pa_global *ctx, unsigned short handle)
{
   for (++handle; handle < CONFIG_AVM_PA_MAX_PID; ++handle) {
      if (PA_PID(ctx, handle)->pid_handle != 0)
         return handle;
   }
   return 0;
}

/*
 * seq_file start: position the iterator on the (*pos)-th PID slot in use,
 * walking forward from handle 0.
 * Returns the PID entry, or NULL when there are not that many.
 */
static void *pid_show_seq_start(struct seq_file *seq, loff_t *pos)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct handle_iter *it = seq->private;
   loff_t skipped = 0;

   it->handle = next_pid(ctx, 0);
   while (it->handle != 0 && skipped < *pos) {
      it->handle = next_pid(ctx, it->handle);
      skipped++;
   }

   if (it->handle == 0)
      return NULL;
   return PA_PID(ctx, it->handle);
}

/*
 * seq_file next: advance *pos and move the iterator to the following PID
 * slot in use. Returns that PID entry, or NULL at the end.
 */
static void *pid_show_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct handle_iter *it = seq->private;

   ++*pos;

   it->handle = next_pid(ctx, it->handle);
   if (it->handle != 0)
      return PA_PID(ctx, it->handle);
   return NULL;
}

/* seq_file stop: intentionally empty. */
static void pid_show_seq_stop(struct seq_file *seq, void *v)
{
}

/*
 * seq_file show: print the current PID (handle and name), then one line per
 * non-empty session hash bucket of that PID listing the handles of the
 * sessions chained in it. Buckets are walked under rcu_read_lock_bh().
 * Always returns 0.
 */
static int hash_show_seq_show(struct seq_file *seq, void *v)
{
   struct avm_pa_global *ctx = &pa_glob;
   const struct handle_iter *it = seq->private;
   struct avm_pa_pid *pid = PA_PID(ctx, it->handle);
   struct avm_pa_session *sess;
   int bucket;

   seq_printf(seq, "PID%-3d: %s\n", it->handle, pid->cfg.name);

   rcu_read_lock_bh();
   for (bucket = 0; bucket < ARRAY_SIZE(pid->hash_sess); bucket++) {
      if (hlist_empty(&pid->hash_sess[bucket]))
         continue;

      seq_printf(seq, "%3d: ", bucket);
      hlist_for_each_entry_rcu_bh(sess, &pid->hash_sess[bucket], hash_list)
         seq_printf(seq, " %3d", sess->session_handle);
      seq_printf(seq, "\n");
   }
   rcu_read_unlock_bh();

   return 0;
}

/*
 * seq_file iterator callbacks for the per-PID session hash listing:
 * iterates over PIDs, shows each PID's hash buckets.
 */
static struct seq_operations hash_show_seq_ops = {
   .start = pid_show_seq_start,
   .next  = pid_show_seq_next,
   .stop  = pid_show_seq_stop,
   .show  = hash_show_seq_show,
};

/*
 * Open handler: opens the seq_file with hash_show_seq_ops and a private
 * struct handle_iter as iterator state.
 */
static int hash_show_open(struct inode *inode, struct file *file)
{
   return seq_open_private(file, &hash_show_seq_ops, sizeof(struct handle_iter));
}

/* proc_ops pairing hash_show_open() with the generic seq_file helpers. */
static const struct proc_ops hash_show_ops = {
   .proc_open    = hash_show_open,
   .proc_read    = seq_read,
   .proc_lseek   = seq_lseek,
   .proc_release = seq_release_private, /* hash_show_open() uses seq_open_private() */
};

/* ------------------------------------------------------------------------ */

/*
 * seq_file show callback for the prioack listing: prints the global packet
 * threshold and ratio, then for every PID slot with TACK enabled its ACK
 * counters and TACK priority map, and for every PID slot with TGET enabled
 * its TGET priority map. Always returns 0.
 */
static int prioack_show(struct seq_file *seq, void *v)
{
   struct avm_pa_global *ctx = &pa_glob;
   int slot;

   seq_printf(seq, "Packet Threshold      : %u\n", ctx->prioack_thresh_packets);
   seq_printf(seq, "Ratio                 : %u\n", ctx->prioack_ratio);

   for (slot = 1; slot < CONFIG_AVM_PA_MAX_PID; ++slot) {
      struct avm_pa_pid *pid = PA_PID(ctx, slot);
      int prio;

      if (avm_pa_pid_tack_enabled(pid)) {
         seq_printf(seq, "PID%d: Detected ACKs   : %u\n", pid->pid_handle, pid->prioack_acks);
         seq_printf(seq, "PID%d: Accelerated ACK : %u\n", pid->pid_handle, pid->prioack_accl_acks);
         for (prio = 0; prio < AVM_PA_MAX_PRIOS; ++prio) {
            seq_printf(seq, "PID%d: TACK Priority[%d]: %x\n", pid->pid_handle, prio,
                       pid->prio_maps[AVM_PA_PRIO_MAP_TACK].prios[prio]);
         }
      }

      if (avm_pa_pid_tget_enabled(pid)) {
         for (prio = 0; prio < AVM_PA_MAX_PRIOS; ++prio) {
            seq_printf(seq, "PID%d: TGET Priority[%d]: %x\n", pid->pid_handle, prio,
                       pid->prio_maps[AVM_PA_PRIO_MAP_TGET].prios[prio]);
         }
      }
   }

   return 0;
}

/*
 * Open handler: sets up a single-record seq_file with prioack_show() as
 * the show callback and the proc entry's data as private pointer.
 */
static int prioack_show_open(struct inode *inode, struct file *file)
{
   return single_open(file, prioack_show, PDE_DATA(inode));
}

/* proc_ops pairing prioack_show_open() with the generic seq_file helpers. */
static const struct proc_ops prioack_show_ops = {
   .proc_open    = prioack_show_open,
   .proc_read    = seq_read,
   .proc_lseek   = seq_lseek,
   .proc_release = single_release, /* prioack_show_open() uses single_open() */
};

/*
 * seq_file show callback for the priority map listing: for every PID slot
 * in use, prints each enabled priority map with all of its queue entries.
 * Always returns 0.
 */
static int priomaps_show(struct seq_file *seq, void *v)
{
   struct avm_pa_global *ctx = &pa_glob;
   int slot;

   for (slot = 1; slot < CONFIG_AVM_PA_MAX_PID; ++slot) {
      struct avm_pa_pid *pid = PA_PID(ctx, slot);
      int map;

      if (pid->pid_handle == 0)
         continue;

      seq_printf(seq, "PID %d Prio Maps\n", pid->pid_handle);
      for (map = 0; map < AVM_PA_COUNT_PRIO_MAPS; ++map) {
         int queue;

         if (!pid->prio_maps[map].enabled)
            continue;

         seq_printf(seq, "Prio Map[%d]\n", map);
         for (queue = 0; queue < AVM_PA_MAX_PRIOS; ++queue)
            seq_printf(seq, "Queue[%d]: %x\n", queue, pid->prio_maps[map].prios[queue]);
      }
   }

   return 0;
}

/*
 * Open handler: sets up a single-record seq_file with priomaps_show() as
 * the show callback and the proc entry's data as private pointer.
 */
static int priomaps_show_open(struct inode *inode, struct file *file)
{
   return single_open(file, priomaps_show, PDE_DATA(inode));
}

/* proc_ops pairing priomaps_show_open() with the generic seq_file helpers. */
static const struct proc_ops priomaps_show_ops = {
   .proc_open    = priomaps_show_open,
   .proc_read    = seq_read,
   .proc_lseek   = seq_lseek,
   .proc_release = single_release, /* priomaps_show_open() uses single_open() */
};

/* ------------------------------------------------------------------------ */

/*
 * Look up a PID by its configured name.
 *
 * Only slots whose stored pid_handle equals their own index are considered.
 * Returns the handle of the first slot whose cfg.name equals pidname, or 0
 * when no such PID exists.
 */
static avm_pid_handle pa_find_pid_by_name(const char *pidname)
{
   struct avm_pa_global *ctx = &pa_glob;
   avm_pid_handle candidate;

   for (candidate = 1; candidate < CONFIG_AVM_PA_MAX_PID; candidate++) {
      struct avm_pa_pid *pid = PA_PID(ctx, candidate);

      if (pid->pid_handle != candidate)
         continue;
      if (strcmp(pid->cfg.name, pidname) != 0)
         continue;
      return candidate;
   }
   return 0;
}


/*
 * Like strsep(), but skips the empty tokens that plain strsep() yields for
 * consecutive (or leading/trailing) delimiters.
 * Returns the next non-empty token, or NULL when the input is exhausted.
 */
static char *strsep_nonempty(char **stringp, const char *delim)
{
   char *tok;

   do {
      tok = strsep(stringp, delim);
   } while (tok && *tok == '\0');

   return tok;
}

static ssize_t avm_pa_write_cmds(struct file *file,
                                 const char __user *buffer,
                                 size_t count, loff_t *offset)
{
   struct avm_pa_global *ctx = &pa_glob;
   char    pp_cmd[101];
   char*   argv[10];
   int     argc;
   char*   ptr_next_tok;
   char*   ptr_next_line;
   avm_pid_handle pid_handle;

   /* Validate the length of data passed. */
   if (count >= sizeof(pp_cmd))
      return -E2BIG;

   /* Initialize the buffer before using it. */
   memset ((void *)&pp_cmd[0], 0, sizeof(pp_cmd));

   /* Copy from user space. */
   if (copy_from_user (&pp_cmd, buffer, count))
      return -EFAULT;

   ptr_next_line = pp_cmd;
   /* one command (with arguments) per line */
   while ((ptr_next_tok = strsep_nonempty(&ptr_next_line, "\n"))) {
      /* exract arguments */
      for (argc = 0; argc < ARRAY_SIZE(argv); argc++)
         argv[argc] = strsep_nonempty(&ptr_next_tok, " \t");

      if (ptr_next_tok)
         return -E2BIG;

      /* enable | disable | testmode */
      if (strcmp(argv[0], "enable") == 0) {
         ctx->fw_disabled = 0;
         ctx->disabled = 0;
         avm_pa_enable();
         pr_debug("avm_pa: enabled\n");
      } else if (strcmp(argv[0], "disable") == 0) {
         ctx->disabled = 1;
         ctx->fw_disabled = 1;
         avm_pa_disable();
         avm_pa_flush_sessions();
         pr_debug("avm_pa: disabled\n");
      } else if (strcmp(argv[0], "testmode") == 0) {
         ctx->fw_disabled = 1;
         ctx->disabled = 0;
         avm_pa_disable();
         pr_debug("avm_pa: testmode\n");

      /* hw_enable | hw_disable */
      } else if (strcmp(argv[0], "hw_enable") == 0) {
         ctx->hw_ppa_disabled = 0;
         pr_debug("avm_pa: hw enabled\n");
      } else if (strcmp(argv[0], "hw_disable") == 0) {
         ctx->hw_ppa_disabled = 1;
         avm_pa_flush_hw_sessions();
         pr_debug("avm_pa: hw disabled\n");
      } else if (strcmp(argv[0], "filter") == 0) {
         int old = ctx->filter_enabled;
         if (argc > 1)
            ctx->filter_enabled = argc > 1 ? *argv[1] != '0' : 1;
         else
            ctx->filter_enabled = 1;
         if (ctx->filter_enabled && !old && !list_empty(&ctx->accel_filter))
            avm_pa_flush_sessions();
      } else if (strcmp(argv[0], "nofilter") == 0) {
         ctx->filter_enabled = 0;
      /* flush */
      } else if (strcmp(argv[0], "flush") == 0) {
         if (argv[1]) {
            int i = 1;
            char *s;
            char buf[64] = "";

            /* Rebuild original, space separated selector */
            while ((s = argv[i++])) {
               strlcat(buf, " ", sizeof(buf));
               strlcat(buf, s, sizeof(buf));
            }

            avm_pa_flush_sessions_select(buf+1, GFP_KERNEL);
            pr_debug("avm_pa: flush \"%s\"\n", buf+1);
         } else {
            avm_pa_flush_sessions();
            pr_debug("avm_pa: flush\n");
         }
      /* rps (enable|disable) */
      } else if (!strcmp(argv[0], "rps")) {
            if (!argv[1])
               return -EINVAL;

#ifdef CONFIG_AVM_PA_RPS
            if (!strncmp(argv[1], "en", 2)) /* short for enable */
               ctx->rps_enabled = 1;
            else if (!strncmp(argv[1], "di", 2)) /* short for disable */
               ctx->rps_enabled = 0;
            else
               return -EINVAL;
            pr_info("avm_pa: rps %s\n", ctx->rps_enabled ? "enabled":"disabled");
#else
            return -ENOSYS;
#endif
      /* rxq (enable|disable) */
      } else if (!strcmp(argv[0], "rxq")) {
         if (!argv[1])
            return -EINVAL;

         if (!strncmp(argv[1], "en", 2)) /* short for enable */
            ctx->rxq_enabled = 1;
         else if (!strncmp(argv[1], "di", 2)) /* short for disable */
            ctx->rxq_enabled = 0;
         else
            return -EINVAL;
      /* nodbg */
      } else if (strcmp(argv[0], "nodbg") == 0) {
         ctx->dbgcapture = 0;
         ctx->dbgsession = 0;
         ctx->dbgnosession = 0;
         ctx->dbgtrace = 0;
         ctx->dbgmatch = 0;
         ctx->dbgprioack = 0;
         ctx->dbgprioacktrace = 0;
         ctx->dbgstats = 0;
         pr_debug("avm_pa: all debugs off\n");

      /* dbgcapture | nodbgcapture */
      } else if (strcmp(argv[0], "dbgcapture") == 0) {
         ctx->dbgcapture = 1;
         pr_debug("avm_pa: %s\n", argv[0]);
      } else if (strcmp(argv[0], "nodbgcapture") == 0) {
         ctx->dbgcapture = 0;
         pr_debug("avm_pa: %s\n", argv[0]);

      /* dbgsession | nodbgsession */
      } else if (strcmp(argv[0], "dbgsession") == 0) {
         ctx->dbgsession = 1;
         pr_debug("avm_pa: %s\n", argv[0]);
      } else if (strcmp(argv[0], "nodbgsession") == 0) {
         ctx->dbgsession = 0;
         pr_debug("avm_pa: %s\n", argv[0]);

      /* dbgnosession | nodbgnosession */
      } else if (strcmp(argv[0], "dbgnosession") == 0) {
         ctx->dbgnosession = 1;
         pr_debug("avm_pa: %s\n", argv[0]);
      } else if (strcmp(argv[0], "nodbgnosession") == 0) {
         ctx->dbgnosession = 0;
         pr_debug("avm_pa: %s\n", argv[0]);

      /* trace | notrace */
      } else if (strcmp(argv[0], "trace") == 0) {
#if AVM_PA_TRACE
         ctx->dbgtrace = 1;
         pr_debug("avm_pa: %s\n", argv[0]);
#else
         pr_err("avm_pa: trace not compiled in\n");
#endif
      } else if (strcmp(argv[0], "notrace") == 0) {
         ctx->dbgtrace = 0;
         pr_debug("avm_pa: %s\n", argv[0]);

      /* dbgmatch | nodbgmatch */
      } else if (strcmp(argv[0], "nodbgmatch") == 0) {
         ctx->dbgmatch = 0;
         pr_debug("avm_pa: %s\n", argv[0]);
      } else if (strcmp(argv[0], "dbgmatch") == 0) {
         ctx->dbgmatch = 1;
         pr_debug("avm_pa: %s\n", argv[0]);

      /* dbgprioack | nodbgprioack */
      } else if (strcmp(argv[0], "dbgprioack") == 0) {
         ctx->dbgprioack = 1;
         pr_debug("avm_pa: %s\n", argv[0]);
      } else if (strcmp(argv[0], "nodbgprioack") == 0) {
         ctx->dbgprioack = 0;
         pr_debug("avm_pa: %s\n", argv[0]);

      /* dbgprioacktrace | nodbgprioacktrace */
      } else if (strcmp(argv[0], "dbgprioacktrace") == 0) {
         ctx->dbgprioacktrace = 1;
         pr_debug("avm_pa: %s\n", argv[0]);
      } else if (strcmp(argv[0], "nodbgprioacktrace") == 0) {
         ctx->dbgprioacktrace = 0;
         pr_debug("avm_pa: %s\n", argv[0]);

      /* dbgstats | nodbgstats */
      } else if (strcmp(argv[0], "dbgstats") == 0) {
         ctx->dbgstats = 1;
         pr_debug("avm_pa: %s\n", argv[0]);
      } else if (strcmp(argv[0], "nodbgstats") == 0) {
         ctx->dbgstats = 0;
         pr_debug("avm_pa: %s\n", argv[0]);
      } else if (strstr(argv[0], "bsessions")) {
         ctx->bsession_allowed = strcmp(argv[0], "nobsessions") != 0;
         if (!ctx->bsession_allowed)
            avm_pa_flush_bsessions();
         pr_debug("avm_pa: %s: bsessions_allowed = %d\n", argv[0], ctx->bsession_allowed);
      /* pid <device> */
      } else if (strcmp(argv[0], "pid") == 0 && argv[1]) {
         struct net_device *dev = dev_get_by_name(&init_net, argv[1]);

         if (dev) {
            if (avm_pa_dev_register(dev) < 0)
               pr_err("%s: failed to register PA PID\n", argv[1]);
            dev_put(dev);
         } else {
            pr_err("avm_pa_write_cmds(pid): dev %s not found\n", argv[1]);
         }

      /* vpid <device> */
      } else if (strcmp(argv[0], "vpid") == 0 && argv[1]) {
         struct net_device *dev = dev_get_by_name(&init_net, argv[1]);

         if (dev) {
            struct avm_pa_vpid_cfg cfg;
            snprintf(cfg.name, sizeof(cfg.name), "%s", argv[1]);
            cfg.v4_mtu = 1500;
            cfg.v6_mtu = 1500;
            if (avm_pa_dev_vpid_register(AVM_PA_DEVINFO(dev), &cfg) < 0)
               pr_err("%s: failed to register PA VPID\n", argv[1]);
            dev_put(dev);
         } else {
            pr_err("avm_pa_write_cmds(vpid): dev %s not found\n", argv[1]);
         }

      /* unreg <device> */
      } else if (strcmp(argv[0], "unreg") == 0 && argv[1]) {
         int ret;
         struct net_device *dev = dev_get_by_name(&init_net, argv[1]);
         DECLARE_COMPLETION_ONSTACK(done);

         if (dev) {
            avm_pa_dev_unregister(AVM_PA_DEVINFO(dev), &done);
            ret = wait_for_completion_interruptible(&done);
            if (ret != 0)
                return ret;
         } else {
            pr_err("avm_pa_write_cmds(unreg): dev %s not found\n", argv[1]);
         }

      /* prioack <enable pid prio|disable [pid]|tgetenable pid prio|tgetdisable [pid]|pthresh x|ratio x>
       *
       * Note: This interface is now partially obsolete (prioack <enable|disable>)
       * in favour of the priomap interface defined below.
       */
      } else if (strcmp(argv[0], "prioack") == 0) {
         unsigned val = 0;

         if (argv[1]) {
            pr_debug("avm_pa: prioack %s %s %s\n",
                     argv[1], argv[2] ? argv[2] : "", argv[3] ? argv[3] : "");
            if (strcmp(argv[1], "enable") == 0) {
               if (argv[2] && argv[3]) {
                  if ((pid_handle = pa_find_pid_by_name(argv[2])) != 0) {
                     avm_pa_pid_activate_tcpackprio(pid_handle, 1, simple_strtoul(argv[3], 0, 0));
                  } else {
                     pr_err("avm_pa: %s %s: %s not found\n",
                            argv[0], argv[1], argv[2]);
                  }
               }
            } else if (strcmp(argv[1], "disable") == 0) {
               if (argv[2]) {
                  if ((pid_handle = pa_find_pid_by_name(argv[2])) != 0) {
                     avm_pa_pid_activate_tcpackprio(pid_handle, 0, 0);
                  } else {
                     pr_err("avm_pa: prioack %s: %s not found\n",
                            argv[1], argv[2]);
                  }
               } else {
                  int n;
                  for (n = 1; n < CONFIG_AVM_PA_MAX_PID; ++n) {
                     avm_pa_pid_activate_tcpackprio(n, 0, 0);
                  }
               }
            } else if (strcmp(argv[1], "tgetenable") == 0) {
               if (argv[2] && argv[3]) {
                  if ((pid_handle = pa_find_pid_by_name(argv[2])) != 0) {
                     avm_pa_pid_activate_tgetprio(pid_handle, 1, simple_strtoul(argv[3], 0, 0));
                  } else {
                     pr_err("avm_pa: %s %s: %s not found\n",
                            argv[0], argv[1], argv[2]);
                  }
               }
            } else if (strcmp(argv[1], "tgetdisable") == 0) {
               if (argv[2]) {
                  if ((pid_handle = pa_find_pid_by_name(argv[2])) != 0) {
                     avm_pa_pid_activate_tgetprio(pid_handle, 0, 0);
                  } else {
                     pr_err("avm_pa: %s %s: %s not found\n",
                            argv[0], argv[1], argv[2]);
                  }
               } else {
                  int n;
                  for (n = 1; n < CONFIG_AVM_PA_MAX_PID; ++n) {
                     avm_pa_pid_activate_tgetprio(n, 0, 0);
                  }
               }
            } else if (strcmp(argv[1], "pthresh") == 0) {
               if (argv[2]) val = simple_strtoul(argv[2], 0, 0);
               if (val) ctx->prioack_thresh_packets = val;
            } else if (strcmp(argv[1], "ratio") == 0) {
               if (argv[2]) val = simple_strtoul(argv[2], 0, 0);
               if (val) ctx->prioack_ratio = val;
            } else {
               pr_debug("avm_pa: prioack unknown command %s \n (available commands: enable,disable,psize,pthresh,prio,ratio)\n", argv[1]);
            }
         }
      /* The priomap interface supersedes the old prioack interface. */
      } else if (strcmp(argv[0], "priomap") == 0) {

         if (argv[1] && argv[2] && argv[3]) {
            unsigned short prio_map = simple_strtoul(argv[1], 0, 0);
            if ((pid_handle = pa_find_pid_by_name(argv[2])) != 0) {
               /* Command: priomap <priomap> <pidname> <enable|disable>
                *
                * Effect: Enables or disables the priority map attached to the
                * device specified by the 'dev' parameter. The 'priomap' parameter
                * MUST equal either AVM_PA_PRIO_MAP_TACK (= 0x0000) or
                * AVM_PA_PRIO_MAP_TGET (= 0x0001) as defined in avm_pa.h.
                */
               if (strcmp(argv[3], "enable") == 0) {
                  avm_pa_pid_prio_map_enable(pid_handle, prio_map, 1);
               } else if (strcmp(argv[3], "disable") == 0) {
                  avm_pa_pid_prio_map_enable(pid_handle, prio_map, 0);
               /* Command: priomap <priomap> <pidname> reset
                *
                * Effect: Resets the priority map attached to the device specified by
                * the 'dev' parameter. The 'priomap' parameter MUST equal either
                * AVM_PA_PRIO_MAP_TACK (= 0x0000) or AVM_PA_PRIO_MAP_TGET (= 0x0001)
                * as defined in avm_pa.h.
                */
               } else if (strcmp(argv[3], "reset") == 0) {
                  avm_pa_pid_prio_map_reset(pid_handle, prio_map);
               /* Command: priomap <priomap> <pidname> setprio <queue> <prio>
                *
                * Effect: Manipulates the priority map entry specified by the
                * 'queue' parameter which is stored in the priority map attached
                * to the device specified by the 'dev' parameter. The 'priomap'
                * parameter MUST equal either AVM_PA_PRIO_MAP_TACK (= 0x0000) or
                * AVM_PA_PRIO_MAP_TGET (= 0x0001) as defined in avm_pa.h.
                */
               } else if (strcmp(argv[3], "setprio") == 0 && argv[4] && argv[5]) {
                  avm_pa_pid_prio_map_set_prio_per_queue(pid_handle, prio_map,
                     simple_strtoul(argv[4], 0, 0),  /* queue */
                     simple_strtoul(argv[5], 0, 0)); /* prio */
               } else {
                  pr_err("avm_pa: priomap unknown command '%s'\n (available commands: enable, disable, reset, setprio)\n", argv[3]);
               }
            } else {
               pr_err("avm_pa: %s %s %s %s: %s not found\n",
                      argv[0], argv[1], argv[2], argv[3], argv[2]);
            }
         } else {
            pr_err("avm_pa: %s: not enough parameters\n", argv[0]);
         }
      } else if (!strcmp(argv[0], "timeout")) {
         unsigned int val;

         if (!argv[1] || !argv[2])
            return -EINVAL;

         val = simple_strtoul(argv[2], 0, 0);
         if (!strcmp(argv[1], "tcp"))
            ctx->tcp_timeout_secs = val;
         else if (!strcmp(argv[1], "udp"))
            ctx->udp_timeout_secs = val;
         else if (!strcmp(argv[1], "echo"))
            ctx->echo_timeout_secs = val;
         else if (!strcmp(argv[1], "bridge"))
            ctx->bridge_timeout_secs = val;
         else
            return -EINVAL;

         pr_info("avm_pa: setting timeout for %s to %u seconds\n", argv[1], val);
      } else if (!strcmp(argv[0], "debug")) {
         if (argv[1] && !strcmp(argv[1], "unreg-hw-pa")) {
            int ret;
            DECLARE_COMPLETION_ONSTACK(done);
            struct avm_hardware_pa tmp = ctx->hardware_pa;
            avm_pa_unregister_hardware_pa(&tmp, &done);
            ret = wait_for_completion_interruptible(&done);
            if (ret != 0)
                return ret;

            /* Give some time for new sessions in case this
             * test is driven with parallel sessions. Of course,
             * hardware sessions must not be created.
             */
            msleep(100);
            if (pa_hw_pa_get()) {
               pa_hw_pa_put();
               pr_err("avm_pa: hw_pa refcount should be 0 but really is %d\n", kref_read(&ctx->hw_pa_ref));
               return -EIO;
            }
            ret = avm_pa_register_hardware_pa(&tmp);
            if (ret != 0) {
               pr_err("avm_pa: re-register hardware_pa failed: %d\n", ret);
               return ret;
            }
         } else if (argv[1] && !strcmp(argv[1], "non-pvid-macaddr")) {
            unsigned long val;
            char mac[ETH_ALEN];
            struct avm_pa_macaddr *p;
            avm_pid_handle pid_handle;

            if (!argv[2] || !argv[3] || !argv[4])
               return -EINVAL;

            if ((pid_handle = pa_find_pid_by_name(argv[2])) == 0) {
               pr_err("err pid_handle %s\n", argv[2]);
               return -EINVAL;
            }
            if (!mac_pton(argv[3], mac)) {
               pr_err("err mac %s\n", argv[3]);
               return -EINVAL;
            }
            if ((val = simple_strtoul(argv[4], 0, 0)) > 4095) {
               pr_err("err vlan %s\n", argv[4]);
               return -EINVAL;
            }

            /* If there is a suitable pvid macaddr, then this new macaddrs
             * should be visible in /proc/net/avm_pa/macaddrs even after unlinking
             * (with 0 references) and should disappear after flushing all sessions.
             */
            local_bh_disable();
            p = pa_macaddr_link(mac, pid_handle, 0, val);
            if (p)
               pa_macaddr_unlink(p);
            local_bh_enable();
            if (!p)
               return -EIO;
         }
      } else {
         pr_err("avm_pa_write_cmds: %s: unknown command\n", argv[0]);
      }
   }

   return count;
}

/* ------------------------------------------------------------------------ */


/* proc operations with a write handler only: data written to the file is
 * passed to avm_pa_write_cmds(). No read handler is installed.
 */
const struct proc_ops avm_pa_control_ops = {
   .proc_write = avm_pa_write_cmds,
};


/*
 * proc read handler for the selector files.
 *
 * The selector list is taken from the private data of the proc entry and
 * dumped to user space in a single call: any read at a non-zero offset, or
 * a read of an empty list, returns 0.
 *
 * Returns the value of avm_pa_dump_selector_user(). The file offset is
 * advanced only when that value is not negative, so a failing dump does not
 * move the file position backwards (same convention as the write handler).
 */
static ssize_t
avm_pa_read_show_filter(struct file *file,
                        char __user *buffer,
                        size_t count,
                        loff_t *offset)
{
   struct list_head *selector_list = PDE_DATA(file_inode(file));
   ssize_t ret;

   /* everything is delivered by the first read; later reads see EOF */
   if (*offset || list_empty(selector_list))
      return 0;

   ret = avm_pa_dump_selector_user(selector_list, buffer, count);
   if (ret < 0)
      return ret;

   *offset += ret;
   return ret;
}

/*
 * proc write handler for the selector files.
 *
 * Passes the user buffer to avm_pa_parse_selector_user() together with the
 * selector list stored as private data of the proc entry. The result of the
 * parser is returned unchanged; the file offset is advanced by it unless it
 * is negative.
 */
static ssize_t
avm_pa_write_show_filter(struct file *file,
                         const char __user *buffer,
                         size_t count,
                         loff_t *offset)
{
   struct list_head *selectors = PDE_DATA(file_inode(file));
   ssize_t consumed = avm_pa_parse_selector_user(selectors, buffer, count);

   if (consumed >= 0)
      *offset += consumed;

   return consumed;
}

/* proc operations for selector files: reads dump the selector list, writes
 * parse a new one. The list itself is the private data of the proc entry.
 */
static const struct proc_ops selector_ops = {
   .proc_read  = avm_pa_read_show_filter,
   .proc_write = avm_pa_write_show_filter,
};

/* "avm_pa" proc directory; assigned in avm_pa_proc_init(), NULL until then */
static struct proc_dir_entry *dir_entry;

/*
 * Create the "avm_pa" directory below the proc net directory of init_net
 * and populate it. The two selector files ("filter" and "xsession") carry
 * the list they operate on as private data; all other files have none.
 * Return values of the proc API are not checked.
 */
static void __init avm_pa_proc_init(void)
{
   /* One row per file, in creation order. The row with a NULL name
    * terminates the table.
    */
   static const struct {
      const char *name;
      umode_t mode;
      const struct proc_ops *ops;
      void *data;
   } files[] __initconst = {
      { "control",   S_IFREG|S_IWUSR, &avm_pa_control_ops, NULL },
      { "brief",     S_IRUGO,         &brief_show_ops,     NULL },
      { "memory",    S_IRUGO,         &memory_show_ops,    NULL },
      { "status",    S_IRUGO,         &status_show_ops,    NULL },
      { "stats",     S_IRUGO,         &stats_show_ops,     NULL },
      { "pids",      S_IRUGO,         &pids_show_ops,      NULL },
      { "vpids",     S_IRUGO,         &vpids_show_ops,     NULL },
      { "sessions",  S_IRUGO,         &sess_show_ops,      NULL },
      { "filter",    S_IRUGO|S_IWUSR, &selector_ops,       &pa_glob.accel_filter },
      { "xsession",  S_IRUGO|S_IWUSR, &selector_ops,       &pa_glob.show_filter },
      { "bsessions", S_IRUGO,         &bsess_show_ops,     NULL },
      { "macaddrs",  S_IRUGO,         &macaddr_show_ops,   NULL },
      { "hashes",    S_IRUGO,         &hash_show_ops,      NULL },
      { "prioack",   S_IRUGO,         &prioack_show_ops,   NULL },
      { "priomaps",  S_IRUGO,         &priomaps_show_ops,  NULL },
      { NULL,        0,               NULL,                NULL },
   };
   int row;

   dir_entry = proc_net_mkdir(&init_net, "avm_pa", init_net.proc_net);

   for (row = 0; files[row].name; row++) {
      proc_create_data(files[row].name, files[row].mode, dir_entry,
                       files[row].ops, files[row].data);
   }

   avm_pa_sg_proc_init(dir_entry);
}

/*
 * Remove everything avm_pa_proc_init() created: first the files inside the
 * "avm_pa" directory, then the entries handled by avm_pa_sg_proc_exit(),
 * and finally the directory itself.
 */
static void __exit avm_pa_proc_exit(void)
{
   /* removal order; the NULL pointer terminates the list */
   static const char *const files[] = {
      "control",
      "brief",
      "memory",
      "status",
      "stats",
      "pids",
      "vpids",
      "sessions",
      "filter",
      "xsession",
      "bsessions",
      "macaddrs",
      "hashes",
      "prioack",
      "priomaps",
      NULL,
   };
   const char *const *name;

   for (name = files; *name; name++)
      remove_proc_entry(*name, dir_entry);

   avm_pa_sg_proc_exit(dir_entry);

   remove_proc_entry("avm_pa", init_net.proc_net);
}
#endif

/* ------------------------------------------------------------------------ */
/* -------- misc device for capture tracking ------------------------------ */
/* ------------------------------------------------------------------------ */

/* read handler of the misc device: never delivers data, always returns 0.
 * None of the arguments is used.
 */
static ssize_t avm_pa_misc_read(struct file *file, char __user *buf,
                                size_t count, loff_t *ppos)
{
   return 0;
}

/* poll handler of the misc device: returns an empty event mask (0) and does
 * not register on any wait queue.
 * NOTE(review): newer kernels declare the poll hook with return type
 * __poll_t - check against the targeted kernel version.
 */
static unsigned int avm_pa_misc_poll(struct file *file, poll_table *wait)
{
   return 0;
}

/* open handler of the misc device: counts the open in pa_glob.misc_is_open
 * and always succeeds.
 */
static int avm_pa_misc_open(struct inode *inode, struct file *file)
{
   atomic_inc(&pa_glob.misc_is_open);

   return 0;
}

/* release handler of the misc device: takes back one count from
 * misc_is_open, but never lets the counter drop below zero.
 * Always returns 0.
 */
static int avm_pa_misc_release(struct inode *inode, struct file *file)
{
   struct avm_pa_global *ctx = &pa_glob;

   /* nothing counted, nothing to undo */
   if (atomic_read(&ctx->misc_is_open) <= 0)
      return 0;

   atomic_dec(&ctx->misc_is_open);
   return 0;
}


/* file operations of the "avm_pa" misc device */
static const struct file_operations avm_pa_misc_fops = {
   .open    = avm_pa_misc_open,
   .release = avm_pa_misc_release,
   .read    = avm_pa_misc_read,
   .poll    = avm_pa_misc_poll,
   .llseek  = no_llseek,
};

/* misc device "avm_pa" with a dynamically assigned minor number */
static struct miscdevice avm_pa_misc_dev = {
   .name  = "avm_pa",
   .minor = MISC_DYNAMIC_MINOR,
   .fops  = &avm_pa_misc_fops,
};

/* ------------------------------------------------------------------------ */
/* -------- init & exit functions ----------------------------------------- */
/* ------------------------------------------------------------------------ */

/*
 * Early init; it is called before the init functions of all device
 * drivers.
 *
 * Sets up the data structures in pa_glob: the session lists, the MAC
 * address hash table, the egress free list and the tick and estimator
 * timers. Afterwards the free list and the sg part are initialised through
 * avm_pa_init_freelist() and avm_pa_sg_init().
 *
 * Always returns 0.
 */
int __init avm_pa_early_init(void)
{
   struct avm_pa_global *ctx = &pa_glob;
   int list_idx;
   int bucket;

   pr_info("AVM PA for %s (early init)\n", linux_banner);

   for (list_idx = 0; list_idx < AVM_PA_LIST_MAX; list_idx++) {
      INIT_LIST_HEAD(&ctx->sess_list[list_idx].sessions);
   }

   for (bucket = 0; bucket < AVM_PA_MAX_MACADDR; bucket++) {
      INIT_HLIST_HEAD(&ctx->macaddr_hashtab[bucket]);
   }

   INIT_HLIST_HEAD(&ctx->egress_freelist);

   /* the timer API changed with Linux 4.15 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0)
   timer_setup(&ctx->tick_timer, pa_session_tick, 0);
   timer_setup(&ctx->est_timer, avm_pa_est_timer, 0);
#else
   setup_timer(&ctx->tick_timer, pa_session_tick, 0);
   setup_timer(&ctx->est_timer, avm_pa_est_timer, 0);
#endif

   avm_pa_init_freelist();
   avm_pa_sg_init();

   return 0;
}

/*
 * avm_pa_init is called together with the init functions
 * of the device drivers.
 *
 * Steps:
 *  - compile-time checks that the PA private structures fit into the areas
 *    reserved for them in sk_buff, net_device and net
 *  - registration of the misc device (a failure is only logged)
 *  - with CONFIG_PROC_FS: selector lists and proc files
 *  - setup of the receive queue of every CPU id below nr_cpu_ids
 *  - avm_pa_netdev_init(), then the accelerator is enabled
 *
 * Always returns 0.
 */
int __init avm_pa_init(void)
{
   struct avm_pa_global *ctx = &pa_glob;
   int __maybe_unused i;

   {
      /* complain if avm_pa_pkt_info or avm_pa_dev_info crosses the reserved
       * area (usually 256 and 32 bytes respectively)
       */
      struct sk_buff *skb __maybe_unused = NULL;
      struct net_device *dev __maybe_unused = NULL;
      struct net *net __maybe_unused = NULL;
#if defined(AVM_PKT_INFO_MAX)
      BUILD_BUG_ON(sizeof(struct avm_pa_pkt_info) > AVM_PKT_INFO_MAX);
#else
      BUILD_BUG_ON(sizeof(struct avm_pa_pkt_info) > sizeof(skb->avm_pa));
#endif
      BUILD_BUG_ON(sizeof(struct avm_pa_dev_info) > sizeof(dev->avm_pa));
      /* net->avm_pa holds two avm_pa_dev_info (ipv4 and ipv6) */
      BUILD_BUG_ON(2*sizeof(struct avm_pa_dev_info) > sizeof(net->avm_pa));
   }

   pr_info("AVM PA %s for Linux %s (late init)\n", AVM_PA_VERSION, linux_banner);

   /* not fatal: initialisation continues without the misc device */
   if (misc_register(&avm_pa_misc_dev) < 0)
      pr_err("avm_pa: misc_register() failed\n");

#ifdef CONFIG_PROC_FS
   /* the lists must be valid before the proc files referring to them exist */
   INIT_LIST_HEAD(&ctx->accel_filter);
   INIT_LIST_HEAD(&ctx->show_filter);

   avm_pa_proc_init();
#endif

   for (i = 0; i < nr_cpu_ids; i++) {
      struct avm_pa_rxq *rxq = per_cpu_ptr(&pa_rxq, i);

      memset(rxq, 0, sizeof(*rxq));
      __skb_queue_head_init(&rxq->lq);
      rxq->napi.poll = pa_napi_process;
      rxq->napi.weight = CONFIG_AVM_PA_NAPI_WEIGHT;
#ifdef CONFIG_AVM_PA_RPS
      INIT_CSD(&rxq->csd, (smp_call_func_t) pa_napi_schedule, rxq);
#endif
      tasklet_init(&rxq->sched_task, pa_napi_sched_task, (unsigned long) rxq);
      rxq->cpu = i;
   }

   avm_pa_netdev_init();

   avm_pa_enable();
   ctx->disabled = ctx->fw_disabled = 0;
   return 0;
}

/*
 * Module exit: marks the accelerator as disabled, stops the tick timer,
 * runs the session garbage collector, and then tears down the sg part, the
 * proc files with their selector lists, the misc device and the statistics.
 */
void __exit avm_pa_exit(void)
{
   struct avm_pa_global *ctx = &pa_glob;
   int pass;

   ctx->disabled = 1;
   ctx->fw_disabled = 1;
   avm_pa_disable();

   del_timer_sync(&ctx->tick_timer);

   /* The collector is run twice.
    * NOTE(review): presumably one pass is not enough to release every
    * session - confirm against pa_session_gc_once().
    */
   for (pass = 0; pass < 2; pass++)
      pa_session_gc_once();

   avm_pa_sg_exit();

#ifdef CONFIG_PROC_FS
   avm_pa_proc_exit();

   avm_pa_selector_free(&ctx->show_filter);
   avm_pa_selector_free(&ctx->accel_filter);
#endif
   misc_deregister(&avm_pa_misc_dev);
   avm_pa_reset_stats();
}

#ifdef CONFIG_IFX_PPA
/*
 * Set AVMNET_DEVICE_IFXPPA_DISABLE_TX_ACL in the hardware flags of every
 * PID that is in use (non-zero pid_handle) and whose hardware description
 * has an ATM VCC. PID slot 0 is not visited.
 */
void avm_pa_disable_atm_hw_tx_acl(void)
{
   struct avm_pa_global *ctx = &pa_glob;
   int slot;

   for (slot = 1; slot < CONFIG_AVM_PA_MAX_PID; slot++) {
      struct avm_pa_pid *pid = PA_PID(ctx, slot);

      if (pid->pid_handle != 0 && pid->hw && pid->hw->atmvcc)
         pid->hw->flags |= AVMNET_DEVICE_IFXPPA_DISABLE_TX_ACL;
   }
}
EXPORT_SYMBOL(avm_pa_disable_atm_hw_tx_acl);

/*
 * Clear AVMNET_DEVICE_IFXPPA_DISABLE_TX_ACL in the hardware flags of every
 * PID that is in use (non-zero pid_handle) and whose hardware description
 * has an ATM VCC. PID slot 0 is not visited.
 */
void avm_pa_enable_atm_hw_tx_acl(void)
{
   struct avm_pa_global *ctx = &pa_glob;
   int slot;

   for (slot = 1; slot < CONFIG_AVM_PA_MAX_PID; slot++) {
      struct avm_pa_pid *pid = PA_PID(ctx, slot);

      if (pid->pid_handle != 0 && pid->hw && pid->hw->atmvcc)
         pid->hw->flags &= ~AVMNET_DEVICE_IFXPPA_DISABLE_TX_ACL;
   }
}
EXPORT_SYMBOL(avm_pa_enable_atm_hw_tx_acl);
#endif

/* Two-stage start-up: avm_pa_early_init() is registered as a subsys
 * initcall, avm_pa_init() and avm_pa_exit() as the regular module
 * entry points.
 */
subsys_initcall(avm_pa_early_init); /* init avm pa before devices */
module_init(avm_pa_init);
module_exit(avm_pa_exit);