/* * Packet Accelerator Interface * * vim:set expandtab shiftwidth=3 softtabstop=3: * * Copyright (c) 2011-2016 AVM GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * Alternatively, this software may be distributed and/or modified under the * terms of the GNU General Public License as published by the Free Software * Foundation. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*
 * PID - peripheral ID
 *    Identifies a low level device, may be a network driver or
 *    for ATM, every VCC has its own PID
 * VPID - virtual peripheral ID
 *    Is assigned to a network device or a virtual network device
 *
 * Examples:
 * ATA Mode:
 *    Name      NetDev  VirtualNetDev  PID  VPID
 *    cpmac0    yes     no             yes  yes (cpmac0)
 *    eth0      yes     no             no   yes (cpmac0)
 *    ath0      yes     no             yes  yes (ath0)
 *    internet  no      yes            no   yes (cpmac0)
 *    voip      no      yes            no   yes (cpmac0)
 * DSL Mode (one PVC):
 *    Name      NetDev  VirtualNetDev  PID  VPID
 *    cpmac0    yes     no             yes  yes (cpmac0)
 *    eth0      yes     no             no   yes (cpmac0)
 *    ath0      yes     no             yes  yes (ath0)
 *    vcc0      no      yes            yes  yes (vcc0)
 *    internet  no      yes            no   yes (vcc0)
 *    voip      no      yes            no   yes (vcc0)
 * DSL Mode (two PVCs):
 *    Name      NetDev  VirtualNetDev  PID  VPID
 *    cpmac0    yes     no             yes  yes (cpmac0)
 *    eth0      yes     no             no   yes (cpmac0)
 *    ath0      yes     no             yes  yes (ath0)
 *    vcc0      no      yes            yes  yes (vcc0)
 *    vcc1      no      yes            yes  yes (vcc1)
 *    internet  no      yes            no   yes (vcc0)
 *    voip      no      yes            no   yes (vcc1)
 * VDSL Mode:
 *    Name      NetDev  VirtualNetDev  PID  VPID
 *    cpmac0    yes     no             yes  yes (cpmac0)
 *    eth0      yes     no             no   yes (cpmac0)
 *    ath0      yes     no             yes  yes (ath0)
 *    vdsl      no      yes            yes  yes (vdsl)
 *    internet  no      yes            no   yes (vdsl)
 *    voip      no      yes            no   yes (vdsl)
 *
 * Sessions can have five states:
 * - FREE    : session on sess_lru[AVM_PA_LRU_FREE]
 * - CREATE  : session is on no lru
 * - ACTIVE  : session on sess_lru[AVM_PA_LRU_ACTIVE], in hashtable and not flushed
 * - FLUSHED : session on sess_lru[AVM_PA_LRU_ACTIVE], in hashtable and flushed
 * - DEAD    : session on sess_lru[AVM_PA_LRU_DEAD]
 *
 * FREE    -> pa_session_alloc()    -> CREATE
 * CREATE  -> pa_session_activate() -> ACTIVE
 * ACTIVE  -> pa_session_flush()    -> FLUSHED
 * FLUSHED -> pa_session_gc()       -> DEAD
 * DEAD    -> pa_session_gc()       -> FREE
 *
 * pa_session_kill() can transition from any state to DEAD. Use it only if you
 * know that an immediate GC trigger (that moves from DEAD to FREE) won't be
 * a problem, otherwise use pa_session_flush() which is safe.
pa_session_flush() * guarantees that at least one complete GC period happens before a session * transitions to FREE. */ #include #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 32) #include #include #else #include #include #endif #include #include #include #include #include #include #include #include #include #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32) #define PSCHED_TICKS2NS(x) PSCHED_US2NS(x) #define PSCHED_NS2TICKS(x) PSCHED_NS2US(x) #endif #include #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) /* ktime_get() is good enough as a fallback (doesn't account for suspend time) */ #define ktime_get_boottime ktime_get #endif #include #include #include #ifdef CONFIG_AVM_POWERMETER #include #endif #include // MODULE_NAME_LEN needed by kallsyms.h (who fails to include himself) #include // sprint_symbol() #include #include #include #include #include #include #include /* ------------------------------------------------------------------------ */ #include #include #ifndef cputime_to_msecs #define cputime_to_msecs(__ct) jiffies_to_msecs(__ct) #endif #ifndef msecs_to_cputime #define msecs_to_cputime(__msecs) msecs_to_jiffies(__msecs) #endif #ifndef arch_irq_stat_cpu #define arch_irq_stat_cpu(cpu) 0 #endif #ifndef arch_irq_stat #define arch_irq_stat() 0 #endif #ifndef arch_idle_time #define arch_idle_time(cpu) 0 #endif #ifndef cputime64_zero #define cputime64_zero 0ULL #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) #define PDE_DATA(_inode) (PDE(_inode)->data) #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 33) #define SKB_IFF(skb) ((skb)->skb_iif) #else #define SKB_IFF(skb) ((skb)->iif) #endif /* ------------------------------------------------------------------------ */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32) static inline struct dst_entry *skb_dst(const struct sk_buff *skb) { return skb->dst; } static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) { skb->dst = dst; } #endif #if LINUX_VERSION_CODE <= 
KERNEL_VERSION(2, 6, 32) static inline int kstrtol(const char *s, unsigned int base, long *res) { if (isdigit(*s)) { *res = simple_strtol(s, 0, base); return 0; } return -EINVAL; } static inline int kstrtoul(const char *s, unsigned int base, unsigned long *res) { if (isdigit(*s)) { *res = simple_strtoul(s, 0, base); return 0; } return -EINVAL; } #endif /* ------------------------------------------------------------------------ */ #define AVM_PA_TRACE 1 /* 0: off */ #define AVM_PA_TOKSTATS 0 #define AVM_PA_UNALIGNED_CHECK 0 #define AVM_PA_AVOID_UNALIGNED 1 /* GSO is disabled for now, since it's not sufficiently tested. * Tests need to be done on 4040, 7580, 6490, with vlan and/or pppoe encap on egreess. * So far it's been successfully tested on 4040 with plain ethernet+NAT. */ #define AVM_PA_WITH_GSO 0 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37) #define skb_has_frag_list(skb) (0) #define skb_walk_frags(skb, iter) while (0) #endif #define TX_NAPI_MAXQUEUE 512 #define TX_NAPI_BUDGET 64 #if AVM_PA_AVOID_UNALIGNED #define PA_IPHLEN(iph) (((((u8 *)iph)[0])&0xf)<<2) #define PA_IPTOTLEN(iph) (((u16 *)iph)[1]) #define PA_TCP_FIN(tcph) (((u8 *)tcph)[13]&0x01) #define PA_TCP_SYN(tcph) (((u8 *)tcph)[13]&0x02) #define PA_TCP_RST(tcph) (((u8 *)tcph)[13]&0x04) #define PA_TCP_ACK(tcph) (((u8 *)tcph)[13]&0x10) #define PA_TCP_FIN_OR_RST(tcph) (((u8 *)tcph)[13]&0x05) #define PA_TCP_DOFF(tcph) (((((u8 *)tcph)[12]&0xf0)>>4)*4) #define PA_IP6_PAYLOADLEN(ip6h) (((u16 *)ip6h)[2]) #else #define PA_IPHLEN(iph) ((iph)->ihl<<2) #define PA_IPTOTLEN(iph) ((iph)->tot_len) #define PA_TCP_FIN(tcph) ((tcph)->fin) #define PA_TCP_SYN(tcph) ((tcph)->syn) #define PA_TCP_RST(tcph) ((tcph)->rst) #define PA_TCP_ACK(tcph) ((tcph)->ack) #define PA_TCP_FIN_OR_RST(tcph) ((tcph)->fin || (tcph)->rst) #define PA_TCP_DOFF(tcph) (((((tcph)->doff)&0xf000)>>12)*4) #define PA_IP6_PAYLOADLEN(ip6h) ((ip6h)->payload_len) #endif /* ------------------------------------------------------------------------ */ static 
inline void set_ip_checksum(struct iphdr *iph) { int iphlen = PA_IPHLEN(iph); iph->check = 0; iph->check = csum_fold(csum_partial((unsigned char *)iph, iphlen, 0)); } static inline void set_udp_checksum(struct iphdr *iph, struct udphdr *udph) { unsigned short len = ntohs(udph->len); __wsum sum; udph->check = 0; sum = csum_partial((unsigned char *)udph, len, 0); udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, len, IPPROTO_UDP, sum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; } static inline void set_udpv6_checksum(struct ipv6hdr *ipv6h, struct udphdr *udph) { unsigned short len = ntohs(udph->len); __wsum sum; udph->check = 0; sum = csum_partial((unsigned char *)udph, len, 0); udph->check = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, len, IPPROTO_UDP, sum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; } /* These are only effective of struct sk_buff has a uniq_id field */ static inline unsigned long pkt_uniq_id(PKT *pkt) { #ifdef AVM_HAVE_SKB_UNIQ_ID return pkt->uniq_id & 0xfffffflu; #else return 0; #endif } static inline int pkt_cpmac_prio(PKT *pkt) { #ifdef AVM_HAVE_SKB_UNIQ_ID return (u8) (pkt->uniq_id >> 24); #else return 0; #endif } /* ------------------------------------------------------------------------ */ static inline int rand(void) { int x; get_random_bytes(&x, sizeof(x)); return x; } #define PKT_DATA(pkt) (pkt)->data /* PKT_LEN has the data in the head skb. For frag_list skbs, this is just L2/3/4 headers * without any payload. For normal skbs it includes the payload after the headers. */ #define PKT_LEN(pkt) (skb_headlen(pkt)) /* For frag_list skbs, PKT_FRAGLEN is the size a single packet (with headers). That is * PKT_LEN of the head skb (just headers without payload), plus payload length of the * first frag, assuming no other frag is larger. For normal skbs, this is the same as PKT_LEN */ #define PKT_FRAGLEN(pkt) (PKT_LEN(pkt) + (skb_has_frag_list(pkt) ? 
skb_shinfo(pkt)->frag_list->len : 0)) #define PKT_PULL(pkt, len) skb_pull(pkt, len) #define PKT_PUSH(pkt, len) skb_push(pkt, len) #define PKT_ALLOC(len) pa_alloc_skb(len) #define PKT_FREE(pkt) dev_kfree_skb_any(pkt) #define PKT_COPY(pkt) skb_copy(pkt, GFP_ATOMIC) #define PKT_TRIM(pkt, len) pa_skb_trim(pkt, len) static inline struct sk_buff *pa_alloc_skb(unsigned len) { struct sk_buff *skb; skb = alloc_skb(len+128+128, GFP_ATOMIC); if (skb) { skb_reserve(skb, 128); skb_put(skb, len); } return skb; } static inline void pa_skb_trim(struct sk_buff *skb, unsigned int len) { if (skb->len > len) __skb_trim(skb, len); } static int pa_printk(void *type, const char *format, ...) #ifdef __GNUC__ __attribute__ ((__format__(__printf__, 2, 3))) #endif ; static int pa_printk(void *type, const char *format, ...) { va_list args; int rc; va_start(args, format); if (type) printk("%s", (char *)type); rc = vprintk(format, args); va_end(args); return rc; } /* ------------------------------------------------------------------------ */ #define constant_htons(x) __constant_htons(x) #undef IPPROTO_IPENCAP #define IPPROTO_IPENCAP 4 #ifndef IPPROTO_L2TP #define IPPROTO_L2TP 115 #endif /* * Accelerating of L2TPv3 only works with * pseudowire ethernet or ethernet vlan * and default l2-specific header. 
*/

/* ------------------------------------------------------------------------ */

/* CBU: lantiq_ppa does not allow irq_disabled() while calling add/remove session hooks */
/* AMY: Dakota hw_pa does not allow irq_disabled() for session_stats callback */
#if !defined(CONFIG_LTQ_PPA) && !defined(CONFIG_AVM_NET_EDMA)
#define AVM_PA_USE_IRQLOCK
#endif

/* Reader/writer lock, taken through the AVM_PA_*_LOCK() macros below. */
static DEFINE_RWLOCK(avm_pa_lock);

/*
 * Locking wrappers around avm_pa_lock.
 *
 * With AVM_PA_USE_IRQLOCK the irqsave/irqrestore variants are used and
 * AVM_PA_LOCK_DECLARE provides the local 'flags' variable they need.
 * Otherwise the _bh variants are used and AVM_PA_LOCK_DECLARE expands
 * to nothing.
 */
#ifdef AVM_PA_USE_IRQLOCK
#define AVM_PA_LOCK_DECLARE    unsigned long flags
#define AVM_PA_WRITE_LOCK()    write_lock_irqsave(&avm_pa_lock, flags)
#define AVM_PA_READ_LOCK()     read_lock_irqsave(&avm_pa_lock, flags)
#define AVM_PA_WRITE_UNLOCK()  write_unlock_irqrestore(&avm_pa_lock, flags)
#define AVM_PA_READ_UNLOCK()   read_unlock_irqrestore(&avm_pa_lock, flags)
#else
#define AVM_PA_LOCK_DECLARE
#define AVM_PA_WRITE_LOCK()    write_lock_bh(&avm_pa_lock)
#define AVM_PA_READ_LOCK()     read_lock_bh(&avm_pa_lock)
#define AVM_PA_WRITE_UNLOCK()  write_unlock_bh(&avm_pa_lock)
#define AVM_PA_READ_UNLOCK()   read_unlock_bh(&avm_pa_lock)
#endif

/* ------------------------------------------------------------------------ */

/* Timer periods and idle thresholds */
#define AVM_PA_GC_TIMEOUT               1 /* secs */
#define AVM_PA_STAT_TIMEOUT           500 /* msecs */
#define AVM_PA_LC_TIMEOUT               2 /* secs */
#define AVM_PA_TRAFFIC_IDLE_TBFDISABLE 10 /* secs */

/* ------------------------------------------------------------------------ */

/* Queue limits and the defaults used to initialise pa_glob */
#define AVM_PA_MAX_TBF_QUEUE_LEN           128
#define AVM_PA_MAX_IRQ_QUEUE_LEN           64
#define AVM_PA_DEFAULT_MAXRATE             5000
#define AVM_PA_MINRATE                     1000
#define AVM_PA_DEFAULT_PKTBUFFER           1024
#define AVM_PA_DEFAULT_PKTPEAK             256
#define AVM_PA_DEFAULT_TELEPHONY_REDUCE    65
#define AVM_PA_EST_DEFAULT_IDX             0 /* 0 - 5 => 0.25sec - 8sec */
#define AVM_PA_EST_DEFAULT_EWMA_LOG        3 /* 1 - 31 */
#define AVM_PA_CPUTIME_EST_DEFAULT_IDX     2 /* 0 - 5 => 0.25sec - 8sec */
#define AVM_PA_CPUTIME_EST_DEFAULT_EWMA_LOG 1 /* 1 - 31 */
#define AVM_PA_CPUTIME_IRQ_MSWIN_LOW       300 /* ms/s */
#define AVM_PA_CPUTIME_IRQ_MSWIN_HIGH      400 /* ms/s */
#define AVM_PA_CPUTIME_IDLE_MSWIN_LOW      10 /* ms/s */
#define AVM_PA_CPUTIME_IDLE_MSWIN_HIGH 50 /* ms/s */

#define AVM_PA_PRIOACK_THRESH_PKTS 40 /* wait for X packets to do the TCP-ACK check */
#define AVM_PA_PRIOACK_RATIO 70 /* % of packets have to be TCP-ACKs for positive check */

#define AVM_PA_COUNT_PRIO_MAPS 2 /* tack and tget */
#define AVM_PA_BE_QUEUE 6 /* best-effort queue */

/* An ingress priority word carries two 16-bit values: the "net" part in the
 * upper half and the "host" part in the lower half. */
#define AVM_PA_INGRESS_PRIO_NET_MASK 0xFFFF0000U
#define AVM_PA_INGRESS_PRIO_HOST_MASK 0x0000FFFFU
#define AVM_PA_INGRESS_PRIO_NET(prio) (((prio) & AVM_PA_INGRESS_PRIO_NET_MASK) >> 16)
#define AVM_PA_INGRESS_PRIO_HOST(prio) ( (prio) & AVM_PA_INGRESS_PRIO_HOST_MASK)

/* ------------------------------------------------------------------------ */

/* Size of avm_pa_global.recvhook[] */
#define AVM_PA_MAX_RECVHOOK 8

/* ------------------------------------------------------------------------ */

/* One priority translation table: prios[] is indexed by the priority's
 * minor part (see avm_pa_pid_tack_prio()/avm_pa_pid_tget_prio()). */
struct avm_pa_prio_map {
   int enabled;
   unsigned int prios[AVM_PA_MAX_PRIOS];
};

/* Per-PID state (see the PID/VPID description at the top of the file). */
struct avm_pa_pid {
#ifdef CONFIG_AVM_PA_TX_NAPI
   struct napi_struct tx_napi;
   struct sk_buff_head tx_napi_pkts;
#ifdef CONFIG_SMP
   /* the tasklet is used to switch cores for the napi_poll */
   struct tasklet_struct tx_napi_tsk;
#endif
#endif
   struct avm_pa_pid_cfg cfg;
   struct avm_pa_pid_ecfg ecfg;
   avm_pid_handle pid_handle;
   avm_pid_handle ingress_pid_handle;
   enum avm_pa_framing ingress_framing;
   enum avm_pa_framing egress_framing;
   struct avm_pa_session *hash_sess[CONFIG_AVM_PA_MAX_SESSION];
   struct avm_pa_bsession *hash_bsess[CONFIG_AVM_PA_MAX_SESSION];
   struct avm_pa_pid_hwinfo *hw;
   /* channel acceleration via hw */
   unsigned rx_channel_activated:1,
            tx_channel_activated:1,
            rx_channel_stopped:1;
   /* Provide an array of avm_pa_prio_map structs to store multiple priority
    * maps which allow us to specify per priority in which upstream queue
    * classified TCP ACK (tack) and HTTP-GET (tget; JAZZ 10051) traffic should
    * be enqueued. This enables us to configure the upstream prioritization in
    * such way that tack traffic for priority 7 will not be enqueued in
    * queue 5 (important) but in queue 7 (low). This is a prerequisite for the
    * downstream regulation to work properly.
    */
   struct avm_pa_prio_map prio_maps[AVM_PA_COUNT_PRIO_MAPS];
   unsigned prioack_acks;
   unsigned prioack_accl_acks;
   /* stats */
   u32 tx_pkts;
};

/* Per-VPID state: configuration, handle and the per-priority traffic
 * counters (software/hardware, optionally guest, associated, ingress). */
struct avm_pa_vpid {
   struct avm_pa_vpid_cfg cfg;
   avm_vpid_handle vpid_handle;
   struct avm_pa_vpid_stats stats;
#ifdef AVM_PA_HAS_GUEST_STATS
   struct avm_pa_vpid_stats guest_stats;
#endif
   struct avm_pa_traffic_stats sw_stats[AVM_PA_MAX_PRIOS];
   struct avm_pa_traffic_stats hw_stats[AVM_PA_MAX_PRIOS];
#ifdef AVM_PA_HAS_GUEST_STATS
   struct avm_pa_traffic_stats guest_sw_stats[AVM_PA_MAX_PRIOS];
   struct avm_pa_traffic_stats guest_hw_stats[AVM_PA_MAX_PRIOS];
#endif
   struct avm_pa_traffic_stats associated_sw_stats[AVM_PA_MAX_PRIOS];
   struct avm_pa_traffic_stats associated_hw_stats[AVM_PA_MAX_PRIOS];
   struct avm_pa_traffic_stats ingress_sw_stats[AVM_PA_MAX_PRIOS];
   struct avm_pa_traffic_stats ingress_hw_stats[AVM_PA_MAX_PRIOS];
   struct avm_pa_traffic_stats associated_ingress_sw_stats[AVM_PA_MAX_PRIOS];
   struct avm_pa_traffic_stats associated_ingress_hw_stats[AVM_PA_MAX_PRIOS];
   ktime_t prio_stats_timestamp;
};

/* State of one packet rate estimator (used for rx_est, fw_est and
 * overlimit_est in avm_pa_global). */
struct avm_pa_est {
   unsigned idx;
   unsigned ewma_log;
   u32 last_packets;
   u32 avpps;
};

/* State of one cputime estimator (user, idle and irq in avm_pa_global). */
struct avm_pa_cputime_est {
   unsigned idx;
   unsigned ewma_log;
   cputime64_t last_cputime;
   cputime_t avtps;
};

/* State of the "tbf for packets per second" in avm_pa_global.
 * NOTE(review): presumably a token bucket filter - confirm against the
 * code that updates tokens/ptokens. */
struct avm_pa_tbf {
   struct hrtimer timer;
   u32 buffer;
   u32 pbuffer;
   u32 pkttime;
   long tokens;
   long ptokens;
   psched_time_t t_c;
};

/* One registered receive hook callback. */
struct avm_pa_recvhook {
   int (*cb_recvhook)(struct sk_buff *skb, int framing);
};

/* All global state of the packet accelerator; the single instance is
 * pa_glob, initialised right behind the struct definition. */
struct avm_pa_global {
   int disabled;
   int fw_disabled;
   atomic_t misc_is_open; /* means fw_disabled */
   /* debug switches */
   int dbgcapture;
   int dbgsession;
   int dbgnosession;
   int dbgtrace;
   int dbgmatch;
   int dbgcputime;
   int dbgprioack;
   int dbgprioacktrace;
   int dbgstats;
   /* session timeouts, in seconds */
   unsigned long tcp_timeout_secs;
   unsigned long fin_timeout_secs;
   unsigned long udp_timeout_secs;
   unsigned long echo_timeout_secs;
   unsigned long bridge_timeout_secs;
   /* statically sized tables, accessed through PA_PID()/PA_VPID()/
    * PA_SESSION()/PA_BSESSION() below */
   struct avm_pa_pid pid_array[CONFIG_AVM_PA_MAX_PID];
   struct avm_pa_vpid vpid_array[CONFIG_AVM_PA_MAX_VPID];
   struct avm_pa_session sess_array[CONFIG_AVM_PA_MAX_SESSION];
   struct avm_pa_session_lru sess_lru[AVM_PA_LRU_MAX];
   struct avm_pa_bsession bsess_array[CONFIG_AVM_PA_MAX_SESSION];
   struct avm_pa_macaddr macaddr_array[CONFIG_AVM_PA_MAX_SESSION];
   struct avm_pa_macaddr *macaddr_hash[CONFIG_AVM_PA_MAX_SESSION];
   struct avm_pa_stats stats;
   u32 next_session_uniq_id;
   struct timer_list gc_timer;
   struct timer_list stat_timer;
   struct sk_buff_head irqqueue;
   struct tasklet_struct irqtasklet;
   /* packet rate estimator */
   int est_idx;
   int ewma_log;
   struct timer_list est_timer;
   struct avm_pa_est rx_est;
   struct avm_pa_est fw_est;
   struct avm_pa_est overlimit_est;
   /* cputime estimator */
   int cputime_est_idx;
   int cputime_ewma_log;
   struct timer_list cputime_est_timer;
   struct avm_pa_cputime_est cputime_user_est;
   struct avm_pa_cputime_est cputime_idle_est;
   struct avm_pa_cputime_est cputime_irq_est;
   /* tbf for packets per second */
   int load_control; /* bit mask of the LOADCONTROL_* values below */
#define LOADCONTROL_OFF 0x00
#define LOADCONTROL_POWER 0x01
#define LOADCONTROL_IRQ 0x02
#define LOADCONTROL_POWERIRQ (LOADCONTROL_POWER|LOADCONTROL_IRQ)
#define LOADCONTROL_IDLE 0x04
   int load_reduce;
   int telephony_active;
   unsigned telephony_reduce;
   int tbf_enabled;
   unsigned irq_mswin_low; /* max irq ms/s */
   unsigned irq_mswin_high; /* overload irq ms/s */
   unsigned idle_mswin_low; /* overload idle ms/s */
   unsigned idle_mswin_high; /* good idle ms/s */
   unsigned maxrate; /* pkt/s at load_reduce == 0 */
   unsigned rate; /* pkt/s */
   unsigned pktbuffer; /* # pkts */
   unsigned pktpeak; /* # pkts */
   struct avm_pa_tbf tbf;
   struct sk_buff_head tbfqueue;
   struct tasklet_struct tbftasklet;
   struct task_struct *task;
   struct timer_list lc_timer;
   u32 lc_overlimit; /* rx_overlimit at last tick */
#ifdef CONFIG_AVM_POWERMETER
   void *load_control_handle;
#endif
   /* ... */
   /* sample arrays with TOK_SAMLES entries each; tok_pos is presumably the
    * current write index - confirm against the code that fills them */
   int tok_pos;
#define TOK_SAMLES 64
   int tok_state[TOK_SAMLES];
   unsigned tok_overtime[TOK_SAMLES];
   unsigned tok_rate[TOK_SAMLES];
   unsigned tok_pps[TOK_SAMLES];
   unsigned long tok_overlimit[TOK_SAMLES];
   unsigned prioack_thresh_packets;
   unsigned prioack_ratio;
   struct avm_hardware_pa hardware_pa;
   int hw_ppa_disabled;
   struct avm_pa_recvhook recvhook[AVM_PA_MAX_RECVHOOK];
   int recvhook_counter;
#ifdef CONFIG_PROC_FS
   struct list_head session_selector; /* empty to show all sessions (default) */
#endif
} pa_glob = {
   /* on CONFIG_MIPS_UR8 the accelerator starts out disabled */
#ifdef CONFIG_MIPS_UR8
   .disabled = 1,
   .fw_disabled = 1,
#else
   .disabled = 0,
   .fw_disabled = 0,
#endif
   .dbgcapture = 0,
   .dbgsession = 0,
   .dbgnosession = 0,
   .dbgtrace = 0,
   .dbgmatch = 0,
   .dbgcputime = 0,
   .dbgprioack = 0,
   .dbgprioacktrace = 0,
   .dbgstats = 0,
   .tcp_timeout_secs = 30,
   .fin_timeout_secs = 0,
   .udp_timeout_secs = 10,
   .echo_timeout_secs = 3,
   .bridge_timeout_secs = 30,
   .load_control = LOADCONTROL_IDLE,
   .telephony_reduce = AVM_PA_DEFAULT_TELEPHONY_REDUCE,
   .irq_mswin_low = AVM_PA_CPUTIME_IRQ_MSWIN_LOW,
   .irq_mswin_high = AVM_PA_CPUTIME_IRQ_MSWIN_HIGH,
   .idle_mswin_low = AVM_PA_CPUTIME_IDLE_MSWIN_LOW,
   .idle_mswin_high = AVM_PA_CPUTIME_IDLE_MSWIN_HIGH,
   .maxrate = AVM_PA_DEFAULT_MAXRATE,
   .rate = AVM_PA_DEFAULT_MAXRATE,
   .pktbuffer = AVM_PA_DEFAULT_PKTBUFFER,
   .pktpeak = AVM_PA_DEFAULT_PKTPEAK,
   .est_idx = AVM_PA_EST_DEFAULT_IDX,
   .ewma_log = AVM_PA_EST_DEFAULT_EWMA_LOG,
   .cputime_est_idx = AVM_PA_CPUTIME_EST_DEFAULT_IDX,
   .cputime_ewma_log = AVM_PA_CPUTIME_EST_DEFAULT_EWMA_LOG,
   .prioack_thresh_packets = AVM_PA_PRIOACK_THRESH_PKTS,
   .prioack_ratio = AVM_PA_PRIOACK_RATIO,
};

/* Map a handle to its table entry. The handle is reduced modulo the table
 * size, so any handle value yields a valid pointer. */
#define PA_PID(ctx, handle) (&ctx->pid_array[(handle)%CONFIG_AVM_PA_MAX_PID])
#define PA_VPID(ctx, handle) (&ctx->vpid_array[(handle)%CONFIG_AVM_PA_MAX_VPID])
#define PA_SESSION(ctx, handle) (&ctx->sess_array[(handle)%CONFIG_AVM_PA_MAX_SESSION])
#define PA_BSESSION(ctx, handle) (&ctx->bsess_array[(handle)%CONFIG_AVM_PA_MAX_SESSION])

/* printf-like output function type, e.g. pa_printk() */
typedef int pa_fprintf(void *, const char *, ...)
#ifdef __GNUC__ __attribute__ ((__format__(__printf__, 2, 3))) #endif ; /* ------------------------------------------------------------------------ */ static void pa_session_kill_unlocked(struct avm_pa_session *session, const char *why); static void pa_session_kill(struct avm_pa_session *session, const char *why); static int pa_session_handle_stats(struct avm_pa_session *session); static void pa_show_session(struct avm_pa_session *session, pa_fprintf fprintffunc, void *arg); static int avm_pa_pid_receive(avm_pid_handle pid_handle, PKT *pkt); static void avm_pa_flush_sessions_with_destmac(struct avm_pa_macaddr *destmac); #ifdef CONFIG_PROC_FS static void clear_pid_selector(struct avm_pa_global *ctx, u32 pid, int isvpid); static int session_is_selected(struct avm_pa_global *ctx, struct avm_pa_session *sess); #else #define clear_pid_selector(ctx, pid, isvpid) #endif static inline int avm_pa_pid_tack_enabled(struct avm_pa_pid *pid) { return pid->prio_maps[AVM_PA_PRIO_MAP_TACK].enabled; } static inline int avm_pa_pid_tget_enabled(struct avm_pa_pid *pid) { return pid->prio_maps[AVM_PA_PRIO_MAP_TGET].enabled; } /* * Helper functions to retrieve a valid tack or tget priority from a pid's priority map. * Remember: prio_maps must include the correct TC_H_MAJ part. 
*/ static inline unsigned int avm_pa_pid_tack_prio(struct avm_pa_pid *pid, unsigned int prio) { if (likely((prio & TC_H_MIN_MASK) < AVM_PA_MAX_PRIOS)) return pid->prio_maps[AVM_PA_PRIO_MAP_TACK].prios[prio & TC_H_MIN_MASK]; return 0; } static inline unsigned int avm_pa_pid_tget_prio(struct avm_pa_pid *pid, unsigned int prio) { if (likely((prio & TC_H_MIN_MASK) < AVM_PA_MAX_PRIOS)) return pid->prio_maps[AVM_PA_PRIO_MAP_TGET].prios[prio & TC_H_MIN_MASK]; return prio; } static inline void change_tack_prio(struct avm_pa_global *ctx, struct avm_pa_pid *pid, PKT *pkt, unsigned int org_prio) { unsigned int tack_prio = avm_pa_pid_tack_prio(pid, org_prio); if (tack_prio != 0 && pkt->priority > tack_prio) { pkt->priority = tack_prio; #if AVM_PA_TRACE if (ctx->dbgprioacktrace) { pa_printk(KERN_DEBUG, "avm_pa: %lu - change_tack_prio(%s), reset tack prio to 0x%x\n", pkt_uniq_id(pkt), pid->cfg.name, pkt->priority); } #endif } } /* ------------------------------------------------------------------------ */ static inline int avm_pa_capture_running(void) { struct avm_pa_global *ctx = &pa_glob; if (ctx->dbgcapture) return 0; return atomic_read(&ctx->misc_is_open); } /* ------------------------------------------------------------------------ */ /* -------- utilities ----------------------------------------------------- */ /* ------------------------------------------------------------------------ */ static const char *rc2str(int rc) { switch (rc) { case AVM_PA_RX_BROADCAST : return "is broadcast"; case AVM_PA_RX_TTL : return "ttl/hoplimit <= 1"; case AVM_PA_RX_FRAGMENT : return "is fragment"; case AVM_PA_RX_BYPASS : return "bypass"; case AVM_PA_RX_OK : return "ok"; case AVM_PA_RX_ACCELERATED : return "accelerated"; case AVM_PA_RX_ERROR_STATE : return "state machine problem ?"; case AVM_PA_RX_ERROR_LEN : return "packet too short"; case AVM_PA_RX_ERROR_IPVERSION : return "illegal ip version"; case AVM_PA_RX_ERROR_MATCH : return "too much header"; case AVM_PA_RX_ERROR_HDR : return "too 
much ip header"; } return "???"; } static const char *framing2str(enum avm_pa_framing framing) { switch (framing) { case avm_pa_framing_ether: return "ether"; case avm_pa_framing_ppp: return "ppp"; case avm_pa_framing_ip: return "ip"; case avm_pa_framing_dev: return "dev"; case avm_pa_framing_ptype: return "local"; case avm_pa_framing_llcsnap: return "llcsnap"; } return "undef"; } static int in6_addr2str(const void *cp, char *buf, size_t size) { const struct in6_addr *s = (const struct in6_addr *)cp; return snprintf(buf, size, "%x:%x:%x:%x:%x:%x:%x:%x", ntohs(s->s6_addr16[0]), ntohs(s->s6_addr16[1]), ntohs(s->s6_addr16[2]), ntohs(s->s6_addr16[3]), ntohs(s->s6_addr16[4]), ntohs(s->s6_addr16[5]), ntohs(s->s6_addr16[6]), ntohs(s->s6_addr16[7])); } static int in_addr2str(const void *cp, char *buf, size_t size) { const unsigned char *s = (const unsigned char *)cp; return snprintf(buf, size, "%d.%d.%d.%d", s[0], s[1], s[2], s[3]); } static int mac2str(const void *cp, char *buf, size_t size) { const unsigned char *mac = (const unsigned char *)cp; return snprintf(buf, size, "%02X:%02X:%02X:%02X:%02X:%02X", mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); } static const char *pkttype2str(u16 pkttype, char *buf, size_t size) { char *p = buf; char *end = p + size; if (pkttype == AVM_PA_PKTTYPE_NONE) { snprintf(p, end-p, "none"); return buf; } switch (pkttype & AVM_PA_PKTTYPE_IPENCAP_MASK) { case AVM_PA_PKTTYPE_IPV6ENCAP: snprintf(p, end-p, "IPv6+"); p += strlen(p); break; case AVM_PA_PKTTYPE_IPV4ENCAP: snprintf(p, end-p, "IPv4+"); p += strlen(p); break; } if (pkttype & AVM_PA_PKTTYPE_LISP) { snprintf(p, end-p, "LISP+"); p += strlen(p); } if (pkttype & AVM_PA_PKTTYPE_L2TP) { snprintf(p, end-p, "L2TP+"); p += strlen(p); } if (pkttype & AVM_PA_PKTTYPE_GRE) { snprintf(p, end-p, "GRE+"); p += strlen(p); } switch (pkttype & AVM_PA_PKTTYPE_IP_MASK) { case AVM_PA_PKTTYPE_IPV6: snprintf(p, end-p, "IPv6"); p += strlen(p); break; case AVM_PA_PKTTYPE_IPV4: snprintf(p, end-p, "IPv4"); p 
+= strlen(p); break; } if (AVM_PA_PKTTYPE_IPPROTO(pkttype)) { switch (AVM_PA_PKTTYPE_IPPROTO(pkttype)) { case IPPROTO_UDP: snprintf(p, end-p, "+UDP"); break; case IPPROTO_TCP: snprintf(p, end-p, "+TCP"); break; case IPPROTO_ICMP: snprintf(p, end-p, "+ICMP"); break; case IPPROTO_ICMPV6: snprintf(p, end-p, "+ICMPV6"); break; default: snprintf(p, end-p, "+P%u", AVM_PA_PKTTYPE_IPPROTO(pkttype)); break; } } return buf; } static char *data2hex(void *data, int datalen, char *buf, int bufsiz) { static char hexchars[] = "0123456789ABCDEF"; unsigned char *databuf = (unsigned char *)data; char *s = buf; char *end = buf+bufsiz; int i; snprintf(s, end-s, "%d: ", datalen); s += strlen(s); for (i=0; i < datalen && s + 3 < end; i ++) { *s++ = hexchars[(databuf[i] >> 4) & 0xf]; *s++ = hexchars[databuf[i] & 0xf]; } *s = 0; return buf; } static char *pidflags2str(unsigned long flags, char *buf, int bufsiz) { char *s = buf; char *end = s + bufsiz; buf[0] = 0; if (flags & AVM_PA_PID_FLAG_NO_PID_CHANGED_CHECK) { snprintf(s, end-s, "%sno_pid_changed_check", s == buf ? "" : ","); s += strlen(s); } if (flags & AVM_PA_PID_FLAG_HSTART_ON_INGRESS) { snprintf(s, end-s, "%shstart_on_ingress", s == buf ? "" : ","); s += strlen(s); } if (flags & AVM_PA_PID_FLAG_HSTART_ON_EGRESS) { snprintf(s, end-s, "%shstart_on_egress", s == buf ? 
"" : ","); s += strlen(s); } if (s == buf) snprintf(s, end-s, "none"); return buf; } /* ------------------------------------------------------------------------ */ /* -------- parsing of packets -------------------------------------------- */ /* ------------------------------------------------------------------------ */ #define HDRCOPY(info) ((info)->hdrcopy+(info)->hdroff) #define LISPDATAHDR(info) (HDRCOPY(info)+(info)->lisp_offset) static inline void pa_reset_match(struct avm_pa_pkt_match *info) { info->nmatch = 0; info->casttype = AVM_PA_IS_UNICAST; info->fragok = 0; info->fin = 0; info->syn = 0; info->ack_only = 0; info->pkttype = AVM_PA_PKTTYPE_NONE; info->pppoe_offset = AVM_PA_OFFSET_NOT_SET; info->encap_offset = AVM_PA_OFFSET_NOT_SET; info->lisp_offset = AVM_PA_OFFSET_NOT_SET; info->ip_offset = AVM_PA_OFFSET_NOT_SET; info->hdroff = 0; info->hdrlen = 0; info->pktlen = 0; } static inline void pa_change_to_bridge_match(struct avm_pa_pkt_match *info) { int i; for (i = 0; i < info->nmatch && info->match[i].type != AVM_PA_ETH; i++) ; if (i < info->nmatch) info->nmatch = i+1; } static inline int pa_add_match(struct avm_pa_pkt_match *info, unsigned char offset, unsigned char type) { if (info->nmatch < AVM_PA_MAX_MATCH) { info->match[info->nmatch].offset = offset; info->match[info->nmatch].type = type; info->nmatch++; return 0; } return -1; } static int set_pkt_match(enum avm_pa_framing framing, unsigned int hstart, PKT *pkt, struct avm_pa_pkt_match *info, int ffaspkt) { #define RETURN(retval) do { ret = retval; goto out; } while (0) int ret = AVM_PA_RX_ERROR_LEN; int state = 0; u8 *data, *p, *end; u32 daddr; u16 uninitialized_var(ethproto); /* not used uninitialized */ u8 uninitialized_var(ipproto); /* not used uninitialized */ int uninitialized_var(ttl); /* not used uninitialized */ data = PKT_DATA(pkt); end = data + PKT_LEN(pkt); data += hstart; switch (framing) { case avm_pa_framing_ip: if ((data[0] & 0xf0) == 0x40 && (data[0] & 0x0f) >= 5) { state = 
AVM_PA_IPV4; break; } if ((data[0] & 0xf0) == 0x60) { state = AVM_PA_IPV6; break; } return AVM_PA_RX_ERROR_IPVERSION; case avm_pa_framing_ppp: state = AVM_PA_PPP; break; case avm_pa_framing_ether: state = AVM_PA_ETH; break; case avm_pa_framing_dev: state = AVM_PA_ETH; data = (u8 *)eth_hdr(pkt); break; case avm_pa_framing_ptype: data = (u8 *)skb_network_header(pkt); if (pkt->protocol == constant_htons(ETH_P_IP)) { state = AVM_PA_IPV4; } else if (pkt->protocol == constant_htons(ETH_P_IPV6)) { state = AVM_PA_IPV6; } else { return AVM_PA_RX_BYPASS; } break; case avm_pa_framing_llcsnap: state = AVM_PA_LLC_SNAP; break; } if (end - data > AVM_PA_MAX_HEADER - AVM_PA_MAX_HDROFF) end = data + AVM_PA_MAX_HEADER - AVM_PA_MAX_HDROFF; p = data; while (p < end) { hdrunion_t *hdr = (hdrunion_t *)p; int offset = p-data; switch (state) { case AVM_PA_ETH: if (pa_add_match(info, offset, AVM_PA_ETH) < 0) RETURN(AVM_PA_RX_ERROR_MATCH); p += sizeof(struct ethhdr); if (hdr->ethh.h_dest[0] & 1) { if (hdr->ethh.h_dest[0] == 0xff) { info->casttype = AVM_PA_IS_BROADCAST; RETURN(AVM_PA_RX_BYPASS); } else { info->casttype = AVM_PA_IS_MULTICAST; } } state = AVM_PA_ETH_PROTO; ethproto = hdr->ethh.h_proto; continue; case AVM_PA_VLAN: if (pa_add_match(info, offset, AVM_PA_VLAN) < 0) RETURN(AVM_PA_RX_ERROR_MATCH); p += sizeof(struct vlanhdr); state = AVM_PA_ETH_PROTO; ethproto = hdr->vlanh.vlan_proto; continue; case AVM_PA_ETH_PROTO: switch (ethproto) { case constant_htons(ETH_P_PPP_SESS): state = AVM_PA_PPPOE; continue; case constant_htons(ETH_P_IP): state = AVM_PA_IPV4; continue; case constant_htons(ETH_P_IPV6): state = AVM_PA_IPV6; continue; case constant_htons(ETH_P_8021Q): state = AVM_PA_VLAN; continue; } RETURN(AVM_PA_RX_BYPASS); case AVM_PA_PPPOE: if (pa_add_match(info, offset, AVM_PA_PPPOE) < 0) RETURN(AVM_PA_RX_ERROR_MATCH); p += sizeof(struct pppoehdr); info->pppoe_offset = offset; state = AVM_PA_PPP; continue; case AVM_PA_PPP: if (p[0] == 0) { p++; offset++; } if (p[0] == 0x21) { if 
(pa_add_match(info, offset, AVM_PA_PPP) < 0) RETURN(AVM_PA_RX_ERROR_MATCH); p++; state = AVM_PA_IPV4; continue; } if (p[0] == 0x57) { if (pa_add_match(info, offset, AVM_PA_PPP) < 0) RETURN(AVM_PA_RX_ERROR_MATCH); p++; state = AVM_PA_IPV6; continue; } RETURN(AVM_PA_RX_BYPASS); case AVM_PA_IPV4: if (hdr->iph.version != 4) RETURN(AVM_PA_RX_ERROR_IPVERSION); if (pa_add_match(info, offset, AVM_PA_IPV4) < 0) RETURN(AVM_PA_RX_ERROR_MATCH); ttl = hdr->iph.ttl; p += PA_IPHLEN(&hdr->iph); if (hdr->iph.frag_off & constant_htons(IP_OFFSET)) RETURN(AVM_PA_RX_FRAGMENT); if ((hdr->iph.frag_off & constant_htons(IP_MF)) && !ffaspkt) RETURN(AVM_PA_RX_FRAGMENT); daddr = get_unaligned(&hdr->iph.daddr); if (ipv4_is_lbcast(daddr)) { info->casttype = AVM_PA_IS_BROADCAST; RETURN(AVM_PA_RX_BYPASS); } else if (ipv4_is_multicast(daddr)) { info->casttype = AVM_PA_IS_MULTICAST; } if ((hdr->iph.frag_off & constant_htons(IP_DF)) == 0) info->fragok = 1; if (hdr->iph.protocol == IPPROTO_IPV6) { if (info->pkttype != AVM_PA_PKTTYPE_NONE) RETURN(AVM_PA_RX_ERROR_HDR); info->pkttype |= AVM_PA_PKTTYPE_IPV4ENCAP; info->encap_offset = offset; state = AVM_PA_IPV6; continue; } if (hdr->iph.protocol == IPPROTO_IPENCAP) { if (info->pkttype != AVM_PA_PKTTYPE_NONE) RETURN(AVM_PA_RX_ERROR_HDR); info->pkttype |= AVM_PA_PKTTYPE_IPV4ENCAP; info->encap_offset = offset; state = AVM_PA_IPV4; continue; } info->pkttype |= AVM_PA_PKTTYPE_IPV4; info->ip_offset = offset; state = AVM_PA_IP_PROTO; ipproto = hdr->iph.protocol; if ((offset & 0x3) && info->hdroff == 0) info->hdroff = 4 - (offset & 0x3); continue; case AVM_PA_IPV6: if (hdr->ipv6h.version != 6) RETURN(AVM_PA_RX_ERROR_IPVERSION); if (pa_add_match(info, offset, AVM_PA_IPV6) < 0) RETURN(AVM_PA_RX_ERROR_MATCH); ttl = hdr->ipv6h.hop_limit; p += sizeof(struct ipv6hdr); if (hdr->ipv6h.daddr.s6_addr[0] == 0xff) info->casttype = AVM_PA_IS_MULTICAST; if (hdr->ipv6h.nexthdr == IPPROTO_IPV6) { if (info->pkttype != AVM_PA_PKTTYPE_NONE) RETURN(AVM_PA_RX_ERROR_HDR); 
info->pkttype |= AVM_PA_PKTTYPE_IPV6ENCAP; info->encap_offset = offset; state = AVM_PA_IPV6; continue; } if (hdr->ipv6h.nexthdr == IPPROTO_IPENCAP) { if (info->pkttype != AVM_PA_PKTTYPE_NONE) RETURN(AVM_PA_RX_ERROR_HDR); info->pkttype |= AVM_PA_PKTTYPE_IPV6ENCAP; info->encap_offset = offset; state = AVM_PA_IPV4; continue; } if (hdr->ipv6h.nexthdr == IPPROTO_FRAGMENT) { struct ipv6fraghdr *fragh = (struct ipv6fraghdr *)p; info->pkttype |= AVM_PA_PKTTYPE_IPV6; info->ip_offset = offset; if (fragh->frag_off & constant_htons(IP6_OFFSET)) RETURN(AVM_PA_RX_FRAGMENT); if ((fragh->frag_off & constant_htons(IP6_MF)) && !ffaspkt) RETURN(AVM_PA_RX_FRAGMENT); p += sizeof(struct ipv6fraghdr); state = AVM_PA_IP_PROTO; ipproto = fragh->nexthdr; } info->pkttype |= AVM_PA_PKTTYPE_IPV6; info->ip_offset = offset; state = AVM_PA_IP_PROTO; ipproto = hdr->ipv6h.nexthdr; if ((offset & 0x3) && info->hdroff == 0) info->hdroff = 4 - (offset & 0x3); continue; case AVM_PA_IP_PROTO: switch (ipproto) { case IPPROTO_TCP: info->pkttype |= ipproto; if (p + sizeof(struct tcphdr) > end) RETURN(AVM_PA_RX_ERROR_LEN); if (p + PA_TCP_DOFF(&hdr->tcph) > end) RETURN(AVM_PA_RX_ERROR_LEN); if (pa_add_match(info, offset, AVM_PA_PORTS) < 0) RETURN(AVM_PA_RX_ERROR_MATCH); if (PA_TCP_FIN_OR_RST(&hdr->tcph)) info->fin = 1; if (PA_TCP_SYN(&hdr->tcph)) info->syn = 1; if (PA_TCP_ACK(&hdr->tcph)) { if (AVM_PA_PKTTYPE_IP_VERSION(info->pkttype) == 4) { hdrunion_t *iphdr = (hdrunion_t *)(data+info->ip_offset); if (ntohs(PA_IPTOTLEN(&iphdr->iph)) == (PA_IPHLEN(&iphdr->iph)+PA_TCP_DOFF(&hdr->tcph))) info->ack_only = 1; } else if (AVM_PA_PKTTYPE_IP_VERSION(info->pkttype) == 6) { hdrunion_t *ip6hdr = (hdrunion_t *)(data+info->ip_offset); if (ntohs(PA_IP6_PAYLOADLEN(&ip6hdr->iph)) == PA_TCP_DOFF(&hdr->tcph)) info->ack_only = 1; } } p += PA_TCP_DOFF(&hdr->tcph); RETURN(AVM_PA_RX_OK); case IPPROTO_UDP: info->pkttype |= ipproto; if (p + sizeof(struct udphdr) > end) RETURN(AVM_PA_RX_ERROR_LEN); if (pa_add_match(info, offset, 
AVM_PA_PORTS) < 0) RETURN(AVM_PA_RX_ERROR_MATCH); if (hdr->udph.dest == constant_htons(4341)) { p += sizeof(struct udphdr); state = AVM_PA_LISP; continue; } p += sizeof(struct udphdr); RETURN(AVM_PA_RX_OK); case IPPROTO_ICMP: info->pkttype |= ipproto; if (p + sizeof(struct icmphdr) > end) RETURN(AVM_PA_RX_ERROR_LEN); if ( hdr->icmph.type == ICMP_ECHO || hdr->icmph.type == ICMP_ECHOREPLY) { if (pa_add_match(info, offset, AVM_PA_ICMPV4) < 0) RETURN(AVM_PA_RX_ERROR_MATCH); p += sizeof(struct icmphdr); RETURN(AVM_PA_RX_OK); } break; case IPPROTO_ICMPV6: info->pkttype |= ipproto; if (p + sizeof(struct icmp6hdr) > end) RETURN(AVM_PA_RX_ERROR_LEN); if ( hdr->icmpv6h.icmp6_type == ICMPV6_ECHO_REQUEST || hdr->icmpv6h.icmp6_type == ICMPV6_ECHO_REPLY) { if (pa_add_match(info, offset, AVM_PA_ICMPV6) < 0) RETURN(AVM_PA_RX_ERROR_MATCH); p += sizeof(struct icmp6hdr); RETURN(AVM_PA_RX_OK); } break; case IPPROTO_L2TP: if (AVM_PA_PKTTYPE_IPENCAP_VERSION(info->pkttype)) RETURN(AVM_PA_RX_OK); if (p + L2TP_DATAHDR_SIZE > end) RETURN(AVM_PA_RX_ERROR_LEN); info->encap_offset = info->ip_offset; p += L2TP_DATAHDR_SIZE; if (pa_add_match(info, offset, AVM_PA_L2TP) < 0) RETURN(AVM_PA_RX_ERROR_MATCH); info->pkttype = AVM_PA_PKTTYPE_IP2IPENCAP_VERSION(info->pkttype); info->pkttype |= AVM_PA_PKTTYPE_L2TP; state = AVM_PA_ETH; continue; case IPPROTO_GRE: if (AVM_PA_PKTTYPE_IPENCAP_VERSION(info->pkttype)) RETURN(AVM_PA_RX_OK); if (p + sizeof(struct tlb_grehdr) > end) RETURN(AVM_PA_RX_ERROR_LEN); info->encap_offset = info->ip_offset; p += sizeof(struct tlb_grehdr); if (pa_add_match(info, offset, AVM_PA_GRE) < 0) RETURN(AVM_PA_RX_ERROR_MATCH); info->pkttype = AVM_PA_PKTTYPE_IP2IPENCAP_VERSION(info->pkttype); info->pkttype |= AVM_PA_PKTTYPE_GRE; switch (hdr->greh.protocol) { case constant_htons(ETH_P_IP): state = AVM_PA_IPV4; continue; case constant_htons(ETH_P_TEB): state = AVM_PA_ETH; continue; } break; } RETURN(AVM_PA_RX_BYPASS); case AVM_PA_LLC_SNAP: if ( hdr->llcsnap.dsap != 0xAA || 
hdr->llcsnap.ssap != 0xAA || hdr->llcsnap.ui != 0x03) /* not checking: * RFC1042_SNAP 0x00,0x00,0x00 * BTEP_SNAP 0x00,0x00,0xf8 */ RETURN(AVM_PA_RX_BYPASS); if (pa_add_match(info, offset, AVM_PA_LLC_SNAP) < 0) RETURN(AVM_PA_RX_ERROR_MATCH); p += sizeof(struct llc_snap_hdr); state = AVM_PA_ETH_PROTO; ethproto = get_unaligned(&hdr->llcsnap.type); continue; case AVM_PA_LISP: if (AVM_PA_PKTTYPE_IPENCAP_VERSION(info->pkttype)) RETURN(AVM_PA_RX_OK); if (p + LISP_DATAHDR_SIZE > end) RETURN(AVM_PA_RX_ERROR_LEN); info->encap_offset = info->ip_offset; info->lisp_offset = offset; p += LISP_DATAHDR_SIZE; hdr = (hdrunion_t *)p; if (hdr->iph.version == 4) state = AVM_PA_IPV4; else if (hdr->iph.version == 6) state = AVM_PA_IPV6; else RETURN(AVM_PA_RX_OK); /* not a lisp packet */ if (pa_add_match(info, offset, AVM_PA_LISP) < 0) RETURN(AVM_PA_RX_ERROR_MATCH); info->pkttype = AVM_PA_PKTTYPE_IP2IPENCAP_VERSION(info->pkttype); info->pkttype |= AVM_PA_PKTTYPE_LISP; continue; default: RETURN(AVM_PA_RX_ERROR_STATE); } } out: if (ret == AVM_PA_RX_OK && ttl == 0) ret = AVM_PA_RX_TTL; if (ret == AVM_PA_RX_OK || pa_glob.dbgmatch) { info->hdrlen = p-data; memcpy(HDRCOPY(info), data, info->hdrlen); } if (ret == AVM_PA_RX_OK) info->pktlen = PKT_LEN(pkt); return ret; #undef RETURN } static inline void pa_match_set_hash(struct avm_pa_pkt_match *info) { int i; info->hash = 0; for (i = 0 ; i < info->nmatch; i++) { struct avm_pa_match_info *p = &info->match[i]; hdrunion_t *hdr = (hdrunion_t *)(HDRCOPY(info)+p->offset); switch (p->type) { case AVM_PA_IPV4: #if AVM_PA_UNALIGNED_CHECK if (((unsigned long)&hdr->iph.saddr) & 0x3) if (net_ratelimit()) printk(KERN_INFO "avm_pa: unaligned access %p (ipv4)\n", &hdr->iph.saddr); #endif info->hash ^= hdr->iph.saddr; info->hash ^= hdr->iph.daddr; info->hash ^= hdr->iph.protocol; info->hash ^= hdr->iph.tos; break; case AVM_PA_IPV6: #if AVM_PA_UNALIGNED_CHECK if (((unsigned long)&hdr->ipv6h.saddr.s6_addr32[2]) & 0x3) if (net_ratelimit()) printk(KERN_INFO "avm_pa: 
unaligned access %p (ipv6)\n", &hdr->ipv6h.saddr.s6_addr32[2]); #endif //info->hash ^= hdr->ipv6h.saddr.s6_addr32[0]; //info->hash ^= hdr->ipv6h.saddr.s6_addr32[1]; info->hash ^= hdr->ipv6h.saddr.s6_addr32[2]; info->hash ^= hdr->ipv6h.saddr.s6_addr32[3]; //info->hash ^= hdr->ipv6h.daddr.s6_addr32[0]; //info->hash ^= hdr->ipv6h.daddr.s6_addr32[1]; info->hash ^= hdr->ipv6h.daddr.s6_addr32[2]; info->hash ^= hdr->ipv6h.daddr.s6_addr32[3]; info->hash ^= hdr->ipv6h.nexthdr; break; case AVM_PA_PORTS: info->hash ^= hdr->ports[0]; info->hash ^= hdr->ports[1]; break; case AVM_PA_ICMPV4: case AVM_PA_ICMPV6: info->hash ^= hdr->ports[0]; /* type + code */ info->hash ^= hdr->ports[2]; /* id */ break; } } info->hash = (info->hash >> 16) ^ (info->hash & 0xffff); info->hash = (info->hash >> 8) ^ (info->hash & 0xff); info->hash %= CONFIG_AVM_PA_MAX_SESSION; } static int pa_set_pkt_match(enum avm_pa_framing framing, unsigned int hstart, PKT *pkt, struct avm_pa_pkt_match *match, int ffaspkt) { int rc; pa_reset_match(match); rc = set_pkt_match(framing, hstart, pkt, match, ffaspkt); if (rc == AVM_PA_RX_OK) pa_match_set_hash(match); return rc; } static inline int pa_match_cmp(struct avm_pa_pkt_match *a1, struct avm_pa_pkt_match *a2) { struct avm_pa_match_info *p = 0; hdrunion_t *h1, *h2; int rc; int i; rc = (int)a1->nmatch - (int)a2->nmatch; if (rc) return rc; rc = memcmp(&a1->match, &a2->match, a1->nmatch*sizeof(struct avm_pa_match_info)); if (rc) return rc; for (i = a1->nmatch-1; i >= 0; i--) { p = &a1->match[i]; h1 = (hdrunion_t *)(HDRCOPY(a1)+p->offset); h2 = (hdrunion_t *)(HDRCOPY(a2)+p->offset); switch (p->type) { case AVM_PA_ETH: rc = memcmp(&h1->ethh, &h2->ethh, sizeof(struct ethhdr)); if (rc) goto out; break; case AVM_PA_VLAN: rc = (int)VLAN_ID(&h1->vlanh) - (int)VLAN_ID(&h2->vlanh); if (rc) goto out; break; case AVM_PA_PPPOE: rc = (int)h1->pppoeh.sid - (int)h2->pppoeh.sid; if (rc) goto out; break; case AVM_PA_PPP: rc = (int)h1->ppph[0] - (int)h2->ppph[0]; if (rc) goto out; 
break; case AVM_PA_IPV4: rc = (int)h1->iph.protocol - (int)h2->iph.protocol; if (rc) goto out; rc = (int)h1->iph.tos - (int)h2->iph.tos; if (rc) goto out; rc = (int)h1->iph.daddr - (int)h2->iph.daddr; if (rc) goto out; rc = (int)h1->iph.saddr - (int)h2->iph.saddr; if (rc) goto out; break; case AVM_PA_IPV6: rc = (int)h1->ipv6h.nexthdr - (int)h2->ipv6h.nexthdr; if (rc) goto out; /* compare both src and dst in a single call */ rc = memcmp(&h1->ipv6h.saddr, &h2->ipv6h.saddr, sizeof(struct in6_addr) * 2); if (rc) goto out; break; case AVM_PA_PORTS: rc = (int)h1->ports[0] - (int)h2->ports[0]; /* source */ if (rc) goto out; rc = (int)h1->ports[1] - (int)h2->ports[1]; /* dest */ if (rc) goto out; break; case AVM_PA_ICMPV4: case AVM_PA_ICMPV6: rc = (int)h1->ports[0] - (int)h2->ports[0]; /* type + code */ if (rc) goto out; rc = (int)h1->ports[2] - (int)h2->ports[2]; /* id */ if (rc) goto out; break; case AVM_PA_LLC_SNAP: rc = (int)h1->llcsnap.type - (int)h2->llcsnap.type; if (rc) goto out; break; case AVM_PA_L2TP: rc = (int)h1->l2tp.session_id - (int)h2->l2tp.session_id; if (rc) goto out; break; case AVM_PA_GRE: rc = (int)h1->greh.protocol - (int)h2->greh.protocol; if (rc) goto out; break; } } out: return rc; } static void pa_show_pkt_match(struct avm_pa_pkt_match *match, int is_bridged, u16 egress_pkttype, pa_fprintf fprintffunc, void *arg) { char buf[128]; const char *prompt = "PktType"; unsigned n; int s; if (is_bridged) { pkttype2str(match->pkttype & AVM_PA_PKTTYPE_IP_MASK, buf, sizeof(buf)); (*fprintffunc)(arg, "%-15s: %s\n", prompt, buf); } else { if (egress_pkttype && egress_pkttype != match->pkttype) { size_t half = sizeof(buf)/2; pkttype2str(match->pkttype, buf, half); pkttype2str(egress_pkttype, buf+half, half); (*fprintffunc)(arg, "%-15s: %s -> %s\n", prompt, buf, buf+half); } else { pkttype2str(match->pkttype, buf, sizeof(buf)); (*fprintffunc)(arg, "%-15s: %s\n", prompt, buf); } } if (match->nmatch && !is_bridged) { (*fprintffunc)(arg, "FragOk : %u\n", 
match->fragok); (*fprintffunc)(arg, "Syn : %u\n", match->syn); (*fprintffunc)(arg, "Fin : %u\n", match->fin); } for (n=0; n < match->nmatch; n++) { struct avm_pa_match_info *p = match->match+n; hdrunion_t *hdr = (hdrunion_t *)(HDRCOPY(match)+p->offset); switch (p->type) { case AVM_PA_ETH: s = mac2str(&hdr->ethh.h_dest, buf, sizeof(buf)); buf[s++] = ' '; mac2str(&hdr->ethh.h_source, &buf[s], sizeof(buf) - s); (*fprintffunc)(arg, "Eth Hdr DS : %s proto %04X\n", buf, ntohs(hdr->ethh.h_proto)); break; case AVM_PA_VLAN: (*fprintffunc)(arg, "Vlan ID : %d\n", VLAN_ID(&hdr->vlanh)); break; case AVM_PA_PPPOE: (*fprintffunc)(arg, "PPPoE Sid : %04X\n", ntohs(hdr->pppoeh.sid)); break; case AVM_PA_PPP: (*fprintffunc)(arg, "PPP Proto : %02X\n", hdr->ppph[0]); break; case AVM_PA_IPV4: s = in_addr2str(&hdr->iph.saddr, buf, sizeof(buf)); buf[s++] = ' '; in_addr2str(&hdr->iph.daddr, &buf[s], sizeof(buf) - s); (*fprintffunc)(arg, "IPv4 Hdr : %s proto %d tos %02X\n", buf, hdr->iph.protocol, hdr->iph.tos); break; case AVM_PA_IPV6: s = in6_addr2str(&hdr->ipv6h.saddr, buf, sizeof(buf)); buf[s++] = ' '; in6_addr2str(&hdr->ipv6h.daddr, &buf[s], sizeof(buf) - s); (*fprintffunc)(arg, "IPv6 Hdr : %s proto %d\n", buf, hdr->ipv6h.nexthdr); break; case AVM_PA_PORTS: (*fprintffunc)(arg, "Ports : %d -> %d\n", ntohs(hdr->ports[0]), ntohs(hdr->ports[1])); break; case AVM_PA_ICMPV4: prompt = "ICMPv4"; switch (hdr->icmph.type) { case ICMP_ECHOREPLY: (*fprintffunc)(arg, "%-15s: echo reply id=%hu\n", prompt, hdr->icmph.un.echo.id); break; case ICMP_ECHO: (*fprintffunc)(arg, "%-15s: echo request id=%hu\n", prompt, hdr->icmph.un.echo.id); break; default: (*fprintffunc)(arg, "??????\n"); break; } break; case AVM_PA_ICMPV6: prompt = "ICMPv6"; switch (hdr->icmpv6h.icmp6_type) { case ICMPV6_ECHO_REQUEST: (*fprintffunc)(arg, "%-15s: echo request id=%hu\n", prompt, hdr->icmpv6h.icmp6_identifier); break; case ICMPV6_ECHO_REPLY: (*fprintffunc)(arg, "%-15s: echo reply id=%hu\n", prompt, 
hdr->icmpv6h.icmp6_identifier); break; default: (*fprintffunc)(arg, "??????\n"); break; } break; case AVM_PA_LLC_SNAP: (*fprintffunc)(arg, "LLC SNAP : %04X\n", ntohs(hdr->llcsnap.type)); break; case AVM_PA_LISP: (*fprintffunc)(arg, "LISP : data header\n"); break; case AVM_PA_L2TP: (*fprintffunc)(arg, "L2TP Sess : %lu\n", (unsigned long)ntohl(hdr->l2tp.session_id)); break; case AVM_PA_GRE: (*fprintffunc)(arg, "GRE Proto : %04X\n", ntohs(hdr->greh.protocol)); break; } } } static void pa_show_pkt_info(struct avm_pa_pkt_info *info, pa_fprintf fprintffunc, void *arg) { struct avm_pa_global *ctx = &pa_glob; (*fprintffunc)(arg, "In Pid : %d (%s)\n", info->ingress_pid_handle, PA_PID(ctx, info->ingress_pid_handle)->cfg.name); if (info->ingress_vpid_handle) { (*fprintffunc)(arg, "In VPid : %d (%s)\n", info->ingress_vpid_handle, PA_VPID(ctx, info->ingress_vpid_handle)->cfg.name); } if (info->egress_vpid_handle) { (*fprintffunc)(arg, "Out VPid : %d (%s)\n", info->egress_vpid_handle, PA_VPID(ctx, info->egress_vpid_handle)->cfg.name); } if (info->routed) (*fprintffunc)(arg, "Routed : yes\n"); pa_show_pkt_match(&info->match, 0, 0, fprintffunc, arg); } /* ------------------------------------------------------------------------ */ /* -------- mod rec ------------------------------------------------------- */ /* ------------------------------------------------------------------------ */ /* * From RFC 1624 Incremental Internet Checksum * * HC - old checksum in header * HC' - new checksum in header * m - old value of a 16-bit field * m' - new value of a 16-bit field * HC' = ~(~HC + ~m + m') -- [Eqn. 3] * HC' = HC - ~m - m' -- [Eqn. 4] * * * csum_unfold(): be16 -> u32 * * M = ~m + m'; * * we use Eqn.3, because we precalculate M. 
* csum_fold(): add the carries * * HC' = ~csum_fold((~csum_unfold(HC) + ~m + m')); * * HC' = ~csum_fold(csum_add(~csum_unfold(HC), M); * */ static inline u32 hcsum_add(u32 sum, u32 addend) { sum += addend; if (sum < addend) sum++; /* skip -0 */ return sum; // + (sum < addend); } static inline u32 hcsum_prepare(u16 sum) { return (u16)(~sum); } static inline u32 hcsum_u32(u32 sum, u32 from, u32 to) { sum = hcsum_add(sum, ~from); sum = hcsum_add(sum, to); return sum; } static inline u32 hcsum_u16(u32 sum, u16 from, u16 to) { sum = hcsum_u32(sum, from, to); return sum; } static inline u16 hcsum_fold(u32 sum) { while (sum >> 16) sum = (sum & 0xffff) + (sum >> 16); return sum; } static inline u16 hcsum_finish(u32 sum) { return ~hcsum_fold(sum); } static int pa_set_v4_mod_rec(struct avm_pa_v4_mod_rec *mod, int update_ttl, u8 *in, u8 *out) { struct iphdr *iiph = (struct iphdr *)in; struct iphdr *oiph = (struct iphdr *)out; u32 l3_check = 0; u32 l4_check; int isicmp = 0; mod->flags = 0; mod->saddr = oiph->saddr; if (iiph->saddr != oiph->saddr) { mod->flags |= AVM_PA_V4_MOD_SADDR|AVM_PA_V4_MOD_IPHDR_CSUM; l3_check = hcsum_u32(l3_check, iiph->saddr, oiph->saddr); } mod->daddr = oiph->daddr; if (iiph->daddr != oiph->daddr) { mod->flags |= AVM_PA_V4_MOD_DADDR|AVM_PA_V4_MOD_IPHDR_CSUM; l3_check = hcsum_u32(l3_check, iiph->daddr, oiph->daddr); } l4_check = l3_check; mod->tos = oiph->tos; if (iiph->tos != oiph->tos) { mod->flags |= AVM_PA_V4_MOD_TOS|AVM_PA_V4_MOD_IPHDR_CSUM; l3_check = hcsum_u16(l3_check, htons(iiph->tos), htons(oiph->tos)); } if (update_ttl) { mod->flags |= AVM_PA_V4_MOD_UPDATE_TTL|AVM_PA_V4_MOD_IPHDR_CSUM; l3_check = hcsum_u16(l3_check, constant_htons(0x0100), 0x0000); } mod->l3crc_update = hcsum_fold(l3_check); switch (iiph->protocol) { case IPPROTO_TCP: mod->l4crc_offset = offsetof(struct tcphdr, check); break; case IPPROTO_UDP: mod->l4crc_offset = offsetof(struct udphdr, check); break; case IPPROTO_ICMP: #ifdef _LINUX_ICMP_H mod->l4crc_offset = 
offsetof(struct icmphdr, checksum); #else mod->l4crc_offset = offsetof(struct icmphdr, check); #endif isicmp = 1; break; default: mod->l4crc_offset = 0; break; } mod->l4crc_update = 0; if (mod->l4crc_offset) { u16 *iports = (u16 *)(in + PA_IPHLEN(iiph)); u16 *oports = (u16 *)(out + PA_IPHLEN(oiph)); if (isicmp) { l4_check = 0; mod->id = oports[2]; if (iports[2] != oports[2]) { mod->flags |= AVM_PA_V4_MOD_ICMPID|AVM_PA_V4_MOD_PROTOHDR_CSUM; l4_check = hcsum_u16(l4_check, iports[2], oports[2]); } } else { if (mod->flags & AVM_PA_V4_MOD_ADDR) mod->flags |= AVM_PA_V4_MOD_PROTOHDR_CSUM; mod->sport = oports[0]; if (iports[0] != oports[0]) { mod->flags |= AVM_PA_V4_MOD_SPORT|AVM_PA_V4_MOD_PROTOHDR_CSUM; l4_check = hcsum_u16(l4_check, iports[0], oports[0]); } mod->dport = oports[1]; if (iports[1] != oports[1]) { mod->flags |= AVM_PA_V4_MOD_DPORT|AVM_PA_V4_MOD_PROTOHDR_CSUM; l4_check = hcsum_u16(l4_check, iports[1], oports[1]); } } mod->l4crc_update = hcsum_fold(l4_check); } mod->iphlen = PA_IPHLEN(oiph); return mod->flags != 0; } static void pa_do_v4_mod_rec(struct avm_pa_v4_mod_rec *mod, u8 *data) { struct avm_pa_global *ctx = &pa_glob; struct iphdr *iph = (struct iphdr *)data; u16 *ports = (u16 *)(data + mod->iphlen); u32 sum; u16 csum; ctx->stats.rx_mod++; if (((unsigned long)iph) & 0x3) { memcpy(&iph->saddr, &mod->saddr, 2*sizeof(u32)); } else { iph->saddr = mod->saddr; iph->daddr = mod->daddr; } iph->tos = mod->tos; if (mod->flags & AVM_PA_V4_MOD_UPDATE_TTL) iph->ttl--; sum = hcsum_prepare(iph->check); iph->check = hcsum_finish(hcsum_add(sum, mod->l3crc_update)); if (mod->flags & AVM_PA_V4_MOD_PORT) { ports[0] = mod->sport; ports[1] = mod->dport; } else if (mod->flags & AVM_PA_V4_MOD_ICMPID) { ports[2] = mod->id; } csum = ports[mod->l4crc_offset>>1]; if (csum || iph->protocol != IPPROTO_UDP) { sum = hcsum_prepare(csum); ports[mod->l4crc_offset>>1] = hcsum_finish(hcsum_add(sum, mod->l4crc_update)); } } static void pa_show_v4_mod_rec(struct avm_pa_v4_mod_rec *mod, 
pa_fprintf fprintffunc, void *arg) { char buf[64]; if (mod->flags & AVM_PA_V4_MOD_SADDR) { in_addr2str(&mod->saddr, buf, sizeof(buf)); (*fprintffunc)(arg, "*IPv4 Src : %s\n", buf); } if (mod->flags & AVM_PA_V4_MOD_DADDR) { in_addr2str(&mod->daddr, buf, sizeof(buf)); (*fprintffunc)(arg, "*IPv4 Dst : %s\n", buf); } if (mod->flags & AVM_PA_V4_MOD_TOS) (*fprintffunc)(arg, "*IPv4 Tos : 0x%02x\n", mod->tos); if (mod->flags & AVM_PA_V4_MOD_UPDATE_TTL) (*fprintffunc)(arg, "*IPv4 TTL : decrease\n"); if (mod->flags & AVM_PA_V4_MOD_IPHDR_CSUM) (*fprintffunc)(arg, "*L3 Sum : 0x%02x\n", mod->l3crc_update); if (mod->flags & AVM_PA_V4_MOD_SPORT) (*fprintffunc)(arg, "*Src Port : %d\n", ntohs(mod->sport)); if (mod->flags & AVM_PA_V4_MOD_DPORT) (*fprintffunc)(arg, "*Dst Port : %d\n", ntohs(mod->dport)); if (mod->flags & AVM_PA_V4_MOD_ICMPID) (*fprintffunc)(arg, "*ICMP Id : %d\n", ntohs(mod->id)); if (mod->flags & AVM_PA_V4_MOD_PROTOHDR_CSUM) (*fprintffunc)(arg, "*L4 Sum : 0x%02x\n", mod->l4crc_update); } /* ------------------------------------------------------------------------ */ static void pa_show_mod_rec(struct avm_pa_mod_rec *mod, pa_fprintf fprintffunc, void *arg) { (*fprintffunc)(arg, "Hdrlen : %u\n", (unsigned)mod->hdrlen); if (mod->ipversion) (*fprintffunc)(arg, "IP version : %u\n", (unsigned)mod->ipversion); if (mod->pull_l2_len) (*fprintffunc)(arg, "L2 pull : %d\n", mod->pull_l2_len); if (mod->pull_encap_len) (*fprintffunc)(arg, "Encap pull : %d\n", mod->pull_encap_len); if (mod->push_ipversion) (*fprintffunc)(arg, "Push IPv : %u\n", (unsigned)mod->push_ipversion); if (mod->push_udpoffset) (*fprintffunc)(arg, "Push UDP : %u\n", (unsigned)mod->push_udpoffset); if (mod->push_encap_len) { char buf[256]; data2hex(HDRCOPY(mod)+mod->push_l2_len, mod->push_encap_len, buf, sizeof(buf)); (*fprintffunc)(arg, "Encap push : %s\n", buf); } (*fprintffunc)(arg, "SKB proto : %04X\n", (unsigned)ntohs(mod->protocol)); pa_show_v4_mod_rec(&mod->v4_mod, fprintffunc, arg); if 
(mod->v6_decrease_hop_limit) (*fprintffunc)(arg, "IPv6 ttl : decrease\n"); } static int pa_egress_precheck(struct avm_pa_pid *pid, PKT *pkt, struct avm_pa_pkt_match *ingress, struct avm_pa_pkt_match *egress) { unsigned int hstart; if (pid->ecfg.flags & AVM_PA_PID_FLAG_HSTART_ON_EGRESS) hstart = AVM_PKT_INFO(pkt)->hstart; else hstart = 0; if (pa_set_pkt_match(pid->egress_framing, hstart, pkt, egress, 1) != AVM_PA_RX_OK) return -1; if (AVM_PA_PKTTYPE_BASE_EQ(egress->pkttype, ingress->pkttype)) return 0; return -1; } static int pa_calc_modify(struct avm_pa_session *session, struct avm_pa_pkt_match *ingress, struct avm_pa_pkt_match *egress) { /* * Precondition: AVM_PA_PKTTYPE_BASE_EQ(egress->pkttype, ingress->pkttype) */ struct avm_pa_mod_rec *mod = &session->mod; int change = 0; mod->hdrlen = egress->hdrlen; mod->hdroff = egress->hdroff; memcpy(HDRCOPY(mod), HDRCOPY(egress), mod->hdrlen); mod->protocol = 0; mod->pkttype = egress->pkttype; if (AVM_PA_PKTTYPE_EQ(ingress->pkttype, egress->pkttype)) { mod->pull_encap_len = 0; if (ingress->encap_offset == AVM_PA_OFFSET_NOT_SET) { /* no tunnel, egress->encap_offset also not set */ mod->pull_l2_len = ingress->ip_offset; mod->pull_encap_len = 0; mod->ipversion = AVM_PA_PKTTYPE_IP_VERSION(egress->pkttype); mod->push_encap_len = 0; mod->push_ipversion = 0; mod->push_l2_len = egress->ip_offset; } else { /* untouched tunnel, egress->encap_offset also set */ mod->pull_l2_len = ingress->encap_offset; mod->pull_encap_len = 0; mod->ipversion = AVM_PA_PKTTYPE_IPENCAP_VERSION(egress->pkttype); mod->push_encap_len = 0; mod->push_ipversion = 0; mod->push_l2_len = egress->encap_offset; } } else { /* AVM_PA_PKTTYPE_BASE_EQ because of precheck */ change++; if (ingress->encap_offset == AVM_PA_OFFSET_NOT_SET) { /* no tunnel header on input */ mod->pull_l2_len = ingress->ip_offset; mod->pull_encap_len = 0; mod->ipversion = AVM_PA_PKTTYPE_IP_VERSION(ingress->pkttype); } else { /* tunnel header on input */ mod->pull_l2_len = 
ingress->encap_offset; mod->pull_encap_len = ingress->ip_offset - ingress->encap_offset; mod->ipversion = AVM_PA_PKTTYPE_IP_VERSION(ingress->pkttype); } if (egress->encap_offset == AVM_PA_OFFSET_NOT_SET) { mod->push_encap_len = 0; mod->push_ipversion = 0; mod->push_l2_len = egress->ip_offset; } else { mod->push_encap_len = egress->ip_offset - egress->encap_offset; mod->push_ipversion = AVM_PA_PKTTYPE_IPENCAP_VERSION(egress->pkttype); mod->push_l2_len = egress->encap_offset; } } if (mod->push_ipversion) { change++; if (mod->push_ipversion == 4) mod->protocol = constant_htons(ETH_P_IP); else if (mod->push_ipversion == 6) mod->protocol = constant_htons(ETH_P_IPV6); if (egress->lisp_offset != AVM_PA_OFFSET_NOT_SET) { mod->push_udpoffset = egress->lisp_offset - egress->encap_offset; mod->push_udpoffset -= sizeof(struct udphdr); } } else { if (mod->ipversion == 4) mod->protocol = constant_htons(ETH_P_IP); else if (mod->ipversion == 6) mod->protocol = constant_htons(ETH_P_IPV6); mod->push_udpoffset = 0; } if (mod->ipversion == 4) { int ingress_offset = mod->pull_l2_len + mod->pull_encap_len; int egress_offset = mod->push_l2_len + mod->push_encap_len; if (pa_set_v4_mod_rec(&mod->v4_mod, session->routed, HDRCOPY(ingress)+ingress_offset, HDRCOPY(mod)+egress_offset)) change++; } else if (mod->ipversion == 6) { if (session->routed) { mod->v6_decrease_hop_limit = 1; change++; } } return change; } static u8 casttype2pkt_type[] = { PACKET_HOST, PACKET_MULTICAST, PACKET_BROADCAST }; /* ------------------------------------------------------------------------ */ /* -------- session retrieval and verification ---------------------------- */ /* ------------------------------------------------------------------------ */ static int pa_session_valid(struct avm_pa_session *session) { return session->is_on_lru && session->lru != AVM_PA_LRU_FREE; } static struct avm_pa_session * pa_session_get_unlocked(avm_session_handle session_handle) { struct avm_pa_global *ctx = &pa_glob; struct 
avm_pa_session *session; session = PA_SESSION(ctx, session_handle); if (!pa_session_valid(session) || session->session_handle == 0) session = 0; return session; } static struct avm_pa_session * pa_session_get(avm_session_handle session_handle) { struct avm_pa_session *session; AVM_PA_LOCK_DECLARE; AVM_PA_READ_LOCK(); session = pa_session_get_unlocked(session_handle); AVM_PA_READ_UNLOCK(); return session; } /* ------------------------------------------------------------------------ */ /* -------- packet forwarding --------------------------------------------- */ /* ------------------------------------------------------------------------ */ #ifdef CONFIG_AVM_PA_TX_NAPI static int pa_dev_tx_napi_poll(struct napi_struct *napi, int budget) { int done; struct avm_pa_pid *pid = container_of(napi, struct avm_pa_pid, tx_napi); for (done = 0; done < budget; done++) { PKT *pkt = skb_dequeue_tail(&pid->tx_napi_pkts); if (!pkt) break; pid->cfg.tx_func(pid->cfg.tx_arg, pkt); } if (done < budget) napi_complete(napi); return done; } #ifdef CONFIG_SMP static void __do_schedule_napi(struct napi_struct *napi) { int cpu = smp_processor_id(); int tcpu = cpumask_any_but(cpu_online_mask, cpu); if (tcpu >= nr_cpumask_bits) tcpu = cpu; /* This runs in a tasklet because we want to run the "core transition" per * packet burst, and not per packet. Both napi_schedule_prep() and IPIs (via * smp_call_function_single()) on a per packet basis would be too expensive in this * smp scenario. (napi_schedule_prep() does atomic accesses which requires snooping * the other cores caches, and the napi_poll runs one of the other cores). * * Furthermore, guarding the IPI with napi_schedule_prep() has been found to * perform a bit better than doing the IPI straight in this tasklet. 
*/ if (napi_schedule_prep(napi)) smp_call_function_single(tcpu, (void*)__napi_schedule, napi, 0); } static void do_schedule_napi(struct avm_pa_pid *pid) { tasklet_schedule(&pid->tx_napi_tsk); } #else static void do_schedule_napi(struct avm_pa_pid *pid) { /* On UP the atomic access is a no-op */ napi_schedule(&pid->tx_napi); } #endif #endif /* we dont want 0 as a valid timestamp */ static unsigned long NOT_ZERO(unsigned long n) { if (!n) return 1; return n; } static inline void pa_do_push_l2(struct avm_pa_egress *egress, PKT *pkt) { if (egress->push_l2_len) { memcpy(PKT_PUSH(pkt, egress->push_l2_len), HDRCOPY(&egress->match), egress->push_l2_len); if (egress->pppoe_offset != AVM_PA_OFFSET_NOT_SET) { unsigned char *data = PKT_DATA(pkt) + egress->pppoe_offset; struct pppoehdr *pppoehdr = (struct pppoehdr *)data; pppoehdr->length = htons(PKT_FRAGLEN(pkt) - egress->pppoe_hdrlen); } } } static void pa_show_pids(pa_fprintf fprintffunc, void *arg); static void pa_show_vpids(pa_fprintf fprintffunc, void *arg); static int _pa_transmit(struct avm_pa_egress *egress, PKT *pkt, int nfrags) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_pid *pid = PA_PID(ctx, egress->pid_handle); struct avm_pa_pkt_info *info = AVM_PKT_INFO(pkt); #ifdef CONFIG_AVM_PA_TX_NAPI /* A non-NULL dev indicates avm_pa_dev_pid_register_tx_napi() was used */ if (pid->tx_napi.dev && (skb_queue_len(&pid->tx_napi_pkts) >= TX_NAPI_MAXQUEUE)) { PKT_FREE(pkt); /* drop packet, wouldn't fit anyway */ return NET_XMIT_DROP; } #endif /* * info->already_modified is set when paket comes * from avm_pa_tx_channel_accelerated_packet() and * statistics are counted in HW. 
* * 2016-03-02, calle */ if (info->already_modified == 0) { egress->sw_stats.tx_pkts += nfrags; if (skb_has_frag_list(pkt)) egress->sw_stats.tx_bytes += pkt->data_len + nfrags * (PKT_LEN(pkt) + egress->push_l2_len); else egress->sw_stats.tx_bytes += PKT_LEN(pkt) + egress->push_l2_len; } AVM_PKT_INFO(pkt)->is_accelerated = 1; egress->tx_pkts += nfrags; pid->tx_pkts += nfrags; switch (egress->type) { case avm_pa_egresstype_output: pa_do_push_l2(egress, pkt); pkt->tc_index = egress->output.tc_index; if (pid->ecfg.cb_len) { memcpy(&pkt->cb[pid->ecfg.cb_start], egress->output.cb, pid->ecfg.cb_len); } SKB_IFF(pkt) = egress->output.skb_iif; #ifdef AVM_HAVE_SKB_UNIQ_ID pkt->uniq_id &= 0xffffff; pkt->uniq_id |= ((unsigned long)egress->output.cpmac_prio) << 24; #endif pkt->vlan_tci = egress->output.vlan_tci; #ifdef AVM_PA_SKBUFF_HAS_VLAN_PROTO pkt->vlan_proto = egress->output.vlan_proto; #endif pkt->pkt_type = PACKET_OUTGOING; /* We only modified the checksum for the first fragment which is actually * only the header template for skb->frag_list. Therefore checksum * calculation is incomplete (partial). Linux' GSO path handles this and * potentially uses hardware offloading for this. For non-frag_list * traffic we're have calculated the full checksum, none is left. */ pkt->ip_summed = skb_has_frag_list(pkt) ? 
CHECKSUM_PARTIAL : CHECKSUM_NONE; skb_reset_mac_header(pkt); /* MDU: JZ-29198: This is nessesary for fragments generated by avm_pa */ skb_set_network_header(pkt, egress->push_l2_len); /* set priority */ if (info->match.ack_only) { if (egress->output.tack_priority < egress->output.priority) pkt->priority = egress->output.tack_priority; else pkt->priority = egress->output.priority; pid->prioack_accl_acks++; egress->tcpack_pkts += nfrags; } else { pkt->priority = egress->output.priority; } #if AVM_PA_TRACE if (ctx->dbgtrace) pa_printk(KERN_DEBUG, "avm_pa: %lu - _pa_transmit(%s), prio=0x%X, info->match.ack_only=%d\n", pkt_uniq_id(pkt), pid->cfg.name, pkt->priority, info->match.ack_only); #endif #ifdef CONFIG_AVM_PA_TX_NAPI if (pid->tx_napi.dev) { skb_queue_tail(&pid->tx_napi_pkts, pkt); do_schedule_napi(pid); } else #endif (*pid->cfg.tx_func)(pid->cfg.tx_arg, pkt); ctx->stats.fw_output += nfrags; return NET_XMIT_SUCCESS; case avm_pa_egresstype_local: { struct packet_type *ptype = pid->cfg.ptype; if (!ptype) { /* What the heck is wrong. This should be impossible! * This is to debug JZ-26868 */ struct avm_pa_session *session; int i; /* locate session */ for (i = 0; i < CONFIG_AVM_PA_MAX_SESSION; i++) { session = PA_SESSION(ctx, i); if ( (unsigned long)egress >= (unsigned long)&session->egress[0] && (unsigned long)egress < (unsigned long)&session->negress) break; } if (i == CONFIG_AVM_PA_MAX_SESSION) { pa_printk(KERN_CRIT, "Busted egress pointer!! %p\n pid %d\nvpid %d\n", egress, egress->pid_handle, egress->vpid_handle); } else { pa_printk(KERN_CRIT, "Corrupt egress!! 
%p\n pid %d\nvpid %d\n", egress, egress->pid_handle, egress->vpid_handle); pa_show_pids(pa_printk, KERN_CRIT); pa_show_vpids(pa_printk, KERN_CRIT); pa_show_session(session, pa_printk, KERN_CRIT); } BUG(); } skb_set_network_header(pkt, 0); pkt->pkt_type = casttype2pkt_type[egress->match.casttype]; if (egress->local.dst) skb_dst_set(pkt, dst_clone(egress->local.dst)); pkt->dev = egress->local.dev; SKB_IFF(pkt) = egress->local.skb_iif; ctx->stats.fw_local += nfrags; (*ptype->func)(pkt, pkt->dev, ptype, 0); } return NET_XMIT_SUCCESS; case avm_pa_egresstype_rtp: if (egress->rtp.sk) { size_t hsize; skb_set_network_header(pkt, 0); if (pkt->protocol == constant_htons(ETH_P_IP)) { struct iphdr *iph = (struct iphdr *)pkt->data; hsize = iph->ihl*4; } else { hsize = sizeof(struct ipv6hdr); } skb_pull(pkt, hsize); /* skb->data points to udphdr */ skb_set_transport_header(pkt, 0); pkt->pkt_type = casttype2pkt_type[egress->match.casttype]; pkt->dev = egress->rtp.dev; SKB_IFF(pkt) = egress->rtp.skb_iif; ctx->stats.fw_rtp += nfrags; (*egress->rtp.transmit)(egress->rtp.sk, pkt); return NET_XMIT_SUCCESS; } else { ctx->stats.fw_rtp_drop += nfrags; kfree_skb(pkt); return NET_XMIT_SUCCESS; } } ctx->stats.fw_ill += nfrags; kfree_skb(pkt); return NET_XMIT_SUCCESS; } static inline u16 calc_frag_size(u16 mtu, u16 len) { u16 frag_num = len/mtu; u16 frag_size; if (len % mtu) frag_num ++; frag_size = len / frag_num; if (frag_size & 7) { /* mod 8 */ if (frag_num > 1 && (((frag_num - 1)*(frag_size & 7) + frag_size ) > mtu)) { frag_num++; frag_size = len / frag_num; } } frag_size = frag_size & ~7; /* multiple of 8 */ return frag_size; } static void zero_fragment_options(struct iphdr *iph) { unsigned char *p = (unsigned char *)(iph+1); unsigned char *e = p + PA_IPHLEN(iph); unsigned char olen; while (p < e) { if (*p == IPOPT_EOL) { return; } else if (*p == IPOPT_NOP) { p++; } else { olen = *p; if (olen < 2 || p+olen > e) return; if (!IPOPT_COPIED(*p)) memset(p, IPOPT_NOP, olen); p += olen; } } } 
static void pa_fragment_ipv4(struct avm_pa_egress *egress, u16 omtu, PKT *pkt) { struct avm_pa_global *ctx = &pa_glob; u16 iphlen, len, left, mtu, offset, mf, frag_size = 0; unsigned char *data; struct iphdr *iph; iph = (struct iphdr *)PKT_DATA(pkt); iphlen = (u16)PA_IPHLEN(iph); mtu = (u16)((omtu - iphlen) & ~7); /* set mtu to multiple of 8 */ left = (u16)(PKT_LEN(pkt) - iphlen); data = PKT_DATA(pkt) + iphlen; offset = (u16)((ntohs(iph->frag_off) & IP_OFFSET) << 3); mf = (u16)(iph->frag_off & constant_htons(IP_MF)); frag_size = calc_frag_size(mtu, left); /* TODO: This could be optimized of the egress supports GSO * (build up pkt->frag_list instead of _pa_transmit() for each single packet) */ while (left > 0) { struct iphdr *niph; PKT *npkt; if (left > mtu) len = frag_size; /* prevent to small fragments */ else len = left; if ((npkt = PKT_ALLOC(iphlen+len)) == 0) { ctx->stats.fw_frag_fail++; break; } npkt->protocol = pkt->protocol; memcpy(PKT_DATA(npkt), PKT_DATA(pkt), iphlen); memcpy(PKT_DATA(npkt) + iphlen, data, len); niph = (struct iphdr *)PKT_DATA(npkt); niph->frag_off = htons((u16)(offset >> 3)); left -= len; if (offset == 0) zero_fragment_options(iph); if (left > 0 || mf) niph->frag_off |= constant_htons(IP_MF); data += len; offset += len; niph->tot_len = htons((u16)(iphlen+len)); set_ip_checksum(niph); if (_pa_transmit(egress, npkt, 1) == NET_XMIT_DROP) { ctx->stats.fw_frag_fail++; break; } else ctx->stats.fw_frags++; } PKT_FREE(pkt); } static void pa_fragment_ipv6(struct avm_pa_egress *egress, u16 omtu, PKT *pkt) { struct avm_pa_global *ctx = &pa_glob; u16 phlen, hlen, nhlen, len, left, mtu, offset, frag_size = 0; struct ipv6hdr *ipv6h; unsigned char *data; u32 id; ipv6h = (struct ipv6hdr *)PKT_DATA(pkt); phlen = sizeof(struct ipv6hdr) + sizeof(struct ipv6fraghdr); hlen = (u16)sizeof(struct ipv6hdr); nhlen = (u16)hlen + sizeof(struct ipv6fraghdr); /* set mtu to multiple of 8 */ mtu = (u16)((omtu - phlen) & ~7); left = (u16)(pkt->len - hlen); data = 
PKT_DATA(pkt) + hlen; frag_size = calc_frag_size(mtu, left); offset = 0; id = rand(); /* TODO: This could be optimized of the egress supports GSO * (build up pkt->frag_list instead of _pa_transmit() for each single packet) */ while (left > 0) { struct ipv6fraghdr *fragh; struct ipv6hdr *nipv6h; PKT *npkt; if (left > mtu) len = frag_size; /* prevent to small fragments */ else len = left; if ((npkt = PKT_ALLOC(nhlen+len)) == 0) { PKT_FREE(pkt); ctx->stats.fw_frag_fail++; return; } npkt->protocol = pkt->protocol; memcpy(PKT_DATA(npkt), PKT_DATA(pkt), hlen); memcpy(PKT_DATA(npkt) + nhlen, data, len); nipv6h = (struct ipv6hdr *)PKT_DATA(npkt); fragh = (struct ipv6fraghdr *)(nipv6h + 1); memcpy(nipv6h, ipv6h, sizeof(struct ipv6hdr)); fragh->nexthdr = nipv6h->nexthdr; nipv6h->nexthdr = IPPROTO_FRAGMENT; fragh->reserved = 0; fragh->frag_off = htons((u16)offset); fragh->identification = id; left -= len; if (left > 0) fragh->frag_off |= constant_htons(IP6_MF); data += len; offset += len; nipv6h->payload_len = htons((u16)(sizeof(struct ipv6fraghdr)+len)); if (_pa_transmit(egress, npkt, 1) == NET_XMIT_DROP) { ctx->stats.fw_frag_fail++; break; } else ctx->stats.fw_frags++; } PKT_FREE(pkt); } static void pa_transmit(struct avm_pa_egress *egress, PKT *pkt, int bridged, int nfrags) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_pid *pid = PA_PID(ctx, egress->pid_handle); u16 total_len; if (pid->pid_handle != egress->pid_handle) { PKT_FREE(pkt); ctx->stats.fw_drop += nfrags; return; } /* * Bugfix: bridge packets were cut, when third position of * mac address was 0x00, because ethernet header * was used as IP/IPv6 header, and packets were * trimed and perhaps fragmented. * * packets for bridge sessions arrive with ethernet header, * we do not need fragmentation or size check here. 
* * 2014-07-08 calle */ if (bridged == 0) { if (pkt->protocol == constant_htons(ETH_P_IP)) { struct iphdr *iph = (struct iphdr *)PKT_DATA(pkt); total_len = ntohs(iph->tot_len); PKT_TRIM(pkt, total_len); if (PKT_LEN(pkt) > egress->mtu) { pa_fragment_ipv4(egress, egress->mtu, pkt); return; } } else if (pkt->protocol == constant_htons(ETH_P_IPV6)) { struct ipv6hdr *ipv6h = (struct ipv6hdr *)PKT_DATA(pkt); total_len = sizeof(struct ipv6hdr)+ntohs(ipv6h->payload_len); PKT_TRIM(pkt, total_len); if (PKT_LEN(pkt) > egress->mtu) { pa_fragment_ipv6(egress, egress->mtu, pkt); return; } } } if (_pa_transmit(egress, pkt, nfrags) == NET_XMIT_DROP) ctx->stats.fw_drop += nfrags; else ctx->stats.fw_pkts += nfrags; } static void pa_do_modify_l3(struct avm_pa_mod_rec *mod, PKT *pkt) { if (mod->v4_mod.flags) { pa_do_v4_mod_rec(&mod->v4_mod, PKT_DATA(pkt)); } else if (mod->v6_decrease_hop_limit) { struct ipv6hdr *ipv6h = (struct ipv6hdr *)PKT_DATA(pkt); ipv6h->hop_limit--; } } static void pa_do_modify_non_l2(struct avm_pa_mod_rec *mod, PKT *pkt, int bridged) { pkt->protocol = mod->protocol; /* FIXME: This is really bad. We can only work with vlan in the packet data. We * really should do this properly. This also assumes that vlan_tci (if non-zero) * was patched into the packet data before avm_pa_pid_snoop_transmit()! */ pkt->vlan_tci = 0; if (bridged) return; if (mod->pull_l2_len) PKT_PULL(pkt, mod->pull_l2_len); if (mod->pull_encap_len) PKT_PULL(pkt, mod->pull_encap_len); /* We're now at the innermost l3 header, set offsets in the skb appropriately. * This is required for Linux' transmit paths and some drivers (but remember that * this is not done for bridged sessions). 
*/ skb_reset_network_header(pkt); if (mod->protocol == constant_htons(ETH_P_IP)) skb_set_transport_header(pkt, mod->v4_mod.iphlen); else if (mod->protocol == constant_htons(ETH_P_IPV6)) skb_set_transport_header(pkt, sizeof(struct ipv6hdr)); pa_do_modify_l3(mod, pkt); if (mod->push_encap_len) { unsigned tot_len; memcpy(PKT_PUSH(pkt, mod->push_encap_len), HDRCOPY(mod)+mod->push_l2_len, mod->push_encap_len); tot_len = PKT_LEN(pkt); if (mod->push_ipversion == 4) { struct iphdr *iph = (struct iphdr *)PKT_DATA(pkt); iph->id = rand() & 0xffff; iph->tot_len = htons(tot_len); set_ip_checksum(iph); } else { struct ipv6hdr *ipv6h = (struct ipv6hdr *)PKT_DATA(pkt); ipv6h->payload_len = htons(tot_len - sizeof(struct ipv6hdr)); } if (mod->push_udpoffset) { struct udphdr *udph = (struct udphdr *)(PKT_DATA(pkt)+mod->push_udpoffset); udph->len = htons(tot_len - mod->push_udpoffset); if (mod->push_ipversion == 4) set_udp_checksum((struct iphdr *)PKT_DATA(pkt), udph); else set_udpv6_checksum((struct ipv6hdr *)PKT_DATA(pkt), udph); } } } static void _pa_do_send_egress(struct avm_pa_session *session, PKT *pkt, int bridged, int nfrags) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_egress *egress; int negress; PKT *npkt; negress = session->negress; egress = &session->egress[0]; for ( ; --negress; egress++) { /* egress can be added in parallel, pid_handle != 0 indicates that this operation * has completed. 
*/ if (likely(egress->pid_handle) && (npkt = PKT_COPY(pkt)) != 0) pa_transmit(egress, npkt, bridged, nfrags); else ctx->stats.fw_fail += nfrags; } pa_transmit(egress, pkt, bridged, nfrags); } static void _pa_do_modify_and_send(struct avm_pa_session *session, PKT *pkt) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_mod_rec *mod = &session->mod; int bridged = session->bsession != 0; int gso, nfrags; PKT *npkt, *next; /* remember byte conunt before pa_do_modify_non_l2() pulls some */ int headlen = PKT_LEN(pkt); if (skb_has_frag_list(pkt)) { #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37) nfrags = 0; /* For now, only plain Ethernet+IP can use the fast GSO path, pppoe and tunneling * take the slower path. Adopt more traffic types this once a driver supports it, * but beware that IP fragmentation must be handled as well. * Hack: Peek at the first egress to see if PPPoE is in use, since this * is not available in the pkttype. This assumes all egress use PPPoE but this * is currently always the case since we don't do multicast on upstream and * never do PPPoE on upstream. 
*/ #if AVM_PA_WITH_GSO gso = (mod->pkttype & ~AVM_PA_PKTTYPE_BASE_MASK) == 0 && session->egress[0].pppoe_offset == AVM_PA_OFFSET_NOT_SET; #else gso = 0; /* forcefully disabled until more testing has been done */ #endif npkt = skb_shinfo(pkt)->frag_list; if (gso) ctx->stats.tx_fast_gso += 1; else skb_frag_list_init(pkt); do { next = npkt->next; /* have to count frags even for the non-GSO path (for statistics) */ nfrags += 1; if (!gso) { npkt->next = NULL; PKT_PUSH(npkt, session->ingress.hdrlen); pa_do_modify_non_l2(mod, npkt, bridged); _pa_do_send_egress(session, npkt, bridged, 1); } } while((npkt = next)); session->ingress_sw_stats.tx_bytes += pkt->data_len + nfrags * headlen; session->ingress_sw_stats.tx_pkts += nfrags; #endif } else { gso = nfrags = 1; /* single, non-frag_list packets also use the normal path */ session->ingress_sw_stats.tx_bytes += headlen; session->ingress_sw_stats.tx_pkts += 1; } /* In the GSO case with frag_list, the head skb must be modified. Linux GSO * will then use this as a template for the frag_list skbs, which is possibly * done in HW (otherwise we'd do it ourselves) */ if (gso) { pa_do_modify_non_l2(mod, pkt, bridged); _pa_do_send_egress(session, pkt, bridged, nfrags); } else { /* frag_list packet are sent out. the head skb remains and must be freed. * In the GSO path, the egress is responsible (usually through dev_queue_xmit()). */ PKT_FREE(pkt); } /* transmit_in_progress is set, so it's safe to kill without flush */ if (session->timeout == 0) pa_session_kill(session, "fast timeout"); } /* Pass NULL for session to to get it from the packet. Do this if there is uncertainty if * the session is still valid, e.g. 
after sitting on a queue (if called from avm_pa_tbf_tasklet()) */ static void pa_do_modify_and_send(struct avm_pa_session *session, PKT *pkt) { struct avm_pa_global *ctx = &pa_glob; /* avoid spin lock if a session was given */ if (!session) { session = pa_session_get(AVM_PKT_INFO(pkt)->session_handle); if (!session) { ctx->stats.fw_drop_gone++; PKT_FREE(pkt); return; } } atomic_inc(&session->transmit_in_progress); /* Must check validity again because of possible race condition before * flipping transmit_in_progress. */ if ( unlikely(!pa_session_valid(session)) || unlikely(AVM_PKT_INFO(pkt)->session_uniq_id != session->uniq_id)) { /* Maybe the session was deleted and recycled while being queued. Can't happen * from now on because transmit_in_progress != 0 */ ctx->stats.fw_drop_gone++; PKT_FREE(pkt); } else if (AVM_PKT_INFO(pkt)->already_modified) { PKT *npkt; struct avm_pa_egress *egress; int nfrags = 0; skb_walk_frags(pkt, npkt) nfrags += 1; egress = &session->egress[AVM_PKT_INFO(pkt)->egress_offset]; pa_transmit(egress, pkt, session->bsession != 0, nfrags ? 
nfrags : 1); } else { _pa_do_modify_and_send(session, pkt); } atomic_dec(&session->transmit_in_progress); } static int pa_egress_size_check(struct avm_pa_session *session, PKT *pkt) { struct avm_pa_pkt_info *info = AVM_PKT_INFO(pkt); if (info->match.fragok) return 0; if (session->mod.push_encap_len == 0) { /* no tunnel on output */ struct avm_pa_mod_rec *mod = &session->mod; unsigned len = PKT_FRAGLEN(pkt) - mod->pull_l2_len - mod->pull_encap_len; int negress; for (negress = 0; negress < session->negress; negress++) { struct avm_pa_egress *egress = &session->egress[negress]; if (len > egress->mtu) return -1; } } return 0; } /* ------------------------------------------------------------------------ */ /* -------- macaddr management -------------------------------------------- */ /* ------------------------------------------------------------------------ */ static void pa_show_macaddr(struct avm_pa_macaddr *macaddr, pa_fprintf fprintffunc, void *arg) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_pid *pid = PA_PID(ctx, macaddr->pid_handle); char buf[32]; mac2str(&macaddr->mac, buf, sizeof(buf)); (*fprintffunc)(arg, "Macaddr : %s ref %3lu Pid %2d %s\n", buf, macaddr->refcount, macaddr->pid_handle, pid->cfg.name); } static inline u32 macaddr_hash(unsigned char mac[ETH_ALEN]) { u32 h = 0; int i; for (i=0; i < ETH_ALEN; i++) { h += mac[i]; h += (h<<10); h ^= (h>>6); } h += (h<<3); h ^= (h>>11); h += (h<<15); return h; } static struct avm_pa_macaddr * _pa_macaddr_link(unsigned char mac[ETH_ALEN], avm_pid_handle pid_handle) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_macaddr *p; u32 hash; int i; AVM_PA_LOCK_DECLARE; AVM_PA_WRITE_LOCK(); hash = macaddr_hash(mac); for (p = ctx->macaddr_hash[hash%CONFIG_AVM_PA_MAX_SESSION]; p; p = p->link) { if (memcmp(p->mac, mac, ETH_ALEN) == 0) { p->refcount++; p->pid_handle = pid_handle; AVM_PA_WRITE_UNLOCK(); return p; } } for (i=0; i < CONFIG_AVM_PA_MAX_SESSION; i++) { p = &ctx->macaddr_array[i]; if (p->refcount == 0) 
{ memcpy(p->mac, mac, ETH_ALEN); p->pid_handle = pid_handle; p->refcount++; p->link = ctx->macaddr_hash[hash%CONFIG_AVM_PA_MAX_SESSION]; ctx->macaddr_hash[hash%CONFIG_AVM_PA_MAX_SESSION] = p; if (ctx->dbgsession) { pa_printk(KERN_DEBUG, "\navm_pa: new macaddr:\n"); pa_show_macaddr(p, pa_printk, KERN_DEBUG); } AVM_PA_WRITE_UNLOCK(); return p; } } AVM_PA_WRITE_UNLOCK(); return 0; } static inline struct avm_pa_macaddr * pa_macaddr_link(unsigned char mac[ETH_ALEN], avm_pid_handle pid_handle) { if (mac[0] & 1) return 0; return _pa_macaddr_link(mac, pid_handle); } static void pa_macaddr_unlink(struct avm_pa_macaddr *destmac) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_macaddr *p, **pp; u32 hash; if (--destmac->refcount > 0) return; hash = macaddr_hash(destmac->mac); pp = &ctx->macaddr_hash[hash%CONFIG_AVM_PA_MAX_SESSION]; while ((p = *pp) != 0) { if (p == destmac) { *pp = p->link; if (ctx->dbgsession) { pa_printk(KERN_DEBUG, "\navm_pa: delete macaddr:\n"); pa_show_macaddr(p, pa_printk, KERN_DEBUG); } memset(p, 0, sizeof(struct avm_pa_macaddr)); return; } pp = &p->link; } } static void pa_check_and_handle_ingress_pid_change(unsigned char mac[ETH_ALEN], avm_pid_handle pid_handle) { struct avm_pa_global *ctx = &pa_glob; u32 hash = macaddr_hash(mac); struct avm_pa_macaddr *p; int pid_group = PA_PID(ctx, pid_handle)->ecfg.pid_group; int pid_changed = 0; AVM_PA_LOCK_DECLARE; AVM_PA_READ_LOCK(); for (p = ctx->macaddr_hash[hash%CONFIG_AVM_PA_MAX_SESSION]; p; p = p->link) { if (memcmp(mac, &p->mac, ETH_ALEN) == 0) { if (p->pid_handle != pid_handle) { struct avm_pa_pid *pid = PA_PID(ctx, p->pid_handle); if (pid->ingress_pid_handle != pid_handle) { if (pid_group == 0 || pid_group != pid->ecfg.pid_group) pid_changed = 1; } } break; } } AVM_PA_READ_UNLOCK(); if (pid_changed) { char buf[128]; mac2str(mac, buf, sizeof(buf)); if (net_ratelimit()) printk(KERN_INFO "avm_pa: pid changed for %s (%d %s -> %d %s)\n", buf, p->pid_handle, PA_PID(ctx, p->pid_handle)->cfg.name, 
pid_handle, PA_PID(ctx, pid_handle)->cfg.name); avm_pa_flush_sessions_with_destmac(p); } } /* ------------------------------------------------------------------------ */ /* -------- bsession management ------------------------------------------- */ /* ------------------------------------------------------------------------ */ static struct ethhdr *pa_get_ethhdr(enum avm_pa_framing framing, PKT *pkt) { if (framing == avm_pa_framing_ether) return (struct ethhdr *)PKT_DATA(pkt); if (framing == avm_pa_framing_dev) return eth_hdr(pkt); return 0; } static inline u32 ethh_hash(struct ethhdr *ethh) { return jhash_3words(get_unaligned((u32 *)(ðh->h_source[2])), get_unaligned((u32 *)(ðh->h_dest[2])), (u32)ethh->h_proto, 0); } static inline struct avm_pa_session * pa_bsession_search_unlocked(struct avm_pa_pid *pid, u32 hash, struct ethhdr *ethh) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_bsession *p; for (p = pid->hash_bsess[hash%CONFIG_AVM_PA_MAX_SESSION]; p; p = p->link) { if (memcmp(ethh, &p->ethh, sizeof(struct ethhdr)) == 0) break; } return p ? 
PA_SESSION(ctx, p->session_handle) : 0; } static struct avm_pa_session * pa_bsession_search(struct avm_pa_pid *pid, u32 hash, struct ethhdr *ethh) { struct avm_pa_session *p; AVM_PA_LOCK_DECLARE; AVM_PA_READ_LOCK(); p = pa_bsession_search_unlocked(pid, hash, ethh); AVM_PA_READ_UNLOCK(); return p; } static struct avm_pa_bsession * pa_bsession_alloc(u32 hash, struct ethhdr *ethh, avm_session_handle session_handle) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_bsession *p = &ctx->bsess_array[session_handle]; p->link = 0; p->hash = hash%CONFIG_AVM_PA_MAX_SESSION; memcpy(&p->ethh, ethh, sizeof(struct ethhdr)); p->session_handle = session_handle; ctx->stats.nbsessions++; return p; } static void pa_show_bsession(struct avm_pa_bsession *bsession, pa_fprintf fprintffunc, void *arg) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_session *session = PA_SESSION(ctx, bsession->session_handle); char buf[128]; char *s = buf, *end = buf + sizeof(buf); int i; (*fprintffunc)(arg, "Session : %d\n", bsession->session_handle); (*fprintffunc)(arg, "In Pid : %d (%s)\n", session->ingress_pid_handle, PA_PID(ctx, session->ingress_pid_handle)->cfg.name); (*fprintffunc)(arg, "Hash : %lu\n", (unsigned long)bsession->hash); s += mac2str(&bsession->ethh.h_dest, s, end - s); *s++ = ' '; mac2str(&bsession->ethh.h_source, s, end - s); (*fprintffunc)(arg, "Eth Hdr DS : %s proto %04X\n", buf, ntohs(bsession->ethh.h_proto)); /* In practice, negress is always 1, since multicast uses normal sessions */ for (i = 0; i < session->negress; i++) { struct avm_pa_egress *egress = &session->egress[i]; (*fprintffunc)(arg, "Egress : %d\n", i); if (egress->pid_handle) { (*fprintffunc)(arg, "Out Pid : %d (%s)\n", egress->pid_handle, PA_PID(ctx, egress->pid_handle)->cfg.name); } if (egress->vpid_handle) { (*fprintffunc)(arg, "Out VPid : %d (%s)\n", egress->vpid_handle, PA_VPID(ctx, egress->vpid_handle)->cfg.name); } if (egress->destmac) pa_show_macaddr(egress->destmac, fprintffunc, arg); } } static 
void pa_bsession_delete(struct avm_pa_pid *pid, struct avm_pa_bsession *bsession) { struct avm_pa_bsession **pp, *p; for (pp = &pid->hash_bsess[bsession->hash]; (p = *pp) != 0; pp = &p->link) { if (p == bsession) { struct avm_pa_global *ctx = &pa_glob; *pp = p->link; p->link = 0; p->session_handle = 0; ctx->stats.nbsessions--; break; } } } /* ------------------------------------------------------------------------ */ /* -------- session management -------------------------------------------- */ /* ------------------------------------------------------------------------ */ /* Search for ACTIVE sessions */ #define pa_session_search(pid, match) pa_session_hash_search(pid, match) static inline struct avm_pa_session * pa_session_hash_search_unlocked(struct avm_pa_pid *pid, struct avm_pa_pkt_match *ingress) { struct avm_pa_session *p; for (p = pid->hash_sess[ingress->hash%CONFIG_AVM_PA_MAX_SESSION]; p; p = p->link) { if (pa_match_cmp(ingress, &p->ingress) == 0 && !p->flushed) break; } return p; } static struct avm_pa_session * pa_session_hash_search(struct avm_pa_pid *pid, struct avm_pa_pkt_match *ingress) { struct avm_pa_session *p; AVM_PA_LOCK_DECLARE; AVM_PA_READ_LOCK(); p = pa_session_hash_search_unlocked(pid, ingress); AVM_PA_READ_UNLOCK(); return p; } static void pa_session_hash_insert(struct avm_pa_pid *pid, struct avm_pa_session *session) { struct avm_pa_bsession *bsession = session->bsession; session->link = pid->hash_sess[session->ingress.hash]; pid->hash_sess[session->ingress.hash] = session; if (bsession) { bsession->link = pid->hash_bsess[bsession->hash]; pid->hash_bsess[bsession->hash] = bsession; } session->hashed = 1; } static void pa_session_hash_delete(struct avm_pa_pid *pid, struct avm_pa_session *session) { if (session->hashed) { struct avm_pa_session **pp, *p; for (pp = &pid->hash_sess[session->ingress.hash]; (p = *pp) != 0; pp = &p->link) { if (p == session) { *pp = p->link; p->link = 0; session->hashed = 0; break; } } } } static void 
pa_session_lru_delete(struct avm_pa_session *session) { if (session->is_on_lru) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_session_lru *lru = &ctx->sess_lru[session->lru]; if (session->lru_prev) session->lru_prev->lru_next = session->lru_next; else lru->lru_head = session->lru_next; if (session->lru_next) session->lru_next->lru_prev = session->lru_prev; else lru->lru_tail = session->lru_prev; BUG_ON(lru->nsessions == 0); lru->nsessions--; session->lru_next = session->lru_prev = 0; session->is_on_lru = 0; } } static void pa_session_lru_update(int which, struct avm_pa_session *session) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_session_lru *lru = &ctx->sess_lru[which]; if (session->is_on_lru) pa_session_lru_delete(session); if (lru->lru_tail) { session->lru_next = 0; session->lru_prev = lru->lru_tail; lru->lru_tail->lru_next = session; lru->lru_tail = session; } else { session->lru_next = session->lru_prev = 0; lru->lru_head = lru->lru_tail = session; } lru->nsessions++; if (lru->nsessions > lru->maxsessions) lru->maxsessions = lru->nsessions; session->is_on_lru = 1; session->lru = which; } static int pa_session_activate(struct avm_pa_session *session) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_pid *pid = PA_PID(ctx, session->ingress_pid_handle); struct avm_pa_session *s; AVM_PA_LOCK_DECLARE; /* session creation can happen concurrently, but after this call only one * session of a kind may exist (to avoid confusing hardware acceleration), so the * hash lookup finds if anyone else won the race */ AVM_PA_WRITE_LOCK(); if (session->bsession) s = pa_bsession_search_unlocked(pid, session->bsession->hash, &session->bsession->ethh); else s = pa_session_hash_search_unlocked(pid, &session->ingress); if (s == 0) { pa_session_hash_insert(pid, session); pa_session_lru_update(AVM_PA_LRU_ACTIVE, session); session->endtime = jiffies + session->timeout; } else { /* session wasn't on state ACTIVE yet, so it's safe to kill without flush */ 
pa_session_kill_unlocked(session, "lost creation race"); } AVM_PA_WRITE_UNLOCK(); return session->hashed != 0; } /* This requires the WRITE lock. It would be nicer if only the READ lock would be needed * but since the session is moved within the ACTIVE lru it's not possibly currently. */ static inline void pa_session_update_unlocked(struct avm_pa_session *session) { BUG_ON(session->is_on_lru == 0); if (session->lru == AVM_PA_LRU_ACTIVE) { pa_session_lru_update(AVM_PA_LRU_ACTIVE, session); session->endtime = jiffies + session->timeout; } else { /* session flushed */ } } static void pa_session_update(struct avm_pa_session *session) { AVM_PA_LOCK_DECLARE; AVM_PA_WRITE_LOCK(); pa_session_update_unlocked(session); AVM_PA_WRITE_UNLOCK(); } static inline void pa_start_stat_timer(void) { struct avm_pa_global *ctx = &pa_glob; mod_timer(&ctx->stat_timer, jiffies + msecs_to_jiffies(AVM_PA_STAT_TIMEOUT)); } static inline void pa_start_gc_timer(void) { struct avm_pa_global *ctx = &pa_glob; mod_timer(&ctx->gc_timer, jiffies + AVM_PA_GC_TIMEOUT*HZ); } static void __init avm_pa_init_freelist(void) { struct avm_pa_global *ctx = &pa_glob; int i; AVM_PA_LOCK_DECLARE; AVM_PA_WRITE_LOCK(); for (i=CONFIG_AVM_PA_MAX_SESSION-1; i > 0; i--) { struct avm_pa_session *session = PA_SESSION(ctx, i); pa_session_lru_update(AVM_PA_LRU_FREE, session); } AVM_PA_WRITE_UNLOCK(); } static struct avm_pa_session *pa_session_alloc(struct avm_pa_pkt_match *match) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_session *session; AVM_PA_LOCK_DECLARE; AVM_PA_WRITE_LOCK(); if ((session = ctx->sess_lru[AVM_PA_LRU_FREE].lru_head) != 0) { pa_session_lru_delete(session); memset(session, 0, sizeof(struct avm_pa_session)); session->session_handle = session - ctx->sess_array; session->uniq_id = ctx->next_session_uniq_id++; session->ingress = *match; session->endtime = jiffies; switch (AVM_PA_PKTTYPE_IPPROTO(match->pkttype)) { case IPPROTO_TCP: session->timeout = ctx->tcp_timeout_secs*HZ; break; case 
IPPROTO_UDP: session->timeout = ctx->udp_timeout_secs*HZ; break; case IPPROTO_ICMPV6: case IPPROTO_ICMP: session->timeout = ctx->echo_timeout_secs*HZ; break; } } AVM_PA_WRITE_UNLOCK(); pa_start_gc_timer(); return session; } static void avm_pa_set_associated_session_handle(struct avm_pa_session *session) { #ifdef CONFIG_AVM_GENERIC_CONNTRACK if (session->generic_ct) { avm_session_handle handle; enum generic_ct_dir dir; if (session->generic_ct_dir == GENERIC_CT_DIR_ORIGINAL) dir = GENERIC_CT_DIR_REPLY; else dir = GENERIC_CT_DIR_ORIGINAL; if ((handle = (avm_session_handle)(unsigned long)generic_ct_sessionid_get(session->generic_ct, dir)) != 0) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_session *asession = PA_SESSION(ctx, handle); if (pa_session_valid(asession)) { session->associated_session_handle = handle; asession->associated_session_handle = session->session_handle; } } } #endif } static void avm_pa_unset_associated_session_handle(struct avm_pa_session *session) { avm_session_handle handle; if ((handle = session->associated_session_handle) != 0) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_session *asession = PA_SESSION(ctx, handle); if (pa_session_valid(asession)) asession->associated_session_handle = 0; session->associated_session_handle = 0; } } static void pa_show_session(struct avm_pa_session *session, pa_fprintf fprintffunc, void *arg) { struct avm_pa_global *ctx = &pa_glob; char buf[max_t(size_t, KSYM_SYMBOL_LEN, 64ul)]; struct avm_pa_macaddr *destmac; struct net_device *dev; struct dst_entry *dst; unsigned negress; (*fprintffunc)(arg, "Session : %u (%d)\n", session->uniq_id, session->session_handle); { char *state; if (session->is_on_lru) { switch (session->lru) { case AVM_PA_LRU_ACTIVE: state = "active"; break; case AVM_PA_LRU_DEAD: state = "dead"; break; case AVM_PA_LRU_FREE: state = "free"; break; default: state = "BAD STATE"; break; } } else { state = "create"; } #ifdef AVM_PA_HAS_GUEST_STATS (*fprintffunc)(arg, "State : %s%s\n", 
state, session->is_guest ? " (guest)" : ""); #else (*fprintffunc)(arg, "State : %s\n", state); #endif } (*fprintffunc)(arg, "TX active : %d\n", atomic_read(&session->transmit_in_progress)); (*fprintffunc)(arg, "In Pid : %d (%s)\n", session->ingress_pid_handle, PA_PID(ctx, session->ingress_pid_handle)->cfg.name); if (session->ingress_vpid_handle) { (*fprintffunc)(arg, "In VPid : %d (%s)\n", session->ingress_vpid_handle, PA_VPID(ctx, session->ingress_vpid_handle)->cfg.name); } if (ctx->hardware_pa.add_session) { if ((session->in_hw || avm_pa_get_hw_session(session)) && ctx->hardware_pa.session_state) (*fprintffunc)(arg, "In HW : %s\n", (*ctx->hardware_pa.session_state)(session)); else (*fprintffunc)(arg, "In HW : %s\n", session->in_hw ? "yes" : "no"); } #ifdef CONFIG_AVM_GENERIC_CONNTRACK if (session->generic_ct) { if (session->generic_ct_dir == GENERIC_CT_DIR_ORIGINAL) (*fprintffunc)(arg, "CT dir : original\n"); else (*fprintffunc)(arg, "CT dir : reply\n"); } #endif if (session->associated_session_handle) { (*fprintffunc)(arg, "Associated : %d\n", session->associated_session_handle); } (*fprintffunc)(arg, "Realtime : %s\n", session->realtime ? 
"yes" : "no"); pa_show_pkt_match(&session->ingress, session->bsession != 0, session->mod.pkttype, fprintffunc, arg); pa_show_mod_rec(&session->mod, fprintffunc, arg); (*fprintffunc)(arg, "Hroom : %u\n", (unsigned) session->needed_headroom); (*fprintffunc)(arg, "Timeout : %hu\n", session->timeout/HZ); (*fprintffunc)(arg, "SW stats : %lu pkts, %llu bytes\n", (unsigned long)session->ingress_sw_stats.tx_pkts, (unsigned long long)session->ingress_sw_stats.tx_bytes); (*fprintffunc)(arg, "HW stats : %lu pkts, %llu bytes}\n", (unsigned long)session->ingress_hw_stats.tx_pkts, (unsigned long long)session->ingress_hw_stats.tx_bytes); for (negress = 0; negress < session->negress; negress++) { struct avm_pa_egress *egress = &session->egress[negress]; (*fprintffunc)(arg, "Egress : %d\n", negress); if (egress->pid_handle) { (*fprintffunc)(arg, "Out Pid : %d (%s)\n", egress->pid_handle, PA_PID(ctx, egress->pid_handle)->cfg.name); } else { (*fprintffunc)(arg, "Egress under construction\n"); continue; } if (egress->vpid_handle) { (*fprintffunc)(arg, "Out VPid : %d (%s)\n", egress->vpid_handle, PA_VPID(ctx, egress->vpid_handle)->cfg.name); } (*fprintffunc)(arg, "Mtu : %u\n", (unsigned)egress->mtu); if (egress->push_l2_len) { data2hex(HDRCOPY(&egress->match), egress->push_l2_len, buf, sizeof(buf)); (*fprintffunc)(arg, "L2 push : %s\n", buf); if (egress->pppoe_offset != AVM_PA_OFFSET_NOT_SET) { (*fprintffunc)(arg, "PPPoE off : %u\n", (unsigned)egress->pppoe_offset); (*fprintffunc)(arg, "PPPoE hlen : %u\n", (unsigned)egress->pppoe_hdrlen); } } if ((destmac = egress->destmac) != 0) pa_show_macaddr(destmac, fprintffunc, arg); switch (egress->type) { case avm_pa_egresstype_output: { struct avm_pa_pid *pid = PA_PID(ctx, egress->pid_handle); (*fprintffunc)(arg, "Orig Prio : %hx:%hx\n", TC_H_MAJ(egress->output.orig_priority)>>16, TC_H_MIN(egress->output.orig_priority)); (*fprintffunc)(arg, "Prio : %hx:%hx\n", TC_H_MAJ(egress->output.priority)>>16, TC_H_MIN(egress->output.priority)); if 
(egress->output.tack_priority != egress->output.orig_priority) { (*fprintffunc)(arg, "TACK prio : %hx:%hx\n", TC_H_MAJ(egress->output.tack_priority)>>16, TC_H_MIN(egress->output.tack_priority)); } (*fprintffunc)(arg, "TC index : %u\n", (unsigned)egress->output.tc_index); (*fprintffunc)(arg, "cpmac prio : %u\n", (unsigned)egress->output.cpmac_prio); if (egress->output.vlan_tci & VLAN_TAG_PRESENT) { unsigned value = egress->output.vlan_tci & VLAN_VID_MASK; (*fprintffunc)(arg, "vlan id : %u\n", value); value = (egress->output.vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; (*fprintffunc)(arg, "vlan prio : %u\n", value); #ifdef AVM_PA_SKBUFF_HAS_VLAN_PROTO value = ntohs(egress->output.vlan_proto); (*fprintffunc)(arg, "vlan proto : 0x%04u\n", value); #endif } if (avm_pa_pid_tack_enabled(pid)) { (*fprintffunc)(arg, "tack pkts : %u (accl acks %u)\n", pid->prioack_acks, pid->prioack_accl_acks); } } break; case avm_pa_egresstype_local: if ((dst = egress->local.dst) != 0) { sprint_symbol(buf, (unsigned long)dst->input); (*fprintffunc)(arg, "Dest : %s\n", buf); } else { (*fprintffunc)(arg, "Dest : \n"); } if ((dev = egress->local.dev) != 0) { (*fprintffunc)(arg, "Input Dev : %s\n", dev->name); } else { (*fprintffunc)(arg, "Input Dev : \n"); } break; case avm_pa_egresstype_rtp: sprint_symbol(buf, (unsigned long)egress->rtp.transmit); (*fprintffunc)(arg, "transmitfunc : %s\n", buf); if ((dev = egress->rtp.dev) != 0) { (*fprintffunc)(arg, "Input Dev : %s\n", dev->name); } else { (*fprintffunc)(arg, "Input Dev : \n"); } } (*fprintffunc)(arg, "SW stats : %lu pkts, %llu bytes\n", (unsigned long)egress->sw_stats.tx_pkts, (unsigned long long)egress->sw_stats.tx_bytes); (*fprintffunc)(arg, "HW stats : %lu pkts, %llu bytes\n", (unsigned long)egress->hw_stats.tx_pkts, (unsigned long long)egress->hw_stats.tx_bytes); (*fprintffunc)(arg, "Pkts : TX %lu (acks %lu)\n", (unsigned long)egress->tx_pkts, (unsigned long)egress->tcpack_pkts); } } static void pa_delete_session(struct avm_pa_session 
*session)
{
   struct avm_pa_global *ctx = &pa_glob;
   const char *why = session->why_killed ? session->why_killed : "???";
   int j;

   if (ctx->dbgsession) {
      pa_printk(KERN_DEBUG, "\navm_pa: delete session: %s\n", why);
      pa_show_session(session, pa_printk, KERN_DEBUG);
   }
   pa_session_lru_delete(session);
#if AVM_PA_TRACE
   if (ctx->dbgtrace) {
      struct avm_pa_pid *pid = PA_PID(ctx, session->ingress_pid_handle);
      pa_printk(KERN_DEBUG, "avm_pa: delete session %d (%s) %s\n",
                            session->session_handle, pid->cfg.name, why);
   }
#endif
   /*
    * pa_session_kill() has
    * - removed session from hash
    * - removed session from hardware pa
    * - removed session from generic connection tracking
    */
   BUG_ON(session->hashed);
   BUG_ON(session->in_hw);
#ifdef CONFIG_AVM_GENERIC_CONNTRACK
   BUG_ON(session->generic_ct);
#endif
   /* release everything the egress still hold references to */
   for (j = 0; j < session->negress; j++) {
      struct avm_pa_egress *egress = &session->egress[j];
      if (egress->destmac) {
         pa_macaddr_unlink(egress->destmac);
         egress->destmac = 0;
      }
      if (egress->type == avm_pa_egresstype_local) {
         if (egress->local.dst) {
            dst_release(egress->local.dst);
            egress->local.dst = 0;
         }
      }
      if (egress->type == avm_pa_egresstype_rtp) {
         if (egress->rtp.sk) {
            sock_put(egress->rtp.sk);
            egress->rtp.sk = 0;
         }
      }
   }
   /* the session can be handed out again from now on */
   pa_session_lru_update(AVM_PA_LRU_FREE, session);
}

/*
 * Take a session out of service and move it to the DEAD lru.
 * The caller has to hold the write lock (see pa_session_kill()).
 *
 * The session is removed from its lru, from the session hash and, for
 * bridge sessions, from the bridge session hash. Its statistics are
 * handled (pa_session_handle_stats()), it is removed from the hardware
 * accelerator if it was added there, and the references to the
 * connection tracking entry and to the associated session are dropped.
 * For a session with a single rtp egress the socket reference is
 * released here as well.
 *
 * 'why' is stored in the session and is printed later when the session
 * is deleted. The pointer itself is stored, not a copy of the string.
 */
static void pa_session_kill_unlocked(struct avm_pa_session *session, const char *why)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *pid = PA_PID(ctx, session->ingress_pid_handle);

#if AVM_PA_TRACE
   if (ctx->dbgtrace) {
      pa_printk(KERN_DEBUG, "avm_pa: kill session %d (%s) %s\n",
                            session->session_handle, pid->cfg.name, why);
   }
#endif
   if (ctx->dbgsession) {
      pa_printk(KERN_DEBUG, "\navm_pa: kill session: %s\n", why);
      pa_show_session(session, pa_printk, KERN_DEBUG);
   }
   /* make the session unreachable first */
   pa_session_lru_delete(session);
   pa_session_hash_delete(pid, session);
   if (session->bsession) {
      if (ctx->dbgsession) {
         pa_printk(KERN_DEBUG, "\navm_pa: delete bsession: %s\n", why);
         pa_show_bsession(session->bsession, pa_printk, KERN_DEBUG);
      }
      pa_bsession_delete(pid, session->bsession);
   }
   pa_session_handle_stats(session);
   /* in_hw is only cleared if the hardware pa provides remove_session */
   if (session->in_hw && ctx->hardware_pa.remove_session) {
      (*ctx->hardware_pa.remove_session)(session);
      session->in_hw = 0;
   }
   /* rtp.sk is zeroed, so pa_delete_session() will not put it again */
   if (   session->negress == 1
       && session->egress[0].type == avm_pa_egresstype_rtp
       && session->egress[0].rtp.sk) {
      sock_put(session->egress[0].rtp.sk);
      session->egress[0].rtp.sk = 0;
   }
#ifdef CONFIG_AVM_GENERIC_CONNTRACK
   if (session->generic_ct) {
      struct generic_ct *ct = session->generic_ct;
      session->generic_ct = 0;
      generic_ct_sessionid_set(ct, session->generic_ct_dir, (void *)0);
      generic_ct_put(ct);
   }
#endif
   avm_pa_unset_associated_session_handle(session);
   session->why_killed = why;
   pa_session_lru_update(AVM_PA_LRU_DEAD, session);
}

/* Same as pa_session_kill_unlocked(), but takes the write lock. */
static void pa_session_kill(struct avm_pa_session *session, const char *why)
{
   AVM_PA_LOCK_DECLARE;

   AVM_PA_WRITE_LOCK();
   pa_session_kill_unlocked(session, why);
   AVM_PA_WRITE_UNLOCK();
}

/*
 * Mark a session as flushed and remember the reason. Nothing else is
 * done here: the session stays on its lru and in the hash, but
 * pa_session_hash_search_unlocked() skips flushed sessions.
 * The caller has to hold the write lock (see pa_session_flush()).
 */
static void pa_session_flush_unlocked(struct avm_pa_session *session, const char *why)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *pid; /* only used for the trace output */

#if AVM_PA_TRACE
   if (ctx->dbgtrace) {
      pid = PA_PID(ctx, session->ingress_pid_handle);
      pa_printk(KERN_DEBUG, "avm_pa: flush session %d (%s) %s\n",
                            session->session_handle, pid->cfg.name, why);
   }
#endif
   if (ctx->dbgsession) {
      pa_printk(KERN_DEBUG, "\navm_pa: flush session: %s\n", why);
      pa_show_session(session, pa_printk, KERN_DEBUG);
   }
   session->flushed = 1; /* will be killed on next gc */
   session->why_killed = why;
}

/* Same as pa_session_flush_unlocked(), but takes the write lock. */
static void pa_session_flush(struct avm_pa_session *session, const char *why)
{
   AVM_PA_LOCK_DECLARE;

   AVM_PA_WRITE_LOCK();
   pa_session_flush_unlocked(session, why);
   AVM_PA_WRITE_UNLOCK();
}

/* ------------------------------------------------------------------------ */
/* -------- wall clock ---------------------------------------------------- */
/* ------------------------------------------------------------------------ */

static void pa_session_prioack_check(struct avm_pa_session *session)
{
   struct
avm_pa_global *ctx = &pa_glob; struct avm_pa_egress *egress = &session->egress[0]; unsigned int oldprio = egress->output.priority; if (egress->tx_pkts > ctx->prioack_thresh_packets) { /* * Stop using TGET priority. * We need to decide if we use TACK priority or restore original priority. * (TACK priority is same as original priority, if TACK is not enabled) * 2016-10-14 calle */ unsigned long percent_ack = (egress->tcpack_pkts * 100) / egress->tx_pkts; int switched_to_tack = 0; if (ctx->dbgprioack) { pa_printk(KERN_DEBUG, "avm_pa: session %d: %lu%% TCP-ACKs (%u pkts %u ACKs) \n", session->session_handle, percent_ack, egress->tx_pkts, egress->tcpack_pkts); } if (percent_ack > ctx->prioack_ratio) { egress->output.priority = egress->output.tack_priority; switched_to_tack = 1; } else { egress->output.priority = egress->output.orig_priority; } if (ctx->hardware_pa.add_session && !ctx->hw_ppa_disabled) { if ((*ctx->hardware_pa.add_session)(session) == AVM_PA_TX_SESSION_ADDED) session->in_hw = 1; } if (ctx->dbgprioack) { pa_printk(KERN_DEBUG, "avm_pa: session %d: priority %x:%x %s (old %x:%x)\n", session->session_handle, TC_H_MAJ(egress->output.priority)>>16, TC_H_MIN(egress->output.priority), switched_to_tack ? 
"TACK" : "NORMAL", TC_H_MAJ(oldprio)>>16, TC_H_MIN(oldprio)); } session->prioack_check = 0; } } static void pa_session_stats_get_diff(u32 *pkts, u64 *bytes, struct avm_pa_session_stats *last, struct avm_pa_session_stats *now) { *pkts = now->tx_pkts - last->tx_pkts; last->tx_pkts = now->tx_pkts; *bytes = now->tx_bytes - last->tx_bytes; last->tx_bytes = now->tx_bytes; } static inline unsigned int pa_get_priority(unsigned int prio) { prio &= TC_H_MIN_MASK; if (prio >= AVM_PA_MAX_PRIOS) prio = AVM_PA_MAX_PRIOS-1; return prio; } static inline unsigned int pa_get_egress_priority(struct avm_pa_egress *egress) { return pa_get_priority(egress->output.priority); } static inline unsigned int pa_get_ingress_priority(struct avm_pa_session *session) { /* * Ensure that the returned ingress priority is always in the range * [0, AVM_PA_MAX_PRIOS-1], otherwise Klocwork will complain if * the ingress priority is used as index to the VPID ingress priority * statistics array. */ return pa_get_priority(session->ingress_priority); } static inline unsigned int pa_get_ingress_priority_from_pkt_mark(u32 pkt_mark) { /* * Consider only networks for now, which are encoded as the two * most significant bytes. 
*/ unsigned int prio = AVM_PA_INGRESS_PRIO_NET(pkt_mark); if (prio >= AVM_PA_MAX_PRIOS) { prio = AVM_PA_MAX_PRIOS-1; } return prio; } static int pa_session_handle_stats(struct avm_pa_session *session) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_pid *pid; struct avm_pa_vpid *vpid; u64 bytes, hw_bytes; u32 pkts, hw_pkts; unsigned validflags = 0; unsigned long timestamp; ktime_t prio_stats_timestamp; int i; pa_session_stats_get_diff(&pkts, &bytes, &session->ingress_last_sw_stats, &session->ingress_sw_stats); #ifndef AVM_PA_NO_REPORT_FUNCTION if (ctx->hardware_pa.session_stats == 0) { pa_session_stats_get_diff(&hw_pkts, &hw_bytes, &session->last_hw_stats, &session->hw_stats); if (hw_pkts) validflags |= AVM_PA_SESSION_STATS_VALID_PKTS; if (hw_bytes) validflags |= AVM_PA_SESSION_STATS_VALID_BYTES; } else #endif { struct avm_pa_session_stats stats; if ( session->in_hw == 0 || ctx->hardware_pa.session_stats == 0 || (*ctx->hardware_pa.session_stats)(session, &stats) != 0) { validflags = 0; } else { validflags = stats.validflags; } if (validflags & AVM_PA_SESSION_STATS_VALID_PKTS) hw_pkts = stats.tx_pkts; else hw_pkts = 0; if (validflags & AVM_PA_SESSION_STATS_VALID_BYTES) hw_bytes = stats.tx_bytes; else hw_bytes = 0; } timestamp = NOT_ZERO(jiffies); prio_stats_timestamp = ktime_get_boottime(); if (ctx->dbgstats && validflags) printk(KERN_DEBUG "session %d valid 0x%x, %lu/%lu pkts, %llu/%llu bytes\n", session->session_handle, validflags, (unsigned long)pkts, (unsigned long)hw_pkts, bytes, hw_bytes); if (session->ingress_vpid_handle) { struct avm_pa_session *asession = PA_SESSION(ctx, session->associated_session_handle); unsigned int aprio = pa_get_ingress_priority(asession); unsigned int prio = pa_get_ingress_priority(session); vpid = PA_VPID(ctx, session->ingress_vpid_handle); ((u32 *)(&vpid->stats.rx_unicast_pkt))[session->ingress.casttype] += pkts + hw_pkts; ((u64 *)(&vpid->stats.rx_bytes))[session->ingress.casttype] += bytes + hw_bytes; 
vpid->ingress_sw_stats[prio].pkts += pkts; vpid->ingress_sw_stats[prio].bytes += bytes; vpid->ingress_hw_stats[prio].pkts += hw_pkts; vpid->ingress_hw_stats[prio].bytes += hw_bytes; vpid->stats.hardware_report_timestamp = timestamp; vpid->prio_stats_timestamp = prio_stats_timestamp; vpid = PA_VPID(ctx, asession->ingress_vpid_handle); vpid->associated_ingress_sw_stats[aprio].pkts += pkts; vpid->associated_ingress_sw_stats[aprio].bytes += bytes; vpid->associated_ingress_hw_stats[aprio].pkts += hw_pkts; vpid->associated_ingress_hw_stats[aprio].bytes += hw_bytes; } for (i = 0; i < session->negress; i++) { struct avm_pa_egress *egress = &session->egress[i]; unsigned int prio = pa_get_egress_priority(egress); egress->hw_stats.tx_pkts += hw_pkts; egress->hw_stats.tx_bytes += hw_bytes; if (egress->pid_handle) { pid = PA_PID(ctx, egress->pid_handle); pid->tx_pkts += pkts + hw_pkts; } if (egress->vpid_handle) { vpid = PA_VPID(ctx, egress->vpid_handle); ((u32 *)(&vpid->stats.tx_unicast_pkt))[egress->match.casttype] += pkts + hw_pkts; vpid->stats.tx_bytes += bytes + hw_bytes; vpid->sw_stats[prio].pkts += pkts; vpid->sw_stats[prio].bytes += bytes; vpid->hw_stats[prio].pkts += hw_pkts; vpid->hw_stats[prio].bytes += hw_bytes; vpid->stats.hardware_report_timestamp = timestamp; vpid->prio_stats_timestamp = prio_stats_timestamp; #ifdef AVM_PA_HAS_GUEST_STATS if (session->is_guest) { ((u32 *)(&vpid->guest_stats.tx_unicast_pkt))[egress->match.casttype] += pkts + hw_pkts; vpid->guest_stats.tx_bytes += bytes + hw_bytes; vpid->guest_sw_stats[prio].pkts += pkts; vpid->guest_sw_stats[prio].bytes += bytes; vpid->guest_hw_stats[prio].pkts += hw_pkts; vpid->guest_hw_stats[prio].bytes += hw_bytes; } #endif } if (session->associated_session_handle) { struct avm_pa_session *asession; int j; asession = PA_SESSION(ctx, session->associated_session_handle); for (j = 0; j < asession->negress; j++) { struct avm_pa_egress *aegress = &asession->egress[i]; unsigned int aprio = 
pa_get_egress_priority(aegress); vpid = PA_VPID(ctx, aegress->vpid_handle); vpid->associated_sw_stats[aprio].pkts += pkts; vpid->associated_sw_stats[aprio].bytes += bytes; vpid->associated_hw_stats[aprio].pkts += hw_pkts; vpid->associated_hw_stats[aprio].bytes += hw_bytes; } } } return validflags != 0; } static void pa_collect_session_stats(void) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_session *session, *next, *last; AVM_PA_LOCK_DECLARE; AVM_PA_WRITE_LOCK(); session = ctx->sess_lru[AVM_PA_LRU_ACTIVE].lru_head; last = ctx->sess_lru[AVM_PA_LRU_ACTIVE].lru_tail; /* * when we have stats for a session, it will be moved * to the end of the LRU list via pa_session_update_unlocked(). * So we remember the last member at the beginning of the operation * and stop, when we had "last" handled. * * 2015-11-23 calle */ while (session) { next = session->lru_next; if (pa_session_handle_stats(session)) pa_session_update_unlocked(session); if (session->prioack_check) pa_session_prioack_check(session); if (session == last) break; session = next; } AVM_PA_WRITE_UNLOCK(); } static void pa_stat_timer_expired (unsigned long data) { pa_collect_session_stats(); pa_start_stat_timer(); } static void pa_session_gc(int force) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_session *session, *next; AVM_PA_LOCK_DECLARE; AVM_PA_WRITE_LOCK(); if (force) { while ((session = ctx->sess_lru[AVM_PA_LRU_ACTIVE].lru_head) != 0) { pa_session_kill_unlocked(session, "disable"); ctx->stats.sess_flushed++; } } session = ctx->sess_lru[AVM_PA_LRU_DEAD].lru_head; while (session) { next = session->lru_next; if ( atomic_read(&session->transmit_in_progress) == 0 && avm_pa_get_hw_session(session) == NULL) { pa_delete_session(session); } session = next; } session = ctx->sess_lru[AVM_PA_LRU_ACTIVE].lru_head; while (session) { next = session->lru_next; if (session->flushed) { pa_session_kill_unlocked(session, session->why_killed); } else if (time_is_before_eq_jiffies(session->endtime)) { /* good to 
kill directly, will be on state DEAD for one more cycle */ pa_session_kill_unlocked(session, "timeout"); ctx->stats.sess_timedout++; } session = next; } AVM_PA_WRITE_UNLOCK(); } /* ------------------------------------------------------------------------ */ static void avm_pa_flush_sessions_with_destmac(struct avm_pa_macaddr *destmac) { struct avm_pa_global *ctx = &pa_glob; int i,j; AVM_PA_LOCK_DECLARE; AVM_PA_WRITE_LOCK(); for (i=1; i < CONFIG_AVM_PA_MAX_SESSION; i++) { struct avm_pa_session *session = PA_SESSION(ctx, i); for (j = 0; j < session->negress; j++) { if (session->egress[j].destmac == destmac) { pa_session_flush_unlocked(session, "destmac"); ctx->stats.sess_pidchanged++; break; } } } AVM_PA_WRITE_UNLOCK(); } /* ------------------------------------------------------------------------ */ static void pa_gc_timer_expired (unsigned long data) { struct avm_pa_global *ctx = &pa_glob; pa_session_gc(0); if ( ctx->sess_lru[AVM_PA_LRU_ACTIVE].nsessions || ctx->sess_lru[AVM_PA_LRU_DEAD].nsessions) pa_start_gc_timer(); } /* ------------------------------------------------------------------------ */ static void pa_show_stats(pa_fprintf fprintffunc, void *arg) { struct avm_pa_global *ctx = &pa_glob; (*fprintffunc)(arg, "BSessions : %u\n", (unsigned)ctx->stats.nbsessions); (*fprintffunc)(arg, "Sessions : %hu\n", ctx->sess_lru[AVM_PA_LRU_ACTIVE].nsessions); (*fprintffunc)(arg, "Max Sessions : %hu\n", ctx->sess_lru[AVM_PA_LRU_ACTIVE].maxsessions); (*fprintffunc)(arg, "Sessions (dead): %hu\n", ctx->sess_lru[AVM_PA_LRU_DEAD].nsessions); (*fprintffunc)(arg, "Sessions (free): %hu\n", ctx->sess_lru[AVM_PA_LRU_FREE].nsessions); (*fprintffunc)(arg, "Rx packets/sec : %lu\n", (unsigned long)ctx->stats.rx_pps); (*fprintffunc)(arg, "Fw packets/sec : %lu\n", (unsigned long)ctx->stats.fw_pps); (*fprintffunc)(arg, "Ov packets/sec : %lu\n", (unsigned long)ctx->stats.overlimit_pps); (*fprintffunc)(arg, "Rx pakets : %lu\n", (unsigned long)ctx->stats.rx_pkts); (*fprintffunc)(arg, "Rx 
bypass : %lu\n", (unsigned long)ctx->stats.rx_bypass); (*fprintffunc)(arg, "Rx frag list : %lu\n", (unsigned long)ctx->stats.rx_frag_list); (*fprintffunc)(arg, "Rx ttl <= 1 : %lu\n", (unsigned long)ctx->stats.rx_ttl); (*fprintffunc)(arg, "Rx broadcast : %lu\n", (unsigned long)ctx->stats.rx_broadcast); (*fprintffunc)(arg, "Rx search : %lu\n", (unsigned long)ctx->stats.rx_search); (*fprintffunc)(arg, "Rx match : %lu\n", (unsigned long)ctx->stats.rx_match); (*fprintffunc)(arg, "Rx lisp changed: %lu\n", (unsigned long)ctx->stats.rx_lispchanged); (*fprintffunc)(arg, "Rx df : %lu\n", (unsigned long)ctx->stats.rx_df); (*fprintffunc)(arg, "Rx modified : %lu\n", (unsigned long)ctx->stats.rx_mod); (*fprintffunc)(arg, "Rx overlimit : %lu\n", (unsigned long)ctx->stats.rx_overlimit); (*fprintffunc)(arg, "Rx dropped : %lu\n", (unsigned long)ctx->stats.rx_dropped); (*fprintffunc)(arg, "Rx filtered : %lu\n", (unsigned long)ctx->stats.rx_filtered); (*fprintffunc)(arg, "Rx irq : %lu\n", (unsigned long)ctx->stats.rx_irq); (*fprintffunc)(arg, "Rx irq dropped : %lu\n", (unsigned long)ctx->stats.rx_irqdropped); (*fprintffunc)(arg, "Rx hroom : %lu\n", (unsigned long)ctx->stats.rx_headroom_too_small); (*fprintffunc)(arg, "Rx hroom fail : %lu\n", (unsigned long)ctx->stats.rx_realloc_headroom_failed); (*fprintffunc)(arg, "Fw pakets : %lu\n", (unsigned long)ctx->stats.fw_pkts); (*fprintffunc)(arg, "Fw output : %lu\n", (unsigned long)ctx->stats.fw_output); (*fprintffunc)(arg, "Fw local : %lu\n", (unsigned long)ctx->stats.fw_local); (*fprintffunc)(arg, "Fw rtp : %lu\n", (unsigned long)ctx->stats.fw_rtp); (*fprintffunc)(arg, "Fw rtp drop : %lu\n", (unsigned long)ctx->stats.fw_rtp_drop); (*fprintffunc)(arg, "Fw illegal : %lu\n", (unsigned long)ctx->stats.fw_ill); (*fprintffunc)(arg, "Fw frags : %lu\n", (unsigned long)ctx->stats.fw_frags); (*fprintffunc)(arg, "Fw drop : %lu\n", (unsigned long)ctx->stats.fw_drop); (*fprintffunc)(arg, "Fw drop gone : %lu\n", (unsigned long)ctx->stats.fw_drop_gone); 
(*fprintffunc)(arg, "Fw fail : %lu\n", (unsigned long)ctx->stats.fw_fail); (*fprintffunc)(arg, "Fw frag fail : %lu\n", (unsigned long)ctx->stats.fw_frag_fail); (*fprintffunc)(arg, "Tx accelerated : %lu\n", (unsigned long)ctx->stats.tx_accelerated); (*fprintffunc)(arg, "Tx local : %lu\n", (unsigned long)ctx->stats.tx_local); (*fprintffunc)(arg, "Tx already : %lu\n", (unsigned long)ctx->stats.tx_already); (*fprintffunc)(arg, "Tx bypass : %lu\n", (unsigned long)ctx->stats.tx_bypass); (*fprintffunc)(arg, "Tx sess error : %lu\n", (unsigned long)ctx->stats.tx_sess_error); (*fprintffunc)(arg, "Tx sess ok : %lu\n", (unsigned long)ctx->stats.tx_sess_ok); (*fprintffunc)(arg, "Tx sess exists : %lu\n", (unsigned long)ctx->stats.tx_sess_exists); (*fprintffunc)(arg, "Tx egress error: %lu\n", (unsigned long)ctx->stats.tx_egress_error); (*fprintffunc)(arg, "Tx egress ok : %lu\n", (unsigned long)ctx->stats.tx_egress_ok); (*fprintffunc)(arg, "Tx fast gso : %lu\n", (unsigned long)ctx->stats.tx_fast_gso); (*fprintffunc)(arg, "Loc sess error : %lu\n", (unsigned long)ctx->stats.local_sess_error); (*fprintffunc)(arg, "Loc sess ok : %lu\n", (unsigned long)ctx->stats.local_sess_ok); (*fprintffunc)(arg, "Loc sess exists: %lu\n", (unsigned long)ctx->stats.local_sess_exists); (*fprintffunc)(arg, "RTP sess error : %lu\n", (unsigned long)ctx->stats.rtp_sess_error); (*fprintffunc)(arg, "RTP sess ok : %lu\n", (unsigned long)ctx->stats.rtp_sess_ok); (*fprintffunc)(arg, "RTP sess exists: %lu\n", (unsigned long)ctx->stats.rtp_sess_exists); (*fprintffunc)(arg, "TBF schedule : %lu\n", (unsigned long)ctx->stats.tbf_schedule); (*fprintffunc)(arg, "TBF reschedule : %lu\n", (unsigned long)ctx->stats.tbf_reschedule); (*fprintffunc)(arg, "sess flushed : %lu\n", (unsigned long)ctx->stats.sess_flushed); (*fprintffunc)(arg, "sess timedout : %lu\n", (unsigned long)ctx->stats.sess_timedout); (*fprintffunc)(arg, "sess pid change: %lu\n", (unsigned long)ctx->stats.sess_pidchanged); (*fprintffunc)(arg, "rxch no rx 
slow: %lu\n", (unsigned long)ctx->stats.rx_channel_no_rx_slow); (*fprintffunc)(arg, "rxch stopped : %lu\n", (unsigned long)ctx->stats.rx_channel_stopped); (*fprintffunc)(arg, "txch dropped : %lu\n", (unsigned long)ctx->stats.tx_channel_dropped); (*fprintffunc)(arg, "user msecs/sec : %lu\n", (unsigned long)ctx->stats.userms); (*fprintffunc)(arg, "idle msecs/sec : %lu\n", (unsigned long)ctx->stats.idlems); (*fprintffunc)(arg, "irq msecs/sec : %lu\n", (unsigned long)ctx->stats.irqms); }; /*------------------------------------------------------------------------ */ static void avm_pa_tbf_schedule(psched_time_t wtime) { struct avm_pa_global *ctx = &pa_glob; /* we never wait a second */ ktime_t time = ktime_set(0, 0); time = ktime_add_ns(time, PSCHED_TICKS2NS(wtime)); if (hrtimer_active(&ctx->tbf.timer)) { hrtimer_forward_now(&ctx->tbf.timer, time); ctx->stats.tbf_reschedule++; } else { hrtimer_start(&ctx->tbf.timer, time, HRTIMER_MODE_REL); ctx->stats.tbf_schedule++; } } static int avm_pa_tbf_tx_ok(u32 wanted) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_tbf *q = &ctx->tbf; psched_time_t now; long toks; long ptoks; long pkttime = q->pkttime; u32 count = 0; now = psched_get_time(); toks = psched_tdiff_bounded(now, q->t_c, q->buffer); // toks = now - q->t_c; ptoks = toks + q->ptokens; if (ptoks > (long)q->pbuffer) ptoks = q->pbuffer; toks += q->tokens; if (toks > (long)q->buffer) toks = q->buffer; while ( count < wanted && ((toks - pkttime) >= 0 || (ptoks - pkttime) >= 0)) { ptoks -= pkttime; toks -= pkttime; count++; } if (count) { q->t_c = now; q->tokens = toks; q->ptokens = ptoks; return count; } avm_pa_tbf_schedule(max_t(long, -toks, -ptoks)); return 0; } static inline u32 calc_xmittime(unsigned rate, unsigned size) { u64 x64 = NSEC_PER_SEC*(u64)size; do_div(x64, rate); return (u32)(PSCHED_NS2TICKS((u32)x64)); } static void avm_pa_tbf_reset(void) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_tbf *q = &ctx->tbf; q->t_c = psched_get_time(); q->tokens = 
q->buffer; q->ptokens = q->pbuffer; } static void avm_pa_tbf_disable(void) { struct avm_pa_global *ctx = &pa_glob; ctx->tbf_enabled = 0; avm_pa_tbf_reset(); if (skb_queue_len(&ctx->tbfqueue)) tasklet_hi_schedule(&ctx->tbftasklet); } static void avm_pa_tbf_update(u32 rate, unsigned buffer, unsigned peak) { struct avm_pa_global *ctx = &pa_glob; ctx->tbf.buffer = calc_xmittime(rate, buffer); ctx->tbf.pbuffer = calc_xmittime(rate, peak); ctx->tbf.pkttime = calc_xmittime(rate, 1); } static enum hrtimer_restart avm_pa_tbf_restart(struct hrtimer *timer) { struct avm_pa_global *ctx = &pa_glob; tasklet_hi_schedule(&ctx->tbftasklet); return HRTIMER_NORESTART; } static void avm_pa_tbf_init(u32 rate, unsigned buffer, unsigned peak) { struct avm_pa_global *ctx = &pa_glob; struct hrtimer *timer = &ctx->tbf.timer; if (!hrtimer_active(timer)) { hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); timer->function = avm_pa_tbf_restart; } avm_pa_tbf_update(rate, buffer, peak); avm_pa_tbf_reset(); } static void avm_pa_tbf_exit(void) { struct avm_pa_global *ctx = &pa_glob; struct hrtimer *timer = &ctx->tbf.timer; hrtimer_cancel(timer); } static void avm_pa_tbf_tasklet(unsigned long data) { struct avm_pa_global *ctx = &pa_glob; struct sk_buff *skb; if (ctx->tbf_enabled) { u32 len; if ((len = skb_queue_len(&ctx->tbfqueue)) > 0) { len = avm_pa_tbf_tx_ok(len); while (len--) { skb = skb_dequeue(&ctx->tbfqueue); pa_do_modify_and_send(NULL, skb); } } } else { while ((skb = skb_dequeue(&ctx->tbfqueue)) != 0) { pa_do_modify_and_send(NULL, skb); } } } static inline void avm_pa_tbf_transmit(struct avm_pa_session *session, PKT *pkt) { struct avm_pa_global *ctx = &pa_glob; /* Set the session_handle to be sure, although it's not always used during transmit. 
*/ AVM_PKT_INFO(pkt)->session_handle = session->session_handle; AVM_PKT_INFO(pkt)->session_uniq_id = session->uniq_id; if (session->realtime) { pa_do_modify_and_send(session, pkt); return; } if ( skb_queue_len(&ctx->tbfqueue) == 0 && (ctx->tbf_enabled == 0 || avm_pa_tbf_tx_ok(1))) { pa_do_modify_and_send(session, pkt); return; } skb_queue_tail(&ctx->tbfqueue, pkt); if (ctx->tbf_enabled) { ctx->stats.rx_overlimit++; if (skb_queue_len(&ctx->tbfqueue) > AVM_PA_MAX_TBF_QUEUE_LEN) { if ((pkt = skb_dequeue(&ctx->tbfqueue)) != 0) { PKT_FREE(pkt); ctx->stats.rx_dropped++; } } } if (!hrtimer_active(&ctx->tbf.timer)) tasklet_hi_schedule(&ctx->tbftasklet); } /* ------------------------------------------------------------------------ */ #define MAX_TASKLET_PACKETS 32 static void avm_pa_irq_tasklet(unsigned long data) { struct avm_pa_global *ctx = &pa_glob; int count = MAX_TASKLET_PACKETS; struct sk_buff *skb; while (count-- > 0 && (skb = skb_dequeue(&ctx->irqqueue)) != 0) { struct avm_pa_session *session; session = pa_session_get(AVM_PKT_INFO(skb)->session_handle); /* Shouldn't happen but better play safe. 
*/ if (session && session->uniq_id == AVM_PKT_INFO(skb)->session_uniq_id) { avm_pa_tbf_transmit(session, skb); } else { ctx->stats.fw_drop_gone++; PKT_FREE(skb); } } if (skb_queue_len(&ctx->irqqueue)) tasklet_schedule(&ctx->irqtasklet); } /* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */ void avm_pa_rx_channel_suspend(avm_pid_handle pid_handle) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_pid *pid = PA_PID(ctx, pid_handle); pid->rx_channel_stopped = 1; } EXPORT_SYMBOL(avm_pa_rx_channel_suspend); void avm_pa_rx_channel_resume(avm_pid_handle pid_handle) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_pid *pid = PA_PID(ctx, pid_handle); pid->rx_channel_stopped = 0; } EXPORT_SYMBOL(avm_pa_rx_channel_resume); void avm_pa_rx_channel_packet_not_accelerated(avm_pid_handle pid_handle, struct sk_buff *skb) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_pid *pid = PA_PID(ctx, pid_handle); if (avm_pa_pid_receive(pid_handle, skb) == AVM_PA_RX_ACCELERATED) return; if (likely(pid && pid->ecfg.rx_slow)) { (*pid->ecfg.rx_slow)(pid->ecfg.rx_slow_arg, skb); return; } PKT_FREE(skb); ctx->stats.rx_channel_no_rx_slow++; } EXPORT_SYMBOL(avm_pa_rx_channel_packet_not_accelerated); void avm_pa_tx_channel_accelerated_packet(avm_pid_handle pid_handle, avm_session_handle session_handle, struct sk_buff *skb) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_session *session = pa_session_get(session_handle); if (session && session->lru == AVM_PA_LRU_ACTIVE) { int i; for (i = 0; i < session->negress; i++) { struct avm_pa_egress *egress = &session->egress[i]; /* Set some important skb fields, as pa_do_modify_non_l2() would have done */ if (egress->pid_handle == pid_handle) { skb->protocol = session->mod.protocol; // 2016-03-01, calle skb_reset_mac_header(skb); if (session->bsession == 0) 
{
               /* Not a bridged session: set network and transport header. */
               skb_pull(skb, ETH_HLEN); /* Is ETH_HLEN correct for pppoe egress? */
               skb_reset_network_header(skb);
               if (skb->protocol == constant_htons(ETH_P_IP))
                  skb_set_transport_header(skb, session->mod.v4_mod.iphlen);
               else if (skb->protocol == constant_htons(ETH_P_IPV6))
                  skb_set_transport_header(skb, sizeof(struct ipv6hdr));
            }
            /* Tell the transmit path which egress to use and to skip modification. */
            AVM_PKT_INFO(skb)->already_modified = 1;
            AVM_PKT_INFO(skb)->egress_offset = i;
            avm_pa_tbf_transmit(session, skb);
            return;
         }
      }
   }
   /* No usable session/egress: drop the packet. */
   PKT_FREE(skb);
   ctx->stats.tx_channel_dropped++;
}
EXPORT_SYMBOL(avm_pa_tx_channel_accelerated_packet);

/* ------------------------------------------------------------------------ */
/* -------- exported functions -------------------------------------------- */
/* ------------------------------------------------------------------------ */

/* Returns non-zero unless the packet accelerator is disabled. */
int avm_pa_is_enabled(void)
{
   struct avm_pa_global *ctx = &pa_glob;
   return !ctx->disabled;
}
EXPORT_SYMBOL(avm_pa_is_enabled);

/* Copy the global counters to *stats. */
void avm_pa_get_stats(struct avm_pa_stats *stats)
{
   struct avm_pa_global *ctx = &pa_glob;
   memcpy(stats, &ctx->stats, sizeof(struct avm_pa_stats));
}
EXPORT_SYMBOL(avm_pa_get_stats);

/* Set all global counters to zero. */
void avm_pa_reset_stats(void)
{
   struct avm_pa_global *ctx = &pa_glob;
   memset(&ctx->stats, 0, sizeof(struct avm_pa_stats));
}
EXPORT_SYMBOL(avm_pa_reset_stats);

/* Zero-initialize a device info structure (no PID/VPID handle assigned). */
void avm_pa_dev_init(struct avm_pa_dev_info *devinfo)
{
   memset(devinfo, 0, sizeof(struct avm_pa_dev_info));
}
EXPORT_SYMBOL(avm_pa_dev_init);

/*
 * Software receive path for a packet arriving on PID "pid_handle".
 *
 * Looks for a matching bridge session or regular session and, if one is
 * found and forwarding is possible, hands the packet to the transmit path
 * (directly, or via the irq queue when called with interrupts disabled).
 *
 * Returns
 * - AVM_PA_RX_ACCELERATED if the packet was consumed (sent, queued, filtered
 *   or dropped); the caller must not touch it anymore,
 * - AVM_PA_RX_OK if the packet has to take the normal path,
 * - the result of pa_set_pkt_match() if the packet can not be classified.
 */
static int avm_pa_pid_receive(avm_pid_handle pid_handle, PKT *pkt)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *pid = PA_PID(ctx, pid_handle);
   struct avm_pa_pkt_info *info;
   struct avm_pa_session *session;
   struct ethhdr *ethh;
   unsigned int hstart;
   int rc;

   if (ctx->disabled)
      return AVM_PA_RX_OK;

   info = AVM_PKT_INFO(pkt);
   /* complain if avm_pa_pkt_info crosses the reserved area (usually 256 bytes) */
   BUILD_BUG_ON(sizeof(*info) > sizeof(pkt->avm_pa.buf));

   /* Packet has already been seen on an ingress PID. */
   if (info->ingress_pid_handle)
      return AVM_PA_RX_OK;

   ctx->stats.rx_pkts++;

   if ((ethh = pa_get_ethhdr(pid->ingress_framing, pkt)) != 0) {
      /* A bridge session match skips classification entirely. */
      if ((session = pa_bsession_search(pid, ethh_hash(ethh), ethh)) != 0)
         goto accelerate;
      /* Only for frames whose destination address has the group bit clear. */
      if ((ethh->h_dest[0] & 1) == 0) {
         if ((pid->ecfg.flags & AVM_PA_PID_FLAG_NO_PID_CHANGED_CHECK) == 0)
            pa_check_and_handle_ingress_pid_change(ethh->h_source, pid_handle);
      }
   }

   /* Start with a clean per-packet state. */
   info->ingress_pid_handle = pid_handle;
   info->ingress_vpid_handle = 0;
   info->egress_vpid_handle = 0;
   info->vpid_counted_slow = 0;
   info->can_be_accelerated = 0;
   info->is_accelerated = 0;
   info->routed = 0;
   info->session_handle = 0;

   if (pid->ecfg.flags & AVM_PA_PID_FLAG_HSTART_ON_INGRESS)
      hstart = AVM_PKT_INFO(pkt)->hstart;
   else
      hstart = 0;

   rc = pa_set_pkt_match(pid->ingress_framing, hstart, pkt, &info->match, 0);
   if (rc == AVM_PA_RX_OK) {
      info->can_be_accelerated = 1;
      ctx->stats.rx_search++;
      if ((session = pa_session_search(pid, &info->match)) == 0) {
         /* No session yet: the packet takes the normal path. */
         info->ingress_pid_handle = pid_handle;
#if AVM_PA_TRACE
         if (ctx->dbgtrace) {
            pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_pid_receive(%s) - %s\n",
                      pkt_uniq_id(pkt), pid->cfg.name, "no session");
            if (ctx->dbgnosession) {
               char buf[64];
               data2hex(PKT_DATA(pkt), PKT_LEN(pkt), buf, sizeof(buf));
               pa_printk(KERN_DEBUG, "Data : %s\n", buf);
               pa_show_pkt_info(info, pa_printk, KERN_DEBUG);
            }
         }
#endif
         if (ctx->fw_disabled || avm_pa_capture_running()) {
#if AVM_PA_TRACE
            if (ctx->dbgtrace)
               pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_pid_receive(%s) - %s\n",
                         pkt_uniq_id(pkt), pid->cfg.name, "forward disabled");
#endif
            info->can_be_accelerated = 0;
         }
         return AVM_PA_RX_OK;
      }

      if (info->match.pkttype & AVM_PA_PKTTYPE_LISP) {
         /* A changed LISP data header invalidates the session. */
         void *slhdr = LISPDATAHDR(&session->ingress);
         void *ilhdr = LISPDATAHDR(&info->match);
         if (memcmp(slhdr, ilhdr, LISP_DATAHDR_SIZE) != 0) {
            pa_session_flush(session, "lisp data header changed");
            ctx->stats.rx_lispchanged++;
            return AVM_PA_RX_OK;
         }
      }

      ctx->stats.rx_match++;

      if (session->egress[0].pid_handle == 0) {
         /* Session without egress PID: count the packet and discard it. */
         if (session->ingress_vpid_handle) {
            struct avm_pa_vpid *vpid = PA_VPID(ctx, session->ingress_vpid_handle);
            ((u32 *)(&vpid->stats.rx_unicast_pkt))[session->ingress.casttype]++;
            ((u64 *)(&vpid->stats.rx_bytes))[session->ingress.casttype] += PKT_LEN(pkt);
         }
         ctx->stats.rx_filtered++;
         PKT_FREE(pkt);
         pa_session_update(session);
         return AVM_PA_RX_ACCELERATED;
      }

      if (pa_egress_size_check(session, pkt) < 0) {
         ctx->stats.rx_df++;
         info->ingress_pid_handle = pid_handle;
#if AVM_PA_TRACE
         if (ctx->dbgtrace)
            pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_pid_receive(%s) - %s\n",
                      pkt_uniq_id(pkt), pid->cfg.name, "size problem");
#endif
         return AVM_PA_RX_OK;
      }

      if (info->match.fin) {
         /* FIN seen: shorten the session timeout, the packet goes the normal path. */
         session->timeout = ctx->fin_timeout_secs*HZ;
         if (session->timeout == 0)
            pa_session_flush(session, "fin");
         return AVM_PA_RX_OK;
      }

      /* Also entered directly from the bridge session lookup above. */
accelerate:
      pa_session_update(session);

      if (ctx->fw_disabled) {
         if (session->timeout == 0)
            pa_session_flush(session, "fast timeout");
#if AVM_PA_TRACE
         if (ctx->dbgtrace)
            pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_pid_receive(%s) - %s\n",
                      pkt_uniq_id(pkt), pid->cfg.name, "forward disabled");
#endif
         return AVM_PA_RX_OK;
      }

      /* For device framing, extend the packet back to the mac header. */
      if (pid->ingress_framing == avm_pa_framing_dev)
         PKT_PUSH(pkt, PKT_DATA(pkt) - skb_mac_header(pkt));

      if (skb_headroom(pkt) < session->needed_headroom) {
         struct sk_buff *npkt;

         if (net_ratelimit())
            printk(KERN_ERR "avm_pa: pid %u (%s): headroom %u < %u\n",
                   pid_handle, pid->cfg.name, skb_headroom(pkt),
                   (unsigned)session->needed_headroom);
         ctx->stats.rx_headroom_too_small++;
         npkt = skb_realloc_headroom(pkt, session->needed_headroom);
         if (npkt == 0) {
            if (net_ratelimit())
               printk(KERN_ERR "avm_pa: pid %u (%s): skb_realloc_headroom(%u) failed\n",
                      pid_handle, pid->cfg.name,
                      (unsigned)session->needed_headroom);
            ctx->stats.rx_realloc_headroom_failed++;
            /*
             * go slow path
             * NOTE(review): for avm_pa_framing_dev the PKT_PUSH above has
             * already been applied to the packet that is handed back to the
             * caller here -- confirm the normal path copes with that.
             */
            return AVM_PA_RX_OK;
         } else {
            /* Continue with the copy, the original packet is released. */
            kfree_skb(pkt);
            pkt = npkt;
         }
      }
#if AVM_PA_TRACE
      if (ctx->dbgtrace)
         pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_pid_receive(%s) - %s\n",
                   pkt_uniq_id(pkt), pid->cfg.name, "accelerated");
#endif
      if (skb_has_frag_list(pkt)) {
         ctx->stats.rx_frag_list += 1;
      }

      if (in_irq() || irqs_disabled()) {
         /* Defer transmission to the irq tasklet, bounded by the queue limit. */
         if (skb_queue_len(&ctx->irqqueue) > AVM_PA_MAX_IRQ_QUEUE_LEN) {
            ctx->stats.rx_irqdropped++;
            PKT_FREE(pkt);
         } else {
            /* pkt may have been replaced above, so fetch its info again. */
            info = AVM_PKT_INFO(pkt);
            info->session_handle = session->session_handle;
            info->session_uniq_id = session->uniq_id;
            skb_queue_tail(&ctx->irqqueue, pkt);
            ctx->stats.rx_irq++;
            tasklet_schedule(&ctx->irqtasklet);
         }
      } else {
         avm_pa_tbf_transmit(session, pkt);
      }
      return AVM_PA_RX_ACCELERATED;
   }

   /* Classification failed: optionally dump the packet, then count the reason. */
   if (ctx->dbgmatch) {
      char buf[64];
      pa_printk(KERN_DEBUG, "---------->\n");
      pa_printk(KERN_DEBUG, "RC : %d %s\n", rc, rc2str(rc));
      data2hex(PKT_DATA(pkt), PKT_LEN(pkt), buf, sizeof(buf));
      pa_printk(KERN_DEBUG, "Data : %s\n", buf);
      pa_show_pkt_info(info, pa_printk, KERN_DEBUG);
      pa_printk(KERN_DEBUG, "<----------\n");
   }
   pa_reset_match(&info->match);

   switch (rc) {
      case AVM_PA_RX_TTL:
         ctx->stats.rx_ttl++;
         break;
      case AVM_PA_RX_BROADCAST:
         ctx->stats.rx_broadcast++;
         break;
      default:
         ctx->stats.rx_bypass++;
         break;
   }
#if AVM_PA_TRACE
   if (ctx->dbgtrace)
      pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_pid_receive(%s) - %s (rc %d)\n",
                pkt_uniq_id(pkt), pid->cfg.name, "bypass", rc);
#endif
   return rc;
}

/* Record the ingress VPID in the packet info (with optional trace output). */
static inline void avm_pa_vpid_snoop_receive(avm_vpid_handle handle, PKT *pkt)
{
#if AVM_PA_TRACE
   struct avm_pa_global *ctx = &pa_glob;

   if (ctx->dbgtrace) {
      struct avm_pa_vpid *vpid = PA_VPID(ctx, handle);
      pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_vpid_snoop_receive(%s)\n",
                pkt_uniq_id(pkt), vpid->cfg.name);
   }
#endif
   AVM_PKT_INFO(pkt)->ingress_vpid_handle = handle;
}

/*
 * Insert "recvhook" as first receive hook, shifting the existing hooks up
 * by one. Returns -1 if the hook is already registered, 0 on success.
 * The capacity of the table is not checked here.
 */
inline int prepend_recvhook(int (*recvhook)(struct sk_buff *skb, int framing))
{
   struct avm_pa_global *ctx = &pa_glob;
   int pos;

   for (pos = 0; pos < ctx->recvhook_counter; pos++) {
      if (ctx->recvhook[pos].cb_recvhook == recvhook)
         return -1;
   }
   for (pos = ctx->recvhook_counter; pos > 0; pos--)
      ctx->recvhook[pos].cb_recvhook = ctx->recvhook[pos-1].cb_recvhook;
   ctx->recvhook[0].cb_recvhook = recvhook;
   ctx->recvhook_counter++;
   return 0;
}

/*
 * Add "recvhook" as last receive hook. Returns -1 if the hook is already
 * registered, 0 on success. The capacity of the table is not checked here.
 */
inline int append_recvhook(int (*recvhook)(struct sk_buff *skb, int framing))
{
   struct
avm_pa_global *ctx = &pa_glob; int pos; for (pos = 0; pos < ctx->recvhook_counter; pos++) { if (ctx->recvhook[pos].cb_recvhook == recvhook) return -1; } ctx->recvhook[ctx->recvhook_counter].cb_recvhook = recvhook; ctx->recvhook_counter++; return 0; } inline void remove_recvhook(int (*recvhook)(struct sk_buff *skb, int framing)) { struct avm_pa_global *ctx = &pa_glob; int found = 0; int pos; if (ctx->recvhook_counter == 0) return; for (pos = 0; pos < ctx->recvhook_counter; pos++) { if (found == 0 && ctx->recvhook[pos].cb_recvhook != recvhook) continue; ctx->recvhook[pos].cb_recvhook = pos+1 < ctx->recvhook_counter ? ctx->recvhook[pos+1].cb_recvhook : 0; found = 1; } if (found) ctx->recvhook_counter--; } int avm_pa_register_recvhook(int prepend, int (*recvhook)(struct sk_buff *skb, int framing)) { struct avm_pa_global *ctx = &pa_glob; if (ctx->recvhook_counter >= AVM_PA_MAX_RECVHOOK) return -1; if (prepend) { if (prepend_recvhook(recvhook) < 0) return -1; } else { if (append_recvhook(recvhook) < 0) return -1; } return 0; } EXPORT_SYMBOL(avm_pa_register_recvhook); void avm_pa_unregister_recvhook(int (*recvhook)(struct sk_buff *skb, int framing)) { remove_recvhook(recvhook); } EXPORT_SYMBOL(avm_pa_unregister_recvhook); static int call_cb_recvhooks(PKT *pkt, int framing) { struct avm_pa_global *ctx = &pa_glob; int pos; for (pos = 0; pos < ctx->recvhook_counter; pos++) { if (likely((ctx->recvhook[pos].cb_recvhook(pkt, framing) == 0))) { return AVM_PA_RX_STOLEN; } } return -1; } int avm_pa_dev_receive(struct avm_pa_dev_info *devinfo, PKT *pkt) { int rc = AVM_PA_RX_OK; if (devinfo->pid_handle) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_pid *pid = PA_PID(ctx, devinfo->pid_handle); if (likely(call_cb_recvhooks(pkt, pid->ingress_framing) == AVM_PA_RX_STOLEN)) return AVM_PA_RX_STOLEN; rc = avm_pa_pid_receive(devinfo->pid_handle, pkt); if (rc == AVM_PA_RX_ACCELERATED) return rc; } if (devinfo->vpid_handle) avm_pa_vpid_snoop_receive(devinfo->vpid_handle, pkt); return 
rc; } EXPORT_SYMBOL(avm_pa_dev_receive); int avm_pa_dev_pid_receive(struct avm_pa_dev_info *devinfo, PKT *pkt) { struct avm_pa_global *ctx = &pa_glob; int rc = AVM_PA_RX_OK; if (devinfo->pid_handle) { struct avm_hardware_pa *hwpa = &ctx->hardware_pa; struct avm_pa_pid *pid = PA_PID(ctx, devinfo->pid_handle); if (likely(call_cb_recvhooks(pkt, pid->ingress_framing) == AVM_PA_RX_STOLEN)) return AVM_PA_RX_STOLEN; if (!ctx->hw_ppa_disabled && hwpa && hwpa->try_to_accelerate) { struct avm_pa_pid *pid = PA_PID(ctx, devinfo->pid_handle); if (pid->rx_channel_activated) { if (pid->rx_channel_stopped == 0) { if ((*hwpa->try_to_accelerate)(devinfo->pid_handle, pkt) <= 0) return AVM_PA_RX_STOLEN; } else { ctx->stats.rx_channel_stopped++; } } } rc = avm_pa_pid_receive(devinfo->pid_handle, pkt); } return rc; } EXPORT_SYMBOL(avm_pa_dev_pid_receive); void avm_pa_dev_vpid_snoop_receive(struct avm_pa_dev_info *devinfo, PKT *pkt) { if (devinfo->vpid_handle) avm_pa_vpid_snoop_receive(devinfo->vpid_handle, pkt); } EXPORT_SYMBOL(avm_pa_dev_vpid_snoop_receive); void avm_pa_mark_routed(PKT *pkt) { AVM_PKT_INFO(pkt)->routed = 1; #if AVM_PA_TRACE if (pa_glob.dbgtrace) pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_mark_routed (ingress %d)\n", pkt_uniq_id(pkt), AVM_PKT_INFO(pkt)->ingress_pid_handle); #endif } EXPORT_SYMBOL(avm_pa_mark_routed); void avm_pa_use_protocol_specific_session(PKT *pkt) { AVM_PKT_INFO(pkt)->use_protocol_specific = 1; #if AVM_PA_TRACE if (pa_glob.dbgtrace) pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_use_protocol_specific_session (ingress %d)\n", pkt_uniq_id(pkt), AVM_PKT_INFO(pkt)->ingress_pid_handle); #endif } EXPORT_SYMBOL(avm_pa_use_protocol_specific_session); void avm_pa_do_not_accelerate(PKT *pkt) { AVM_PKT_INFO(pkt)->can_be_accelerated = 0; #if AVM_PA_TRACE if (pa_glob.dbgtrace) pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_do_not_accelerate\n", pkt_uniq_id(pkt)); #endif } EXPORT_SYMBOL(avm_pa_do_not_accelerate); void avm_pa_set_hstart(PKT *pkt, unsigned int hstart) { 
AVM_PKT_INFO(pkt)->hstart = hstart;
}
EXPORT_SYMBOL(avm_pa_set_hstart);

/*
 * Record the egress VPID of a packet on its way out.
 * Only the first VPID seen is stored; later calls for the same packet do
 * not overwrite it.
 */
static inline void avm_pa_vpid_snoop_transmit(avm_vpid_handle handle, PKT *pkt)
{
   struct avm_pa_pkt_info *info = AVM_PKT_INFO(pkt);

   if (info->egress_vpid_handle == 0)
      info->egress_vpid_handle = handle;
#if AVM_PA_TRACE
   if (pa_glob.dbgtrace) {
      struct avm_pa_global *ctx = &pa_glob;
      struct avm_pa_vpid *vpid = PA_VPID(ctx, handle);
      pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_vpid_snoop_transmit(%s)\n",
                pkt_uniq_id(pkt), vpid->cfg.name);
   }
#endif
}

/*
 * Returns non-zero if 'sk' is a UDP socket with a non-zero sk_tc_index.
 * Always returns 0 when CONFIG_AVM_SK_TC_INDEX is not set.
 */
static inline int avm_pa_sock_is_realtime(struct sock *sk)
{
#ifdef CONFIG_AVM_SK_TC_INDEX
   return sk->sk_protocol == IPPROTO_UDP && sk->sk_tc_index != 0;
#else
   return 0;
#endif
}

/*
 * Priority to use for a TCP ack-only packet leaving via 'epid'.
 *
 * For TCP packets with the TACK map enabled on the egress PID, the mapped
 * priority replaces 'orig_priority' if it is non-zero and numerically
 * smaller. In all other cases 'orig_priority' is returned unchanged.
 */
static inline unsigned int pa_calc_tack_priority(struct avm_pa_pkt_info *info,
                                                 struct avm_pa_pid *epid,
                                                 unsigned int orig_priority)
{
   unsigned int newprio = orig_priority;

   if (AVM_PA_PKTTYPE_IPPROTO(info->match.pkttype) == IPPROTO_TCP) {
      unsigned int prio;

      prio = avm_pa_pid_tack_enabled(epid) ? avm_pa_pid_tack_prio(epid, orig_priority) : 0;
      /* 0 means "no mapping"; only ever move to a smaller value */
      if (prio != 0 && prio < newprio)
         newprio = prio;
   }
   return newprio;
}

/*
 * Priority a new TCP session starts with: the numerically smallest
 * non-zero value among 'orig_priority' and the TGET and TACK mappings
 * that are enabled on 'epid'. Non-TCP packets keep 'orig_priority'.
 */
static inline unsigned int pa_calc_start_priority(struct avm_pa_pkt_info *info,
                                                  struct avm_pa_pid *epid,
                                                  unsigned int orig_priority)
{
   /*
    * We calculate the priority to use, when session is created.
    * We assume it's an TGET or TACK session. The final decision will be made in
    * pa_session_prioack_check().
    * 2016-10-14 calle
    */
   unsigned int newprio = orig_priority;

   if (AVM_PA_PKTTYPE_IPPROTO(info->match.pkttype) == IPPROTO_TCP) {
      unsigned int prio;

      prio = avm_pa_pid_tget_enabled(epid) ? avm_pa_pid_tget_prio(epid, orig_priority) : 0;
      if (prio != 0 && prio < newprio)
         newprio = prio;
      prio = avm_pa_pid_tack_enabled(epid) ?
avm_pa_pid_tack_prio(epid, orig_priority) : 0; if (prio != 0 && prio < newprio) newprio = prio; } return newprio; } static inline int avm_pa_pid_snoop_transmit(avm_pid_handle pid_handle, PKT *pkt, struct sock *sk) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_pkt_info *info = AVM_PKT_INFO(pkt); struct avm_pa_session *session; struct avm_pa_egress *egress; struct avm_pa_pkt_match match; struct avm_pa_pid *ipid, *epid; struct avm_pa_vpid *ivpid, *evpid; struct ethhdr *ethh; unsigned negress; int headroom; char buf[64]; /* not used uninitialized - if ethh != NULL, then hash is also valid */ u32 uninitialized_var(hash); AVM_PA_LOCK_DECLARE; #if AVM_PA_TRACE if (ctx->dbgtrace) { epid = PA_PID(ctx, pid_handle); pa_printk(KERN_DEBUG, "avm_pa: %lu - avm_pa_pid_snoop_transmit(%s)\n", pkt_uniq_id(pkt), epid->cfg.name); } #endif if (ctx->disabled) return AVM_PA_TX_OK; epid = PA_PID(ctx, pid_handle); if (!info->is_accelerated) { /* Update vpid statistics also for packets that took the slow path. This enables using * only the vpid counters for the online monitor (provided the avm_pa is enabled). 
*/ if (!info->vpid_counted_slow && (info->ingress_vpid_handle || info->egress_vpid_handle)) { PKT *npkt; u32 bytes = 0, len = PKT_LEN(pkt); int nfrags = 0; int casttype = info->match.casttype; unsigned int prio, priority; skb_walk_frags(pkt, npkt) { bytes += PKT_LEN(npkt) + len; nfrags++; } if (!bytes) bytes = len; if (!nfrags) nfrags = 1; if (info->ingress_vpid_handle) { ivpid = PA_VPID(ctx, info->ingress_vpid_handle); (&ivpid->stats.rx_bytes)[casttype] += bytes; (&ivpid->stats.rx_unicast_pkt)[casttype] += nfrags; /* update prio stats */ priority = pa_get_ingress_priority_from_pkt_mark(pkt->mark); prio = pa_get_priority(priority); ivpid->sw_stats[prio].pkts += nfrags; ivpid->sw_stats[prio].bytes += bytes; } if (info->egress_vpid_handle) { evpid = PA_VPID(ctx, info->egress_vpid_handle); evpid->stats.tx_bytes += bytes; (&evpid->stats.tx_unicast_pkt)[casttype] += nfrags; /* update prio stats */ if (info->match.ack_only) priority = pa_calc_tack_priority(info, epid, pkt->priority); else priority = pkt->priority; prio = pa_get_priority(priority); evpid->sw_stats[prio].pkts += nfrags; evpid->sw_stats[prio].bytes += bytes; } /* vpids must be accounted exactly once, in case of multple * avm_pa_pid_snoop_transmit() calls */ info->vpid_counted_slow = 1; } } if (!info->can_be_accelerated) { ctx->stats.tx_bypass++; goto tx_bypass; } if (sk) { /* input to local system */ ipid = PA_PID(ctx, info->ingress_pid_handle); ethh = 0; } else { /* forwarded or bridged */ if (info->is_accelerated) { ctx->stats.tx_accelerated++; return AVM_PA_TX_BYPASS; // everything done already. 
} if (info->ingress_pid_handle == 0) { ctx->stats.tx_local++; goto tx_bypass; // AVM_PA_TX_BYPASS; } if (info->session_handle != 0) { ctx->stats.tx_already++; goto tx_bypass; // AVM_PA_TX_BYPASS; } ipid = PA_PID(ctx, info->ingress_pid_handle); if ((ethh = pa_get_ethhdr(epid->egress_framing, pkt)) != 0) { hash = ethh_hash(ethh); if ((session = pa_bsession_search(ipid, hash, ethh)) != 0) return AVM_PA_TX_SESSION_EXISTS; } } if (info->match.syn || info->match.fin) { ctx->stats.tx_bypass++; if (ctx->dbgnosession) { pa_printk(KERN_DEBUG, "Syn/Fin : %s\n", info->match.syn ? "Syn" : "Fin"); data2hex(PKT_DATA(pkt), PKT_LEN(pkt), buf, sizeof(buf)); pa_printk(KERN_DEBUG, "Data : %s\n", buf); pa_show_pkt_info(info, pa_printk, KERN_DEBUG); } goto tx_bypass; // AVM_PA_TX_BYPASS; } if (pa_egress_precheck(epid, pkt, &info->match, &match) < 0) { ctx->stats.tx_bypass++; if (ctx->dbgnosession) { pa_printk(KERN_DEBUG, "Precheck : failed\n"); data2hex(PKT_DATA(pkt), PKT_LEN(pkt), buf, sizeof(buf)); pa_printk(KERN_DEBUG, "Data : %s\n", buf); pa_show_pkt_info(info, pa_printk, KERN_DEBUG); pa_show_pkt_match(&match, 0, 0, pa_printk, KERN_DEBUG); } goto tx_bypass; // AVM_PA_TX_BYPASS; } if ((session = pa_session_search(ipid, &info->match)) == 0) { if ((session = pa_session_alloc(&info->match)) == 0) { pa_session_gc(0); /* try to get space for the new session */ if ((session = pa_session_alloc(&info->match)) == 0) { if (sk) ctx->stats.local_sess_error++; else ctx->stats.tx_sess_error++; return AVM_PA_TX_ERROR_SESSION; } } /* Session State: CREATE */ session->ingress_pid_handle = info->ingress_pid_handle; session->ingress_vpid_handle = info->ingress_vpid_handle; session->ingress_priority = pa_get_ingress_priority_from_pkt_mark(pkt->mark); session->routed = info->routed ? 
1 : 0; session->negress = 0; session->bsession = 0; egress = &session->egress[session->negress++]; egress->pid_handle = pid_handle; egress->vpid_handle = info->egress_vpid_handle; egress->match = match; if (sk) { if (avm_pa_sock_is_realtime(sk)) session->realtime = 1; egress->type = avm_pa_egresstype_local; egress->local.dev = pkt->dev; egress->local.dst = dst_clone(skb_dst(pkt)); egress->local.skb_iif = SKB_IFF(pkt); } else { egress->type = avm_pa_egresstype_output; egress->output.orig_priority = pkt->priority; egress->output.priority = pkt->priority; egress->output.tack_priority = pa_calc_tack_priority(info, epid, pkt->priority); egress->output.tc_index = pkt->tc_index; egress->output.cpmac_prio = pkt_cpmac_prio(pkt); egress->output.skb_iif = SKB_IFF(pkt); egress->output.vlan_tci = pkt->vlan_tci; #ifdef AVM_PA_SKBUFF_HAS_VLAN_PROTO egress->output.vlan_proto = pkt->vlan_proto; #endif if (epid->ecfg.cb_len) { memcpy(egress->output.cb, &pkt->cb[epid->ecfg.cb_start], epid->ecfg.cb_len); } } if (ethh) egress->destmac = pa_macaddr_link(ethh->h_dest, pid_handle); /* Bridged session are more efficient, but subject to a few restrictions: * - ethernet header must match, and nothing else * - packets must be bridged, not routed (obviously) * - must be unicast as broadcast/multicast means multiple egress, which might require * different framings or even local input, which make plain bridging impossible * - avm_pa_use_protocol_specific_session() wasn't used to enforce normal sessions * If all conditions are met, bridged sessions can use a few shortcuts such * as skipping data modification entirely. 
*/ if ( ethh && info->routed == 0 && info->match.casttype == AVM_PA_IS_UNICAST && info->use_protocol_specific == 0 && pa_match_cmp(&info->match, &match) == 0) { session->timeout = ctx->bridge_timeout_secs*HZ; session->bsession = pa_bsession_alloc(hash, ethh, session->session_handle); pa_change_to_bridge_match(&session->ingress); pa_change_to_bridge_match(&egress->match); session->mod.protocol = ethh->h_proto; egress->pppoe_offset = AVM_PA_OFFSET_NOT_SET; egress->push_l2_len = 0; egress->mtu = 0xffff; } else { if (egress->type == avm_pa_egresstype_output) { egress->output.priority = pa_calc_start_priority(info, epid, pkt->priority); if (egress->output.priority != egress->output.orig_priority) { session->prioack_check = 1; /* pa_session_prioack_check() will check priority */ pkt->priority = egress->output.priority; if (ctx->dbgprioack) { pa_printk(KERN_DEBUG, "avm_pa: session %d: priority %x:%x TGET (orignal %x:%x)\n", session->session_handle, TC_H_MAJ(egress->output.priority)>>16, TC_H_MIN(egress->output.priority), TC_H_MAJ(egress->output.orig_priority)>>16, TC_H_MIN(egress->output.orig_priority)); } } } (void)pa_calc_modify(session, &info->match, &match); if (match.encap_offset == AVM_PA_OFFSET_NOT_SET) egress->push_l2_len = match.ip_offset; else egress->push_l2_len = match.encap_offset; headroom = (session->mod.push_encap_len + egress->push_l2_len) - (session->mod.pull_l2_len + session->mod.pull_encap_len); if (headroom > 0 && headroom > session->needed_headroom) session->needed_headroom = headroom; egress->pppoe_offset = match.pppoe_offset; if (egress->pppoe_offset != AVM_PA_OFFSET_NOT_SET) egress->pppoe_hdrlen = egress->pppoe_offset + sizeof(struct pppoehdr); egress->mtu = epid->cfg.default_mtu; if (egress->vpid_handle) { evpid = PA_VPID(ctx, egress->vpid_handle); if (session->mod.protocol == constant_htons(ETH_P_IP)) { if (evpid->cfg.v4_mtu < egress->mtu) egress->mtu = evpid->cfg.v4_mtu; } else if (session->mod.protocol == constant_htons(ETH_P_IPV6)) { if 
(evpid->cfg.v6_mtu < egress->mtu) egress->mtu = evpid->cfg.v6_mtu; } } } #ifdef CONFIG_AVM_GENERIC_CONNTRACK if (pkt->generic_ct) { session->generic_ct = generic_ct_get(pkt->generic_ct); session->generic_ct_dir = skb_get_ct_dir(pkt); /* don't do generic_ct_sessionid_set() yet because the session is not * activated yet, so don't use the session_handle yet */ } #endif #if AVM_PA_TRACE if (ctx->dbgtrace) { pa_printk(KERN_DEBUG, "avm_pa: add session %d (%s)\n", session->session_handle, ipid->cfg.name); } #endif if (ctx->dbgsession) { if (session->bsession) { pa_printk(KERN_DEBUG, "\navm_pa: new bsession:\n"); pa_show_bsession(session->bsession, pa_printk, KERN_DEBUG); } else { pa_printk(KERN_DEBUG, "\navm_pa: new session:\n"); pa_show_session(session, pa_printk, KERN_DEBUG); } } /* activate guarantees that only one session of a kind exists but it also * hands over the session to the lookup so that newer packets (perhaps * on another CPU) can already use this session before we return */ if (!pa_session_activate(session)) return AVM_PA_TX_SESSION_EXISTS; /* Session State: ACTIVE */ #ifdef CONFIG_AVM_GENERIC_CONNTRACK if (pkt->generic_ct) { generic_ct_sessionid_set(session->generic_ct, session->generic_ct_dir, (void *)(unsigned long)(session->session_handle)); } #endif /* * Add session to hardware is done after activate the session to not confuse hardware pa * with multiple, equal sessions (in case of race condition with another CPU). * This means that, possibly, the prioack check is done for later packets and * not the first but this is not a problem as long as the session doesn't go in_hw * * Only add session to hardware here if: * a) It's not a local session, because HW-PA seems to have a problem with acceleration * to local system (see JZ-26496 stockendes Internet) * b) prioack_check is NOT set. HW-PA dit't provide packet and byte counters so we * must use counters from software acceleration. 
* c) HW-PA is available * d) HW-PA is not disabled */ if ( !sk && !session->prioack_check // a) + b) && ctx->hardware_pa.add_session && !ctx->hw_ppa_disabled) { // c) + d) /* Have to guard against writers that may kill the session concurrently */ AVM_PA_READ_LOCK(); if (session->lru == AVM_PA_LRU_ACTIVE) { if ((*ctx->hardware_pa.add_session)(session) == AVM_PA_TX_SESSION_ADDED) session->in_hw = 1; } AVM_PA_READ_UNLOCK(); } avm_pa_set_associated_session_handle(session); #ifdef AVM_PA_HAS_GUEST_STATS /* * MDU: JZ-28867: guest statistics in upstream (Online-Monitor): * * In order to visualize the upstream guest statistics in the Online-Monitor, * we need to count the statistics of all AVM PA upstream sessions carrying guest traffic. * We label each pair of up- and corresponding downstream session as guest (session->is_guest = 1) * by examining if the ingress priority (skb->mark) of the the downstream session * was set to INGRESS_MARK_GUEST_NET via the tc ingress filter attached to the dsl interface. * This will not work for upstream only flows, as for instance a pure UDP upload * as there will be no corresponding downstream session. * We will later on address this issue with an egress filter attached to the * dsl interface which marks all skbs of the flow as guest traffic. 
*/ if (session->ingress_priority == 2) { /* 2 means guest (see ar7/dsld/dsld.c for INGRESS_MARK_GUEST_NET) */ session->is_guest = 1; if (session->associated_session_handle) { struct avm_pa_session *asession = PA_SESSION(ctx, session->associated_session_handle); asession->is_guest = 1; } } else if (session->associated_session_handle) { struct avm_pa_session *asession = PA_SESSION(ctx, session->associated_session_handle); if (asession->ingress_priority == 2) { session->is_guest = 1; } } #endif if (sk) ctx->stats.local_sess_ok++; else ctx->stats.tx_sess_ok++; info->session_handle = session->session_handle; return AVM_PA_TX_SESSION_ADDED; } /* * It's a slow packet with existing session, this happens in case of * active packet tracing or batched rx processing (i.e. GRX). */ info->session_handle = session->session_handle; for (negress = 0; negress < session->negress; negress++) { egress = &session->egress[negress]; if ( egress->pid_handle == pid_handle && egress->vpid_handle == info->egress_vpid_handle && pa_match_cmp(&egress->match, &match) == 0) { if (sk) { ctx->stats.local_sess_exists++; } else { ctx->stats.tx_sess_exists++; } pa_session_update(session); /* use priority we decide to use for this egress */ if (egress->type == avm_pa_egresstype_output) pkt->priority = egress->output.priority; return AVM_PA_TX_SESSION_EXISTS; } } /* * Atomically allocate an egress (fixes JZ-26868, caused by concurrent writes * on one egress). Use spin_lock instead of atomic to avoid costly atomic_read() * in the acceelerated path. * * The egress is allocated but not fully initialized yet. Setting egress->pid_handle * at the end completes initialization which must be checked by other code * paths. However, some fields like mtu or destmac can be accessed regardless because * they are either valid or 0. 
*/ egress = NULL; AVM_PA_WRITE_LOCK(); if (session->negress < AVM_PA_MAX_EGRESS) egress = &session->egress[session->negress++]; AVM_PA_WRITE_UNLOCK(); if (egress) { /* pid_handle is assigned at last, see below */ u16 mtu; egress->vpid_handle = info->egress_vpid_handle; egress->match = match; if (sk) { if (avm_pa_sock_is_realtime(sk)) session->realtime = 1; egress->type = avm_pa_egresstype_local; egress->local.dev = pkt->dev; egress->local.dst = dst_clone(skb_dst(pkt)); egress->local.skb_iif = SKB_IFF(pkt); } else { egress->type = avm_pa_egresstype_output; egress->output.orig_priority = pkt->priority; egress->output.priority = pkt->priority; egress->output.tack_priority = pkt->priority; egress->output.tc_index = pkt->tc_index; egress->output.cpmac_prio = pkt_cpmac_prio(pkt); egress->output.skb_iif = SKB_IFF(pkt); } if (ethh) egress->destmac = pa_macaddr_link(ethh->h_dest, pid_handle); mtu = epid->cfg.default_mtu; if (egress->vpid_handle) { evpid = PA_VPID(ctx, egress->vpid_handle); if (session->mod.protocol == constant_htons(ETH_P_IP)) { if (evpid->cfg.v4_mtu < mtu) mtu = evpid->cfg.v4_mtu; } else if (session->mod.protocol == constant_htons(ETH_P_IPV6)) { if (evpid->cfg.v6_mtu < mtu) mtu = evpid->cfg.v6_mtu; } } if (session->bsession) { egress->pppoe_offset = AVM_PA_OFFSET_NOT_SET; egress->push_l2_len = 0; mtu = 0xffff; } else { /* * currently we do only TACK/TGET handling only on egress[0]. * So we keep SKBs original priority. 
* 2016-10-14 calle */ if (match.encap_offset == AVM_PA_OFFSET_NOT_SET) egress->push_l2_len = match.ip_offset; else egress->push_l2_len = match.encap_offset; headroom = (session->mod.push_encap_len + egress->push_l2_len) - (session->mod.pull_l2_len + session->mod.pull_encap_len); if (headroom > 0 && headroom > session->needed_headroom) session->needed_headroom = headroom; egress->pppoe_offset = match.pppoe_offset; if (egress->pppoe_offset != AVM_PA_OFFSET_NOT_SET) egress->pppoe_hdrlen = egress->pppoe_offset + sizeof(struct pppoehdr); mtu = epid->cfg.default_mtu; } egress->mtu = mtu; /* Assign pid handle at last, as an indicator that the egress became valid. * This is needed here because the session may be used already by a newer * packet (session state is ACTIVE here). */ egress->pid_handle = pid_handle; if (session->in_hw) { pa_session_handle_stats(session); if ( ctx->hardware_pa.change_session && egress->type == avm_pa_egresstype_output) { if ((*ctx->hardware_pa.change_session)(session) != AVM_PA_TX_EGRESS_ADDED) session->in_hw = 0; } else { (*ctx->hardware_pa.remove_session)(session); session->in_hw = 0; } } ctx->stats.tx_egress_ok++; if (ctx->dbgsession) { pa_printk(KERN_DEBUG, "\navm_pa: new egress:\n"); pa_show_session(session, pa_printk, KERN_DEBUG); } return AVM_PA_TX_EGRESS_ADDED; } ctx->stats.tx_egress_error++; return AVM_PA_TX_ERROR_EGRESS; tx_bypass: /* * set TACK priority for TCP control and ack only packets * 2016-10-14 calle */ if (!sk && avm_pa_pid_tack_enabled(epid)) { if (info->match.syn || info->match.fin || info->match.ack_only) { pkt->priority = pa_calc_tack_priority(info, epid, pkt->priority); epid->prioack_acks++; } } return AVM_PA_TX_BYPASS; } int avm_pa_dev_snoop_transmit(struct avm_pa_dev_info *devinfo, PKT *pkt) { if (devinfo->vpid_handle) avm_pa_vpid_snoop_transmit(devinfo->vpid_handle, pkt); if (devinfo->pid_handle) return avm_pa_pid_snoop_transmit(devinfo->pid_handle, pkt, 0); return AVM_PA_TX_OK; } 
EXPORT_SYMBOL(avm_pa_dev_snoop_transmit);

/* Record the egress VPID of 'pkt' if the device has a VPID. */
void avm_pa_dev_vpid_snoop_transmit(struct avm_pa_dev_info *devinfo, PKT *pkt)
{
   if (devinfo->vpid_handle)
      avm_pa_vpid_snoop_transmit(devinfo->vpid_handle, pkt);
}
EXPORT_SYMBOL(avm_pa_dev_vpid_snoop_transmit);

/*
 * Run the transmit snoop for a packet delivered to local socket 'sk',
 * using the packet's ptype_pid_handle as egress PID. The result of the
 * snoop is discarded.
 */
void _avm_pa_add_local_session(PKT *pkt, struct sock *sk)
{
   (void)avm_pa_pid_snoop_transmit(AVM_PKT_INFO(pkt)->ptype_pid_handle, pkt, sk);
}
EXPORT_SYMBOL(_avm_pa_add_local_session);

/*
 * Convert the local session referenced by the packet info of 'pkt' into an
 * RTP session that hands packets to 'transmit' for socket 'sk'.
 *
 * Nothing happens if the session cannot be found or does not have exactly
 * one egress. If the egress is not of type "local" only a statistics
 * counter is bumped (rtp_sess_exists when it already is an RTP egress,
 * rtp_sess_error otherwise). On success a reference on 'sk' is taken with
 * sock_hold(). All of this runs under the avm_pa write lock.
 */
void avm_pa_add_rtp_session(PKT *pkt,
                            struct sock *sk,
                            void (*transmit)(struct sock *sk, PKT *pkt))
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pkt_info *info = AVM_PKT_INFO(pkt);
   struct avm_pa_session *session;
   struct avm_pa_egress *egress;
   AVM_PA_LOCK_DECLARE;

   AVM_PA_WRITE_LOCK();
   session = pa_session_get_unlocked(info->session_handle);
   if (session == 0 || session->negress != 1)
      goto unlock;

   egress = &session->egress[0];
   if (egress->type != avm_pa_egresstype_local) {
      if (egress->type == avm_pa_egresstype_rtp)
         ctx->stats.rtp_sess_exists++;
      else
         ctx->stats.rtp_sess_error++;
      goto unlock;
   }
   session->realtime = 1;
   egress->type = avm_pa_egresstype_rtp;
   egress->rtp.dev = pkt->dev;
   egress->rtp.skb_iif = SKB_IFF(pkt);
   sock_hold(sk);
   egress->rtp.sk = sk;
   egress->rtp.transmit = transmit;
   ctx->stats.rtp_sess_ok++;

unlock:
   AVM_PA_WRITE_UNLOCK();
}
EXPORT_SYMBOL(avm_pa_add_rtp_session);

/*
 * Run the transmit snoop for 'pkt' with its ptype_pid_handle as egress PID
 * and no socket. The result of the snoop is discarded.
 */
void avm_pa_filter_packet(PKT *pkt)
{
   (void)avm_pa_pid_snoop_transmit(AVM_PKT_INFO(pkt)->ptype_pid_handle, pkt, 0);
}
EXPORT_SYMBOL(avm_pa_filter_packet);

/*
 * Register a PID for 'devinfo'.
 *
 * devinfo            - device info, receives the PID handle
 * pid_handle         - requested handle, 0 to let avm_pa choose a slot
 * cfg                - PID configuration; a default_mtu of 0 is replaced
 *                      by 1500 in the caller's structure
 * ingress_pid_handle - registered PID to use as ingress PID, 0 for "self"
 *
 * Slot selection, in order: handle already stored in devinfo (a different
 * non-zero pid_handle is an error), the requested pid_handle, a slot whose
 * stored name equals cfg->name, the first slot with pid_handle == 0.
 *
 * Returns 0 on success, -1 on handle mismatch, when no slot is free or
 * when ingress_pid_handle is not registered.
 */
int avm_pa_dev_pidhandle_register_with_ingress(struct avm_pa_dev_info *devinfo,
                                               avm_pid_handle pid_handle,
                                               struct avm_pa_pid_cfg *cfg,
                                               avm_pid_handle ingress_pid_handle)
{
   struct avm_pa_global *ctx = &pa_glob;
   avm_pid_handle n;

   if (devinfo->pid_handle) {
      if (pid_handle && devinfo->pid_handle != pid_handle)
         return -1;
      n = devinfo->pid_handle;
      goto slot_found;
   }
   if (pid_handle) {
      n = pid_handle;
      goto slot_found;
   }
   /* prefer the slot that carried this name before (cfg is kept on unregister) */
   for (n=1; n < CONFIG_AVM_PA_MAX_PID; n++) {
      if (strncmp(cfg->name, PA_PID(ctx, n)->cfg.name,
AVM_PA_MAX_NAME) == 0)
         goto slot_found;
   }
   /* otherwise take the first unused slot */
   for (n=1; n < CONFIG_AVM_PA_MAX_PID; n++) {
      if (PA_PID(ctx, n)->pid_handle == 0)
         goto slot_found;
   }
   return -1;

slot_found:
   if (ingress_pid_handle) {
      /* the ingress PID must be a registered one */
      if (PA_PID(ctx, ingress_pid_handle)->pid_handle != ingress_pid_handle)
         return -1;
      PA_PID(ctx, n)->ingress_pid_handle = ingress_pid_handle;
   } else {
      PA_PID(ctx, n)->ingress_pid_handle = n;
   }
   if (cfg->default_mtu == 0)
      cfg->default_mtu = 1500;
   PA_PID(ctx, n)->pid_handle = n;
   PA_PID(ctx, n)->cfg = *cfg;
   memset(&PA_PID(ctx, n)->ecfg, 0, sizeof(PA_PID(ctx, n)->ecfg));
   PA_PID(ctx, n)->ingress_framing = cfg->framing;
   /* derive egress framing; ptype and tx_func/tx_arg are mutually exclusive */
   switch (cfg->framing) {
      case avm_pa_framing_llcsnap:
      case avm_pa_framing_ether:
      case avm_pa_framing_ppp:
      case avm_pa_framing_ip:
         PA_PID(ctx, n)->egress_framing = cfg->framing;
         PA_PID(ctx, n)->cfg.ptype = 0;
         break;
      case avm_pa_framing_dev:
         PA_PID(ctx, n)->egress_framing = avm_pa_framing_ether;
         PA_PID(ctx, n)->cfg.ptype = 0;
         break;
      case avm_pa_framing_ptype:
         PA_PID(ctx, n)->egress_framing = cfg->framing;
         PA_PID(ctx, n)->cfg.tx_func = 0;
         PA_PID(ctx, n)->cfg.tx_arg = 0;
         break;
   }
   /* NOTE(review): hw is cleared without being freed; if a PID that already
    * has hwinfo is registered again this looks like a leak - confirm. */
   PA_PID(ctx, n)->hw = 0;
   devinfo->pid_handle = n;
   if (PA_PID(ctx, n)->egress_framing == avm_pa_framing_ptype)
      avm_pa_pid_activate_hw_accelaration(n);
   return 0;
}
EXPORT_SYMBOL(avm_pa_dev_pidhandle_register_with_ingress);

/* Register a PID with a requested handle; the PID is its own ingress PID. */
int avm_pa_dev_pidhandle_register(struct avm_pa_dev_info *devinfo,
                                  avm_pid_handle pid_handle,
                                  struct avm_pa_pid_cfg *cfg)
{
   return avm_pa_dev_pidhandle_register_with_ingress(devinfo, pid_handle, cfg, 0);
}
EXPORT_SYMBOL(avm_pa_dev_pidhandle_register);

/* Register a PID in any slot, using 'ingress_pid_handle' as ingress PID. */
int avm_pa_dev_pid_register_with_ingress(struct avm_pa_dev_info *devinfo,
                                         struct avm_pa_pid_cfg *cfg,
                                         avm_pid_handle ingress_pid_handle)
{
   return avm_pa_dev_pidhandle_register_with_ingress(devinfo, 0, cfg, ingress_pid_handle);
}
EXPORT_SYMBOL(avm_pa_dev_pid_register_with_ingress);

/* Register a PID in any slot; the PID is its own ingress PID. */
int avm_pa_dev_pid_register(struct avm_pa_dev_info *devinfo,
                            struct avm_pa_pid_cfg *cfg)
{
   return avm_pa_dev_pidhandle_register_with_ingress(devinfo, 0, cfg,
0); } EXPORT_SYMBOL(avm_pa_dev_pid_register); #ifdef CONFIG_AVM_PA_TX_NAPI int avm_pa_dev_pid_register_tx_napi(struct avm_pa_dev_info *devinfo, struct avm_pa_pid_cfg *cfg, struct net_device *dev) { int ret; ret = avm_pa_dev_pidhandle_register_with_ingress(devinfo, 0, cfg, 0); if (!ret) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_pid *pid = PA_PID(ctx, devinfo->pid_handle); netif_napi_add(dev, &pid->tx_napi, pa_dev_tx_napi_poll, TX_NAPI_BUDGET); napi_enable(&pid->tx_napi); skb_queue_head_init(&pid->tx_napi_pkts); #ifdef CONFIG_SMP tasklet_init(&pid->tx_napi_tsk, (void *) __do_schedule_napi, (unsigned long) &pid->tx_napi); #endif } return ret; } EXPORT_SYMBOL(avm_pa_dev_pid_register_tx_napi); #endif int avm_pa_pid_set_ecfg(avm_pid_handle pid_handle, struct avm_pa_pid_ecfg *ecfg) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_pid *pid = PA_PID(ctx, pid_handle); unsigned int cbsize = sizeof(((struct sk_buff *)0)->cb); if (pid->pid_handle != pid_handle) return -1; memset(&pid->ecfg, 0, sizeof(struct avm_pa_pid_ecfg)); switch (ecfg->version) { case 3: pid->ecfg.pid_group = ecfg->pid_group; case 2: pid->ecfg.rx_slow = ecfg->rx_slow; pid->ecfg.rx_slow_arg = ecfg->rx_slow_arg; case 1: pid->ecfg.cb_start = ecfg->cb_start; pid->ecfg.cb_len = ecfg->cb_len; case 0: pid->ecfg.flags = ecfg->flags; } if (pid->ecfg.cb_start + pid->ecfg.cb_len > cbsize) return -2; pid->ecfg.version = ecfg->version; return 0; } EXPORT_SYMBOL(avm_pa_pid_set_ecfg); int avm_pa_pid_set_framing(avm_pid_handle pid_handle, enum avm_pa_framing ingress_framing, enum avm_pa_framing egress_framing) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_pid *pid = PA_PID(ctx, pid_handle); if (pid->pid_handle != pid_handle) return -1; switch (ingress_framing) { case avm_pa_framing_llcsnap: case avm_pa_framing_ether: case avm_pa_framing_ppp: case avm_pa_framing_ip: case avm_pa_framing_dev: pid->ingress_framing = ingress_framing; pid->cfg.ptype = 0; break; case avm_pa_framing_ptype: if 
(pid->ingress_framing != ingress_framing)
            return -2;
         pid->cfg.tx_func = 0;
         pid->cfg.tx_arg = 0;
         break;
   }

   /* note: the ingress framing above has already been applied when the
    * egress framing below is rejected with -3 */
   switch (egress_framing) {
      case avm_pa_framing_llcsnap:
      case avm_pa_framing_ether:
      case avm_pa_framing_ppp:
      case avm_pa_framing_ip:
         pid->egress_framing = egress_framing;
         pid->cfg.ptype = 0;
         break;
      case avm_pa_framing_dev:
         /* "dev" framing is sent out with ethernet framing */
         pid->egress_framing = avm_pa_framing_ether;
         pid->cfg.ptype = 0;
         break;
      case avm_pa_framing_ptype:
         return -3;
   }
   return 0;
}
EXPORT_SYMBOL(avm_pa_pid_set_framing);

/*
 * Print one entry per registered PID through 'fprintffunc': handle,
 * default MTU, ingress/egress framing, tx packet counter, name and flags;
 * the ingress PID is added when it differs from the PID itself. Further
 * lines show activated rx/tx channels and the non-zero TACK/TGET priority
 * mappings.
 */
static void pa_show_pids(pa_fprintf fprintffunc, void *arg)
{
   struct avm_pa_global *ctx = &pa_glob;
   char buf[128];
   avm_pid_handle n;
   unsigned int i;

   for (n=1; n < CONFIG_AVM_PA_MAX_PID; n++) {
      struct avm_pa_pid *pid = PA_PID(ctx, n);

      /* slot not in use */
      if (pid->pid_handle == 0)
         continue;

      if (pid->ingress_pid_handle == pid->pid_handle) {
         (*fprintffunc)(arg, "PID%-3d: (%5d) %-5s %-5s %10lu %s %s\n",
                        pid->pid_handle,
                        pid->cfg.default_mtu,
                        framing2str(pid->ingress_framing),
                        framing2str(pid->egress_framing),
                        (unsigned long)pid->tx_pkts,
                        pid->cfg.name,
                        pidflags2str(pid->ecfg.flags, buf, sizeof(buf)));
      } else {
         (*fprintffunc)(arg, "PID%-3d: (%5d) %-5s %-5s %10lu %s (ingress %d %s) %s\n",
                        pid->pid_handle,
                        pid->cfg.default_mtu,
                        framing2str(pid->ingress_framing),
                        framing2str(pid->egress_framing),
                        (unsigned long)pid->tx_pkts,
                        pid->cfg.name,
                        pid->ingress_pid_handle,
                        PA_PID(ctx, pid->ingress_pid_handle)->cfg.name,
                        pidflags2str(pid->ecfg.flags, buf, sizeof(buf)));
      }
      if (pid->rx_channel_activated || pid->tx_channel_activated) {
         (*fprintffunc)(arg, " rx_channel %d tx_channel %d\n",
                        pid->rx_channel_activated ? 1 : 0,
                        pid->tx_channel_activated ?
1 : 0); } if (avm_pa_pid_tack_enabled(pid) || avm_pa_pid_tget_enabled(pid)) { for (i = 0; i < AVM_PA_MAX_PRIOS; ++i) { unsigned int tackprio = 0; unsigned int tgetprio = 0; if (avm_pa_pid_tack_enabled(pid)) tackprio = avm_pa_pid_tack_prio(pid, i); if (avm_pa_pid_tget_enabled(pid)) tgetprio = avm_pa_pid_tget_prio(pid, i); if (tackprio || tgetprio) { (*fprintffunc)(arg, " prio[%u]:", i); if (tackprio) (*fprintffunc)(arg, " tack_prio = 0x%x", tackprio); if (tgetprio) (*fprintffunc)(arg, " tack_prio = 0x%x", tgetprio); (*fprintffunc)(arg, "\n"); } } } } } int avm_pa_dev_vpidhandle_register(struct avm_pa_dev_info *devinfo, avm_vpid_handle vpid_handle, struct avm_pa_vpid_cfg *cfg) { struct avm_pa_global *ctx = &pa_glob; avm_vpid_handle n; if (devinfo->vpid_handle) { if (vpid_handle && devinfo->vpid_handle != vpid_handle) return 0; n = devinfo->vpid_handle; goto slot_found; } if (vpid_handle) { n = vpid_handle; goto slot_found; } for (n=1; n < CONFIG_AVM_PA_MAX_VPID; n++) { if (strncmp(cfg->name, PA_VPID(ctx, n)->cfg.name, AVM_PA_MAX_NAME) == 0) { goto slot_found; } } for (n=1; n < CONFIG_AVM_PA_MAX_VPID; n++) { if (PA_VPID(ctx, n)->vpid_handle == 0) goto slot_found; } return -1; slot_found: if (cfg->v4_mtu == 0) cfg->v4_mtu = 1500; if (cfg->v6_mtu == 0) cfg->v6_mtu = 1500; PA_VPID(ctx, n)->vpid_handle = n; PA_VPID(ctx, n)->cfg = *cfg; memset(&PA_VPID(ctx, n)->stats, 0, sizeof(struct avm_pa_vpid_stats)); memset(PA_VPID(ctx, n)->sw_stats, 0, sizeof(PA_VPID(ctx, n)->sw_stats)); memset(PA_VPID(ctx, n)->hw_stats, 0, sizeof(PA_VPID(ctx, n)->hw_stats)); devinfo->vpid_handle = n; return 0; } EXPORT_SYMBOL(avm_pa_dev_vpidhandle_register); int avm_pa_dev_vpid_register(struct avm_pa_dev_info *devinfo, struct avm_pa_vpid_cfg *cfg) { return avm_pa_dev_vpidhandle_register(devinfo, 0, cfg); } EXPORT_SYMBOL(avm_pa_dev_vpid_register); void avm_pa_dev_unregister(struct avm_pa_dev_info *devinfo) { struct avm_pa_global *ctx = &pa_glob; (void)avm_pa_dev_reset_stats(devinfo); if 
(devinfo->pid_handle) { struct avm_pa_pid *pid = PA_PID(ctx, devinfo->pid_handle); if (devinfo->pid_handle == pid->pid_handle) { struct avm_hardware_pa *hwpa = &ctx->hardware_pa; avm_pid_handle n; /* free virtual channels */ if (pid->tx_channel_activated && hwpa->free_tx_channel) { hwpa->free_tx_channel( pid->pid_handle ); pid->tx_channel_activated = 0; } if (pid->rx_channel_activated && hwpa->free_rx_channel) { hwpa->free_rx_channel( pid->pid_handle ); pid->rx_channel_activated = 0; } if (pid->hw) kfree(pid->hw); /* keep cfg, for reuse by name */ pid->pid_handle = 0; pid->hw = 0; /* check if pid is used as ingress pid */ for (n=1; n < CONFIG_AVM_PA_MAX_PID; n++) { pid = PA_PID(ctx, n); if (pid->ingress_pid_handle == devinfo->pid_handle) pid->ingress_pid_handle = pid->pid_handle; } avm_pa_flush_sessions_for_pid(devinfo->pid_handle); clear_pid_selector(ctx, devinfo->pid_handle, 0); } devinfo->pid_handle = 0; } if (devinfo->vpid_handle) { struct avm_pa_vpid *vpid = PA_VPID(ctx, devinfo->vpid_handle); if (devinfo->vpid_handle == vpid->vpid_handle) { avm_pa_flush_sessions_for_vpid(devinfo->vpid_handle); /* keep cfg, for reuse by name */ vpid->vpid_handle = 0; clear_pid_selector(ctx, devinfo->vpid_handle, 1); } devinfo->vpid_handle = 0; } } EXPORT_SYMBOL(avm_pa_dev_unregister); int avm_pa_pid_set_hwinfo(avm_pid_handle pid_handle, struct avm_pa_pid_hwinfo *hw) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_pid *pid = PA_PID(ctx, pid_handle); if (pid_handle != pid->pid_handle) { printk(KERN_ERR "avm_pa_pid_set_hwinfo: pid %u not registered\n", pid_handle); return -1; } pid->hw = kmalloc(sizeof(struct avm_pa_pid_hwinfo), GFP_ATOMIC); if ( !pid->hw ) { printk(KERN_ERR "avm_pa_pid_set_hwinfo: kmalloc failed\n"); return -1; } memcpy(pid->hw, hw, sizeof(struct avm_pa_pid_hwinfo)); return 0; } EXPORT_SYMBOL(avm_pa_pid_set_hwinfo); struct avm_pa_pid_hwinfo *avm_pa_pid_get_hwinfo(avm_pid_handle pid_handle) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_pid *pid = 
PA_PID(ctx, pid_handle);

   if (pid_handle != pid->pid_handle) {
      if (net_ratelimit())
         printk(KERN_ERR "avm_pa_pid_get_hwinfo: pid %u not registered\n", pid_handle);
      return 0;
   }
   /* same slot as looked up above */
   pid = PA_PID(ctx, pid_handle);
   return pid->hw;
}
EXPORT_SYMBOL(avm_pa_pid_get_hwinfo);

/*
 * Try to allocate hardware rx/tx channels for a registered PID.
 *
 * rx channel: only if not yet activated, ingress framing is ethernet, an
 *             rx_slow callback is configured and the hardware PA provides
 *             alloc_rx_channel.
 * tx channel: only if not yet activated, egress framing is ethernet or
 *             ptype and the hardware PA provides alloc_tx_channel.
 *
 * A failed channel allocation is logged but not reported to the caller.
 * Returns 0, or -1 if the PID is not registered.
 */
int avm_pa_pid_activate_hw_accelaration(avm_pid_handle pid_handle)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *pid = PA_PID(ctx, pid_handle);
   struct avm_hardware_pa *hwpa;

   if (pid_handle != pid->pid_handle) {
      printk(KERN_ERR "avm_pa_pid_activate_hw_accelaration: pid %u not registered\n", pid_handle);
      return -1;
   }

   printk(KERN_INFO "avm_pa: try to activate hw accelaration for pid %u (%s)\n",
          pid_handle, pid->cfg.name);

   hwpa = &ctx->hardware_pa;
   if (   pid->rx_channel_activated == 0
       && pid->ingress_framing == avm_pa_framing_ether
       && pid->ecfg.rx_slow
       && hwpa->alloc_rx_channel) {
      if ((*hwpa->alloc_rx_channel)(pid_handle) < 0) {
         printk(KERN_ERR "avm_pa: can't activate rx channel, pid %u (%s)\n",
                pid_handle, pid->cfg.name);
      } else {
         pid->rx_channel_activated = 1;
         printk(KERN_INFO "avm_pa: rx channel activated, pid %u (%s)\n",
                pid_handle, pid->cfg.name);
      }
   }
   if (   pid->tx_channel_activated == 0
       && (   pid->egress_framing == avm_pa_framing_ether
           || pid->egress_framing == avm_pa_framing_ptype)
       && hwpa->alloc_tx_channel) {
      if ((*hwpa->alloc_tx_channel)(pid_handle) < 0) {
         printk(KERN_ERR "avm_pa: can't activate tx channel, pid %u (%s)\n",
                pid_handle, pid->cfg.name);
      } else {
         pid->tx_channel_activated = 1;
         printk(KERN_INFO "avm_pa: tx channel activated, pid %u (%s)\n",
                pid_handle, pid->cfg.name);
      }
   }
   return 0;
}
EXPORT_SYMBOL(avm_pa_pid_activate_hw_accelaration);

/*
 * Enables or disables a priority map.
 *
 * Returns 0 on success, -1 if the PID is not registered, -2 if 'prio_map'
 * is not below AVM_PA_COUNT_PRIO_MAPS.
 */
int avm_pa_pid_prio_map_enable(avm_pid_handle pid_handle, unsigned short prio_map, int enable)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_pid *pid = PA_PID(ctx, pid_handle);

   if (pid_handle != pid->pid_handle) {
      printk(KERN_ERR "%s: pid handle %u not registered\n", __FUNCTION__, pid_handle);
      return -1;
} if (prio_map >= AVM_PA_COUNT_PRIO_MAPS) { printk(KERN_ERR "%s: prio map %hu does not exist\n", __FUNCTION__, prio_map); return -2; } pid->prio_maps[prio_map].enabled = enable ? 1 : 0; return 0; } EXPORT_SYMBOL(avm_pa_pid_prio_map_enable); /* Resets a priority map */ int avm_pa_pid_prio_map_reset(avm_pid_handle pid_handle, unsigned short prio_map) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_pid *pid = PA_PID(ctx, pid_handle); if (pid_handle != pid->pid_handle) { printk(KERN_ERR "%s: pid handle %u not registered\n", __FUNCTION__, pid_handle); return -1; } if (prio_map >= AVM_PA_COUNT_PRIO_MAPS) { printk(KERN_ERR "%s: prio map %hu does not exist\n", __FUNCTION__, prio_map); return -2; } memset(pid->prio_maps[prio_map].prios, 0, sizeof(pid->prio_maps[prio_map].prios)); return 0; } EXPORT_SYMBOL(avm_pa_pid_prio_map_reset); /* Sets the priority per queue */ int avm_pa_pid_prio_map_set_prio_per_queue(avm_pid_handle pid_handle, unsigned short prio_map, unsigned int queue, unsigned int prio) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_pid *pid = PA_PID(ctx, pid_handle); if (pid_handle != pid->pid_handle) { printk(KERN_ERR "%s: pid handle %u not registered\n", __FUNCTION__, pid_handle); return -1; } if (prio_map >= AVM_PA_COUNT_PRIO_MAPS) { printk(KERN_ERR "%s: prio map %hu does not exist\n", __FUNCTION__, prio_map); return -2; } if (queue >= AVM_PA_MAX_PRIOS) { printk(KERN_ERR "%s: prio map %hu queue %u out of bounds\n", __FUNCTION__, prio_map, queue); return -3; } /* A value of 0 for the prio parameter will render the underlying priority * unspecified. An unspecified priority will not be used for setting any * skb priority. 
*/ pid->prio_maps[prio_map].prios[queue] = prio; return 0; } EXPORT_SYMBOL(avm_pa_pid_prio_map_set_prio_per_queue); int avm_pa_pid_activate_tcpackprio(avm_pid_handle pid_handle, int enable, unsigned int prio) { /* Enable / disable the tack priority map to retain backwards compatibility with the old prioack procfs interface */ if (avm_pa_pid_prio_map_enable(pid_handle, AVM_PA_PRIO_MAP_TACK, enable)) { return -1; } return avm_pa_pid_prio_map_set_prio_per_queue(pid_handle, AVM_PA_PRIO_MAP_TACK, AVM_PA_BE_QUEUE, enable ? prio : 0); } EXPORT_SYMBOL(avm_pa_pid_activate_tcpackprio); int avm_pa_pid_activate_tgetprio(avm_pid_handle pid_handle, int enable, unsigned int prio) { /* Enable / disable the tget priority map to retain backwards compatibility with the old prioack procfs interface */ if (avm_pa_pid_prio_map_enable(pid_handle, AVM_PA_PRIO_MAP_TGET, enable)) { return -1; } return avm_pa_pid_prio_map_set_prio_per_queue(pid_handle, AVM_PA_PRIO_MAP_TGET, AVM_PA_BE_QUEUE, enable ? prio : 0); } EXPORT_SYMBOL(avm_pa_pid_activate_tgetprio); #ifndef AVM_PA_NO_REPORT_FUNCTION void avm_pa_hardware_session_report(avm_session_handle session_handle, u32 pkts, u64 bytes) { struct avm_pa_session *session; AVM_PA_LOCK_DECLARE; AVM_PA_WRITE_LOCK(); if ( (session = pa_session_get_unlocked(session_handle)) == 0 || session->lru != AVM_PA_LRU_ACTIVE) { AVM_PA_WRITE_UNLOCK(); if (net_ratelimit()) printk(KERN_ERR "avm_pa_hardware_session_report: no session %u\n", session_handle); return; } session->hw_stats.tx_pkts += pkts; session->hw_stats.tx_bytes += bytes; AVM_PA_WRITE_UNLOCK(); } EXPORT_SYMBOL(avm_pa_hardware_session_report); #endif void avm_pa_register_hardware_pa(struct avm_hardware_pa *pa_functions) { struct avm_pa_global *ctx = &pa_glob; if (pa_functions) { ctx->hardware_pa = *pa_functions; if (pa_functions->alloc_tx_channel || pa_functions->alloc_rx_channel) { avm_pid_handle n; for (n=1; n < CONFIG_AVM_PA_MAX_PID; n++) { struct avm_pa_pid *pid = PA_PID(ctx, n); if ( pid->pid_handle 
== n && pid->egress_framing == avm_pa_framing_ptype) { avm_pa_pid_activate_hw_accelaration(n); } } } } else { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_session *session, *next; AVM_PA_LOCK_DECLARE; AVM_PA_WRITE_LOCK(); /* stop adding hw sessions */ ctx->hardware_pa.add_session = 0; /* kill all sessions in hw pa */ session = ctx->sess_lru[AVM_PA_LRU_ACTIVE].lru_head; while (session) { next = session->lru_next; if (session->in_hw) { pa_session_flush_unlocked(session, "unregister hw pa"); ctx->stats.sess_flushed++; } session = next; } /* remove hw session from dead sessions */ session = ctx->sess_lru[AVM_PA_LRU_DEAD].lru_head; while (session) { next = session->lru_next; session->in_hw = 0; avm_pa_set_hw_session(session, NULL); session = next; } AVM_PA_WRITE_UNLOCK(); /* delete hw pa functions */ memset(&ctx->hardware_pa, 0, sizeof(struct avm_hardware_pa)); } } EXPORT_SYMBOL(avm_pa_register_hardware_pa); /* ------------------------------------------------------------------------ */ static void pa_show_brief(pa_fprintf fprintffunc, void *arg) { struct avm_pa_global *ctx = &pa_glob; avm_vpid_handle n; char *mode; (*fprintffunc)(arg, "Version : For Linux %s ( %s )\n", UTS_RELEASE, UTS_VERSION); if (ctx->disabled) mode = "disabled"; else if (ctx->fw_disabled) mode = "testmode"; else if (avm_pa_capture_running()) mode = "capture"; else mode = "enabled"; (*fprintffunc)(arg, "State : %s\n", mode); if (ctx->hardware_pa.add_session) { mode = ctx->hw_ppa_disabled ? 
"disabled" : "enable"; (*fprintffunc)(arg, "HW State : %s\n", mode); } (*fprintffunc)(arg, "BSessions : %u\n", (unsigned)ctx->stats.nbsessions); (*fprintffunc)(arg, "Sessions : %hu\n", ctx->sess_lru[AVM_PA_LRU_ACTIVE].nsessions); (*fprintffunc)(arg, "Max Sessions : %hu\n", ctx->sess_lru[AVM_PA_LRU_ACTIVE].maxsessions); (*fprintffunc)(arg, "Sessions (dead): %hu\n", ctx->sess_lru[AVM_PA_LRU_DEAD].nsessions); (*fprintffunc)(arg, "Sessions (free): %hu\n", ctx->sess_lru[AVM_PA_LRU_FREE].nsessions); (*fprintffunc)(arg, "Queuelen : %lu\n", (unsigned long)skb_queue_len(&ctx->tbfqueue)); (*fprintffunc)(arg, "Rx pkts/secs : %lu\n", (unsigned long)ctx->stats.rx_pps); if (ctx->tbf_enabled) { (*fprintffunc)(arg, "Limit pkts/sec : %lu\n", (unsigned long)ctx->rate); } (*fprintffunc)(arg, "Fw pkts/sec : %lu\n", (unsigned long)ctx->stats.fw_pps); (*fprintffunc)(arg, "Ov pkts/sec : %lu\n", (unsigned long)ctx->stats.overlimit_pps); (*fprintffunc)(arg, "Rx pakets : %lu\n", (unsigned long)ctx->stats.rx_pkts); (*fprintffunc)(arg, "Rx bypass : %lu\n", (unsigned long)ctx->stats.rx_bypass); (*fprintffunc)(arg, "Rx ttl <= 1 : %lu\n", (unsigned long)ctx->stats.rx_ttl); (*fprintffunc)(arg, "Rx broadcast : %lu\n", (unsigned long)ctx->stats.rx_broadcast); (*fprintffunc)(arg, "Rx search : %lu\n", (unsigned long)ctx->stats.rx_search); (*fprintffunc)(arg, "Rx match : %lu\n", (unsigned long)ctx->stats.rx_match); (*fprintffunc)(arg, "Rx modified : %lu\n", (unsigned long)ctx->stats.rx_mod); (*fprintffunc)(arg, "Fw pakets : %lu\n", (unsigned long)ctx->stats.fw_pkts); (*fprintffunc)(arg, "Fw local : %lu\n", (unsigned long)ctx->stats.fw_local); for (n=1; n < CONFIG_AVM_PA_MAX_VPID; n++) { struct avm_pa_vpid *vpid = PA_VPID(ctx, n); unsigned long rx, tx; if (vpid->vpid_handle == 0) continue; rx = vpid->stats.rx_unicast_pkt + vpid->stats.rx_multicast_pkt + vpid->stats.rx_broadcast_pkt; tx = vpid->stats.tx_unicast_pkt + vpid->stats.tx_multicast_pkt + vpid->stats.tx_broadcast_pkt; (*fprintffunc)(arg, 
"VPID%-2d: RX %10lu TX %10lu %s\n", vpid->vpid_handle, rx, tx, vpid->cfg.name); } } static void pa_show_status(pa_fprintf fprintffunc, void *arg) { struct avm_pa_global *ctx = &pa_glob; char *mode; if (ctx->disabled) mode = "disabled"; else if (ctx->fw_disabled) mode = "testmode"; else if (avm_pa_capture_running()) mode = "capture"; else mode = "enabled"; (*fprintffunc)(arg, "State : %s\n", mode); if (ctx->hardware_pa.add_session) { mode = ctx->hw_ppa_disabled ? "disabled" : "enable"; (*fprintffunc)(arg, "HW State : %s\n", mode); } switch (ctx->load_control) { case LOADCONTROL_IDLE: mode = "idle"; break; case LOADCONTROL_POWER: mode = "power"; break; case LOADCONTROL_IRQ: mode = "irq"; break; case LOADCONTROL_POWERIRQ: mode = "powerirq"; break; default: mode = "????"; break; } (*fprintffunc)(arg, "Loadcontrol : %s\n", mode); (*fprintffunc)(arg, "IDLE mswin : %u %u\n", ctx->idle_mswin_low, ctx->idle_mswin_high); (*fprintffunc)(arg, "IRQ mswin : %u %u\n", ctx->irq_mswin_low, ctx->irq_mswin_high); (*fprintffunc)(arg, "TelephonyReduce: %u\n", ctx->telephony_reduce); (*fprintffunc)(arg, "Maxrate : %u\n", ctx->maxrate); mode = ctx->tbf_enabled ? 
"enabled" : "disabled"; (*fprintffunc)(arg, "TBF : %s\n", mode); (*fprintffunc)(arg, "Limit Rate : %u\n", ctx->rate); (*fprintffunc)(arg, "Current Rate : %lu\n", (unsigned long)ctx->stats.fw_pps); (*fprintffunc)(arg, "user msecs/sec : %lu\n", (unsigned long)ctx->stats.userms); (*fprintffunc)(arg, "idle msecs/sec : %lu\n", (unsigned long)ctx->stats.idlems); (*fprintffunc)(arg, "irq msecs/sec : %lu\n", (unsigned long)ctx->stats.irqms); } static void pa_show_vpids(pa_fprintf fprintffunc, void *arg) { struct avm_pa_global *ctx = &pa_glob; avm_vpid_handle n; for (n=1; n < CONFIG_AVM_PA_MAX_VPID; n++) { struct avm_pa_vpid *vpid = PA_VPID(ctx, n); if (vpid->vpid_handle == 0) continue; (*fprintffunc)(arg, "VPID%-2d: %4d/%4d %s\n", vpid->vpid_handle, vpid->cfg.v4_mtu, vpid->cfg.v6_mtu, vpid->cfg.name); (*fprintffunc)(arg, " %10s %10s %10s %10s %10s %10s %10s %10s\n", "unicast", "multicast", "broadcast", "discard", "error", "bytes", "mc bytes", "bc bytes"); (*fprintffunc)(arg, " RX %10lu %10lu %10lu %10lu %10s %10Lu %10Lu %10Lu\n", (unsigned long)vpid->stats.rx_unicast_pkt, (unsigned long)vpid->stats.rx_multicast_pkt, (unsigned long)vpid->stats.rx_broadcast_pkt, (unsigned long)vpid->stats.rx_discard, "-", (unsigned long long)vpid->stats.rx_bytes, (unsigned long long)vpid->stats.rx_multicast_bytes, (unsigned long long)vpid->stats.rx_broadcast_bytes); (*fprintffunc)(arg, " TX %10lu %10lu %10lu %10lu %10lu %10Lu\n", (unsigned long)vpid->stats.tx_unicast_pkt, (unsigned long)vpid->stats.tx_multicast_pkt, (unsigned long)vpid->stats.tx_broadcast_pkt, (unsigned long)vpid->stats.tx_discard, (unsigned long)vpid->stats.tx_error, (unsigned long long)vpid->stats.tx_bytes); } } static void pa_show_vpids_hw_stats(pa_fprintf fprintffunc, void *arg) { struct avm_pa_global *ctx = &pa_glob; avm_vpid_handle n; for (n=1; n < CONFIG_AVM_PA_MAX_VPID; n++) { struct avm_pa_vpid *vpid = PA_VPID(ctx, n); unsigned int prio; if (vpid->vpid_handle == 0) continue; (*fprintffunc)(arg, "VPID %-2d () %s\n", 
vpid->vpid_handle, vpid->cfg.name); for (prio = 0; prio < AVM_PA_MAX_PRIOS; prio++) { if (vpid->hw_stats[prio].pkts || vpid->hw_stats[prio].bytes) { (*fprintffunc)(arg, " %u: %lu pkts / %llu bytes\n", prio, (unsigned long)vpid->hw_stats[prio].pkts, (unsigned long long)vpid->hw_stats[prio].bytes); } } } } static void pa_show_vpids_all_stats(pa_fprintf fprintffunc, void *arg) { struct avm_pa_global *ctx = &pa_glob; avm_vpid_handle n; for (n=1; n < CONFIG_AVM_PA_MAX_VPID; n++) { struct avm_pa_vpid *vpid = PA_VPID(ctx, n); unsigned int prio; if (vpid->vpid_handle == 0) continue; (*fprintffunc)(arg, "VPID %-2d () %s\n", vpid->vpid_handle, vpid->cfg.name); for (prio = 0; prio < AVM_PA_MAX_PRIOS; prio++) { unsigned long sw_pkts, hw_pkts; unsigned long long sw_bytes, hw_bytes; unsigned long asw_pkts, ahw_pkts; unsigned long long asw_bytes, ahw_bytes; sw_pkts = vpid->sw_stats[prio].pkts; hw_pkts = vpid->hw_stats[prio].pkts; asw_pkts = vpid->associated_sw_stats[prio].pkts; ahw_pkts = vpid->associated_hw_stats[prio].pkts; sw_bytes = vpid->sw_stats[prio].bytes; hw_bytes = vpid->hw_stats[prio].bytes; asw_bytes = vpid->associated_sw_stats[prio].bytes; ahw_bytes = vpid->associated_hw_stats[prio].bytes; if ( sw_pkts || hw_pkts || asw_pkts || ahw_pkts || sw_bytes || hw_bytes || asw_bytes || ahw_bytes) { (*fprintffunc)(arg, " %u: pkts ", prio); (*fprintffunc)(arg, " %lu+%lu = %lu", sw_pkts, hw_pkts, sw_pkts + hw_pkts); (*fprintffunc)(arg, " / %lu+%lu = %lu\n", asw_pkts, ahw_pkts, asw_pkts + ahw_pkts); (*fprintffunc)(arg, " %u: bytes", prio); (*fprintffunc)(arg, " %llu+%llu = %llu", sw_bytes, hw_bytes, sw_bytes + hw_bytes); (*fprintffunc)(arg, " / %llu+%llu = %llu\n", asw_bytes, ahw_bytes, asw_bytes + ahw_bytes); } } } } void avm_pa_dev_set_ipv4_mtu(struct avm_pa_dev_info *devinfo, u16 mtu) { if (devinfo->vpid_handle) { struct avm_pa_global *ctx = &pa_glob; PA_VPID(ctx, devinfo->vpid_handle)->cfg.v4_mtu = mtu; } } EXPORT_SYMBOL(avm_pa_dev_set_ipv4_mtu); void 
avm_pa_dev_set_ipv6_mtu(struct avm_pa_dev_info *devinfo, u16 mtu) { if (devinfo->vpid_handle) { struct avm_pa_global *ctx = &pa_glob; PA_VPID(ctx, devinfo->vpid_handle)->cfg.v6_mtu = mtu; } } EXPORT_SYMBOL(avm_pa_dev_set_ipv6_mtu); int avm_pa_dev_get_stats(struct avm_pa_dev_info *devinfo, struct avm_pa_vpid_stats *stats) { if (devinfo->vpid_handle) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_vpid *vpid = PA_VPID(ctx, devinfo->vpid_handle); if (vpid->vpid_handle == devinfo->vpid_handle) { memcpy(stats, &vpid->stats, sizeof(struct avm_pa_vpid_stats)); return 0; } } memset(stats, 0, sizeof(struct avm_pa_vpid_stats)); return -1; } EXPORT_SYMBOL(avm_pa_dev_get_stats); int avm_pa_dev_get_hw_stats(struct avm_pa_dev_info *devinfo, struct avm_pa_traffic_stats *stats, unsigned int prio) { if (prio >= AVM_PA_MAX_PRIOS) return -1; if (devinfo->vpid_handle) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_vpid *vpid = PA_VPID(ctx, devinfo->vpid_handle); if (vpid->vpid_handle == devinfo->vpid_handle) { stats->pkts = vpid->hw_stats[prio].pkts; stats->bytes = vpid->hw_stats[prio].bytes; return 0; } } memset(stats, 0, sizeof(struct avm_pa_traffic_stats)); return -1; } EXPORT_SYMBOL(avm_pa_dev_get_hw_stats); #ifdef AVM_PA_HAS_GUEST_STATS int avm_pa_dev_get_hw_guest_stats(struct avm_pa_dev_info *devinfo, struct avm_pa_traffic_stats *stats, unsigned int prio) { if (prio >= AVM_PA_MAX_PRIOS) return -1; if (devinfo->vpid_handle) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_vpid *vpid = PA_VPID(ctx, devinfo->vpid_handle); if (vpid->vpid_handle == devinfo->vpid_handle) { stats->pkts = vpid->guest_hw_stats[prio].pkts; stats->bytes = vpid->guest_hw_stats[prio].bytes; return 0; } } memset(stats, 0, sizeof(struct avm_pa_traffic_stats)); return -1; } EXPORT_SYMBOL(avm_pa_dev_get_hw_guest_stats); #endif int avm_pa_dev_get_prio_stats(struct avm_pa_dev_info *devinfo, struct avm_pa_prio_stats *stats, unsigned int prio) { if (prio >= AVM_PA_MAX_PRIOS) return -1; if 
(devinfo->vpid_handle) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_vpid *vpid = PA_VPID(ctx, devinfo->vpid_handle); if (vpid->vpid_handle == devinfo->vpid_handle) { stats->sw = vpid->sw_stats[prio]; stats->hw = vpid->hw_stats[prio]; stats->associated_sw = vpid->associated_sw_stats[prio]; stats->associated_hw = vpid->associated_hw_stats[prio]; stats->timestamp = vpid->prio_stats_timestamp; return 0; } } memset(stats, 0, sizeof(struct avm_pa_prio_stats)); return -1; } EXPORT_SYMBOL(avm_pa_dev_get_prio_stats); #ifdef AVM_PA_HAS_GUEST_STATS int avm_pa_dev_get_guest_prio_stats(struct avm_pa_dev_info *devinfo, struct avm_pa_prio_stats *stats, unsigned int prio) { if (prio >= AVM_PA_MAX_PRIOS) return -1; if (devinfo->vpid_handle) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_vpid *vpid = PA_VPID(ctx, devinfo->vpid_handle); if (vpid->vpid_handle == devinfo->vpid_handle) { stats->sw = vpid->guest_sw_stats[prio]; stats->hw = vpid->guest_hw_stats[prio]; stats->timestamp = vpid->prio_stats_timestamp; return 0; } } memset(stats, 0, sizeof(struct avm_pa_prio_stats)); return -1; } EXPORT_SYMBOL(avm_pa_dev_get_guest_prio_stats); #endif int avm_pa_dev_get_ingress_prio_stats(struct avm_pa_dev_info *devinfo, struct avm_pa_prio_stats *stats, unsigned int prio) { if (prio >= AVM_PA_MAX_PRIOS) return -1; if (devinfo->vpid_handle) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_vpid *vpid = PA_VPID(ctx, devinfo->vpid_handle); if (vpid->vpid_handle == devinfo->vpid_handle) { stats->sw = vpid->ingress_sw_stats[prio]; stats->hw = vpid->ingress_hw_stats[prio]; stats->associated_sw = vpid->associated_ingress_sw_stats[prio]; stats->associated_hw = vpid->associated_ingress_hw_stats[prio]; stats->timestamp = vpid->prio_stats_timestamp; return 0; } } memset(stats, 0, sizeof(struct avm_pa_prio_stats)); return -1; } EXPORT_SYMBOL(avm_pa_dev_get_ingress_prio_stats); int avm_pa_dev_reset_stats(struct avm_pa_dev_info *devinfo) { if (devinfo->vpid_handle) { struct avm_pa_global 
*ctx = &pa_glob; struct avm_pa_vpid *vpid = PA_VPID(ctx, devinfo->vpid_handle); if (vpid->vpid_handle == devinfo->vpid_handle) { memset(&vpid->stats, 0, sizeof(struct avm_pa_vpid_stats)); memset(vpid->sw_stats, 0, sizeof(vpid->sw_stats)); memset(vpid->hw_stats, 0, sizeof(vpid->hw_stats)); return 0; } } return -1; } EXPORT_SYMBOL(avm_pa_dev_reset_stats); void avm_pa_flush_sessions(void) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_session *session; AVM_PA_LOCK_DECLARE; AVM_PA_WRITE_LOCK(); session = ctx->sess_lru[AVM_PA_LRU_ACTIVE].lru_head; while (session) { pa_session_flush_unlocked(session, "flush"); ctx->stats.sess_flushed++; session = session->lru_next; } AVM_PA_WRITE_UNLOCK(); } EXPORT_SYMBOL(avm_pa_flush_sessions); void avm_pa_flush_lispencap_sessions(void) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_session *session, *next; AVM_PA_LOCK_DECLARE; AVM_PA_WRITE_LOCK(); session = ctx->sess_lru[AVM_PA_LRU_ACTIVE].lru_head; while (session) { next = session->lru_next; if (session->mod.pkttype & AVM_PA_PKTTYPE_LISP) { pa_session_flush_unlocked(session, "lispencap flush"); ctx->stats.sess_flushed++; } session = next; } AVM_PA_WRITE_UNLOCK(); } EXPORT_SYMBOL(avm_pa_flush_lispencap_sessions); void avm_pa_flush_rtp_session(struct sock *sk) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_session *session, *next; AVM_PA_LOCK_DECLARE; AVM_PA_WRITE_LOCK(); session = ctx->sess_lru[AVM_PA_LRU_ACTIVE].lru_head; while (session) { next = session->lru_next; if ( session->negress == 1 && session->egress[0].type == avm_pa_egresstype_rtp && session->egress[0].rtp.sk == sk) { pa_session_flush_unlocked(session, "rtp flush"); ctx->stats.sess_flushed++; } session = next; } AVM_PA_WRITE_UNLOCK(); } EXPORT_SYMBOL(avm_pa_flush_rtp_session); void avm_pa_flush_multicast_sessions(void) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_session *session, *next; AVM_PA_LOCK_DECLARE; AVM_PA_WRITE_LOCK(); session = ctx->sess_lru[AVM_PA_LRU_ACTIVE].lru_head; while 
(session) { next = session->lru_next; if (session->ingress.casttype == AVM_PA_IS_MULTICAST) { pa_session_flush_unlocked(session, "multicast flush"); ctx->stats.sess_flushed++; } session = next; } AVM_PA_WRITE_UNLOCK(); } EXPORT_SYMBOL(avm_pa_flush_multicast_sessions); void avm_pa_flush_multicast_sessions_for_group(u32 group) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_session *session, *next; AVM_PA_LOCK_DECLARE; AVM_PA_WRITE_LOCK(); session = ctx->sess_lru[AVM_PA_LRU_ACTIVE].lru_head; while (session) { next = session->lru_next; if (session->ingress.casttype == AVM_PA_IS_MULTICAST) { int i; for (i = 0; i < session->ingress.nmatch; i++) { struct avm_pa_match_info *p = &session->ingress.match[i]; if (p->type == AVM_PA_IPV4) { hdrunion_t *hdr = (hdrunion_t *)&session->ingress.hdrcopy[p->offset + session->ingress.hdroff]; if (group == hdr->iph.daddr) { pa_session_flush_unlocked(session, "multicast flush"); ctx->stats.sess_flushed++; break; } } } } session = next; } AVM_PA_WRITE_UNLOCK(); } EXPORT_SYMBOL(avm_pa_flush_multicast_sessions_for_group); void avm_pa_flush_sessions_for_vpid(avm_vpid_handle vpid_handle) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_session *session, *next; AVM_PA_LOCK_DECLARE; if ( vpid_handle == 0 || PA_VPID(ctx, vpid_handle)->vpid_handle != vpid_handle) return; AVM_PA_WRITE_LOCK(); session = ctx->sess_lru[AVM_PA_LRU_ACTIVE].lru_head; while (session) { next = session->lru_next; if (session->ingress_vpid_handle == vpid_handle) { pa_session_flush_unlocked(session, "ingress vpid flush"); ctx->stats.sess_flushed++; } else { int negress; for (negress = 0; negress < session->negress; negress++) { struct avm_pa_egress *egress = &session->egress[negress]; if (egress->vpid_handle == vpid_handle) { pa_session_flush_unlocked(session, "egress vpid flush"); ctx->stats.sess_flushed++; break; } } } session = next; } AVM_PA_WRITE_UNLOCK(); } EXPORT_SYMBOL(avm_pa_flush_sessions_for_vpid); void avm_pa_flush_sessions_for_pid(avm_pid_handle 
pid_handle) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_session *session, *next; AVM_PA_LOCK_DECLARE; if ( pid_handle == 0 || PA_PID(ctx, pid_handle)->pid_handle != pid_handle) return; AVM_PA_WRITE_LOCK(); session = ctx->sess_lru[AVM_PA_LRU_ACTIVE].lru_head; while (session) { next = session->lru_next; if (session->ingress_pid_handle == pid_handle) { pa_session_flush_unlocked(session, "ingress pid flush"); ctx->stats.sess_flushed++; } else { int negress; for (negress = 0; negress < session->negress; negress++) { struct avm_pa_egress *egress = &session->egress[negress]; if (egress->pid_handle == pid_handle) { pa_session_flush_unlocked(session, "egress pid flush"); ctx->stats.sess_flushed++; break; } } } session = next; } AVM_PA_WRITE_UNLOCK(); } EXPORT_SYMBOL(avm_pa_flush_sessions_for_pid); static void avm_pa_sip_is_active(int state) { struct avm_pa_global *ctx = &pa_glob; unsigned rate; if (ctx->disabled) return; if (state) { if (ctx->telephony_active == 0) { rate = ctx->tbf_enabled ? ctx->rate : ctx->maxrate; ctx->rate = rate - (rate*ctx->telephony_reduce)/100; ctx->load_control = LOADCONTROL_POWERIRQ; avm_pa_tbf_update(ctx->rate, ctx->pktbuffer, ctx->pktpeak); ctx->tbf_enabled = 1; printk(KERN_INFO "avm_pa: telephony active%s\n", ctx->rate != rate ? 
" (reduce)" : ""); } ctx->telephony_active = 1; } else { if (ctx->telephony_active) { ctx->load_control = LOADCONTROL_IDLE; avm_pa_tbf_disable(); printk(KERN_INFO "avm_pa: telephony inactive\n"); } ctx->telephony_active = 0; } if (ctx->hardware_pa.telephony_state) (*ctx->hardware_pa.telephony_state)(ctx->telephony_active); } void avm_pa_telefon_state(int state) { printk(KERN_INFO "avm_pa: avm_pa_telefon_state\n"); } EXPORT_SYMBOL(avm_pa_telefon_state); /* ------------------------------------------------------------------------ */ /* ------- packet rate estimater ------------------------------------------ */ /* ------------------------------------------------------------------------ */ static void avm_pa_est_timer(unsigned long data) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_est *e; u32 npackets; u32 rate; /* fw pkts/s */ e = &ctx->fw_est; npackets = ctx->stats.fw_pkts; if (npackets >= e->last_packets) { rate = (npackets - e->last_packets)<<(12 - ctx->est_idx); e->last_packets = npackets; e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log); ctx->stats.fw_pps = (e->avpps+0x1FF)>>10; } else { e->last_packets = npackets; } if ( ctx->load_reduce == 0 && ctx->stats.fw_pps > ctx->maxrate) ctx->maxrate = ctx->stats.fw_pps; /* rx pkts/s */ e = &ctx->rx_est; npackets = ctx->stats.rx_pkts; if (npackets >= e->last_packets) { rate = (npackets - e->last_packets)<<(12 - ctx->est_idx); e->last_packets = npackets; e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log); ctx->stats.rx_pps = (e->avpps+0x1FF)>>10; } else { e->last_packets = npackets; } /* queued pkts/s */ e = &ctx->overlimit_est; npackets = ctx->stats.rx_overlimit; if (npackets >= e->last_packets) { rate = (npackets - e->last_packets)<<(12 - ctx->est_idx); e->last_packets = npackets; e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log); ctx->stats.overlimit_pps = (e->avpps+0x1FF)>>10; } else { e->last_packets = npackets; } mod_timer(&ctx->est_timer, jiffies + ((HZ/4) << 
ctx->est_idx)); } static void avm_pa_setup_est(void) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_est *e; del_timer(&ctx->est_timer); e = &ctx->fw_est; e->ewma_log = ctx->ewma_log; e->last_packets = ctx->stats.fw_pkts; e = &ctx->rx_est; e->ewma_log = ctx->ewma_log; e->last_packets = ctx->stats.rx_pkts; e = &ctx->overlimit_est; e->ewma_log = ctx->ewma_log; e->last_packets = ctx->stats.rx_overlimit; mod_timer(&ctx->est_timer, jiffies + ((HZ/4) << ctx->est_idx)); } static void avm_pa_unsetup_est(void) { struct avm_pa_global *ctx = &pa_glob; del_timer(&ctx->est_timer); } /* ------------------------------------------------------------------------ */ /* -------- cputime estimater --------------------------------------------- */ /* ------------------------------------------------------------------------ */ static void avm_pa_add_cputimes(int cpu, cputime64_t *usersum, cputime64_t *idlesum, cputime64_t *irqsum) { #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 3, 0) *usersum += kcpustat_cpu(cpu).cpustat[CPUTIME_USER]; *usersum += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE]; *usersum += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM]; *idlesum += kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; *idlesum += arch_idle_time(cpu); *idlesum += kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT]; *irqsum += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ]; *irqsum += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ]; #else *usersum = cputime64_add(*usersum, kstat_cpu(cpu).cpustat.user); *usersum = cputime64_add(*usersum, kstat_cpu(cpu).cpustat.nice); *usersum = cputime64_add(*usersum, kstat_cpu(cpu).cpustat.system); *idlesum = cputime64_add(*idlesum, kstat_cpu(cpu).cpustat.idle); *idlesum = cputime64_add(*idlesum, arch_idle_time(cpu)); *idlesum = cputime64_add(*idlesum, kstat_cpu(cpu).cpustat.iowait); *irqsum = cputime64_add(*irqsum, kstat_cpu(cpu).cpustat.irq); *irqsum = cputime64_add(*irqsum, kstat_cpu(cpu).cpustat.softirq); #endif } static inline void avm_pa_get_cputimes(cputime64_t *usertime, cputime64_t *idletime, 
cputime64_t *irqtime)
{
   cputime64_t usersum, idlesum, irqsum;
   int i;

   usersum = idlesum = irqsum = cputime64_zero;
   /* accumulate over every possible cpu */
   for_each_possible_cpu(i) {
      avm_pa_add_cputimes(i, &usersum, &idlesum, &irqsum);
   }
   /* NOTE(review): the result of arch_irq_stat() is added to the irq time
    * sum; confirm that its unit matches cputime on the target arch. */
   irqsum += arch_irq_stat();
   *usertime = usersum;
   *idletime = idlesum;
   *irqtime = irqsum;
}

/*
 * Periodic timer: updates the smoothed msecs-per-second values for user,
 * idle and irq cpu time (ctx->stats.userms/idlems/irqms) and re-arms itself.
 *
 * For each class the difference to the previous raw cputime sample is
 * converted to msecs, scaled by the estimator interval and folded into an
 * exponentially weighted average. If a sum went backwards only the
 * reference value is resynchronised.
 */
static void avm_pa_cputime_est_timer(unsigned long data)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_cputime_est *e;
   cputime64_t usersum, idlesum, irqsum;
   cputime64_t cputime;
   u32 rate;
   /* per-interval differences in msecs, only used for the debug output */
   u32 userdiff = 0;
   u32 idlediff = 0;
   u32 irqdiff = 0;

   avm_pa_get_cputimes(&usersum, &idlesum, &irqsum);

   /* usertime/s */
   e = &ctx->cputime_user_est;
   cputime = usersum;
   if (cputime >= e->last_cputime) {
      userdiff = cputime_to_msecs(cputime - e->last_cputime);
      rate = userdiff<<(12 - ctx->cputime_est_idx);
      e->last_cputime = cputime;
      e->avtps += (rate >> e->ewma_log) - (e->avtps >> e->ewma_log);
      ctx->stats.userms = (e->avtps+0x1FF)>>10;
   } else {
      e->last_cputime = cputime;
   }

   /* idletime/s */
   e = &ctx->cputime_idle_est;
   cputime = idlesum;
   if (cputime >= e->last_cputime) {
      idlediff = cputime_to_msecs(cputime - e->last_cputime);
      rate = idlediff<<(12 - ctx->cputime_est_idx);
      e->last_cputime = cputime;
      e->avtps += (rate >> e->ewma_log) - (e->avtps >> e->ewma_log);
      ctx->stats.idlems = (e->avtps+0x1FF)>>10;
   } else {
      e->last_cputime = cputime;
   }

   /* irqtime/s */
   e = &ctx->cputime_irq_est;
   cputime = irqsum;
   if (cputime >= e->last_cputime) {
      irqdiff = cputime_to_msecs(cputime - e->last_cputime);
      rate = irqdiff<<(12 - ctx->cputime_est_idx);
      e->last_cputime = cputime;
      e->avtps += (rate >> e->ewma_log) - (e->avtps >> e->ewma_log);
      ctx->stats.irqms = (e->avtps+0x1FF)>>10;
   } else {
      e->last_cputime = cputime;
   }

   /* optional trace: raw differences and smoothed values */
   if (ctx->dbgcputime)
      printk(KERN_INFO "avm_pa: %lu/%lu/%lu (%lu/%lu/%lu)\n",
             (unsigned long)userdiff,
             (unsigned long)idlediff,
             (unsigned long)irqdiff,
             (unsigned long)ctx->stats.userms,
             (unsigned long)ctx->stats.idlems,
             (unsigned long)ctx->stats.irqms);

   mod_timer(&ctx->cputime_est_timer, jiffies +
((HZ/4)<cputime_est_idx)); } static void avm_pa_setup_cputime_est(void) { struct avm_pa_global *ctx = &pa_glob; struct avm_pa_cputime_est *e; cputime64_t usersum, idlesum, irqsum; del_timer(&ctx->cputime_est_timer); avm_pa_get_cputimes(&usersum, &idlesum, &irqsum); e = &ctx->cputime_user_est; e->ewma_log = ctx->cputime_ewma_log; e->last_cputime = cputime_to_msecs(usersum); e = &ctx->cputime_idle_est; e->ewma_log = ctx->cputime_ewma_log; e->last_cputime = cputime_to_msecs(idlesum); e = &ctx->cputime_irq_est; e->ewma_log = ctx->cputime_ewma_log; e->last_cputime = cputime_to_msecs(irqsum); mod_timer(&ctx->cputime_est_timer, jiffies + ((HZ/4)<cputime_est_idx)); } static void avm_pa_unsetup_cputime_est(void) { struct avm_pa_global *ctx = &pa_glob; del_timer(&ctx->cputime_est_timer); } /* ------------------------------------------------------------------------ */ /* -------- value log ----------------------------------------------------- */ /* ------------------------------------------------------------------------ */ #if AVM_PA_TOKSTATS static int avm_pa_thread(void *reply_data) { struct avm_pa_global *ctx = &pa_glob; unsigned long wtime = msecs_to_jiffies(100); unsigned long rx_overlimit; set_user_nice(current, 19); { sigset_t blocked; sigfillset(&blocked); sigprocmask(SIG_BLOCK, &blocked, NULL); flush_signals(current); } rx_overlimit = ctx->stats.rx_overlimit; while (!kthread_should_stop()) { unsigned long endtime = jiffies + wtime; unsigned long overtime; unsigned long overlimit; unsigned long pps; schedule_timeout_interruptible(wtime); overlimit = ctx->stats.rx_overlimit - rx_overlimit; rx_overlimit = ctx->stats.rx_overlimit; overtime = jiffies - endtime; pps = ctx->stats.fw_pps; ctx->tok_pos = (ctx->tok_pos+1)%TOK_SAMLES; ctx->tok_state[ctx->tok_pos] = ctx->load_reduce; ctx->tok_overtime[ctx->tok_pos] = overtime; ctx->tok_rate[ctx->tok_pos] = ctx->rate; ctx->tok_pps[ctx->tok_pos] = pps; ctx->tok_overlimit[ctx->tok_pos] = overlimit; } return 0; } #endif /* 
------------------------------------------------------------------------ */ static inline void avm_pa_start_lc_timer(void) { struct avm_pa_global *ctx = &pa_glob; if (mod_timer(&ctx->lc_timer, jiffies + AVM_PA_LC_TIMEOUT*HZ) == 0) ctx->lc_overlimit = ctx->stats.rx_overlimit; } static inline void avm_pa_stop_lc_timer(void) { struct avm_pa_global *ctx = &pa_glob; del_timer(&ctx->lc_timer); } static void avm_pa_lc_timer_expired(unsigned long data) { struct avm_pa_global *ctx = &pa_glob; u32 overlimit = ctx->stats.rx_overlimit - ctx->lc_overlimit; unsigned rate; ctx->lc_overlimit = ctx->stats.rx_overlimit; if (ctx->load_control & LOADCONTROL_IRQ) { if ( ctx->stats.irqms >= ctx->irq_mswin_high && ctx->stats.fw_pps > AVM_PA_MINRATE) { unsigned percent = 1; if (ctx->tbf_enabled == 0) { ctx->rate = ctx->maxrate; percent = 4; } rate = ctx->rate; rate = rate - (rate*percent)/100; ctx->rate = rate; avm_pa_tbf_update(ctx->rate, ctx->pktbuffer, ctx->pktpeak); ctx->tbf_enabled = 1; printk(KERN_INFO "avm_pa: load reduce 0, rate %u down (pps %lu ov_pps %lu irqms %lu)\n", ctx->rate, (unsigned long)ctx->stats.fw_pps, (unsigned long)ctx->stats.overlimit_pps, (unsigned long)ctx->stats.irqms); } else if ( overlimit && ctx->load_reduce == 0 && ctx->tbf_enabled && ctx->stats.irqms < ctx->irq_mswin_low) { unsigned rate = ctx->rate; unsigned percent = 1; rate = rate + (rate*percent)/100; ctx->rate = rate; avm_pa_tbf_update(ctx->rate, ctx->pktbuffer, ctx->pktpeak); printk(KERN_INFO "avm_pa: load reduce 0, rate %u up (pps %lu ov_pps %lu irqms %lu)\n", ctx->rate, (unsigned long)ctx->stats.fw_pps, (unsigned long)ctx->stats.overlimit_pps, (unsigned long)ctx->stats.irqms); } } if (ctx->load_control & LOADCONTROL_IDLE) { static unsigned count = 0; static unsigned good = 0; static unsigned lowcount = 0; if (ctx->tbf_enabled) { if (ctx->stats.fw_pps > AVM_PA_MINRATE) lowcount = 0; else lowcount++; if (lowcount*AVM_PA_LC_TIMEOUT >= AVM_PA_TRAFFIC_IDLE_TBFDISABLE) { avm_pa_tbf_disable(); 
printk(KERN_INFO "avm_pa: %d seconds idle, tbf deactivated\n", lowcount*AVM_PA_LC_TIMEOUT);
            lowcount = 0;
         }
      }

      if (   ctx->stats.idlems <= ctx->idle_mswin_low
          && ctx->stats.fw_pps > AVM_PA_MINRATE) {
         /* too little idle time under traffic: lower the limit.
          * The step is 5% when the limiter was off or the previous run was
          * "good", otherwise it grows with the number of consecutive
          * reductions (1%, 2%, 5%). */
         unsigned percent;
         if (ctx->tbf_enabled == 0) {
            ctx->rate = ctx->maxrate;
            percent = 5;
         } else if (good) {
            percent = 5;
         } else {
            if (count < 3)
               percent = 1;
            else if (count < 5)
               percent = 2;
            else
               percent = 5;
         }
         good = 0;
         count++;
         rate = ctx->rate;
         rate = rate - (rate*percent)/100;
         ctx->rate = rate;
         avm_pa_tbf_update(ctx->rate, ctx->pktbuffer, ctx->pktpeak);
         ctx->tbf_enabled = 1;
         printk(KERN_INFO "avm_pa: rate %u down (pps %lu ov_pps %lu idlems %lu count %u)\n",
                ctx->rate,
                (unsigned long)ctx->stats.fw_pps,
                (unsigned long)ctx->stats.overlimit_pps,
                (unsigned long)ctx->stats.idlems,
                count);
      } else {
         count = 0;
         /* raise the limit by 1% only if it was hit, at least one "good"
          * run preceded this one, no power load reduction is active and
          * idle time is above the high mark */
         if (   overlimit
             && good
             && ctx->load_reduce == 0
             && ctx->tbf_enabled
             && ctx->stats.idlems > ctx->idle_mswin_high) {
            unsigned rate = ctx->rate;
            unsigned percent = 1;
            rate = rate + (rate*percent)/100;
            ctx->rate = rate;
            avm_pa_tbf_update(ctx->rate, ctx->pktbuffer, ctx->pktpeak);
            printk(KERN_INFO "avm_pa: rate %u up (pps %lu ov_pps %lu idlems %lu)\n",
                   ctx->rate,
                   (unsigned long)ctx->stats.fw_pps,
                   (unsigned long)ctx->stats.overlimit_pps,
                   (unsigned long)ctx->stats.idlems);
         }
         good++;
      }
   }
   /* re-arm for the next evaluation */
   avm_pa_start_lc_timer();
}

#ifdef CONFIG_AVM_POWERMETER
/*
 * Power meter callback: adapts the rate limit to the requested load
 * reduction level.
 *
 * The level is taken through LOAD_CONTROL_REDUCE() and clamped to 0..10.
 * While the pa is disabled or power load control is off, the stored level
 * is reset to 0 and nothing else happens. 'context' is not used.
 */
static void avm_pa_load_control_cb(int load_reduce, void *context)
{
   struct avm_pa_global *ctx = &pa_glob;
   unsigned rate;

   if (ctx->disabled || (ctx->load_control & LOADCONTROL_POWER) == 0) {
      ctx->load_reduce = 0;
      return;
   }

   load_reduce = LOAD_CONTROL_REDUCE(load_reduce);
   if (load_reduce < 0)
      load_reduce = 0;
   else if (load_reduce > 10)
      load_reduce = 10;

   if (load_reduce == 0) {
      /* back to normal: only log the transition, the rate is kept */
      if (ctx->load_reduce) {
         printk(KERN_INFO "avm_pa: load reduce %d => %d, rate %u (pps %lu ov_pps %lu)\n",
                ctx->load_reduce, load_reduce,
                ctx->rate,
                (unsigned long)ctx->stats.fw_pps,
                (unsigned long)ctx->stats.overlimit_pps);
      }
   } else if (ctx->stats.fw_pps > AVM_PA_MINRATE) {
int change = ctx->load_reduce - load_reduce;
      unsigned percent;

      /* tbf currently off: start from the maximum rate */
      if (ctx->tbf_enabled == 0)
         ctx->rate = ctx->maxrate;
      rate = ctx->rate;
      if (change <= 0) { /* getting worse (or unchanged) */
         if (ctx->load_reduce == 0) {
            if (ctx->tbf_enabled) percent = (-change)*4;
            else percent = (-change)*20;
         } else {
            percent = (-change)*8;
         }
         /* NOTE(review): with load_reduce clamped to 0..10, percent can exceed 100
          * here (e.g. 10 * 20), which would make this unsigned subtraction wrap
          * around - confirm whether such steps can occur in practice. */
         rate = rate - (rate*percent)/100;
      } else { /* getting better */
         percent = change*4;
         rate = rate + (rate*percent)/100;
      }
      printk(KERN_INFO "avm_pa: load reduce %d => %d, rate %u => %u (change %d %u%% pps %lu ov_pps %lu)\n",
                       ctx->load_reduce, load_reduce,
                       ctx->rate, rate, change, percent,
                       (unsigned long)ctx->stats.fw_pps,
                       (unsigned long)ctx->stats.overlimit_pps);
      ctx->rate = rate;
      avm_pa_tbf_update(ctx->rate, ctx->pktbuffer, ctx->pktpeak);
      ctx->tbf_enabled = 1;
   } else {
      /* forwarding rate not above AVM_PA_MINRATE: just log, leave the rate alone */
      printk(KERN_INFO "avm_pa: load reduce %d => %d, rate %u (pps %lu)\n",
                       ctx->load_reduce, load_reduce, ctx->rate,
                       (unsigned long)ctx->stats.fw_pps);
   }
   ctx->load_reduce = load_reduce;
}
#endif

#if AVM_PA_TOKSTATS
/*
 * Prints the load-control/tbf state followed by the recorded token samples.
 *
 * fprintffunc: printf-style output function, called as fprintffunc(arg, fmt, ...)
 * arg:         opaque first argument handed to fprintffunc
 *
 * The samples are walked backwards starting just before ctx->tok_pos, wrapping
 * from index 0 to TOK_SAMLES-1; a newline is emitted whenever the remaining
 * count is a multiple of 8, a blank otherwise.
 */
static void pa_show_tstats(pa_fprintf fprintffunc, void *arg)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct avm_pa_tbf *tbf = &ctx->tbf;
   int i = TOK_SAMLES;
   int pos = ctx->tok_pos;

   (*fprintffunc)(arg, "load_reduce %d tbf_enabled %d maxrate %u\n",
                  ctx->load_reduce, ctx->tbf_enabled, ctx->maxrate);
   (*fprintffunc)(arg, "rate %u buffer %u peak %u\n",
                  ctx->rate, ctx->pktbuffer, ctx->pktpeak);
   (*fprintffunc)(arg, "tbf: buffer %u peak %u pkttime %u tokens %ld/%ld\n",
                  tbf->buffer, tbf->pbuffer, tbf->pkttime,
                  tbf->tokens, tbf->ptokens);
   while (i--) {
      if (--pos < 0) pos = TOK_SAMLES-1;
      (*fprintffunc)(arg, "%d/%u/%u-%u/%lu%s",
                     ctx->tok_state[pos],
                     ctx->tok_overtime[pos],
                     ctx->tok_rate[pos],
                     ctx->tok_pps[pos],
                     ctx->tok_overlimit[pos],
                     i % 8 ? " " : "\n");
   }
}

/* Starts the "avm_pa" kernel thread unless ctx->task is already set.
 * On failure a message is logged and ctx->task is reset to 0. */
static void avm_pa_thread_start(void)
{
   struct avm_pa_global *ctx = &pa_glob;
   if (ctx->task == 0) {
      ctx->task = kthread_run(avm_pa_thread, 0, "avm_pa");
      if (IS_ERR(ctx->task)) {
         printk(KERN_CRIT "avm_pa: failed to start task\n");
         ctx->task = 0;
      }
   }
}

/* Stops the kernel thread if one is recorded in ctx->task and clears ctx->task. */
static void avm_pa_thread_stop(void)
{
   struct avm_pa_global *ctx = &pa_glob;
   if (ctx->task) {
      (void)kthread_stop(ctx->task);
      ctx->task = 0;
   }
}
#endif

/* Brings up the runtime parts: optional stats thread, both estimators, the
 * tbf (with the current rate/buffer/peak) and the load-control timer. */
static void avm_pa_enable(void)
{
   struct avm_pa_global *ctx = &pa_glob;
#if AVM_PA_TOKSTATS
   avm_pa_thread_start();
#endif
   avm_pa_setup_est();
   avm_pa_setup_cputime_est();
   avm_pa_tbf_init(ctx->rate, ctx->pktbuffer, ctx->pktpeak);
   avm_pa_start_lc_timer();
}

/* Counterpart of avm_pa_enable(): tears down tbf, optional stats thread,
 * both estimators and finally the load-control timer. */
static void avm_pa_disable(void)
{
   avm_pa_tbf_exit();
#if AVM_PA_TOKSTATS
   avm_pa_thread_stop();
#endif
   avm_pa_unsetup_cputime_est();
   avm_pa_unsetup_est();
   avm_pa_stop_lc_timer();
}

#ifdef CONFIG_PROC_FS
/* ------------------------------------------------------------------------ */
/* -------- procfs functions ---------------------------------------------- */
/* ------------------------------------------------------------------------ */

/*
 * The simple proc entries below all follow the same pattern:
 *   X_show()      - seq_file show callback, forwards to pa_show_X() with
 *                   seq_printf as the print function
 *   X_show_open() - opens the entry via single_open()
 *   X_show_fops   - file operations; .owner is only set for kernels
 *                   older than 2.6.32
 */

static int brief_show(struct seq_file *m, void *v)
{
   pa_show_brief((pa_fprintf *)seq_printf, m);
   return 0;
}

static int brief_show_open(struct inode *inode, struct file *file)
{
   return single_open(file, brief_show, PDE_DATA(inode));
}

static const struct file_operations brief_show_fops = {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
   .owner   = THIS_MODULE,
#endif
   .open    = brief_show_open,
   .read    = seq_read,
   .llseek  = seq_lseek,
   .release = single_release, /* brief_show_open() uses single_open() */
};

/* ------------------------------------------------------------------------ */

static int status_show(struct seq_file *m, void *v)
{
   pa_show_status((pa_fprintf *)seq_printf, m);
   return 0;
}

static int status_show_open(struct inode *inode, struct file *file)
{
   return single_open(file, status_show, PDE_DATA(inode));
}

static const struct file_operations status_show_fops = {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
   .owner   = THIS_MODULE,
#endif
   .open    = status_show_open,
   .read    = seq_read,
   .llseek  = seq_lseek,
   .release = single_release, /* status_show_open() uses single_open() */
};

/* ------------------------------------------------------------------------ */

static int stats_show(struct seq_file *m, void *v)
{
   pa_show_stats((pa_fprintf *)seq_printf, m);
   return 0;
}

static int stats_show_open(struct inode *inode, struct file *file)
{
   return single_open(file, stats_show, PDE_DATA(inode));
}

static const struct file_operations stats_show_fops = {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
   .owner   = THIS_MODULE,
#endif
   .open    = stats_show_open,
   .read    = seq_read,
   .llseek  = seq_lseek,
   .release = single_release, /* stats_show_open() uses single_open() */
};

/* ------------------------------------------------------------------------ */

static int pids_show(struct seq_file *m, void *v)
{
   pa_show_pids((pa_fprintf *)seq_printf, m);
   return 0;
}

static int pids_show_open(struct inode *inode, struct file *file)
{
   return single_open(file, pids_show, PDE_DATA(inode));
}

static const struct file_operations pids_show_fops = {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
   .owner   = THIS_MODULE,
#endif
   .open    = pids_show_open,
   .read    = seq_read,
   .llseek  = seq_lseek,
   .release = single_release, /* pids_show_open() uses single_open() */
};

/* ------------------------------------------------------------------------ */

static int vpids_show(struct seq_file *m, void *v)
{
   pa_show_vpids((pa_fprintf *)seq_printf, m);
   return 0;
}

static int vpids_show_open(struct inode *inode, struct file *file)
{
   return single_open(file, vpids_show, PDE_DATA(inode));
}

static const struct file_operations vpids_show_fops = {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
   .owner   = THIS_MODULE,
#endif
   .open    = vpids_show_open,
   .read    = seq_read,
   .llseek  = seq_lseek,
   .release = single_release, /* vpids_show_open() uses single_open() */
};

static int
vpids_hw_stats_show(struct seq_file *m, void *v)
{
   /* single-shot show callback: forwards to pa_show_vpids_hw_stats() */
   pa_show_vpids_hw_stats((pa_fprintf *)seq_printf, m);
   return 0;
}

static int vpids_hw_stats_show_open(struct inode *inode, struct file *file)
{
   return single_open(file, vpids_hw_stats_show, PDE_DATA(inode));
}

static const struct file_operations vpids_hw_stats_show_fops = {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
   .owner   = THIS_MODULE,
#endif
   .open    = vpids_hw_stats_show_open,
   .read    = seq_read,
   .llseek  = seq_lseek,
   .release = single_release, /* vpids_hw_stats_show_open() uses single_open() */
};

/* single-shot show callback: forwards to pa_show_vpids_all_stats() */
static int vpids_all_stats_show(struct seq_file *m, void *v)
{
   pa_show_vpids_all_stats((pa_fprintf *)seq_printf, m);
   return 0;
}

static int vpids_all_stats_show_open(struct inode *inode, struct file *file)
{
   return single_open(file, vpids_all_stats_show, PDE_DATA(inode));
}

static const struct file_operations vpids_all_stats_show_fops = {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
   .owner   = THIS_MODULE,
#endif
   .open    = vpids_all_stats_show_open,
   .read    = seq_read,
   .llseek  = seq_lseek,
   .release = single_release, /* vpids_all_stats_show_open() uses single_open() */
};

/* ------------------------------------------------------------------------ */

/* Iterator state kept in seq->private by the seq_file walkers below:
 * the handle (index) of the element currently being shown. */
struct handle_iter {
   unsigned short handle;
};

/*
 * Returns the first handle greater than 'handle' whose session exists
 * (pa_session_get() != NULL) and passes session_is_selected(), or 0 if the
 * search reaches CONFIG_AVM_PA_MAX_SESSION without a hit.
 */
static inline unsigned short
next_session(struct avm_pa_global *ctx, unsigned short handle)
{
   while (++handle < CONFIG_AVM_PA_MAX_SESSION) {
      struct avm_pa_session *sess = pa_session_get(handle);
      if (sess && session_is_selected(ctx, sess))
         return handle;
   }
   return 0;
}

/*
 * seq_file .start: positions the iterator on the (*pos)-th selected session by
 * walking from the beginning each time. Returns 0 when there is no such session.
 */
static void *sess_show_seq_start(struct seq_file *seq, loff_t *pos)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct handle_iter *it = seq->private;
   loff_t i;

   if ((it->handle = next_session(ctx, 0)) == 0)
      return 0;

   for (i = 0; i < *pos; i++) {
      if ((it->handle = next_session(ctx, it->handle)) == 0)
         return 0;
   }
   return PA_SESSION(ctx, it->handle);
}

static void *sess_show_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct
handle_iter *it = seq->private;

   /* seq_file .next: bump the position and advance to the next selected session */
   ++*pos;
   if ((it->handle = next_session(ctx, it->handle)) == 0)
      return 0;
   return PA_SESSION(ctx, it->handle);
}

/* seq_file .stop: nothing to release */
static void sess_show_seq_stop(struct seq_file *seq, void *v)
{
}

/* seq_file .show: prints an empty line followed by the session that
 * it->handle currently refers to. */
static int sess_show_seq_show(struct seq_file *seq, void *v)
{
   struct avm_pa_global *ctx = &pa_glob;
   const struct handle_iter *it = seq->private;

   seq_printf(seq, "\n");
   pa_show_session(PA_SESSION(ctx, it->handle), (pa_fprintf *)seq_printf, seq);
   return 0;
}

static struct seq_operations sess_show_seq_ops = {
   .start = sess_show_seq_start,
   .next  = sess_show_seq_next,
   .stop  = sess_show_seq_stop,
   .show  = sess_show_seq_show,
};

/* Opens the entry with a private struct handle_iter as iterator state. */
static int sess_show_open(struct inode *inode, struct file *file)
{
   return seq_open_private(file, &sess_show_seq_ops, sizeof(struct handle_iter));
}

static const struct file_operations sess_show_fops = {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
   .owner   = THIS_MODULE,
#endif
   .open    = sess_show_open,
   .read    = seq_read,
   .llseek  = seq_lseek,
   .release = seq_release_private, /* sess_show_open() uses seq_open_private() */
};

/* ------------------------------------------------------------------------ */

/*
 * Returns the first handle greater than 'handle' whose session exists and has
 * session->bsession set, or 0 if none is found below CONFIG_AVM_PA_MAX_SESSION.
 */
static inline unsigned short
next_bsession(struct avm_pa_global *ctx, unsigned short handle)
{
   while (++handle < CONFIG_AVM_PA_MAX_SESSION) {
      struct avm_pa_session *session;
      if ((session = pa_session_get(handle)) != 0 && session->bsession)
         return handle;
   }
   return 0;
}

/* seq_file .start: positions the iterator on the (*pos)-th session that has a
 * bsession, walking from the beginning. Returns 0 when there is none. */
static void *bsess_show_seq_start(struct seq_file *seq, loff_t *pos)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct handle_iter *it = seq->private;
   loff_t i;

   if ((it->handle = next_bsession(ctx, 0)) == 0)
      return 0;

   for (i = 0; i < *pos; i++) {
      if ((it->handle = next_bsession(ctx, it->handle)) == 0)
         return 0;
   }
   return PA_BSESSION(ctx, it->handle);
}

/* seq_file .next: bump the position and advance to the next bsession */
static void *bsess_show_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct handle_iter *it = seq->private;

   ++*pos;
   if ((it->handle = next_bsession(ctx, it->handle)) == 0)
      return 0;
   return
PA_BSESSION(ctx, it->handle); } static void bsess_show_seq_stop(struct seq_file *seq, void *v) { } static int bsess_show_seq_show(struct seq_file *seq, void *v) { struct avm_pa_global *ctx = &pa_glob; const struct handle_iter *it = seq->private; seq_printf(seq, "\n"); pa_show_bsession(PA_BSESSION(ctx, it->handle), (pa_fprintf *)seq_printf, seq); return 0; } static struct seq_operations bsess_show_seq_ops = { .start = bsess_show_seq_start, .next = bsess_show_seq_next, .stop = bsess_show_seq_stop, .show = bsess_show_seq_show, }; static int bsess_show_open(struct inode *inode, struct file *file) { return seq_open_private(file, &bsess_show_seq_ops, sizeof(struct handle_iter)); } static const struct file_operations bsess_show_fops = { #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32) .owner = THIS_MODULE, #endif .open = bsess_show_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release_private, /* bsess_show_open() uses seq_open_private() */ }; /* ------------------------------------------------------------------------ */ static inline int next_macaddrhash(struct avm_pa_global *ctx, int idx) { while (++idx < CONFIG_AVM_PA_MAX_SESSION) { if (ctx->macaddr_hash[idx]) return idx; } return 0; } static void *macaddr_show_seq_start(struct seq_file *seq, loff_t *pos) { struct avm_pa_global *ctx = &pa_glob; struct handle_iter *it = seq->private; loff_t i; if ((it->handle = next_macaddrhash(ctx, -1)) == 0) return 0; for (i = 0; i < *pos; i++) { if ((it->handle = next_macaddrhash(ctx, it->handle)) == 0) return 0; } return ctx->macaddr_hash[it->handle]; } static void *macaddr_show_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct avm_pa_global *ctx = &pa_glob; struct handle_iter *it = seq->private; ++*pos; if ((it->handle = next_macaddrhash(ctx, it->handle)) == 0) return 0; return ctx->macaddr_hash[it->handle]; } static void macaddr_show_seq_stop(struct seq_file *seq, void *v) { } static int macaddr_show_seq_show(struct seq_file *seq, void *v) { struct 
avm_pa_global *ctx = &pa_glob;
   const struct handle_iter *it = seq->private;
   struct avm_pa_macaddr *p;
   char buf[128];

   /* one output line per bucket: index, then every entry chained via ->link */
   seq_printf(seq, "%3d: ", it->handle);
   for (p = ctx->macaddr_hash[it->handle]; p; p = p->link) {
      mac2str(&p->mac, buf, sizeof(buf));
      seq_printf(seq, " %s (%lu %d/%s)",
                      buf, p->refcount, p->pid_handle,
                      PA_PID(ctx, p->pid_handle)->cfg.name);
   }
   seq_printf(seq, "\n");
   return 0;
}

static struct seq_operations macaddr_show_seq_ops = {
   .start = macaddr_show_seq_start,
   .next  = macaddr_show_seq_next,
   .stop  = macaddr_show_seq_stop,
   .show  = macaddr_show_seq_show,
};

/* Opens the entry with a private struct handle_iter as iterator state. */
static int macaddr_show_open(struct inode *inode, struct file *file)
{
   return seq_open_private(file, &macaddr_show_seq_ops, sizeof(struct handle_iter));
}

static const struct file_operations macaddr_show_fops = {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
   .owner   = THIS_MODULE,
#endif
   .open    = macaddr_show_open,
   .read    = seq_read,
   .llseek  = seq_lseek,
   .release = seq_release_private, /* macaddr_show_open() uses seq_open_private() */
};

/* ------------------------------------------------------------------------ */

/*
 * Returns the first handle greater than 'handle' whose pid slot is in use
 * (pid_handle != 0), or 0 if none is found below CONFIG_AVM_PA_MAX_PID.
 */
static inline unsigned short
next_pid(struct avm_pa_global *ctx, unsigned short handle)
{
   while (++handle < CONFIG_AVM_PA_MAX_PID) {
      if (PA_PID(ctx, handle)->pid_handle)
         return handle;
   }
   return 0;
}

/* seq_file .start: positions the iterator on the (*pos)-th used pid, walking
 * from the beginning. Returns 0 when there is none. */
static void *pid_show_seq_start(struct seq_file *seq, loff_t *pos)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct handle_iter *it = seq->private;
   loff_t i;

   if ((it->handle = next_pid(ctx, 0)) == 0)
      return 0;

   for (i = 0; i < *pos; i++) {
      if ((it->handle = next_pid(ctx, it->handle)) == 0)
         return 0;
   }
   return PA_PID(ctx, it->handle);
}

/* seq_file .next: bump the position and advance to the next used pid */
static void *pid_show_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct handle_iter *it = seq->private;

   ++*pos;
   if ((it->handle = next_pid(ctx, it->handle)) == 0)
      return 0;
   return PA_PID(ctx, it->handle);
}

/* seq_file .stop: nothing to release */
static void pid_show_seq_stop(struct seq_file *seq, void *v)
{
}

static int hash_show_seq_show(struct seq_file
*seq, void *v)
{
   struct avm_pa_global *ctx = &pa_glob;
   const struct handle_iter *it = seq->private;
   struct avm_pa_pid *pid = PA_PID(ctx, it->handle);
   struct avm_pa_session *p;
   int i;

   /* header line for the pid, then one line per non-empty hash_sess bucket
    * listing the handles of all sessions chained via ->link */
   seq_printf(seq, "PID%-3d: %s\n", it->handle, PA_PID(ctx, it->handle)->cfg.name);
   for (i = 0; i < CONFIG_AVM_PA_MAX_SESSION; i++) {
      if ((p = pid->hash_sess[i]) != 0) {
         seq_printf(seq, "%3d: ", i);
         for (; p; p = p->link)
            seq_printf(seq, " %3d", p->session_handle);
         seq_printf(seq, "\n");
      }
   }
   return 0;
}

/* Reuses the pid walker; only the show callback is specific to the hash view. */
static struct seq_operations hash_show_seq_ops = {
   .start = pid_show_seq_start,
   .next  = pid_show_seq_next,
   .stop  = pid_show_seq_stop,
   .show  = hash_show_seq_show,
};

/* Opens the entry with a private struct handle_iter as iterator state. */
static int hash_show_open(struct inode *inode, struct file *file)
{
   return seq_open_private(file, &hash_show_seq_ops, sizeof(struct handle_iter));
}

static const struct file_operations hash_show_fops = {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
   .owner   = THIS_MODULE,
#endif
   .open    = hash_show_open,
   .read    = seq_read,
   .llseek  = seq_lseek,
   .release = seq_release_private, /* hash_show_open() uses seq_open_private() */
};

/* ------------------------------------------------------------------------ */

/*
 * Show callback for the prioack entry: prints the global packet threshold and
 * ratio, then for every pid index from 1 up the ACK counters and TACK priority
 * map (if avm_pa_pid_tack_enabled()) and the TGET priority map (if
 * avm_pa_pid_tget_enabled()).
 */
static int prioack_show(struct seq_file *seq, void *v)
{
   struct avm_pa_global *ctx = &pa_glob;
   int i, j;

   seq_printf(seq, "Packet Threshold : %u\n", ctx->prioack_thresh_packets);
   seq_printf(seq, "Ratio : %u\n", ctx->prioack_ratio);
   for (i = 1; i < CONFIG_AVM_PA_MAX_PID; ++i) {
      struct avm_pa_pid *pid = PA_PID(ctx, i);
      if (avm_pa_pid_tack_enabled(pid)) {
         seq_printf(seq, "PID%d: Detected ACKs : %u\n",
                         pid->pid_handle, pid->prioack_acks);
         seq_printf(seq, "PID%d: Accelerated ACK : %u\n",
                         pid->pid_handle, pid->prioack_accl_acks);
         for (j = 0; j < AVM_PA_MAX_PRIOS; ++j) {
            seq_printf(seq, "PID%d: TACK Priority[%d]: %x\n",
                            pid->pid_handle, j,
                            pid->prio_maps[AVM_PA_PRIO_MAP_TACK].prios[j]);
         }
      }
      if (avm_pa_pid_tget_enabled(pid)) {
         for (j = 0; j < AVM_PA_MAX_PRIOS; ++j) {
            seq_printf(seq, "PID%d: TGET Priority[%d]: %x\n",
                            pid->pid_handle, j,
pid->prio_maps[AVM_PA_PRIO_MAP_TGET].prios[j]);
         }
      }
   }
   return 0;
}

static int prioack_show_open(struct inode *inode, struct file *file)
{
   return single_open(file, prioack_show, PDE_DATA(inode));
}

static const struct file_operations prioack_show_fops = {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
   .owner   = THIS_MODULE,
#endif
   .open    = prioack_show_open,
   .read    = seq_read,
   .llseek  = seq_lseek,
   .release = single_release, /* prioack_show_open() uses single_open() */
};

/*
 * Show callback for the priomaps entry: for every used pid (pid_handle != 0,
 * indices from 1 up) prints each enabled priority map with all of its
 * AVM_PA_MAX_PRIOS queue values.
 */
static int priomaps_show(struct seq_file *seq, void *v)
{
   struct avm_pa_global *ctx = &pa_glob;
   int i, j, k;

   for (i = 1; i < CONFIG_AVM_PA_MAX_PID; ++i) {
      struct avm_pa_pid *pid = PA_PID(ctx, i);
      if (pid->pid_handle == 0) {
         continue;
      }
      seq_printf(seq, "PID %d Prio Maps\n", pid->pid_handle);
      for (j = 0; j < AVM_PA_COUNT_PRIO_MAPS; ++j) {
         if (!pid->prio_maps[j].enabled) {
            continue;
         }
         seq_printf(seq, "Prio Map[%d]\n", j);
         for (k = 0; k < AVM_PA_MAX_PRIOS; ++k) {
            seq_printf(seq, "Queue[%d]: %x\n", k, pid->prio_maps[j].prios[k]);
         }
      }
   }
   return 0;
}

static int priomaps_show_open(struct inode *inode, struct file *file)
{
   return single_open(file, priomaps_show, PDE_DATA(inode));
}

static const struct file_operations priomaps_show_fops = {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
   .owner   = THIS_MODULE,
#endif
   .open    = priomaps_show_open,
   .read    = seq_read,
   .llseek  = seq_lseek,
   .release = single_release, /* priomaps_show_open() uses single_open() */
};

/* ------------------------------------------------------------------------ */

#if AVM_PA_TOKSTATS
/* single-shot show callback: forwards to pa_show_tstats() */
static int tstats_show(struct seq_file *m, void *v)
{
   pa_show_tstats((pa_fprintf *)seq_printf, m);
   return 0;
}

static int tstats_show_open(struct inode *inode, struct file *file)
{
   return single_open(file, tstats_show, PDE_DATA(inode));
}

static const struct file_operations tstats_show_fops = {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
   .owner   = THIS_MODULE,
#endif
   .open    = tstats_show_open,
   .read    = seq_read,
   .llseek  = seq_lseek,
   .release =
single_release, /* tstats_show_open() uses single_open() */
};
#endif

/* ------------------------------------------------------------------------ */

/*
 * Transmit helper: 'arg' is the struct net_device to send on. The skb is
 * assigned to that device and queued with dev_queue_xmit(); a non-zero result
 * is logged (rate limited via net_ratelimit()).
 */
static void pa_dev_transmit(void *arg, struct sk_buff *skb)
{
   int rc;
   skb->dev = (struct net_device *)arg;
   rc = dev_queue_xmit(skb);
   if (rc != 0 && net_ratelimit()) {
      pa_printk(KERN_ERR, "pa_dev_transmit(%s) %d",
                          ((struct net_device *)arg)->name, rc);
   }
}

/**
 * avm_pa session selector framework
 *
 * This extensible framework allows specifying session selectors via procfs.
 * The user-facing side of the selectors is modelled after tcpdump with numerous
 * limitations (the user space tool showpainfo should nullify some of them).
 *
 * The purpose is to filter the output of /proc/net/avm_pa/sessions. However, it
 * is designed such that it can be repurposed to filter sessions at the creation
 * time already later on.
 *
 * For example a user can do the following:
 * echo ip > /proc/net/avm_pa/xsession
 * echo port 80 > /proc/net/avm_pa/xsession
 * echo ip6 dst port 25 > /proc/net/avm_pa/xsession
 * echo not vpid internet > /proc/net/avm_pa/xsession
 *
 * /proc/net/avm_pa/sessions will show only sessions matching the criteria. Other sessions
 * are hidden, but still exist (and packets of hidden sessions are still accelerated).
 *
 * The current set of selectors can be read with "cat /proc/net/avm_pa/xsession"
 *
 * The kernel side of the framework is described below to help future expansions.
 *
 * all_selectors has a preprogrammed array of available selectors. Each element is of
 * the type pa_selector.
 *
 * Each pa_selector defines the following aspects:
 * - a prefix: a selector may support/require a prefix that must be echoed before the
 * actual type (e.g. echo src pid > [...]). The meaning is selector-defined but
 * is conventionally used to express a direction, for example src/dst or ingress/egress.
 * - the label: the label is the string that identifies the selector. However, a label
 * might occur multiple times within the all_selectors array, only the tuple of
 * prefix and label must be unique. This is so that a selector can support more
 * than one prefix or none.
 * - a set of methods, setup(), check() and show().
 *
 * setup() is called during parsing of the xsession input. It'll receive the input
 * as argument vector, plus the prefix. Input that has been consumed by earlier
 * selectors is not passed.
 *
 * check() is called during the output of sessions. It'll receive the session and
 * should simply return a boolean whether that session is selected or not. For example
 * a "port 80" selector should return 1 for any session that has port 80 (either
 * source or dest).
 *
 * show() is called during the output of xsession. It'll receive a buffer and should
 * print itself into that buffer. This is so that the user can view the current set
 * of selectors with "cat /proc/net/avm_pa/xsession". Usually the user input
 * should be mirrored.
 *
 * Each pa_selector also defines its data structure (struct *_selector_data) that becomes
 * part of the selector_union union. The data is accessible through the pa_selector_entry
 * that is passed to each method. Generally, setup() sets up the data structure based
 * on the user input, and check() does the selection based on the information
 * in the data structure.
 *
 * The active selectors are stored in a linux doubly-linked list. session_is_selected()
 * goes through that list and calls the check() method of all selectors. Currently,
 * the selectors are logically connected with AND, so for a session to be shown it must
 * be selected by ALL selectors. Logical OR is not implemented yet. However, NOT is
 * implemented. Each selector can be negated by prefixing it with NOT (or prefixing the
 * prefix :-). The NOT is only applied to the next selector, any earlier or
 * following ones are unaffected. Therefore:
 * echo not ip port 80 > [...]
 * is probably *not* doing what you expect. In truth it applies NOT *only* to the ip
 * selector, meaning that only ipv6 packets are selected. These packets must also
 * match the "port 80" criteria. Check the output of xsession to understand it better.
 *
 * To add new parametrized selectors do the following (named foo as an example):
 * 1) define a data structure foo_selector_data
 * 2) add struct foo_selector_data to the union selector_union
 * 3) implement setup(), check() and show() methods.
 * 4) add one or more entries to all_selectors, depending on the prefixes it supports
 * (or requires)
 *
 * For unparametrized selectors, that usually consist of only a single label and no
 * prefix, do:
 * 1) implement check() method
 * 2) add one entry to all_selectors, setup() can be NULL and for show() you can use
 * single_show() that simply echoes back the label.
 *
 */

/*
 * Maps a selector prefix to a direction:
 *    "dst" / "egress"  ->  1
 *    "src" / "ingress" -> -1
 *    NULL or anything else -> 0 (any direction)
 */
static int prefix_to_dir(const char *prefix)
{
   if (!prefix)
      return 0;
   else if (!strcmp(prefix, "dst") || !strcmp(prefix, "egress"))
      return 1;
   else if (!strcmp(prefix, "src") || !strcmp(prefix, "ingress"))
      return -1;
   else
      return 0; /* any */
}

/* Data of a "port" selector; dir as returned by prefix_to_dir(). */
struct port_selector_data {
   int dir;
   __be16 port;
};

/* Data of a "pid" or "vpid" selector; dir as returned by prefix_to_dir(). */
struct pid_selector_data {
   int dir;
   u32 pid; /* can be either pid or vpid */
};

/* Data of a "host" selector: direction, IP version (4 or 6) and the address. */
struct host_selector_data {
   short dir;
   short ver;
   union {
      __be32 ip4;
#if IS_ENABLED(CONFIG_IPV6)
      struct in6_addr ip6;
#endif
   } u;
};

/* Per-selector payload; which member is valid depends on the selector type. */
union selector_union {
   struct port_selector_data port_data;
   struct pid_selector_data pid_data;
   struct host_selector_data host_data;
};

/* One active selector: list linkage, its payload, the selector definition it
 * was created from and whether its check() result is to be inverted. */
struct pa_selector_entry {
   struct list_head list;
   union selector_union data;
   struct pa_selector *selector;
   int negated;
};

/* Definition of an available selector (one element of all_selectors). */
struct pa_selector {
   const char *prefix;
   const char *label;
   int args;
   int (*setup)(struct pa_selector_entry* sel, int argc, const char **argv, const char *prefix);
   int (*check)(struct pa_selector_entry* sel, struct avm_pa_session *sess);
   ssize_t (*show) (struct pa_selector_entry* sel, char *buf, size_t bufsz);
};

/* Used to kill the pid/vpid selectors when the actual pid/vpid is unregistered at
 * runtime
*/ static void clear_pid_selector(struct avm_pa_global *ctx, u32 pid, int isvpid) { struct pa_selector_entry *sel, *temp; const char *label = isvpid ? "vpid" : "pid"; list_for_each_entry_safe(sel, temp, &ctx->session_selector, list) { if (!strcmp(sel->selector->label, label) && sel->data.pid_data.pid == pid) { printk(KERN_INFO "Removing %s selector because underlying %s %s is about to vanish\n", label, label, PA_PID(ctx, pid)->cfg.name); list_del(&sel->list); kfree(sel); } } } /* This function ultimately decides whether a session is selected by calling * into the check method of all selectors */ static int session_is_selected(struct avm_pa_global *ctx, struct avm_pa_session *sess) { struct pa_selector_entry *sel; /* Selectors are connected with logical AND, so the first failed check kicks it out. * Selectors may be negated in which case the check() return shall be inverted. */ list_for_each_entry(sel, &ctx->session_selector, list) { if (sel->selector->check(sel, sess) == sel->negated) return 0; } return 1; } static int ip_selector_check(struct pa_selector_entry *sel, struct avm_pa_session *sess) { return (sess->ingress.pkttype & AVM_PA_PKTTYPE_IP_MASK) == AVM_PA_PKTTYPE_IPV4 || (sess->ingress.pkttype & AVM_PA_PKTTYPE_IPENCAP_MASK) == AVM_PA_PKTTYPE_IPV4ENCAP; } static int ip6_selector_check(struct pa_selector_entry *sel, struct avm_pa_session *sess) { return (sess->ingress.pkttype & AVM_PA_PKTTYPE_IP_MASK) == AVM_PA_PKTTYPE_IPV6 || (sess->ingress.pkttype & AVM_PA_PKTTYPE_IPENCAP_MASK) == AVM_PA_PKTTYPE_IPV6ENCAP; } static int dslite_selector_check(struct pa_selector_entry* sel, struct avm_pa_session *sess) { return (sess->ingress.pkttype & AVM_PA_PKTTYPE_IP_MASK) == AVM_PA_PKTTYPE_IPV4 && (sess->ingress.pkttype & AVM_PA_PKTTYPE_IPENCAP_MASK) == AVM_PA_PKTTYPE_IPV6ENCAP; } static int gre_selector_check(struct pa_selector_entry* sel, struct avm_pa_session *sess) { return (sess->ingress.pkttype & AVM_PA_PKTTYPE_GRE) != 0; } static int l2tp_selector_check(struct 
pa_selector_entry* sel, struct avm_pa_session *sess) { return (sess->ingress.pkttype & AVM_PA_PKTTYPE_L2TP) != 0; } static int lisp_selector_check(struct pa_selector_entry* sel, struct avm_pa_session *sess) { return (sess->ingress.pkttype & AVM_PA_PKTTYPE_LISP) != 0; } static int port_selector_setup(struct pa_selector_entry *sel, int argc, const char **argv, const char *prefix) { int ret; long port; struct port_selector_data *data = &sel->data.port_data; if (argc < 2) return -EINVAL; ret = kstrtol(argv[1], 10, &port); if (ret < 0) return ret; data->dir = prefix_to_dir(prefix); data->port = htons(port&0xffff); return 0; } static int port_selector_check(struct pa_selector_entry *sel, struct avm_pa_session *sess) { /* locate the ports first. use udphdr, for tcp the location is the same */ hdrunion_t *hdr; struct avm_pa_pkt_match *info = &sess->ingress; struct port_selector_data *data = &sel->data.port_data; int ports_off, proto; if (AVM_PA_PKTTYPE_IP_VERSION(info->pkttype) == 4) { struct iphdr *iph = (struct iphdr *) (HDRCOPY(info) + info->ip_offset); proto = iph->protocol & 0xff; ports_off = info->ip_offset + PA_IPHLEN(iph); } else if (AVM_PA_PKTTYPE_IP_VERSION(info->pkttype) == 6) { struct ipv6hdr *iph = (struct ipv6hdr *) (HDRCOPY(info) + info->ip_offset); proto = iph->nexthdr & 0xff; ports_off = info->ip_offset + sizeof(struct ipv6hdr); } else { /* Ports can only match IP packets */ return 0; } hdr = (hdrunion_t *)(HDRCOPY(info)+ports_off); if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) return 0; switch (data->dir) { case -1: return data->port == hdr->ports[0]; case 0: return data->port == hdr->ports[0] || data->port == hdr->ports[1]; case 1: return data->port == hdr->ports[1]; default: /* never reached */ return 0; } } static ssize_t port_selector_show(struct pa_selector_entry* sel, char *buf, size_t bufsz) { struct port_selector_data *data = &sel->data.port_data; const char *dir; if (data->dir == -1) dir = "src "; else if (data->dir == 1) dir = "dst "; else 
dir = ""; return snprintf(buf, bufsz, "%sport %d", dir, ntohs(data->port)); } static int host_selector_setup(struct pa_selector_entry *sel, int argc, const char **argv, const char *prefix) { struct host_selector_data *data = &sel->data.host_data; if (argc < 2) return -EINVAL; data->dir = prefix_to_dir(prefix); if (!strchr(argv[1], ':')) { unsigned char *ip = (unsigned char *) &data->u.ip4; data->ver = 4; if (sscanf(argv[1], "%hhu.%hhu.%hhu.%hhu", &ip[0], &ip[1], &ip[2], &ip[3]) != 4) return -EINVAL; } #if IS_ENABLED(CONFIG_IPV6) else { unsigned short *ip = (unsigned short *) &data->u.ip6; data->ver = 6; if (sscanf(argv[1], "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx", &ip[0], &ip[1], &ip[2], &ip[3], &ip[4], &ip[5], &ip[6], &ip[7]) != 8) return -EINVAL; } #endif return 0; } static int host_selector_check(struct pa_selector_entry *sel, struct avm_pa_session *sess) { struct avm_pa_pkt_match *info = &sess->ingress; struct host_selector_data *data = &sel->data.host_data; if (data->ver == AVM_PA_PKTTYPE_IP_VERSION(info->pkttype)) { if (data->ver == 4) { struct iphdr *iph = (struct iphdr *) (HDRCOPY(info) + info->ip_offset); switch (data->dir) { case -1: return data->u.ip4 == iph->saddr; case 0: return data->u.ip4 == iph->saddr || data->u.ip4 == iph->daddr; case 1: return data->u.ip4 == iph->daddr; } } #if IS_ENABLED(CONFIG_IPV6) else { struct ipv6hdr *iph = (struct ipv6hdr *) (HDRCOPY(info) + info->ip_offset); switch (data->dir) { case -1: return ipv6_addr_equal(&data->u.ip6, &iph->saddr); case 0: return ipv6_addr_equal(&data->u.ip6, &iph->saddr) || ipv6_addr_equal(&data->u.ip6, &iph->daddr); case 1: return ipv6_addr_equal(&data->u.ip6, &iph->daddr); } } #endif } return 0; } static ssize_t host_selector_show(struct pa_selector_entry* sel, char *buf, size_t bufsz) { struct host_selector_data *data = &sel->data.host_data; const char *dir; if (data->dir == -1) dir = "src "; else if (data->dir == 1) dir = "dst "; else dir = ""; #if IS_ENABLED(CONFIG_IPV6) if (data->ver == 6) return 
snprintf(buf, bufsz, "%shost %pI6", dir, &data->u.ip6);
#endif
   return snprintf(buf, bufsz, "%shost %pI4", dir, &data->u.ip4);
}

/* Returns the IP protocol encoded in the session's ingress packet type, or 0
 * if no bit of AVM_PA_PKTTYPE_IP_MASK is set in it. */
static int get_session_proto(struct avm_pa_session *sess)
{
   struct avm_pa_pkt_match *info = &sess->ingress;

   if (info->pkttype & AVM_PA_PKTTYPE_IP_MASK)
      return AVM_PA_PKTTYPE_IPPROTO(info->pkttype);
   return 0;
}

/* "tcp": the session's protocol is TCP. */
static int tcp_selector_check(struct pa_selector_entry* sel, struct avm_pa_session *sess)
{
   return get_session_proto(sess) == IPPROTO_TCP;
}

/* "udp": the session's protocol is UDP. */
static int udp_selector_check(struct pa_selector_entry* sel, struct avm_pa_session *sess)
{
   return get_session_proto(sess) == IPPROTO_UDP;
}

/*
 * setup() of the "pid" and "vpid" selectors.
 *
 * argv[1] names the pid/vpid, either by its configured name or, if it starts
 * with a digit, by its numeric handle. 'prefix' selects the direction (see
 * prefix_to_dir()). The registered pids/vpids are searched and the handle of
 * the first match is stored in the selector data.
 *
 * Returns 0 on success, -EINVAL if the argument is missing and -ENODEV if the
 * number is invalid or no registered pid/vpid matches.
 */
static int pid_selector_setup(struct pa_selector_entry *sel, int argc, const char **argv, const char *prefix)
{
   struct avm_pa_global *ctx = &pa_glob;
   const char *pidname;
   unsigned long pid_l;
   int isvpid, n;
   size_t max_pid;

   /* This function is shared between pid and vpid selectors */
   isvpid = sel->selector->label[0] == 'v';
   max_pid = isvpid ? CONFIG_AVM_PA_MAX_VPID : CONFIG_AVM_PA_MAX_PID;

   if (argc < 2)
      return -EINVAL;

   pidname = argv[1];
   /* PID may be specified by id */
   if (isdigit(*pidname)) {
      /* NOTE(review): a name that starts with a digit is always treated as a
       * number here and therefore cannot be selected by name. */
      if (kstrtoul(pidname, 10, &pid_l) < 0 || pid_l > max_pid || pid_l == 0)
         return -ENODEV;
      /* pidname == 0 from here on means "match by handle"; pid_l is only
       * read in that case */
      pidname = 0;
   }

   sel->data.pid_data.pid = 0;
   sel->data.pid_data.dir = prefix_to_dir(prefix);
   for (n = 1; n < max_pid; n++) {
      if (isvpid) {
         struct avm_pa_vpid *vpid = PA_VPID(ctx, n);
         /* skip unused slots */
         if (vpid->vpid_handle == 0)
            continue;
         if (pidname && 0 == strcmp(vpid->cfg.name, pidname))
            sel->data.pid_data.pid = vpid->vpid_handle;
         else if (!pidname && pid_l == vpid->vpid_handle)
            sel->data.pid_data.pid = vpid->vpid_handle;
      } else {
         struct avm_pa_pid *pid = PA_PID(ctx, n);
         /* skip unused slots */
         if (pid->pid_handle == 0)
            continue;
         if (pidname && 0 == strcmp(pid->cfg.name, pidname))
            sel->data.pid_data.pid = pid->pid_handle;
         else if (!pidname && pid_l == pid->pid_handle)
            sel->data.pid_data.pid = pid->pid_handle;
      }
      /* stop at the first match */
      if (sel->data.pid_data.pid > 0)
         return 0;
   }

   return -ENODEV;
}

static int pid_selector_check(struct
pa_selector_entry *sel, struct avm_pa_session *sess)
{
   /* check() of the "pid" selector: matches the ingress pid unless the
    * direction is egress-only (1), and any egress pid unless the direction is
    * ingress-only (-1). */
   int i, dir = sel->data.pid_data.dir;
   avm_pid_handle pid = sel->data.pid_data.pid;

   if (pid == sess->ingress_pid_handle && dir != 1)
      return 1;

   for (i = 0; i < sess->negress && (dir != -1); i++) {
      if (pid == sess->egress[i].pid_handle)
         return 1;
   }

   return 0;
}

/*
 * check() of the "vpid" selector: same rules as pid_selector_check(), applied
 * to the ingress vpid and the egress vpids of the session.
 */
static int vpid_selector_check(struct pa_selector_entry *sel, struct avm_pa_session *sess)
{
   int i, dir = sel->data.pid_data.dir;
   avm_vpid_handle vpid = sel->data.pid_data.pid;

   if (vpid == sess->ingress_vpid_handle && dir != 1)
      return 1;

   for (i = 0; i < sess->negress && (dir != -1); i++) {
      if (vpid == sess->egress[i].vpid_handle)
         return 1;
   }

   return 0;
}

/*
 * show() of the "pid" and "vpid" selectors: prints
 * "[ingress |egress ]<label> <name>" into buf, where <name> is the configured
 * name of the pid or vpid the selector refers to.
 */
static ssize_t pid_selector_show(struct pa_selector_entry *sel, char *buf, size_t bufsz)
{
   struct avm_pa_global *ctx = &pa_glob;
   int dir = sel->data.pid_data.dir;
   avm_pid_handle pid = sel->data.pid_data.pid;
   const char *pdir;
   const char *name;
   int isvpid;

   /* This function is shared between pid and vpid selectors */
   isvpid = sel->selector->label[0] == 'v';

   if (dir == -1)
      pdir = "ingress ";
   else if (dir == 1)
      pdir = "egress ";
   else
      pdir = "";

   /* the handle indexes the vpid table for vpid selectors, the pid table otherwise */
   name = isvpid ?
PA_VPID(ctx, pid)->cfg.name : PA_PID(ctx, pid)->cfg.name; return snprintf(buf, bufsz, "%s%s %s", pdir, sel->selector->label, name); } static ssize_t single_show(struct pa_selector_entry* sel, char *buf, size_t bufsz) { return strlcpy(buf, sel->selector->label, bufsz); } static struct pa_selector all_selectors[] = { { "", "ip", 0, NULL, ip_selector_check , single_show }, { "", "ip6", 0, NULL, ip6_selector_check, single_show }, { "", "tcp", 0, NULL, tcp_selector_check, single_show }, { "", "udp", 0, NULL, udp_selector_check, single_show }, { "", "dslite", 0, NULL, dslite_selector_check, single_show }, { "", "gre", 0, NULL, gre_selector_check, single_show }, { "", "lt2p", 0, NULL, l2tp_selector_check, single_show }, { "", "lisp", 0, NULL, lisp_selector_check, single_show }, { "src", "port", 1, port_selector_setup, port_selector_check, port_selector_show }, { "dst", "port", 1, port_selector_setup, port_selector_check, port_selector_show }, { "", "port", 1, port_selector_setup, port_selector_check, port_selector_show }, { "src", "host", 1, host_selector_setup, host_selector_check, host_selector_show }, { "dst", "host", 1, host_selector_setup, host_selector_check, host_selector_show }, { "", "host", 1, host_selector_setup, host_selector_check, host_selector_show }, { "ingress", "pid", 1, pid_selector_setup, pid_selector_check, pid_selector_show }, { "egress", "pid", 1, pid_selector_setup, pid_selector_check, pid_selector_show }, { "", "pid", 1, pid_selector_setup, pid_selector_check, pid_selector_show }, { "ingress", "vpid", 1, pid_selector_setup, vpid_selector_check, pid_selector_show }, { "egress", "vpid", 1, pid_selector_setup, vpid_selector_check, pid_selector_show }, { "", "vpid", 1, pid_selector_setup, vpid_selector_check, pid_selector_show }, }; static int is_prefix(const char *prefix) { int i; for (i = 0; i < ARRAY_SIZE(all_selectors); i++) { if (!strcmp(prefix, all_selectors[i].prefix)) return 1; } return 0; } static struct pa_selector *find_selector(const char 
*label, const char *prefix) { int i; for (i = 0; i < ARRAY_SIZE(all_selectors); i++) { if (!strcmp(label, all_selectors[i].label)) { /* If the selector has a prefix then user space must provide it and * it must match as well. If the selector has no prefix then user space * must not provide one. However, all_selectors can contain the same selectors * with and without prefix */ if (!prefix && !all_selectors[i].prefix[0]) return &all_selectors[i]; else if (prefix && !strcmp(prefix, all_selectors[i].prefix)) return &all_selectors[i]; } } return 0; } static int parse_selectors(struct list_head *list, int argc, const char **argv) { const char *prefix = 0; struct pa_selector *_sel; struct pa_selector_entry *sel, *temp; int i = 0, ret = 0, negate = 0; /* all clears the selectors (all sessions will be shown again) */ if (argc == 1 && !strcmp(argv[0], "all")) goto out; while (i < argc) { if (!strcmp(argv[i], "not")) { negate ^= 1; i += 1; ret = -EINVAL; continue; } /* and between selectors is implicit, but can be specified for clarity (as a bonus * it helps detecting errors when it occurs where a selector is expected) */ else if (!strcmp(argv[i], "and")) { i += 1; continue; } else if (is_prefix(argv[i])) { prefix = argv[i]; i += 1; ret = -EINVAL; continue; } /* locate selector, which may require a certain prefix as well */ _sel = find_selector(argv[i], prefix); if (!_sel) { ret = -EINVAL; goto out; } sel = kmalloc(sizeof(struct pa_selector_entry), GFP_KERNEL); if (!sel) { ret = -ENOMEM; goto out; } INIT_LIST_HEAD(&sel->list); sel->selector = _sel; sel->negated = negate; ret = _sel->setup ? 
_sel->setup(sel, argc - i, argv + i, prefix) : 0; if (ret < 0) goto out; i += _sel->args + 1; /* skip args consumed by selector */ list_add_tail(&sel->list, list); prefix = 0; negate = 0; } return ret; out: list_for_each_entry_safe(sel, temp, list, list) { list_del(&sel->list); kfree(sel); } return ret; } static ssize_t avm_pa_read_selectors(struct file *file, char __user *buffer, size_t count, loff_t *offset) { struct avm_pa_global *ctx = &pa_glob; struct pa_selector_entry *sel; size_t pos = 0; if (*offset != 0) return 0; *buffer = 0; list_for_each_entry(sel, &ctx->session_selector, list) { if (sel->negated) pos += strlcpy(buffer + pos, "( NOT ", count - pos); pos += sel->selector->show(sel, buffer + pos, count - pos); if (sel->negated) pos += strlcpy(buffer + pos, " )", count - pos); if (!list_is_last(&sel->list, &ctx->session_selector)) pos += strlcpy(buffer + pos, " AND ", count - pos); if (pos >= count) break; } pos += strlcpy(buffer + pos, "\n", count - pos); *offset = pos; return pos; } static ssize_t avm_pa_write_selectors(struct file *file, const char __user *buffer, size_t count, loff_t *offset) { struct avm_pa_global *ctx = &pa_glob; char pp_cmd[100]; char* argv[32]; int argc = 0; char* ptr_cmd; char* delimitters = " \n\t"; char* ptr_next_tok; int ret = 0; LIST_HEAD(new_selectors); /* Validate the length of data passed. */ if (count > 100) count = 100; /* Initialize the buffer before using it. */ memset ((void *)&pp_cmd[0], 0, sizeof(pp_cmd)); memset ((void *)&argv[0], 0, sizeof(argv)); /* Copy from user space. 
*/ if (copy_from_user (&pp_cmd, buffer, count)) return -EFAULT; ptr_next_tok = &pp_cmd[0]; ptr_cmd = strsep(&ptr_next_tok, delimitters); if (ptr_cmd == NULL) return -1; do { argv[argc++] = ptr_cmd; if (argc >= ARRAY_SIZE(argv)) { printk(KERN_ERR "avm_pa: too many parameters dropping the command\n"); return -EIO; } ptr_cmd = strsep(&ptr_next_tok, delimitters); if (ptr_cmd && ptr_cmd[0] == 0) ptr_cmd = NULL; } while (ptr_cmd != NULL); ret = parse_selectors(&new_selectors, argc, (const char **) argv); /* Replace current selectors on success, otherwise leave them unchanged */ if (ret == 0) { struct pa_selector_entry *sel, *temp; list_for_each_entry_safe(sel, temp, &ctx->session_selector, list) { list_del(&sel->list); kfree(sel); } list_replace(&new_selectors, &ctx->session_selector); return count; } return ret; } static avm_pid_handle pa_find_pid_by_name(const char *pidname) { struct avm_pa_global *ctx = &pa_glob; avm_pid_handle pid_handle; for (pid_handle = 1; pid_handle < CONFIG_AVM_PA_MAX_PID; pid_handle++) { struct avm_pa_pid *pid = PA_PID(ctx, pid_handle); if ( pid->pid_handle == pid_handle && strcmp(pid->cfg.name, pidname) == 0) { return pid_handle; } } return 0; } static ssize_t avm_pa_write_cmds(struct file *file, const char __user *buffer, size_t count, loff_t *offset) { struct avm_pa_global *ctx = &pa_glob; char pp_cmd[100]; char* argv[10]; int argc = 0; char* ptr_cmd; char* delimitters = " \n\t"; char* ptr_next_tok; avm_pid_handle pid_handle; /* Validate the length of data passed. */ if (count > 100) count = 100; /* Initialize the buffer before using it. */ memset ((void *)&pp_cmd[0], 0, sizeof(pp_cmd)); memset ((void *)&argv[0], 0, sizeof(argv)); /* Copy from user space. 
*/ if (copy_from_user (&pp_cmd, buffer, count)) return -EFAULT; ptr_next_tok = &pp_cmd[0]; ptr_cmd = strsep(&ptr_next_tok, delimitters); if (ptr_cmd == NULL) return -1; do { argv[argc++] = ptr_cmd; if (argc >=10) { printk(KERN_ERR "avm_pa: too many parameters dropping the command\n"); return -EIO; } ptr_cmd = strsep(&ptr_next_tok, delimitters); if (ptr_cmd && ptr_cmd[0] == 0) ptr_cmd = NULL; } while (ptr_cmd != NULL); /* enable | disable | testmode */ if (strcmp(argv[0], "enable") == 0) { ctx->fw_disabled = 0; ctx->disabled = 0; avm_pa_enable(); printk(KERN_DEBUG "avm_pa: enabled\n"); } else if (strcmp(argv[0], "disable") == 0) { ctx->disabled = 1; ctx->fw_disabled = 1; avm_pa_disable(); avm_pa_flush_sessions(); printk(KERN_DEBUG "avm_pa: disabled\n"); } else if (strcmp(argv[0], "testmode") == 0) { ctx->fw_disabled = 1; ctx->disabled = 0; avm_pa_disable(); printk(KERN_DEBUG "avm_pa: testmode\n"); /* hw_enable | hw_disable */ } else if (strcmp(argv[0], "hw_enable") == 0) { ctx->hw_ppa_disabled = 0; printk(KERN_DEBUG "avm_pa: hw enabled\n"); } else if (strcmp(argv[0], "hw_disable") == 0) { ctx->hw_ppa_disabled = 1; printk(KERN_DEBUG "avm_pa: hw disabled\n"); /* flush */ } else if (strcmp(argv[0], "flush") == 0) { if (argv[1]) { avm_vpid_handle vpid_handle = simple_strtoul(argv[1], 0, 10); if ( vpid_handle && PA_VPID(ctx, vpid_handle)->vpid_handle == vpid_handle) { avm_pa_flush_sessions_for_vpid(vpid_handle); printk(KERN_DEBUG "avm_pa: flush %u\n", (unsigned)vpid_handle); } else { printk(KERN_DEBUG "avm_pa: flush %s: illegal vpid\n", argv[1]); } } else { avm_pa_flush_sessions(); printk(KERN_DEBUG "avm_pa: flush\n"); } /* loadcontrol | noloadcontrol */ } else if (strcmp(argv[0], "loadcontrol") == 0) { if (argv[1]) { if (strcmp(argv[1], "irq") == 0) { ctx->load_control = LOADCONTROL_IRQ; } else if (strcmp(argv[1], "idle") == 0) { ctx->load_control = LOADCONTROL_IDLE; } else if (strcmp(argv[1], "off") == 0) { ctx->load_control = LOADCONTROL_OFF; } else { 
ctx->load_control = LOADCONTROL_POWERIRQ; } } else { ctx->load_control = LOADCONTROL_POWERIRQ; } if ( ctx->load_control == LOADCONTROL_OFF || ( (ctx->load_control & LOADCONTROL_POWER) && ctx->load_reduce == 0)) { avm_pa_tbf_disable(); } else { ctx->rate = ctx->maxrate; avm_pa_start_lc_timer(); if ((ctx->load_control & LOADCONTROL_POWER) && ctx->load_reduce) { avm_pa_tbf_update(ctx->rate, ctx->pktbuffer, ctx->pktpeak); ctx->tbf_enabled = 1; } } switch (ctx->load_control) { case LOADCONTROL_OFF: printk(KERN_DEBUG "avm_pa: loadcontrol off\n"); break; case LOADCONTROL_IRQ: printk(KERN_DEBUG "avm_pa: loadcontrol irq\n"); break; case LOADCONTROL_IDLE: printk(KERN_DEBUG "avm_pa: loadcontrol idle\n"); break; case LOADCONTROL_POWERIRQ: printk(KERN_DEBUG "avm_pa: loadcontrol powerirq\n"); break; } } else if (strcmp(argv[0], "noloadcontrol") == 0) { ctx->load_control = LOADCONTROL_OFF; avm_pa_tbf_disable(); printk(KERN_DEBUG "avm_pa: loadcontrol off\n"); /* tbfenable | tbfdisable */ } else if (strcmp(argv[0], "tbfenable") == 0) { ctx->tbf_enabled = 1; printk(KERN_DEBUG "avm_pa: tbf enabled\n"); } else if (strcmp(argv[0], "tbfdisable") == 0) { ctx->tbf_enabled = 0; printk(KERN_DEBUG "avm_pa: tbf disabled\n"); /* mswin 800 900 */ } else if (strcmp(argv[0], "mswin") == 0) { unsigned mswin; if (argv[1]) { mswin = simple_strtoul(argv[1], 0, 10); if (mswin > 0) ctx->irq_mswin_low = mswin; } if (argv[2]) { mswin = simple_strtoul(argv[2], 0, 10); if (mswin > 0) ctx->irq_mswin_high = mswin; } printk(KERN_DEBUG "avm_pa: mswin %u %u\n", ctx->irq_mswin_low, ctx->irq_mswin_high); /* idlewin 10 20 */ } else if (strcmp(argv[0], "idlewin") == 0) { unsigned mswin; if (argv[1]) { mswin = simple_strtoul(argv[1], 0, 10); if (mswin > 0) ctx->idle_mswin_low = mswin; } if (argv[2]) { mswin = simple_strtoul(argv[2], 0, 10); if (mswin > 0) ctx->idle_mswin_high = mswin; } printk(KERN_DEBUG "avm_pa: idlewin %u %u\n", ctx->idle_mswin_low, ctx->idle_mswin_high); /* ewma 0-31 */ } else if (strcmp(argv[0], 
"ewma") == 0) { if (argv[1]) { unsigned ewma = simple_strtoul(argv[1], 0, 10); if (ewma <= 31) { struct avm_pa_cputime_est *e; ctx->cputime_ewma_log = ewma; e = &ctx->cputime_user_est; e->ewma_log = ctx->cputime_ewma_log; e = &ctx->cputime_idle_est; e->ewma_log = ctx->cputime_ewma_log; e = &ctx->cputime_irq_est; e->ewma_log = ctx->cputime_ewma_log; printk(KERN_DEBUG "avm_pa: ewma %d\n", ctx->cputime_ewma_log); } } /* rate pps */ } else if (strcmp(argv[0], "rate") == 0) { if (argv[1]) { unsigned rate = simple_strtoul(argv[1], 0, 10); if (rate > 0) { ctx->rate = rate; ctx->maxrate = rate; avm_pa_tbf_update(ctx->rate, ctx->pktbuffer, ctx->pktpeak); if (ctx->load_control == 0) { if (ctx->tbf_enabled == 0) { ctx->tbf_enabled = 1; avm_pa_tbf_reset(); } } printk(KERN_DEBUG "avm_pa: rate %u\n", ctx->rate); } } /* buffer pkts */ } else if (strcmp(argv[0], "buffer") == 0) { if (argv[1]) { unsigned pktbuffer = simple_strtoul(argv[1], 0, 10); if (pktbuffer > 0) { ctx->pktbuffer = pktbuffer; avm_pa_tbf_update(ctx->rate, ctx->pktbuffer, ctx->pktpeak); printk(KERN_DEBUG "avm_pa: buffer %u\n", ctx->pktbuffer); } } /* peak pkts */ } else if (strcmp(argv[0], "peak") == 0) { if (argv[1]) { unsigned peak = simple_strtoul(argv[1], 0, 10); if (buffer > 0) { ctx->pktpeak = peak; avm_pa_tbf_update(ctx->rate, ctx->pktbuffer, ctx->pktpeak); printk(KERN_DEBUG "avm_pa: peak %u\n", ctx->pktpeak); } } } else if (strcmp(argv[0], "treduce") == 0) { unsigned reduce; if (argv[1]) { reduce = simple_strtoul(argv[1], 0, 10); if (reduce > 0 && reduce <= 80) ctx->telephony_reduce = reduce; } printk(KERN_DEBUG "avm_pa: telephony_reduce %u\n", ctx->telephony_reduce); } else if (strcmp(argv[0], "sipactive") == 0) { int sip_is_active; if (argv[1]) { sip_is_active = simple_strtoul(argv[1], 0, 10); avm_pa_sip_is_active(sip_is_active); printk(KERN_DEBUG "avm_pa: sip telephony is %sactive\n", sip_is_active ? 
"" : "not "); } /* nodbg */ } else if (strcmp(argv[0], "nodbg") == 0) { ctx->dbgcapture = 0; ctx->dbgsession = 0; ctx->dbgnosession = 0; ctx->dbgtrace = 0; ctx->dbgmatch = 0; ctx->dbgcputime = 0; ctx->dbgprioack = 0; ctx->dbgprioacktrace = 0; ctx->dbgstats = 0; printk(KERN_DEBUG "avm_pa: all debugs off\n"); /* dbgcapture | nodbgcapture */ } else if (strcmp(argv[0], "dbgcapture") == 0) { ctx->dbgcapture = 1; printk(KERN_DEBUG "avm_pa: %s\n", argv[0]); } else if (strcmp(argv[0], "nodbgcapture") == 0) { ctx->dbgcapture = 0; printk(KERN_DEBUG "avm_pa: %s\n", argv[0]); /* dbgsession | nodbgsession */ } else if (strcmp(argv[0], "dbgsession") == 0) { ctx->dbgsession = 1; printk(KERN_DEBUG "avm_pa: %s\n", argv[0]); } else if (strcmp(argv[0], "nodbgsession") == 0) { ctx->dbgsession = 0; printk(KERN_DEBUG "avm_pa: %s\n", argv[0]); /* dbgnosession | nodbgnosession */ } else if (strcmp(argv[0], "dbgnosession") == 0) { ctx->dbgnosession = 1; printk(KERN_DEBUG "avm_pa: %s\n", argv[0]); } else if (strcmp(argv[0], "nodbgnosession") == 0) { ctx->dbgnosession = 0; printk(KERN_DEBUG "avm_pa: %s\n", argv[0]); /* trace | notrace */ } else if (strcmp(argv[0], "trace") == 0) { #if AVM_PA_TRACE ctx->dbgtrace = 1; printk(KERN_DEBUG "avm_pa: %s\n", argv[0]); #else printk(KERN_ERR "avm_pa: trace not compiled in\n"); #endif } else if (strcmp(argv[0], "notrace") == 0) { ctx->dbgtrace = 0; printk(KERN_DEBUG "avm_pa: %s\n", argv[0]); /* dbgmatch | nodbgmatch */ } else if (strcmp(argv[0], "nodbgmatch") == 0) { ctx->dbgmatch = 0; printk(KERN_DEBUG "avm_pa: %s\n", argv[0]); } else if (strcmp(argv[0], "dbgmatch") == 0) { ctx->dbgmatch = 1; printk(KERN_DEBUG "avm_pa: %s\n", argv[0]); /* dbgcputime | nodbgcputime */ } else if (strcmp(argv[0], "nodbgcputime") == 0) { ctx->dbgcputime = 0; printk(KERN_DEBUG "avm_pa: %s\n", argv[0]); } else if (strcmp(argv[0], "dbgcputime") == 0) { ctx->dbgcputime = 1; printk(KERN_DEBUG "avm_pa: %s\n", argv[0]); /* dbgprioack | nodbgprioack */ } else if (strcmp(argv[0], 
"dbgprioack") == 0) { ctx->dbgprioack = 1; printk(KERN_DEBUG "avm_pa: %s\n", argv[0]); } else if (strcmp(argv[0], "nodbgprioack") == 0) { ctx->dbgprioack = 0; printk(KERN_DEBUG "avm_pa: %s\n", argv[0]); /* dbgprioacktrace | nodbgprioacktrace */ } else if (strcmp(argv[0], "dbgprioacktrace") == 0) { ctx->dbgprioacktrace = 1; printk(KERN_DEBUG "avm_pa: %s\n", argv[0]); } else if (strcmp(argv[0], "nodbgprioacktrace") == 0) { ctx->dbgprioacktrace = 0; printk(KERN_DEBUG "avm_pa: %s\n", argv[0]); /* dbgstats | nodbgstats */ } else if (strcmp(argv[0], "dbgstats") == 0) { ctx->dbgstats = 1; printk(KERN_DEBUG "avm_pa: %s\n", argv[0]); } else if (strcmp(argv[0], "nodbgstats") == 0) { ctx->dbgstats = 0; printk(KERN_DEBUG "avm_pa: %s\n", argv[0]); /* pid */ } else if (strcmp(argv[0], "pid") == 0 && argv[1]) { struct net_device *dev = dev_get_by_name(&init_net, argv[1]); if (dev) { struct avm_pa_pid_cfg cfg; snprintf(cfg.name, sizeof(cfg.name), "%s", argv[1]); cfg.framing = avm_pa_framing_dev; cfg.default_mtu = 1500; cfg.tx_func = pa_dev_transmit; cfg.tx_arg = dev; if (avm_pa_dev_pid_register(AVM_PA_DEVINFO(dev), &cfg) < 0) printk(KERN_ERR "%s: failed to register PA PID\n", argv[1]); dev_put(dev); } else { printk(KERN_ERR "avm_pa_write_cmds(pid): dev %s not found\n", argv[1]); } /* vpid */ } else if (strcmp(argv[0], "vpid") == 0 && argv[1]) { struct net_device *dev = dev_get_by_name(&init_net, argv[1]); if (dev) { struct avm_pa_vpid_cfg cfg; snprintf(cfg.name, sizeof(cfg.name), "%s", argv[1]); cfg.v4_mtu = 1500; cfg.v6_mtu = 1500; if (avm_pa_dev_vpid_register(AVM_PA_DEVINFO(dev), &cfg) < 0) printk(KERN_ERR "%s: failed to register PA VPID\n", argv[1]); dev_put(dev); } else { printk(KERN_ERR "avm_pa_write_cmds(vpid): dev %s not found\n", argv[1]); } /* unreg */ } else if (strcmp(argv[0], "unreg") == 0 && argv[1]) { struct net_device *dev = dev_get_by_name(&init_net, argv[1]); if (dev) { avm_pa_dev_unregister(AVM_PA_DEVINFO(dev)); dev_put(dev); } else { printk(KERN_ERR 
"avm_pa_write_cmds(unreg): dev %s not found\n", argv[1]); } /* prioack * * Note: This interface is now partially obsolete (prioack ) * in favour of the priomap interface defined below. */ } else if (strcmp(argv[0], "prioack") == 0) { unsigned val = 0; if (argv[1]) { printk(KERN_DEBUG "avm_pa: prioack %s %s %s\n", argv[1], argv[2] ? argv[2] : "", argv[3] ? argv[3] : ""); if (strcmp(argv[1], "enable") == 0) { if (argv[2] && argv[3]) { if ((pid_handle = pa_find_pid_by_name(argv[2])) != 0) { avm_pa_pid_activate_tcpackprio(pid_handle, 1, simple_strtoul(argv[3], 0, 0)); } else { printk(KERN_ERR "avm_pa: %s %s: %s not found\n", argv[0], argv[1], argv[2]); } } } else if (strcmp(argv[1], "disable") == 0) { if (argv[2]) { if ((pid_handle = pa_find_pid_by_name(argv[2])) != 0) { avm_pa_pid_activate_tcpackprio(pid_handle, 0, 0); } else { printk(KERN_ERR "avm_pa: prioack %s: %s not found\n", argv[1], argv[2]); } } else { int n; for (n = 1; n < CONFIG_AVM_PA_MAX_PID; ++n) { avm_pa_pid_activate_tcpackprio(n, 0, 0); } } } else if (strcmp(argv[1], "tgetenable") == 0) { if (argv[2] && argv[3]) { if ((pid_handle = pa_find_pid_by_name(argv[2])) != 0) { avm_pa_pid_activate_tgetprio(pid_handle, 1, simple_strtoul(argv[3], 0, 0)); } else { printk(KERN_ERR "avm_pa: %s %s: %s not found\n", argv[0], argv[1], argv[2]); } } } else if (strcmp(argv[1], "tgetdisable") == 0) { if (argv[2]) { if ((pid_handle = pa_find_pid_by_name(argv[2])) != 0) { avm_pa_pid_activate_tgetprio(pid_handle, 0, 0); } else { printk(KERN_ERR "avm_pa: %s %s: %s not found\n", argv[0], argv[1], argv[2]); } } else { int n; for (n = 1; n < CONFIG_AVM_PA_MAX_PID; ++n) { avm_pa_pid_activate_tgetprio(n, 0, 0); } } } else if (strcmp(argv[1], "pthresh") == 0) { if (argv[2]) val = simple_strtoul(argv[2], 0, 0); if (val) ctx->prioack_thresh_packets = val; } else if (strcmp(argv[1], "ratio") == 0) { if (argv[2]) val = simple_strtoul(argv[2], 0, 0); if (val) ctx->prioack_ratio = val; } else { printk(KERN_DEBUG "avm_pa: prioack unknown 
command %s \n (available commands: enable,disable,psize,pthresh,prio,ratio)\n", argv[1]); } } /* The priomap interface supersedes the old prioack interface. */ } else if (strcmp(argv[0], "priomap") == 0) { if (argv[1] && argv[2] && argv[3]) { unsigned short prio_map = simple_strtoul(argv[1], 0, 0); if ((pid_handle = pa_find_pid_by_name(argv[2])) != 0) { /* Command: priomap * * Effect: Enables or disables the priority map attached to the * device specified by the 'dev' parameter. The 'priomap' parameter * MUST equal either AVM_PA_PRIO_MAP_TACK (= 0x0000) or * AVM_PA_PRIO_MAP_TGET (= 0x0001) as defined in avm_pa.h. */ if (strcmp(argv[3], "enable") == 0) { avm_pa_pid_prio_map_enable(pid_handle, prio_map, 1); } else if (strcmp(argv[3], "disable") == 0) { avm_pa_pid_prio_map_enable(pid_handle, prio_map, 0); /* Command: priomap reset * * Effect: Resets the priority map attached to the device specified by * the 'dev' parameter. The 'priomap' parameter MUST equal either * AVM_PA_PRIO_MAP_TACK (= 0x0000) or AVM_PA_PRIO_MAP_TGET (= 0x0001) * as defined in avm_pa.h. */ } else if (strcmp(argv[3], "reset") == 0) { avm_pa_pid_prio_map_reset(pid_handle, prio_map); /* Command: priomap set_prio * * Effect: Manipulates the priority map entry specified by the * 'queue' parameter which is stored in the priority map attached * to the device specified by the 'dev' parameter. The 'priomap' * parameter MUST equal either AVM_PA_PRIO_MAP_TACK (= 0x0000) or * AVM_PA_PRIO_MAP_TGET (= 0x0001) as defined in avm_pa.h. 
*/ } else if (strcmp(argv[3], "setprio") == 0 && argv[4] && argv[5]) { avm_pa_pid_prio_map_set_prio_per_queue(pid_handle, prio_map, simple_strtoul(argv[4], 0, 0), /* queue */ simple_strtoul(argv[5], 0, 0)); /* prio */ } else { printk(KERN_ERR "avm_pa: priomap unknown command '%s'\n (available commands: enable, disable, reset, setprio)\n", argv[3]); } } else { printk(KERN_ERR "avm_pa: %s %s %s %s: %s not found\n", argv[0], argv[1], argv[2], argv[3], argv[2]); } } else { printk(KERN_ERR "avm_pa: %s: not enough parameters\n", argv[0]); } } else { printk(KERN_ERR "avm_pa_write_cmds: %s: unknown command\n", argv[0]); } return count; } /* ------------------------------------------------------------------------ */ const struct file_operations selector_fops = { .read = avm_pa_read_selectors, .write = avm_pa_write_selectors, }; const struct file_operations avm_pa_control_fops = { .write = avm_pa_write_cmds, }; static struct proc_dir_entry *dir_entry = 0; static void __init avm_pa_proc_init(void) { dir_entry = proc_net_mkdir(&init_net, "avm_pa", init_net.proc_net); proc_create("control", S_IFREG|S_IWUSR, dir_entry, &avm_pa_control_fops); proc_create("brief", S_IRUGO, dir_entry, &brief_show_fops); proc_create("status", S_IRUGO, dir_entry, &status_show_fops); proc_create("stats", S_IRUGO, dir_entry, &stats_show_fops); proc_create("pids", S_IRUGO, dir_entry, &pids_show_fops); proc_create("vpids", S_IRUGO, dir_entry, &vpids_show_fops); proc_create("sessions", S_IRUGO, dir_entry, &sess_show_fops); proc_create("xsession", S_IRUGO|S_IWUSR, dir_entry, &selector_fops); proc_create("bsessions", S_IRUGO, dir_entry, &bsess_show_fops); proc_create("macaddrs", S_IRUGO, dir_entry, &macaddr_show_fops); #if AVM_PA_TOKSTATS proc_create("tokstats", S_IRUGO, dir_entry, &tstats_show_fops); #endif proc_create("hashes", S_IRUGO, dir_entry, &hash_show_fops); proc_create("prioack", S_IRUGO, dir_entry, &prioack_show_fops); proc_create("priomaps", S_IRUGO, dir_entry, &priomaps_show_fops); 
proc_create("vpidpriostats", S_IRUGO, dir_entry, &vpids_all_stats_show_fops);
   /* this one lives directly in the proc_net directory, not below avm_pa */
   proc_create("avm_pp_queue_stats", S_IRUGO, init_net.proc_net, &vpids_hw_stats_show_fops);
}

/*
 * Remove all proc entries created by avm_pa_proc_init() and free the
 * session selector list that was built up through the xsession file.
 */
static void __exit avm_pa_proc_exit(void)
{
   struct pa_selector_entry *sel, *temp;

   remove_proc_entry("control", dir_entry);
   remove_proc_entry("brief", dir_entry);
   remove_proc_entry("status", dir_entry);
   remove_proc_entry("stats", dir_entry);
   remove_proc_entry("pids", dir_entry);
   remove_proc_entry("vpids", dir_entry);
   remove_proc_entry("sessions", dir_entry);
   remove_proc_entry("xsession", dir_entry);
   remove_proc_entry("bsessions", dir_entry);
   remove_proc_entry("macaddrs", dir_entry);
#if AVM_PA_TOKSTATS
   remove_proc_entry("tokstats", dir_entry);
#endif
   remove_proc_entry("hashes", dir_entry);
   remove_proc_entry("prioack", dir_entry);
   remove_proc_entry("priomaps", dir_entry);
   remove_proc_entry("vpidpriostats", dir_entry);
   /* the directory itself, then the entry that sits next to it */
   remove_proc_entry("avm_pa", init_net.proc_net);
   remove_proc_entry("avm_pp_queue_stats", init_net.proc_net);

   list_for_each_entry_safe(sel, temp, &pa_glob.session_selector, list) {
      list_del(&sel->list);
      kfree(sel);
   }
}
#endif

/* ------------------------------------------------------------------------ */
/* -------- misc device for capture tracking ------------------------------ */
/* ------------------------------------------------------------------------ */

/* The misc device carries no data: reads always report end of file. */
static ssize_t avm_pa_misc_read(struct file *file, char __user *buf,
                                size_t count, loff_t *ppos)
{
   return 0;
}

/* Never signals any event. */
static unsigned int avm_pa_misc_poll(struct file *file, poll_table *wait)
{
   return 0;
}

/* Count the opener in ctx->misc_is_open. */
static int avm_pa_misc_open(struct inode *inode, struct file *file)
{
   struct avm_pa_global *ctx = &pa_glob;

   atomic_inc(&ctx->misc_is_open);
   return 0;
}

/*
 * Drop one opener from ctx->misc_is_open, never going below zero.
 * NOTE(review): the read and the decrement are two separate atomic
 * operations, not one combined step.
 */
static int avm_pa_misc_release(struct inode *inode, struct file *file)
{
   struct avm_pa_global *ctx = &pa_glob;

   if (atomic_read(&ctx->misc_is_open) > 0)
      atomic_dec(&ctx->misc_is_open);
   return 0;
}

static const struct file_operations avm_pa_misc_fops = {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
   .owner = THIS_MODULE,
#endif
   .llseek = no_llseek,
   .read = avm_pa_misc_read,
   .poll = avm_pa_misc_poll,
   .open = avm_pa_misc_open,
   .release = avm_pa_misc_release,
};

/* registered in avm_pa_init() under the name "avm_pa" with a dynamic minor */
static struct miscdevice avm_pa_misc_dev = {
   .minor = MISC_DYNAMIC_MINOR,
   .name = "avm_pa",
   .fops = &avm_pa_misc_fops
};

/* ------------------------------------------------------------------------ */
/* -------- init & exit functions ----------------------------------------- */
/* ------------------------------------------------------------------------ */

/*
 * Module init: sets up the selector list, timers, packet queues and
 * tasklets of the global context, initialises the session freelist,
 * enables the accelerator unless ctx->disabled is set, registers the misc
 * device and the proc entries and starts the statistics timer.
 * Always returns 0; a failing misc_register() is only logged.
 */
int __init avm_pa_init(void)
{
   struct avm_pa_global *ctx = &pa_glob;

   INIT_LIST_HEAD(&ctx->session_selector);

   setup_timer(&ctx->stat_timer, pa_stat_timer_expired, 0);
   setup_timer(&ctx->gc_timer, pa_gc_timer_expired, 0);
   setup_timer(&ctx->est_timer, avm_pa_est_timer, 0);
   setup_timer(&ctx->cputime_est_timer, avm_pa_cputime_est_timer, 0);
   setup_timer(&ctx->lc_timer, avm_pa_lc_timer_expired, 0);

   skb_queue_head_init(&ctx->irqqueue);
   tasklet_init(&ctx->irqtasklet, avm_pa_irq_tasklet, 0);
   skb_queue_head_init(&ctx->tbfqueue);
   tasklet_init(&ctx->tbftasklet, avm_pa_tbf_tasklet, 0);

   printk(KERN_INFO "AVM PA for Linux %s\n", UTS_RELEASE);
   printk(KERN_INFO "AVM PA skb pktinfo at offset %zu size %zu\n",
                    offsetof(struct sk_buff, avm_pa),
                    sizeof(struct avm_pa_pkt_info));

   avm_pa_init_freelist();
   if (ctx->disabled == 0)
      avm_pa_enable();

   /* NOTE(review): the message below lacks a trailing "\n" */
   if (misc_register(&avm_pa_misc_dev) < 0)
      printk(KERN_ERR "avm_pa: misc_register() failed");

#ifdef CONFIG_PROC_FS
   avm_pa_proc_init();
#endif
#ifdef CONFIG_AVM_POWERMETER
   ctx->load_control_handle = avm_powermanager_load_control_register("avm_pa", avm_pa_load_control_cb, 0);
#endif
   pa_start_stat_timer();
   return 0;
}

/*
 * Module exit: releases the load control registration, disables the
 * accelerator, drops all queued packets, garbage collects the sessions
 * and removes the proc entries and the misc device.
 *
 * NOTE(review): only irqtasklet is killed here although tbftasklet is
 * initialised in avm_pa_init() as well, and none of the timers set up
 * there is stopped in this function. Confirm that avm_pa_disable() or
 * the other helpers called here take care of that.
 */
void __exit avm_pa_exit(void)
{
   struct avm_pa_global *ctx = &pa_glob;
   struct sk_buff *skb;

#ifdef CONFIG_AVM_POWERMETER
   if (ctx->load_control_handle) {
      avm_powermanager_load_control_release(ctx->load_control_handle);
      ctx->load_control_handle = 0;
   }
#endif
   ctx->disabled = 1;
   ctx->fw_disabled = 1;
   avm_pa_disable();

   tasklet_kill(&ctx->irqtasklet);
   while ((skb = skb_dequeue(&ctx->irqqueue)) != 0)
      kfree_skb(skb);
   while ((skb = skb_dequeue(&ctx->tbfqueue)) != 0)
      kfree_skb(skb);

   /* Two passes: per the state description at the top of the file one
    * pa_session_gc() run moves FLUSHED sessions to DEAD and another one
    * moves DEAD sessions to FREE. */
   pa_session_gc(1);
   pa_session_gc(1);

#ifdef CONFIG_PROC_FS
   avm_pa_proc_exit();
#endif
   misc_deregister(&avm_pa_misc_dev);
   avm_pa_reset_stats();
}

#ifdef CONFIG_IFX_PPA
/*
 * Set the AVMNET_DEVICE_IFXPPA_DISABLE_TX_ACL flag on the hardware
 * descriptor of every registered pid that has one with atmvcc set.
 */
void avm_pa_disable_atm_hw_tx_acl(void){
   int n;
   struct avm_pa_global *ctx = &pa_glob;

   for (n=1; n < CONFIG_AVM_PA_MAX_PID; n++) {
      struct avm_pa_pid *pid = PA_PID(ctx, n);
      /* unused slot */
      if (pid->pid_handle == 0)
         continue;
      if (pid->hw && pid->hw->atmvcc){
         pid->hw->flags |= AVMNET_DEVICE_IFXPPA_DISABLE_TX_ACL;
      }
   }
}
EXPORT_SYMBOL(avm_pa_disable_atm_hw_tx_acl);

/*
 * Counterpart of avm_pa_disable_atm_hw_tx_acl(): clears the
 * AVMNET_DEVICE_IFXPPA_DISABLE_TX_ACL flag again on the same set of pids.
 */
void avm_pa_enable_atm_hw_tx_acl(void){
   struct avm_pa_global *ctx = &pa_glob;
   int n;

   for (n=1; n < CONFIG_AVM_PA_MAX_PID; n++) {
      struct avm_pa_pid *pid = PA_PID(ctx, n);
      /* unused slot */
      if (pid->pid_handle == 0)
         continue;
      if (pid->hw && pid->hw->atmvcc){
         pid->hw->flags &= ~AVMNET_DEVICE_IFXPPA_DISABLE_TX_ACL;
      }
   }
}
EXPORT_SYMBOL(avm_pa_enable_atm_hw_tx_acl);
#endif

module_init(avm_pa_init);
module_exit(avm_pa_exit);