// SPDX-License-Identifier: GPL-2.0+
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "hwpa.h"
#include "hwpa_prx.h"

/*
 * mpe-se/firmware is not robust against immediate reuse of hw_session
 * memory. MxL proposes udelay(8) in mpe_hal_del_session_ipv{4,6},
 * so our 250ms approach is far more defensive.
 */
#define MPE_GRACE_PERIOD (HZ >> 2)

static struct kmem_cache *kmem;
static struct list_head free_list;
static struct delayed_work free_work;
static DEFINE_SPINLOCK(free_list_lock);

struct hkey {
	enum {
		HKEY_V4,
		HKEY_V6,
	} tag;
	union {
		struct ipv4_hash_auto_key v4;
		struct ipv6_hash_auto_key v6;
	};
};

struct session {
	struct hkey key;
	unsigned long entry;
	struct session_mib last_mib;
	u8 templ_buf[AVM_PA_MAX_HEADER];
	struct session_action action;
	struct list_head free_list_node;
	unsigned long erase_time;
};

static enum hwpa_backend_rv extract_key_v6(const struct avm_pa_session *s,
					   struct ipv6_hash_auto_key *k)
{
	const u16 *ports;
	const struct ipv6hdr *hdr;

	hdr = hwpa_get_hdr(&s->ingress, AVM_PA_IPV6);
	memcpy(&k->srcip, &hdr->saddr, sizeof(k->srcip));
	memcpy(&k->dstip, &hdr->daddr, sizeof(k->dstip));

	ports = hwpa_get_hdr(&s->ingress, AVM_PA_PORTS);
	k->srcport = ports[0];
	k->dstport = ports[1];

	switch (AVM_PA_PKTTYPE_IPPROTO(s->ingress.pkttype)) {
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
		k->extn = 1;
		break;
	case IPPROTO_TCP:
		k->extn = 0;
		break;
	default:
		return HWPA_BACKEND_ERR_INTERNAL;
	}

	return HWPA_BACKEND_SUCCESS;
}

static enum hwpa_backend_rv extract_key_v4(const struct avm_pa_session *s,
					   struct ipv4_hash_auto_key *k)
{
	const u16 *ports;
	const struct iphdr *hdr;

	hdr = hwpa_get_hdr(&s->ingress, AVM_PA_IPV4);
	memcpy(&k->srcip, &hdr->saddr, sizeof(k->srcip));
	memcpy(&k->dstip, &hdr->daddr, sizeof(k->dstip));

	ports = hwpa_get_hdr(&s->ingress, AVM_PA_PORTS);
	k->srcport = ports[0];
	k->dstport = ports[1];

	switch (AVM_PA_PKTTYPE_IPPROTO(s->ingress.pkttype)) {
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
		k->extn = 1;
		break;
	case IPPROTO_TCP:
		k->extn = 0;
		break;
	default:
		return HWPA_BACKEND_ERR_INTERNAL;
	}

	return HWPA_BACKEND_SUCCESS;
}

static enum hwpa_backend_rv extract_key(const struct avm_pa_session *s,
					struct hkey *k)
{
	enum hwpa_backend_rv rv;

	switch (s->ingress.pkttype & AVM_PA_PKTTYPE_IP_MASK) {
	case AVM_PA_PKTTYPE_IPV6:
		rv = extract_key_v6(s, &k->v6);
		k->tag = HKEY_V6;
		break;
	case AVM_PA_PKTTYPE_IPV4:
		rv = extract_key_v4(s, &k->v4);
		k->tag = HKEY_V4;
		break;
	default:
		rv = HWPA_BACKEND_ERR_INTERNAL;
	}

	return rv;
}

static enum hwpa_backend_rv
extract_action_pppoe(const struct avm_pa_pkt_match *eg_match,
		     struct session_action *a)
{
	struct pppoe_hdr *pppoe;

	pppoe = (void *)hwpa_get_hdr(eg_match, AVM_PA_PPPOE);
	if (pppoe) {
		/* MPE FW needs to fill the PPPoE template with a valid
		 * length. It does so by adding the number of bytes it deems
		 * to be the inner packet length to the length contained in
		 * the template. Currently, the inner packet length is the
		 * length of the innermost IP frame.
		 */
		a->pppoe_offset_en = 1;
		a->pppoe_offset = eg_match->pppoe_offset;
		pppoe = (void *)(a->templ_buf + a->pppoe_offset);
		/* Read as: inner_ip_offset - (pppoe_offset + pppoe_size).
		 * This is supposed to be the length of the static part of the
		 * PPPoE payload. MPE considers the length of the innermost IP
		 * frame to be variable across a specific flow and adds it
		 * itself for each packet.
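		 *
		 * Illustrative example (hypothetical numbers, assuming a
		 * plain PPPoE-over-Ethernet egress and the usual 6-byte
		 * struct pppoe_hdr): pppoe_offset = 14 (right after the
		 * Ethernet header) and ip_offset = 22, so the value written
		 * here is 22 - (14 + 6) = 2, i.e. just the PPP protocol
		 * field sitting between the PPPoE header and the inner IP
		 * header.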
		 */
		pppoe->length = eg_match->ip_offset -
				(eg_match->pppoe_offset + sizeof(*pppoe));
	}

	return HWPA_BACKEND_SUCCESS;
}

static enum hwpa_backend_rv
extract_action_nat(const struct avm_pa_pkt_match *ig_match,
		   const struct avm_pa_pkt_match *eg_match,
		   struct session_action *a)
{
	const void *addr_snat, *addr_dnat;
	size_t addr_size;
	bool port_snat, port_dnat;

	/* compare UDP/TCP ports and copy the translated values */
	{
		const u16 *ig_ports, *eg_ports;

		ig_ports = hwpa_get_hdr(ig_match, AVM_PA_PORTS);
		eg_ports = hwpa_get_hdr(eg_match, AVM_PA_PORTS);

		port_snat = ig_ports[0] != eg_ports[0];
		if (port_snat)
			a->new_src_port = eg_ports[0];

		port_dnat = ig_ports[1] != eg_ports[1];
		if (port_dnat)
			a->new_dst_port = eg_ports[1];

		if (port_snat || port_dnat)
			a->routing_flag = 1;
	}

	/* check for NAT and copy the translated address */
	if ((eg_match->pkttype & AVM_PA_PKTTYPE_IP_MASK) ==
	    AVM_PA_PKTTYPE_IPV6) {
		const struct ipv6hdr *ip6_ig, *ip6_eg;

		//a->eIpType = GSW_RT_IP_V6;
		ip6_ig = (struct ipv6hdr *)hwpa_get_hdr(ig_match, AVM_PA_IPV6);
		ip6_eg = (struct ipv6hdr *)hwpa_get_hdr(eg_match, AVM_PA_IPV6);

		addr_size = sizeof(ip6_ig->daddr);
		addr_dnat = (port_dnat ||
			     memcmp(&ip6_ig->daddr, &ip6_eg->daddr, addr_size)) ?
			    &ip6_eg->daddr : NULL;
		addr_snat = (port_snat ||
			     memcmp(&ip6_ig->saddr, &ip6_eg->saddr, addr_size)) ?
			    &ip6_eg->saddr : NULL;
	} else {
		const struct iphdr *ip4_ig, *ip4_eg;

		//a->eIpType = GSW_RT_IP_V4;
		ip4_ig = (struct iphdr *)hwpa_get_hdr(ig_match, AVM_PA_IPV4);
		ip4_eg = (struct iphdr *)hwpa_get_hdr(eg_match, AVM_PA_IPV4);

		addr_size = sizeof(ip4_ig->daddr);
		addr_dnat = (port_dnat ||
			     memcmp(&ip4_ig->daddr, &ip4_eg->daddr, addr_size)) ?
			    &ip4_eg->daddr : NULL;
		addr_snat = (port_snat ||
			     memcmp(&ip4_ig->saddr, &ip4_eg->saddr, addr_size)) ?
			    &ip4_eg->saddr : NULL;
	}

	if (addr_dnat) {
		a->new_dst_ip_en = 1;
		memcpy(&a->new_dst_ip, addr_dnat, addr_size);
	}

	if (addr_snat) {
		a->new_src_ip_en = 1;
		memcpy(&a->new_src_ip, addr_snat, addr_size);
	}

	if (addr_snat || addr_dnat) {
		/* TODO using this would be nice */
		//ip4h->tos |= (s_act->new_inner_dscp << 2);
	}

	return HWPA_BACKEND_SUCCESS;
}

static bool pid_supported(avm_pid_handle pid)
{
	struct net_device *netdev;
	PPA_SUBIF dp_port;

	netdev = hwpa_get_netdev(pid);
	if (!netdev)
		return false;

	if (dp_get_netif_subifid(netdev, NULL, NULL, NULL, &dp_port, 0)) {
		dev_put(netdev);
		return false;
	}
	dev_put(netdev);

	return true;
}

/* implementations required by the backend */

enum hwpa_backend_rv hwpa_backend_probe_session(const struct avm_pa_session *s,
						unsigned long *handle_out)
{
	if (!pid_supported(s->ingress_pid_handle))
		return HWPA_BACKEND_ERR_INTERNAL;

	if (!pid_supported(avm_pa_first_egress(s)->pid_handle))
		return HWPA_BACKEND_ERR_INTERNAL;

	*handle_out = hw_handle_invalid;

	return HWPA_BACKEND_SUCCESS;
}

enum hwpa_backend_rv hwpa_backend_add_session(const struct avm_pa_session *s,
					      unsigned long *handle_out)
{
	//struct ipv4_hash_auto_key;
	struct session_action *action;
	struct net_device *dev;
	const struct avm_pa_egress *eg;
	const struct avm_pa_pkt_match *ig_match, *eg_match;
	enum hwpa_backend_rv rv;
	struct session *hws;
	uint32_t ppa_rv;
	bool is_wlan;
	bool is_vani;
	PPA_SUBIF dp_port = { 0 };
	u16 eg_encap, ig_encap, encap;

	hws = kmem_cache_alloc(kmem, GFP_KERNEL);
	if (!hws)
		return HWPA_BACKEND_ERR_INTERNAL;
	memset(hws, 0, sizeof(*hws));

	rv = extract_key(s, &hws->key);
	if (rv != HWPA_BACKEND_SUCCESS)
		goto err;

	rv = HWPA_BACKEND_ERR_INTERNAL;
	action = &hws->action;
	eg = avm_pa_first_egress(s);
	ig_match = &s->ingress;
	eg_match = &eg->match;

	dev = hwpa_get_netdev(eg->pid_handle);
	if (!dev)
		goto err;
	/* For dc-wlan we need to look up the vap and stationId (by destmac),
	 * which are stored in subifid and uc_vap_list[0], respectively.
	 */
	if (dp_get_netif_subifid(dev, NULL, NULL,
				 eg->destmac ? eg->destmac->mac : NULL,
				 &dp_port, 0)) {
		dev_put(dev);
		goto err;
	}
	dev_put(dev);

	is_wlan = (dp_port.alloc_flag & DP_F_FAST_WLAN);
	is_vani = (dp_port.alloc_flag & DP_F_VUNI) &&
		  (dp_port.data_flag & DP_SUBIF_VANI);

	if (is_vani) {
		PPA_SUBIF dp_port_vuni = { 0 };

		if (dp_get_netif_subifid(dp_port.associate_netif, NULL, NULL,
					 NULL, &dp_port_vuni, 0))
			goto err;

		/* In PON-TX-direction we want to bypass the VANI/VUNI bridge,
		 * thus we are only interested in the VUNI parameters.
		 * VANI (which is represented by dp_port) is not interesting
		 * for MPE.
		 */
		dp_port = dp_port_vuni;

		/* TODO where is the qos index / vap for GPON coming from? */
	}

	action->entry_vld = 1;
	action->redirect = !is_vani;
	action->dst_pmac_port_num = 1;
	action->dst_pmac_port_list[0] = dp_port.port_id;
	action->uc_vap_list[0] = dp_port.subif;

	ig_encap = s->ingress.pkttype & AVM_PA_PKTTYPE_IPENCAP_MASK;
	eg_encap = avm_pa_first_egress(s)->match.pkttype &
		   AVM_PA_PKTTYPE_IPENCAP_MASK;

	if (ig_encap) {
		encap = ig_encap;
		action->mtu = eg->mtu;
	} else if (eg_encap) {
		encap = eg_encap;
		/* hw does the mtu check before encapsulation, but this mtu
		 * parameter refers to the packet after encapsulation
		 */
		action->mtu = eg->mtu - s->mod.push_encap_len;
	} else {
		encap = 0;
		action->mtu = eg->mtu;
	}

	/* hw checks the mtu only against smaller values (<), but the
	 * software assumes the mtu to be compared as smaller or equal (<=).
	 * So to include the maximum given mtu inside the check, add one.
	 */
	action->mtu++;

	switch (encap) {
	case AVM_PA_PKTTYPE_IPV4ENCAP:
		action->tunnel_type = TUNL_6RD;
		action->in_eth_iphdr_offset_en = 1;
		break;
	case AVM_PA_PKTTYPE_IPV6ENCAP:
		action->tunnel_type = TUNL_DSLITE;
		action->in_eth_iphdr_offset_en = 1;
		break;
	default:
		action->tunnel_type = TUNL_NULL;
		action->in_eth_iphdr_offset_en = 0;
		break;
	}

	/* GRE key */
	action->key_en = 0;
	/* ipsec tunnel */
	action->tunnel_id = 0;

	action->templ_len = s->mod.push_l2_len + s->mod.push_encap_len;
	if (action->templ_len > sizeof(hws->templ_buf))
		goto err;

	/* Need a local copy of the template for modifications */
	action->templ_buf = hws->templ_buf;
	memcpy(action->templ_buf, HDRCOPY(&s->mod), action->templ_len);

	action->pkt_len_delta = s->mod.push_l2_len - s->mod.pull_l2_len;

	/* decrease ttl */
#ifdef AVM_PA_MOD_TTL
	action->routing_flag = !!(s->mod.modflags & AVM_PA_MOD_TTL);
#else
	action->routing_flag = s->routed;
#endif

	extract_action_pppoe(eg_match, action);
	extract_action_nat(ig_match, eg_match, action);

	/* tunnel_rm_en causes MPE FW to remove encapsulation matching the
	 * tunnel type automatically. It does so by determining the equivalent
	 * of pull_encap_len and subtracting it from the delta.
	 * One could either choose to let MPE FW remove encapsulation based on
	 * classification data of the incoming packet, or to make the
	 * adjustment to pkt_len_delta manually. The latter would strip the
	 * same length from any packet matching the 5-tuple and tunnel_type.
	 * The manual approach seems less safe. Ignore pull_encap_len and let
	 * MPE FW do its thing.
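	 *
	 * Illustrative example (hypothetical numbers for terminating a
	 * tunnel in RX direction): pull_encap_len = 40 (outer IPv6 header),
	 * push_l2_len = pull_l2_len = 14, push_encap_len = 0. pkt_len_delta
	 * then stays at 0, while tunnel_rm_en = 1 lets the firmware subtract
	 * the 40 removed bytes per packet on its own.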
	 */
	action->tunnel_rm_en = !!s->mod.pull_encap_len;
	action->pkt_len_delta += s->mod.push_encap_len;

	/* activates IPIP;
	 * template offset of outer ip
	 */
	action->tunnel_ip_offset_en = !!s->mod.push_encap_len;
	action->tunnel_ip_offset = s->mod.push_l2_len;

	/* template offset of inner ip */
	action->in_eth_iphdr_offset = s->mod.push_l2_len + s->mod.push_encap_len;

	/* Structure is IP + UDP + IP */
	action->tunnel_udp_offset_en = !!s->mod.push_udpoffset;
	action->tunnel_udp_offset = s->mod.push_udpoffset;

	if (hws->key.tag == HKEY_V6)
		ppa_rv = mpe_add_session_v6(&hws->key.v6, action, &hws->entry);
	else
		ppa_rv = mpe_add_session_v4(&hws->key.v4, action, &hws->entry);

	/* set the traffic class */
	/* TODO: Add new egress types, as soon as we can accelerate them. */
	action->new_traffic_class_en = 1;
	if (eg->type == avm_pa_egresstype_output) {
		action->traffic_class = clamp(TC_H_MIN(eg->output.priority),
					      1u, 7u);
	} else {
		action->traffic_class = 0; /* default queue */
	}

	if (ppa_rv)
		goto err;

	if (hws->action.sess_mib_ix_en)
		mpe_hal_get_session_mib(hws->key.tag == HKEY_V6,
					action->sess_mib_ix, &hws->last_mib);

	*handle_out = (unsigned long)hws;

	return HWPA_BACKEND_SUCCESS;

err:
	kmem_cache_free(kmem, hws);
	return rv;
}

enum hwpa_backend_rv hwpa_backend_rem_session(unsigned long handle)
{
	struct session *hws;

	hws = (void *)handle;

	/* In case mpe-se does not read its session_lookup_table immediately,
	 * at least invalidate the session action, so that mpe-firmware will
	 * not apply this action any longer.
	 */
	hws->action.entry_vld = 0;
	mb();

	if (hws->key.tag == HKEY_V4)
		mpe_del_session_v4(&hws->key.v4, hws->entry);
	else
		mpe_del_session_v6(&hws->key.v6, hws->entry);

	/* wait for MPE_GRACE_PERIOD before we do kmem_cache_free */
	hws->erase_time = jiffies + MPE_GRACE_PERIOD;

	spin_lock(&free_list_lock);
	list_add_tail(&hws->free_list_node, &free_list);
	spin_unlock(&free_list_lock);

	schedule_delayed_work(&free_work, MPE_GRACE_PERIOD);

	return HWPA_BACKEND_SUCCESS;
}

static inline bool in_grace_period(struct session *hws)
{
	return time_is_after_eq_jiffies(hws->erase_time);
}

static void mpe_free_worker(struct work_struct *work)
{
	struct list_head *pos;
	struct list_head *tmp;

	spin_lock(&free_list_lock);
	list_for_each_safe(pos, tmp, &free_list) {
		struct session *hws = list_entry(pos, struct session,
						 free_list_node);

		if (in_grace_period(hws)) {
			/* entries are appended in erase_time order, so
			 * reschedule for the first one still in its grace
			 * period and stop here
			 */
			schedule_delayed_work(&free_work,
					      hws->erase_time - jiffies);
			spin_unlock(&free_list_lock);
			return;
		}

		list_del(pos);
		kmem_cache_free(kmem, hws);
	}
	spin_unlock(&free_list_lock);
}

enum hwpa_backend_rv hwpa_backend_stats(unsigned long handle,
					struct avm_pa_session_stats *stats)
{
	struct session_mib session_mib;
	struct session *hws;

	hws = (void *)handle;

	memset(stats, 0, sizeof(*stats));

	if (!hws->action.sess_mib_ix_en)
		return HWPA_BACKEND_SUCCESS;

	if (mpe_hal_get_session_mib(hws->key.tag == HKEY_V6,
				    hws->action.sess_mib_ix, &session_mib))
		return HWPA_BACKEND_SUCCESS;

	stats->tx_pkts = session_mib.mib.pkt - hws->last_mib.mib.pkt;
	stats->tx_bytes = session_mib.mib.bytes - hws->last_mib.mib.bytes;
	hws->last_mib = session_mib;

	stats->validflags |= AVM_PA_SESSION_STATS_VALID_PKTS * !!stats->tx_pkts;
	stats->validflags |= AVM_PA_SESSION_STATS_VALID_BYTES * !!stats->tx_bytes;

	return HWPA_BACKEND_SUCCESS;
}

int try_to_accelerate(avm_pid_handle pid_handle, struct sk_buff *skb)
{
	/* Let avm_dev_pid_receive continue to process the packet. */
	return AVM_PA_RX_BYPASS;
}

enum hwpa_backend_rv backend_activate_hw(avm_pid_handle pid_handle)
{
	struct net_device *dev = hwpa_get_netdev(pid_handle);
	int ret;

	if (!dev)
		return HWPA_BACKEND_ERR_INTERNAL;

	ret = set_mpe_checksum_netdev(dev);
	dev_put(dev);

	if (ret != 0)
		return HWPA_BACKEND_SUCCESS;

	return HWPA_BACKEND_ERR_INTERNAL;
}

enum hwpa_backend_rv hwpa_backend_init(struct hwpa_backend_config *hw_pa_config)
{
	INIT_LIST_HEAD(&free_list);
	INIT_DELAYED_WORK(&free_work, mpe_free_worker);

	kmem = kmem_cache_create("offpa_sess", sizeof(struct session), 0,
				 SLAB_HWCACHE_ALIGN | SLAB_RED_ZONE, NULL);
	if (!kmem)
		return HWPA_BACKEND_ERR_INTERNAL;

	return HWPA_BACKEND_SUCCESS;
}

void hwpa_backend_exit(void)
{
	cancel_delayed_work_sync(&free_work);
	kmem_cache_destroy(kmem);
}